From fec577fb7f516e0d12ff821b1af272fd754e120a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Feb 2007 10:40:21 -0500 Subject: Btrfs: Add fsx-style randomized tree tester Add debug-tree command to print the tree Add extent-tree.c to the repo Comment ctree.h Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 296 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 296 insertions(+) create mode 100644 fs/btrfs/extent-tree.c (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c new file mode 100644 index 00000000000..2b663ef8449 --- /dev/null +++ b/fs/btrfs/extent-tree.c @@ -0,0 +1,296 @@ +#include +#include +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" +#include "print-tree.h" + +/* + * pending extents are blocks that we're trying to allocate in the extent + * map while trying to grow the map because of other allocations. To avoid + * recursing, they are tagged in the radix tree and cleaned up after + * other allocations are done. The pending tag is also used in the same + * manner for deletes. + */ +#define CTREE_EXTENT_PENDING 0 + +/* + * find all the blocks marked as pending in the radix tree and remove + * them from the extent map + */ +static int del_pending_extents(struct ctree_root *extent_root) +{ + int ret; + struct key key; + struct tree_buffer *gang[4]; + int i; + struct ctree_path path; + + while(1) { + ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix, + (void **)gang, 0, + ARRAY_SIZE(gang), + CTREE_EXTENT_PENDING); + if (!ret) + break; + for (i = 0; i < ret; i++) { + key.objectid = gang[i]->blocknr; + key.flags = 0; + key.offset = 1; + init_path(&path); + ret = search_slot(extent_root, &key, &path, 0); + if (ret) { + print_tree(extent_root, extent_root->node); + printf("unable to find %lu\n", key.objectid); + BUG(); + // FIXME undo it and return sane + return ret; + } + ret = del_item(extent_root, &path); + if (ret) { + BUG(); + return ret; + } + release_path(extent_root, &path); + radix_tree_tag_clear(&extent_root->cache_radix, + gang[i]->blocknr, + CTREE_EXTENT_PENDING); + tree_block_release(extent_root, gang[i]); + } + } + return 0; +} + +/* + * remove an extent from the root, returns 0 on success + */ +int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) +{ + struct ctree_path path; + struct key key; + struct ctree_root *extent_root = root->extent_root; + struct tree_buffer *t; + int pending_ret; + int ret; + key.objectid = blocknr; + key.flags = 0; + key.offset = num_blocks; + if (root == extent_root) { + t = read_tree_block(root, key.objectid); + radix_tree_tag_set(&root->cache_radix, key.objectid, + CTREE_EXTENT_PENDING); + return 0; + } + init_path(&path); + ret = search_slot(extent_root, &key, &path, 0); + if (ret) { + print_tree(extent_root, extent_root->node); + printf("failed to find %lu\n", key.objectid); + BUG(); + } + ret = del_item(extent_root, &path); + if (ret) + BUG(); + release_path(extent_root, &path); + pending_ret = del_pending_extents(root->extent_root); + return ret ? ret : pending_ret; +} + +/* + * walks the btree of allocated extents and find a hole of a given size. + * The key ins is changed to record the hole: + * ins->objectid == block start + * ins->flags = 0 + * ins->offset == number of blocks + * Any available blocks before search_start are skipped. + */ +int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, + u64 search_start, u64 search_end, struct key *ins) +{ + struct ctree_path path; + struct key *key; + int ret; + u64 hole_size = 0; + int slot = 0; + u64 last_block; + int start_found; + struct leaf *l; + struct ctree_root * root = orig_root->extent_root; + +check_failed: + init_path(&path); + ins->objectid = search_start; + ins->offset = 0; + ins->flags = 0; + start_found = 0; + ret = search_slot(root, ins, &path, 0); + while (1) { + l = &path.nodes[0]->leaf; + slot = path.slots[0]; + if (slot >= l->header.nritems) { + ret = next_leaf(root, &path); + if (ret == 0) + continue; + if (!start_found) { + ins->objectid = search_start; + ins->offset = num_blocks; + start_found = 1; + goto check_pending; + } + ins->objectid = last_block > search_start ? + last_block : search_start; + ins->offset = num_blocks; + goto check_pending; + } + key = &l->items[slot].key; + if (key->objectid >= search_start) { + if (start_found) { + hole_size = key->objectid - last_block; + if (hole_size > num_blocks) { + ins->objectid = last_block; + ins->offset = num_blocks; + goto check_pending; + } + } else + start_found = 1; + last_block = key->objectid + key->offset; + } + path.slots[0]++; + } + // FIXME -ENOSPC +check_pending: + /* we have to make sure we didn't find an extent that has already + * been allocated by the map tree or the original allocation + */ + release_path(root, &path); + BUG_ON(ins->objectid < search_start); + if (orig_root->extent_root == orig_root) { + BUG_ON(num_blocks != 1); + if ((root->current_insert.objectid <= ins->objectid && + root->current_insert.objectid + + root->current_insert.offset > ins->objectid) || + (root->current_insert.objectid > ins->objectid && + root->current_insert.objectid <= ins->objectid + + ins->offset) || + radix_tree_tag_get(&root->cache_radix, ins->objectid, + CTREE_EXTENT_PENDING)) { + search_start = ins->objectid + 1; + goto check_failed; + } + } + if (ins->offset != 1) + BUG(); + return 0; +} + +/* + * insert all of the pending extents reserved during the original + * allocation. (CTREE_EXTENT_PENDING). Returns zero if it all worked out + */ +static int insert_pending_extents(struct ctree_root *extent_root) +{ + int ret; + struct key key; + struct extent_item item; + struct tree_buffer *gang[4]; + int i; + + // FIXME -ENOSPC + item.refs = 1; + item.owner = extent_root->node->node.header.parentid; + while(1) { + ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix, + (void **)gang, 0, + ARRAY_SIZE(gang), + CTREE_EXTENT_PENDING); + if (!ret) + break; + for (i = 0; i < ret; i++) { + key.objectid = gang[i]->blocknr; + key.flags = 0; + key.offset = 1; + ret = insert_item(extent_root, &key, &item, + sizeof(item)); + if (ret) { + BUG(); + // FIXME undo it and return sane + return ret; + } + radix_tree_tag_clear(&extent_root->cache_radix, + gang[i]->blocknr, + CTREE_EXTENT_PENDING); + tree_block_release(extent_root, gang[i]); + } + } + return 0; +} + +/* + * finds a free extent and does all the dirty work required for allocation + * returns the key for the extent through ins, and a tree buffer for + * the first block of the extent through buf. + * + * returns 0 if everything worked, non-zero otherwise. + */ +int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, + u64 search_end, u64 owner, struct key *ins, + struct tree_buffer **buf) +{ + int ret; + int pending_ret; + struct extent_item extent_item; + extent_item.refs = 1; + extent_item.owner = owner; + + ret = find_free_extent(root, num_blocks, search_start, search_end, ins); + if (ret) + return ret; + if (root != root->extent_root) { + memcpy(&root->extent_root->current_insert, ins, sizeof(*ins)); + ret = insert_item(root->extent_root, ins, &extent_item, + sizeof(extent_item)); + memset(&root->extent_root->current_insert, 0, + sizeof(struct key)); + pending_ret = insert_pending_extents(root->extent_root); + if (ret) + return ret; + if (pending_ret) + return pending_ret; + *buf = find_tree_block(root, ins->objectid); + return 0; + } + /* we're allocating an extent for the extent tree, don't recurse */ + BUG_ON(ins->offset != 1); + *buf = find_tree_block(root, ins->objectid); + BUG_ON(!*buf); + radix_tree_tag_set(&root->cache_radix, ins->objectid, + CTREE_EXTENT_PENDING); + (*buf)->count++; + return 0; + +} + +/* + * helper function to allocate a block for a given tree + * returns the tree buffer or NULL. + */ +struct tree_buffer *alloc_free_block(struct ctree_root *root) +{ + struct key ins; + int ret; + struct tree_buffer *buf = NULL; + + ret = alloc_extent(root, 1, 0, (unsigned long)-1, + root->node->node.header.parentid, + &ins, &buf); + + if (ret) { + BUG(); + return NULL; + } + if (root != root->extent_root) + BUG_ON(radix_tree_tag_get(&root->extent_root->cache_radix, + buf->blocknr, CTREE_EXTENT_PENDING)); + return buf; +} -- cgit v1.2.3-70-g09d2 From 7cf75962acd27ed0f819df791486e7b803c7109c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Feb 2007 10:55:01 -0500 Subject: Btrfs: u64 cleanups Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/kerncompat.h | 2 +- fs/btrfs/print-tree.c | 10 +++++----- fs/btrfs/random-test.c | 16 ++++++++-------- 5 files changed, 17 insertions(+), 17 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 855e8f499e3..ea9b6665da3 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -17,6 +17,6 @@ tester: $(objects) random-test.o $(objects) : $(headers) clean : - rm ctree *.o + rm debug-tree tester *.o diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2b663ef8449..26321524c18 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -42,7 +42,7 @@ static int del_pending_extents(struct ctree_root *extent_root) ret = search_slot(extent_root, &key, &path, 0); if (ret) { print_tree(extent_root, extent_root->node); - printf("unable to find %lu\n", key.objectid); + printf("unable to find %Lu\n", key.objectid); BUG(); // FIXME undo it and return sane return ret; @@ -86,7 +86,7 @@ int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) ret = search_slot(extent_root, &key, &path, 0); if (ret) { print_tree(extent_root, extent_root->node); - printf("failed to find %lu\n", key.objectid); + printf("failed to find %Lu\n", key.objectid); BUG(); } ret = del_item(extent_root, &path); diff --git a/fs/btrfs/kerncompat.h b/fs/btrfs/kerncompat.h index 347ca06205c..f5efc5f0fff 100644 --- a/fs/btrfs/kerncompat.h +++ b/fs/btrfs/kerncompat.h @@ -15,7 +15,7 @@ #define BUG() abort() typedef unsigned int u32; -typedef unsigned long u64; +typedef unsigned long long u64; typedef unsigned char u8; typedef unsigned short u16; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 594d23b5b24..1d591270f4c 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -11,19 +11,19 @@ void print_leaf(struct leaf *l) int nr = l->header.nritems; struct item *item; struct extent_item *ei; - printf("leaf %lu total ptrs %d free space %d\n", l->header.blocknr, nr, + printf("leaf %Lu total ptrs %d free space %d\n", l->header.blocknr, nr, leaf_free_space(l)); fflush(stdout); for (i = 0 ; i < nr ; i++) { item = l->items + i; - printf("\titem %d key (%lu %u %lu) itemoff %d itemsize %d\n", + printf("\titem %d key (%Lu %u %Lu) itemoff %d itemsize %d\n", i, item->key.objectid, item->key.flags, item->key.offset, item->offset, item->size); fflush(stdout); printf("\t\titem data %.*s\n", item->size, l->data+item->offset); ei = (struct extent_item *)(l->data + item->offset); - printf("\t\textent data %u %lu\n", ei->refs, ei->owner); + printf("\t\textent data %u %Lu\n", ei->refs, ei->owner); fflush(stdout); } } @@ -43,12 +43,12 @@ void print_tree(struct ctree_root *root, struct tree_buffer *t) print_leaf((struct leaf *)c); return; } - printf("node %lu level %d total ptrs %d free spc %lu\n", t->blocknr, + printf("node %Lu level %d total ptrs %d free spc %lu\n", t->blocknr, node_level(c->header.flags), c->header.nritems, NODEPTRS_PER_BLOCK - c->header.nritems); fflush(stdout); for (i = 0; i < nr; i++) { - printf("\tkey %d (%lu %u %lu) block %lu\n", + printf("\tkey %d (%Lu %u %Lu) block %Lu\n", i, c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset, c->blockptrs[i]); diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index 3c8c68d55d2..111a248bd5c 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -42,7 +42,7 @@ static int ins_one(struct ctree_root *root, struct radix_tree_root *radix) char buf[128]; init_path(&path); ret = setup_key(radix, &key, 0); - sprintf(buf, "str-%lu\n", key.objectid); + sprintf(buf, "str-%Lu\n", key.objectid); ret = insert_item(root, &key, buf, strlen(buf)); if (ret) goto error; @@ -54,7 +54,7 @@ static int ins_one(struct ctree_root *root, struct radix_tree_root *radix) goto error; return ret; error: - printf("failed to insert %lu\n", key.objectid); + printf("failed to insert %Lu\n", key.objectid); return -1; } @@ -68,10 +68,10 @@ static int insert_dup(struct ctree_root *root, struct radix_tree_root *radix) ret = setup_key(radix, &key, 1); if (ret < 0) return 0; - sprintf(buf, "str-%lu\n", key.objectid); + sprintf(buf, "str-%Lu\n", key.objectid); ret = insert_item(root, &key, buf, strlen(buf)); if (ret != -EEXIST) { - printf("insert on %lu gave us %d\n", key.objectid, ret); + printf("insert on %Lu gave us %d\n", key.objectid, ret); return 1; } return 0; @@ -99,7 +99,7 @@ static int del_one(struct ctree_root *root, struct radix_tree_root *radix) goto error; return 0; error: - printf("failed to delete %lu\n", key.objectid); + printf("failed to delete %Lu\n", key.objectid); return -1; } @@ -118,7 +118,7 @@ static int lookup_item(struct ctree_root *root, struct radix_tree_root *radix) goto error; return 0; error: - printf("unable to find key %lu\n", key.objectid); + printf("unable to find key %Lu\n", key.objectid); return -1; } @@ -137,7 +137,7 @@ static int lookup_enoent(struct ctree_root *root, struct radix_tree_root *radix) goto error; return 0; error: - printf("able to find key that should not exist %lu\n", key.objectid); + printf("able to find key that should not exist %Lu\n", key.objectid); return -1; } @@ -148,7 +148,7 @@ static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) { struct ctree_path path; struct key key; - u64 found; + unsigned long found; int ret; int slot; int i; -- cgit v1.2.3-70-g09d2 From aa5d6bed255d7f8c655a8f10d760f4247bc8385c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 28 Feb 2007 16:35:06 -0500 Subject: Btrfs: return code checking Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 5 +- fs/btrfs/ctree.c | 480 ++++++++++++++++++++++++++----------------------- fs/btrfs/extent-tree.c | 5 + fs/btrfs/random-test.c | 7 +- 4 files changed, 269 insertions(+), 228 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 078061c321b..58d4260a04b 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -1,10 +1,13 @@ +CC=gcc CFLAGS = -g -Wall headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o # if you don't have sparse installed, use ls instead -check=sparse +CHECKFLAGS=-D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise \ + -Wcontext -Wcast-truncate -Wuninitialized -Wshadow -Wundef +check=sparse $(CHECKFLAGS) #check=ls .c.o: diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 0aea94224ba..be2be027251 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -6,12 +6,15 @@ #include "disk-io.h" #include "print-tree.h" -int split_node(struct ctree_root *root, struct ctree_path *path, int level); -int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size); -int push_node_left(struct ctree_root *root, struct ctree_path *path, int level); -int push_node_right(struct ctree_root *root, +static int split_node(struct ctree_root *root, struct ctree_path *path, + int level); +static int split_leaf(struct ctree_root *root, struct ctree_path *path, + int data_size); +static int push_node_left(struct ctree_root *root, struct ctree_path *path, + int level); +static int push_node_right(struct ctree_root *root, struct ctree_path *path, int level); -int del_ptr(struct ctree_root *root, struct ctree_path *path, int level); +static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level); inline void init_path(struct ctree_path *p) { @@ -26,6 +29,7 @@ void release_path(struct ctree_root *root, struct ctree_path *p) break; tree_block_release(root, p->nodes[i]); } + memset(p, 0, sizeof(*p)); } /* @@ -74,6 +78,67 @@ int comp_keys(struct key *k1, struct key *k2) return 0; } +int check_node(struct ctree_path *path, int level) +{ + int i; + struct node *parent = NULL; + struct node *node = &path->nodes[level]->node; + int parent_slot; + + if (path->nodes[level + 1]) + parent = &path->nodes[level + 1]->node; + parent_slot = path->slots[level + 1]; + if (parent && node->header.nritems > 0) { + struct key *parent_key; + parent_key = &parent->keys[parent_slot]; + BUG_ON(memcmp(parent_key, node->keys, sizeof(struct key))); + BUG_ON(parent->blockptrs[parent_slot] != node->header.blocknr); + } + BUG_ON(node->header.nritems > NODEPTRS_PER_BLOCK); + for (i = 0; i < node->header.nritems - 2; i++) { + BUG_ON(comp_keys(&node->keys[i], &node->keys[i+1]) >= 0); + } + return 0; +} + +int check_leaf(struct ctree_path *path, int level) +{ + int i; + struct leaf *leaf = &path->nodes[level]->leaf; + struct node *parent = NULL; + int parent_slot; + + if (path->nodes[level + 1]) + parent = &path->nodes[level + 1]->node; + parent_slot = path->slots[level + 1]; + if (parent && leaf->header.nritems > 0) { + struct key *parent_key; + parent_key = &parent->keys[parent_slot]; + BUG_ON(memcmp(parent_key, &leaf->items[0].key, + sizeof(struct key))); + BUG_ON(parent->blockptrs[parent_slot] != leaf->header.blocknr); + } + for (i = 0; i < leaf->header.nritems - 2; i++) { + BUG_ON(comp_keys(&leaf->items[i].key, + &leaf->items[i+1].key) >= 0); + BUG_ON(leaf->items[i].offset != leaf->items[i + 1].offset + + leaf->items[i + 1].size); + if (i == 0) { + BUG_ON(leaf->items[i].offset + leaf->items[i].size != + LEAF_DATA_SIZE); + } + } + BUG_ON(leaf_free_space(leaf) < 0); + return 0; +} + +int check_block(struct ctree_path *path, int level) +{ + if (level == 0) + return check_leaf(path, level); + return check_node(path, level); +} + /* * search for key in the array p. items p are item_size apart * and there are 'max' items in p @@ -133,7 +198,8 @@ int bin_search(struct node *c, struct key *key, int *slot) * level of the path (level 0) * * If the key isn't found, the path points to the slot where it should - * be inserted. + * be inserted, and 1 is returned. If there are other errors during the + * search a negative error number is returned. * * if ins_len > 0, nodes and leaves will be split as we walk down the * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if @@ -153,6 +219,9 @@ int search_slot(struct ctree_root *root, struct key *key, c = &b->node; level = node_level(c->header.flags); p->nodes[level] = b; + ret = check_block(p, level); + if (ret) + return -1; ret = bin_search(c, key, &slot); if (!is_leaf(c->header.flags)) { if (ret && slot > 0) @@ -183,7 +252,7 @@ int search_slot(struct ctree_root *root, struct key *key, return ret; } } - return -1; + return 1; } /* @@ -192,12 +261,17 @@ int search_slot(struct ctree_root *root, struct key *key, * This is used after shifting pointers to the left, so it stops * fixing up pointers when a given leaf/node is not in slot 0 of the * higher levels + * + * If this fails to write a tree block, it returns -1, but continues + * fixing up the blocks in ram so the tree is consistent. */ -static void fixup_low_keys(struct ctree_root *root, +static int fixup_low_keys(struct ctree_root *root, struct ctree_path *path, struct key *key, int level) { int i; + int ret = 0; + int wret; for (i = level; i < MAX_LEVEL; i++) { struct node *t; int tslot = path->slots[i]; @@ -205,10 +279,13 @@ static void fixup_low_keys(struct ctree_root *root, break; t = &path->nodes[i]->node; memcpy(t->keys + tslot, key, sizeof(*key)); - write_tree_block(root, path->nodes[i]); + wret = write_tree_block(root, path->nodes[i]); + if (wret) + ret = wret; if (tslot != 0) break; } + return ret; } /* @@ -220,8 +297,12 @@ static void fixup_low_keys(struct ctree_root *root, * be modified to reflect the push. * * The path is altered to reflect the push. + * + * returns 0 if some ptrs were pushed left, < 0 if there was some horrible + * error, and > 0 if there was no room in the left hand block. */ -int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) +static int push_node_left(struct ctree_root *root, struct ctree_path *path, + int level) { int slot; struct node *left; @@ -231,6 +312,8 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) int right_nritems; struct tree_buffer *t; struct tree_buffer *right_buf; + int ret = 0; + int wret; if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0) return 1; @@ -265,10 +348,17 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) left->header.nritems += push_items; /* adjust the pointers going up the tree */ - fixup_low_keys(root, path, right->keys, level + 1); + wret = fixup_low_keys(root, path, right->keys, level + 1); + if (wret < 0) + ret = wret; - write_tree_block(root, t); - write_tree_block(root, right_buf); + wret = write_tree_block(root, t); + if (wret < 0) + ret = wret; + + wret = write_tree_block(root, right_buf); + if (wret < 0) + ret = wret; /* then fixup the leaf pointer in the path */ if (path->slots[level] < push_items) { @@ -280,7 +370,7 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) path->slots[level] -= push_items; tree_block_release(root, t); } - return 0; + return ret; } /* @@ -292,8 +382,12 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) * be modified to reflect the push. * * The path is altered to reflect the push. + * + * returns 0 if some ptrs were pushed, < 0 if there was some horrible + * error, and > 0 if there was no room in the right hand block. */ -int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) +static int push_node_right(struct ctree_root *root, struct ctree_path *path, + int level) { int slot; struct tree_buffer *t; @@ -368,6 +462,8 @@ int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) * helper function to insert a new root level in the tree. * A new node is allocated, and a single item is inserted to * point to the existing root + * + * returns zero on success or < 0 on failure. */ static int insert_new_root(struct ctree_root *root, struct ctree_path *path, int level) @@ -410,8 +506,10 @@ static int insert_new_root(struct ctree_root *root, * * slot and level indicate where you want the key to go, and * blocknr is the block the key points to. + * + * returns zero on success and < 0 on any error */ -int insert_ptr(struct ctree_root *root, +static int insert_ptr(struct ctree_root *root, struct ctree_path *path, struct key *key, u64 blocknr, int slot, int level) { @@ -446,8 +544,11 @@ int insert_ptr(struct ctree_root *root, * * Before splitting this tries to make some room in the node by pushing * left and right, if either one works, it returns right away. + * + * returns 0 on success and < 0 on failure */ -int split_node(struct ctree_root *root, struct ctree_path *path, int level) +static int split_node(struct ctree_root *root, struct ctree_path *path, + int level) { struct tree_buffer *t; struct node *c; @@ -455,13 +556,18 @@ int split_node(struct ctree_root *root, struct ctree_path *path, int level) struct node *split; int mid; int ret; + int wret; ret = push_node_left(root, path, level); if (!ret) return 0; + if (ret < 0) + return ret; ret = push_node_right(root, path, level); if (!ret) return 0; + if (ret < 0) + return ret; t = path->nodes[level]; c = &t->node; if (t == root->node) { @@ -482,10 +588,19 @@ int split_node(struct ctree_root *root, struct ctree_path *path, int level) (c->header.nritems - mid) * sizeof(u64)); split->header.nritems = c->header.nritems - mid; c->header.nritems = mid; - write_tree_block(root, t); - write_tree_block(root, split_buffer); - insert_ptr(root, path, split->keys, split_buffer->blocknr, - path->slots[level + 1] + 1, level + 1); + ret = 0; + + wret = write_tree_block(root, t); + if (wret) + ret = wret; + wret = write_tree_block(root, split_buffer); + if (wret) + ret = wret; + wret = insert_ptr(root, path, split->keys, split_buffer->blocknr, + path->slots[level + 1] + 1, level + 1); + if (wret) + ret = wret; + if (path->slots[level] >= mid) { path->slots[level] -= mid; tree_block_release(root, t); @@ -494,7 +609,7 @@ int split_node(struct ctree_root *root, struct ctree_path *path, int level) } else { tree_block_release(root, split_buffer); } - return 0; + return ret; } /* @@ -502,7 +617,7 @@ int split_node(struct ctree_root *root, struct ctree_path *path, int level) * and nr indicate which items in the leaf to check. This totals up the * space used both by the item structs and the item data */ -int leaf_space_used(struct leaf *l, int start, int nr) +static int leaf_space_used(struct leaf *l, int start, int nr) { int data_len; int end = start + nr - 1; @@ -518,9 +633,12 @@ int leaf_space_used(struct leaf *l, int start, int nr) /* * push some data in the path leaf to the right, trying to free up at * least data_size bytes. returns zero if the push worked, nonzero otherwise + * + * returns 1 if the push failed because the other node didn't have enough + * room, 0 if everything worked out and < 0 if there were major errors. */ -int push_leaf_right(struct ctree_root *root, struct ctree_path *path, - int data_size) +static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, + int data_size) { struct tree_buffer *left_buf = path->nodes[0]; struct leaf *left = &left_buf->leaf; @@ -609,8 +727,8 @@ int push_leaf_right(struct ctree_root *root, struct ctree_path *path, * push some data in the path leaf to the left, trying to free up at * least data_size bytes. returns zero if the push worked, nonzero otherwise */ -int push_leaf_left(struct ctree_root *root, struct ctree_path *path, - int data_size) +static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, + int data_size) { struct tree_buffer *right_buf = path->nodes[0]; struct leaf *right = &right_buf->leaf; @@ -623,6 +741,8 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path, int push_items = 0; struct item *item; int old_left_nritems; + int ret = 0; + int wret; slot = path->slots[1]; if (slot == 0) { @@ -681,10 +801,16 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path, push_space = right->items[i].offset; } - write_tree_block(root, t); - write_tree_block(root, right_buf); + wret = write_tree_block(root, t); + if (wret) + ret = wret; + wret = write_tree_block(root, right_buf); + if (wret) + ret = wret; - fixup_low_keys(root, path, &right->items[0].key, 1); + wret = fixup_low_keys(root, path, &right->items[0].key, 1); + if (wret) + ret = wret; /* then fixup the leaf pointer in the path */ if (path->slots[0] < push_items) { @@ -697,17 +823,20 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path, path->slots[0] -= push_items; } BUG_ON(path->slots[0] < 0); - return 0; + return ret; } /* * split the path's leaf in two, making sure there is at least data_size * available for the resulting leaf level of the path. + * + * returns 0 if all went well and < 0 on failure. */ -int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) +static int split_leaf(struct ctree_root *root, struct ctree_path *path, + int data_size) { - struct tree_buffer *l_buf = path->nodes[0]; - struct leaf *l = &l_buf->leaf; + struct tree_buffer *l_buf; + struct leaf *l; int nritems; int mid; int slot; @@ -718,14 +847,23 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) int rt_data_off; int i; int ret; - - if (push_leaf_left(root, path, data_size) == 0 || - push_leaf_right(root, path, data_size) == 0) { - l_buf = path->nodes[0]; - l = &l_buf->leaf; - if (leaf_free_space(l) >= sizeof(struct item) + data_size) - return 0; + int wret; + + wret = push_leaf_left(root, path, data_size); + if (wret < 0) + return wret; + if (wret) { + wret = push_leaf_right(root, path, data_size); + if (wret < 0) + return wret; } + l_buf = path->nodes[0]; + l = &l_buf->leaf; + + /* did the pushes work? */ + if (leaf_free_space(l) >= sizeof(struct item) + data_size) + return 0; + if (!path->nodes[1]) { ret = insert_new_root(root, path, 1); if (ret) @@ -768,10 +906,17 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) right->items[i].offset += rt_data_off; l->header.nritems = mid; - ret = insert_ptr(root, path, &right->items[0].key, + ret = 0; + wret = insert_ptr(root, path, &right->items[0].key, right_buffer->blocknr, path->slots[1] + 1, 1); - write_tree_block(root, right_buffer); - write_tree_block(root, l_buf); + if (wret) + ret = wret; + wret = write_tree_block(root, right_buffer); + if (wret) + ret = wret; + wret = write_tree_block(root, l_buf); + if (wret) + ret = wret; BUG_ON(path->slots[0] != slot); if (mid <= slot) { @@ -792,7 +937,8 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) int insert_item(struct ctree_root *root, struct key *key, void *data, int data_size) { - int ret; + int ret = 0; + int wret; int slot; int slot_orig; struct leaf *leaf; @@ -810,6 +956,10 @@ int insert_item(struct ctree_root *root, struct key *key, release_path(root, &path); return -EEXIST; } + if (ret < 0) { + release_path(root, &path); + return ret; + } slot_orig = path.slots[0]; leaf_buf = path.nodes[0]; @@ -850,13 +1000,19 @@ int insert_item(struct ctree_root *root, struct key *key, leaf->items[slot].size = data_size; memcpy(leaf->data + data_end - data_size, data, data_size); leaf->header.nritems += 1; - write_tree_block(root, leaf_buf); + + ret = 0; if (slot == 0) - fixup_low_keys(root, &path, key, 1); + ret = fixup_low_keys(root, &path, key, 1); + + wret = write_tree_block(root, leaf_buf); + if (wret) + ret = wret; + if (leaf_free_space(leaf) < 0) BUG(); release_path(root, &path); - return 0; + return ret; } /* @@ -866,13 +1022,15 @@ int insert_item(struct ctree_root *root, struct key *key, * continuing all the way the root if required. The root is converted into * a leaf if all the nodes are emptied. */ -int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) +static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) { int slot; struct tree_buffer *t; struct node *node; int nritems; u64 blocknr; + int wret; + int ret = 0; while(1) { t = path->nodes[level]; @@ -894,13 +1052,27 @@ int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) write_tree_block(root, t); if (node->header.nritems != 0) { int tslot; - if (slot == 0) - fixup_low_keys(root, path, node->keys, - level + 1); + if (slot == 0) { + wret = fixup_low_keys(root, path, + node->keys, + level + 1); + if (wret) + ret = wret; + } tslot = path->slots[level + 1]; t->count++; - if (push_node_left(root, path, level)) - push_node_right(root, path, level); + wret = push_node_left(root, path, level); + if (wret < 0) { + ret = wret; + break; + } + if (node->header.nritems != 0) { + wret = push_node_right(root, path, level); + if (wret < 0) { + ret = wret; + break; + } + } path->slots[level + 1] = tslot; if (node->header.nritems != 0) { tree_block_release(root, t); @@ -919,7 +1091,7 @@ int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) if (!path->nodes[level]) BUG(); } - return 0; + return ret; } /* @@ -933,6 +1105,8 @@ int del_item(struct ctree_root *root, struct ctree_path *path) struct tree_buffer *leaf_buf; int doff; int dsize; + int ret = 0; + int wret; leaf_buf = path->nodes[0]; leaf = &leaf_buf->leaf; @@ -959,14 +1133,23 @@ int del_item(struct ctree_root *root, struct ctree_path *path) leaf->header.flags = node_level(0); write_tree_block(root, leaf_buf); } else { - del_ptr(root, path, 1); + wret = del_ptr(root, path, 1); + if (wret) + ret = wret; free_extent(root, leaf_buf->blocknr, 1); } } else { int used = leaf_space_used(leaf, 0, leaf->header.nritems); - if (slot == 0) - fixup_low_keys(root, path, &leaf->items[0].key, 1); - write_tree_block(root, leaf_buf); + if (slot == 0) { + wret = fixup_low_keys(root, path, + &leaf->items[0].key, 1); + if (wret) + ret = wret; + } + wret = write_tree_block(root, leaf_buf); + if (wret) + ret = wret; + /* delete the leaf if it is mostly empty */ if (used < LEAF_DATA_SIZE / 3) { /* push_leaf_left fixes the path. @@ -975,13 +1158,20 @@ int del_item(struct ctree_root *root, struct ctree_path *path) */ slot = path->slots[1]; leaf_buf->count++; - push_leaf_left(root, path, 1); - if (leaf->header.nritems) - push_leaf_right(root, path, 1); + wret = push_leaf_left(root, path, 1); + if (wret < 0) + ret = wret; + if (leaf->header.nritems) { + wret = push_leaf_right(root, path, 1); + if (wret < 0) + ret = wret; + } if (leaf->header.nritems == 0) { u64 blocknr = leaf_buf->blocknr; path->slots[1] = slot; - del_ptr(root, path, 1); + wret = del_ptr(root, path, 1); + if (wret) + ret = wret; tree_block_release(root, leaf_buf); free_extent(root, blocknr, 1); } else { @@ -989,7 +1179,7 @@ int del_item(struct ctree_root *root, struct ctree_path *path) } } } - return 0; + return ret; } /* @@ -1033,165 +1223,3 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) return 0; } -/* some sample code to insert,search & delete items */ -#if 0 -/* for testing only */ -int next_key(int i, int max_key) { - return rand() % max_key; - //return i; -} -int main() { - struct key ins; - struct key last = { (u64)-1, 0, 0}; - char *buf; - int i; - int num; - int ret; - int run_size = 20000000; - int max_key = 100000000; - int tree_size = 0; - struct ctree_path path; - struct ctree_super_block super; - struct ctree_root *root; - - radix_tree_init(); - - - root = open_ctree("dbfile", &super); - srand(55); - for (i = 0; i < run_size; i++) { - buf = malloc(64); - num = next_key(i, max_key); - // num = i; - sprintf(buf, "string-%d", num); - if (i % 10000 == 0) - fprintf(stderr, "insert %d:%d\n", num, i); - ins.objectid = num; - ins.offset = 0; - ins.flags = 0; - ret = insert_item(root, &ins, buf, strlen(buf)); - if (!ret) - tree_size++; - free(buf); - } - write_ctree_super(root, &super); - close_ctree(root); - - root = open_ctree("dbfile", &super); - printf("starting search\n"); - srand(55); - for (i = 0; i < run_size; i++) { - num = next_key(i, max_key); - ins.objectid = num; - init_path(&path); - if (i % 10000 == 0) - fprintf(stderr, "search %d:%d\n", num, i); - ret = search_slot(root, &ins, &path, 0); - if (ret) { - print_tree(root, root->node); - printf("unable to find %d\n", num); - exit(1); - } - release_path(root, &path); - } - write_ctree_super(root, &super); - close_ctree(root); - root = open_ctree("dbfile", &super); - printf("node %p level %d total ptrs %d free spc %lu\n", root->node, - node_level(root->node->node.header.flags), - root->node->node.header.nritems, - NODEPTRS_PER_BLOCK - root->node->node.header.nritems); - printf("all searches good, deleting some items\n"); - i = 0; - srand(55); - for (i = 0 ; i < run_size/4; i++) { - num = next_key(i, max_key); - ins.objectid = num; - init_path(&path); - ret = search_slot(root, &ins, &path, -1); - if (!ret) { - if (i % 10000 == 0) - fprintf(stderr, "del %d:%d\n", num, i); - ret = del_item(root, &path); - if (ret != 0) - BUG(); - tree_size--; - } - release_path(root, &path); - } - write_ctree_super(root, &super); - close_ctree(root); - root = open_ctree("dbfile", &super); - srand(128); - for (i = 0; i < run_size; i++) { - buf = malloc(64); - num = next_key(i, max_key); - sprintf(buf, "string-%d", num); - ins.objectid = num; - if (i % 10000 == 0) - fprintf(stderr, "insert %d:%d\n", num, i); - ret = insert_item(root, &ins, buf, strlen(buf)); - if (!ret) - tree_size++; - free(buf); - } - write_ctree_super(root, &super); - close_ctree(root); - root = open_ctree("dbfile", &super); - srand(128); - printf("starting search2\n"); - for (i = 0; i < run_size; i++) { - num = next_key(i, max_key); - ins.objectid = num; - init_path(&path); - if (i % 10000 == 0) - fprintf(stderr, "search %d:%d\n", num, i); - ret = search_slot(root, &ins, &path, 0); - if (ret) { - print_tree(root, root->node); - printf("unable to find %d\n", num); - exit(1); - } - release_path(root, &path); - } - printf("starting big long delete run\n"); - while(root->node && root->node->node.header.nritems > 0) { - struct leaf *leaf; - int slot; - ins.objectid = (u64)-1; - init_path(&path); - ret = search_slot(root, &ins, &path, -1); - if (ret == 0) - BUG(); - - leaf = &path.nodes[0]->leaf; - slot = path.slots[0]; - if (slot != leaf->header.nritems) - BUG(); - while(path.slots[0] > 0) { - path.slots[0] -= 1; - slot = path.slots[0]; - leaf = &path.nodes[0]->leaf; - - if (comp_keys(&last, &leaf->items[slot].key) <= 0) - BUG(); - memcpy(&last, &leaf->items[slot].key, sizeof(last)); - if (tree_size % 10000 == 0) - printf("big del %d:%d\n", tree_size, i); - ret = del_item(root, &path); - if (ret != 0) { - printf("del_item returned %d\n", ret); - BUG(); - } - tree_size--; - } - release_path(root, &path); - } - printf("tree size is now %d\n", tree_size); - printf("map tree\n"); - print_tree(root->extent_root, root->extent_root->node); - write_ctree_super(root, &super); - close_ctree(root); - return 0; -} -#endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 26321524c18..97715bae784 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -125,6 +125,11 @@ check_failed: ins->flags = 0; start_found = 0; ret = search_slot(root, ins, &path, 0); + if (ret < 0) { + release_path(root, &path); + return ret; + } + while (1) { l = &path.nodes[0]->leaf; slot = path.slots[0]; diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index 7078263acc3..cebaf648392 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -134,7 +134,7 @@ static int lookup_enoent(struct ctree_root *root, struct radix_tree_root *radix) return ret; ret = search_slot(root, &key, &path, 0); release_path(root, &path); - if (ret == 0) + if (ret <= 0) goto error; return 0; error: @@ -153,12 +153,17 @@ static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) int ret; int slot; int i; + key.offset = 0; key.flags = 0; key.objectid = (unsigned long)-1; while(1) { init_path(&path); ret = search_slot(root, &key, &path, 0); + if (ret < 0) { + release_path(root, &path); + return ret; + } slot = path.slots[0]; if (ret != 0) { if (slot == 0) { -- cgit v1.2.3-70-g09d2 From 0f70abe2b39d19171d4133d2ffdf77fb9113106a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 28 Feb 2007 16:46:22 -0500 Subject: Btrfs: more return code checking Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 17 ++++++++++++----- fs/btrfs/extent-tree.c | 15 +++++++++------ 2 files changed, 21 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index be2be027251..27323993225 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1087,7 +1087,9 @@ static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) break; } level++; - free_extent(root, blocknr, 1); + wret = free_extent(root, blocknr, 1); + if (wret) + ret = wret; if (!path->nodes[level]) BUG(); } @@ -1136,7 +1138,9 @@ int del_item(struct ctree_root *root, struct ctree_path *path) wret = del_ptr(root, path, 1); if (wret) ret = wret; - free_extent(root, leaf_buf->blocknr, 1); + wret = free_extent(root, leaf_buf->blocknr, 1); + if (wret) + ret = wret; } } else { int used = leaf_space_used(leaf, 0, leaf->header.nritems); @@ -1173,7 +1177,9 @@ int del_item(struct ctree_root *root, struct ctree_path *path) if (wret) ret = wret; tree_block_release(root, leaf_buf); - free_extent(root, blocknr, 1); + wret = free_extent(root, blocknr, 1); + if (wret) + ret = wret; } else { tree_block_release(root, leaf_buf); } @@ -1184,7 +1190,8 @@ int del_item(struct ctree_root *root, struct ctree_path *path) /* * walk up the tree as far as required to find the next leaf. - * returns 0 if it found something or -1 if there are no greater leaves. + * returns 0 if it found something or 1 if there are no greater leaves. + * returns < 0 on io errors. */ int next_leaf(struct ctree_root *root, struct ctree_path *path) { @@ -1196,7 +1203,7 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) while(level < MAX_LEVEL) { if (!path->nodes[level]) - return -1; + return 1; slot = path->slots[level] + 1; c = path->nodes[level]; if (slot >= c->node.header.nritems) { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 97715bae784..074f4b182f1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -105,8 +105,8 @@ int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) * ins->offset == number of blocks * Any available blocks before search_start are skipped. */ -int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, - u64 search_start, u64 search_end, struct key *ins) +static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, + u64 search_start, u64 search_end, struct key *ins) { struct ctree_path path; struct key *key; @@ -125,10 +125,8 @@ check_failed: ins->flags = 0; start_found = 0; ret = search_slot(root, ins, &path, 0); - if (ret < 0) { - release_path(root, &path); - return ret; - } + if (ret < 0) + goto error; while (1) { l = &path.nodes[0]->leaf; @@ -137,6 +135,8 @@ check_failed: ret = next_leaf(root, &path); if (ret == 0) continue; + if (ret < 0) + goto error; if (!start_found) { ins->objectid = search_start; ins->offset = num_blocks; @@ -187,6 +187,9 @@ check_pending: if (ins->offset != 1) BUG(); return 0; +error: + release_path(root, &path); + return ret; } /* -- cgit v1.2.3-70-g09d2 From f0930a37f1c096c3a8f6a17b1e251c7fdf4d4457 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 2 Mar 2007 09:47:58 -0500 Subject: Btrfs: Fix extent code to use merge during delete Remove implicit commit in del_item and insert_item Add implicit commit to close() Add commit op in random-test Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 15 +++------------ fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/random-test.c | 10 ++++++++-- 4 files changed, 14 insertions(+), 16 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 57fa505fb9f..3c5f4c2dd52 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1065,11 +1065,7 @@ int insert_item(struct ctree_root *root, struct key *key, ret = search_slot(root, key, &path, data_size); if (ret == 0) { release_path(root, &path); - ret = -EEXIST; - wret = commit_transaction(root); - if (wret) - ret = wret; - return ret; + return -EEXIST; } if (ret < 0) goto out; @@ -1127,9 +1123,6 @@ int insert_item(struct ctree_root *root, struct key *key, check_leaf(&path, 0); out: release_path(root, &path); - wret = commit_transaction(root); - if (wret) - ret = wret; return ret; } @@ -1245,7 +1238,8 @@ int del_item(struct ctree_root *root, struct ctree_path *path) wret = push_leaf_left(root, path, 1); if (wret < 0) ret = wret; - if (leaf->header.nritems) { + if (path->nodes[0] == leaf_buf && + leaf->header.nritems) { wret = push_leaf_right(root, path, 1); if (wret < 0) ret = wret; @@ -1265,9 +1259,6 @@ int del_item(struct ctree_root *root, struct ctree_path *path) } } } - wret = commit_transaction(root); - if (wret) - ret = wret; return ret; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 656ace6147a..f7ca5362291 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -239,6 +239,7 @@ static int drop_cache(struct ctree_root *root) } int close_ctree(struct ctree_root *root) { + commit_transaction(root); drop_cache(root->extent_root); drop_cache(root); BUG_ON(!list_empty(&root->trans)); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 074f4b182f1..fb214449621 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -39,7 +39,7 @@ static int del_pending_extents(struct ctree_root *extent_root) key.flags = 0; key.offset = 1; init_path(&path); - ret = search_slot(extent_root, &key, &path, 0); + ret = search_slot(extent_root, &key, &path, -1); if (ret) { print_tree(extent_root, extent_root->node); printf("unable to find %Lu\n", key.objectid); @@ -83,7 +83,7 @@ int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) return 0; } init_path(&path); - ret = search_slot(extent_root, &key, &path, 0); + ret = search_slot(extent_root, &key, &path, -1); if (ret) { print_tree(extent_root, extent_root->node); printf("failed to find %Lu\n", key.objectid); diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index bbd554e88db..22955753c3a 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -59,6 +59,11 @@ error: return -1; } +static int run_commit(struct ctree_root *root, struct radix_tree_root *radix) +{ + return commit_transaction(root); +} + static int insert_dup(struct ctree_root *root, struct radix_tree_root *radix) { struct ctree_path path; @@ -233,7 +238,8 @@ static int bulk_op(struct ctree_root *root, struct radix_tree_root *radix) int (*ops[])(struct ctree_root *root, struct radix_tree_root *radix) = -{ ins_one, insert_dup, del_one, lookup_item, lookup_enoent, bulk_op }; + { ins_one, insert_dup, del_one, lookup_item, + lookup_enoent, bulk_op, run_commit }; static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) { @@ -366,7 +372,7 @@ int main(int ac, char **av) err = ret; goto out; } - if (ops[op] == bulk_op) + if (ops[op] == bulk_op || ops[op] == run_commit) break; if (keep_running == 0) { err = 0; -- cgit v1.2.3-70-g09d2 From 02217ed299c6340a35696e0610047eb96826de2d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 2 Mar 2007 16:08:05 -0500 Subject: Btrfs: early reference counting Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 153 ++++++++++++++++++++++++++++--------------------- fs/btrfs/ctree.h | 3 +- fs/btrfs/disk-io.c | 2 + fs/btrfs/extent-tree.c | 45 ++++++++++++++- fs/btrfs/quick-test.c | 10 ++-- fs/btrfs/random-test.c | 12 ++-- 6 files changed, 145 insertions(+), 80 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 3c5f4c2dd52..1118986d756 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -34,6 +34,37 @@ void release_path(struct ctree_root *root, struct ctree_path *p) memset(p, 0, sizeof(*p)); } +int btrfs_cow_block(struct ctree_root *root, + struct tree_buffer *buf, + struct tree_buffer *parent, + int parent_slot, + struct tree_buffer **cow_ret) +{ + struct tree_buffer *cow; + + if (!list_empty(&buf->dirty)) { + *cow_ret = buf; + return 0; + } + cow = alloc_free_block(root); + memcpy(&cow->node, &buf->node, sizeof(buf->node)); + cow->node.header.blocknr = cow->blocknr; + *cow_ret = cow; + if (buf == root->node) { + root->node = cow; + cow->count++; + tree_block_release(root, buf); + } else { + parent->node.blockptrs[parent_slot] = cow->blocknr; + BUG_ON(list_empty(&parent->dirty)); + } + if (0 && root != root->extent_root && !is_leaf(cow->node.header.flags)) { + btrfs_inc_ref(root, cow); + } + tree_block_release(root, buf); + return 0; +} + /* * The leaf data grows from end-to-front in the node. * this returns the address of the start of the last item, @@ -263,6 +294,8 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, /* first, try to make some room in the middle buffer */ if (left_buf) { + btrfs_cow_block(root, left_buf, parent_buf, + pslot - 1, &left_buf); left = &left_buf->node; orig_slot += left->header.nritems; wret = push_node_left(root, left_buf, mid_buf); @@ -274,6 +307,8 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, * then try to empty the right most buffer into the middle */ if (right_buf) { + btrfs_cow_block(root, right_buf, parent_buf, + pslot + 1, &right_buf); right = &right_buf->node; wret = push_node_left(root, mid_buf, right_buf); if (wret < 0) @@ -293,9 +328,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, } else { memcpy(parent->keys + pslot + 1, right->keys, sizeof(struct key)); - wret = dirty_tree_block(root, parent_buf); - if (wret) - ret = wret; + BUG_ON(list_empty(&parent_buf->dirty)); } } if (mid->header.nritems == 1) { @@ -330,9 +363,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, } else { /* update the parent key to reflect our changes */ memcpy(parent->keys + pslot, mid->keys, sizeof(struct key)); - wret = dirty_tree_block(root, parent_buf); - if (wret) - ret = wret; + BUG_ON(list_empty(&parent_buf->dirty)); } /* update the path */ @@ -375,9 +406,10 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, * possible) */ int search_slot(struct ctree_root *root, struct key *key, - struct ctree_path *p, int ins_len) + struct ctree_path *p, int ins_len, int cow) { struct tree_buffer *b; + struct tree_buffer *cow_buf; struct node *c; int slot; int ret; @@ -387,8 +419,15 @@ again: b = root->node; b->count++; while (b) { + level = node_level(b->node.header.flags); + if (cow) { + int wret; + wret = btrfs_cow_block(root, b, p->nodes[level + 1], + p->slots[level + 1], &cow_buf); + b = cow_buf; + } + BUG_ON(!cow && ins_len); c = &b->node; - level = node_level(c->header.flags); p->nodes[level] = b; ret = check_block(p, level); if (ret) @@ -453,7 +492,6 @@ static int fixup_low_keys(struct ctree_root *root, { int i; int ret = 0; - int wret; for (i = level; i < MAX_LEVEL; i++) { struct node *t; int tslot = path->slots[i]; @@ -461,9 +499,7 @@ static int fixup_low_keys(struct ctree_root *root, break; t = &path->nodes[i]->node; memcpy(t->keys + tslot, key, sizeof(*key)); - wret = dirty_tree_block(root, path->nodes[i]); - if (wret) - ret = wret; + BUG_ON(list_empty(&path->nodes[i]->dirty)); if (tslot != 0) break; } @@ -486,7 +522,6 @@ static int push_node_left(struct ctree_root *root, struct tree_buffer *dst_buf, int src_nritems; int dst_nritems; int ret = 0; - int wret; src_nritems = src->header.nritems; dst_nritems = dst->header.nritems; @@ -511,13 +546,8 @@ static int push_node_left(struct ctree_root *root, struct tree_buffer *dst_buf, src->header.nritems -= push_items; dst->header.nritems += push_items; - wret = dirty_tree_block(root, src_buf); - if (wret < 0) - ret = wret; - - wret = dirty_tree_block(root, dst_buf); - if (wret < 0) - ret = wret; + BUG_ON(list_empty(&src_buf->dirty)); + BUG_ON(list_empty(&dst_buf->dirty)); return ret; } @@ -541,7 +571,6 @@ static int balance_node_right(struct ctree_root *root, int src_nritems; int dst_nritems; int ret = 0; - int wret; src_nritems = src->header.nritems; dst_nritems = dst->header.nritems; @@ -569,13 +598,8 @@ static int balance_node_right(struct ctree_root *root, src->header.nritems -= push_items; dst->header.nritems += push_items; - wret = dirty_tree_block(root, src_buf); - if (wret < 0) - ret = wret; - - wret = dirty_tree_block(root, dst_buf); - if (wret < 0) - ret = wret; + BUG_ON(list_empty(&src_buf->dirty)); + BUG_ON(list_empty(&dst_buf->dirty)); return ret; } @@ -615,7 +639,6 @@ static int insert_new_root(struct ctree_root *root, tree_block_release(root, root->node); root->node = t; t->count++; - dirty_tree_block(root, t); path->nodes[level] = t; path->slots[level] = 0; return 0; @@ -655,7 +678,7 @@ static int insert_ptr(struct ctree_root *root, lower->header.nritems++; if (lower->keys[1].objectid == 0) BUG(); - dirty_tree_block(root, path->nodes[level]); + BUG_ON(list_empty(&path->nodes[level]->dirty)); return 0; } @@ -701,12 +724,7 @@ static int split_node(struct ctree_root *root, struct ctree_path *path, c->header.nritems = mid; ret = 0; - wret = dirty_tree_block(root, t); - if (wret) - ret = wret; - wret = dirty_tree_block(root, split_buffer); - if (wret) - ret = wret; + BUG_ON(list_empty(&t->dirty)); wret = insert_ptr(root, path, split->keys, split_buffer->blocknr, path->slots[level + 1] + 1, level + 1); if (wret) @@ -778,6 +796,15 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, tree_block_release(root, right_buf); return 1; } + /* cow and double check */ + btrfs_cow_block(root, right_buf, upper, slot + 1, &right_buf); + right = &right_buf->leaf; + free_space = leaf_free_space(right); + if (free_space < data_size + sizeof(struct item)) { + tree_block_release(root, right_buf); + return 1; + } + for (i = left->header.nritems - 1; i >= 0; i--) { item = left->items + i; if (path->slots[0] == i) @@ -818,11 +845,12 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, } left->header.nritems -= push_items; - dirty_tree_block(root, left_buf); - dirty_tree_block(root, right_buf); + BUG_ON(list_empty(&left_buf->dirty)); + BUG_ON(list_empty(&right_buf->dirty)); memcpy(upper->node.keys + slot + 1, &right->items[0].key, sizeof(struct key)); - dirty_tree_block(root, upper); + BUG_ON(list_empty(&upper->dirty)); + /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left->header.nritems) { path->slots[0] -= left->header.nritems; @@ -869,6 +897,16 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, tree_block_release(root, t); return 1; } + + /* cow and double check */ + btrfs_cow_block(root, t, path->nodes[1], slot - 1, &t); + left = &t->leaf; + free_space = leaf_free_space(left); + if (free_space < data_size + sizeof(struct item)) { + tree_block_release(root, t); + return 1; + } + for (i = 0; i < right->header.nritems; i++) { item = right->items + i; if (path->slots[0] == i) @@ -912,12 +950,8 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, push_space = right->items[i].offset; } - wret = dirty_tree_block(root, t); - if (wret) - ret = wret; - wret = dirty_tree_block(root, right_buf); - if (wret) - ret = wret; + BUG_ON(list_empty(&t->dirty)); + BUG_ON(list_empty(&right_buf->dirty)); wret = fixup_low_keys(root, path, &right->items[0].key, 1); if (wret) @@ -968,6 +1002,7 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, if (wret < 0) return wret; } + l_buf = path->nodes[0]; l = &l_buf->leaf; @@ -1022,13 +1057,8 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, right_buffer->blocknr, path->slots[1] + 1, 1); if (wret) ret = wret; - wret = dirty_tree_block(root, right_buffer); - if (wret) - ret = wret; - wret = dirty_tree_block(root, l_buf); - if (wret) - ret = wret; - + BUG_ON(list_empty(&right_buffer->dirty)); + BUG_ON(list_empty(&l_buf->dirty)); BUG_ON(path->slots[0] != slot); if (mid <= slot) { tree_block_release(root, path->nodes[0]); @@ -1049,7 +1079,6 @@ int insert_item(struct ctree_root *root, struct key *key, void *data, int data_size) { int ret = 0; - int wret; int slot; int slot_orig; struct leaf *leaf; @@ -1062,7 +1091,7 @@ int insert_item(struct ctree_root *root, struct key *key, if (!root->node) BUG(); init_path(&path); - ret = search_slot(root, key, &path, data_size); + ret = search_slot(root, key, &path, data_size, 1); if (ret == 0) { release_path(root, &path); return -EEXIST; @@ -1114,10 +1143,7 @@ int insert_item(struct ctree_root *root, struct key *key, if (slot == 0) ret = fixup_low_keys(root, &path, key, 1); - wret = dirty_tree_block(root, leaf_buf); - if (wret) - ret = wret; - + BUG_ON(list_empty(&leaf_buf->dirty)); if (leaf_free_space(leaf) < 0) BUG(); check_leaf(&path, 0); @@ -1162,9 +1188,7 @@ static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level, if (wret) ret = wret; } - wret = dirty_tree_block(root, parent); - if (wret) - ret = wret; + BUG_ON(list_empty(&parent->dirty)); return ret; } @@ -1205,7 +1229,7 @@ int del_item(struct ctree_root *root, struct ctree_path *path) if (leaf->header.nritems == 0) { if (leaf_buf == root->node) { leaf->header.flags = node_level(0); - dirty_tree_block(root, leaf_buf); + BUG_ON(list_empty(&leaf_buf->dirty)); } else { clean_tree_block(root, leaf_buf); wret = del_ptr(root, path, 1, path->slots[1]); @@ -1223,9 +1247,7 @@ int del_item(struct ctree_root *root, struct ctree_path *path) if (wret) ret = wret; } - wret = dirty_tree_block(root, leaf_buf); - if (wret) - ret = wret; + BUG_ON(list_empty(&leaf_buf->dirty)); /* delete the leaf if it is mostly empty */ if (used < LEAF_DATA_SIZE / 3) { @@ -1304,3 +1326,4 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) return 0; } + diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6b4dabd4769..9fe8ba6e25c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -142,8 +142,9 @@ struct ctree_path { }; struct tree_buffer *alloc_free_block(struct ctree_root *root); +int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf); int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks); -int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p, int ins_len); +int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p, int ins_len, int cow); void release_path(struct ctree_root *root, struct ctree_path *p); void init_path(struct ctree_path *p); int del_item(struct ctree_root *root, struct ctree_path *path); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b1a8149bbc8..0e1c31e682f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -260,6 +260,8 @@ void tree_block_release(struct ctree_root *root, struct tree_buffer *buf) if (buf->count < 0) BUG(); if (buf->count == 0) { + BUG_ON(!list_empty(&buf->cache)); + BUG_ON(!list_empty(&buf->dirty)); if (!radix_tree_lookup(&root->cache_radix, buf->blocknr)) BUG(); radix_tree_delete(&root->cache_radix, buf->blocknr); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fb214449621..25d9cd16920 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -15,6 +15,39 @@ */ #define CTREE_EXTENT_PENDING 0 +static int inc_block_ref(struct ctree_root *root, u64 blocknr) +{ + struct ctree_path path; + int ret; + struct key key; + struct leaf *l; + struct extent_item *item; + init_path(&path); + key.objectid = blocknr; + key.flags = 0; + key.offset = 1; + ret = search_slot(root->extent_root, &key, &path, 0, 1); + BUG_ON(ret != 0); + l = &path.nodes[0]->leaf; + item = (struct extent_item *)(l->data + + l->items[path.slots[0]].offset); + item->refs++; + BUG_ON(list_empty(&path.nodes[0]->dirty)); + release_path(root->extent_root, &path); + return 0; +} + +int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf) +{ + u64 blocknr; + int i; + for (i = 0; i < buf->node.header.nritems; i++) { + blocknr = buf->node.blockptrs[i]; + inc_block_ref(root, blocknr); + } + return 0; +} + /* * find all the blocks marked as pending in the radix tree and remove * them from the extent map @@ -39,7 +72,7 @@ static int del_pending_extents(struct ctree_root *extent_root) key.flags = 0; key.offset = 1; init_path(&path); - ret = search_slot(extent_root, &key, &path, -1); + ret = search_slot(extent_root, &key, &path, -1, 1); if (ret) { print_tree(extent_root, extent_root->node); printf("unable to find %Lu\n", key.objectid); @@ -83,7 +116,7 @@ int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) return 0; } init_path(&path); - ret = search_slot(extent_root, &key, &path, -1); + ret = search_slot(extent_root, &key, &path, -1, 1); if (ret) { print_tree(extent_root, extent_root->node); printf("failed to find %Lu\n", key.objectid); @@ -124,7 +157,7 @@ check_failed: ins->offset = 0; ins->flags = 0; start_found = 0; - ret = search_slot(root, ins, &path, 0); + ret = search_slot(root, ins, &path, 0, 0); if (ret < 0) goto error; @@ -221,6 +254,8 @@ static int insert_pending_extents(struct ctree_root *extent_root) ret = insert_item(extent_root, &key, &item, sizeof(item)); if (ret) { + printf("%Lu already in tree\n", key.objectid); + print_tree(extent_root, extent_root->node); BUG(); // FIXME undo it and return sane return ret; @@ -228,6 +263,7 @@ static int insert_pending_extents(struct ctree_root *extent_root) radix_tree_tag_clear(&extent_root->cache_radix, gang[i]->blocknr, CTREE_EXTENT_PENDING); + printf("%Lu is not pending\n", gang[i]->blocknr); tree_block_release(extent_root, gang[i]); } } @@ -266,15 +302,18 @@ int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, if (pending_ret) return pending_ret; *buf = find_tree_block(root, ins->objectid); + dirty_tree_block(root, *buf); return 0; } /* we're allocating an extent for the extent tree, don't recurse */ BUG_ON(ins->offset != 1); *buf = find_tree_block(root, ins->objectid); BUG_ON(!*buf); + printf("%Lu is pending\n", ins->objectid); radix_tree_tag_set(&root->cache_radix, ins->objectid, CTREE_EXTENT_PENDING); (*buf)->count++; + dirty_tree_block(root, *buf); return 0; } diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c index dbd00c3b7ab..8255f79ceca 100644 --- a/fs/btrfs/quick-test.c +++ b/fs/btrfs/quick-test.c @@ -19,7 +19,7 @@ int main(int ac, char **av) { int i; int num; int ret; - int run_size = 100000; + int run_size = 1024; int max_key = 100000000; int tree_size = 0; struct ctree_path path; @@ -57,7 +57,7 @@ int main(int ac, char **av) { init_path(&path); if (i % 10000 == 0) fprintf(stderr, "search %d:%d\n", num, i); - ret = search_slot(root, &ins, &path, 0); + ret = search_slot(root, &ins, &path, 0, 0); if (ret) { print_tree(root, root->node); printf("unable to find %d\n", num); @@ -79,7 +79,7 @@ int main(int ac, char **av) { num = next_key(i, max_key); ins.objectid = num; init_path(&path); - ret = search_slot(root, &ins, &path, -1); + ret = search_slot(root, &ins, &path, -1, 1); if (!ret) { if (i % 10000 == 0) fprintf(stderr, "del %d:%d\n", num, i); @@ -117,7 +117,7 @@ int main(int ac, char **av) { init_path(&path); if (i % 10000 == 0) fprintf(stderr, "search %d:%d\n", num, i); - ret = search_slot(root, &ins, &path, 0); + ret = search_slot(root, &ins, &path, 0, 0); if (ret) { print_tree(root, root->node); printf("unable to find %d\n", num); @@ -131,7 +131,7 @@ int main(int ac, char **av) { int slot; ins.objectid = (u64)-1; init_path(&path); - ret = search_slot(root, &ins, &path, -1); + ret = search_slot(root, &ins, &path, -1, 1); if (ret == 0) BUG(); diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index 53245c5039d..dcc852ad673 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -93,7 +93,7 @@ static int del_one(struct ctree_root *root, struct radix_tree_root *radix) ret = setup_key(radix, &key, 1); if (ret < 0) return 0; - ret = search_slot(root, &key, &path, -1); + ret = search_slot(root, &key, &path, -1, 1); if (ret) goto error; ret = del_item(root, &path); @@ -118,7 +118,7 @@ static int lookup_item(struct ctree_root *root, struct radix_tree_root *radix) ret = setup_key(radix, &key, 1); if (ret < 0) return 0; - ret = search_slot(root, &key, &path, 0); + ret = search_slot(root, &key, &path, 0, 1); release_path(root, &path); if (ret) goto error; @@ -137,7 +137,7 @@ static int lookup_enoent(struct ctree_root *root, struct radix_tree_root *radix) ret = setup_key(radix, &key, 0); if (ret < 0) return ret; - ret = search_slot(root, &key, &path, 0); + ret = search_slot(root, &key, &path, 0, 0); release_path(root, &path); if (ret <= 0) goto error; @@ -163,7 +163,7 @@ static int empty_tree(struct ctree_root *root, struct radix_tree_root *radix, key.objectid = (unsigned long)-1; while(nr-- >= 0) { init_path(&path); - ret = search_slot(root, &key, &path, -1); + ret = search_slot(root, &key, &path, -1, 1); if (ret < 0) { release_path(root, &path); return ret; @@ -216,7 +216,7 @@ static int fill_tree(struct ctree_root *root, struct radix_tree_root *radix, return ret; } } - if (i % 10000 == 0) { + if (i && i % 10000 == 0) { printf("bigfill %d\n", i); } if (!keep_running) @@ -263,7 +263,7 @@ static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) key.objectid = (unsigned long)-1; while(1) { init_path(&path); - ret = search_slot(root, &key, &path, 0); + ret = search_slot(root, &key, &path, 0, 0); if (ret < 0) { release_path(root, &path); return ret; -- cgit v1.2.3-70-g09d2 From a28ec19775d62d673b034082128aca95780d3737 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 6 Mar 2007 20:08:01 -0500 Subject: Btrfs: Fixup reference counting on cows Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.c | 9 +- fs/btrfs/ctree.h | 4 + fs/btrfs/disk-io.c | 31 +++++- fs/btrfs/disk-io.h | 4 +- fs/btrfs/extent-tree.c | 291 ++++++++++++++++++++++++++++++++++--------------- fs/btrfs/print-tree.c | 6 +- fs/btrfs/quick-test.c | 26 +++-- fs/btrfs/random-test.c | 21 ++-- 9 files changed, 267 insertions(+), 127 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 756b1cd4cf4..ae7f4c00c39 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -1,7 +1,7 @@ CC=gcc CFLAGS = -g -Wall -headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h +headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o # if you don't have sparse installed, use ls instead diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1118986d756..72816381d20 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -50,16 +50,17 @@ int btrfs_cow_block(struct ctree_root *root, memcpy(&cow->node, &buf->node, sizeof(buf->node)); cow->node.header.blocknr = cow->blocknr; *cow_ret = cow; + btrfs_inc_ref(root, buf); if (buf == root->node) { root->node = cow; cow->count++; + if (buf != root->commit_root) + free_extent(root, buf->blocknr, 1); tree_block_release(root, buf); } else { parent->node.blockptrs[parent_slot] = cow->blocknr; BUG_ON(list_empty(&parent->dirty)); - } - if (0 && root != root->extent_root && !is_leaf(cow->node.header.flags)) { - btrfs_inc_ref(root, cow); + free_extent(root, buf->blocknr, 1); } tree_block_release(root, buf); return 0; @@ -1018,7 +1019,6 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, slot = path->slots[0]; nritems = l->header.nritems; mid = (nritems + 1)/ 2; - right_buffer = alloc_free_block(root); BUG_ON(!right_buffer); BUG_ON(mid == nritems); @@ -1170,7 +1170,6 @@ static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level, node = &parent->node; nritems = node->header.nritems; - if (slot != nritems -1) { memmove(node->keys + slot, node->keys + slot + 1, sizeof(struct key) * (nritems - slot - 1)); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9fe8ba6e25c..4a7bc4e6e74 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -51,10 +51,12 @@ struct tree_buffer; */ struct ctree_root { struct tree_buffer *node; + struct tree_buffer *commit_root; struct ctree_root *extent_root; struct key current_insert; int fp; struct radix_tree_root cache_radix; + struct radix_tree_root pinned_radix; struct list_head trans; struct list_head cache; int cache_size; @@ -151,4 +153,6 @@ int del_item(struct ctree_root *root, struct ctree_path *path); int insert_item(struct ctree_root *root, struct key *key, void *data, int data_size); int next_leaf(struct ctree_root *root, struct ctree_path *path); int leaf_free_space(struct leaf *leaf); +int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap); +int btrfs_finish_extent_commit(struct ctree_root *root); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0e1c31e682f..2fe31c3508c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -153,13 +153,24 @@ static int __commit_transaction(struct ctree_root *root) return ret; } -int commit_transaction(struct ctree_root *root) +int commit_transaction(struct ctree_root *root, struct ctree_super_block *s) { - int ret; + int ret = 0; + ret = __commit_transaction(root); if (!ret && root != root->extent_root) ret = __commit_transaction(root->extent_root); BUG_ON(ret); + if (root->commit_root != root->node) { + struct tree_buffer *snap = root->commit_root; + root->commit_root = root->node; + root->node->count++; + ret = btrfs_drop_snapshot(root, snap); + BUG_ON(ret); + tree_block_release(root, snap); + } + write_ctree_super(root, s); + btrfs_finish_extent_commit(root); return ret; } @@ -168,10 +179,13 @@ static int __setup_root(struct ctree_root *root, struct ctree_root *extent_root, { INIT_LIST_HEAD(&root->trans); INIT_LIST_HEAD(&root->cache); + root->cache_size = 0; root->fp = fp; root->node = NULL; - root->node = read_tree_block(root, info->tree_root); root->extent_root = extent_root; + root->commit_root = NULL; + root->node = read_tree_block(root, info->tree_root); + memset(&root->current_insert, 0, sizeof(root->current_insert)); return 0; } @@ -188,6 +202,8 @@ struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) return NULL; } INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL); + INIT_RADIX_TREE(&root->pinned_radix, GFP_KERNEL); + INIT_RADIX_TREE(&extent_root->pinned_radix, GFP_KERNEL); INIT_RADIX_TREE(&extent_root->cache_radix, GFP_KERNEL); ret = pread(fp, super, sizeof(struct ctree_super_block), CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); @@ -204,6 +220,8 @@ struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) BUG_ON(ret < 0); __setup_root(root, extent_root, &super->root_info, fp); __setup_root(extent_root, extent_root, &super->extent_info, fp); + root->commit_root = root->node; + root->node->count++; return root; } @@ -236,9 +254,11 @@ static int drop_cache(struct ctree_root *root) } return 0; } -int close_ctree(struct ctree_root *root) +int close_ctree(struct ctree_root *root, struct ctree_super_block *s) { - commit_transaction(root); + commit_transaction(root, s); + __commit_transaction(root->extent_root); + write_ctree_super(root, s); drop_cache(root->extent_root); drop_cache(root); BUG_ON(!list_empty(&root->trans)); @@ -249,6 +269,7 @@ int close_ctree(struct ctree_root *root) tree_block_release(root, root->node); if (root->extent_root->node) tree_block_release(root->extent_root, root->extent_root->node); + tree_block_release(root, root->commit_root); free(root); printf("on close %d blocks are allocated\n", allocated_blocks); return 0; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index b5dee2fae4d..1c0af7c56c2 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -18,9 +18,9 @@ struct tree_buffer *find_tree_block(struct ctree_root *root, u64 blocknr); int write_tree_block(struct ctree_root *root, struct tree_buffer *buf); int dirty_tree_block(struct ctree_root *root, struct tree_buffer *buf); int clean_tree_block(struct ctree_root *root, struct tree_buffer *buf); -int commit_transaction(struct ctree_root *root); +int commit_transaction(struct ctree_root *root, struct ctree_super_block *s); struct ctree_root *open_ctree(char *filename, struct ctree_super_block *s); -int close_ctree(struct ctree_root *root); +int close_ctree(struct ctree_root *root, struct ctree_super_block *s); void tree_block_release(struct ctree_root *root, struct tree_buffer *buf); int write_ctree_super(struct ctree_root *root, struct ctree_super_block *s); int mkfs(int fd); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 25d9cd16920..0723b7f3f0c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -13,7 +13,8 @@ * other allocations are done. The pending tag is also used in the same * manner for deletes. */ -#define CTREE_EXTENT_PENDING 0 +#define CTREE_EXTENT_PENDING_ADD 0 +#define CTREE_EXTENT_PENDING_DEL 1 static int inc_block_ref(struct ctree_root *root, u64 blocknr) { @@ -27,20 +28,51 @@ static int inc_block_ref(struct ctree_root *root, u64 blocknr) key.flags = 0; key.offset = 1; ret = search_slot(root->extent_root, &key, &path, 0, 1); + if (ret != 0) + BUG(); BUG_ON(ret != 0); l = &path.nodes[0]->leaf; item = (struct extent_item *)(l->data + l->items[path.slots[0]].offset); item->refs++; + BUG_ON(list_empty(&path.nodes[0]->dirty)); release_path(root->extent_root, &path); return 0; } +static int lookup_block_ref(struct ctree_root *root, u64 blocknr, int *refs) +{ + struct ctree_path path; + int ret; + struct key key; + struct leaf *l; + struct extent_item *item; + init_path(&path); + key.objectid = blocknr; + key.flags = 0; + key.offset = 1; + ret = search_slot(root->extent_root, &key, &path, 0, 0); + if (ret != 0) + BUG(); + l = &path.nodes[0]->leaf; + item = (struct extent_item *)(l->data + + l->items[path.slots[0]].offset); + *refs = item->refs; + release_path(root->extent_root, &path); + return 0; +} + int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf) { u64 blocknr; int i; + + if (root == root->extent_root) + return 0; + if (is_leaf(buf->node.header.flags)) + return 0; + for (i = 0; i < buf->node.header.nritems; i++) { blocknr = buf->node.blockptrs[i]; inc_block_ref(root, blocknr); @@ -48,85 +80,187 @@ int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf) return 0; } +int btrfs_finish_extent_commit(struct ctree_root *root) +{ + struct ctree_root *extent_root = root->extent_root; + unsigned long gang[8]; + int ret; + int i; + + while(1) { + ret = radix_tree_gang_lookup(&extent_root->pinned_radix, + (void **)gang, 0, + ARRAY_SIZE(gang)); + if (!ret) + break; + for (i = 0; i < ret; i++) + radix_tree_delete(&extent_root->pinned_radix, gang[i]); + } + return 0; +} + /* - * find all the blocks marked as pending in the radix tree and remove - * them from the extent map + * remove an extent from the root, returns 0 on success */ -static int del_pending_extents(struct ctree_root *extent_root) +int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) +{ + struct ctree_path path; + struct key key; + struct ctree_root *extent_root = root->extent_root; + int ret; + struct item *item; + struct extent_item *ei; + key.objectid = blocknr; + key.flags = 0; + key.offset = num_blocks; + + init_path(&path); + ret = search_slot(extent_root, &key, &path, -1, 1); + if (ret) { + printf("failed to find %Lu\n", key.objectid); + print_tree(extent_root, extent_root->node); + printf("failed to find %Lu\n", key.objectid); + BUG(); + } + item = path.nodes[0]->leaf.items + path.slots[0]; + ei = (struct extent_item *)(path.nodes[0]->leaf.data + item->offset); + BUG_ON(ei->refs == 0); + ei->refs--; + if (ei->refs == 0) { + if (root == extent_root) { + int err; + radix_tree_preload(GFP_KERNEL); + err = radix_tree_insert(&extent_root->pinned_radix, + blocknr, (void *)blocknr); + BUG_ON(err); + radix_tree_preload_end(); + } + ret = del_item(extent_root, &path); + if (ret) + BUG(); + } + release_path(extent_root, &path); + return ret; +} + +/* + * insert all of the pending extents reserved during the original + * allocation. (CTREE_EXTENT_PENDING). Returns zero if it all worked out + */ +static int insert_pending_extents(struct ctree_root *extent_root) { int ret; struct key key; + struct extent_item item; struct tree_buffer *gang[4]; int i; - struct ctree_path path; + // FIXME -ENOSPC + item.owner = extent_root->node->node.header.parentid; + item.refs = 1; while(1) { ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix, (void **)gang, 0, ARRAY_SIZE(gang), - CTREE_EXTENT_PENDING); + CTREE_EXTENT_PENDING_ADD); if (!ret) break; for (i = 0; i < ret; i++) { key.objectid = gang[i]->blocknr; key.flags = 0; key.offset = 1; - init_path(&path); - ret = search_slot(extent_root, &key, &path, -1, 1); + ret = insert_item(extent_root, &key, &item, + sizeof(item)); if (ret) { + printf("%Lu already in tree\n", key.objectid); print_tree(extent_root, extent_root->node); - printf("unable to find %Lu\n", key.objectid); BUG(); // FIXME undo it and return sane return ret; } - ret = del_item(extent_root, &path); - if (ret) { - BUG(); - return ret; - } - release_path(extent_root, &path); + radix_tree_tag_clear(&extent_root->cache_radix, + gang[i]->blocknr, + CTREE_EXTENT_PENDING_ADD); + tree_block_release(extent_root, gang[i]); + } + } + return 0; +} + +/* + * find all the blocks marked as pending in the radix tree and remove + * them from the extent map + */ +static int del_pending_extents(struct ctree_root *extent_root) +{ + int ret; + struct tree_buffer *gang[4]; + int i; + + while(1) { + ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix, + (void **)gang, 0, + ARRAY_SIZE(gang), + CTREE_EXTENT_PENDING_DEL); + if (!ret) + break; + for (i = 0; i < ret; i++) { + ret = __free_extent(extent_root, gang[i]->blocknr, 1); radix_tree_tag_clear(&extent_root->cache_radix, gang[i]->blocknr, - CTREE_EXTENT_PENDING); + CTREE_EXTENT_PENDING_DEL); tree_block_release(extent_root, gang[i]); } } return 0; } +static int run_pending(struct ctree_root *extent_root) +{ + while(radix_tree_tagged(&extent_root->cache_radix, + CTREE_EXTENT_PENDING_DEL) || + radix_tree_tagged(&extent_root->cache_radix, + CTREE_EXTENT_PENDING_ADD)) { + insert_pending_extents(extent_root); + del_pending_extents(extent_root); + } + return 0; +} + + /* * remove an extent from the root, returns 0 on success */ int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) { - struct ctree_path path; struct key key; struct ctree_root *extent_root = root->extent_root; struct tree_buffer *t; int pending_ret; int ret; - key.objectid = blocknr; - key.flags = 0; - key.offset = num_blocks; + if (root == extent_root) { - t = read_tree_block(root, key.objectid); - radix_tree_tag_set(&root->cache_radix, key.objectid, - CTREE_EXTENT_PENDING); + t = find_tree_block(root, blocknr); + if (radix_tree_tag_get(&root->cache_radix, blocknr, + CTREE_EXTENT_PENDING_ADD)) { + radix_tree_tag_clear(&root->cache_radix, + blocknr, + CTREE_EXTENT_PENDING_ADD); + /* once for us */ + tree_block_release(root, t); + /* once for the pending add */ + tree_block_release(root, t); + } else { + radix_tree_tag_set(&root->cache_radix, blocknr, + CTREE_EXTENT_PENDING_DEL); + } return 0; } - init_path(&path); - ret = search_slot(extent_root, &key, &path, -1, 1); - if (ret) { - print_tree(extent_root, extent_root->node); - printf("failed to find %Lu\n", key.objectid); - BUG(); - } - ret = del_item(extent_root, &path); - if (ret) - BUG(); - release_path(extent_root, &path); - pending_ret = del_pending_extents(root->extent_root); + key.objectid = blocknr; + key.flags = 0; + key.offset = num_blocks; + ret = __free_extent(root, blocknr, num_blocks); + pending_ret = run_pending(root->extent_root); return ret ? ret : pending_ret; } @@ -203,7 +337,7 @@ check_pending: */ release_path(root, &path); BUG_ON(ins->objectid < search_start); - if (orig_root->extent_root == orig_root) { + if (1 || orig_root->extent_root == orig_root) { BUG_ON(num_blocks != 1); if ((root->current_insert.objectid <= ins->objectid && root->current_insert.objectid + @@ -211,8 +345,9 @@ check_pending: (root->current_insert.objectid > ins->objectid && root->current_insert.objectid <= ins->objectid + ins->offset) || + radix_tree_lookup(&root->pinned_radix, ins->objectid) || radix_tree_tag_get(&root->cache_radix, ins->objectid, - CTREE_EXTENT_PENDING)) { + CTREE_EXTENT_PENDING_ADD)) { search_start = ins->objectid + 1; goto check_failed; } @@ -225,51 +360,6 @@ error: return ret; } -/* - * insert all of the pending extents reserved during the original - * allocation. (CTREE_EXTENT_PENDING). Returns zero if it all worked out - */ -static int insert_pending_extents(struct ctree_root *extent_root) -{ - int ret; - struct key key; - struct extent_item item; - struct tree_buffer *gang[4]; - int i; - - // FIXME -ENOSPC - item.refs = 1; - item.owner = extent_root->node->node.header.parentid; - while(1) { - ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix, - (void **)gang, 0, - ARRAY_SIZE(gang), - CTREE_EXTENT_PENDING); - if (!ret) - break; - for (i = 0; i < ret; i++) { - key.objectid = gang[i]->blocknr; - key.flags = 0; - key.offset = 1; - ret = insert_item(extent_root, &key, &item, - sizeof(item)); - if (ret) { - printf("%Lu already in tree\n", key.objectid); - print_tree(extent_root, extent_root->node); - BUG(); - // FIXME undo it and return sane - return ret; - } - radix_tree_tag_clear(&extent_root->cache_radix, - gang[i]->blocknr, - CTREE_EXTENT_PENDING); - printf("%Lu is not pending\n", gang[i]->blocknr); - tree_block_release(extent_root, gang[i]); - } - } - return 0; -} - /* * finds a free extent and does all the dirty work required for allocation * returns the key for the extent through ins, and a tree buffer for @@ -296,7 +386,7 @@ int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, sizeof(extent_item)); memset(&root->extent_root->current_insert, 0, sizeof(struct key)); - pending_ret = insert_pending_extents(root->extent_root); + pending_ret = run_pending(root->extent_root); if (ret) return ret; if (pending_ret) @@ -309,9 +399,8 @@ int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, BUG_ON(ins->offset != 1); *buf = find_tree_block(root, ins->objectid); BUG_ON(!*buf); - printf("%Lu is pending\n", ins->objectid); radix_tree_tag_set(&root->cache_radix, ins->objectid, - CTREE_EXTENT_PENDING); + CTREE_EXTENT_PENDING_ADD); (*buf)->count++; dirty_tree_block(root, *buf); return 0; @@ -331,13 +420,41 @@ struct tree_buffer *alloc_free_block(struct ctree_root *root) ret = alloc_extent(root, 1, 0, (unsigned long)-1, root->node->node.header.parentid, &ins, &buf); - if (ret) { BUG(); return NULL; } if (root != root->extent_root) BUG_ON(radix_tree_tag_get(&root->extent_root->cache_radix, - buf->blocknr, CTREE_EXTENT_PENDING)); + buf->blocknr, + CTREE_EXTENT_PENDING_ADD)); return buf; } + +int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap) +{ + int ret; + int level; + int refs; + u64 blocknr = snap->blocknr; + + level = node_level(snap->node.header.flags); + ret = lookup_block_ref(root, snap->blocknr, &refs); + BUG_ON(ret); + if (refs == 1 && level != 0) { + struct node *n = &snap->node; + struct tree_buffer *b; + int i; + for (i = 0; i < n->header.nritems; i++) { + b = read_tree_block(root, n->blockptrs[i]); + /* FIXME, don't recurse here */ + ret = btrfs_drop_snapshot(root, b); + BUG_ON(ret); + tree_block_release(root, b); + } + } + ret = free_extent(root, blocknr, 1); + BUG_ON(ret); + return 0; +} + diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index dda08f32c15..e32a959dd3e 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -21,9 +21,11 @@ void print_leaf(struct leaf *l) item->key.objectid, item->key.flags, item->key.offset, item->offset, item->size); fflush(stdout); - printf("\t\titem data %.*s\n", item->size, l->data+item->offset); + printf("\t\titem data %.*s\n", item->size, + l->data+item->offset); ei = (struct extent_item *)(l->data + item->offset); - printf("\t\textent data %u %Lu\n", ei->refs, ei->owner); + printf("\t\textent data refs %u owner %Lu\n", ei->refs, + ei->owner); fflush(stdout); } } diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c index 8255f79ceca..6400c7100a6 100644 --- a/fs/btrfs/quick-test.c +++ b/fs/btrfs/quick-test.c @@ -19,7 +19,7 @@ int main(int ac, char **av) { int i; int num; int ret; - int run_size = 1024; + int run_size = 100000; int max_key = 100000000; int tree_size = 0; struct ctree_path path; @@ -44,9 +44,9 @@ int main(int ac, char **av) { if (!ret) tree_size++; free(buf); + } - write_ctree_super(root, &super); - close_ctree(root); + close_ctree(root, &super); root = open_ctree("dbfile", &super); printf("starting search\n"); @@ -65,8 +65,7 @@ int main(int ac, char **av) { } release_path(root, &path); } - write_ctree_super(root, &super); - close_ctree(root); + close_ctree(root, &super); root = open_ctree("dbfile", &super); printf("node %p level %d total ptrs %d free spc %lu\n", root->node, node_level(root->node->node.header.flags), @@ -90,8 +89,7 @@ int main(int ac, char **av) { } release_path(root, &path); } - write_ctree_super(root, &super); - close_ctree(root); + close_ctree(root, &super); root = open_ctree("dbfile", &super); srand(128); for (i = 0; i < run_size; i++) { @@ -106,8 +104,7 @@ int main(int ac, char **av) { tree_size++; free(buf); } - write_ctree_super(root, &super); - close_ctree(root); + close_ctree(root, &super); root = open_ctree("dbfile", &super); srand(128); printf("starting search2\n"); @@ -156,10 +153,17 @@ int main(int ac, char **av) { } release_path(root, &path); } + /* + printf("previous tree:\n"); + print_tree(root, root->commit_root); + printf("map before commit\n"); + print_tree(root->extent_root, root->extent_root->node); + */ + commit_transaction(root, &super); printf("tree size is now %d\n", tree_size); + printf("root %p commit root %p\n", root->node, root->commit_root); printf("map tree\n"); print_tree(root->extent_root, root->extent_root->node); - write_ctree_super(root, &super); - close_ctree(root); + close_ctree(root, &super); return 0; } diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index dcc852ad673..7b37b6bae10 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -8,6 +8,7 @@ #include "print-tree.h" int keep_running = 1; +struct ctree_super_block super; static int setup_key(struct radix_tree_root *root, struct key *key, int exists) { @@ -59,11 +60,6 @@ error: return -1; } -static int run_commit(struct ctree_root *root, struct radix_tree_root *radix) -{ - return commit_transaction(root); -} - static int insert_dup(struct ctree_root *root, struct radix_tree_root *radix) { struct ctree_path path; @@ -210,7 +206,7 @@ static int fill_tree(struct ctree_root *root, struct radix_tree_root *radix, goto out; } if (i % 1000 == 0) { - ret = commit_transaction(root); + ret = commit_transaction(root, &super); if (ret) { fprintf(stderr, "fill commit failed\n"); return ret; @@ -229,7 +225,7 @@ out: static int bulk_op(struct ctree_root *root, struct radix_tree_root *radix) { int ret; - int nr = rand() % 20000; + int nr = rand() % 5000; static int run_nr = 0; /* do the bulk op much less frequently */ @@ -247,7 +243,7 @@ static int bulk_op(struct ctree_root *root, struct radix_tree_root *radix) int (*ops[])(struct ctree_root *root, struct radix_tree_root *radix) = { ins_one, insert_dup, del_one, lookup_item, - lookup_enoent, bulk_op, run_commit }; + lookup_enoent, bulk_op }; static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) { @@ -314,7 +310,6 @@ int print_usage(void) int main(int ac, char **av) { RADIX_TREE(radix, GFP_KERNEL); - struct ctree_super_block super; struct ctree_root *root; int i; int ret; @@ -365,8 +360,7 @@ int main(int ac, char **av) printf("open & close, root level %d nritems %d\n", node_level(root->node->node.header.flags), root->node->node.header.nritems); - write_ctree_super(root, &super); - close_ctree(root); + close_ctree(root, &super); root = open_ctree("dbfile", &super); } while(count--) { @@ -380,7 +374,7 @@ int main(int ac, char **av) err = ret; goto out; } - if (ops[op] == bulk_op || ops[op] == run_commit) + if (ops[op] == bulk_op) break; if (keep_running == 0) { err = 0; @@ -389,8 +383,7 @@ int main(int ac, char **av) } } out: - write_ctree_super(root, &super); - close_ctree(root); + close_ctree(root, &super); return err; } -- cgit v1.2.3-70-g09d2 From 037e6390488af8ab96137e1e5cccc15ad14ef887 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 7 Mar 2007 11:50:24 -0500 Subject: Btrfs: get rid of add recursion Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.c | 9 --- fs/btrfs/extent-tree.c | 209 ++++++++++++++++++++++--------------------------- 3 files changed, 96 insertions(+), 124 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index ae7f4c00c39..d92d08dde0f 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -1,6 +1,6 @@ CC=gcc -CFLAGS = -g -Wall +CFLAGS = -Wall headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 72816381d20..729d4ddb374 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -995,15 +995,6 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, int ret; int wret; - wret = push_leaf_left(root, path, data_size); - if (wret < 0) - return wret; - if (wret) { - wret = push_leaf_right(root, path, data_size); - if (wret < 0) - return wret; - } - l_buf = path->nodes[0]; l = &l_buf->leaf; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0723b7f3f0c..8a2b8aaf9b8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6,6 +6,11 @@ #include "disk-io.h" #include "print-tree.h" +static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, + u64 search_start, u64 search_end, struct key *ins); +static int finish_current_insert(struct ctree_root *extent_root); +static int run_pending(struct ctree_root *extent_root); + /* * pending extents are blocks that we're trying to allocate in the extent * map while trying to grow the map because of other allocations. To avoid @@ -13,8 +18,7 @@ * other allocations are done. The pending tag is also used in the same * manner for deletes. */ -#define CTREE_EXTENT_PENDING_ADD 0 -#define CTREE_EXTENT_PENDING_DEL 1 +#define CTREE_EXTENT_PENDING_DEL 0 static int inc_block_ref(struct ctree_root *root, u64 blocknr) { @@ -23,6 +27,9 @@ static int inc_block_ref(struct ctree_root *root, u64 blocknr) struct key key; struct leaf *l; struct extent_item *item; + struct key ins; + + find_free_extent(root->extent_root, 0, 0, (u64)-1, &ins); init_path(&path); key.objectid = blocknr; key.flags = 0; @@ -38,6 +45,8 @@ static int inc_block_ref(struct ctree_root *root, u64 blocknr) BUG_ON(list_empty(&path.nodes[0]->dirty)); release_path(root->extent_root, &path); + finish_current_insert(root->extent_root); + run_pending(root->extent_root); return 0; } @@ -99,6 +108,28 @@ int btrfs_finish_extent_commit(struct ctree_root *root) return 0; } +static int finish_current_insert(struct ctree_root *extent_root) +{ + struct key ins; + struct extent_item extent_item; + int i; + int ret; + + extent_item.refs = 1; + extent_item.owner = extent_root->node->node.header.parentid; + ins.offset = 1; + ins.flags = 0; + + for (i = 0; i < extent_root->current_insert.flags; i++) { + ins.objectid = extent_root->current_insert.objectid + i; + ret = insert_item(extent_root, &ins, &extent_item, + sizeof(extent_item)); + BUG_ON(ret); + } + extent_root->current_insert.offset = 0; + return 0; +} + /* * remove an extent from the root, returns 0 on success */ @@ -110,10 +141,13 @@ int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) int ret; struct item *item; struct extent_item *ei; + struct key ins; + key.objectid = blocknr; key.flags = 0; key.offset = num_blocks; + find_free_extent(root, 0, 0, (u64)-1, &ins); init_path(&path); ret = search_slot(extent_root, &key, &path, -1, 1); if (ret) { @@ -140,53 +174,10 @@ int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) BUG(); } release_path(extent_root, &path); + finish_current_insert(extent_root); return ret; } -/* - * insert all of the pending extents reserved during the original - * allocation. (CTREE_EXTENT_PENDING). Returns zero if it all worked out - */ -static int insert_pending_extents(struct ctree_root *extent_root) -{ - int ret; - struct key key; - struct extent_item item; - struct tree_buffer *gang[4]; - int i; - - // FIXME -ENOSPC - item.owner = extent_root->node->node.header.parentid; - item.refs = 1; - while(1) { - ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix, - (void **)gang, 0, - ARRAY_SIZE(gang), - CTREE_EXTENT_PENDING_ADD); - if (!ret) - break; - for (i = 0; i < ret; i++) { - key.objectid = gang[i]->blocknr; - key.flags = 0; - key.offset = 1; - ret = insert_item(extent_root, &key, &item, - sizeof(item)); - if (ret) { - printf("%Lu already in tree\n", key.objectid); - print_tree(extent_root, extent_root->node); - BUG(); - // FIXME undo it and return sane - return ret; - } - radix_tree_tag_clear(&extent_root->cache_radix, - gang[i]->blocknr, - CTREE_EXTENT_PENDING_ADD); - tree_block_release(extent_root, gang[i]); - } - } - return 0; -} - /* * find all the blocks marked as pending in the radix tree and remove * them from the extent map @@ -218,12 +209,8 @@ static int del_pending_extents(struct ctree_root *extent_root) static int run_pending(struct ctree_root *extent_root) { while(radix_tree_tagged(&extent_root->cache_radix, - CTREE_EXTENT_PENDING_DEL) || - radix_tree_tagged(&extent_root->cache_radix, - CTREE_EXTENT_PENDING_ADD)) { - insert_pending_extents(extent_root); + CTREE_EXTENT_PENDING_DEL)) del_pending_extents(extent_root); - } return 0; } @@ -241,19 +228,8 @@ int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) if (root == extent_root) { t = find_tree_block(root, blocknr); - if (radix_tree_tag_get(&root->cache_radix, blocknr, - CTREE_EXTENT_PENDING_ADD)) { - radix_tree_tag_clear(&root->cache_radix, - blocknr, - CTREE_EXTENT_PENDING_ADD); - /* once for us */ - tree_block_release(root, t); - /* once for the pending add */ - tree_block_release(root, t); - } else { - radix_tree_tag_set(&root->cache_radix, blocknr, + radix_tree_tag_set(&root->cache_radix, blocknr, CTREE_EXTENT_PENDING_DEL); - } return 0; } key.objectid = blocknr; @@ -281,9 +257,11 @@ static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, u64 hole_size = 0; int slot = 0; u64 last_block; + u64 test_block; int start_found; struct leaf *l; struct ctree_root * root = orig_root->extent_root; + int total_needed = num_blocks + MAX_LEVEL * 3; check_failed: init_path(&path); @@ -306,22 +284,34 @@ check_failed: goto error; if (!start_found) { ins->objectid = search_start; - ins->offset = num_blocks; + ins->offset = (u64)-1; start_found = 1; goto check_pending; } ins->objectid = last_block > search_start ? last_block : search_start; - ins->offset = num_blocks; + ins->offset = (u64)-1; goto check_pending; } + if (slot == 0) { + int last_slot = l->header.nritems - 1; + u64 span = l->items[last_slot].key.objectid; + span -= l->items[slot].key.objectid; + if (span + total_needed > last_slot - slot) { + path.slots[0] = last_slot + 1; + key = &l->items[last_slot].key; + last_block = key->objectid + key->offset; + start_found = 1; + continue; + } + } key = &l->items[slot].key; if (key->objectid >= search_start) { if (start_found) { hole_size = key->objectid - last_block; - if (hole_size > num_blocks) { + if (hole_size > total_needed) { ins->objectid = last_block; - ins->offset = num_blocks; + ins->offset = hole_size; goto check_pending; } } else @@ -337,23 +327,18 @@ check_pending: */ release_path(root, &path); BUG_ON(ins->objectid < search_start); - if (1 || orig_root->extent_root == orig_root) { - BUG_ON(num_blocks != 1); - if ((root->current_insert.objectid <= ins->objectid && - root->current_insert.objectid + - root->current_insert.offset > ins->objectid) || - (root->current_insert.objectid > ins->objectid && - root->current_insert.objectid <= ins->objectid + - ins->offset) || - radix_tree_lookup(&root->pinned_radix, ins->objectid) || - radix_tree_tag_get(&root->cache_radix, ins->objectid, - CTREE_EXTENT_PENDING_ADD)) { - search_start = ins->objectid + 1; + for (test_block = ins->objectid; + test_block < ins->objectid + total_needed; test_block++) { + if (radix_tree_lookup(&root->pinned_radix, test_block)) { + search_start = test_block + 1; goto check_failed; } } - if (ins->offset != 1) - BUG(); + BUG_ON(root->current_insert.offset); + root->current_insert.offset = total_needed; + root->current_insert.objectid = ins->objectid + num_blocks; + root->current_insert.flags = 0; + ins->offset = num_blocks; return 0; error: release_path(root, &path); @@ -368,43 +353,41 @@ error: * returns 0 if everything worked, non-zero otherwise. */ int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, - u64 search_end, u64 owner, struct key *ins, - struct tree_buffer **buf) + u64 search_end, u64 owner, struct key *ins) { int ret; int pending_ret; + struct ctree_root *extent_root = root->extent_root; struct extent_item extent_item; + extent_item.refs = 1; extent_item.owner = owner; - ret = find_free_extent(root, num_blocks, search_start, search_end, ins); - if (ret) - return ret; - if (root != root->extent_root) { - memcpy(&root->extent_root->current_insert, ins, sizeof(*ins)); - ret = insert_item(root->extent_root, ins, &extent_item, - sizeof(extent_item)); - memset(&root->extent_root->current_insert, 0, - sizeof(struct key)); - pending_ret = run_pending(root->extent_root); - if (ret) - return ret; - if (pending_ret) - return pending_ret; - *buf = find_tree_block(root, ins->objectid); - dirty_tree_block(root, *buf); + if (root == extent_root) { + BUG_ON(extent_root->current_insert.offset == 0); + BUG_ON(num_blocks != 1); + BUG_ON(extent_root->current_insert.flags == + extent_root->current_insert.offset); + ins->offset = 1; + ins->objectid = extent_root->current_insert.objectid + + extent_root->current_insert.flags++; return 0; } - /* we're allocating an extent for the extent tree, don't recurse */ - BUG_ON(ins->offset != 1); - *buf = find_tree_block(root, ins->objectid); - BUG_ON(!*buf); - radix_tree_tag_set(&root->cache_radix, ins->objectid, - CTREE_EXTENT_PENDING_ADD); - (*buf)->count++; - dirty_tree_block(root, *buf); - return 0; + ret = find_free_extent(root, num_blocks, search_start, + search_end, ins); + if (ret) + return ret; + ret = insert_item(extent_root, ins, &extent_item, + sizeof(extent_item)); + + finish_current_insert(extent_root); + pending_ret = run_pending(extent_root); + if (ret) + return ret; + if (pending_ret) + return pending_ret; + return 0; } /* @@ -415,19 +398,17 @@ struct tree_buffer *alloc_free_block(struct ctree_root *root) { struct key ins; int ret; - struct tree_buffer *buf = NULL; + struct tree_buffer *buf; ret = alloc_extent(root, 1, 0, (unsigned long)-1, root->node->node.header.parentid, - &ins, &buf); + &ins); if (ret) { BUG(); return NULL; } - if (root != root->extent_root) - BUG_ON(radix_tree_tag_get(&root->extent_root->cache_radix, - buf->blocknr, - CTREE_EXTENT_PENDING_ADD)); + buf = find_tree_block(root, ins.objectid); + dirty_tree_block(root, buf); return buf; } -- cgit v1.2.3-70-g09d2 From 0579da4280812f34f382fb0f8004d7b0219e7a33 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 7 Mar 2007 16:15:30 -0500 Subject: Btrfs: Fixup last found extent caching Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 39 +++++++++++++++++++++------------------ 4 files changed, 24 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index d92d08dde0f..ae7f4c00c39 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -1,6 +1,6 @@ CC=gcc -CFLAGS = -Wall +CFLAGS = -g -Wall headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4a7bc4e6e74..518326fa369 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -54,6 +54,7 @@ struct ctree_root { struct tree_buffer *commit_root; struct ctree_root *extent_root; struct key current_insert; + struct key last_insert; int fp; struct radix_tree_root cache_radix; struct radix_tree_root pinned_radix; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2fe31c3508c..997cc578a18 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -186,6 +186,7 @@ static int __setup_root(struct ctree_root *root, struct ctree_root *extent_root, root->commit_root = NULL; root->node = read_tree_block(root, info->tree_root); memset(&root->current_insert, 0, sizeof(root->current_insert)); + memset(&root->last_insert, 0, sizeof(root->last_insert)); return 0; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8a2b8aaf9b8..dd11532cb2f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -102,9 +102,12 @@ int btrfs_finish_extent_commit(struct ctree_root *root) ARRAY_SIZE(gang)); if (!ret) break; - for (i = 0; i < ret; i++) + for (i = 0; i < ret; i++) { radix_tree_delete(&extent_root->pinned_radix, gang[i]); + } } + extent_root->last_insert.objectid = 0; + extent_root->last_insert.offset = 0; return 0; } @@ -170,6 +173,9 @@ int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) radix_tree_preload_end(); } ret = del_item(extent_root, &path); + if (root != extent_root && + extent_root->last_insert.objectid < blocknr) + extent_root->last_insert.objectid = blocknr; if (ret) BUG(); } @@ -261,8 +267,11 @@ static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, int start_found; struct leaf *l; struct ctree_root * root = orig_root->extent_root; - int total_needed = num_blocks + MAX_LEVEL * 3; + int total_needed = num_blocks; + total_needed += (node_level(root->node->node.header.flags) + 1) * 3; + if (root->last_insert.objectid > search_start) + search_start = root->last_insert.objectid; check_failed: init_path(&path); ins->objectid = search_start; @@ -273,6 +282,9 @@ check_failed: if (ret < 0) goto error; + if (path.slots[0] > 0) + path.slots[0]--; + while (1) { l = &path.nodes[0]->leaf; slot = path.slots[0]; @@ -293,31 +305,21 @@ check_failed: ins->offset = (u64)-1; goto check_pending; } - if (slot == 0) { - int last_slot = l->header.nritems - 1; - u64 span = l->items[last_slot].key.objectid; - span -= l->items[slot].key.objectid; - if (span + total_needed > last_slot - slot) { - path.slots[0] = last_slot + 1; - key = &l->items[last_slot].key; - last_block = key->objectid + key->offset; - start_found = 1; - continue; - } - } key = &l->items[slot].key; if (key->objectid >= search_start) { if (start_found) { + if (last_block < search_start) + last_block = search_start; hole_size = key->objectid - last_block; if (hole_size > total_needed) { ins->objectid = last_block; ins->offset = hole_size; goto check_pending; } - } else - start_found = 1; - last_block = key->objectid + key->offset; + } } + start_found = 1; + last_block = key->objectid + key->offset; path.slots[0]++; } // FIXME -ENOSPC @@ -335,9 +337,10 @@ check_pending: } } BUG_ON(root->current_insert.offset); - root->current_insert.offset = total_needed; + root->current_insert.offset = total_needed - num_blocks; root->current_insert.objectid = ins->objectid + num_blocks; root->current_insert.flags = 0; + root->last_insert.objectid = ins->objectid; ins->offset = num_blocks; return 0; error: -- cgit v1.2.3-70-g09d2 From 20524f02260910db1e67bd5335d3854e5e555efc Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 10 Mar 2007 06:35:47 -0500 Subject: Btrfs: recursion free-first pass Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/quick-test.c | 5 ++- 2 files changed, 100 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index dd11532cb2f..6fbaece43ff 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -50,7 +50,7 @@ static int inc_block_ref(struct ctree_root *root, u64 blocknr) return 0; } -static int lookup_block_ref(struct ctree_root *root, u64 blocknr, int *refs) +static int lookup_block_ref(struct ctree_root *root, u64 blocknr, u32 *refs) { struct ctree_path path; int ret; @@ -415,6 +415,100 @@ struct tree_buffer *alloc_free_block(struct ctree_root *root) return buf; } +int walk_down_tree(struct ctree_root *root, struct ctree_path *path, int *level) +{ + struct tree_buffer *next; + struct tree_buffer *cur; + u64 blocknr; + int ret; + u32 refs; + + ret = lookup_block_ref(root, path->nodes[*level]->blocknr, &refs); + BUG_ON(ret); + if (refs > 1) + goto out; + while(*level > 0) { + cur = path->nodes[*level]; + if (path->slots[*level] >= cur->node.header.nritems) + break; + blocknr = cur->node.blockptrs[path->slots[*level]]; + ret = lookup_block_ref(root, blocknr, &refs); + if (refs != 1 || *level == 1) { + path->slots[*level]++; + ret = free_extent(root, blocknr, 1); + BUG_ON(ret); + continue; + } + BUG_ON(ret); + next = read_tree_block(root, blocknr); + if (path->nodes[*level-1]) { + tree_block_release(root, path->nodes[*level-1]); + } + path->nodes[*level-1] = next; + *level = node_level(next->node.header.flags); + path->slots[*level] = 0; + } +out: + ret = free_extent(root, path->nodes[*level]->blocknr, 1); + path->nodes[*level] = NULL; + *level += 1; + BUG_ON(ret); + return 0; +} + +int walk_up_tree(struct ctree_root *root, struct ctree_path *path, int *level) +{ + int i; + int slot; + int ret; + for(i = *level; i < MAX_LEVEL - 1 && path->nodes[i]; i++) { + slot = path->slots[i]; + if (slot < path->nodes[i]->node.header.nritems - 1) { + path->slots[i]++; + *level = i; + return 0; + } else { + ret = free_extent(root, + path->nodes[*level]->blocknr, 1); + *level = i + 1; + BUG_ON(ret); + } + } + return 1; +} + +int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap) +{ + int ret; + int level; + struct ctree_path path; + int i; + int orig_level; + + init_path(&path); + + level = node_level(snap->node.header.flags); + orig_level = level; + path.nodes[level] = snap; + path.slots[level] = 0; + while(1) { + ret = walk_down_tree(root, &path, &level); + if (ret > 0) + break; + ret = walk_up_tree(root, &path, &level); + if (ret > 0) + break; + } + for (i = 0; i < orig_level; i++) { + if (path.nodes[i]) + tree_block_release(root, path.nodes[i]); + } + + return 0; +} + + +#if 0 int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap) { int ret; @@ -441,4 +535,4 @@ int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap) BUG_ON(ret); return 0; } - +#endif diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c index 6400c7100a6..ccca9b2c14a 100644 --- a/fs/btrfs/quick-test.c +++ b/fs/btrfs/quick-test.c @@ -9,7 +9,7 @@ /* for testing only */ int next_key(int i, int max_key) { return rand() % max_key; - //return i; + // return i; } int main(int ac, char **av) { @@ -44,6 +44,9 @@ int main(int ac, char **av) { if (!ret) tree_size++; free(buf); + if (i == run_size - 5) { + commit_transaction(root, &super); + } } close_ctree(root, &super); -- cgit v1.2.3-70-g09d2 From 83e15a28e046dbb4534dd263d1d3dc867a8994a2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 12 Mar 2007 09:03:27 -0400 Subject: fix leak in btrfs_drop_snapshot Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 997cc578a18..c9201ec81fe 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -167,7 +167,7 @@ int commit_transaction(struct ctree_root *root, struct ctree_super_block *s) root->node->count++; ret = btrfs_drop_snapshot(root, snap); BUG_ON(ret); - tree_block_release(root, snap); + // tree_block_release(root, snap); } write_ctree_super(root, s); btrfs_finish_extent_commit(root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6fbaece43ff..e7b8a70b5e5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -441,15 +441,15 @@ int walk_down_tree(struct ctree_root *root, struct ctree_path *path, int *level) } BUG_ON(ret); next = read_tree_block(root, blocknr); - if (path->nodes[*level-1]) { + if (path->nodes[*level-1]) tree_block_release(root, path->nodes[*level-1]); - } path->nodes[*level-1] = next; *level = node_level(next->node.header.flags); path->slots[*level] = 0; } out: ret = free_extent(root, path->nodes[*level]->blocknr, 1); + tree_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; BUG_ON(ret); @@ -470,6 +470,8 @@ int walk_up_tree(struct ctree_root *root, struct ctree_path *path, int *level) } else { ret = free_extent(root, path->nodes[*level]->blocknr, 1); + tree_block_release(root, path->nodes[*level]); + path->nodes[*level] = NULL; *level = i + 1; BUG_ON(ret); } @@ -499,9 +501,10 @@ int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap) if (ret > 0) break; } - for (i = 0; i < orig_level; i++) { - if (path.nodes[i]) + for (i = 0; i <= orig_level; i++) { + if (path.nodes[i]) { tree_block_release(root, path.nodes[i]); + } } return 0; -- cgit v1.2.3-70-g09d2 From 7518a238ea0152dc849d1ed76d3cae8b44e12f46 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 12 Mar 2007 12:01:18 -0400 Subject: Btrfs: get/set for struct header fields Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/TODO | 1 + fs/btrfs/ctree.c | 224 +++++++++++++++++++++++++++---------------------- fs/btrfs/ctree.h | 63 +++++++++++++- fs/btrfs/disk-io.c | 7 +- fs/btrfs/extent-tree.c | 53 +++--------- fs/btrfs/mkfs.c | 10 +-- fs/btrfs/print-tree.c | 26 +++--- fs/btrfs/quick-test.c | 12 +-- fs/btrfs/random-test.c | 4 +- 10 files changed, 230 insertions(+), 172 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index ae7f4c00c39..f8532200e9a 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -25,7 +25,7 @@ tester: $(objects) random-test.o quick-test: $(objects) quick-test.o gcc $(CFLAGS) -o quick-test $(objects) quick-test.o -$(objects) : $(headers) +$(objects): $(headers) clean : rm debug-tree tester *.o diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 5c4395c3784..52d2a7a071f 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -1,4 +1,5 @@ * cleanup, add more error checking, get rid of BUG_ONs +* endian fixes * Make IO functions look more like the page cache * Fix ENOSPC handling * make blocksize a mkfs parameter instead of #define diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 729d4ddb374..e43c827e0df 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -48,7 +48,7 @@ int btrfs_cow_block(struct ctree_root *root, } cow = alloc_free_block(root); memcpy(&cow->node, &buf->node, sizeof(buf->node)); - cow->node.header.blocknr = cow->blocknr; + btrfs_set_header_blocknr(&cow->node.header, cow->blocknr); *cow_ret = cow; btrfs_inc_ref(root, buf); if (buf == root->node) { @@ -73,7 +73,7 @@ int btrfs_cow_block(struct ctree_root *root, */ static inline unsigned int leaf_data_end(struct leaf *leaf) { - unsigned int nr = leaf->header.nritems; + u32 nr = btrfs_header_nritems(&leaf->header); if (nr == 0) return sizeof(leaf->data); return leaf->items[nr-1].offset; @@ -87,7 +87,7 @@ static inline unsigned int leaf_data_end(struct leaf *leaf) int leaf_free_space(struct leaf *leaf) { int data_end = leaf_data_end(leaf); - int nritems = leaf->header.nritems; + int nritems = btrfs_header_nritems(&leaf->header); char *items_end = (char *)(leaf->items + nritems + 1); return (char *)(leaf->data + data_end) - (char *)items_end; } @@ -118,18 +118,21 @@ int check_node(struct ctree_path *path, int level) struct node *parent = NULL; struct node *node = &path->nodes[level]->node; int parent_slot; + u32 nritems = btrfs_header_nritems(&node->header); if (path->nodes[level + 1]) parent = &path->nodes[level + 1]->node; parent_slot = path->slots[level + 1]; - if (parent && node->header.nritems > 0) { + BUG_ON(nritems == 0); + if (parent) { struct key *parent_key; parent_key = &parent->keys[parent_slot]; BUG_ON(memcmp(parent_key, node->keys, sizeof(struct key))); - BUG_ON(parent->blockptrs[parent_slot] != node->header.blocknr); + BUG_ON(parent->blockptrs[parent_slot] != + btrfs_header_blocknr(&node->header)); } - BUG_ON(node->header.nritems > NODEPTRS_PER_BLOCK); - for (i = 0; i < node->header.nritems - 2; i++) { + BUG_ON(nritems > NODEPTRS_PER_BLOCK); + for (i = 0; nritems > 1 && i < nritems - 2; i++) { BUG_ON(comp_keys(&node->keys[i], &node->keys[i+1]) >= 0); } return 0; @@ -141,18 +144,25 @@ int check_leaf(struct ctree_path *path, int level) struct leaf *leaf = &path->nodes[level]->leaf; struct node *parent = NULL; int parent_slot; + u32 nritems = btrfs_header_nritems(&leaf->header); if (path->nodes[level + 1]) parent = &path->nodes[level + 1]->node; parent_slot = path->slots[level + 1]; - if (parent && leaf->header.nritems > 0) { + BUG_ON(leaf_free_space(leaf) < 0); + + if (nritems == 0) + return 0; + + if (parent) { struct key *parent_key; parent_key = &parent->keys[parent_slot]; BUG_ON(memcmp(parent_key, &leaf->items[0].key, sizeof(struct key))); - BUG_ON(parent->blockptrs[parent_slot] != leaf->header.blocknr); + BUG_ON(parent->blockptrs[parent_slot] != + btrfs_header_blocknr(&leaf->header)); } - for (i = 0; i < leaf->header.nritems - 2; i++) { + for (i = 0; nritems > 1 && i < nritems - 2; i++) { BUG_ON(comp_keys(&leaf->items[i].key, &leaf->items[i+1].key) >= 0); BUG_ON(leaf->items[i].offset != leaf->items[i + 1].offset + @@ -162,7 +172,6 @@ int check_leaf(struct ctree_path *path, int level) LEAF_DATA_SIZE); } } - BUG_ON(leaf_free_space(leaf) < 0); return 0; } @@ -215,13 +224,15 @@ int generic_bin_search(char *p, int item_size, struct key *key, */ int bin_search(struct node *c, struct key *key, int *slot) { - if (is_leaf(c->header.flags)) { + if (btrfs_is_leaf(c)) { struct leaf *l = (struct leaf *)c; return generic_bin_search((void *)l->items, sizeof(struct item), - key, c->header.nritems, slot); + key, btrfs_header_nritems(&c->header), + slot); } else { return generic_bin_search((void *)c->keys, sizeof(struct key), - key, c->header.nritems, slot); + key, btrfs_header_nritems(&c->header), + slot); } return -1; } @@ -233,7 +244,7 @@ struct tree_buffer *read_node_slot(struct ctree_root *root, struct node *node = &parent_buf->node; if (slot < 0) return NULL; - if (slot >= node->header.nritems) + if (slot >= btrfs_header_nritems(&node->header)) return NULL; return read_tree_block(root, node->blockptrs[slot]); } @@ -270,7 +281,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, struct tree_buffer *child; u64 blocknr = mid_buf->blocknr; - if (mid->header.nritems != 1) + if (btrfs_header_nritems(&mid->header) != 1) return 0; /* promote the child to a root */ @@ -287,7 +298,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, } parent = &parent_buf->node; - if (mid->header.nritems > NODEPTRS_PER_BLOCK / 4) + if (btrfs_header_nritems(&mid->header) > NODEPTRS_PER_BLOCK / 4) return 0; left_buf = read_node_slot(root, parent_buf, pslot - 1); @@ -298,7 +309,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, btrfs_cow_block(root, left_buf, parent_buf, pslot - 1, &left_buf); left = &left_buf->node; - orig_slot += left->header.nritems; + orig_slot += btrfs_header_nritems(&left->header); wret = push_node_left(root, left_buf, mid_buf); if (wret < 0) ret = wret; @@ -314,7 +325,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, wret = push_node_left(root, mid_buf, right_buf); if (wret < 0) ret = wret; - if (right->header.nritems == 0) { + if (btrfs_header_nritems(&right->header) == 0) { u64 blocknr = right_buf->blocknr; tree_block_release(root, right_buf); clean_tree_block(root, right_buf); @@ -332,7 +343,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, BUG_ON(list_empty(&parent_buf->dirty)); } } - if (mid->header.nritems == 1) { + if (btrfs_header_nritems(&mid->header) == 1) { /* * we're not allowed to leave a node with one item in the * tree during a delete. A deletion from lower in the tree @@ -348,7 +359,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, ret = wret; BUG_ON(wret == 1); } - if (mid->header.nritems == 0) { + if (btrfs_header_nritems(&mid->header) == 0) { /* we've managed to empty the middle node, drop it */ u64 blocknr = mid_buf->blocknr; tree_block_release(root, mid_buf); @@ -369,7 +380,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, /* update the path */ if (left_buf) { - if (left->header.nritems > orig_slot) { + if (btrfs_header_nritems(&left->header) > orig_slot) { left_buf->count++; // released below path->nodes[level] = left_buf; path->slots[level + 1] -= 1; @@ -377,7 +388,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, if (mid_buf) tree_block_release(root, mid_buf); } else { - orig_slot -= left->header.nritems; + orig_slot -= btrfs_header_nritems(&left->header); path->slots[level] = orig_slot; } } @@ -420,7 +431,7 @@ again: b = root->node; b->count++; while (b) { - level = node_level(b->node.header.flags); + level = btrfs_header_level(&b->node.header); if (cow) { int wret; wret = btrfs_cow_block(root, b, p->nodes[level + 1], @@ -434,12 +445,12 @@ again: if (ret) return -1; ret = bin_search(c, key, &slot); - if (!is_leaf(c->header.flags)) { + if (!btrfs_is_leaf(c)) { if (ret && slot > 0) slot -= 1; p->slots[level] = slot; - if (ins_len > 0 && - c->header.nritems == NODEPTRS_PER_BLOCK) { + if (ins_len > 0 && btrfs_header_nritems(&c->header) == + NODEPTRS_PER_BLOCK) { int sret = split_node(root, p, level); BUG_ON(sret > 0); if (sret) @@ -456,7 +467,7 @@ again: goto again; c = &b->node; slot = p->slots[level]; - BUG_ON(c->header.nritems == 1); + BUG_ON(btrfs_header_nritems(&c->header) == 1); } b = read_tree_block(root, c->blockptrs[slot]); } else { @@ -524,8 +535,8 @@ static int push_node_left(struct ctree_root *root, struct tree_buffer *dst_buf, int dst_nritems; int ret = 0; - src_nritems = src->header.nritems; - dst_nritems = dst->header.nritems; + src_nritems = btrfs_header_nritems(&src->header); + dst_nritems = btrfs_header_nritems(&dst->header); push_items = NODEPTRS_PER_BLOCK - dst_nritems; if (push_items <= 0) { return 1; @@ -544,9 +555,8 @@ static int push_node_left(struct ctree_root *root, struct tree_buffer *dst_buf, memmove(src->blockptrs, src->blockptrs + push_items, (src_nritems - push_items) * sizeof(u64)); } - src->header.nritems -= push_items; - dst->header.nritems += push_items; - + btrfs_set_header_nritems(&src->header, src_nritems - push_items); + btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); BUG_ON(list_empty(&src_buf->dirty)); BUG_ON(list_empty(&dst_buf->dirty)); return ret; @@ -573,8 +583,8 @@ static int balance_node_right(struct ctree_root *root, int dst_nritems; int ret = 0; - src_nritems = src->header.nritems; - dst_nritems = dst->header.nritems; + src_nritems = btrfs_header_nritems(&src->header); + dst_nritems = btrfs_header_nritems(&dst->header); push_items = NODEPTRS_PER_BLOCK - dst_nritems; if (push_items <= 0) { return 1; @@ -596,8 +606,8 @@ static int balance_node_right(struct ctree_root *root, memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items, push_items * sizeof(u64)); - src->header.nritems -= push_items; - dst->header.nritems += push_items; + btrfs_set_header_nritems(&src->header, src_nritems - push_items); + btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); BUG_ON(list_empty(&src_buf->dirty)); BUG_ON(list_empty(&dst_buf->dirty)); @@ -625,12 +635,13 @@ static int insert_new_root(struct ctree_root *root, t = alloc_free_block(root); c = &t->node; memset(c, 0, sizeof(c)); - c->header.nritems = 1; - c->header.flags = node_level(level); - c->header.blocknr = t->blocknr; - c->header.parentid = root->node->node.header.parentid; + btrfs_set_header_nritems(&c->header, 1); + btrfs_set_header_level(&c->header, level); + btrfs_set_header_blocknr(&c->header, t->blocknr); + btrfs_set_header_parentid(&c->header, + btrfs_header_parentid(&root->node->node.header)); lower = &path->nodes[level-1]->node; - if (is_leaf(lower->header.flags)) + if (btrfs_is_leaf(lower)) lower_key = &((struct leaf *)lower)->items[0].key; else lower_key = lower->keys; @@ -663,7 +674,7 @@ static int insert_ptr(struct ctree_root *root, BUG_ON(!path->nodes[level]); lower = &path->nodes[level]->node; - nritems = lower->header.nritems; + nritems = btrfs_header_nritems(&lower->header); if (slot > nritems) BUG(); if (nritems == NODEPTRS_PER_BLOCK) @@ -676,7 +687,7 @@ static int insert_ptr(struct ctree_root *root, } memcpy(lower->keys + slot, key, sizeof(struct key)); lower->blockptrs[slot] = blocknr; - lower->header.nritems++; + btrfs_set_header_nritems(&lower->header, nritems + 1); if (lower->keys[1].objectid == 0) BUG(); BUG_ON(list_empty(&path->nodes[level]->dirty)); @@ -702,6 +713,7 @@ static int split_node(struct ctree_root *root, struct ctree_path *path, int mid; int ret; int wret; + u32 c_nritems; t = path->nodes[level]; c = &t->node; @@ -711,18 +723,20 @@ static int split_node(struct ctree_root *root, struct ctree_path *path, if (ret) return ret; } + c_nritems = btrfs_header_nritems(&c->header); split_buffer = alloc_free_block(root); split = &split_buffer->node; - split->header.flags = c->header.flags; - split->header.blocknr = split_buffer->blocknr; - split->header.parentid = root->node->node.header.parentid; - mid = (c->header.nritems + 1) / 2; + btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); + btrfs_set_header_blocknr(&split->header, split_buffer->blocknr); + btrfs_set_header_parentid(&split->header, + btrfs_header_parentid(&root->node->node.header)); + mid = (c_nritems + 1) / 2; memcpy(split->keys, c->keys + mid, - (c->header.nritems - mid) * sizeof(struct key)); + (c_nritems - mid) * sizeof(struct key)); memcpy(split->blockptrs, c->blockptrs + mid, - (c->header.nritems - mid) * sizeof(u64)); - split->header.nritems = c->header.nritems - mid; - c->header.nritems = mid; + (c_nritems - mid) * sizeof(u64)); + btrfs_set_header_nritems(&split->header, c_nritems - mid); + btrfs_set_header_nritems(&c->header, mid); ret = 0; BUG_ON(list_empty(&t->dirty)); @@ -781,13 +795,15 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, int push_space = 0; int push_items = 0; struct item *item; + u32 left_nritems; + u32 right_nritems; slot = path->slots[1]; if (!path->nodes[1]) { return 1; } upper = path->nodes[1]; - if (slot >= upper->node.header.nritems - 1) { + if (slot >= btrfs_header_nritems(&upper->node.header) - 1) { return 1; } right_buf = read_tree_block(root, upper->node.blockptrs[slot + 1]); @@ -806,7 +822,8 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, return 1; } - for (i = left->header.nritems - 1; i >= 0; i--) { + left_nritems = btrfs_header_nritems(&left->header); + for (i = left_nritems - 1; i >= 0; i--) { item = left->items + i; if (path->slots[0] == i) push_space += data_size + sizeof(*item); @@ -819,9 +836,10 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, tree_block_release(root, right_buf); return 1; } + right_nritems = btrfs_header_nritems(&right->header); /* push left to right */ - push_space = left->items[left->header.nritems - push_items].offset + - left->items[left->header.nritems - push_items].size; + push_space = left->items[left_nritems - push_items].offset + + left->items[left_nritems - push_items].size; push_space -= leaf_data_end(left); /* make room in the right data area */ memmove(right->data + leaf_data_end(right) - push_space, @@ -832,19 +850,21 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, left->data + leaf_data_end(left), push_space); memmove(right->items + push_items, right->items, - right->header.nritems * sizeof(struct item)); + right_nritems * sizeof(struct item)); /* copy the items from left to right */ - memcpy(right->items, left->items + left->header.nritems - push_items, + memcpy(right->items, left->items + left_nritems - push_items, push_items * sizeof(struct item)); /* update the item pointers */ - right->header.nritems += push_items; + right_nritems += push_items; + btrfs_set_header_nritems(&right->header, right_nritems); push_space = LEAF_DATA_SIZE; - for (i = 0; i < right->header.nritems; i++) { + for (i = 0; i < right_nritems; i++) { right->items[i].offset = push_space - right->items[i].size; push_space = right->items[i].offset; } - left->header.nritems -= push_items; + left_nritems -= push_items; + btrfs_set_header_nritems(&left->header, left_nritems); BUG_ON(list_empty(&left_buf->dirty)); BUG_ON(list_empty(&right_buf->dirty)); @@ -853,8 +873,8 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, BUG_ON(list_empty(&upper->dirty)); /* then fixup the leaf pointer in the path */ - if (path->slots[0] >= left->header.nritems) { - path->slots[0] -= left->header.nritems; + if (path->slots[0] >= left_nritems) { + path->slots[0] -= left_nritems; tree_block_release(root, path->nodes[0]); path->nodes[0] = right_buf; path->slots[1] += 1; @@ -880,7 +900,7 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, int push_space = 0; int push_items = 0; struct item *item; - int old_left_nritems; + u32 old_left_nritems; int ret = 0; int wret; @@ -908,7 +928,7 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, return 1; } - for (i = 0; i < right->header.nritems; i++) { + for (i = 0; i < btrfs_header_nritems(&right->header); i++) { item = right->items + i; if (path->slots[0] == i) push_space += data_size + sizeof(*item); @@ -922,31 +942,34 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, return 1; } /* push data from right to left */ - memcpy(left->items + left->header.nritems, + memcpy(left->items + btrfs_header_nritems(&left->header), right->items, push_items * sizeof(struct item)); push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset; memcpy(left->data + leaf_data_end(left) - push_space, right->data + right->items[push_items - 1].offset, push_space); - old_left_nritems = left->header.nritems; + old_left_nritems = btrfs_header_nritems(&left->header); BUG_ON(old_left_nritems < 0); for(i = old_left_nritems; i < old_left_nritems + push_items; i++) { left->items[i].offset -= LEAF_DATA_SIZE - left->items[old_left_nritems -1].offset; } - left->header.nritems += push_items; + btrfs_set_header_nritems(&left->header, old_left_nritems + push_items); /* fixup right node */ push_space = right->items[push_items-1].offset - leaf_data_end(right); memmove(right->data + LEAF_DATA_SIZE - push_space, right->data + leaf_data_end(right), push_space); memmove(right->items, right->items + push_items, - (right->header.nritems - push_items) * sizeof(struct item)); - right->header.nritems -= push_items; + (btrfs_header_nritems(&right->header) - push_items) * + sizeof(struct item)); + btrfs_set_header_nritems(&right->header, + btrfs_header_nritems(&right->header) - + push_items); push_space = LEAF_DATA_SIZE; - for (i = 0; i < right->header.nritems; i++) { + for (i = 0; i < btrfs_header_nritems(&right->header); i++) { right->items[i].offset = push_space - right->items[i].size; push_space = right->items[i].offset; } @@ -983,7 +1006,7 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, { struct tree_buffer *l_buf; struct leaf *l; - int nritems; + u32 nritems; int mid; int slot; struct leaf *right; @@ -1008,7 +1031,7 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, return ret; } slot = path->slots[0]; - nritems = l->header.nritems; + nritems = btrfs_header_nritems(&l->header); mid = (nritems + 1)/ 2; right_buffer = alloc_free_block(root); BUG_ON(!right_buffer); @@ -1026,10 +1049,11 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, LEAF_DATA_SIZE) BUG(); } - right->header.nritems = nritems - mid; - right->header.blocknr = right_buffer->blocknr; - right->header.flags = node_level(0); - right->header.parentid = root->node->node.header.parentid; + btrfs_set_header_nritems(&right->header, nritems - mid); + btrfs_set_header_blocknr(&right->header, right_buffer->blocknr); + btrfs_set_header_level(&right->header, 0); + btrfs_set_header_parentid(&right->header, + btrfs_header_parentid(&root->node->node.header)); data_copy_size = l->items[mid].offset + l->items[mid].size - leaf_data_end(l); memcpy(right->items, l->items + mid, @@ -1039,10 +1063,10 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, rt_data_off = LEAF_DATA_SIZE - (l->items[mid].offset + l->items[mid].size); - for (i = 0; i < right->header.nritems; i++) + for (i = 0; i < btrfs_header_nritems(&right->header); i++) right->items[i].offset += rt_data_off; - l->header.nritems = mid; + btrfs_set_header_nritems(&l->header, mid); ret = 0; wret = insert_ptr(root, path, &right->items[0].key, right_buffer->blocknr, path->slots[1] + 1, 1); @@ -1074,7 +1098,7 @@ int insert_item(struct ctree_root *root, struct key *key, int slot_orig; struct leaf *leaf; struct tree_buffer *leaf_buf; - unsigned int nritems; + u32 nritems; unsigned int data_end; struct ctree_path path; @@ -1094,7 +1118,7 @@ int insert_item(struct ctree_root *root, struct key *key, leaf_buf = path.nodes[0]; leaf = &leaf_buf->leaf; - nritems = leaf->header.nritems; + nritems = btrfs_header_nritems(&leaf->header); data_end = leaf_data_end(leaf); if (leaf_free_space(leaf) < sizeof(struct item) + data_size) @@ -1128,7 +1152,7 @@ int insert_item(struct ctree_root *root, struct key *key, leaf->items[slot].offset = data_end - data_size; leaf->items[slot].size = data_size; memcpy(leaf->data + data_end - data_size, data, data_size); - leaf->header.nritems += 1; + btrfs_set_header_nritems(&leaf->header, nritems + 1); ret = 0; if (slot == 0) @@ -1155,12 +1179,12 @@ static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level, { struct node *node; struct tree_buffer *parent = path->nodes[level]; - int nritems; + u32 nritems; int ret = 0; int wret; node = &parent->node; - nritems = node->header.nritems; + nritems = btrfs_header_nritems(&node->header); if (slot != nritems -1) { memmove(node->keys + slot, node->keys + slot + 1, sizeof(struct key) * (nritems - slot - 1)); @@ -1168,11 +1192,12 @@ static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level, node->blockptrs + slot + 1, sizeof(u64) * (nritems - slot - 1)); } - node->header.nritems--; - if (node->header.nritems == 0 && parent == root->node) { - BUG_ON(node_level(root->node->node.header.flags) != 1); + nritems--; + btrfs_set_header_nritems(&node->header, nritems); + if (nritems == 0 && parent == root->node) { + BUG_ON(btrfs_header_level(&root->node->node.header) != 1); /* just turn the root into a leaf and break */ - root->node->node.header.flags = node_level(0); + btrfs_set_header_level(&root->node->node.header, 0); } else if (slot == 0) { wret = fixup_low_keys(root, path, node->keys, level + 1); if (wret) @@ -1195,30 +1220,33 @@ int del_item(struct ctree_root *root, struct ctree_path *path) int dsize; int ret = 0; int wret; + u32 nritems; leaf_buf = path->nodes[0]; leaf = &leaf_buf->leaf; slot = path->slots[0]; doff = leaf->items[slot].offset; dsize = leaf->items[slot].size; + nritems = btrfs_header_nritems(&leaf->header); - if (slot != leaf->header.nritems - 1) { + if (slot != nritems - 1) { int i; int data_end = leaf_data_end(leaf); memmove(leaf->data + data_end + dsize, leaf->data + data_end, doff - data_end); - for (i = slot + 1; i < leaf->header.nritems; i++) + for (i = slot + 1; i < nritems; i++) leaf->items[i].offset += dsize; memmove(leaf->items + slot, leaf->items + slot + 1, sizeof(struct item) * - (leaf->header.nritems - slot - 1)); + (nritems - slot - 1)); } - leaf->header.nritems -= 1; + btrfs_set_header_nritems(&leaf->header, nritems - 1); + nritems--; /* delete the leaf if we've emptied it */ - if (leaf->header.nritems == 0) { + if (nritems == 0) { if (leaf_buf == root->node) { - leaf->header.flags = node_level(0); + btrfs_set_header_level(&leaf->header, 0); BUG_ON(list_empty(&leaf_buf->dirty)); } else { clean_tree_block(root, leaf_buf); @@ -1230,7 +1258,7 @@ int del_item(struct ctree_root *root, struct ctree_path *path) ret = wret; } } else { - int used = leaf_space_used(leaf, 0, leaf->header.nritems); + int used = leaf_space_used(leaf, 0, nritems); if (slot == 0) { wret = fixup_low_keys(root, path, &leaf->items[0].key, 1); @@ -1251,12 +1279,12 @@ int del_item(struct ctree_root *root, struct ctree_path *path) if (wret < 0) ret = wret; if (path->nodes[0] == leaf_buf && - leaf->header.nritems) { + btrfs_header_nritems(&leaf->header)) { wret = push_leaf_right(root, path, 1); if (wret < 0) ret = wret; } - if (leaf->header.nritems == 0) { + if (btrfs_header_nritems(&leaf->header) == 0) { u64 blocknr = leaf_buf->blocknr; clean_tree_block(root, leaf_buf); wret = del_ptr(root, path, 1, slot); @@ -1292,7 +1320,7 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) return 1; slot = path->slots[level] + 1; c = path->nodes[level]; - if (slot >= c->node.header.nritems) { + if (slot >= btrfs_header_nritems(&c->node.header)) { level++; continue; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 518326fa369..60c21fe1566 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -35,13 +35,10 @@ struct header { /* generation flags to be added */ } __attribute__ ((__packed__)); +#define MAX_LEVEL 8 #define NODEPTRS_PER_BLOCK ((CTREE_BLOCKSIZE - sizeof(struct header)) / \ (sizeof(struct key) + sizeof(u64))) -#define MAX_LEVEL 8 -#define node_level(f) ((f) & (MAX_LEVEL-1)) -#define is_leaf(f) (node_level(f) == 0) - struct tree_buffer; /* @@ -144,6 +141,64 @@ struct ctree_path { int slots[MAX_LEVEL]; }; +static inline u64 btrfs_header_blocknr(struct header *h) +{ + return h->blocknr; +} + +static inline void btrfs_set_header_blocknr(struct header *h, u64 blocknr) +{ + h->blocknr = blocknr; +} + +static inline u64 btrfs_header_parentid(struct header *h) +{ + return h->parentid; +} + +static inline void btrfs_set_header_parentid(struct header *h, u64 parentid) +{ + h->parentid = parentid; +} + +static inline u32 btrfs_header_nritems(struct header *h) +{ + return h->nritems; +} + +static inline void btrfs_set_header_nritems(struct header *h, u32 val) +{ + h->nritems = val; +} + +static inline u32 btrfs_header_flags(struct header *h) +{ + return h->flags; +} + +static inline void btrfs_set_header_flags(struct header *h, u32 val) +{ + h->flags = val; +} + +static inline int btrfs_header_level(struct header *h) +{ + return btrfs_header_flags(h) & (MAX_LEVEL - 1); +} + +static inline void btrfs_set_header_level(struct header *h, int level) +{ + u32 flags; + BUG_ON(level > MAX_LEVEL); + flags = btrfs_header_flags(h) & ~(MAX_LEVEL - 1); + btrfs_set_header_flags(h, flags | level); +} + +static inline int btrfs_is_leaf(struct node *n) +{ + return (btrfs_header_level(&n->header) == 0); +} + struct tree_buffer *alloc_free_block(struct ctree_root *root); int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf); int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c9201ec81fe..065e888d2c0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -15,9 +15,10 @@ int cache_max = 10000; static int check_tree_block(struct ctree_root *root, struct tree_buffer *buf) { - if (buf->blocknr != buf->node.header.blocknr) + if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) BUG(); - if (root->node && buf->node.header.parentid != root->node->node.header.parentid) + if (root->node && btrfs_header_parentid(&buf->node.header) != + btrfs_header_parentid(&root->node->node.header)) BUG(); return 0; } @@ -129,7 +130,7 @@ int write_tree_block(struct ctree_root *root, struct tree_buffer *buf) loff_t offset = blocknr * CTREE_BLOCKSIZE; int ret; - if (buf->blocknr != buf->node.header.blocknr) + if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) BUG(); ret = pwrite(root->fp, &buf->node, CTREE_BLOCKSIZE, offset); if (ret != CTREE_BLOCKSIZE) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e7b8a70b5e5..524c11fea24 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -79,10 +79,10 @@ int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf) if (root == root->extent_root) return 0; - if (is_leaf(buf->node.header.flags)) + if (btrfs_is_leaf(&buf->node)) return 0; - for (i = 0; i < buf->node.header.nritems; i++) { + for (i = 0; i < btrfs_header_nritems(&buf->node.header); i++) { blocknr = buf->node.blockptrs[i]; inc_block_ref(root, blocknr); } @@ -119,7 +119,8 @@ static int finish_current_insert(struct ctree_root *extent_root) int ret; extent_item.refs = 1; - extent_item.owner = extent_root->node->node.header.parentid; + extent_item.owner = + btrfs_header_parentid(&extent_root->node->node.header); ins.offset = 1; ins.flags = 0; @@ -269,7 +270,7 @@ static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, struct ctree_root * root = orig_root->extent_root; int total_needed = num_blocks; - total_needed += (node_level(root->node->node.header.flags) + 1) * 3; + total_needed += (btrfs_header_level(&root->node->node.header) + 1) * 3; if (root->last_insert.objectid > search_start) search_start = root->last_insert.objectid; check_failed: @@ -288,7 +289,7 @@ check_failed: while (1) { l = &path.nodes[0]->leaf; slot = path.slots[0]; - if (slot >= l->header.nritems) { + if (slot >= btrfs_header_nritems(&l->header)) { ret = next_leaf(root, &path); if (ret == 0) continue; @@ -404,7 +405,7 @@ struct tree_buffer *alloc_free_block(struct ctree_root *root) struct tree_buffer *buf; ret = alloc_extent(root, 1, 0, (unsigned long)-1, - root->node->node.header.parentid, + btrfs_header_parentid(&root->node->node.header), &ins); if (ret) { BUG(); @@ -429,7 +430,8 @@ int walk_down_tree(struct ctree_root *root, struct ctree_path *path, int *level) goto out; while(*level > 0) { cur = path->nodes[*level]; - if (path->slots[*level] >= cur->node.header.nritems) + if (path->slots[*level] >= + btrfs_header_nritems(&cur->node.header)) break; blocknr = cur->node.blockptrs[path->slots[*level]]; ret = lookup_block_ref(root, blocknr, &refs); @@ -444,7 +446,7 @@ int walk_down_tree(struct ctree_root *root, struct ctree_path *path, int *level) if (path->nodes[*level-1]) tree_block_release(root, path->nodes[*level-1]); path->nodes[*level-1] = next; - *level = node_level(next->node.header.flags); + *level = btrfs_header_level(&next->node.header); path->slots[*level] = 0; } out: @@ -463,7 +465,8 @@ int walk_up_tree(struct ctree_root *root, struct ctree_path *path, int *level) int ret; for(i = *level; i < MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; - if (slot < path->nodes[i]->node.header.nritems - 1) { + if (slot < + btrfs_header_nritems(&path->nodes[i]->node.header)- 1) { path->slots[i]++; *level = i; return 0; @@ -489,7 +492,7 @@ int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap) init_path(&path); - level = node_level(snap->node.header.flags); + level = btrfs_header_level(&snap->node.header); orig_level = level; path.nodes[level] = snap; path.slots[level] = 0; @@ -509,33 +512,3 @@ int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap) return 0; } - - -#if 0 -int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap) -{ - int ret; - int level; - int refs; - u64 blocknr = snap->blocknr; - - level = node_level(snap->node.header.flags); - ret = lookup_block_ref(root, snap->blocknr, &refs); - BUG_ON(ret); - if (refs == 1 && level != 0) { - struct node *n = &snap->node; - struct tree_buffer *b; - int i; - for (i = 0; i < n->header.nritems; i++) { - b = read_tree_block(root, n->blockptrs[i]); - /* FIXME, don't recurse here */ - ret = btrfs_drop_snapshot(root, b); - BUG_ON(ret); - tree_block_release(root, b); - } - } - ret = free_extent(root, blocknr, 1); - BUG_ON(ret); - return 0; -} -#endif diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c index fd4e5dea7e1..b2ba9469629 100644 --- a/fs/btrfs/mkfs.c +++ b/fs/btrfs/mkfs.c @@ -34,15 +34,15 @@ int mkfs(int fd) /* create leaves for the tree root and extent root */ memset(&empty_leaf, 0, sizeof(empty_leaf)); - empty_leaf.header.parentid = 1; - empty_leaf.header.blocknr = 17; + btrfs_set_header_parentid(&empty_leaf.header, 1); + btrfs_set_header_blocknr(&empty_leaf.header, 17); ret = pwrite(fd, &empty_leaf, sizeof(empty_leaf), 17 * CTREE_BLOCKSIZE); if (ret != sizeof(empty_leaf)) return -1; - empty_leaf.header.parentid = 2; - empty_leaf.header.blocknr = 18; - empty_leaf.header.nritems = 3; + btrfs_set_header_parentid(&empty_leaf.header, 2); + btrfs_set_header_blocknr(&empty_leaf.header, 18); + btrfs_set_header_nritems(&empty_leaf.header, 3); /* item1, reserve blocks 0-16 */ item.key.objectid = 0; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index e32a959dd3e..7df16b1e473 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -8,11 +8,11 @@ void print_leaf(struct leaf *l) { int i; - int nr = l->header.nritems; + u32 nr = btrfs_header_nritems(&l->header); struct item *item; struct extent_item *ei; - printf("leaf %Lu total ptrs %d free space %d\n", l->header.blocknr, nr, - leaf_free_space(l)); + printf("leaf %Lu total ptrs %d free space %d\n", + btrfs_header_blocknr(&l->header), nr, leaf_free_space(l)); fflush(stdout); for (i = 0 ; i < nr ; i++) { item = l->items + i; @@ -32,22 +32,20 @@ void print_leaf(struct leaf *l) void print_tree(struct ctree_root *root, struct tree_buffer *t) { int i; - int nr; + u32 nr; struct node *c; if (!t) return; c = &t->node; - nr = c->header.nritems; - if (c->header.blocknr != t->blocknr) - BUG(); - if (is_leaf(c->header.flags)) { + nr = btrfs_header_nritems(&c->header); + if (btrfs_is_leaf(c)) { print_leaf((struct leaf *)c); return; } printf("node %Lu level %d total ptrs %d free spc %u\n", t->blocknr, - node_level(c->header.flags), c->header.nritems, - (u32)NODEPTRS_PER_BLOCK - c->header.nritems); + btrfs_header_level(&c->header), nr, + (u32)NODEPTRS_PER_BLOCK - nr); fflush(stdout); for (i = 0; i < nr; i++) { printf("\tkey %d (%Lu %u %Lu) block %Lu\n", @@ -60,11 +58,11 @@ void print_tree(struct ctree_root *root, struct tree_buffer *t) struct tree_buffer *next_buf = read_tree_block(root, c->blockptrs[i]); struct node *next = &next_buf->node; - if (is_leaf(next->header.flags) && - node_level(c->header.flags) != 1) + if (btrfs_is_leaf(next) && + btrfs_header_level(&c->header) != 1) BUG(); - if (node_level(next->header.flags) != - node_level(c->header.flags) - 1) + if (btrfs_header_level(&next->header) != + btrfs_header_level(&c->header) - 1) BUG(); print_tree(root, next_buf); tree_block_release(root, next_buf); diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c index ccca9b2c14a..07fd71b7762 100644 --- a/fs/btrfs/quick-test.c +++ b/fs/btrfs/quick-test.c @@ -71,9 +71,10 @@ int main(int ac, char **av) { close_ctree(root, &super); root = open_ctree("dbfile", &super); printf("node %p level %d total ptrs %d free spc %lu\n", root->node, - node_level(root->node->node.header.flags), - root->node->node.header.nritems, - NODEPTRS_PER_BLOCK - root->node->node.header.nritems); + btrfs_header_level(&root->node->node.header), + btrfs_header_nritems(&root->node->node.header), + NODEPTRS_PER_BLOCK - + btrfs_header_nritems(&root->node->node.header)); printf("all searches good, deleting some items\n"); i = 0; srand(55); @@ -126,7 +127,8 @@ int main(int ac, char **av) { release_path(root, &path); } printf("starting big long delete run\n"); - while(root->node && root->node->node.header.nritems > 0) { + while(root->node && + btrfs_header_nritems(&root->node->node.header) > 0) { struct leaf *leaf; int slot; ins.objectid = (u64)-1; @@ -137,7 +139,7 @@ int main(int ac, char **av) { leaf = &path.nodes[0]->leaf; slot = path.slots[0]; - if (slot != leaf->header.nritems) + if (slot != btrfs_header_nritems(&leaf->header)) BUG(); while(path.slots[0] > 0) { path.slots[0] -= 1; diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index 7b37b6bae10..5d99b730a69 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -358,8 +358,8 @@ int main(int ac, char **av) } if (i && i % 5000 == 0) { printf("open & close, root level %d nritems %d\n", - node_level(root->node->node.header.flags), - root->node->node.header.nritems); + btrfs_header_level(&root->node->node.header), + btrfs_header_nritems(&root->node->node.header)); close_ctree(root, &super); root = open_ctree("dbfile", &super); } -- cgit v1.2.3-70-g09d2 From e2fa7227cdf132d72e7410dd0679dc573a1c2618 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 12 Mar 2007 16:22:34 -0400 Subject: Btrfs: struct key endian fixes Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 92 +++++++++++++++++++++++++++++--------------------- fs/btrfs/ctree.h | 78 +++++++++++++++++++++++++++++++++++++----- fs/btrfs/extent-tree.c | 34 ++++++++++--------- fs/btrfs/kerncompat.h | 3 +- fs/btrfs/mkfs.c | 14 ++++---- fs/btrfs/quick-test.c | 6 ++-- fs/btrfs/random-test.c | 17 +++++----- 7 files changed, 162 insertions(+), 82 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e43c827e0df..489019ac04b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -95,19 +95,23 @@ int leaf_free_space(struct leaf *leaf) /* * compare two keys in a memcmp fashion */ -int comp_keys(struct key *k1, struct key *k2) +int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) { - if (k1->objectid > k2->objectid) + struct btrfs_key k1; + + btrfs_disk_key_to_cpu(&k1, disk); + + if (k1.objectid > k2->objectid) return 1; - if (k1->objectid < k2->objectid) + if (k1.objectid < k2->objectid) return -1; - if (k1->flags > k2->flags) + if (k1.flags > k2->flags) return 1; - if (k1->flags < k2->flags) + if (k1.flags < k2->flags) return -1; - if (k1->offset > k2->offset) + if (k1.offset > k2->offset) return 1; - if (k1->offset < k2->offset) + if (k1.offset < k2->offset) return -1; return 0; } @@ -125,15 +129,18 @@ int check_node(struct ctree_path *path, int level) parent_slot = path->slots[level + 1]; BUG_ON(nritems == 0); if (parent) { - struct key *parent_key; + struct btrfs_disk_key *parent_key; parent_key = &parent->keys[parent_slot]; - BUG_ON(memcmp(parent_key, node->keys, sizeof(struct key))); + BUG_ON(memcmp(parent_key, node->keys, + sizeof(struct btrfs_disk_key))); BUG_ON(parent->blockptrs[parent_slot] != btrfs_header_blocknr(&node->header)); } BUG_ON(nritems > NODEPTRS_PER_BLOCK); for (i = 0; nritems > 1 && i < nritems - 2; i++) { - BUG_ON(comp_keys(&node->keys[i], &node->keys[i+1]) >= 0); + struct btrfs_key cpukey; + btrfs_disk_key_to_cpu(&cpukey, &node->keys[i + 1]); + BUG_ON(comp_keys(&node->keys[i], &cpukey) >= 0); } return 0; } @@ -155,16 +162,18 @@ int check_leaf(struct ctree_path *path, int level) return 0; if (parent) { - struct key *parent_key; + struct btrfs_disk_key *parent_key; parent_key = &parent->keys[parent_slot]; BUG_ON(memcmp(parent_key, &leaf->items[0].key, - sizeof(struct key))); + sizeof(struct btrfs_disk_key))); BUG_ON(parent->blockptrs[parent_slot] != btrfs_header_blocknr(&leaf->header)); } for (i = 0; nritems > 1 && i < nritems - 2; i++) { + struct btrfs_key cpukey; + btrfs_disk_key_to_cpu(&cpukey, &leaf->items[i + 1].key); BUG_ON(comp_keys(&leaf->items[i].key, - &leaf->items[i+1].key) >= 0); + &cpukey) >= 0); BUG_ON(leaf->items[i].offset != leaf->items[i + 1].offset + leaf->items[i + 1].size); if (i == 0) { @@ -191,18 +200,18 @@ int check_block(struct ctree_path *path, int level) * * slot may point to max if the key is bigger than all of the keys */ -int generic_bin_search(char *p, int item_size, struct key *key, +int generic_bin_search(char *p, int item_size, struct btrfs_key *key, int max, int *slot) { int low = 0; int high = max; int mid; int ret; - struct key *tmp; + struct btrfs_disk_key *tmp; while(low < high) { mid = (low + high) / 2; - tmp = (struct key *)(p + mid * item_size); + tmp = (struct btrfs_disk_key *)(p + mid * item_size); ret = comp_keys(tmp, key); if (ret < 0) @@ -222,7 +231,7 @@ int generic_bin_search(char *p, int item_size, struct key *key, * simple bin_search frontend that does the right thing for * leaves vs nodes */ -int bin_search(struct node *c, struct key *key, int *slot) +int bin_search(struct node *c, struct btrfs_key *key, int *slot) { if (btrfs_is_leaf(c)) { struct leaf *l = (struct leaf *)c; @@ -230,7 +239,8 @@ int bin_search(struct node *c, struct key *key, int *slot) key, btrfs_header_nritems(&c->header), slot); } else { - return generic_bin_search((void *)c->keys, sizeof(struct key), + return generic_bin_search((void *)c->keys, + sizeof(struct btrfs_disk_key), key, btrfs_header_nritems(&c->header), slot); } @@ -339,7 +349,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, ret = wret; } else { memcpy(parent->keys + pslot + 1, right->keys, - sizeof(struct key)); + sizeof(struct btrfs_disk_key)); BUG_ON(list_empty(&parent_buf->dirty)); } } @@ -374,7 +384,8 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, ret = wret; } else { /* update the parent key to reflect our changes */ - memcpy(parent->keys + pslot, mid->keys, sizeof(struct key)); + memcpy(parent->keys + pslot, mid->keys, + sizeof(struct btrfs_disk_key)); BUG_ON(list_empty(&parent_buf->dirty)); } @@ -417,7 +428,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if * possible) */ -int search_slot(struct ctree_root *root, struct key *key, +int search_slot(struct ctree_root *root, struct btrfs_key *key, struct ctree_path *p, int ins_len, int cow) { struct tree_buffer *b; @@ -499,7 +510,7 @@ again: * fixing up the blocks in ram so the tree is consistent. */ static int fixup_low_keys(struct ctree_root *root, - struct ctree_path *path, struct key *key, + struct ctree_path *path, struct btrfs_disk_key *key, int level) { int i; @@ -546,12 +557,13 @@ static int push_node_left(struct ctree_root *root, struct tree_buffer *dst_buf, push_items = src_nritems; memcpy(dst->keys + dst_nritems, src->keys, - push_items * sizeof(struct key)); + push_items * sizeof(struct btrfs_disk_key)); memcpy(dst->blockptrs + dst_nritems, src->blockptrs, push_items * sizeof(u64)); if (push_items < src_nritems) { memmove(src->keys, src->keys + push_items, - (src_nritems - push_items) * sizeof(struct key)); + (src_nritems - push_items) * + sizeof(struct btrfs_disk_key)); memmove(src->blockptrs, src->blockptrs + push_items, (src_nritems - push_items) * sizeof(u64)); } @@ -598,11 +610,11 @@ static int balance_node_right(struct ctree_root *root, push_items = max_push; memmove(dst->keys + push_items, dst->keys, - dst_nritems * sizeof(struct key)); + dst_nritems * sizeof(struct btrfs_disk_key)); memmove(dst->blockptrs + push_items, dst->blockptrs, dst_nritems * sizeof(u64)); memcpy(dst->keys, src->keys + src_nritems - push_items, - push_items * sizeof(struct key)); + push_items * sizeof(struct btrfs_disk_key)); memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items, push_items * sizeof(u64)); @@ -627,7 +639,7 @@ static int insert_new_root(struct ctree_root *root, struct tree_buffer *t; struct node *lower; struct node *c; - struct key *lower_key; + struct btrfs_disk_key *lower_key; BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); @@ -645,7 +657,7 @@ static int insert_new_root(struct ctree_root *root, lower_key = &((struct leaf *)lower)->items[0].key; else lower_key = lower->keys; - memcpy(c->keys, lower_key, sizeof(struct key)); + memcpy(c->keys, lower_key, sizeof(struct btrfs_disk_key)); c->blockptrs[0] = path->nodes[level-1]->blocknr; /* the super has an extra ref to root->node */ tree_block_release(root, root->node); @@ -666,7 +678,7 @@ static int insert_new_root(struct ctree_root *root, * returns zero on success and < 0 on any error */ static int insert_ptr(struct ctree_root *root, - struct ctree_path *path, struct key *key, + struct ctree_path *path, struct btrfs_disk_key *key, u64 blocknr, int slot, int level) { struct node *lower; @@ -681,11 +693,11 @@ static int insert_ptr(struct ctree_root *root, BUG(); if (slot != nritems) { memmove(lower->keys + slot + 1, lower->keys + slot, - (nritems - slot) * sizeof(struct key)); + (nritems - slot) * sizeof(struct btrfs_disk_key)); memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot, (nritems - slot) * sizeof(u64)); } - memcpy(lower->keys + slot, key, sizeof(struct key)); + memcpy(lower->keys + slot, key, sizeof(struct btrfs_disk_key)); lower->blockptrs[slot] = blocknr; btrfs_set_header_nritems(&lower->header, nritems + 1); if (lower->keys[1].objectid == 0) @@ -732,7 +744,7 @@ static int split_node(struct ctree_root *root, struct ctree_path *path, btrfs_header_parentid(&root->node->node.header)); mid = (c_nritems + 1) / 2; memcpy(split->keys, c->keys + mid, - (c_nritems - mid) * sizeof(struct key)); + (c_nritems - mid) * sizeof(struct btrfs_disk_key)); memcpy(split->blockptrs, c->blockptrs + mid, (c_nritems - mid) * sizeof(u64)); btrfs_set_header_nritems(&split->header, c_nritems - mid); @@ -869,7 +881,7 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, BUG_ON(list_empty(&left_buf->dirty)); BUG_ON(list_empty(&right_buf->dirty)); memcpy(upper->node.keys + slot + 1, - &right->items[0].key, sizeof(struct key)); + &right->items[0].key, sizeof(struct btrfs_disk_key)); BUG_ON(list_empty(&upper->dirty)); /* then fixup the leaf pointer in the path */ @@ -1090,7 +1102,7 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, * Given a key and some data, insert an item into the tree. * This does all the path init required, making room in the tree if needed. */ -int insert_item(struct ctree_root *root, struct key *key, +int insert_item(struct ctree_root *root, struct btrfs_key *cpu_key, void *data, int data_size) { int ret = 0; @@ -1101,12 +1113,15 @@ int insert_item(struct ctree_root *root, struct key *key, u32 nritems; unsigned int data_end; struct ctree_path path; + struct btrfs_disk_key disk_key; + + btrfs_cpu_key_to_disk(&disk_key, cpu_key); /* create a root if there isn't one */ if (!root->node) BUG(); init_path(&path); - ret = search_slot(root, key, &path, data_size, 1); + ret = search_slot(root, cpu_key, &path, data_size, 1); if (ret == 0) { release_path(root, &path); return -EEXIST; @@ -1148,7 +1163,8 @@ int insert_item(struct ctree_root *root, struct key *key, data_end = old_data; } /* copy the new data in */ - memcpy(&leaf->items[slot].key, key, sizeof(struct key)); + memcpy(&leaf->items[slot].key, &disk_key, + sizeof(struct btrfs_disk_key)); leaf->items[slot].offset = data_end - data_size; leaf->items[slot].size = data_size; memcpy(leaf->data + data_end - data_size, data, data_size); @@ -1156,7 +1172,7 @@ int insert_item(struct ctree_root *root, struct key *key, ret = 0; if (slot == 0) - ret = fixup_low_keys(root, &path, key, 1); + ret = fixup_low_keys(root, &path, &disk_key, 1); BUG_ON(list_empty(&leaf_buf->dirty)); if (leaf_free_space(leaf) < 0) @@ -1187,7 +1203,7 @@ static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level, nritems = btrfs_header_nritems(&node->header); if (slot != nritems -1) { memmove(node->keys + slot, node->keys + slot + 1, - sizeof(struct key) * (nritems - slot - 1)); + sizeof(struct btrfs_disk_key) * (nritems - slot - 1)); memmove(node->blockptrs + slot, node->blockptrs + slot + 1, sizeof(u64) * (nritems - slot - 1)); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e1aaca66d59..58e03e90f23 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2,6 +2,7 @@ #define __CTREE__ #include "list.h" +#include "kerncompat.h" #define CTREE_BLOCKSIZE 1024 @@ -14,8 +15,18 @@ * may point to extents. * * offset is the starting byte offset for this key in the stream. + * + * btrfs_disk_key is in disk byte order. struct btrfs_key is always + * in cpu native order. Otherwise they are identical and their sizes + * should be the same (ie both packed) */ -struct key { +struct btrfs_disk_key { + __le64 objectid; + __le32 flags; + __le64 offset; +} __attribute__ ((__packed__)); + +struct btrfs_key { u64 objectid; u32 flags; u64 offset; @@ -37,7 +48,7 @@ struct btrfs_header { #define MAX_LEVEL 8 #define NODEPTRS_PER_BLOCK ((CTREE_BLOCKSIZE - sizeof(struct btrfs_header)) / \ - (sizeof(struct key) + sizeof(u64))) + (sizeof(struct btrfs_disk_key) + sizeof(u64))) struct tree_buffer; @@ -50,8 +61,8 @@ struct ctree_root { struct tree_buffer *node; struct tree_buffer *commit_root; struct ctree_root *extent_root; - struct key current_insert; - struct key last_insert; + struct btrfs_key current_insert; + struct btrfs_key last_insert; int fp; struct radix_tree_root cache_radix; struct radix_tree_root pinned_radix; @@ -88,7 +99,7 @@ struct ctree_super_block { * the item in the leaf (relative to the start of the data area) */ struct item { - struct key key; + struct btrfs_disk_key key; u16 offset; u16 size; } __attribute__ ((__packed__)); @@ -115,7 +126,7 @@ struct leaf { */ struct node { struct btrfs_header header; - struct key keys[NODEPTRS_PER_BLOCK]; + struct btrfs_disk_key keys[NODEPTRS_PER_BLOCK]; u64 blockptrs[NODEPTRS_PER_BLOCK]; } __attribute__ ((__packed__)); @@ -141,6 +152,55 @@ struct ctree_path { int slots[MAX_LEVEL]; }; +static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu, + struct btrfs_disk_key *disk) +{ + cpu->offset = le64_to_cpu(disk->offset); + cpu->flags = le32_to_cpu(disk->flags); + cpu->objectid = le64_to_cpu(disk->objectid); +} + +static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk, + struct btrfs_key *cpu) +{ + disk->offset = cpu_to_le64(cpu->offset); + disk->flags = cpu_to_le32(cpu->flags); + disk->objectid = cpu_to_le64(cpu->objectid); +} + +static inline u64 btrfs_key_objectid(struct btrfs_disk_key *disk) +{ + return le64_to_cpu(disk->objectid); +} + +static inline void btrfs_set_key_objectid(struct btrfs_disk_key *disk, + u64 val) +{ + disk->objectid = cpu_to_le64(val); +} + +static inline u64 btrfs_key_offset(struct btrfs_disk_key *disk) +{ + return le64_to_cpu(disk->offset); +} + +static inline void btrfs_set_key_offset(struct btrfs_disk_key *disk, + u64 val) +{ + disk->offset = cpu_to_le64(val); +} + +static inline u32 btrfs_key_flags(struct btrfs_disk_key *disk) +{ + return le32_to_cpu(disk->flags); +} + +static inline void btrfs_set_key_flags(struct btrfs_disk_key *disk, + u32 val) +{ + disk->flags = cpu_to_le32(val); +} + static inline u64 btrfs_header_blocknr(struct btrfs_header *h) { return le64_to_cpu(h->blocknr); @@ -203,11 +263,13 @@ static inline int btrfs_is_leaf(struct node *n) struct tree_buffer *alloc_free_block(struct ctree_root *root); int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf); int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks); -int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p, int ins_len, int cow); +int search_slot(struct ctree_root *root, struct btrfs_key *key, + struct ctree_path *p, int ins_len, int cow); void release_path(struct ctree_root *root, struct ctree_path *p); void init_path(struct ctree_path *p); int del_item(struct ctree_root *root, struct ctree_path *path); -int insert_item(struct ctree_root *root, struct key *key, void *data, int data_size); +int insert_item(struct ctree_root *root, struct btrfs_key *key, + void *data, int data_size); int next_leaf(struct ctree_root *root, struct ctree_path *path); int leaf_free_space(struct leaf *leaf); int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 524c11fea24..475e7221596 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7,7 +7,8 @@ #include "print-tree.h" static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, - u64 search_start, u64 search_end, struct key *ins); + u64 search_start, u64 search_end, + struct btrfs_key *ins); static int finish_current_insert(struct ctree_root *extent_root); static int run_pending(struct ctree_root *extent_root); @@ -24,10 +25,10 @@ static int inc_block_ref(struct ctree_root *root, u64 blocknr) { struct ctree_path path; int ret; - struct key key; + struct btrfs_key key; struct leaf *l; struct extent_item *item; - struct key ins; + struct btrfs_key ins; find_free_extent(root->extent_root, 0, 0, (u64)-1, &ins); init_path(&path); @@ -54,7 +55,7 @@ static int lookup_block_ref(struct ctree_root *root, u64 blocknr, u32 *refs) { struct ctree_path path; int ret; - struct key key; + struct btrfs_key key; struct leaf *l; struct extent_item *item; init_path(&path); @@ -113,7 +114,7 @@ int btrfs_finish_extent_commit(struct ctree_root *root) static int finish_current_insert(struct ctree_root *extent_root) { - struct key ins; + struct btrfs_key ins; struct extent_item extent_item; int i; int ret; @@ -140,12 +141,12 @@ static int finish_current_insert(struct ctree_root *extent_root) int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) { struct ctree_path path; - struct key key; + struct btrfs_key key; struct ctree_root *extent_root = root->extent_root; int ret; struct item *item; struct extent_item *ei; - struct key ins; + struct btrfs_key ins; key.objectid = blocknr; key.flags = 0; @@ -227,7 +228,7 @@ static int run_pending(struct ctree_root *extent_root) */ int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) { - struct key key; + struct btrfs_key key; struct ctree_root *extent_root = root->extent_root; struct tree_buffer *t; int pending_ret; @@ -256,10 +257,11 @@ int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) * Any available blocks before search_start are skipped. */ static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, - u64 search_start, u64 search_end, struct key *ins) + u64 search_start, u64 search_end, + struct btrfs_key *ins) { struct ctree_path path; - struct key *key; + struct btrfs_key key; int ret; u64 hole_size = 0; int slot = 0; @@ -306,12 +308,12 @@ check_failed: ins->offset = (u64)-1; goto check_pending; } - key = &l->items[slot].key; - if (key->objectid >= search_start) { + btrfs_disk_key_to_cpu(&key, &l->items[slot].key); + if (key.objectid >= search_start) { if (start_found) { if (last_block < search_start) last_block = search_start; - hole_size = key->objectid - last_block; + hole_size = key.objectid - last_block; if (hole_size > total_needed) { ins->objectid = last_block; ins->offset = hole_size; @@ -320,7 +322,7 @@ check_failed: } } start_found = 1; - last_block = key->objectid + key->offset; + last_block = key.objectid + key.offset; path.slots[0]++; } // FIXME -ENOSPC @@ -357,7 +359,7 @@ error: * returns 0 if everything worked, non-zero otherwise. */ int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, - u64 search_end, u64 owner, struct key *ins) + u64 search_end, u64 owner, struct btrfs_key *ins) { int ret; int pending_ret; @@ -400,7 +402,7 @@ int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, */ struct tree_buffer *alloc_free_block(struct ctree_root *root) { - struct key ins; + struct btrfs_key ins; int ret; struct tree_buffer *buf; diff --git a/fs/btrfs/kerncompat.h b/fs/btrfs/kerncompat.h index 54cf70eaa01..56239b922ba 100644 --- a/fs/btrfs/kerncompat.h +++ b/fs/btrfs/kerncompat.h @@ -69,8 +69,6 @@ static inline void __clear_bit(int bit, unsigned long *map) { const typeof( ((type *)0)->member ) *__mptr = (ptr); \ (type *)( (char *)__mptr - __builtin_offsetof(type,member) );}) -#endif - #define ENOMEM 5 #define EEXIST 6 @@ -94,3 +92,4 @@ typedef u64 __bitwise __be64; #define le32_to_cpu(x) ((__force u32)(__le32)(x)) #define cpu_to_le16(x) ((__force __le16)(u16)(x)) #define le16_to_cpu(x) ((__force u16)(__le16)(x)) +#endif diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c index b2ba9469629..5290423343f 100644 --- a/fs/btrfs/mkfs.c +++ b/fs/btrfs/mkfs.c @@ -45,9 +45,9 @@ int mkfs(int fd) btrfs_set_header_nritems(&empty_leaf.header, 3); /* item1, reserve blocks 0-16 */ - item.key.objectid = 0; - item.key.offset = 17; - item.key.flags = 0; + item.key.objectid = cpu_to_le64(0); + item.key.offset = cpu_to_le64(17); + item.key.flags = cpu_to_le32(0); item.offset = LEAF_DATA_SIZE - sizeof(struct extent_item); item.size = sizeof(struct extent_item); extent_item.refs = 1; @@ -56,16 +56,16 @@ int mkfs(int fd) memcpy(empty_leaf.data + item.offset, &extent_item, item.size); /* item2, give block 17 to the root */ - item.key.objectid = 17; - item.key.offset = 1; + item.key.objectid = cpu_to_le64(17); + item.key.offset = cpu_to_le64(1); item.offset = LEAF_DATA_SIZE - sizeof(struct extent_item) * 2; extent_item.owner = 1; memcpy(empty_leaf.items + 1, &item, sizeof(item)); memcpy(empty_leaf.data + item.offset, &extent_item, item.size); /* item3, give block 18 for the extent root */ - item.key.objectid = 18; - item.key.offset = 1; + item.key.objectid = cpu_to_le64(18); + item.key.offset = cpu_to_le64(1); item.offset = LEAF_DATA_SIZE - sizeof(struct extent_item) * 3; extent_item.owner = 2; memcpy(empty_leaf.items + 2, &item, sizeof(item)); diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c index 07fd71b7762..ab3bda53a2f 100644 --- a/fs/btrfs/quick-test.c +++ b/fs/btrfs/quick-test.c @@ -13,8 +13,8 @@ int next_key(int i, int max_key) { } int main(int ac, char **av) { - struct key ins; - struct key last = { (u64)-1, 0, 0}; + struct btrfs_key ins; + struct btrfs_key last = { (u64)-1, 0, 0}; char *buf; int i; int num; @@ -146,7 +146,7 @@ int main(int ac, char **av) { slot = path.slots[0]; leaf = &path.nodes[0]->leaf; - memcpy(&last, &leaf->items[slot].key, sizeof(last)); + btrfs_disk_key_to_cpu(&last, &leaf->items[slot].key); if (tree_size % 10000 == 0) printf("big del %d:%d\n", tree_size, i); ret = del_item(root, &path); diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index 5d99b730a69..34a15841ebd 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -10,7 +10,8 @@ int keep_running = 1; struct ctree_super_block super; -static int setup_key(struct radix_tree_root *root, struct key *key, int exists) +static int setup_key(struct radix_tree_root *root, struct btrfs_key *key, + int exists) { int num = rand(); unsigned long res[2]; @@ -38,7 +39,7 @@ again: static int ins_one(struct ctree_root *root, struct radix_tree_root *radix) { struct ctree_path path; - struct key key; + struct btrfs_key key; int ret; char buf[128]; unsigned long oid; @@ -63,7 +64,7 @@ error: static int insert_dup(struct ctree_root *root, struct radix_tree_root *radix) { struct ctree_path path; - struct key key; + struct btrfs_key key; int ret; char buf[128]; init_path(&path); @@ -82,7 +83,7 @@ static int insert_dup(struct ctree_root *root, struct radix_tree_root *radix) static int del_one(struct ctree_root *root, struct radix_tree_root *radix) { struct ctree_path path; - struct key key; + struct btrfs_key key; int ret; unsigned long *ptr; init_path(&path); @@ -108,7 +109,7 @@ error: static int lookup_item(struct ctree_root *root, struct radix_tree_root *radix) { struct ctree_path path; - struct key key; + struct btrfs_key key; int ret; init_path(&path); ret = setup_key(radix, &key, 1); @@ -127,7 +128,7 @@ error: static int lookup_enoent(struct ctree_root *root, struct radix_tree_root *radix) { struct ctree_path path; - struct key key; + struct btrfs_key key; int ret; init_path(&path); ret = setup_key(radix, &key, 0); @@ -147,7 +148,7 @@ static int empty_tree(struct ctree_root *root, struct radix_tree_root *radix, int nr) { struct ctree_path path; - struct key key; + struct btrfs_key key; unsigned long found = 0; int ret; int slot; @@ -248,7 +249,7 @@ int (*ops[])(struct ctree_root *root, struct radix_tree_root *radix) = static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) { struct ctree_path path; - struct key key; + struct btrfs_key key; unsigned long found; int ret; int slot; -- cgit v1.2.3-70-g09d2 From 0783fcfc4dc19b8bb99bd51b7afa669ba4cbd377 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 12 Mar 2007 20:12:07 -0400 Subject: Btrfs: struct item endian fixes Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 129 +++++++++++++++++++++++++++---------------------- fs/btrfs/ctree.h | 34 +++++++++++-- fs/btrfs/extent-tree.c | 12 +++-- fs/btrfs/mkfs.c | 36 ++++++++------ fs/btrfs/print-tree.c | 15 +++--- fs/btrfs/random-test.c | 5 +- 6 files changed, 141 insertions(+), 90 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 489019ac04b..10555647005 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -76,7 +76,7 @@ static inline unsigned int leaf_data_end(struct leaf *leaf) u32 nr = btrfs_header_nritems(&leaf->header); if (nr == 0) return sizeof(leaf->data); - return leaf->items[nr-1].offset; + return btrfs_item_offset(leaf->items + nr - 1); } /* @@ -174,11 +174,12 @@ int check_leaf(struct ctree_path *path, int level) btrfs_disk_key_to_cpu(&cpukey, &leaf->items[i + 1].key); BUG_ON(comp_keys(&leaf->items[i].key, &cpukey) >= 0); - BUG_ON(leaf->items[i].offset != leaf->items[i + 1].offset + - leaf->items[i + 1].size); + BUG_ON(btrfs_item_offset(leaf->items + i) != + btrfs_item_end(leaf->items + i + 1)); if (i == 0) { - BUG_ON(leaf->items[i].offset + leaf->items[i].size != - LEAF_DATA_SIZE); + BUG_ON(btrfs_item_offset(leaf->items + i) + + btrfs_item_size(leaf->items + i) != + LEAF_DATA_SIZE); } } return 0; @@ -235,7 +236,8 @@ int bin_search(struct node *c, struct btrfs_key *key, int *slot) { if (btrfs_is_leaf(c)) { struct leaf *l = (struct leaf *)c; - return generic_bin_search((void *)l->items, sizeof(struct item), + return generic_bin_search((void *)l->items, + sizeof(struct btrfs_item), key, btrfs_header_nritems(&c->header), slot); } else { @@ -485,7 +487,7 @@ again: struct leaf *l = (struct leaf *)c; p->slots[level] = slot; if (ins_len > 0 && leaf_free_space(l) < - sizeof(struct item) + ins_len) { + sizeof(struct btrfs_item) + ins_len) { int sret = split_leaf(root, p, ins_len); BUG_ON(sret > 0); if (sret) @@ -780,9 +782,9 @@ static int leaf_space_used(struct leaf *l, int start, int nr) if (!nr) return 0; - data_len = l->items[start].offset + l->items[start].size; - data_len = data_len - l->items[end].offset; - data_len += sizeof(struct item) * nr; + data_len = btrfs_item_end(l->items + start); + data_len = data_len - btrfs_item_offset(l->items + end); + data_len += sizeof(struct btrfs_item) * nr; return data_len; } @@ -806,7 +808,7 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, int free_space; int push_space = 0; int push_items = 0; - struct item *item; + struct btrfs_item *item; u32 left_nritems; u32 right_nritems; @@ -821,7 +823,7 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, right_buf = read_tree_block(root, upper->node.blockptrs[slot + 1]); right = &right_buf->leaf; free_space = leaf_free_space(right); - if (free_space < data_size + sizeof(struct item)) { + if (free_space < data_size + sizeof(struct btrfs_item)) { tree_block_release(root, right_buf); return 1; } @@ -829,7 +831,7 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, btrfs_cow_block(root, right_buf, upper, slot + 1, &right_buf); right = &right_buf->leaf; free_space = leaf_free_space(right); - if (free_space < data_size + sizeof(struct item)) { + if (free_space < data_size + sizeof(struct btrfs_item)) { tree_block_release(root, right_buf); return 1; } @@ -839,10 +841,11 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, item = left->items + i; if (path->slots[0] == i) push_space += data_size + sizeof(*item); - if (item->size + sizeof(*item) + push_space > free_space) + if (btrfs_item_size(item) + sizeof(*item) + push_space > + free_space) break; push_items++; - push_space += item->size + sizeof(*item); + push_space += btrfs_item_size(item) + sizeof(*item); } if (push_items == 0) { tree_block_release(root, right_buf); @@ -850,8 +853,7 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, } right_nritems = btrfs_header_nritems(&right->header); /* push left to right */ - push_space = left->items[left_nritems - push_items].offset + - left->items[left_nritems - push_items].size; + push_space = btrfs_item_end(left->items + left_nritems - push_items); push_space -= leaf_data_end(left); /* make room in the right data area */ memmove(right->data + leaf_data_end(right) - push_space, @@ -862,18 +864,19 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, left->data + leaf_data_end(left), push_space); memmove(right->items + push_items, right->items, - right_nritems * sizeof(struct item)); + right_nritems * sizeof(struct btrfs_item)); /* copy the items from left to right */ memcpy(right->items, left->items + left_nritems - push_items, - push_items * sizeof(struct item)); + push_items * sizeof(struct btrfs_item)); /* update the item pointers */ right_nritems += push_items; btrfs_set_header_nritems(&right->header, right_nritems); push_space = LEAF_DATA_SIZE; for (i = 0; i < right_nritems; i++) { - right->items[i].offset = push_space - right->items[i].size; - push_space = right->items[i].offset; + btrfs_set_item_offset(right->items + i, push_space - + btrfs_item_size(right->items + i)); + push_space = btrfs_item_offset(right->items + i); } left_nritems -= push_items; btrfs_set_header_nritems(&left->header, left_nritems); @@ -911,7 +914,7 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, int free_space; int push_space = 0; int push_items = 0; - struct item *item; + struct btrfs_item *item; u32 old_left_nritems; int ret = 0; int wret; @@ -926,7 +929,7 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]); left = &t->leaf; free_space = leaf_free_space(left); - if (free_space < data_size + sizeof(struct item)) { + if (free_space < data_size + sizeof(struct btrfs_item)) { tree_block_release(root, t); return 1; } @@ -935,7 +938,7 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, btrfs_cow_block(root, t, path->nodes[1], slot - 1, &t); left = &t->leaf; free_space = leaf_free_space(left); - if (free_space < data_size + sizeof(struct item)) { + if (free_space < data_size + sizeof(struct btrfs_item)) { tree_block_release(root, t); return 1; } @@ -944,10 +947,11 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, item = right->items + i; if (path->slots[0] == i) push_space += data_size + sizeof(*item); - if (item->size + sizeof(*item) + push_space > free_space) + if (btrfs_item_size(item) + sizeof(*item) + push_space > + free_space) break; push_items++; - push_space += item->size + sizeof(*item); + push_space += btrfs_item_size(item) + sizeof(*item); } if (push_items == 0) { tree_block_release(root, t); @@ -955,35 +959,40 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, } /* push data from right to left */ memcpy(left->items + btrfs_header_nritems(&left->header), - right->items, push_items * sizeof(struct item)); - push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset; + right->items, push_items * sizeof(struct btrfs_item)); + push_space = LEAF_DATA_SIZE - + btrfs_item_offset(right->items + push_items -1); memcpy(left->data + leaf_data_end(left) - push_space, - right->data + right->items[push_items - 1].offset, + right->data + btrfs_item_offset(right->items + push_items - 1), push_space); old_left_nritems = btrfs_header_nritems(&left->header); BUG_ON(old_left_nritems < 0); - for(i = old_left_nritems; i < old_left_nritems + push_items; i++) { - left->items[i].offset -= LEAF_DATA_SIZE - - left->items[old_left_nritems -1].offset; + for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { + u16 ioff = btrfs_item_offset(left->items + i); + btrfs_set_item_offset(left->items + i, ioff - (LEAF_DATA_SIZE - + btrfs_item_offset(left->items + + old_left_nritems - 1))); } btrfs_set_header_nritems(&left->header, old_left_nritems + push_items); /* fixup right node */ - push_space = right->items[push_items-1].offset - leaf_data_end(right); + push_space = btrfs_item_offset(right->items + push_items - 1) - + leaf_data_end(right); memmove(right->data + LEAF_DATA_SIZE - push_space, right->data + leaf_data_end(right), push_space); memmove(right->items, right->items + push_items, (btrfs_header_nritems(&right->header) - push_items) * - sizeof(struct item)); + sizeof(struct btrfs_item)); btrfs_set_header_nritems(&right->header, btrfs_header_nritems(&right->header) - push_items); push_space = LEAF_DATA_SIZE; for (i = 0; i < btrfs_header_nritems(&right->header); i++) { - right->items[i].offset = push_space - right->items[i].size; - push_space = right->items[i].offset; + btrfs_set_item_offset(right->items + i, push_space - + btrfs_item_size(right->items + i)); + push_space = btrfs_item_offset(right->items + i); } BUG_ON(list_empty(&t->dirty)); @@ -1023,7 +1032,7 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, int slot; struct leaf *right; struct tree_buffer *right_buffer; - int space_needed = data_size + sizeof(struct item); + int space_needed = data_size + sizeof(struct btrfs_item); int data_copy_size; int rt_data_off; int i; @@ -1034,7 +1043,7 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, l = &l_buf->leaf; /* did the pushes work? */ - if (leaf_free_space(l) >= sizeof(struct item) + data_size) + if (leaf_free_space(l) >= sizeof(struct btrfs_item) + data_size) return 0; if (!path->nodes[1]) { @@ -1066,17 +1075,17 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, btrfs_set_header_level(&right->header, 0); btrfs_set_header_parentid(&right->header, btrfs_header_parentid(&root->node->node.header)); - data_copy_size = l->items[mid].offset + l->items[mid].size - - leaf_data_end(l); + data_copy_size = btrfs_item_end(l->items + mid) - leaf_data_end(l); memcpy(right->items, l->items + mid, - (nritems - mid) * sizeof(struct item)); + (nritems - mid) * sizeof(struct btrfs_item)); memcpy(right->data + LEAF_DATA_SIZE - data_copy_size, l->data + leaf_data_end(l), data_copy_size); - rt_data_off = LEAF_DATA_SIZE - - (l->items[mid].offset + l->items[mid].size); + rt_data_off = LEAF_DATA_SIZE - btrfs_item_end(l->items + mid); - for (i = 0; i < btrfs_header_nritems(&right->header); i++) - right->items[i].offset += rt_data_off; + for (i = 0; i < btrfs_header_nritems(&right->header); i++) { + u16 ioff = btrfs_item_offset(right->items + i); + btrfs_set_item_offset(right->items + i, ioff + rt_data_off); + } btrfs_set_header_nritems(&l->header, mid); ret = 0; @@ -1136,26 +1145,28 @@ int insert_item(struct ctree_root *root, struct btrfs_key *cpu_key, nritems = btrfs_header_nritems(&leaf->header); data_end = leaf_data_end(leaf); - if (leaf_free_space(leaf) < sizeof(struct item) + data_size) + if (leaf_free_space(leaf) < sizeof(struct btrfs_item) + data_size) BUG(); slot = path.slots[0]; BUG_ON(slot < 0); if (slot != nritems) { int i; - unsigned int old_data = leaf->items[slot].offset + - leaf->items[slot].size; + unsigned int old_data = btrfs_item_end(leaf->items + slot); /* * item0..itemN ... dataN.offset..dataN.size .. data0.size */ /* first correct the data pointers */ - for (i = slot; i < nritems; i++) - leaf->items[i].offset -= data_size; + for (i = slot; i < nritems; i++) { + u16 ioff = btrfs_item_offset(leaf->items + i); + btrfs_set_item_offset(leaf->items + i, + ioff - data_size); + } /* shift the items */ memmove(leaf->items + slot + 1, leaf->items + slot, - (nritems - slot) * sizeof(struct item)); + (nritems - slot) * sizeof(struct btrfs_item)); /* shift the data */ memmove(leaf->data + data_end - data_size, leaf->data + @@ -1165,8 +1176,8 @@ int insert_item(struct ctree_root *root, struct btrfs_key *cpu_key, /* copy the new data in */ memcpy(&leaf->items[slot].key, &disk_key, sizeof(struct btrfs_disk_key)); - leaf->items[slot].offset = data_end - data_size; - leaf->items[slot].size = data_size; + btrfs_set_item_offset(leaf->items + slot, data_end - data_size); + btrfs_set_item_size(leaf->items + slot, data_size); memcpy(leaf->data + data_end - data_size, data, data_size); btrfs_set_header_nritems(&leaf->header, nritems + 1); @@ -1241,8 +1252,8 @@ int del_item(struct ctree_root *root, struct ctree_path *path) leaf_buf = path->nodes[0]; leaf = &leaf_buf->leaf; slot = path->slots[0]; - doff = leaf->items[slot].offset; - dsize = leaf->items[slot].size; + doff = btrfs_item_offset(leaf->items + slot); + dsize = btrfs_item_size(leaf->items + slot); nritems = btrfs_header_nritems(&leaf->header); if (slot != nritems - 1) { @@ -1251,10 +1262,12 @@ int del_item(struct ctree_root *root, struct ctree_path *path) memmove(leaf->data + data_end + dsize, leaf->data + data_end, doff - data_end); - for (i = slot + 1; i < nritems; i++) - leaf->items[i].offset += dsize; + for (i = slot + 1; i < nritems; i++) { + u16 ioff = btrfs_item_offset(leaf->items + i); + btrfs_set_item_offset(leaf->items + i, ioff + dsize); + } memmove(leaf->items + slot, leaf->items + slot + 1, - sizeof(struct item) * + sizeof(struct btrfs_item) * (nritems - slot - 1)); } btrfs_set_header_nritems(&leaf->header, nritems - 1); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 58e03e90f23..b03df154dcd 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -98,10 +98,10 @@ struct ctree_super_block { * the key flags parameter. offset and size tell us where to find * the item in the leaf (relative to the start of the data area) */ -struct item { +struct btrfs_item { struct btrfs_disk_key key; - u16 offset; - u16 size; + __le16 offset; + __le16 size; } __attribute__ ((__packed__)); /* @@ -115,7 +115,8 @@ struct item { struct leaf { struct btrfs_header header; union { - struct item items[LEAF_DATA_SIZE/sizeof(struct item)]; + struct btrfs_item items[LEAF_DATA_SIZE/ + sizeof(struct btrfs_item)]; u8 data[CTREE_BLOCKSIZE-sizeof(struct btrfs_header)]; }; } __attribute__ ((__packed__)); @@ -152,6 +153,31 @@ struct ctree_path { int slots[MAX_LEVEL]; }; +static inline u16 btrfs_item_offset(struct btrfs_item *item) +{ + return le16_to_cpu(item->offset); +} + +static inline void btrfs_set_item_offset(struct btrfs_item *item, u16 val) +{ + item->offset = cpu_to_le16(val); +} + +static inline u16 btrfs_item_end(struct btrfs_item *item) +{ + return le16_to_cpu(item->offset) + le16_to_cpu(item->size); +} + +static inline u16 btrfs_item_size(struct btrfs_item *item) +{ + return le16_to_cpu(item->size); +} + +static inline void btrfs_set_item_size(struct btrfs_item *item, u16 val) +{ + item->size = cpu_to_le16(val); +} + static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu, struct btrfs_disk_key *disk) { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 475e7221596..fdf95bd07f9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -40,8 +40,8 @@ static int inc_block_ref(struct ctree_root *root, u64 blocknr) BUG(); BUG_ON(ret != 0); l = &path.nodes[0]->leaf; - item = (struct extent_item *)(l->data + - l->items[path.slots[0]].offset); + item = (struct extent_item *)(l->data + btrfs_item_offset(l->items + + path.slots[0])); item->refs++; BUG_ON(list_empty(&path.nodes[0]->dirty)); @@ -67,7 +67,8 @@ static int lookup_block_ref(struct ctree_root *root, u64 blocknr, u32 *refs) BUG(); l = &path.nodes[0]->leaf; item = (struct extent_item *)(l->data + - l->items[path.slots[0]].offset); + btrfs_item_offset(l->items + + path.slots[0])); *refs = item->refs; release_path(root->extent_root, &path); return 0; @@ -144,7 +145,7 @@ int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) struct btrfs_key key; struct ctree_root *extent_root = root->extent_root; int ret; - struct item *item; + struct btrfs_item *item; struct extent_item *ei; struct btrfs_key ins; @@ -162,7 +163,8 @@ int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) BUG(); } item = path.nodes[0]->leaf.items + path.slots[0]; - ei = (struct extent_item *)(path.nodes[0]->leaf.data + item->offset); + ei = (struct extent_item *)(path.nodes[0]->leaf.data + + btrfs_item_offset(item)); BUG_ON(ei->refs == 0); ei->refs--; if (ei->refs == 0) { diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c index 5290423343f..0f77babcd30 100644 --- a/fs/btrfs/mkfs.c +++ b/fs/btrfs/mkfs.c @@ -14,7 +14,7 @@ int mkfs(int fd) { struct ctree_root_info info[2]; struct leaf empty_leaf; - struct item item; + struct btrfs_item item; struct extent_item extent_item; int ret; @@ -45,31 +45,37 @@ int mkfs(int fd) btrfs_set_header_nritems(&empty_leaf.header, 3); /* item1, reserve blocks 0-16 */ - item.key.objectid = cpu_to_le64(0); - item.key.offset = cpu_to_le64(17); - item.key.flags = cpu_to_le32(0); - item.offset = LEAF_DATA_SIZE - sizeof(struct extent_item); - item.size = sizeof(struct extent_item); + btrfs_set_key_objectid(&item.key, 0); + btrfs_set_key_offset(&item.key, 17); + btrfs_set_key_flags(&item.key, 0); + btrfs_set_item_offset(&item, + LEAF_DATA_SIZE - sizeof(struct extent_item)); + btrfs_set_item_size(&item, sizeof(struct extent_item)); extent_item.refs = 1; extent_item.owner = 0; memcpy(empty_leaf.items, &item, sizeof(item)); - memcpy(empty_leaf.data + item.offset, &extent_item, item.size); + memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, + btrfs_item_size(&item)); /* item2, give block 17 to the root */ - item.key.objectid = cpu_to_le64(17); - item.key.offset = cpu_to_le64(1); - item.offset = LEAF_DATA_SIZE - sizeof(struct extent_item) * 2; + btrfs_set_key_objectid(&item.key, 17); + btrfs_set_key_offset(&item.key, 1); + btrfs_set_item_offset(&item, + LEAF_DATA_SIZE - sizeof(struct extent_item) * 2); extent_item.owner = 1; memcpy(empty_leaf.items + 1, &item, sizeof(item)); - memcpy(empty_leaf.data + item.offset, &extent_item, item.size); + memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, + btrfs_item_size(&item)); /* item3, give block 18 for the extent root */ - item.key.objectid = cpu_to_le64(18); - item.key.offset = cpu_to_le64(1); - item.offset = LEAF_DATA_SIZE - sizeof(struct extent_item) * 3; + btrfs_set_key_objectid(&item.key, 18); + btrfs_set_key_offset(&item.key, 1); + btrfs_set_item_offset(&item, + LEAF_DATA_SIZE - sizeof(struct extent_item) * 3); extent_item.owner = 2; memcpy(empty_leaf.items + 2, &item, sizeof(item)); - memcpy(empty_leaf.data + item.offset, &extent_item, item.size); + memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, + btrfs_item_size(&item)); ret = pwrite(fd, &empty_leaf, sizeof(empty_leaf), 18 * CTREE_BLOCKSIZE); if (ret != sizeof(empty_leaf)) return -1; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 7df16b1e473..33f5ee4052c 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -9,7 +9,7 @@ void print_leaf(struct leaf *l) { int i; u32 nr = btrfs_header_nritems(&l->header); - struct item *item; + struct btrfs_item *item; struct extent_item *ei; printf("leaf %Lu total ptrs %d free space %d\n", btrfs_header_blocknr(&l->header), nr, leaf_free_space(l)); @@ -18,12 +18,15 @@ void print_leaf(struct leaf *l) item = l->items + i; printf("\titem %d key (%Lu %u %Lu) itemoff %d itemsize %d\n", i, - item->key.objectid, item->key.flags, item->key.offset, - item->offset, item->size); + btrfs_key_objectid(&item->key), + btrfs_key_flags(&item->key), + btrfs_key_offset(&item->key), + btrfs_item_offset(item), + btrfs_item_size(item)); fflush(stdout); - printf("\t\titem data %.*s\n", item->size, - l->data+item->offset); - ei = (struct extent_item *)(l->data + item->offset); + printf("\t\titem data %.*s\n", btrfs_item_size(item), + l->data + btrfs_item_offset(item)); + ei = (struct extent_item *)(l->data + btrfs_item_offset(item)); printf("\t\textent data refs %u owner %Lu\n", ei->refs, ei->owner); fflush(stdout); diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index 34a15841ebd..e767528bc52 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -173,7 +173,7 @@ static int empty_tree(struct ctree_root *root, struct radix_tree_root *radix, path.slots[0] -= 1; } slot = path.slots[0]; - found = path.nodes[0]->leaf.items[slot].key.objectid; + found=btrfs_key_objectid(&path.nodes[0]->leaf.items[slot].key); ret = del_item(root, &path); count++; if (ret) { @@ -274,7 +274,8 @@ static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) slot -= 1; } for (i = slot; i >= 0; i--) { - found = path.nodes[0]->leaf.items[i].key.objectid; + found = btrfs_key_objectid(&path.nodes[0]-> + leaf.items[i].key); radix_tree_preload(GFP_KERNEL); ret = radix_tree_insert(radix, found, (void *)found); if (ret) { -- cgit v1.2.3-70-g09d2 From 1d4f8a0c1eca5586134b56a4114a7cd5e85e3560 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 13 Mar 2007 09:28:32 -0400 Subject: Btrfs: node->blockptrs endian fixes Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 31 ++++++++++++++++++------------- fs/btrfs/ctree.h | 12 +++++++++++- fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/print-tree.c | 4 ++-- 4 files changed, 33 insertions(+), 18 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 10555647005..76281073100 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -58,7 +58,8 @@ int btrfs_cow_block(struct ctree_root *root, free_extent(root, buf->blocknr, 1); tree_block_release(root, buf); } else { - parent->node.blockptrs[parent_slot] = cow->blocknr; + btrfs_set_node_blockptr(&parent->node, parent_slot, + cow->blocknr); BUG_ON(list_empty(&parent->dirty)); free_extent(root, buf->blocknr, 1); } @@ -133,7 +134,7 @@ int check_node(struct ctree_path *path, int level) parent_key = &parent->keys[parent_slot]; BUG_ON(memcmp(parent_key, node->keys, sizeof(struct btrfs_disk_key))); - BUG_ON(parent->blockptrs[parent_slot] != + BUG_ON(btrfs_node_blockptr(parent, parent_slot) != btrfs_header_blocknr(&node->header)); } BUG_ON(nritems > NODEPTRS_PER_BLOCK); @@ -166,7 +167,7 @@ int check_leaf(struct ctree_path *path, int level) parent_key = &parent->keys[parent_slot]; BUG_ON(memcmp(parent_key, &leaf->items[0].key, sizeof(struct btrfs_disk_key))); - BUG_ON(parent->blockptrs[parent_slot] != + BUG_ON(btrfs_node_blockptr(parent, parent_slot) != btrfs_header_blocknr(&leaf->header)); } for (i = 0; nritems > 1 && i < nritems - 2; i++) { @@ -258,7 +259,7 @@ struct tree_buffer *read_node_slot(struct ctree_root *root, return NULL; if (slot >= btrfs_header_nritems(&node->header)) return NULL; - return read_tree_block(root, node->blockptrs[slot]); + return read_tree_block(root, btrfs_node_blockptr(node, slot)); } static int balance_level(struct ctree_root *root, struct ctree_path *path, @@ -283,7 +284,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, mid_buf = path->nodes[level]; mid = &mid_buf->node; - orig_ptr = mid->blockptrs[orig_slot]; + orig_ptr = btrfs_node_blockptr(mid, orig_slot); if (level < MAX_LEVEL - 1) parent_buf = path->nodes[level + 1]; @@ -407,7 +408,8 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, } /* double check we haven't messed things up */ check_block(path, level); - if (orig_ptr != path->nodes[level]->node.blockptrs[path->slots[level]]) + if (orig_ptr != btrfs_node_blockptr(&path->nodes[level]->node, + path->slots[level])) BUG(); if (right_buf) @@ -482,7 +484,7 @@ again: slot = p->slots[level]; BUG_ON(btrfs_header_nritems(&c->header) == 1); } - b = read_tree_block(root, c->blockptrs[slot]); + b = read_tree_block(root, btrfs_node_blockptr(c, slot)); } else { struct leaf *l = (struct leaf *)c; p->slots[level] = slot; @@ -660,7 +662,7 @@ static int insert_new_root(struct ctree_root *root, else lower_key = lower->keys; memcpy(c->keys, lower_key, sizeof(struct btrfs_disk_key)); - c->blockptrs[0] = path->nodes[level-1]->blocknr; + btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->blocknr); /* the super has an extra ref to root->node */ tree_block_release(root, root->node); root->node = t; @@ -700,7 +702,7 @@ static int insert_ptr(struct ctree_root *root, (nritems - slot) * sizeof(u64)); } memcpy(lower->keys + slot, key, sizeof(struct btrfs_disk_key)); - lower->blockptrs[slot] = blocknr; + btrfs_set_node_blockptr(lower, slot, blocknr); btrfs_set_header_nritems(&lower->header, nritems + 1); if (lower->keys[1].objectid == 0) BUG(); @@ -820,7 +822,8 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, if (slot >= btrfs_header_nritems(&upper->node.header) - 1) { return 1; } - right_buf = read_tree_block(root, upper->node.blockptrs[slot + 1]); + right_buf = read_tree_block(root, btrfs_node_blockptr(&upper->node, + slot + 1)); right = &right_buf->leaf; free_space = leaf_free_space(right); if (free_space < data_size + sizeof(struct btrfs_item)) { @@ -926,7 +929,8 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, if (!path->nodes[1]) { return 1; } - t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]); + t = read_tree_block(root, btrfs_node_blockptr(&path->nodes[1]->node, + slot - 1)); left = &t->leaf; free_space = leaf_free_space(left); if (free_space < data_size + sizeof(struct btrfs_item)) { @@ -1353,7 +1357,7 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) level++; continue; } - blocknr = c->node.blockptrs[slot]; + blocknr = btrfs_node_blockptr(&c->node, slot); if (next) tree_block_release(root, next); next = read_tree_block(root, blocknr); @@ -1368,7 +1372,8 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) path->slots[level] = 0; if (!level) break; - next = read_tree_block(root, next->node.blockptrs[0]); + next = read_tree_block(root, + btrfs_node_blockptr(&next->node, 0)); } return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b03df154dcd..a8454c401ce 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -128,7 +128,7 @@ struct leaf { struct node { struct btrfs_header header; struct btrfs_disk_key keys[NODEPTRS_PER_BLOCK]; - u64 blockptrs[NODEPTRS_PER_BLOCK]; + __le64 blockptrs[NODEPTRS_PER_BLOCK]; } __attribute__ ((__packed__)); /* @@ -153,6 +153,16 @@ struct ctree_path { int slots[MAX_LEVEL]; }; +static inline u64 btrfs_node_blockptr(struct node *n, int nr) +{ + return le64_to_cpu(n->blockptrs[nr]); +} + +static inline void btrfs_set_node_blockptr(struct node *n, int nr, u64 val) +{ + n->blockptrs[nr] = cpu_to_le64(val); +} + static inline u16 btrfs_item_offset(struct btrfs_item *item) { return le16_to_cpu(item->offset); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fdf95bd07f9..e511f48eb48 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -85,7 +85,7 @@ int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf) return 0; for (i = 0; i < btrfs_header_nritems(&buf->node.header); i++) { - blocknr = buf->node.blockptrs[i]; + blocknr = btrfs_node_blockptr(&buf->node, i); inc_block_ref(root, blocknr); } return 0; @@ -437,7 +437,7 @@ int walk_down_tree(struct ctree_root *root, struct ctree_path *path, int *level) if (path->slots[*level] >= btrfs_header_nritems(&cur->node.header)) break; - blocknr = cur->node.blockptrs[path->slots[*level]]; + blocknr = btrfs_node_blockptr(&cur->node, path->slots[*level]); ret = lookup_block_ref(root, blocknr, &refs); if (refs != 1 || *level == 1) { path->slots[*level]++; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 33f5ee4052c..101278e1139 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -54,12 +54,12 @@ void print_tree(struct ctree_root *root, struct tree_buffer *t) printf("\tkey %d (%Lu %u %Lu) block %Lu\n", i, c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset, - c->blockptrs[i]); + btrfs_node_blockptr(c, i)); fflush(stdout); } for (i = 0; i < nr; i++) { struct tree_buffer *next_buf = read_tree_block(root, - c->blockptrs[i]); + btrfs_node_blockptr(c, i)); struct node *next = &next_buf->node; if (btrfs_is_leaf(next) && btrfs_header_level(&c->header) != 1) -- cgit v1.2.3-70-g09d2 From cf27e1eec063fa68a89c57ae0a83f93aa38851d6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 13 Mar 2007 09:49:06 -0400 Subject: Btrfs: struct extent_item endian Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 24 ++++++++++++++++++++++-- fs/btrfs/extent-tree.c | 22 +++++++++++++--------- fs/btrfs/mkfs.c | 8 ++++---- 3 files changed, 39 insertions(+), 15 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a8454c401ce..90cc2c9bd79 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -136,8 +136,8 @@ struct node { * owner of the block and the number of references */ struct extent_item { - u32 refs; - u64 owner; + __le32 refs; + __le64 owner; } __attribute__ ((__packed__)); /* @@ -153,6 +153,26 @@ struct ctree_path { int slots[MAX_LEVEL]; }; +static inline u64 btrfs_extent_owner(struct extent_item *ei) +{ + return le64_to_cpu(ei->owner); +} + +static inline void btrfs_set_extent_owner(struct extent_item *ei, u64 val) +{ + ei->owner = cpu_to_le64(val); +} + +static inline u32 btrfs_extent_refs(struct extent_item *ei) +{ + return le32_to_cpu(ei->refs); +} + +static inline void btrfs_set_extent_refs(struct extent_item *ei, u32 val) +{ + ei->refs = cpu_to_le32(val); +} + static inline u64 btrfs_node_blockptr(struct node *n, int nr) { return le64_to_cpu(n->blockptrs[nr]); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e511f48eb48..1676a6595cc 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -29,6 +29,7 @@ static int inc_block_ref(struct ctree_root *root, u64 blocknr) struct leaf *l; struct extent_item *item; struct btrfs_key ins; + u32 refs; find_free_extent(root->extent_root, 0, 0, (u64)-1, &ins); init_path(&path); @@ -42,7 +43,8 @@ static int inc_block_ref(struct ctree_root *root, u64 blocknr) l = &path.nodes[0]->leaf; item = (struct extent_item *)(l->data + btrfs_item_offset(l->items + path.slots[0])); - item->refs++; + refs = btrfs_extent_refs(item); + btrfs_set_extent_refs(item, refs + 1); BUG_ON(list_empty(&path.nodes[0]->dirty)); release_path(root->extent_root, &path); @@ -69,7 +71,7 @@ static int lookup_block_ref(struct ctree_root *root, u64 blocknr, u32 *refs) item = (struct extent_item *)(l->data + btrfs_item_offset(l->items + path.slots[0])); - *refs = item->refs; + *refs = btrfs_extent_refs(item); release_path(root->extent_root, &path); return 0; } @@ -120,9 +122,9 @@ static int finish_current_insert(struct ctree_root *extent_root) int i; int ret; - extent_item.refs = 1; - extent_item.owner = - btrfs_header_parentid(&extent_root->node->node.header); + btrfs_set_extent_refs(&extent_item, 1); + btrfs_set_extent_owner(&extent_item, + btrfs_header_parentid(&extent_root->node->node.header)); ins.offset = 1; ins.flags = 0; @@ -148,6 +150,7 @@ int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) struct btrfs_item *item; struct extent_item *ei; struct btrfs_key ins; + u32 refs; key.objectid = blocknr; key.flags = 0; @@ -166,8 +169,9 @@ int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) ei = (struct extent_item *)(path.nodes[0]->leaf.data + btrfs_item_offset(item)); BUG_ON(ei->refs == 0); - ei->refs--; - if (ei->refs == 0) { + refs = btrfs_extent_refs(ei) - 1; + btrfs_set_extent_refs(ei, refs); + if (refs == 0) { if (root == extent_root) { int err; radix_tree_preload(GFP_KERNEL); @@ -368,8 +372,8 @@ int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, struct ctree_root *extent_root = root->extent_root; struct extent_item extent_item; - extent_item.refs = 1; - extent_item.owner = owner; + btrfs_set_extent_refs(&extent_item, 1); + btrfs_set_extent_owner(&extent_item, owner); if (root == extent_root) { BUG_ON(extent_root->current_insert.offset == 0); diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c index 0f77babcd30..e0d3bc1d025 100644 --- a/fs/btrfs/mkfs.c +++ b/fs/btrfs/mkfs.c @@ -51,8 +51,8 @@ int mkfs(int fd) btrfs_set_item_offset(&item, LEAF_DATA_SIZE - sizeof(struct extent_item)); btrfs_set_item_size(&item, sizeof(struct extent_item)); - extent_item.refs = 1; - extent_item.owner = 0; + btrfs_set_extent_refs(&extent_item, 1); + btrfs_set_extent_owner(&extent_item, 0); memcpy(empty_leaf.items, &item, sizeof(item)); memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, btrfs_item_size(&item)); @@ -62,7 +62,7 @@ int mkfs(int fd) btrfs_set_key_offset(&item.key, 1); btrfs_set_item_offset(&item, LEAF_DATA_SIZE - sizeof(struct extent_item) * 2); - extent_item.owner = 1; + btrfs_set_extent_owner(&extent_item, 1); memcpy(empty_leaf.items + 1, &item, sizeof(item)); memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, btrfs_item_size(&item)); @@ -72,7 +72,7 @@ int mkfs(int fd) btrfs_set_key_offset(&item.key, 1); btrfs_set_item_offset(&item, LEAF_DATA_SIZE - sizeof(struct extent_item) * 3); - extent_item.owner = 2; + btrfs_set_extent_owner(&extent_item, 2); memcpy(empty_leaf.items + 2, &item, sizeof(item)); memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, btrfs_item_size(&item)); -- cgit v1.2.3-70-g09d2 From 234b63a091e1df6bd4261dd7b3a7490074830628 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 13 Mar 2007 10:46:10 -0400 Subject: rename funcs and structs to btrfs Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 317 +++++++++++++++++++++++++------------------------ fs/btrfs/ctree.h | 95 +++++++-------- fs/btrfs/debug-tree.c | 8 +- fs/btrfs/disk-io.c | 106 +++++++++-------- fs/btrfs/disk-io.h | 29 ++--- fs/btrfs/extent-tree.c | 147 +++++++++++------------ fs/btrfs/mkfs.c | 26 ++-- fs/btrfs/print-tree.c | 23 ++-- fs/btrfs/print-tree.h | 4 +- fs/btrfs/quick-test.c | 54 ++++----- fs/btrfs/random-test.c | 92 +++++++------- 11 files changed, 454 insertions(+), 447 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 76281073100..c8529b394d5 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -6,47 +6,47 @@ #include "disk-io.h" #include "print-tree.h" -static int split_node(struct ctree_root *root, struct ctree_path *path, +static int split_node(struct btrfs_root *root, struct btrfs_path *path, int level); -static int split_leaf(struct ctree_root *root, struct ctree_path *path, +static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, int data_size); -static int push_node_left(struct ctree_root *root, struct tree_buffer *dst, - struct tree_buffer *src); -static int balance_node_right(struct ctree_root *root, - struct tree_buffer *dst_buf, - struct tree_buffer *src_buf); -static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level, +static int push_node_left(struct btrfs_root *root, struct btrfs_buffer *dst, + struct btrfs_buffer *src); +static int balance_node_right(struct btrfs_root *root, + struct btrfs_buffer *dst_buf, + struct btrfs_buffer *src_buf); +static int del_ptr(struct btrfs_root *root, struct btrfs_path *path, int level, int slot); -inline void init_path(struct ctree_path *p) +inline void btrfs_init_path(struct btrfs_path *p) { memset(p, 0, sizeof(*p)); } -void release_path(struct ctree_root *root, struct ctree_path *p) +void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) { int i; - for (i = 0; i < MAX_LEVEL; i++) { + for (i = 0; i < BTRFS_MAX_LEVEL; i++) { if (!p->nodes[i]) break; - tree_block_release(root, p->nodes[i]); + btrfs_block_release(root, p->nodes[i]); } memset(p, 0, sizeof(*p)); } -int btrfs_cow_block(struct ctree_root *root, - struct tree_buffer *buf, - struct tree_buffer *parent, +int btrfs_cow_block(struct btrfs_root *root, + struct btrfs_buffer *buf, + struct btrfs_buffer *parent, int parent_slot, - struct tree_buffer **cow_ret) + struct btrfs_buffer **cow_ret) { - struct tree_buffer *cow; + struct btrfs_buffer *cow; if (!list_empty(&buf->dirty)) { *cow_ret = buf; return 0; } - cow = alloc_free_block(root); + cow = btrfs_alloc_free_block(root); memcpy(&cow->node, &buf->node, sizeof(buf->node)); btrfs_set_header_blocknr(&cow->node.header, cow->blocknr); *cow_ret = cow; @@ -55,15 +55,15 @@ int btrfs_cow_block(struct ctree_root *root, root->node = cow; cow->count++; if (buf != root->commit_root) - free_extent(root, buf->blocknr, 1); - tree_block_release(root, buf); + btrfs_free_extent(root, buf->blocknr, 1); + btrfs_block_release(root, buf); } else { btrfs_set_node_blockptr(&parent->node, parent_slot, cow->blocknr); BUG_ON(list_empty(&parent->dirty)); - free_extent(root, buf->blocknr, 1); + btrfs_free_extent(root, buf->blocknr, 1); } - tree_block_release(root, buf); + btrfs_block_release(root, buf); return 0; } @@ -72,7 +72,7 @@ int btrfs_cow_block(struct ctree_root *root, * this returns the address of the start of the last item, * which is the stop of the leaf data stack */ -static inline unsigned int leaf_data_end(struct leaf *leaf) +static inline unsigned int leaf_data_end(struct btrfs_leaf *leaf) { u32 nr = btrfs_header_nritems(&leaf->header); if (nr == 0) @@ -85,7 +85,7 @@ static inline unsigned int leaf_data_end(struct leaf *leaf) * the start of the leaf data. IOW, how much room * the leaf has left for both items and data */ -int leaf_free_space(struct leaf *leaf) +int btrfs_leaf_free_space(struct btrfs_leaf *leaf) { int data_end = leaf_data_end(leaf); int nritems = btrfs_header_nritems(&leaf->header); @@ -117,11 +117,11 @@ int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) return 0; } -int check_node(struct ctree_path *path, int level) +int check_node(struct btrfs_path *path, int level) { int i; - struct node *parent = NULL; - struct node *node = &path->nodes[level]->node; + struct btrfs_node *parent = NULL; + struct btrfs_node *node = &path->nodes[level]->node; int parent_slot; u32 nritems = btrfs_header_nritems(&node->header); @@ -146,18 +146,18 @@ int check_node(struct ctree_path *path, int level) return 0; } -int check_leaf(struct ctree_path *path, int level) +int check_leaf(struct btrfs_path *path, int level) { int i; - struct leaf *leaf = &path->nodes[level]->leaf; - struct node *parent = NULL; + struct btrfs_leaf *leaf = &path->nodes[level]->leaf; + struct btrfs_node *parent = NULL; int parent_slot; u32 nritems = btrfs_header_nritems(&leaf->header); if (path->nodes[level + 1]) parent = &path->nodes[level + 1]->node; parent_slot = path->slots[level + 1]; - BUG_ON(leaf_free_space(leaf) < 0); + BUG_ON(btrfs_leaf_free_space(leaf) < 0); if (nritems == 0) return 0; @@ -186,7 +186,7 @@ int check_leaf(struct ctree_path *path, int level) return 0; } -int check_block(struct ctree_path *path, int level) +int check_block(struct btrfs_path *path, int level) { if (level == 0) return check_leaf(path, level); @@ -233,10 +233,10 @@ int generic_bin_search(char *p, int item_size, struct btrfs_key *key, * simple bin_search frontend that does the right thing for * leaves vs nodes */ -int bin_search(struct node *c, struct btrfs_key *key, int *slot) +int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot) { if (btrfs_is_leaf(c)) { - struct leaf *l = (struct leaf *)c; + struct btrfs_leaf *l = (struct btrfs_leaf *)c; return generic_bin_search((void *)l->items, sizeof(struct btrfs_item), key, btrfs_header_nritems(&c->header), @@ -250,11 +250,11 @@ int bin_search(struct node *c, struct btrfs_key *key, int *slot) return -1; } -struct tree_buffer *read_node_slot(struct ctree_root *root, - struct tree_buffer *parent_buf, +struct btrfs_buffer *read_node_slot(struct btrfs_root *root, + struct btrfs_buffer *parent_buf, int slot) { - struct node *node = &parent_buf->node; + struct btrfs_node *node = &parent_buf->node; if (slot < 0) return NULL; if (slot >= btrfs_header_nritems(&node->header)) @@ -262,17 +262,17 @@ struct tree_buffer *read_node_slot(struct ctree_root *root, return read_tree_block(root, btrfs_node_blockptr(node, slot)); } -static int balance_level(struct ctree_root *root, struct ctree_path *path, +static int balance_level(struct btrfs_root *root, struct btrfs_path *path, int level) { - struct tree_buffer *right_buf; - struct tree_buffer *mid_buf; - struct tree_buffer *left_buf; - struct tree_buffer *parent_buf = NULL; - struct node *right = NULL; - struct node *mid; - struct node *left = NULL; - struct node *parent = NULL; + struct btrfs_buffer *right_buf; + struct btrfs_buffer *mid_buf; + struct btrfs_buffer *left_buf; + struct btrfs_buffer *parent_buf = NULL; + struct btrfs_node *right = NULL; + struct btrfs_node *mid; + struct btrfs_node *left = NULL; + struct btrfs_node *parent = NULL; int ret = 0; int wret; int pslot; @@ -286,12 +286,12 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, mid = &mid_buf->node; orig_ptr = btrfs_node_blockptr(mid, orig_slot); - if (level < MAX_LEVEL - 1) + if (level < BTRFS_MAX_LEVEL - 1) parent_buf = path->nodes[level + 1]; pslot = path->slots[level + 1]; if (!parent_buf) { - struct tree_buffer *child; + struct btrfs_buffer *child; u64 blocknr = mid_buf->blocknr; if (btrfs_header_nritems(&mid->header) != 1) @@ -303,11 +303,11 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, root->node = child; path->nodes[level] = NULL; /* once for the path */ - tree_block_release(root, mid_buf); + btrfs_block_release(root, mid_buf); /* once for the root ptr */ - tree_block_release(root, mid_buf); + btrfs_block_release(root, mid_buf); clean_tree_block(root, mid_buf); - return free_extent(root, blocknr, 1); + return btrfs_free_extent(root, blocknr, 1); } parent = &parent_buf->node; @@ -340,14 +340,14 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, ret = wret; if (btrfs_header_nritems(&right->header) == 0) { u64 blocknr = right_buf->blocknr; - tree_block_release(root, right_buf); + btrfs_block_release(root, right_buf); clean_tree_block(root, right_buf); right_buf = NULL; right = NULL; wret = del_ptr(root, path, level + 1, pslot + 1); if (wret) ret = wret; - wret = free_extent(root, blocknr, 1); + wret = btrfs_free_extent(root, blocknr, 1); if (wret) ret = wret; } else { @@ -375,14 +375,14 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, if (btrfs_header_nritems(&mid->header) == 0) { /* we've managed to empty the middle node, drop it */ u64 blocknr = mid_buf->blocknr; - tree_block_release(root, mid_buf); + btrfs_block_release(root, mid_buf); clean_tree_block(root, mid_buf); mid_buf = NULL; mid = NULL; wret = del_ptr(root, path, level + 1, pslot); if (wret) ret = wret; - wret = free_extent(root, blocknr, 1); + wret = btrfs_free_extent(root, blocknr, 1); if (wret) ret = wret; } else { @@ -400,7 +400,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, path->slots[level + 1] -= 1; path->slots[level] = orig_slot; if (mid_buf) - tree_block_release(root, mid_buf); + btrfs_block_release(root, mid_buf); } else { orig_slot -= btrfs_header_nritems(&left->header); path->slots[level] = orig_slot; @@ -413,9 +413,9 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, BUG(); if (right_buf) - tree_block_release(root, right_buf); + btrfs_block_release(root, right_buf); if (left_buf) - tree_block_release(root, left_buf); + btrfs_block_release(root, left_buf); return ret; } @@ -432,12 +432,12 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if * possible) */ -int search_slot(struct ctree_root *root, struct btrfs_key *key, - struct ctree_path *p, int ins_len, int cow) +int btrfs_search_slot(struct btrfs_root *root, struct btrfs_key *key, + struct btrfs_path *p, int ins_len, int cow) { - struct tree_buffer *b; - struct tree_buffer *cow_buf; - struct node *c; + struct btrfs_buffer *b; + struct btrfs_buffer *cow_buf; + struct btrfs_node *c; int slot; int ret; int level; @@ -486,9 +486,9 @@ again: } b = read_tree_block(root, btrfs_node_blockptr(c, slot)); } else { - struct leaf *l = (struct leaf *)c; + struct btrfs_leaf *l = (struct btrfs_leaf *)c; p->slots[level] = slot; - if (ins_len > 0 && leaf_free_space(l) < + if (ins_len > 0 && btrfs_leaf_free_space(l) < sizeof(struct btrfs_item) + ins_len) { int sret = split_leaf(root, p, ins_len); BUG_ON(sret > 0); @@ -513,14 +513,14 @@ again: * If this fails to write a tree block, it returns -1, but continues * fixing up the blocks in ram so the tree is consistent. */ -static int fixup_low_keys(struct ctree_root *root, - struct ctree_path *path, struct btrfs_disk_key *key, +static int fixup_low_keys(struct btrfs_root *root, + struct btrfs_path *path, struct btrfs_disk_key *key, int level) { int i; int ret = 0; - for (i = level; i < MAX_LEVEL; i++) { - struct node *t; + for (i = level; i < BTRFS_MAX_LEVEL; i++) { + struct btrfs_node *t; int tslot = path->slots[i]; if (!path->nodes[i]) break; @@ -540,11 +540,11 @@ static int fixup_low_keys(struct ctree_root *root, * returns 0 if some ptrs were pushed left, < 0 if there was some horrible * error, and > 0 if there was no room in the left hand block. */ -static int push_node_left(struct ctree_root *root, struct tree_buffer *dst_buf, - struct tree_buffer *src_buf) +static int push_node_left(struct btrfs_root *root, struct btrfs_buffer *dst_buf, + struct btrfs_buffer *src_buf) { - struct node *src = &src_buf->node; - struct node *dst = &dst_buf->node; + struct btrfs_node *src = &src_buf->node; + struct btrfs_node *dst = &dst_buf->node; int push_items = 0; int src_nritems; int dst_nritems; @@ -587,12 +587,12 @@ static int push_node_left(struct ctree_root *root, struct tree_buffer *dst_buf, * * this will only push up to 1/2 the contents of the left node over */ -static int balance_node_right(struct ctree_root *root, - struct tree_buffer *dst_buf, - struct tree_buffer *src_buf) +static int balance_node_right(struct btrfs_root *root, + struct btrfs_buffer *dst_buf, + struct btrfs_buffer *src_buf) { - struct node *src = &src_buf->node; - struct node *dst = &dst_buf->node; + struct btrfs_node *src = &src_buf->node; + struct btrfs_node *dst = &dst_buf->node; int push_items = 0; int max_push; int src_nritems; @@ -637,18 +637,18 @@ static int balance_node_right(struct ctree_root *root, * * returns zero on success or < 0 on failure. */ -static int insert_new_root(struct ctree_root *root, - struct ctree_path *path, int level) +static int insert_new_root(struct btrfs_root *root, + struct btrfs_path *path, int level) { - struct tree_buffer *t; - struct node *lower; - struct node *c; + struct btrfs_buffer *t; + struct btrfs_node *lower; + struct btrfs_node *c; struct btrfs_disk_key *lower_key; BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - t = alloc_free_block(root); + t = btrfs_alloc_free_block(root); c = &t->node; memset(c, 0, sizeof(c)); btrfs_set_header_nritems(&c->header, 1); @@ -658,13 +658,13 @@ static int insert_new_root(struct ctree_root *root, btrfs_header_parentid(&root->node->node.header)); lower = &path->nodes[level-1]->node; if (btrfs_is_leaf(lower)) - lower_key = &((struct leaf *)lower)->items[0].key; + lower_key = &((struct btrfs_leaf *)lower)->items[0].key; else lower_key = lower->keys; memcpy(c->keys, lower_key, sizeof(struct btrfs_disk_key)); btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->blocknr); /* the super has an extra ref to root->node */ - tree_block_release(root, root->node); + btrfs_block_release(root, root->node); root->node = t; t->count++; path->nodes[level] = t; @@ -681,11 +681,11 @@ static int insert_new_root(struct ctree_root *root, * * returns zero on success and < 0 on any error */ -static int insert_ptr(struct ctree_root *root, - struct ctree_path *path, struct btrfs_disk_key *key, +static int insert_ptr(struct btrfs_root *root, + struct btrfs_path *path, struct btrfs_disk_key *key, u64 blocknr, int slot, int level) { - struct node *lower; + struct btrfs_node *lower; int nritems; BUG_ON(!path->nodes[level]); @@ -719,13 +719,13 @@ static int insert_ptr(struct ctree_root *root, * * returns 0 on success and < 0 on failure */ -static int split_node(struct ctree_root *root, struct ctree_path *path, +static int split_node(struct btrfs_root *root, struct btrfs_path *path, int level) { - struct tree_buffer *t; - struct node *c; - struct tree_buffer *split_buffer; - struct node *split; + struct btrfs_buffer *t; + struct btrfs_node *c; + struct btrfs_buffer *split_buffer; + struct btrfs_node *split; int mid; int ret; int wret; @@ -740,7 +740,7 @@ static int split_node(struct ctree_root *root, struct ctree_path *path, return ret; } c_nritems = btrfs_header_nritems(&c->header); - split_buffer = alloc_free_block(root); + split_buffer = btrfs_alloc_free_block(root); split = &split_buffer->node; btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); btrfs_set_header_blocknr(&split->header, split_buffer->blocknr); @@ -763,11 +763,11 @@ static int split_node(struct ctree_root *root, struct ctree_path *path, if (path->slots[level] >= mid) { path->slots[level] -= mid; - tree_block_release(root, t); + btrfs_block_release(root, t); path->nodes[level] = split_buffer; path->slots[level + 1] += 1; } else { - tree_block_release(root, split_buffer); + btrfs_block_release(root, split_buffer); } return ret; } @@ -777,7 +777,7 @@ static int split_node(struct ctree_root *root, struct ctree_path *path, * and nr indicate which items in the leaf to check. This totals up the * space used both by the item structs and the item data */ -static int leaf_space_used(struct leaf *l, int start, int nr) +static int leaf_space_used(struct btrfs_leaf *l, int start, int nr) { int data_len; int end = start + nr - 1; @@ -797,14 +797,14 @@ static int leaf_space_used(struct leaf *l, int start, int nr) * returns 1 if the push failed because the other node didn't have enough * room, 0 if everything worked out and < 0 if there were major errors. */ -static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, +static int push_leaf_right(struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct tree_buffer *left_buf = path->nodes[0]; - struct leaf *left = &left_buf->leaf; - struct leaf *right; - struct tree_buffer *right_buf; - struct tree_buffer *upper; + struct btrfs_buffer *left_buf = path->nodes[0]; + struct btrfs_leaf *left = &left_buf->leaf; + struct btrfs_leaf *right; + struct btrfs_buffer *right_buf; + struct btrfs_buffer *upper; int slot; int i; int free_space; @@ -825,17 +825,17 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, right_buf = read_tree_block(root, btrfs_node_blockptr(&upper->node, slot + 1)); right = &right_buf->leaf; - free_space = leaf_free_space(right); + free_space = btrfs_leaf_free_space(right); if (free_space < data_size + sizeof(struct btrfs_item)) { - tree_block_release(root, right_buf); + btrfs_block_release(root, right_buf); return 1; } /* cow and double check */ btrfs_cow_block(root, right_buf, upper, slot + 1, &right_buf); right = &right_buf->leaf; - free_space = leaf_free_space(right); + free_space = btrfs_leaf_free_space(right); if (free_space < data_size + sizeof(struct btrfs_item)) { - tree_block_release(root, right_buf); + btrfs_block_release(root, right_buf); return 1; } @@ -851,7 +851,7 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, push_space += btrfs_item_size(item) + sizeof(*item); } if (push_items == 0) { - tree_block_release(root, right_buf); + btrfs_block_release(root, right_buf); return 1; } right_nritems = btrfs_header_nritems(&right->header); @@ -893,11 +893,11 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left_nritems) { path->slots[0] -= left_nritems; - tree_block_release(root, path->nodes[0]); + btrfs_block_release(root, path->nodes[0]); path->nodes[0] = right_buf; path->slots[1] += 1; } else { - tree_block_release(root, right_buf); + btrfs_block_release(root, right_buf); } return 0; } @@ -905,13 +905,13 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, * push some data in the path leaf to the left, trying to free up at * least data_size bytes. returns zero if the push worked, nonzero otherwise */ -static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, +static int push_leaf_left(struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct tree_buffer *right_buf = path->nodes[0]; - struct leaf *right = &right_buf->leaf; - struct tree_buffer *t; - struct leaf *left; + struct btrfs_buffer *right_buf = path->nodes[0]; + struct btrfs_leaf *right = &right_buf->leaf; + struct btrfs_buffer *t; + struct btrfs_leaf *left; int slot; int i; int free_space; @@ -932,18 +932,18 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, t = read_tree_block(root, btrfs_node_blockptr(&path->nodes[1]->node, slot - 1)); left = &t->leaf; - free_space = leaf_free_space(left); + free_space = btrfs_leaf_free_space(left); if (free_space < data_size + sizeof(struct btrfs_item)) { - tree_block_release(root, t); + btrfs_block_release(root, t); return 1; } /* cow and double check */ btrfs_cow_block(root, t, path->nodes[1], slot - 1, &t); left = &t->leaf; - free_space = leaf_free_space(left); + free_space = btrfs_leaf_free_space(left); if (free_space < data_size + sizeof(struct btrfs_item)) { - tree_block_release(root, t); + btrfs_block_release(root, t); return 1; } @@ -958,7 +958,7 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, push_space += btrfs_item_size(item) + sizeof(*item); } if (push_items == 0) { - tree_block_release(root, t); + btrfs_block_release(root, t); return 1; } /* push data from right to left */ @@ -1009,11 +1009,11 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, /* then fixup the leaf pointer in the path */ if (path->slots[0] < push_items) { path->slots[0] += old_left_nritems; - tree_block_release(root, path->nodes[0]); + btrfs_block_release(root, path->nodes[0]); path->nodes[0] = t; path->slots[1] -= 1; } else { - tree_block_release(root, t); + btrfs_block_release(root, t); path->slots[0] -= push_items; } BUG_ON(path->slots[0] < 0); @@ -1026,16 +1026,16 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, * * returns 0 if all went well and < 0 on failure. */ -static int split_leaf(struct ctree_root *root, struct ctree_path *path, +static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct tree_buffer *l_buf; - struct leaf *l; + struct btrfs_buffer *l_buf; + struct btrfs_leaf *l; u32 nritems; int mid; int slot; - struct leaf *right; - struct tree_buffer *right_buffer; + struct btrfs_leaf *right; + struct btrfs_buffer *right_buffer; int space_needed = data_size + sizeof(struct btrfs_item); int data_copy_size; int rt_data_off; @@ -1047,7 +1047,7 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, l = &l_buf->leaf; /* did the pushes work? */ - if (leaf_free_space(l) >= sizeof(struct btrfs_item) + data_size) + if (btrfs_leaf_free_space(l) >= sizeof(struct btrfs_item) + data_size) return 0; if (!path->nodes[1]) { @@ -1058,7 +1058,7 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, slot = path->slots[0]; nritems = btrfs_header_nritems(&l->header); mid = (nritems + 1)/ 2; - right_buffer = alloc_free_block(root); + right_buffer = btrfs_alloc_free_block(root); BUG_ON(!right_buffer); BUG_ON(mid == nritems); right = &right_buffer->leaf; @@ -1101,12 +1101,12 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, BUG_ON(list_empty(&l_buf->dirty)); BUG_ON(path->slots[0] != slot); if (mid <= slot) { - tree_block_release(root, path->nodes[0]); + btrfs_block_release(root, path->nodes[0]); path->nodes[0] = right_buffer; path->slots[0] -= mid; path->slots[1] += 1; } else - tree_block_release(root, right_buffer); + btrfs_block_release(root, right_buffer); BUG_ON(path->slots[0] < 0); return ret; } @@ -1115,17 +1115,17 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, * Given a key and some data, insert an item into the tree. * This does all the path init required, making room in the tree if needed. */ -int insert_item(struct ctree_root *root, struct btrfs_key *cpu_key, +int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, void *data, int data_size) { int ret = 0; int slot; int slot_orig; - struct leaf *leaf; - struct tree_buffer *leaf_buf; + struct btrfs_leaf *leaf; + struct btrfs_buffer *leaf_buf; u32 nritems; unsigned int data_end; - struct ctree_path path; + struct btrfs_path path; struct btrfs_disk_key disk_key; btrfs_cpu_key_to_disk(&disk_key, cpu_key); @@ -1133,10 +1133,10 @@ int insert_item(struct ctree_root *root, struct btrfs_key *cpu_key, /* create a root if there isn't one */ if (!root->node) BUG(); - init_path(&path); - ret = search_slot(root, cpu_key, &path, data_size, 1); + btrfs_init_path(&path); + ret = btrfs_search_slot(root, cpu_key, &path, data_size, 1); if (ret == 0) { - release_path(root, &path); + btrfs_release_path(root, &path); return -EEXIST; } if (ret < 0) @@ -1149,7 +1149,8 @@ int insert_item(struct ctree_root *root, struct btrfs_key *cpu_key, nritems = btrfs_header_nritems(&leaf->header); data_end = leaf_data_end(leaf); - if (leaf_free_space(leaf) < sizeof(struct btrfs_item) + data_size) + if (btrfs_leaf_free_space(leaf) < + sizeof(struct btrfs_item) + data_size) BUG(); slot = path.slots[0]; @@ -1190,11 +1191,11 @@ int insert_item(struct ctree_root *root, struct btrfs_key *cpu_key, ret = fixup_low_keys(root, &path, &disk_key, 1); BUG_ON(list_empty(&leaf_buf->dirty)); - if (leaf_free_space(leaf) < 0) + if (btrfs_leaf_free_space(leaf) < 0) BUG(); check_leaf(&path, 0); out: - release_path(root, &path); + btrfs_release_path(root, &path); return ret; } @@ -1205,11 +1206,11 @@ out: * continuing all the way the root if required. The root is converted into * a leaf if all the nodes are emptied. */ -static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level, +static int del_ptr(struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { - struct node *node; - struct tree_buffer *parent = path->nodes[level]; + struct btrfs_node *node; + struct btrfs_buffer *parent = path->nodes[level]; u32 nritems; int ret = 0; int wret; @@ -1242,11 +1243,11 @@ static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level, * delete the item at the leaf level in path. If that empties * the leaf, remove it from the tree */ -int del_item(struct ctree_root *root, struct ctree_path *path) +int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path) { int slot; - struct leaf *leaf; - struct tree_buffer *leaf_buf; + struct btrfs_leaf *leaf; + struct btrfs_buffer *leaf_buf; int doff; int dsize; int ret = 0; @@ -1286,7 +1287,7 @@ int del_item(struct ctree_root *root, struct ctree_path *path) wret = del_ptr(root, path, 1, path->slots[1]); if (wret) ret = wret; - wret = free_extent(root, leaf_buf->blocknr, 1); + wret = btrfs_free_extent(root, leaf_buf->blocknr, 1); if (wret) ret = wret; } @@ -1323,12 +1324,12 @@ int del_item(struct ctree_root *root, struct ctree_path *path) wret = del_ptr(root, path, 1, slot); if (wret) ret = wret; - tree_block_release(root, leaf_buf); - wret = free_extent(root, blocknr, 1); + btrfs_block_release(root, leaf_buf); + wret = btrfs_free_extent(root, blocknr, 1); if (wret) ret = wret; } else { - tree_block_release(root, leaf_buf); + btrfs_block_release(root, leaf_buf); } } } @@ -1340,15 +1341,15 @@ int del_item(struct ctree_root *root, struct ctree_path *path) * returns 0 if it found something or 1 if there are no greater leaves. * returns < 0 on io errors. */ -int next_leaf(struct ctree_root *root, struct ctree_path *path) +int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) { int slot; int level = 1; u64 blocknr; - struct tree_buffer *c; - struct tree_buffer *next = NULL; + struct btrfs_buffer *c; + struct btrfs_buffer *next = NULL; - while(level < MAX_LEVEL) { + while(level < BTRFS_MAX_LEVEL) { if (!path->nodes[level]) return 1; slot = path->slots[level] + 1; @@ -1359,7 +1360,7 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) } blocknr = btrfs_node_blockptr(&c->node, slot); if (next) - tree_block_release(root, next); + btrfs_block_release(root, next); next = read_tree_block(root, blocknr); break; } @@ -1367,7 +1368,7 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) while(1) { level--; c = path->nodes[level]; - tree_block_release(root, c); + btrfs_block_release(root, c); path->nodes[level] = next; path->slots[level] = 0; if (!level) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 90cc2c9bd79..5dfcc90812c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1,10 +1,10 @@ -#ifndef __CTREE__ -#define __CTREE__ +#ifndef __BTRFS__ +#define __BTRFS__ #include "list.h" #include "kerncompat.h" -#define CTREE_BLOCKSIZE 1024 +#define BTRFS_BLOCKSIZE 1024 /* * the key defines the order in the tree, and so it also defines (optimal) @@ -46,21 +46,21 @@ struct btrfs_header { /* generation flags to be added */ } __attribute__ ((__packed__)); -#define MAX_LEVEL 8 -#define NODEPTRS_PER_BLOCK ((CTREE_BLOCKSIZE - sizeof(struct btrfs_header)) / \ +#define BTRFS_MAX_LEVEL 8 +#define NODEPTRS_PER_BLOCK ((BTRFS_BLOCKSIZE - sizeof(struct btrfs_header)) / \ (sizeof(struct btrfs_disk_key) + sizeof(u64))) -struct tree_buffer; +struct btrfs_buffer; /* * in ram representation of the tree. extent_root is used for all allocations * and for the extent tree extent_root root. current_insert is used * only for the extent tree. */ -struct ctree_root { - struct tree_buffer *node; - struct tree_buffer *commit_root; - struct ctree_root *extent_root; +struct btrfs_root { + struct btrfs_buffer *node; + struct btrfs_buffer *commit_root; + struct btrfs_root *extent_root; struct btrfs_key current_insert; struct btrfs_key last_insert; int fp; @@ -74,7 +74,7 @@ struct ctree_root { /* * describes a tree on disk */ -struct ctree_root_info { +struct btrfs_root_info { u64 fsid[2]; /* FS specific uuid */ u64 blocknr; /* blocknr of this block */ u64 objectid; /* inode number of this root */ @@ -88,9 +88,9 @@ struct ctree_root_info { * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc */ -struct ctree_super_block { - struct ctree_root_info root_info; - struct ctree_root_info extent_info; +struct btrfs_super_block { + struct btrfs_root_info root_info; + struct btrfs_root_info extent_info; } __attribute__ ((__packed__)); /* @@ -111,13 +111,13 @@ struct btrfs_item { * The data is separate from the items to get the keys closer together * during searches. */ -#define LEAF_DATA_SIZE (CTREE_BLOCKSIZE - sizeof(struct btrfs_header)) -struct leaf { +#define LEAF_DATA_SIZE (BTRFS_BLOCKSIZE - sizeof(struct btrfs_header)) +struct btrfs_leaf { struct btrfs_header header; union { struct btrfs_item items[LEAF_DATA_SIZE/ sizeof(struct btrfs_item)]; - u8 data[CTREE_BLOCKSIZE-sizeof(struct btrfs_header)]; + u8 data[BTRFS_BLOCKSIZE - sizeof(struct btrfs_header)]; }; } __attribute__ ((__packed__)); @@ -125,7 +125,7 @@ struct leaf { * all non-leaf blocks are nodes, they hold only keys and pointers to * other blocks */ -struct node { +struct btrfs_node { struct btrfs_header header; struct btrfs_disk_key keys[NODEPTRS_PER_BLOCK]; __le64 blockptrs[NODEPTRS_PER_BLOCK]; @@ -135,50 +135,51 @@ struct node { * items in the extent btree are used to record the objectid of the * owner of the block and the number of references */ -struct extent_item { +struct btrfs_extent_item { __le32 refs; __le64 owner; } __attribute__ ((__packed__)); /* - * ctree_paths remember the path taken from the root down to the leaf. - * level 0 is always the leaf, and nodes[1...MAX_LEVEL] will point + * btrfs_paths remember the path taken from the root down to the leaf. + * level 0 is always the leaf, and nodes[1...BTRFS_MAX_LEVEL] will point * to any other levels that are present. * * The slots array records the index of the item or block pointer * used while walking the tree. */ -struct ctree_path { - struct tree_buffer *nodes[MAX_LEVEL]; - int slots[MAX_LEVEL]; +struct btrfs_path { + struct btrfs_buffer *nodes[BTRFS_MAX_LEVEL]; + int slots[BTRFS_MAX_LEVEL]; }; -static inline u64 btrfs_extent_owner(struct extent_item *ei) +static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) { return le64_to_cpu(ei->owner); } -static inline void btrfs_set_extent_owner(struct extent_item *ei, u64 val) +static inline void btrfs_set_extent_owner(struct btrfs_extent_item *ei, u64 val) { ei->owner = cpu_to_le64(val); } -static inline u32 btrfs_extent_refs(struct extent_item *ei) +static inline u32 btrfs_extent_refs(struct btrfs_extent_item *ei) { return le32_to_cpu(ei->refs); } -static inline void btrfs_set_extent_refs(struct extent_item *ei, u32 val) +static inline void btrfs_set_extent_refs(struct btrfs_extent_item *ei, u32 val) { ei->refs = cpu_to_le32(val); } -static inline u64 btrfs_node_blockptr(struct node *n, int nr) +static inline u64 btrfs_node_blockptr(struct btrfs_node *n, int nr) { return le64_to_cpu(n->blockptrs[nr]); } -static inline void btrfs_set_node_blockptr(struct node *n, int nr, u64 val) +static inline void btrfs_set_node_blockptr(struct btrfs_node *n, int nr, + u64 val) { n->blockptrs[nr] = cpu_to_le64(val); } @@ -300,34 +301,34 @@ static inline void btrfs_set_header_flags(struct btrfs_header *h, u16 val) static inline int btrfs_header_level(struct btrfs_header *h) { - return btrfs_header_flags(h) & (MAX_LEVEL - 1); + return btrfs_header_flags(h) & (BTRFS_MAX_LEVEL - 1); } static inline void btrfs_set_header_level(struct btrfs_header *h, int level) { u16 flags; - BUG_ON(level > MAX_LEVEL); - flags = btrfs_header_flags(h) & ~(MAX_LEVEL - 1); + BUG_ON(level > BTRFS_MAX_LEVEL); + flags = btrfs_header_flags(h) & ~(BTRFS_MAX_LEVEL - 1); btrfs_set_header_flags(h, flags | level); } -static inline int btrfs_is_leaf(struct node *n) +static inline int btrfs_is_leaf(struct btrfs_node *n) { return (btrfs_header_level(&n->header) == 0); } -struct tree_buffer *alloc_free_block(struct ctree_root *root); -int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf); -int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks); -int search_slot(struct ctree_root *root, struct btrfs_key *key, - struct ctree_path *p, int ins_len, int cow); -void release_path(struct ctree_root *root, struct ctree_path *p); -void init_path(struct ctree_path *p); -int del_item(struct ctree_root *root, struct ctree_path *path); -int insert_item(struct ctree_root *root, struct btrfs_key *key, +struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root); +int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf); +int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks); +int btrfs_search_slot(struct btrfs_root *root, struct btrfs_key *key, + struct btrfs_path *p, int ins_len, int cow); +void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); +void btrfs_init_path(struct btrfs_path *p); +int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path); +int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *key, void *data, int data_size); -int next_leaf(struct ctree_root *root, struct ctree_path *path); -int leaf_free_space(struct leaf *leaf); -int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap); -int btrfs_finish_extent_commit(struct ctree_root *root); +int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); +int btrfs_leaf_free_space(struct btrfs_leaf *leaf); +int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap); +int btrfs_finish_extent_commit(struct btrfs_root *root); #endif diff --git a/fs/btrfs/debug-tree.c b/fs/btrfs/debug-tree.c index 21f607d8b05..6da0a7aa0f4 100644 --- a/fs/btrfs/debug-tree.c +++ b/fs/btrfs/debug-tree.c @@ -7,13 +7,13 @@ #include "print-tree.h" int main(int ac, char **av) { - struct ctree_super_block super; - struct ctree_root *root; + struct btrfs_super_block super; + struct btrfs_root *root; radix_tree_init(); root = open_ctree("dbfile", &super); printf("root tree\n"); - print_tree(root, root->node); + btrfs_print_tree(root, root->node); printf("map tree\n"); - print_tree(root->extent_root, root->extent_root->node); + btrfs_print_tree(root->extent_root, root->extent_root->node); return 0; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 065e888d2c0..c34c0c60935 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -13,7 +13,7 @@ static int allocated_blocks = 0; int cache_max = 10000; -static int check_tree_block(struct ctree_root *root, struct tree_buffer *buf) +static int check_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) { if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) BUG(); @@ -23,18 +23,18 @@ static int check_tree_block(struct ctree_root *root, struct tree_buffer *buf) return 0; } -static int free_some_buffers(struct ctree_root *root) +static int free_some_buffers(struct btrfs_root *root) { struct list_head *node, *next; - struct tree_buffer *b; + struct btrfs_buffer *b; if (root->cache_size < cache_max) return 0; list_for_each_safe(node, next, &root->cache) { - b = list_entry(node, struct tree_buffer, cache); + b = list_entry(node, struct btrfs_buffer, cache); if (b->count == 1) { BUG_ON(!list_empty(&b->dirty)); list_del_init(&b->cache); - tree_block_release(root, b); + btrfs_block_release(root, b); if (root->cache_size < cache_max) break; } @@ -42,11 +42,11 @@ static int free_some_buffers(struct ctree_root *root) return 0; } -struct tree_buffer *alloc_tree_block(struct ctree_root *root, u64 blocknr) +struct btrfs_buffer *alloc_tree_block(struct btrfs_root *root, u64 blocknr) { - struct tree_buffer *buf; + struct btrfs_buffer *buf; int ret; - buf = malloc(sizeof(struct tree_buffer)); + buf = malloc(sizeof(struct btrfs_buffer)); if (!buf) return buf; allocated_blocks++; @@ -66,9 +66,9 @@ struct tree_buffer *alloc_tree_block(struct ctree_root *root, u64 blocknr) return buf; } -struct tree_buffer *find_tree_block(struct ctree_root *root, u64 blocknr) +struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr) { - struct tree_buffer *buf; + struct btrfs_buffer *buf; buf = radix_tree_lookup(&root->cache_radix, blocknr); if (buf) { buf->count++; @@ -82,10 +82,10 @@ struct tree_buffer *find_tree_block(struct ctree_root *root, u64 blocknr) return buf; } -struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) +struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) { - loff_t offset = blocknr * CTREE_BLOCKSIZE; - struct tree_buffer *buf; + loff_t offset = blocknr * BTRFS_BLOCKSIZE; + struct btrfs_buffer *buf; int ret; buf = radix_tree_lookup(&root->cache_radix, blocknr); @@ -95,8 +95,8 @@ struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) buf = alloc_tree_block(root, blocknr); if (!buf) return NULL; - ret = pread(root->fp, &buf->node, CTREE_BLOCKSIZE, offset); - if (ret != CTREE_BLOCKSIZE) { + ret = pread(root->fp, &buf->node, BTRFS_BLOCKSIZE, offset); + if (ret != BTRFS_BLOCKSIZE) { free(buf); return NULL; } @@ -106,7 +106,7 @@ struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) return buf; } -int dirty_tree_block(struct ctree_root *root, struct tree_buffer *buf) +int dirty_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) { if (!list_empty(&buf->dirty)) return 0; @@ -115,46 +115,47 @@ int dirty_tree_block(struct ctree_root *root, struct tree_buffer *buf) return 0; } -int clean_tree_block(struct ctree_root *root, struct tree_buffer *buf) +int clean_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) { if (!list_empty(&buf->dirty)) { list_del_init(&buf->dirty); - tree_block_release(root, buf); + btrfs_block_release(root, buf); } return 0; } -int write_tree_block(struct ctree_root *root, struct tree_buffer *buf) +int write_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) { u64 blocknr = buf->blocknr; - loff_t offset = blocknr * CTREE_BLOCKSIZE; + loff_t offset = blocknr * BTRFS_BLOCKSIZE; int ret; if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) BUG(); - ret = pwrite(root->fp, &buf->node, CTREE_BLOCKSIZE, offset); - if (ret != CTREE_BLOCKSIZE) + ret = pwrite(root->fp, &buf->node, BTRFS_BLOCKSIZE, offset); + if (ret != BTRFS_BLOCKSIZE) return ret; return 0; } -static int __commit_transaction(struct ctree_root *root) +static int __commit_transaction(struct btrfs_root *root) { - struct tree_buffer *b; + struct btrfs_buffer *b; int ret = 0; int wret; while(!list_empty(&root->trans)) { - b = list_entry(root->trans.next, struct tree_buffer, dirty); + b = list_entry(root->trans.next, struct btrfs_buffer, dirty); list_del_init(&b->dirty); wret = write_tree_block(root, b); if (wret) ret = wret; - tree_block_release(root, b); + btrfs_block_release(root, b); } return ret; } -int commit_transaction(struct ctree_root *root, struct ctree_super_block *s) +int btrfs_commit_transaction(struct btrfs_root *root, + struct btrfs_super_block *s) { int ret = 0; @@ -163,20 +164,20 @@ int commit_transaction(struct ctree_root *root, struct ctree_super_block *s) ret = __commit_transaction(root->extent_root); BUG_ON(ret); if (root->commit_root != root->node) { - struct tree_buffer *snap = root->commit_root; + struct btrfs_buffer *snap = root->commit_root; root->commit_root = root->node; root->node->count++; ret = btrfs_drop_snapshot(root, snap); BUG_ON(ret); - // tree_block_release(root, snap); + // btrfs_block_release(root, snap); } write_ctree_super(root, s); btrfs_finish_extent_commit(root); return ret; } -static int __setup_root(struct ctree_root *root, struct ctree_root *extent_root, - struct ctree_root_info *info, int fp) +static int __setup_root(struct btrfs_root *root, struct btrfs_root *extent_root, + struct btrfs_root_info *info, int fp) { INIT_LIST_HEAD(&root->trans); INIT_LIST_HEAD(&root->cache); @@ -191,10 +192,10 @@ static int __setup_root(struct ctree_root *root, struct ctree_root *extent_root, return 0; } -struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) +struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) { - struct ctree_root *root = malloc(sizeof(struct ctree_root)); - struct ctree_root *extent_root = malloc(sizeof(struct ctree_root)); + struct btrfs_root *root = malloc(sizeof(struct btrfs_root)); + struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root)); int fp; int ret; @@ -207,16 +208,16 @@ struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) INIT_RADIX_TREE(&root->pinned_radix, GFP_KERNEL); INIT_RADIX_TREE(&extent_root->pinned_radix, GFP_KERNEL); INIT_RADIX_TREE(&extent_root->cache_radix, GFP_KERNEL); - ret = pread(fp, super, sizeof(struct ctree_super_block), - CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); + ret = pread(fp, super, sizeof(struct btrfs_super_block), + BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); if (ret == 0 || super->root_info.tree_root == 0) { printf("making new FS!\n"); ret = mkfs(fp); if (ret) return NULL; - ret = pread(fp, super, sizeof(struct ctree_super_block), - CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); - if (ret != sizeof(struct ctree_super_block)) + ret = pread(fp, super, sizeof(struct btrfs_super_block), + BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); + if (ret != sizeof(struct btrfs_super_block)) return NULL; } BUG_ON(ret < 0); @@ -227,18 +228,19 @@ struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) return root; } -static int __update_root(struct ctree_root *root, struct ctree_root_info *info) +static int __update_root(struct btrfs_root *root, struct btrfs_root_info *info) { info->tree_root = root->node->blocknr; return 0; } -int write_ctree_super(struct ctree_root *root, struct ctree_super_block *s) +int write_ctree_super(struct btrfs_root *root, struct btrfs_super_block *s) { int ret; __update_root(root, &s->root_info); __update_root(root->extent_root, &s->extent_info); - ret = pwrite(root->fp, s, sizeof(*s), CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); + ret = pwrite(root->fp, s, sizeof(*s), + BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); if (ret != sizeof(*s)) { fprintf(stderr, "failed to write new super block err %d\n", ret); return ret; @@ -246,19 +248,19 @@ int write_ctree_super(struct ctree_root *root, struct ctree_super_block *s) return 0; } -static int drop_cache(struct ctree_root *root) +static int drop_cache(struct btrfs_root *root) { while(!list_empty(&root->cache)) { - struct tree_buffer *b = list_entry(root->cache.next, - struct tree_buffer, cache); + struct btrfs_buffer *b = list_entry(root->cache.next, + struct btrfs_buffer, cache); list_del_init(&b->cache); - tree_block_release(root, b); + btrfs_block_release(root, b); } return 0; } -int close_ctree(struct ctree_root *root, struct ctree_super_block *s) +int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) { - commit_transaction(root, s); + btrfs_commit_transaction(root, s); __commit_transaction(root->extent_root); write_ctree_super(root, s); drop_cache(root->extent_root); @@ -268,16 +270,16 @@ int close_ctree(struct ctree_root *root, struct ctree_super_block *s) close(root->fp); if (root->node) - tree_block_release(root, root->node); + btrfs_block_release(root, root->node); if (root->extent_root->node) - tree_block_release(root->extent_root, root->extent_root->node); - tree_block_release(root, root->commit_root); + btrfs_block_release(root->extent_root, root->extent_root->node); + btrfs_block_release(root, root->commit_root); free(root); printf("on close %d blocks are allocated\n", allocated_blocks); return 0; } -void tree_block_release(struct ctree_root *root, struct tree_buffer *buf) +void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf) { buf->count--; if (buf->count < 0) diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 1c0af7c56c2..b391335864b 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -2,29 +2,30 @@ #define __DISKIO__ #include "list.h" -struct tree_buffer { +struct btrfs_buffer { u64 blocknr; int count; union { - struct node node; - struct leaf leaf; + struct btrfs_node node; + struct btrfs_leaf leaf; }; struct list_head dirty; struct list_head cache; }; -struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr); -struct tree_buffer *find_tree_block(struct ctree_root *root, u64 blocknr); -int write_tree_block(struct ctree_root *root, struct tree_buffer *buf); -int dirty_tree_block(struct ctree_root *root, struct tree_buffer *buf); -int clean_tree_block(struct ctree_root *root, struct tree_buffer *buf); -int commit_transaction(struct ctree_root *root, struct ctree_super_block *s); -struct ctree_root *open_ctree(char *filename, struct ctree_super_block *s); -int close_ctree(struct ctree_root *root, struct ctree_super_block *s); -void tree_block_release(struct ctree_root *root, struct tree_buffer *buf); -int write_ctree_super(struct ctree_root *root, struct ctree_super_block *s); +struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr); +struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr); +int write_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf); +int dirty_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf); +int clean_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf); +int btrfs_commit_transaction(struct btrfs_root *root, + struct btrfs_super_block *s); +struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *s); +int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s); +void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf); +int write_ctree_super(struct btrfs_root *root, struct btrfs_super_block *s); int mkfs(int fd); -#define CTREE_SUPER_INFO_OFFSET(bs) (16 * (bs)) +#define BTRFS_SUPER_INFO_OFFSET(bs) (16 * (bs)) #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1676a6595cc..4a4f2d810d9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6,11 +6,11 @@ #include "disk-io.h" #include "print-tree.h" -static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, +static int find_free_extent(struct btrfs_root *orig_root, u64 num_blocks, u64 search_start, u64 search_end, struct btrfs_key *ins); -static int finish_current_insert(struct ctree_root *extent_root); -static int run_pending(struct ctree_root *extent_root); +static int finish_current_insert(struct btrfs_root *extent_root); +static int run_pending(struct btrfs_root *extent_root); /* * pending extents are blocks that we're trying to allocate in the extent @@ -21,62 +21,63 @@ static int run_pending(struct ctree_root *extent_root); */ #define CTREE_EXTENT_PENDING_DEL 0 -static int inc_block_ref(struct ctree_root *root, u64 blocknr) +static int inc_block_ref(struct btrfs_root *root, u64 blocknr) { - struct ctree_path path; + struct btrfs_path path; int ret; struct btrfs_key key; - struct leaf *l; - struct extent_item *item; + struct btrfs_leaf *l; + struct btrfs_extent_item *item; struct btrfs_key ins; u32 refs; find_free_extent(root->extent_root, 0, 0, (u64)-1, &ins); - init_path(&path); + btrfs_init_path(&path); key.objectid = blocknr; key.flags = 0; key.offset = 1; - ret = search_slot(root->extent_root, &key, &path, 0, 1); + ret = btrfs_search_slot(root->extent_root, &key, &path, 0, 1); if (ret != 0) BUG(); BUG_ON(ret != 0); l = &path.nodes[0]->leaf; - item = (struct extent_item *)(l->data + btrfs_item_offset(l->items + - path.slots[0])); + item = (struct btrfs_extent_item *)(l->data + + btrfs_item_offset(l->items + + path.slots[0])); refs = btrfs_extent_refs(item); btrfs_set_extent_refs(item, refs + 1); BUG_ON(list_empty(&path.nodes[0]->dirty)); - release_path(root->extent_root, &path); + btrfs_release_path(root->extent_root, &path); finish_current_insert(root->extent_root); run_pending(root->extent_root); return 0; } -static int lookup_block_ref(struct ctree_root *root, u64 blocknr, u32 *refs) +static int lookup_block_ref(struct btrfs_root *root, u64 blocknr, u32 *refs) { - struct ctree_path path; + struct btrfs_path path; int ret; struct btrfs_key key; - struct leaf *l; - struct extent_item *item; - init_path(&path); + struct btrfs_leaf *l; + struct btrfs_extent_item *item; + btrfs_init_path(&path); key.objectid = blocknr; key.flags = 0; key.offset = 1; - ret = search_slot(root->extent_root, &key, &path, 0, 0); + ret = btrfs_search_slot(root->extent_root, &key, &path, 0, 0); if (ret != 0) BUG(); l = &path.nodes[0]->leaf; - item = (struct extent_item *)(l->data + + item = (struct btrfs_extent_item *)(l->data + btrfs_item_offset(l->items + path.slots[0])); *refs = btrfs_extent_refs(item); - release_path(root->extent_root, &path); + btrfs_release_path(root->extent_root, &path); return 0; } -int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf) +int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf) { u64 blocknr; int i; @@ -93,9 +94,9 @@ int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf) return 0; } -int btrfs_finish_extent_commit(struct ctree_root *root) +int btrfs_finish_extent_commit(struct btrfs_root *root) { - struct ctree_root *extent_root = root->extent_root; + struct btrfs_root *extent_root = root->extent_root; unsigned long gang[8]; int ret; int i; @@ -115,10 +116,10 @@ int btrfs_finish_extent_commit(struct ctree_root *root) return 0; } -static int finish_current_insert(struct ctree_root *extent_root) +static int finish_current_insert(struct btrfs_root *extent_root) { struct btrfs_key ins; - struct extent_item extent_item; + struct btrfs_extent_item extent_item; int i; int ret; @@ -130,7 +131,7 @@ static int finish_current_insert(struct ctree_root *extent_root) for (i = 0; i < extent_root->current_insert.flags; i++) { ins.objectid = extent_root->current_insert.objectid + i; - ret = insert_item(extent_root, &ins, &extent_item, + ret = btrfs_insert_item(extent_root, &ins, &extent_item, sizeof(extent_item)); BUG_ON(ret); } @@ -141,14 +142,14 @@ static int finish_current_insert(struct ctree_root *extent_root) /* * remove an extent from the root, returns 0 on success */ -int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) +static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) { - struct ctree_path path; + struct btrfs_path path; struct btrfs_key key; - struct ctree_root *extent_root = root->extent_root; + struct btrfs_root *extent_root = root->extent_root; int ret; struct btrfs_item *item; - struct extent_item *ei; + struct btrfs_extent_item *ei; struct btrfs_key ins; u32 refs; @@ -157,16 +158,16 @@ int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) key.offset = num_blocks; find_free_extent(root, 0, 0, (u64)-1, &ins); - init_path(&path); - ret = search_slot(extent_root, &key, &path, -1, 1); + btrfs_init_path(&path); + ret = btrfs_search_slot(extent_root, &key, &path, -1, 1); if (ret) { printf("failed to find %Lu\n", key.objectid); - print_tree(extent_root, extent_root->node); + btrfs_print_tree(extent_root, extent_root->node); printf("failed to find %Lu\n", key.objectid); BUG(); } item = path.nodes[0]->leaf.items + path.slots[0]; - ei = (struct extent_item *)(path.nodes[0]->leaf.data + + ei = (struct btrfs_extent_item *)(path.nodes[0]->leaf.data + btrfs_item_offset(item)); BUG_ON(ei->refs == 0); refs = btrfs_extent_refs(ei) - 1; @@ -180,14 +181,14 @@ int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) BUG_ON(err); radix_tree_preload_end(); } - ret = del_item(extent_root, &path); + ret = btrfs_del_item(extent_root, &path); if (root != extent_root && extent_root->last_insert.objectid < blocknr) extent_root->last_insert.objectid = blocknr; if (ret) BUG(); } - release_path(extent_root, &path); + btrfs_release_path(extent_root, &path); finish_current_insert(extent_root); return ret; } @@ -196,10 +197,10 @@ int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) * find all the blocks marked as pending in the radix tree and remove * them from the extent map */ -static int del_pending_extents(struct ctree_root *extent_root) +static int del_pending_extents(struct btrfs_root *extent_root) { int ret; - struct tree_buffer *gang[4]; + struct btrfs_buffer *gang[4]; int i; while(1) { @@ -214,13 +215,13 @@ static int del_pending_extents(struct ctree_root *extent_root) radix_tree_tag_clear(&extent_root->cache_radix, gang[i]->blocknr, CTREE_EXTENT_PENDING_DEL); - tree_block_release(extent_root, gang[i]); + btrfs_block_release(extent_root, gang[i]); } } return 0; } -static int run_pending(struct ctree_root *extent_root) +static int run_pending(struct btrfs_root *extent_root) { while(radix_tree_tagged(&extent_root->cache_radix, CTREE_EXTENT_PENDING_DEL)) @@ -232,11 +233,11 @@ static int run_pending(struct ctree_root *extent_root) /* * remove an extent from the root, returns 0 on success */ -int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) +int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) { struct btrfs_key key; - struct ctree_root *extent_root = root->extent_root; - struct tree_buffer *t; + struct btrfs_root *extent_root = root->extent_root; + struct btrfs_buffer *t; int pending_ret; int ret; @@ -262,11 +263,11 @@ int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) * ins->offset == number of blocks * Any available blocks before search_start are skipped. */ -static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, +static int find_free_extent(struct btrfs_root *orig_root, u64 num_blocks, u64 search_start, u64 search_end, struct btrfs_key *ins) { - struct ctree_path path; + struct btrfs_path path; struct btrfs_key key; int ret; u64 hole_size = 0; @@ -274,20 +275,20 @@ static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, u64 last_block; u64 test_block; int start_found; - struct leaf *l; - struct ctree_root * root = orig_root->extent_root; + struct btrfs_leaf *l; + struct btrfs_root * root = orig_root->extent_root; int total_needed = num_blocks; total_needed += (btrfs_header_level(&root->node->node.header) + 1) * 3; if (root->last_insert.objectid > search_start) search_start = root->last_insert.objectid; check_failed: - init_path(&path); + btrfs_init_path(&path); ins->objectid = search_start; ins->offset = 0; ins->flags = 0; start_found = 0; - ret = search_slot(root, ins, &path, 0, 0); + ret = btrfs_search_slot(root, ins, &path, 0, 0); if (ret < 0) goto error; @@ -298,7 +299,7 @@ check_failed: l = &path.nodes[0]->leaf; slot = path.slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { - ret = next_leaf(root, &path); + ret = btrfs_next_leaf(root, &path); if (ret == 0) continue; if (ret < 0) @@ -336,7 +337,7 @@ check_pending: /* we have to make sure we didn't find an extent that has already * been allocated by the map tree or the original allocation */ - release_path(root, &path); + btrfs_release_path(root, &path); BUG_ON(ins->objectid < search_start); for (test_block = ins->objectid; test_block < ins->objectid + total_needed; test_block++) { @@ -353,7 +354,7 @@ check_pending: ins->offset = num_blocks; return 0; error: - release_path(root, &path); + btrfs_release_path(root, &path); return ret; } @@ -364,13 +365,13 @@ error: * * returns 0 if everything worked, non-zero otherwise. */ -int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, +int alloc_extent(struct btrfs_root *root, u64 num_blocks, u64 search_start, u64 search_end, u64 owner, struct btrfs_key *ins) { int ret; int pending_ret; - struct ctree_root *extent_root = root->extent_root; - struct extent_item extent_item; + struct btrfs_root *extent_root = root->extent_root; + struct btrfs_extent_item extent_item; btrfs_set_extent_refs(&extent_item, 1); btrfs_set_extent_owner(&extent_item, owner); @@ -390,7 +391,7 @@ int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, if (ret) return ret; - ret = insert_item(extent_root, ins, &extent_item, + ret = btrfs_insert_item(extent_root, ins, &extent_item, sizeof(extent_item)); finish_current_insert(extent_root); @@ -406,11 +407,11 @@ int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, * helper function to allocate a block for a given tree * returns the tree buffer or NULL. */ -struct tree_buffer *alloc_free_block(struct ctree_root *root) +struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root) { struct btrfs_key ins; int ret; - struct tree_buffer *buf; + struct btrfs_buffer *buf; ret = alloc_extent(root, 1, 0, (unsigned long)-1, btrfs_header_parentid(&root->node->node.header), @@ -424,10 +425,10 @@ struct tree_buffer *alloc_free_block(struct ctree_root *root) return buf; } -int walk_down_tree(struct ctree_root *root, struct ctree_path *path, int *level) +int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, int *level) { - struct tree_buffer *next; - struct tree_buffer *cur; + struct btrfs_buffer *next; + struct btrfs_buffer *cur; u64 blocknr; int ret; u32 refs; @@ -445,33 +446,33 @@ int walk_down_tree(struct ctree_root *root, struct ctree_path *path, int *level) ret = lookup_block_ref(root, blocknr, &refs); if (refs != 1 || *level == 1) { path->slots[*level]++; - ret = free_extent(root, blocknr, 1); + ret = btrfs_free_extent(root, blocknr, 1); BUG_ON(ret); continue; } BUG_ON(ret); next = read_tree_block(root, blocknr); if (path->nodes[*level-1]) - tree_block_release(root, path->nodes[*level-1]); + btrfs_block_release(root, path->nodes[*level-1]); path->nodes[*level-1] = next; *level = btrfs_header_level(&next->node.header); path->slots[*level] = 0; } out: - ret = free_extent(root, path->nodes[*level]->blocknr, 1); - tree_block_release(root, path->nodes[*level]); + ret = btrfs_free_extent(root, path->nodes[*level]->blocknr, 1); + btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; BUG_ON(ret); return 0; } -int walk_up_tree(struct ctree_root *root, struct ctree_path *path, int *level) +int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, int *level) { int i; int slot; int ret; - for(i = *level; i < MAX_LEVEL - 1 && path->nodes[i]; i++) { + for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; if (slot < btrfs_header_nritems(&path->nodes[i]->node.header)- 1) { @@ -479,9 +480,9 @@ int walk_up_tree(struct ctree_root *root, struct ctree_path *path, int *level) *level = i; return 0; } else { - ret = free_extent(root, + ret = btrfs_free_extent(root, path->nodes[*level]->blocknr, 1); - tree_block_release(root, path->nodes[*level]); + btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; BUG_ON(ret); @@ -490,15 +491,15 @@ int walk_up_tree(struct ctree_root *root, struct ctree_path *path, int *level) return 1; } -int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap) +int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap) { int ret; int level; - struct ctree_path path; + struct btrfs_path path; int i; int orig_level; - init_path(&path); + btrfs_init_path(&path); level = btrfs_header_level(&snap->node.header); orig_level = level; @@ -514,7 +515,7 @@ int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap) } for (i = 0; i <= orig_level; i++) { if (path.nodes[i]) { - tree_block_release(root, path.nodes[i]); + btrfs_block_release(root, path.nodes[i]); } } diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c index e0d3bc1d025..fc1923320de 100644 --- a/fs/btrfs/mkfs.c +++ b/fs/btrfs/mkfs.c @@ -12,10 +12,10 @@ int mkfs(int fd) { - struct ctree_root_info info[2]; - struct leaf empty_leaf; + struct btrfs_root_info info[2]; + struct btrfs_leaf empty_leaf; struct btrfs_item item; - struct extent_item extent_item; + struct btrfs_extent_item extent_item; int ret; /* setup the super block area */ @@ -28,7 +28,7 @@ int mkfs(int fd) info[1].objectid = 2; info[1].tree_root = 18; ret = pwrite(fd, info, sizeof(info), - CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); + BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); if (ret != sizeof(info)) return -1; @@ -36,7 +36,7 @@ int mkfs(int fd) memset(&empty_leaf, 0, sizeof(empty_leaf)); btrfs_set_header_parentid(&empty_leaf.header, 1); btrfs_set_header_blocknr(&empty_leaf.header, 17); - ret = pwrite(fd, &empty_leaf, sizeof(empty_leaf), 17 * CTREE_BLOCKSIZE); + ret = pwrite(fd, &empty_leaf, sizeof(empty_leaf), 17 * BTRFS_BLOCKSIZE); if (ret != sizeof(empty_leaf)) return -1; @@ -48,9 +48,9 @@ int mkfs(int fd) btrfs_set_key_objectid(&item.key, 0); btrfs_set_key_offset(&item.key, 17); btrfs_set_key_flags(&item.key, 0); - btrfs_set_item_offset(&item, - LEAF_DATA_SIZE - sizeof(struct extent_item)); - btrfs_set_item_size(&item, sizeof(struct extent_item)); + btrfs_set_item_offset(&item, LEAF_DATA_SIZE - + sizeof(struct btrfs_extent_item)); + btrfs_set_item_size(&item, sizeof(struct btrfs_extent_item)); btrfs_set_extent_refs(&extent_item, 1); btrfs_set_extent_owner(&extent_item, 0); memcpy(empty_leaf.items, &item, sizeof(item)); @@ -60,8 +60,8 @@ int mkfs(int fd) /* item2, give block 17 to the root */ btrfs_set_key_objectid(&item.key, 17); btrfs_set_key_offset(&item.key, 1); - btrfs_set_item_offset(&item, - LEAF_DATA_SIZE - sizeof(struct extent_item) * 2); + btrfs_set_item_offset(&item, LEAF_DATA_SIZE - + sizeof(struct btrfs_extent_item) * 2); btrfs_set_extent_owner(&extent_item, 1); memcpy(empty_leaf.items + 1, &item, sizeof(item)); memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, @@ -70,13 +70,13 @@ int mkfs(int fd) /* item3, give block 18 for the extent root */ btrfs_set_key_objectid(&item.key, 18); btrfs_set_key_offset(&item.key, 1); - btrfs_set_item_offset(&item, - LEAF_DATA_SIZE - sizeof(struct extent_item) * 3); + btrfs_set_item_offset(&item, LEAF_DATA_SIZE - + sizeof(struct btrfs_extent_item) * 3); btrfs_set_extent_owner(&extent_item, 2); memcpy(empty_leaf.items + 2, &item, sizeof(item)); memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, btrfs_item_size(&item)); - ret = pwrite(fd, &empty_leaf, sizeof(empty_leaf), 18 * CTREE_BLOCKSIZE); + ret = pwrite(fd, &empty_leaf, sizeof(empty_leaf), 18 * BTRFS_BLOCKSIZE); if (ret != sizeof(empty_leaf)) return -1; return 0; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 101278e1139..c95c85640aa 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -5,14 +5,14 @@ #include "ctree.h" #include "disk-io.h" -void print_leaf(struct leaf *l) +void btrfs_print_leaf(struct btrfs_leaf *l) { int i; u32 nr = btrfs_header_nritems(&l->header); struct btrfs_item *item; - struct extent_item *ei; + struct btrfs_extent_item *ei; printf("leaf %Lu total ptrs %d free space %d\n", - btrfs_header_blocknr(&l->header), nr, leaf_free_space(l)); + btrfs_header_blocknr(&l->header), nr, btrfs_leaf_free_space(l)); fflush(stdout); for (i = 0 ; i < nr ; i++) { item = l->items + i; @@ -26,24 +26,25 @@ void print_leaf(struct leaf *l) fflush(stdout); printf("\t\titem data %.*s\n", btrfs_item_size(item), l->data + btrfs_item_offset(item)); - ei = (struct extent_item *)(l->data + btrfs_item_offset(item)); + ei = (struct btrfs_extent_item *)(l->data + + btrfs_item_offset(item)); printf("\t\textent data refs %u owner %Lu\n", ei->refs, ei->owner); fflush(stdout); } } -void print_tree(struct ctree_root *root, struct tree_buffer *t) +void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t) { int i; u32 nr; - struct node *c; + struct btrfs_node *c; if (!t) return; c = &t->node; nr = btrfs_header_nritems(&c->header); if (btrfs_is_leaf(c)) { - print_leaf((struct leaf *)c); + btrfs_print_leaf((struct btrfs_leaf *)c); return; } printf("node %Lu level %d total ptrs %d free spc %u\n", t->blocknr, @@ -58,17 +59,17 @@ void print_tree(struct ctree_root *root, struct tree_buffer *t) fflush(stdout); } for (i = 0; i < nr; i++) { - struct tree_buffer *next_buf = read_tree_block(root, + struct btrfs_buffer *next_buf = read_tree_block(root, btrfs_node_blockptr(c, i)); - struct node *next = &next_buf->node; + struct btrfs_node *next = &next_buf->node; if (btrfs_is_leaf(next) && btrfs_header_level(&c->header) != 1) BUG(); if (btrfs_header_level(&next->header) != btrfs_header_level(&c->header) - 1) BUG(); - print_tree(root, next_buf); - tree_block_release(root, next_buf); + btrfs_print_tree(root, next_buf); + btrfs_block_release(root, next_buf); } } diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 3c1e9a3e026..e8d0b847c02 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -1,3 +1,3 @@ -void print_leaf(struct leaf *l); -void print_tree(struct ctree_root *root, struct tree_buffer *t); +void btrfs_print_leaf(struct btrfs_leaf *l); +void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t); diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c index ab3bda53a2f..66bdc57905d 100644 --- a/fs/btrfs/quick-test.c +++ b/fs/btrfs/quick-test.c @@ -22,9 +22,9 @@ int main(int ac, char **av) { int run_size = 100000; int max_key = 100000000; int tree_size = 0; - struct ctree_path path; - struct ctree_super_block super; - struct ctree_root *root; + struct btrfs_path path; + struct btrfs_super_block super; + struct btrfs_root *root; radix_tree_init(); @@ -40,12 +40,12 @@ int main(int ac, char **av) { ins.objectid = num; ins.offset = 0; ins.flags = 0; - ret = insert_item(root, &ins, buf, strlen(buf)); + ret = btrfs_insert_item(root, &ins, buf, strlen(buf)); if (!ret) tree_size++; free(buf); if (i == run_size - 5) { - commit_transaction(root, &super); + btrfs_commit_transaction(root, &super); } } @@ -57,16 +57,16 @@ int main(int ac, char **av) { for (i = 0; i < run_size; i++) { num = next_key(i, max_key); ins.objectid = num; - init_path(&path); + btrfs_init_path(&path); if (i % 10000 == 0) fprintf(stderr, "search %d:%d\n", num, i); - ret = search_slot(root, &ins, &path, 0, 0); + ret = btrfs_search_slot(root, &ins, &path, 0, 0); if (ret) { - print_tree(root, root->node); + btrfs_print_tree(root, root->node); printf("unable to find %d\n", num); exit(1); } - release_path(root, &path); + btrfs_release_path(root, &path); } close_ctree(root, &super); root = open_ctree("dbfile", &super); @@ -81,17 +81,17 @@ int main(int ac, char **av) { for (i = 0 ; i < run_size/4; i++) { num = next_key(i, max_key); ins.objectid = num; - init_path(&path); - ret = search_slot(root, &ins, &path, -1, 1); + btrfs_init_path(&path); + ret = btrfs_search_slot(root, &ins, &path, -1, 1); if (!ret) { if (i % 10000 == 0) fprintf(stderr, "del %d:%d\n", num, i); - ret = del_item(root, &path); + ret = btrfs_del_item(root, &path); if (ret != 0) BUG(); tree_size--; } - release_path(root, &path); + btrfs_release_path(root, &path); } close_ctree(root, &super); root = open_ctree("dbfile", &super); @@ -103,7 +103,7 @@ int main(int ac, char **av) { ins.objectid = num; if (i % 10000 == 0) fprintf(stderr, "insert %d:%d\n", num, i); - ret = insert_item(root, &ins, buf, strlen(buf)); + ret = btrfs_insert_item(root, &ins, buf, strlen(buf)); if (!ret) tree_size++; free(buf); @@ -115,25 +115,25 @@ int main(int ac, char **av) { for (i = 0; i < run_size; i++) { num = next_key(i, max_key); ins.objectid = num; - init_path(&path); + btrfs_init_path(&path); if (i % 10000 == 0) fprintf(stderr, "search %d:%d\n", num, i); - ret = search_slot(root, &ins, &path, 0, 0); + ret = btrfs_search_slot(root, &ins, &path, 0, 0); if (ret) { - print_tree(root, root->node); + btrfs_print_tree(root, root->node); printf("unable to find %d\n", num); exit(1); } - release_path(root, &path); + btrfs_release_path(root, &path); } printf("starting big long delete run\n"); while(root->node && btrfs_header_nritems(&root->node->node.header) > 0) { - struct leaf *leaf; + struct btrfs_leaf *leaf; int slot; ins.objectid = (u64)-1; - init_path(&path); - ret = search_slot(root, &ins, &path, -1, 1); + btrfs_init_path(&path); + ret = btrfs_search_slot(root, &ins, &path, -1, 1); if (ret == 0) BUG(); @@ -149,26 +149,26 @@ int main(int ac, char **av) { btrfs_disk_key_to_cpu(&last, &leaf->items[slot].key); if (tree_size % 10000 == 0) printf("big del %d:%d\n", tree_size, i); - ret = del_item(root, &path); + ret = btrfs_del_item(root, &path); if (ret != 0) { printf("del_item returned %d\n", ret); BUG(); } tree_size--; } - release_path(root, &path); + btrfs_release_path(root, &path); } /* printf("previous tree:\n"); - print_tree(root, root->commit_root); + btrfs_print_tree(root, root->commit_root); printf("map before commit\n"); - print_tree(root->extent_root, root->extent_root->node); + btrfs_print_tree(root->extent_root, root->extent_root->node); */ - commit_transaction(root, &super); + btrfs_commit_transaction(root, &super); printf("tree size is now %d\n", tree_size); printf("root %p commit root %p\n", root->node, root->commit_root); printf("map tree\n"); - print_tree(root->extent_root, root->extent_root->node); + btrfs_print_tree(root->extent_root, root->extent_root->node); close_ctree(root, &super); return 0; } diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index e767528bc52..a8ef0478991 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -8,7 +8,7 @@ #include "print-tree.h" int keep_running = 1; -struct ctree_super_block super; +struct btrfs_super_block super; static int setup_key(struct radix_tree_root *root, struct btrfs_key *key, int exists) @@ -36,17 +36,17 @@ again: return 0; } -static int ins_one(struct ctree_root *root, struct radix_tree_root *radix) +static int ins_one(struct btrfs_root *root, struct radix_tree_root *radix) { - struct ctree_path path; + struct btrfs_path path; struct btrfs_key key; int ret; char buf[128]; unsigned long oid; - init_path(&path); + btrfs_init_path(&path); ret = setup_key(radix, &key, 0); sprintf(buf, "str-%Lu\n", key.objectid); - ret = insert_item(root, &key, buf, strlen(buf)); + ret = btrfs_insert_item(root, &key, buf, strlen(buf)); if (ret) goto error; oid = (unsigned long)key.objectid; @@ -61,18 +61,18 @@ error: return -1; } -static int insert_dup(struct ctree_root *root, struct radix_tree_root *radix) +static int insert_dup(struct btrfs_root *root, struct radix_tree_root *radix) { - struct ctree_path path; + struct btrfs_path path; struct btrfs_key key; int ret; char buf[128]; - init_path(&path); + btrfs_init_path(&path); ret = setup_key(radix, &key, 1); if (ret < 0) return 0; sprintf(buf, "str-%Lu\n", key.objectid); - ret = insert_item(root, &key, buf, strlen(buf)); + ret = btrfs_insert_item(root, &key, buf, strlen(buf)); if (ret != -EEXIST) { printf("insert on %Lu gave us %d\n", key.objectid, ret); return 1; @@ -80,21 +80,21 @@ static int insert_dup(struct ctree_root *root, struct radix_tree_root *radix) return 0; } -static int del_one(struct ctree_root *root, struct radix_tree_root *radix) +static int del_one(struct btrfs_root *root, struct radix_tree_root *radix) { - struct ctree_path path; + struct btrfs_path path; struct btrfs_key key; int ret; unsigned long *ptr; - init_path(&path); + btrfs_init_path(&path); ret = setup_key(radix, &key, 1); if (ret < 0) return 0; - ret = search_slot(root, &key, &path, -1, 1); + ret = btrfs_search_slot(root, &key, &path, -1, 1); if (ret) goto error; - ret = del_item(root, &path); - release_path(root, &path); + ret = btrfs_del_item(root, &path); + btrfs_release_path(root, &path); if (ret != 0) goto error; ptr = radix_tree_delete(radix, key.objectid); @@ -106,17 +106,17 @@ error: return -1; } -static int lookup_item(struct ctree_root *root, struct radix_tree_root *radix) +static int lookup_item(struct btrfs_root *root, struct radix_tree_root *radix) { - struct ctree_path path; + struct btrfs_path path; struct btrfs_key key; int ret; - init_path(&path); + btrfs_init_path(&path); ret = setup_key(radix, &key, 1); if (ret < 0) return 0; - ret = search_slot(root, &key, &path, 0, 1); - release_path(root, &path); + ret = btrfs_search_slot(root, &key, &path, 0, 1); + btrfs_release_path(root, &path); if (ret) goto error; return 0; @@ -125,17 +125,17 @@ error: return -1; } -static int lookup_enoent(struct ctree_root *root, struct radix_tree_root *radix) +static int lookup_enoent(struct btrfs_root *root, struct radix_tree_root *radix) { - struct ctree_path path; + struct btrfs_path path; struct btrfs_key key; int ret; - init_path(&path); + btrfs_init_path(&path); ret = setup_key(radix, &key, 0); if (ret < 0) return ret; - ret = search_slot(root, &key, &path, 0, 0); - release_path(root, &path); + ret = btrfs_search_slot(root, &key, &path, 0, 0); + btrfs_release_path(root, &path); if (ret <= 0) goto error; return 0; @@ -144,10 +144,10 @@ error: return -1; } -static int empty_tree(struct ctree_root *root, struct radix_tree_root *radix, +static int empty_tree(struct btrfs_root *root, struct radix_tree_root *radix, int nr) { - struct ctree_path path; + struct btrfs_path path; struct btrfs_key key; unsigned long found = 0; int ret; @@ -159,22 +159,22 @@ static int empty_tree(struct ctree_root *root, struct radix_tree_root *radix, key.flags = 0; key.objectid = (unsigned long)-1; while(nr-- >= 0) { - init_path(&path); - ret = search_slot(root, &key, &path, -1, 1); + btrfs_init_path(&path); + ret = btrfs_search_slot(root, &key, &path, -1, 1); if (ret < 0) { - release_path(root, &path); + btrfs_release_path(root, &path); return ret; } if (ret != 0) { if (path.slots[0] == 0) { - release_path(root, &path); + btrfs_release_path(root, &path); break; } path.slots[0] -= 1; } slot = path.slots[0]; found=btrfs_key_objectid(&path.nodes[0]->leaf.items[slot].key); - ret = del_item(root, &path); + ret = btrfs_del_item(root, &path); count++; if (ret) { fprintf(stderr, @@ -182,7 +182,7 @@ static int empty_tree(struct ctree_root *root, struct radix_tree_root *radix, found); return -1; } - release_path(root, &path); + btrfs_release_path(root, &path); ptr = radix_tree_delete(radix, found); if (!ptr) goto error; @@ -195,7 +195,7 @@ error: return -1; } -static int fill_tree(struct ctree_root *root, struct radix_tree_root *radix, +static int fill_tree(struct btrfs_root *root, struct radix_tree_root *radix, int count) { int i; @@ -207,7 +207,7 @@ static int fill_tree(struct ctree_root *root, struct radix_tree_root *radix, goto out; } if (i % 1000 == 0) { - ret = commit_transaction(root, &super); + ret = btrfs_commit_transaction(root, &super); if (ret) { fprintf(stderr, "fill commit failed\n"); return ret; @@ -223,7 +223,7 @@ out: return ret; } -static int bulk_op(struct ctree_root *root, struct radix_tree_root *radix) +static int bulk_op(struct btrfs_root *root, struct radix_tree_root *radix) { int ret; int nr = rand() % 5000; @@ -242,13 +242,13 @@ static int bulk_op(struct ctree_root *root, struct radix_tree_root *radix) } -int (*ops[])(struct ctree_root *root, struct radix_tree_root *radix) = +int (*ops[])(struct btrfs_root *root, struct radix_tree_root *radix) = { ins_one, insert_dup, del_one, lookup_item, lookup_enoent, bulk_op }; -static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) +static int fill_radix(struct btrfs_root *root, struct radix_tree_root *radix) { - struct ctree_path path; + struct btrfs_path path; struct btrfs_key key; unsigned long found; int ret; @@ -259,16 +259,16 @@ static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) key.flags = 0; key.objectid = (unsigned long)-1; while(1) { - init_path(&path); - ret = search_slot(root, &key, &path, 0, 0); + btrfs_init_path(&path); + ret = btrfs_search_slot(root, &key, &path, 0, 0); if (ret < 0) { - release_path(root, &path); + btrfs_release_path(root, &path); return ret; } slot = path.slots[0]; if (ret != 0) { if (slot == 0) { - release_path(root, &path); + btrfs_release_path(root, &path); break; } slot -= 1; @@ -287,7 +287,7 @@ static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) radix_tree_preload_end(); } - release_path(root, &path); + btrfs_release_path(root, &path); key.objectid = found - 1; if (key.objectid > found) break; @@ -312,7 +312,7 @@ int print_usage(void) int main(int ac, char **av) { RADIX_TREE(radix, GFP_KERNEL); - struct ctree_root *root; + struct btrfs_root *root; int i; int ret; int count; @@ -370,7 +370,7 @@ int main(int ac, char **av) if (ret) { fprintf(stderr, "op %d failed %d:%d\n", op, i, iterations); - print_tree(root, root->node); + btrfs_print_tree(root, root->node); fprintf(stderr, "op %d failed %d:%d\n", op, i, iterations); err = ret; -- cgit v1.2.3-70-g09d2 From 9aca1d51323c0291a7358e0728b64ec4f17d1a77 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 13 Mar 2007 11:09:37 -0400 Subject: Btrfs: make some funcs static Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 16 ++++++++-------- fs/btrfs/extent-tree.c | 47 ++++++++++++++++++++++++++++++++++++----------- 2 files changed, 44 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c8529b394d5..bbb63229170 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -34,7 +34,7 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) memset(p, 0, sizeof(*p)); } -int btrfs_cow_block(struct btrfs_root *root, +static int btrfs_cow_block(struct btrfs_root *root, struct btrfs_buffer *buf, struct btrfs_buffer *parent, int parent_slot, @@ -96,7 +96,7 @@ int btrfs_leaf_free_space(struct btrfs_leaf *leaf) /* * compare two keys in a memcmp fashion */ -int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) +static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) { struct btrfs_key k1; @@ -117,7 +117,7 @@ int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) return 0; } -int check_node(struct btrfs_path *path, int level) +static int check_node(struct btrfs_path *path, int level) { int i; struct btrfs_node *parent = NULL; @@ -146,7 +146,7 @@ int check_node(struct btrfs_path *path, int level) return 0; } -int check_leaf(struct btrfs_path *path, int level) +static int check_leaf(struct btrfs_path *path, int level) { int i; struct btrfs_leaf *leaf = &path->nodes[level]->leaf; @@ -186,7 +186,7 @@ int check_leaf(struct btrfs_path *path, int level) return 0; } -int check_block(struct btrfs_path *path, int level) +static int check_block(struct btrfs_path *path, int level) { if (level == 0) return check_leaf(path, level); @@ -202,7 +202,7 @@ int check_block(struct btrfs_path *path, int level) * * slot may point to max if the key is bigger than all of the keys */ -int generic_bin_search(char *p, int item_size, struct btrfs_key *key, +static int generic_bin_search(char *p, int item_size, struct btrfs_key *key, int max, int *slot) { int low = 0; @@ -233,7 +233,7 @@ int generic_bin_search(char *p, int item_size, struct btrfs_key *key, * simple bin_search frontend that does the right thing for * leaves vs nodes */ -int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot) +static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot) { if (btrfs_is_leaf(c)) { struct btrfs_leaf *l = (struct btrfs_leaf *)c; @@ -250,7 +250,7 @@ int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot) return -1; } -struct btrfs_buffer *read_node_slot(struct btrfs_root *root, +static struct btrfs_buffer *read_node_slot(struct btrfs_root *root, struct btrfs_buffer *parent_buf, int slot) { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4a4f2d810d9..a6969538bca 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -365,8 +365,9 @@ error: * * returns 0 if everything worked, non-zero otherwise. */ -int alloc_extent(struct btrfs_root *root, u64 num_blocks, u64 search_start, - u64 search_end, u64 owner, struct btrfs_key *ins) +static int alloc_extent(struct btrfs_root *root, u64 num_blocks, + u64 search_start, u64 search_end, u64 owner, + struct btrfs_key *ins) { int ret; int pending_ret; @@ -425,7 +426,12 @@ struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root) return buf; } -int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, int *level) +/* + * helper function for drop_snapshot, this walks down the tree dropping ref + * counts as it goes. + */ +static int walk_down_tree(struct btrfs_root *root, + struct btrfs_path *path, int *level) { struct btrfs_buffer *next; struct btrfs_buffer *cur; @@ -437,6 +443,9 @@ int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, int *level) BUG_ON(ret); if (refs > 1) goto out; + /* + * walk down to the last node level and free all the leaves + */ while(*level > 0) { cur = path->nodes[*level]; if (path->slots[*level] >= @@ -467,7 +476,13 @@ out: return 0; } -int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, int *level) +/* + * helper for dropping snapshots. This walks back up the tree in the path + * to find the first node higher up where we haven't yet gone through + * all the slots + */ +static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, + int *level) { int i; int slot; @@ -491,9 +506,15 @@ int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, int *level) return 1; } +/* + * drop the reference count on the tree rooted at 'snap'. This traverses + * the tree freeing any blocks that have a ref count of zero after being + * decremented. + */ int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap) { - int ret; + int ret = 0;; + int wret; int level; struct btrfs_path path; int i; @@ -506,18 +527,22 @@ int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap) path.nodes[level] = snap; path.slots[level] = 0; while(1) { - ret = walk_down_tree(root, &path, &level); - if (ret > 0) + wret = walk_down_tree(root, &path, &level); + if (wret > 0) break; - ret = walk_up_tree(root, &path, &level); - if (ret > 0) + if (wret < 0) + ret = wret; + + wret = walk_up_tree(root, &path, &level); + if (wret > 0) break; + if (wret < 0) + ret = wret; } for (i = 0; i <= orig_level; i++) { if (path.nodes[i]) { btrfs_block_release(root, path.nodes[i]); } } - - return 0; + return ret; } -- cgit v1.2.3-70-g09d2 From 3768f3689fc76ecea17414936dff7a02746a4355 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 13 Mar 2007 16:47:54 -0400 Subject: Btrfs: Change the super to point to a tree of trees to enable persistent snapshots Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 3 +- fs/btrfs/TODO | 1 + fs/btrfs/ctree.h | 123 ++++++++++++++++++++++++++++++++++++++------ fs/btrfs/debug-tree.c | 4 +- fs/btrfs/disk-io.c | 135 +++++++++++++++++++++++++++++++++++++++---------- fs/btrfs/disk-io.h | 3 +- fs/btrfs/extent-tree.c | 15 +++--- fs/btrfs/mkfs.c | 128 +++++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/print-tree.c | 10 ++-- fs/btrfs/root-tree.c | 88 ++++++++++++++++++++++++++++++++ 10 files changed, 445 insertions(+), 65 deletions(-) create mode 100644 fs/btrfs/root-tree.c (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index f8532200e9a..13ed6313ac8 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -2,7 +2,8 @@ CC=gcc CFLAGS = -g -Wall headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h -objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o +objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o \ + root-tree.o # if you don't have sparse installed, use ls instead CHECKFLAGS=-D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise \ diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index ad29267b873..ff90e5eaf43 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -5,6 +5,7 @@ * make a real mkfs and superblock * Do checksumming * Define FS objects in terms of different item types +* add inode tree * Add block mapping tree (simple dm layer) * Add simple tree locking (semaphore per tree) * Make allocator smarter diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5dfcc90812c..0968899fb7f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -4,8 +4,13 @@ #include "list.h" #include "kerncompat.h" +#define BTRFS_MAGIC "_BtRfS_M" #define BTRFS_BLOCKSIZE 1024 +#define BTRFS_ROOT_TREE_OBJECTID 1 +#define BTRFS_EXTENT_TREE_OBJECTID 2 +#define BTRFS_FS_TREE_OBJECTID 3 + /* * the key defines the order in the tree, and so it also defines (optimal) * block layout. objectid corresonds to the inode number. The flags @@ -36,7 +41,7 @@ struct btrfs_key { * every tree block (leaf or node) starts with this header. */ struct btrfs_header { - __le64 fsid[2]; /* FS specific uuid */ + u8 fsid[16]; /* FS specific uuid */ __le64 blocknr; /* which block this node is supposed to live in */ __le64 parentid; /* objectid of the tree root */ __le32 csum; @@ -52,6 +57,14 @@ struct btrfs_header { struct btrfs_buffer; +struct btrfs_root_item { + __le64 blocknr; + __le32 flags; + __le64 block_limit; + __le64 blocks_used; + __le32 refs; +}; + /* * in ram representation of the tree. extent_root is used for all allocations * and for the extent tree extent_root root. current_insert is used @@ -61,6 +74,7 @@ struct btrfs_root { struct btrfs_buffer *node; struct btrfs_buffer *commit_root; struct btrfs_root *extent_root; + struct btrfs_root *tree_root; struct btrfs_key current_insert; struct btrfs_key last_insert; int fp; @@ -69,28 +83,25 @@ struct btrfs_root { struct list_head trans; struct list_head cache; int cache_size; + int ref_cows; + struct btrfs_root_item root_item; + struct btrfs_key root_key; }; -/* - * describes a tree on disk - */ -struct btrfs_root_info { - u64 fsid[2]; /* FS specific uuid */ - u64 blocknr; /* blocknr of this block */ - u64 objectid; /* inode number of this root */ - u64 tree_root; /* the tree root block */ - u32 csum; - u32 ham; - u64 snapuuid[2]; /* root specific uuid */ -} __attribute__ ((__packed__)); - /* * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc */ struct btrfs_super_block { - struct btrfs_root_info root_info; - struct btrfs_root_info extent_info; + u8 fsid[16]; /* FS specific uuid */ + __le64 blocknr; /* this block number */ + __le32 csum; + __le64 magic; + __le16 blocksize; + __le64 generation; + __le64 root; + __le64 total_blocks; + __le64 blocks_used; } __attribute__ ((__packed__)); /* @@ -317,6 +328,79 @@ static inline int btrfs_is_leaf(struct btrfs_node *n) return (btrfs_header_level(&n->header) == 0); } +static inline u64 btrfs_root_blocknr(struct btrfs_root_item *item) +{ + return le64_to_cpu(item->blocknr); +} + +static inline void btrfs_set_root_blocknr(struct btrfs_root_item *item, u64 val) +{ + item->blocknr = cpu_to_le64(val); +} + +static inline u32 btrfs_root_refs(struct btrfs_root_item *item) +{ + return le32_to_cpu(item->refs); +} + +static inline void btrfs_set_root_refs(struct btrfs_root_item *item, u32 val) +{ + item->refs = cpu_to_le32(val); +} + +static inline u64 btrfs_super_blocknr(struct btrfs_super_block *s) +{ + return le64_to_cpu(s->blocknr); +} + +static inline void btrfs_set_super_blocknr(struct btrfs_super_block *s, u64 val) +{ + s->blocknr = cpu_to_le64(val); +} + +static inline u64 btrfs_super_root(struct btrfs_super_block *s) +{ + return le64_to_cpu(s->root); +} + +static inline void btrfs_set_super_root(struct btrfs_super_block *s, u64 val) +{ + s->root = cpu_to_le64(val); +} + +static inline u64 btrfs_super_total_blocks(struct btrfs_super_block *s) +{ + return le64_to_cpu(s->total_blocks); +} + +static inline void btrfs_set_super_total_blocks(struct btrfs_super_block *s, + u64 val) +{ + s->total_blocks = cpu_to_le64(val); +} + +static inline u64 btrfs_super_blocks_used(struct btrfs_super_block *s) +{ + return le64_to_cpu(s->blocks_used); +} + +static inline void btrfs_set_super_blocks_used(struct btrfs_super_block *s, + u64 val) +{ + s->blocks_used = cpu_to_le64(val); +} + +static inline u16 btrfs_super_blocksize(struct btrfs_super_block *s) +{ + return le16_to_cpu(s->blocksize); +} + +static inline void btrfs_set_super_blocksize(struct btrfs_super_block *s, + u16 val) +{ + s->blocksize = cpu_to_le16(val); +} + struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root); int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf); int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks); @@ -331,4 +415,11 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_leaf *leaf); int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap); int btrfs_finish_extent_commit(struct btrfs_root *root); +int btrfs_del_root(struct btrfs_root *root, struct btrfs_key *key); +int btrfs_insert_root(struct btrfs_root *root, struct btrfs_key *key, + struct btrfs_root_item *item); +int btrfs_update_root(struct btrfs_root *root, struct btrfs_key *key, + struct btrfs_root_item *item); +int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, + struct btrfs_root_item *item, struct btrfs_key *key); #endif diff --git a/fs/btrfs/debug-tree.c b/fs/btrfs/debug-tree.c index 6da0a7aa0f4..de45fb4dfdd 100644 --- a/fs/btrfs/debug-tree.c +++ b/fs/btrfs/debug-tree.c @@ -11,9 +11,11 @@ int main(int ac, char **av) { struct btrfs_root *root; radix_tree_init(); root = open_ctree("dbfile", &super); - printf("root tree\n"); + printf("fs tree\n"); btrfs_print_tree(root, root->node); printf("map tree\n"); btrfs_print_tree(root->extent_root, root->extent_root->node); + printf("root tree\n"); + btrfs_print_tree(root->tree_root, root->tree_root->node); return 0; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c34c0c60935..3d4bf6833f2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -154,41 +154,96 @@ static int __commit_transaction(struct btrfs_root *root) return ret; } +static int commit_extent_and_tree_roots(struct btrfs_root *tree_root, + struct btrfs_root *extent_root) +{ + int ret; + u64 old_extent_block; + + while(1) { + old_extent_block = btrfs_root_blocknr(&extent_root->root_item); + if (old_extent_block == extent_root->node->blocknr) + break; + btrfs_set_root_blocknr(&extent_root->root_item, + extent_root->node->blocknr); + ret = btrfs_update_root(tree_root, + &extent_root->root_key, + &extent_root->root_item); + BUG_ON(ret); + } + __commit_transaction(extent_root); + __commit_transaction(tree_root); + return 0; +} + int btrfs_commit_transaction(struct btrfs_root *root, struct btrfs_super_block *s) { int ret = 0; + struct btrfs_buffer *snap = root->commit_root; + struct btrfs_key snap_key; ret = __commit_transaction(root); - if (!ret && root != root->extent_root) - ret = __commit_transaction(root->extent_root); BUG_ON(ret); - if (root->commit_root != root->node) { - struct btrfs_buffer *snap = root->commit_root; - root->commit_root = root->node; - root->node->count++; - ret = btrfs_drop_snapshot(root, snap); - BUG_ON(ret); - // btrfs_block_release(root, snap); - } + + if (root->commit_root == root->node) + return 0; + + memcpy(&snap_key, &root->root_key, sizeof(snap_key)); + root->root_key.offset++; + + btrfs_set_root_blocknr(&root->root_item, root->node->blocknr); + ret = btrfs_insert_root(root->tree_root, &root->root_key, + &root->root_item); + BUG_ON(ret); + + ret = commit_extent_and_tree_roots(root->tree_root, root->extent_root); + BUG_ON(ret); + write_ctree_super(root, s); - btrfs_finish_extent_commit(root); + btrfs_finish_extent_commit(root->extent_root); + btrfs_finish_extent_commit(root->tree_root); + + root->commit_root = root->node; + root->node->count++; + ret = btrfs_drop_snapshot(root, snap); + BUG_ON(ret); + + ret = btrfs_del_root(root->tree_root, &snap_key); + BUG_ON(ret); + return ret; } -static int __setup_root(struct btrfs_root *root, struct btrfs_root *extent_root, - struct btrfs_root_info *info, int fp) +static int __setup_root(struct btrfs_root *root, u64 objectid, int fp) { INIT_LIST_HEAD(&root->trans); INIT_LIST_HEAD(&root->cache); root->cache_size = 0; root->fp = fp; root->node = NULL; - root->extent_root = extent_root; root->commit_root = NULL; - root->node = read_tree_block(root, info->tree_root); memset(&root->current_insert, 0, sizeof(root->current_insert)); memset(&root->last_insert, 0, sizeof(root->last_insert)); + memset(&root->root_key, 0, sizeof(root->root_key)); + memset(&root->root_item, 0, sizeof(root->root_item)); + return 0; +} + +static int find_and_setup_root(struct btrfs_root *tree_root, u64 objectid, + struct btrfs_root *root, int fp) +{ + int ret; + + __setup_root(root, objectid, fp); + ret = btrfs_find_last_root(tree_root, objectid, + &root->root_item, &root->root_key); + BUG_ON(ret); + + root->node = read_tree_block(root, + btrfs_root_blocknr(&root->root_item)); + root->ref_cows = 0; + BUG_ON(!root->node); return 0; } @@ -196,9 +251,19 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) { struct btrfs_root *root = malloc(sizeof(struct btrfs_root)); struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root)); + struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root)); int fp; int ret; + root->extent_root = extent_root; + root->tree_root = tree_root; + + extent_root->extent_root = extent_root; + extent_root->tree_root = tree_root; + + tree_root->extent_root = extent_root; + tree_root->tree_root = tree_root; + fp = open(filename, O_CREAT | O_RDWR, 0600); if (fp < 0) { free(root); @@ -208,11 +273,14 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) INIT_RADIX_TREE(&root->pinned_radix, GFP_KERNEL); INIT_RADIX_TREE(&extent_root->pinned_radix, GFP_KERNEL); INIT_RADIX_TREE(&extent_root->cache_radix, GFP_KERNEL); + INIT_RADIX_TREE(&tree_root->pinned_radix, GFP_KERNEL); + INIT_RADIX_TREE(&tree_root->cache_radix, GFP_KERNEL); + ret = pread(fp, super, sizeof(struct btrfs_super_block), BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); - if (ret == 0 || super->root_info.tree_root == 0) { + if (ret == 0 || btrfs_super_root(super) == 0) { printf("making new FS!\n"); - ret = mkfs(fp); + ret = mkfs(fp, 0, BTRFS_BLOCKSIZE); if (ret) return NULL; ret = pread(fp, super, sizeof(struct btrfs_super_block), @@ -221,24 +289,29 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) return NULL; } BUG_ON(ret < 0); - __setup_root(root, extent_root, &super->root_info, fp); - __setup_root(extent_root, extent_root, &super->extent_info, fp); + + __setup_root(tree_root, BTRFS_ROOT_TREE_OBJECTID, fp); + tree_root->node = read_tree_block(tree_root, btrfs_super_root(super)); + BUG_ON(!tree_root->node); + + ret = find_and_setup_root(tree_root, BTRFS_EXTENT_TREE_OBJECTID, + extent_root, fp); + BUG_ON(ret); + + ret = find_and_setup_root(tree_root, BTRFS_FS_TREE_OBJECTID, + root, fp); + BUG_ON(ret); + root->commit_root = root->node; root->node->count++; + root->ref_cows = 1; return root; } -static int __update_root(struct btrfs_root *root, struct btrfs_root_info *info) -{ - info->tree_root = root->node->blocknr; - return 0; -} - int write_ctree_super(struct btrfs_root *root, struct btrfs_super_block *s) { int ret; - __update_root(root, &s->root_info); - __update_root(root->extent_root, &s->extent_info); + btrfs_set_super_root(s, root->tree_root->node->blocknr); ret = pwrite(root->fp, s, sizeof(*s), BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); if (ret != sizeof(*s)) { @@ -260,19 +333,25 @@ static int drop_cache(struct btrfs_root *root) } int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) { + int ret; btrfs_commit_transaction(root, s); - __commit_transaction(root->extent_root); + ret = commit_extent_and_tree_roots(root->tree_root, root->extent_root); + BUG_ON(ret); write_ctree_super(root, s); drop_cache(root->extent_root); + drop_cache(root->tree_root); drop_cache(root); BUG_ON(!list_empty(&root->trans)); BUG_ON(!list_empty(&root->extent_root->trans)); + BUG_ON(!list_empty(&root->tree_root->trans)); close(root->fp); if (root->node) btrfs_block_release(root, root->node); if (root->extent_root->node) btrfs_block_release(root->extent_root, root->extent_root->node); + if (root->tree_root->node) + btrfs_block_release(root->tree_root, root->tree_root->node); btrfs_block_release(root, root->commit_root); free(root); printf("on close %d blocks are allocated\n", allocated_blocks); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index b391335864b..c22a61f9233 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -24,7 +24,8 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *s); int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s); void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf); int write_ctree_super(struct btrfs_root *root, struct btrfs_super_block *s); -int mkfs(int fd); +int mkfs(int fd, u64 num_blocks, u16 blocksize); + #define BTRFS_SUPER_INFO_OFFSET(bs) (16 * (bs)) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a6969538bca..21f39b40469 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -82,7 +82,7 @@ int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf) u64 blocknr; int i; - if (root == root->extent_root) + if (!root->ref_cows) return 0; if (btrfs_is_leaf(&buf->node)) return 0; @@ -96,23 +96,22 @@ int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf) int btrfs_finish_extent_commit(struct btrfs_root *root) { - struct btrfs_root *extent_root = root->extent_root; unsigned long gang[8]; int ret; int i; while(1) { - ret = radix_tree_gang_lookup(&extent_root->pinned_radix, + ret = radix_tree_gang_lookup(&root->pinned_radix, (void **)gang, 0, ARRAY_SIZE(gang)); if (!ret) break; for (i = 0; i < ret; i++) { - radix_tree_delete(&extent_root->pinned_radix, gang[i]); + radix_tree_delete(&root->pinned_radix, gang[i]); } } - extent_root->last_insert.objectid = 0; - extent_root->last_insert.offset = 0; + root->last_insert.objectid = 0; + root->last_insert.offset = 0; return 0; } @@ -173,7 +172,7 @@ static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) refs = btrfs_extent_refs(ei) - 1; btrfs_set_extent_refs(ei, refs); if (refs == 0) { - if (root == extent_root) { + if (!root->ref_cows) { int err; radix_tree_preload(GFP_KERNEL); err = radix_tree_insert(&extent_root->pinned_radix, @@ -513,7 +512,7 @@ static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, */ int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap) { - int ret = 0;; + int ret = 0; int wret; int level; struct btrfs_path path; diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c index fc1923320de..dd14ed4fea6 100644 --- a/fs/btrfs/mkfs.c +++ b/fs/btrfs/mkfs.c @@ -10,6 +10,120 @@ #include "ctree.h" #include "disk-io.h" +int mkfs(int fd, u64 num_blocks, u16 blocksize) +{ + struct btrfs_super_block super; + struct btrfs_leaf empty_leaf; + struct btrfs_root_item root_item; + struct btrfs_item item; + struct btrfs_extent_item extent_item; + char *block; + int ret; + u16 itemoff; + + btrfs_set_super_blocknr(&super, 16); + btrfs_set_super_root(&super, 17); + strcpy((char *)(&super.magic), BTRFS_MAGIC); + btrfs_set_super_blocksize(&super, blocksize); + btrfs_set_super_total_blocks(&super, num_blocks); + btrfs_set_super_blocks_used(&super, 0); + + block = malloc(blocksize); + memset(block, 0, blocksize); + BUG_ON(sizeof(super) > blocksize); + memcpy(block, &super, sizeof(super)); + ret = pwrite(fd, block, blocksize, BTRFS_SUPER_INFO_OFFSET(blocksize)); + BUG_ON(ret != blocksize); + + /* create the tree of root objects */ + memset(&empty_leaf, 0, sizeof(empty_leaf)); + btrfs_set_header_parentid(&empty_leaf.header, BTRFS_ROOT_TREE_OBJECTID); + btrfs_set_header_blocknr(&empty_leaf.header, 17); + btrfs_set_header_nritems(&empty_leaf.header, 2); + + /* create the items for the root tree */ + btrfs_set_root_blocknr(&root_item, 18); + btrfs_set_root_refs(&root_item, 1); + itemoff = LEAF_DATA_SIZE - sizeof(root_item); + btrfs_set_item_offset(&item, itemoff); + btrfs_set_item_size(&item, sizeof(root_item)); + btrfs_set_key_objectid(&item.key, BTRFS_EXTENT_TREE_OBJECTID); + btrfs_set_key_offset(&item.key, 0); + btrfs_set_key_flags(&item.key, 0); + memcpy(empty_leaf.items, &item, sizeof(item)); + memcpy(empty_leaf.data + itemoff, &root_item, sizeof(root_item)); + + btrfs_set_root_blocknr(&root_item, 19); + itemoff = itemoff - sizeof(root_item); + btrfs_set_item_offset(&item, itemoff); + btrfs_set_key_objectid(&item.key, BTRFS_FS_TREE_OBJECTID); + memcpy(empty_leaf.items + 1, &item, sizeof(item)); + memcpy(empty_leaf.data + itemoff, &root_item, sizeof(root_item)); + ret = pwrite(fd, &empty_leaf, blocksize, 17 * blocksize); + + /* create the items for the extent tree */ + btrfs_set_header_parentid(&empty_leaf.header, + BTRFS_EXTENT_TREE_OBJECTID); + btrfs_set_header_blocknr(&empty_leaf.header, 18); + btrfs_set_header_nritems(&empty_leaf.header, 4); + + /* item1, reserve blocks 0-16 */ + btrfs_set_key_objectid(&item.key, 0); + btrfs_set_key_offset(&item.key, 17); + btrfs_set_key_flags(&item.key, 0); + itemoff = LEAF_DATA_SIZE - sizeof(struct btrfs_extent_item); + btrfs_set_item_offset(&item, itemoff); + btrfs_set_item_size(&item, sizeof(struct btrfs_extent_item)); + btrfs_set_extent_refs(&extent_item, 1); + btrfs_set_extent_owner(&extent_item, 0); + memcpy(empty_leaf.items, &item, sizeof(item)); + memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, + btrfs_item_size(&item)); + + /* item2, give block 17 to the root */ + btrfs_set_key_objectid(&item.key, 17); + btrfs_set_key_offset(&item.key, 1); + itemoff = itemoff - sizeof(struct btrfs_extent_item); + btrfs_set_item_offset(&item, itemoff); + btrfs_set_extent_owner(&extent_item, BTRFS_ROOT_TREE_OBJECTID); + memcpy(empty_leaf.items + 1, &item, sizeof(item)); + memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, + btrfs_item_size(&item)); + + /* item3, give block 18 to the extent root */ + btrfs_set_key_objectid(&item.key, 18); + btrfs_set_key_offset(&item.key, 1); + itemoff = itemoff - sizeof(struct btrfs_extent_item); + btrfs_set_item_offset(&item, itemoff); + btrfs_set_extent_owner(&extent_item, BTRFS_EXTENT_TREE_OBJECTID); + memcpy(empty_leaf.items + 2, &item, sizeof(item)); + memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, + btrfs_item_size(&item)); + + /* item4, give block 19 to the FS root */ + btrfs_set_key_objectid(&item.key, 19); + btrfs_set_key_offset(&item.key, 1); + itemoff = itemoff - sizeof(struct btrfs_extent_item); + btrfs_set_item_offset(&item, itemoff); + btrfs_set_extent_owner(&extent_item, BTRFS_FS_TREE_OBJECTID); + memcpy(empty_leaf.items + 3, &item, sizeof(item)); + memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, + btrfs_item_size(&item)); + ret = pwrite(fd, &empty_leaf, blocksize, 18 * blocksize); + if (ret != sizeof(empty_leaf)) + return -1; + + /* finally create the FS root */ + btrfs_set_header_parentid(&empty_leaf.header, BTRFS_FS_TREE_OBJECTID); + btrfs_set_header_blocknr(&empty_leaf.header, 19); + btrfs_set_header_nritems(&empty_leaf.header, 0); + ret = pwrite(fd, &empty_leaf, blocksize, 19 * blocksize); + if (ret != sizeof(empty_leaf)) + return -1; + return 0; +} + +#if 0 int mkfs(int fd) { struct btrfs_root_info info[2]; @@ -20,13 +134,14 @@ int mkfs(int fd) /* setup the super block area */ memset(info, 0, sizeof(info)); - info[0].blocknr = 16; - info[0].objectid = 1; - info[0].tree_root = 17; + btrfs_set_root_blocknr(info, 16); + btrfs_set_root_objectid(info, 1); + btrfs_set_root_tree_root(info, 17); + + btrfs_set_root_blocknr(info + 1, 16); + btrfs_set_root_objectid(info + 1, 2); + btrfs_set_root_tree_root(info + 1, 18); - info[1].blocknr = 16; - info[1].objectid = 2; - info[1].tree_root = 18; ret = pwrite(fd, info, sizeof(info), BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); if (ret != sizeof(info)) @@ -81,3 +196,4 @@ int mkfs(int fd) return -1; return 0; } +#endif diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index c95c85640aa..64829b6b90f 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -11,6 +11,7 @@ void btrfs_print_leaf(struct btrfs_leaf *l) u32 nr = btrfs_header_nritems(&l->header); struct btrfs_item *item; struct btrfs_extent_item *ei; + struct btrfs_root_item *ri; printf("leaf %Lu total ptrs %d free space %d\n", btrfs_header_blocknr(&l->header), nr, btrfs_leaf_free_space(l)); fflush(stdout); @@ -23,13 +24,15 @@ void btrfs_print_leaf(struct btrfs_leaf *l) btrfs_key_offset(&item->key), btrfs_item_offset(item), btrfs_item_size(item)); - fflush(stdout); printf("\t\titem data %.*s\n", btrfs_item_size(item), l->data + btrfs_item_offset(item)); ei = (struct btrfs_extent_item *)(l->data + btrfs_item_offset(item)); - printf("\t\textent data refs %u owner %Lu\n", ei->refs, - ei->owner); + printf("\t\textent data refs %u owner %Lu\n", + btrfs_extent_refs(ei), btrfs_extent_owner(ei)); + ri = (struct btrfs_root_item *)ei; + printf("\t\troot data blocknr %Lu refs %u\n", + btrfs_root_blocknr(ri), btrfs_root_refs(ri)); fflush(stdout); } } @@ -71,6 +74,5 @@ void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t) btrfs_print_tree(root, next_buf); btrfs_block_release(root, next_buf); } - } diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c new file mode 100644 index 00000000000..0ab90cfea98 --- /dev/null +++ b/fs/btrfs/root-tree.c @@ -0,0 +1,88 @@ +#include +#include +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" +#include "print-tree.h" + +int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, + struct btrfs_root_item *item, struct btrfs_key *key) +{ + struct btrfs_path path; + struct btrfs_key search_key; + struct btrfs_leaf *l; + int ret; + int slot; + + search_key.objectid = objectid; + search_key.flags = (u32)-1; + search_key.offset = (u32)-1; + + btrfs_init_path(&path); + ret = btrfs_search_slot(root, &search_key, &path, 0, 0); + if (ret < 0) + goto out; + BUG_ON(ret == 0); + l = &path.nodes[0]->leaf; + BUG_ON(path.slots[0] == 0); + slot = path.slots[0] - 1; + if (btrfs_key_objectid(&l->items[slot].key) != objectid) { + ret = 1; + goto out; + } + memcpy(item, l->data + btrfs_item_offset(l->items + slot), + sizeof(*item)); + btrfs_disk_key_to_cpu(key, &l->items[slot].key); + btrfs_release_path(root, &path); + ret = 0; +out: + return ret; +} + +int btrfs_update_root(struct btrfs_root *root, struct btrfs_key *key, + struct btrfs_root_item *item) +{ + struct btrfs_path path; + struct btrfs_leaf *l; + int ret; + int slot; + + btrfs_init_path(&path); + ret = btrfs_search_slot(root, key, &path, 0, 1); + if (ret < 0) + goto out; + BUG_ON(ret != 0); + l = &path.nodes[0]->leaf; + slot = path.slots[0]; + memcpy(l->data + btrfs_item_offset(l->items + slot), item, + sizeof(*item)); +out: + btrfs_release_path(root, &path); + return ret; +} + +int btrfs_insert_root(struct btrfs_root *root, struct btrfs_key *key, + struct btrfs_root_item *item) +{ + int ret; + ret = btrfs_insert_item(root, key, item, sizeof(*item)); + BUG_ON(ret); + return ret; +} + +int btrfs_del_root(struct btrfs_root *root, struct btrfs_key *key) +{ + struct btrfs_path path; + int ret; + + btrfs_init_path(&path); + ret = btrfs_search_slot(root, key, &path, -1, 1); + if (ret < 0) + goto out; + BUG_ON(ret != 0); + ret = btrfs_del_item(root, &path); +out: + btrfs_release_path(root, &path); + return ret; +} -- cgit v1.2.3-70-g09d2 From 710874947aa0f81c3b348b052e81cb9075113c49 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 14 Mar 2007 09:20:39 -0400 Subject: Btrfs: properly reset block cache on free Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 21f39b40469..7f156d9b708 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -182,7 +182,7 @@ static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) } ret = btrfs_del_item(extent_root, &path); if (root != extent_root && - extent_root->last_insert.objectid < blocknr) + extent_root->last_insert.objectid > blocknr) extent_root->last_insert.objectid = blocknr; if (ret) BUG(); -- cgit v1.2.3-70-g09d2 From 4beb1b8b75a86373f6020103ab840448d14c8880 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 14 Mar 2007 10:31:29 -0400 Subject: Btrfs: add leaf data casting helper Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++++ fs/btrfs/extent-tree.c | 8 ++------ fs/btrfs/print-tree.c | 5 ++--- 3 files changed, 8 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0968899fb7f..05c7707263f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -401,6 +401,10 @@ static inline void btrfs_set_super_blocksize(struct btrfs_super_block *s, s->blocksize = cpu_to_le16(val); } +/* helper function to cast into the data area of the leaf. */ +#define btrfs_item_ptr(leaf, slot, type) \ + ((type *)((leaf)->data + btrfs_item_offset((leaf)->items + (slot)))) + struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root); int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf); int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7f156d9b708..3767744f659 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -41,9 +41,7 @@ static int inc_block_ref(struct btrfs_root *root, u64 blocknr) BUG(); BUG_ON(ret != 0); l = &path.nodes[0]->leaf; - item = (struct btrfs_extent_item *)(l->data + - btrfs_item_offset(l->items + - path.slots[0])); + item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(item); btrfs_set_extent_refs(item, refs + 1); @@ -69,9 +67,7 @@ static int lookup_block_ref(struct btrfs_root *root, u64 blocknr, u32 *refs) if (ret != 0) BUG(); l = &path.nodes[0]->leaf; - item = (struct btrfs_extent_item *)(l->data + - btrfs_item_offset(l->items + - path.slots[0])); + item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); *refs = btrfs_extent_refs(item); btrfs_release_path(root->extent_root, &path); return 0; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 64829b6b90f..e769f36cf05 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -26,11 +26,10 @@ void btrfs_print_leaf(struct btrfs_leaf *l) btrfs_item_size(item)); printf("\t\titem data %.*s\n", btrfs_item_size(item), l->data + btrfs_item_offset(item)); - ei = (struct btrfs_extent_item *)(l->data + - btrfs_item_offset(item)); + ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); printf("\t\textent data refs %u owner %Lu\n", btrfs_extent_refs(ei), btrfs_extent_owner(ei)); - ri = (struct btrfs_root_item *)ei; + ri = btrfs_item_ptr(l, i, struct btrfs_root_item); printf("\t\troot data blocknr %Lu refs %u\n", btrfs_root_blocknr(ri), btrfs_root_refs(ri)); fflush(stdout); -- cgit v1.2.3-70-g09d2 From 123abc88c9087b9c5605566ee3491aaef17fd837 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 14 Mar 2007 14:14:43 -0400 Subject: Btrfs: variable block size support Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 225 ++++++++++++++++++++++++------------------------- fs/btrfs/ctree.h | 62 ++++++++------ fs/btrfs/disk-io.c | 42 ++++----- fs/btrfs/disk-io.h | 9 +- fs/btrfs/extent-tree.c | 6 +- fs/btrfs/mkfs.c | 171 +++++++++++-------------------------- fs/btrfs/print-tree.c | 15 ++-- fs/btrfs/print-tree.h | 2 +- fs/btrfs/quick-test.c | 2 +- fs/btrfs/root-tree.c | 4 +- 10 files changed, 238 insertions(+), 300 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index dc620186e50..ece8de7f38e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -47,7 +47,7 @@ static int btrfs_cow_block(struct btrfs_root *root, return 0; } cow = btrfs_alloc_free_block(root); - memcpy(&cow->node, &buf->node, sizeof(buf->node)); + memcpy(&cow->node, &buf->node, root->blocksize); btrfs_set_header_blocknr(&cow->node.header, cow->blocknr); *cow_ret = cow; btrfs_inc_ref(root, buf); @@ -72,11 +72,12 @@ static int btrfs_cow_block(struct btrfs_root *root, * this returns the address of the start of the last item, * which is the stop of the leaf data stack */ -static inline unsigned int leaf_data_end(struct btrfs_leaf *leaf) +static inline unsigned int leaf_data_end(struct btrfs_root *root, + struct btrfs_leaf *leaf) { u32 nr = btrfs_header_nritems(&leaf->header); if (nr == 0) - return sizeof(leaf->data); + return BTRFS_LEAF_DATA_SIZE(root); return btrfs_item_offset(leaf->items + nr - 1); } @@ -85,12 +86,12 @@ static inline unsigned int leaf_data_end(struct btrfs_leaf *leaf) * the start of the leaf data. IOW, how much room * the leaf has left for both items and data */ -int btrfs_leaf_free_space(struct btrfs_leaf *leaf) +int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf) { - int data_end = leaf_data_end(leaf); + int data_end = leaf_data_end(root, leaf); int nritems = btrfs_header_nritems(&leaf->header); char *items_end = (char *)(leaf->items + nritems + 1); - return (char *)(leaf->data + data_end) - (char *)items_end; + return (char *)(btrfs_leaf_data(leaf) + data_end) - (char *)items_end; } /* @@ -117,7 +118,8 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) return 0; } -static int check_node(struct btrfs_path *path, int level) +static int check_node(struct btrfs_root *root, struct btrfs_path *path, + int level) { int i; struct btrfs_node *parent = NULL; @@ -131,22 +133,23 @@ static int check_node(struct btrfs_path *path, int level) BUG_ON(nritems == 0); if (parent) { struct btrfs_disk_key *parent_key; - parent_key = &parent->keys[parent_slot]; - BUG_ON(memcmp(parent_key, node->keys, + parent_key = &parent->ptrs[parent_slot].key; + BUG_ON(memcmp(parent_key, &node->ptrs[0].key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != btrfs_header_blocknr(&node->header)); } - BUG_ON(nritems > NODEPTRS_PER_BLOCK); + BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); for (i = 0; nritems > 1 && i < nritems - 2; i++) { struct btrfs_key cpukey; - btrfs_disk_key_to_cpu(&cpukey, &node->keys[i + 1]); - BUG_ON(comp_keys(&node->keys[i], &cpukey) >= 0); + btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key); + BUG_ON(comp_keys(&node->ptrs[i].key, &cpukey) >= 0); } return 0; } -static int check_leaf(struct btrfs_path *path, int level) +static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, + int level) { int i; struct btrfs_leaf *leaf = &path->nodes[level]->leaf; @@ -157,14 +160,14 @@ static int check_leaf(struct btrfs_path *path, int level) if (path->nodes[level + 1]) parent = &path->nodes[level + 1]->node; parent_slot = path->slots[level + 1]; - BUG_ON(btrfs_leaf_free_space(leaf) < 0); + BUG_ON(btrfs_leaf_free_space(root, leaf) < 0); if (nritems == 0) return 0; if (parent) { struct btrfs_disk_key *parent_key; - parent_key = &parent->keys[parent_slot]; + parent_key = &parent->ptrs[parent_slot].key; BUG_ON(memcmp(parent_key, &leaf->items[0].key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != @@ -180,17 +183,18 @@ static int check_leaf(struct btrfs_path *path, int level) if (i == 0) { BUG_ON(btrfs_item_offset(leaf->items + i) + btrfs_item_size(leaf->items + i) != - LEAF_DATA_SIZE); + BTRFS_LEAF_DATA_SIZE(root)); } } return 0; } -static int check_block(struct btrfs_path *path, int level) +static int check_block(struct btrfs_root *root, struct btrfs_path *path, + int level) { if (level == 0) - return check_leaf(path, level); - return check_node(path, level); + return check_leaf(root, path, level); + return check_node(root, path, level); } /* @@ -242,8 +246,8 @@ static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot) key, btrfs_header_nritems(&c->header), slot); } else { - return generic_bin_search((void *)c->keys, - sizeof(struct btrfs_disk_key), + return generic_bin_search((void *)c->ptrs, + sizeof(struct btrfs_key_ptr), key, btrfs_header_nritems(&c->header), slot); } @@ -311,7 +315,8 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, } parent = &parent_buf->node; - if (btrfs_header_nritems(&mid->header) > NODEPTRS_PER_BLOCK / 4) + if (btrfs_header_nritems(&mid->header) > + BTRFS_NODEPTRS_PER_BLOCK(root) / 4) return 0; left_buf = read_node_slot(root, parent_buf, pslot - 1); @@ -351,7 +356,8 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, if (wret) ret = wret; } else { - memcpy(parent->keys + pslot + 1, right->keys, + memcpy(&parent->ptrs[pslot + 1].key, + &right->ptrs[0].key, sizeof(struct btrfs_disk_key)); BUG_ON(list_empty(&parent_buf->dirty)); } @@ -387,7 +393,7 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, ret = wret; } else { /* update the parent key to reflect our changes */ - memcpy(parent->keys + pslot, mid->keys, + memcpy(&parent->ptrs[pslot].key, &mid->ptrs[0].key, sizeof(struct btrfs_disk_key)); BUG_ON(list_empty(&parent_buf->dirty)); } @@ -407,7 +413,7 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, } } /* double check we haven't messed things up */ - check_block(path, level); + check_block(root, path, level); if (orig_ptr != btrfs_node_blockptr(&path->nodes[level]->node, path->slots[level])) BUG(); @@ -456,7 +462,7 @@ again: BUG_ON(!cow && ins_len); c = &b->node; p->nodes[level] = b; - ret = check_block(p, level); + ret = check_block(root, p, level); if (ret) return -1; ret = bin_search(c, key, &slot); @@ -465,7 +471,7 @@ again: slot -= 1; p->slots[level] = slot; if (ins_len > 0 && btrfs_header_nritems(&c->header) == - NODEPTRS_PER_BLOCK) { + BTRFS_NODEPTRS_PER_BLOCK(root)) { int sret = split_node(root, p, level); BUG_ON(sret > 0); if (sret) @@ -488,7 +494,7 @@ again: } else { struct btrfs_leaf *l = (struct btrfs_leaf *)c; p->slots[level] = slot; - if (ins_len > 0 && btrfs_leaf_free_space(l) < + if (ins_len > 0 && btrfs_leaf_free_space(root, l) < sizeof(struct btrfs_item) + ins_len) { int sret = split_leaf(root, p, ins_len); BUG_ON(sret > 0); @@ -525,7 +531,7 @@ static int fixup_low_keys(struct btrfs_root *root, if (!path->nodes[i]) break; t = &path->nodes[i]->node; - memcpy(t->keys + tslot, key, sizeof(*key)); + memcpy(&t->ptrs[tslot].key, key, sizeof(*key)); BUG_ON(list_empty(&path->nodes[i]->dirty)); if (tslot != 0) break; @@ -552,7 +558,7 @@ static int push_node_left(struct btrfs_root *root, struct btrfs_buffer *dst_buf, src_nritems = btrfs_header_nritems(&src->header); dst_nritems = btrfs_header_nritems(&dst->header); - push_items = NODEPTRS_PER_BLOCK - dst_nritems; + push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; if (push_items <= 0) { return 1; } @@ -560,16 +566,12 @@ static int push_node_left(struct btrfs_root *root, struct btrfs_buffer *dst_buf, if (src_nritems < push_items) push_items = src_nritems; - memcpy(dst->keys + dst_nritems, src->keys, - push_items * sizeof(struct btrfs_disk_key)); - memcpy(dst->blockptrs + dst_nritems, src->blockptrs, - push_items * sizeof(u64)); + memcpy(dst->ptrs + dst_nritems, src->ptrs, + push_items * sizeof(struct btrfs_key_ptr)); if (push_items < src_nritems) { - memmove(src->keys, src->keys + push_items, + memmove(src->ptrs, src->ptrs + push_items, (src_nritems - push_items) * - sizeof(struct btrfs_disk_key)); - memmove(src->blockptrs, src->blockptrs + push_items, - (src_nritems - push_items) * sizeof(u64)); + sizeof(struct btrfs_key_ptr)); } btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); @@ -601,7 +603,7 @@ static int balance_node_right(struct btrfs_root *root, src_nritems = btrfs_header_nritems(&src->header); dst_nritems = btrfs_header_nritems(&dst->header); - push_items = NODEPTRS_PER_BLOCK - dst_nritems; + push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; if (push_items <= 0) { return 1; } @@ -613,14 +615,10 @@ static int balance_node_right(struct btrfs_root *root, if (max_push < push_items) push_items = max_push; - memmove(dst->keys + push_items, dst->keys, - dst_nritems * sizeof(struct btrfs_disk_key)); - memmove(dst->blockptrs + push_items, dst->blockptrs, - dst_nritems * sizeof(u64)); - memcpy(dst->keys, src->keys + src_nritems - push_items, - push_items * sizeof(struct btrfs_disk_key)); - memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items, - push_items * sizeof(u64)); + memmove(dst->ptrs + push_items, dst->ptrs, + dst_nritems * sizeof(struct btrfs_key_ptr)); + memcpy(dst->ptrs, src->ptrs + src_nritems - push_items, + push_items * sizeof(struct btrfs_key_ptr)); btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); @@ -650,7 +648,7 @@ static int insert_new_root(struct btrfs_root *root, t = btrfs_alloc_free_block(root); c = &t->node; - memset(c, 0, sizeof(c)); + memset(c, 0, root->blocksize); btrfs_set_header_nritems(&c->header, 1); btrfs_set_header_level(&c->header, level); btrfs_set_header_blocknr(&c->header, t->blocknr); @@ -660,8 +658,8 @@ static int insert_new_root(struct btrfs_root *root, if (btrfs_is_leaf(lower)) lower_key = &((struct btrfs_leaf *)lower)->items[0].key; else - lower_key = lower->keys; - memcpy(c->keys, lower_key, sizeof(struct btrfs_disk_key)); + lower_key = &lower->ptrs[0].key; + memcpy(&c->ptrs[0].key, lower_key, sizeof(struct btrfs_disk_key)); btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->blocknr); /* the super has an extra ref to root->node */ btrfs_block_release(root, root->node); @@ -693,19 +691,15 @@ static int insert_ptr(struct btrfs_root *root, nritems = btrfs_header_nritems(&lower->header); if (slot > nritems) BUG(); - if (nritems == NODEPTRS_PER_BLOCK) + if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) BUG(); if (slot != nritems) { - memmove(lower->keys + slot + 1, lower->keys + slot, - (nritems - slot) * sizeof(struct btrfs_disk_key)); - memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot, - (nritems - slot) * sizeof(u64)); + memmove(lower->ptrs + slot + 1, lower->ptrs + slot, + (nritems - slot) * sizeof(struct btrfs_key_ptr)); } - memcpy(lower->keys + slot, key, sizeof(struct btrfs_disk_key)); + memcpy(&lower->ptrs[slot].key, key, sizeof(struct btrfs_disk_key)); btrfs_set_node_blockptr(lower, slot, blocknr); btrfs_set_header_nritems(&lower->header, nritems + 1); - if (lower->keys[1].objectid == 0) - BUG(); BUG_ON(list_empty(&path->nodes[level]->dirty)); return 0; } @@ -747,17 +741,16 @@ static int split_node(struct btrfs_root *root, struct btrfs_path *path, btrfs_set_header_parentid(&split->header, btrfs_header_parentid(&root->node->node.header)); mid = (c_nritems + 1) / 2; - memcpy(split->keys, c->keys + mid, - (c_nritems - mid) * sizeof(struct btrfs_disk_key)); - memcpy(split->blockptrs, c->blockptrs + mid, - (c_nritems - mid) * sizeof(u64)); + memcpy(split->ptrs, c->ptrs + mid, + (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); btrfs_set_header_nritems(&split->header, c_nritems - mid); btrfs_set_header_nritems(&c->header, mid); ret = 0; BUG_ON(list_empty(&t->dirty)); - wret = insert_ptr(root, path, split->keys, split_buffer->blocknr, - path->slots[level + 1] + 1, level + 1); + wret = insert_ptr(root, path, &split->ptrs[0].key, + split_buffer->blocknr, path->slots[level + 1] + 1, + level + 1); if (wret) ret = wret; @@ -825,7 +818,7 @@ static int push_leaf_right(struct btrfs_root *root, struct btrfs_path *path, right_buf = read_tree_block(root, btrfs_node_blockptr(&upper->node, slot + 1)); right = &right_buf->leaf; - free_space = btrfs_leaf_free_space(right); + free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, right_buf); return 1; @@ -833,7 +826,7 @@ static int push_leaf_right(struct btrfs_root *root, struct btrfs_path *path, /* cow and double check */ btrfs_cow_block(root, right_buf, upper, slot + 1, &right_buf); right = &right_buf->leaf; - free_space = btrfs_leaf_free_space(right); + free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, right_buf); return 1; @@ -857,15 +850,14 @@ static int push_leaf_right(struct btrfs_root *root, struct btrfs_path *path, right_nritems = btrfs_header_nritems(&right->header); /* push left to right */ push_space = btrfs_item_end(left->items + left_nritems - push_items); - push_space -= leaf_data_end(left); + push_space -= leaf_data_end(root, left); /* make room in the right data area */ - memmove(right->data + leaf_data_end(right) - push_space, - right->data + leaf_data_end(right), - LEAF_DATA_SIZE - leaf_data_end(right)); + memmove(btrfs_leaf_data(right) + leaf_data_end(root, right) - + push_space, btrfs_leaf_data(right) + leaf_data_end(root, right), + BTRFS_LEAF_DATA_SIZE(root) - leaf_data_end(root, right)); /* copy from the left data area */ - memcpy(right->data + LEAF_DATA_SIZE - push_space, - left->data + leaf_data_end(left), - push_space); + memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - push_space, + btrfs_leaf_data(left) + leaf_data_end(root, left), push_space); memmove(right->items + push_items, right->items, right_nritems * sizeof(struct btrfs_item)); /* copy the items from left to right */ @@ -875,7 +867,7 @@ static int push_leaf_right(struct btrfs_root *root, struct btrfs_path *path, /* update the item pointers */ right_nritems += push_items; btrfs_set_header_nritems(&right->header, right_nritems); - push_space = LEAF_DATA_SIZE; + push_space = BTRFS_LEAF_DATA_SIZE(root); for (i = 0; i < right_nritems; i++) { btrfs_set_item_offset(right->items + i, push_space - btrfs_item_size(right->items + i)); @@ -886,7 +878,7 @@ static int push_leaf_right(struct btrfs_root *root, struct btrfs_path *path, BUG_ON(list_empty(&left_buf->dirty)); BUG_ON(list_empty(&right_buf->dirty)); - memcpy(upper->node.keys + slot + 1, + memcpy(&upper->node.ptrs[slot + 1].key, &right->items[0].key, sizeof(struct btrfs_disk_key)); BUG_ON(list_empty(&upper->dirty)); @@ -932,7 +924,7 @@ static int push_leaf_left(struct btrfs_root *root, struct btrfs_path *path, t = read_tree_block(root, btrfs_node_blockptr(&path->nodes[1]->node, slot - 1)); left = &t->leaf; - free_space = btrfs_leaf_free_space(left); + free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, t); return 1; @@ -941,7 +933,7 @@ static int push_leaf_left(struct btrfs_root *root, struct btrfs_path *path, /* cow and double check */ btrfs_cow_block(root, t, path->nodes[1], slot - 1, &t); left = &t->leaf; - free_space = btrfs_leaf_free_space(left); + free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, t); return 1; @@ -964,17 +956,19 @@ static int push_leaf_left(struct btrfs_root *root, struct btrfs_path *path, /* push data from right to left */ memcpy(left->items + btrfs_header_nritems(&left->header), right->items, push_items * sizeof(struct btrfs_item)); - push_space = LEAF_DATA_SIZE - + push_space = BTRFS_LEAF_DATA_SIZE(root) - btrfs_item_offset(right->items + push_items -1); - memcpy(left->data + leaf_data_end(left) - push_space, - right->data + btrfs_item_offset(right->items + push_items - 1), + memcpy(btrfs_leaf_data(left) + leaf_data_end(root, left) - push_space, + btrfs_leaf_data(right) + + btrfs_item_offset(right->items + push_items - 1), push_space); old_left_nritems = btrfs_header_nritems(&left->header); BUG_ON(old_left_nritems < 0); for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { - u16 ioff = btrfs_item_offset(left->items + i); - btrfs_set_item_offset(left->items + i, ioff - (LEAF_DATA_SIZE - + u32 ioff = btrfs_item_offset(left->items + i); + btrfs_set_item_offset(left->items + i, ioff - + (BTRFS_LEAF_DATA_SIZE(root) - btrfs_item_offset(left->items + old_left_nritems - 1))); } @@ -982,16 +976,17 @@ static int push_leaf_left(struct btrfs_root *root, struct btrfs_path *path, /* fixup right node */ push_space = btrfs_item_offset(right->items + push_items - 1) - - leaf_data_end(right); - memmove(right->data + LEAF_DATA_SIZE - push_space, right->data + - leaf_data_end(right), push_space); + leaf_data_end(root, right); + memmove(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - + push_space, btrfs_leaf_data(right) + + leaf_data_end(root, right), push_space); memmove(right->items, right->items + push_items, (btrfs_header_nritems(&right->header) - push_items) * sizeof(struct btrfs_item)); btrfs_set_header_nritems(&right->header, btrfs_header_nritems(&right->header) - push_items); - push_space = LEAF_DATA_SIZE; + push_space = BTRFS_LEAF_DATA_SIZE(root); for (i = 0; i < btrfs_header_nritems(&right->header); i++) { btrfs_set_item_offset(right->items + i, push_space - @@ -1051,12 +1046,12 @@ static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, if (wret < 0) return wret; } - l_buf = path->nodes[0]; l = &l_buf->leaf; /* did the pushes work? */ - if (btrfs_leaf_free_space(l) >= sizeof(struct btrfs_item) + data_size) + if (btrfs_leaf_free_space(root, l) >= + sizeof(struct btrfs_item) + data_size) return 0; if (!path->nodes[1]) { @@ -1071,16 +1066,16 @@ static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, BUG_ON(!right_buffer); BUG_ON(mid == nritems); right = &right_buffer->leaf; - memset(right, 0, sizeof(*right)); + memset(&right->header, 0, sizeof(right->header)); if (mid <= slot) { /* FIXME, just alloc a new leaf here */ if (leaf_space_used(l, mid, nritems - mid) + space_needed > - LEAF_DATA_SIZE) + BTRFS_LEAF_DATA_SIZE(root)) BUG(); } else { /* FIXME, just alloc a new leaf here */ if (leaf_space_used(l, 0, mid + 1) + space_needed > - LEAF_DATA_SIZE) + BTRFS_LEAF_DATA_SIZE(root)) BUG(); } btrfs_set_header_nritems(&right->header, nritems - mid); @@ -1088,15 +1083,18 @@ static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, btrfs_set_header_level(&right->header, 0); btrfs_set_header_parentid(&right->header, btrfs_header_parentid(&root->node->node.header)); - data_copy_size = btrfs_item_end(l->items + mid) - leaf_data_end(l); + data_copy_size = btrfs_item_end(l->items + mid) - + leaf_data_end(root, l); memcpy(right->items, l->items + mid, (nritems - mid) * sizeof(struct btrfs_item)); - memcpy(right->data + LEAF_DATA_SIZE - data_copy_size, - l->data + leaf_data_end(l), data_copy_size); - rt_data_off = LEAF_DATA_SIZE - btrfs_item_end(l->items + mid); + memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - + data_copy_size, btrfs_leaf_data(l) + + leaf_data_end(root, l), data_copy_size); + rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - + btrfs_item_end(l->items + mid); for (i = 0; i < btrfs_header_nritems(&right->header); i++) { - u16 ioff = btrfs_item_offset(right->items + i); + u32 ioff = btrfs_item_offset(right->items + i); btrfs_set_item_offset(right->items + i, ioff + rt_data_off); } @@ -1156,9 +1154,9 @@ int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, leaf = &leaf_buf->leaf; nritems = btrfs_header_nritems(&leaf->header); - data_end = leaf_data_end(leaf); + data_end = leaf_data_end(root, leaf); - if (btrfs_leaf_free_space(leaf) < + if (btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item) + data_size) BUG(); @@ -1173,7 +1171,7 @@ int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, */ /* first correct the data pointers */ for (i = slot; i < nritems; i++) { - u16 ioff = btrfs_item_offset(leaf->items + i); + u32 ioff = btrfs_item_offset(leaf->items + i); btrfs_set_item_offset(leaf->items + i, ioff - data_size); } @@ -1183,7 +1181,8 @@ int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, (nritems - slot) * sizeof(struct btrfs_item)); /* shift the data */ - memmove(leaf->data + data_end - data_size, leaf->data + + memmove(btrfs_leaf_data(leaf) + data_end - data_size, + btrfs_leaf_data(leaf) + data_end, old_data - data_end); data_end = old_data; } @@ -1192,7 +1191,7 @@ int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, sizeof(struct btrfs_disk_key)); btrfs_set_item_offset(leaf->items + slot, data_end - data_size); btrfs_set_item_size(leaf->items + slot, data_size); - memcpy(leaf->data + data_end - data_size, data, data_size); + memcpy(btrfs_leaf_data(leaf) + data_end - data_size, data, data_size); btrfs_set_header_nritems(&leaf->header, nritems + 1); ret = 0; @@ -1200,9 +1199,9 @@ int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, ret = fixup_low_keys(root, &path, &disk_key, 1); BUG_ON(list_empty(&leaf_buf->dirty)); - if (btrfs_leaf_free_space(leaf) < 0) + if (btrfs_leaf_free_space(root, leaf) < 0) BUG(); - check_leaf(&path, 0); + check_leaf(root, &path, 0); out: btrfs_release_path(root, &path); return ret; @@ -1227,11 +1226,8 @@ static int del_ptr(struct btrfs_root *root, struct btrfs_path *path, int level, node = &parent->node; nritems = btrfs_header_nritems(&node->header); if (slot != nritems -1) { - memmove(node->keys + slot, node->keys + slot + 1, - sizeof(struct btrfs_disk_key) * (nritems - slot - 1)); - memmove(node->blockptrs + slot, - node->blockptrs + slot + 1, - sizeof(u64) * (nritems - slot - 1)); + memmove(node->ptrs + slot, node->ptrs + slot + 1, + sizeof(struct btrfs_key_ptr) * (nritems - slot - 1)); } nritems--; btrfs_set_header_nritems(&node->header, nritems); @@ -1240,7 +1236,8 @@ static int del_ptr(struct btrfs_root *root, struct btrfs_path *path, int level, /* just turn the root into a leaf and break */ btrfs_set_header_level(&root->node->node.header, 0); } else if (slot == 0) { - wret = fixup_low_keys(root, path, node->keys, level + 1); + wret = fixup_low_keys(root, path, &node->ptrs[0].key, + level + 1); if (wret) ret = wret; } @@ -1272,12 +1269,12 @@ int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path) if (slot != nritems - 1) { int i; - int data_end = leaf_data_end(leaf); - memmove(leaf->data + data_end + dsize, - leaf->data + data_end, + int data_end = leaf_data_end(root, leaf); + memmove(btrfs_leaf_data(leaf) + data_end + dsize, + btrfs_leaf_data(leaf) + data_end, doff - data_end); for (i = slot + 1; i < nritems; i++) { - u16 ioff = btrfs_item_offset(leaf->items + i); + u32 ioff = btrfs_item_offset(leaf->items + i); btrfs_set_item_offset(leaf->items + i, ioff + dsize); } memmove(leaf->items + slot, leaf->items + slot + 1, @@ -1311,7 +1308,7 @@ int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path) BUG_ON(list_empty(&leaf_buf->dirty)); /* delete the leaf if it is mostly empty */ - if (used < LEAF_DATA_SIZE / 3) { + if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) { /* push_leaf_left fixes the path. * make sure the path still points to our leaf * for possible call to del_ptr below diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 05c7707263f..c61ad0f69be 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -5,7 +5,6 @@ #include "kerncompat.h" #define BTRFS_MAGIC "_BtRfS_M" -#define BTRFS_BLOCKSIZE 1024 #define BTRFS_ROOT_TREE_OBJECTID 1 #define BTRFS_EXTENT_TREE_OBJECTID 2 @@ -52,8 +51,11 @@ struct btrfs_header { } __attribute__ ((__packed__)); #define BTRFS_MAX_LEVEL 8 -#define NODEPTRS_PER_BLOCK ((BTRFS_BLOCKSIZE - sizeof(struct btrfs_header)) / \ - (sizeof(struct btrfs_disk_key) + sizeof(u64))) +#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->blocksize - \ + sizeof(struct btrfs_header)) / \ + (sizeof(struct btrfs_disk_key) + sizeof(u64))) +#define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) +#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->blocksize)) struct btrfs_buffer; @@ -86,6 +88,7 @@ struct btrfs_root { int ref_cows; struct btrfs_root_item root_item; struct btrfs_key root_key; + u32 blocksize; }; /* @@ -97,7 +100,7 @@ struct btrfs_super_block { __le64 blocknr; /* this block number */ __le32 csum; __le64 magic; - __le16 blocksize; + __le32 blocksize; __le64 generation; __le64 root; __le64 total_blocks; @@ -111,7 +114,7 @@ struct btrfs_super_block { */ struct btrfs_item { struct btrfs_disk_key key; - __le16 offset; + __le32 offset; __le16 size; } __attribute__ ((__packed__)); @@ -122,24 +125,23 @@ struct btrfs_item { * The data is separate from the items to get the keys closer together * during searches. */ -#define LEAF_DATA_SIZE (BTRFS_BLOCKSIZE - sizeof(struct btrfs_header)) struct btrfs_leaf { struct btrfs_header header; - union { - struct btrfs_item items[LEAF_DATA_SIZE/ - sizeof(struct btrfs_item)]; - u8 data[BTRFS_BLOCKSIZE - sizeof(struct btrfs_header)]; - }; + struct btrfs_item items[]; } __attribute__ ((__packed__)); /* * all non-leaf blocks are nodes, they hold only keys and pointers to * other blocks */ +struct btrfs_key_ptr { + struct btrfs_disk_key key; + __le64 blockptr; +} __attribute__ ((__packed__)); + struct btrfs_node { struct btrfs_header header; - struct btrfs_disk_key keys[NODEPTRS_PER_BLOCK]; - __le64 blockptrs[NODEPTRS_PER_BLOCK]; + struct btrfs_key_ptr ptrs[]; } __attribute__ ((__packed__)); /* @@ -186,28 +188,28 @@ static inline void btrfs_set_extent_refs(struct btrfs_extent_item *ei, u32 val) static inline u64 btrfs_node_blockptr(struct btrfs_node *n, int nr) { - return le64_to_cpu(n->blockptrs[nr]); + return le64_to_cpu(n->ptrs[nr].blockptr); } static inline void btrfs_set_node_blockptr(struct btrfs_node *n, int nr, u64 val) { - n->blockptrs[nr] = cpu_to_le64(val); + n->ptrs[nr].blockptr = cpu_to_le64(val); } -static inline u16 btrfs_item_offset(struct btrfs_item *item) +static inline u32 btrfs_item_offset(struct btrfs_item *item) { - return le16_to_cpu(item->offset); + return le32_to_cpu(item->offset); } -static inline void btrfs_set_item_offset(struct btrfs_item *item, u16 val) +static inline void btrfs_set_item_offset(struct btrfs_item *item, u32 val) { - item->offset = cpu_to_le16(val); + item->offset = cpu_to_le32(val); } -static inline u16 btrfs_item_end(struct btrfs_item *item) +static inline u32 btrfs_item_end(struct btrfs_item *item) { - return le16_to_cpu(item->offset) + le16_to_cpu(item->size); + return le32_to_cpu(item->offset) + le16_to_cpu(item->size); } static inline u16 btrfs_item_size(struct btrfs_item *item) @@ -390,20 +392,26 @@ static inline void btrfs_set_super_blocks_used(struct btrfs_super_block *s, s->blocks_used = cpu_to_le64(val); } -static inline u16 btrfs_super_blocksize(struct btrfs_super_block *s) +static inline u32 btrfs_super_blocksize(struct btrfs_super_block *s) { - return le16_to_cpu(s->blocksize); + return le32_to_cpu(s->blocksize); } static inline void btrfs_set_super_blocksize(struct btrfs_super_block *s, - u16 val) + u32 val) +{ + s->blocksize = cpu_to_le32(val); +} + +static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) { - s->blocksize = cpu_to_le16(val); + return (u8 *)l->items; } /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ - ((type *)((leaf)->data + btrfs_item_offset((leaf)->items + (slot)))) + ((type *)(btrfs_leaf_data(leaf) + \ + btrfs_item_offset((leaf)->items + (slot)))) struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root); int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf); @@ -416,7 +424,7 @@ int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path); int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *key, void *data, int data_size); int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); -int btrfs_leaf_free_space(struct btrfs_leaf *leaf); +int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf); int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap); int btrfs_finish_extent_commit(struct btrfs_root *root); int btrfs_del_root(struct btrfs_root *root, struct btrfs_key *key); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3d4bf6833f2..8d9457b5aef 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -46,7 +46,8 @@ struct btrfs_buffer *alloc_tree_block(struct btrfs_root *root, u64 blocknr) { struct btrfs_buffer *buf; int ret; - buf = malloc(sizeof(struct btrfs_buffer)); + + buf = malloc(sizeof(struct btrfs_buffer) + root->blocksize); if (!buf) return buf; allocated_blocks++; @@ -84,7 +85,7 @@ struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr) struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) { - loff_t offset = blocknr * BTRFS_BLOCKSIZE; + loff_t offset = blocknr * root->blocksize; struct btrfs_buffer *buf; int ret; @@ -95,8 +96,8 @@ struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) buf = alloc_tree_block(root, blocknr); if (!buf) return NULL; - ret = pread(root->fp, &buf->node, BTRFS_BLOCKSIZE, offset); - if (ret != BTRFS_BLOCKSIZE) { + ret = pread(root->fp, &buf->node, root->blocksize, offset); + if (ret != root->blocksize) { free(buf); return NULL; } @@ -127,13 +128,13 @@ int clean_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) int write_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) { u64 blocknr = buf->blocknr; - loff_t offset = blocknr * BTRFS_BLOCKSIZE; + loff_t offset = blocknr * root->blocksize; int ret; if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) BUG(); - ret = pwrite(root->fp, &buf->node, BTRFS_BLOCKSIZE, offset); - if (ret != BTRFS_BLOCKSIZE) + ret = pwrite(root->fp, &buf->node, root->blocksize, offset); + if (ret != root->blocksize) return ret; return 0; } @@ -215,7 +216,8 @@ int btrfs_commit_transaction(struct btrfs_root *root, return ret; } -static int __setup_root(struct btrfs_root *root, u64 objectid, int fp) +static int __setup_root(struct btrfs_super_block *super, + struct btrfs_root *root, u64 objectid, int fp) { INIT_LIST_HEAD(&root->trans); INIT_LIST_HEAD(&root->cache); @@ -223,6 +225,8 @@ static int __setup_root(struct btrfs_root *root, u64 objectid, int fp) root->fp = fp; root->node = NULL; root->commit_root = NULL; + root->blocksize = btrfs_super_blocksize(super); + root->ref_cows = 0; memset(&root->current_insert, 0, sizeof(root->current_insert)); memset(&root->last_insert, 0, sizeof(root->last_insert)); memset(&root->root_key, 0, sizeof(root->root_key)); @@ -230,19 +234,19 @@ static int __setup_root(struct btrfs_root *root, u64 objectid, int fp) return 0; } -static int find_and_setup_root(struct btrfs_root *tree_root, u64 objectid, - struct btrfs_root *root, int fp) +static int find_and_setup_root(struct btrfs_super_block *super, + struct btrfs_root *tree_root, u64 objectid, + struct btrfs_root *root, int fp) { int ret; - __setup_root(root, objectid, fp); + __setup_root(super, root, objectid, fp); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); root->node = read_tree_block(root, btrfs_root_blocknr(&root->root_item)); - root->ref_cows = 0; BUG_ON(!root->node); return 0; } @@ -277,28 +281,28 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) INIT_RADIX_TREE(&tree_root->cache_radix, GFP_KERNEL); ret = pread(fp, super, sizeof(struct btrfs_super_block), - BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); + BTRFS_SUPER_INFO_OFFSET); if (ret == 0 || btrfs_super_root(super) == 0) { printf("making new FS!\n"); - ret = mkfs(fp, 0, BTRFS_BLOCKSIZE); + ret = mkfs(fp, 0, 1024); if (ret) return NULL; ret = pread(fp, super, sizeof(struct btrfs_super_block), - BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); + BTRFS_SUPER_INFO_OFFSET); if (ret != sizeof(struct btrfs_super_block)) return NULL; } BUG_ON(ret < 0); - __setup_root(tree_root, BTRFS_ROOT_TREE_OBJECTID, fp); + __setup_root(super, tree_root, BTRFS_ROOT_TREE_OBJECTID, fp); tree_root->node = read_tree_block(tree_root, btrfs_super_root(super)); BUG_ON(!tree_root->node); - ret = find_and_setup_root(tree_root, BTRFS_EXTENT_TREE_OBJECTID, + ret = find_and_setup_root(super, tree_root, BTRFS_EXTENT_TREE_OBJECTID, extent_root, fp); BUG_ON(ret); - ret = find_and_setup_root(tree_root, BTRFS_FS_TREE_OBJECTID, + ret = find_and_setup_root(super, tree_root, BTRFS_FS_TREE_OBJECTID, root, fp); BUG_ON(ret); @@ -313,7 +317,7 @@ int write_ctree_super(struct btrfs_root *root, struct btrfs_super_block *s) int ret; btrfs_set_super_root(s, root->tree_root->node->blocknr); ret = pwrite(root->fp, s, sizeof(*s), - BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); + BTRFS_SUPER_INFO_OFFSET); if (ret != sizeof(*s)) { fprintf(stderr, "failed to write new super block err %d\n", ret); return ret; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c22a61f9233..5771bb90acb 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -5,12 +5,12 @@ struct btrfs_buffer { u64 blocknr; int count; + struct list_head dirty; + struct list_head cache; union { struct btrfs_node node; struct btrfs_leaf leaf; }; - struct list_head dirty; - struct list_head cache; }; struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr); @@ -24,9 +24,8 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *s); int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s); void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf); int write_ctree_super(struct btrfs_root *root, struct btrfs_super_block *s); -int mkfs(int fd, u64 num_blocks, u16 blocksize); - +int mkfs(int fd, u64 num_blocks, u32 blocksize); -#define BTRFS_SUPER_INFO_OFFSET(bs) (16 * (bs)) +#define BTRFS_SUPER_INFO_OFFSET (16 * 1024) #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3767744f659..d4f1ec32839 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -143,7 +143,6 @@ static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) struct btrfs_key key; struct btrfs_root *extent_root = root->extent_root; int ret; - struct btrfs_item *item; struct btrfs_extent_item *ei; struct btrfs_key ins; u32 refs; @@ -161,9 +160,8 @@ static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) printf("failed to find %Lu\n", key.objectid); BUG(); } - item = path.nodes[0]->leaf.items + path.slots[0]; - ei = (struct btrfs_extent_item *)(path.nodes[0]->leaf.data + - btrfs_item_offset(item)); + ei = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], + struct btrfs_extent_item); BUG_ON(ei->refs == 0); refs = btrfs_extent_refs(ei) - 1; btrfs_set_extent_refs(ei, refs); diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c index dd14ed4fea6..317d20ce759 100644 --- a/fs/btrfs/mkfs.c +++ b/fs/btrfs/mkfs.c @@ -10,19 +10,20 @@ #include "ctree.h" #include "disk-io.h" -int mkfs(int fd, u64 num_blocks, u16 blocksize) +int mkfs(int fd, u64 num_blocks, u32 blocksize) { struct btrfs_super_block super; - struct btrfs_leaf empty_leaf; + struct btrfs_leaf *empty_leaf; struct btrfs_root_item root_item; struct btrfs_item item; struct btrfs_extent_item extent_item; char *block; int ret; - u16 itemoff; + u32 itemoff; + u32 start_block = BTRFS_SUPER_INFO_OFFSET / blocksize; - btrfs_set_super_blocknr(&super, 16); - btrfs_set_super_root(&super, 17); + btrfs_set_super_blocknr(&super, start_block); + btrfs_set_super_root(&super, start_block + 1); strcpy((char *)(&super.magic), BTRFS_MAGIC); btrfs_set_super_blocksize(&super, blocksize); btrfs_set_super_total_blocks(&super, num_blocks); @@ -32,168 +33,98 @@ int mkfs(int fd, u64 num_blocks, u16 blocksize) memset(block, 0, blocksize); BUG_ON(sizeof(super) > blocksize); memcpy(block, &super, sizeof(super)); - ret = pwrite(fd, block, blocksize, BTRFS_SUPER_INFO_OFFSET(blocksize)); + ret = pwrite(fd, block, blocksize, BTRFS_SUPER_INFO_OFFSET); BUG_ON(ret != blocksize); /* create the tree of root objects */ - memset(&empty_leaf, 0, sizeof(empty_leaf)); - btrfs_set_header_parentid(&empty_leaf.header, BTRFS_ROOT_TREE_OBJECTID); - btrfs_set_header_blocknr(&empty_leaf.header, 17); - btrfs_set_header_nritems(&empty_leaf.header, 2); + empty_leaf = malloc(blocksize); + memset(empty_leaf, 0, blocksize); + btrfs_set_header_parentid(&empty_leaf->header, + BTRFS_ROOT_TREE_OBJECTID); + btrfs_set_header_blocknr(&empty_leaf->header, start_block + 1); + btrfs_set_header_nritems(&empty_leaf->header, 2); /* create the items for the root tree */ - btrfs_set_root_blocknr(&root_item, 18); + btrfs_set_root_blocknr(&root_item, start_block + 2); btrfs_set_root_refs(&root_item, 1); - itemoff = LEAF_DATA_SIZE - sizeof(root_item); + itemoff = __BTRFS_LEAF_DATA_SIZE(blocksize) - sizeof(root_item); btrfs_set_item_offset(&item, itemoff); btrfs_set_item_size(&item, sizeof(root_item)); btrfs_set_key_objectid(&item.key, BTRFS_EXTENT_TREE_OBJECTID); btrfs_set_key_offset(&item.key, 0); btrfs_set_key_flags(&item.key, 0); - memcpy(empty_leaf.items, &item, sizeof(item)); - memcpy(empty_leaf.data + itemoff, &root_item, sizeof(root_item)); + memcpy(empty_leaf->items, &item, sizeof(item)); + memcpy(btrfs_leaf_data(empty_leaf) + itemoff, + &root_item, sizeof(root_item)); - btrfs_set_root_blocknr(&root_item, 19); + btrfs_set_root_blocknr(&root_item, start_block + 3); itemoff = itemoff - sizeof(root_item); btrfs_set_item_offset(&item, itemoff); btrfs_set_key_objectid(&item.key, BTRFS_FS_TREE_OBJECTID); - memcpy(empty_leaf.items + 1, &item, sizeof(item)); - memcpy(empty_leaf.data + itemoff, &root_item, sizeof(root_item)); - ret = pwrite(fd, &empty_leaf, blocksize, 17 * blocksize); + memcpy(empty_leaf->items + 1, &item, sizeof(item)); + memcpy(btrfs_leaf_data(empty_leaf) + itemoff, + &root_item, sizeof(root_item)); + ret = pwrite(fd, empty_leaf, blocksize, (start_block + 1) * blocksize); /* create the items for the extent tree */ - btrfs_set_header_parentid(&empty_leaf.header, + btrfs_set_header_parentid(&empty_leaf->header, BTRFS_EXTENT_TREE_OBJECTID); - btrfs_set_header_blocknr(&empty_leaf.header, 18); - btrfs_set_header_nritems(&empty_leaf.header, 4); + btrfs_set_header_blocknr(&empty_leaf->header, start_block + 2); + btrfs_set_header_nritems(&empty_leaf->header, 4); /* item1, reserve blocks 0-16 */ btrfs_set_key_objectid(&item.key, 0); - btrfs_set_key_offset(&item.key, 17); + btrfs_set_key_offset(&item.key, start_block + 1); btrfs_set_key_flags(&item.key, 0); - itemoff = LEAF_DATA_SIZE - sizeof(struct btrfs_extent_item); + itemoff = __BTRFS_LEAF_DATA_SIZE(blocksize) - + sizeof(struct btrfs_extent_item); btrfs_set_item_offset(&item, itemoff); btrfs_set_item_size(&item, sizeof(struct btrfs_extent_item)); btrfs_set_extent_refs(&extent_item, 1); btrfs_set_extent_owner(&extent_item, 0); - memcpy(empty_leaf.items, &item, sizeof(item)); - memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, - btrfs_item_size(&item)); + memcpy(empty_leaf->items, &item, sizeof(item)); + memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), + &extent_item, btrfs_item_size(&item)); /* item2, give block 17 to the root */ - btrfs_set_key_objectid(&item.key, 17); + btrfs_set_key_objectid(&item.key, start_block + 1); btrfs_set_key_offset(&item.key, 1); itemoff = itemoff - sizeof(struct btrfs_extent_item); btrfs_set_item_offset(&item, itemoff); btrfs_set_extent_owner(&extent_item, BTRFS_ROOT_TREE_OBJECTID); - memcpy(empty_leaf.items + 1, &item, sizeof(item)); - memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, - btrfs_item_size(&item)); + memcpy(empty_leaf->items + 1, &item, sizeof(item)); + memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), + &extent_item, btrfs_item_size(&item)); /* item3, give block 18 to the extent root */ - btrfs_set_key_objectid(&item.key, 18); + btrfs_set_key_objectid(&item.key, start_block + 2); btrfs_set_key_offset(&item.key, 1); itemoff = itemoff - sizeof(struct btrfs_extent_item); btrfs_set_item_offset(&item, itemoff); btrfs_set_extent_owner(&extent_item, BTRFS_EXTENT_TREE_OBJECTID); - memcpy(empty_leaf.items + 2, &item, sizeof(item)); - memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, - btrfs_item_size(&item)); + memcpy(empty_leaf->items + 2, &item, sizeof(item)); + memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), + &extent_item, btrfs_item_size(&item)); /* item4, give block 19 to the FS root */ - btrfs_set_key_objectid(&item.key, 19); + btrfs_set_key_objectid(&item.key, start_block + 3); btrfs_set_key_offset(&item.key, 1); itemoff = itemoff - sizeof(struct btrfs_extent_item); btrfs_set_item_offset(&item, itemoff); btrfs_set_extent_owner(&extent_item, BTRFS_FS_TREE_OBJECTID); - memcpy(empty_leaf.items + 3, &item, sizeof(item)); - memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, - btrfs_item_size(&item)); - ret = pwrite(fd, &empty_leaf, blocksize, 18 * blocksize); - if (ret != sizeof(empty_leaf)) + memcpy(empty_leaf->items + 3, &item, sizeof(item)); + memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), + &extent_item, btrfs_item_size(&item)); + ret = pwrite(fd, empty_leaf, blocksize, (start_block + 2) * blocksize); + if (ret != blocksize) return -1; /* finally create the FS root */ - btrfs_set_header_parentid(&empty_leaf.header, BTRFS_FS_TREE_OBJECTID); - btrfs_set_header_blocknr(&empty_leaf.header, 19); - btrfs_set_header_nritems(&empty_leaf.header, 0); - ret = pwrite(fd, &empty_leaf, blocksize, 19 * blocksize); - if (ret != sizeof(empty_leaf)) + btrfs_set_header_parentid(&empty_leaf->header, BTRFS_FS_TREE_OBJECTID); + btrfs_set_header_blocknr(&empty_leaf->header, start_block + 3); + btrfs_set_header_nritems(&empty_leaf->header, 0); + ret = pwrite(fd, empty_leaf, blocksize, (start_block + 3) * blocksize); + if (ret != blocksize) return -1; return 0; } - -#if 0 -int mkfs(int fd) -{ - struct btrfs_root_info info[2]; - struct btrfs_leaf empty_leaf; - struct btrfs_item item; - struct btrfs_extent_item extent_item; - int ret; - - /* setup the super block area */ - memset(info, 0, sizeof(info)); - btrfs_set_root_blocknr(info, 16); - btrfs_set_root_objectid(info, 1); - btrfs_set_root_tree_root(info, 17); - - btrfs_set_root_blocknr(info + 1, 16); - btrfs_set_root_objectid(info + 1, 2); - btrfs_set_root_tree_root(info + 1, 18); - - ret = pwrite(fd, info, sizeof(info), - BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); - if (ret != sizeof(info)) - return -1; - - /* create leaves for the tree root and extent root */ - memset(&empty_leaf, 0, sizeof(empty_leaf)); - btrfs_set_header_parentid(&empty_leaf.header, 1); - btrfs_set_header_blocknr(&empty_leaf.header, 17); - ret = pwrite(fd, &empty_leaf, sizeof(empty_leaf), 17 * BTRFS_BLOCKSIZE); - if (ret != sizeof(empty_leaf)) - return -1; - - btrfs_set_header_parentid(&empty_leaf.header, 2); - btrfs_set_header_blocknr(&empty_leaf.header, 18); - btrfs_set_header_nritems(&empty_leaf.header, 3); - - /* item1, reserve blocks 0-16 */ - btrfs_set_key_objectid(&item.key, 0); - btrfs_set_key_offset(&item.key, 17); - btrfs_set_key_flags(&item.key, 0); - btrfs_set_item_offset(&item, LEAF_DATA_SIZE - - sizeof(struct btrfs_extent_item)); - btrfs_set_item_size(&item, sizeof(struct btrfs_extent_item)); - btrfs_set_extent_refs(&extent_item, 1); - btrfs_set_extent_owner(&extent_item, 0); - memcpy(empty_leaf.items, &item, sizeof(item)); - memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, - btrfs_item_size(&item)); - - /* item2, give block 17 to the root */ - btrfs_set_key_objectid(&item.key, 17); - btrfs_set_key_offset(&item.key, 1); - btrfs_set_item_offset(&item, LEAF_DATA_SIZE - - sizeof(struct btrfs_extent_item) * 2); - btrfs_set_extent_owner(&extent_item, 1); - memcpy(empty_leaf.items + 1, &item, sizeof(item)); - memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, - btrfs_item_size(&item)); - - /* item3, give block 18 for the extent root */ - btrfs_set_key_objectid(&item.key, 18); - btrfs_set_key_offset(&item.key, 1); - btrfs_set_item_offset(&item, LEAF_DATA_SIZE - - sizeof(struct btrfs_extent_item) * 3); - btrfs_set_extent_owner(&extent_item, 2); - memcpy(empty_leaf.items + 2, &item, sizeof(item)); - memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, - btrfs_item_size(&item)); - ret = pwrite(fd, &empty_leaf, sizeof(empty_leaf), 18 * BTRFS_BLOCKSIZE); - if (ret != sizeof(empty_leaf)) - return -1; - return 0; -} -#endif diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index e769f36cf05..7a189eaa589 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -5,7 +5,7 @@ #include "ctree.h" #include "disk-io.h" -void btrfs_print_leaf(struct btrfs_leaf *l) +void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) { int i; u32 nr = btrfs_header_nritems(&l->header); @@ -13,7 +13,8 @@ void btrfs_print_leaf(struct btrfs_leaf *l) struct btrfs_extent_item *ei; struct btrfs_root_item *ri; printf("leaf %Lu total ptrs %d free space %d\n", - btrfs_header_blocknr(&l->header), nr, btrfs_leaf_free_space(l)); + btrfs_header_blocknr(&l->header), nr, + btrfs_leaf_free_space(root, l)); fflush(stdout); for (i = 0 ; i < nr ; i++) { item = l->items + i; @@ -25,7 +26,7 @@ void btrfs_print_leaf(struct btrfs_leaf *l) btrfs_item_offset(item), btrfs_item_size(item)); printf("\t\titem data %.*s\n", btrfs_item_size(item), - l->data + btrfs_item_offset(item)); + btrfs_leaf_data(l) + btrfs_item_offset(item)); ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); printf("\t\textent data refs %u owner %Lu\n", btrfs_extent_refs(ei), btrfs_extent_owner(ei)); @@ -46,18 +47,18 @@ void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t) c = &t->node; nr = btrfs_header_nritems(&c->header); if (btrfs_is_leaf(c)) { - btrfs_print_leaf((struct btrfs_leaf *)c); + btrfs_print_leaf(root, (struct btrfs_leaf *)c); return; } printf("node %Lu level %d total ptrs %d free spc %u\n", t->blocknr, btrfs_header_level(&c->header), nr, - (u32)NODEPTRS_PER_BLOCK - nr); + (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); fflush(stdout); for (i = 0; i < nr; i++) { printf("\tkey %d (%Lu %u %Lu) block %Lu\n", i, - c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset, - btrfs_node_blockptr(c, i)); + c->ptrs[i].key.objectid, c->ptrs[i].key.flags, + c->ptrs[i].key.offset, btrfs_node_blockptr(c, i)); fflush(stdout); } for (i = 0; i < nr; i++) { diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index e8d0b847c02..9236abda2a7 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -1,3 +1,3 @@ -void btrfs_print_leaf(struct btrfs_leaf *l); +void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l); void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t); diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c index 66bdc57905d..d9287cad35c 100644 --- a/fs/btrfs/quick-test.c +++ b/fs/btrfs/quick-test.c @@ -73,7 +73,7 @@ int main(int ac, char **av) { printf("node %p level %d total ptrs %d free spc %lu\n", root->node, btrfs_header_level(&root->node->node.header), btrfs_header_nritems(&root->node->node.header), - NODEPTRS_PER_BLOCK - + BTRFS_NODEPTRS_PER_BLOCK(root) - btrfs_header_nritems(&root->node->node.header)); printf("all searches good, deleting some items\n"); i = 0; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 0ab90cfea98..03aa7c2c633 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -31,7 +31,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, ret = 1; goto out; } - memcpy(item, l->data + btrfs_item_offset(l->items + slot), + memcpy(item, btrfs_item_ptr(l, slot, struct btrfs_root_item), sizeof(*item)); btrfs_disk_key_to_cpu(key, &l->items[slot].key); btrfs_release_path(root, &path); @@ -55,7 +55,7 @@ int btrfs_update_root(struct btrfs_root *root, struct btrfs_key *key, BUG_ON(ret != 0); l = &path.nodes[0]->leaf; slot = path.slots[0]; - memcpy(l->data + btrfs_item_offset(l->items + slot), item, + memcpy(btrfs_item_ptr(l, slot, struct btrfs_root_item), item, sizeof(*item)); out: btrfs_release_path(root, &path); -- cgit v1.2.3-70-g09d2 From 62e2749e03a855d98855f9ce032dbe72d5fad148 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 15 Mar 2007 12:56:47 -0400 Subject: Btrfs: Use a chunk of the key flags to record the item type. Add (untested and simple) directory item code Fix comp_keys to use the new key ordering Add btrfs_insert_empty_item Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/TODO | 1 + fs/btrfs/ctree.c | 51 +++++++++----- fs/btrfs/ctree.h | 182 ++++++++++++++++++++++++++++++++++--------------- fs/btrfs/dir-item.c | 102 +++++++++++++++++++++++++++ fs/btrfs/extent-tree.c | 17 +++-- fs/btrfs/hash.c | 86 +++++++++++++++++++++++ fs/btrfs/hasher.c | 23 +++++++ fs/btrfs/kerncompat.h | 1 + fs/btrfs/mkfs.c | 28 ++++---- fs/btrfs/print-tree.c | 36 +++++++--- fs/btrfs/quick-test.c | 3 +- fs/btrfs/random-test.c | 10 ++- fs/btrfs/root-tree.c | 2 +- 14 files changed, 435 insertions(+), 109 deletions(-) create mode 100644 fs/btrfs/dir-item.c create mode 100644 fs/btrfs/hash.c create mode 100644 fs/btrfs/hasher.c (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 13ed6313ac8..ee78cdd08c0 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -3,7 +3,7 @@ CC=gcc CFLAGS = -g -Wall headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o \ - root-tree.o + root-tree.o dir-item.o hash.o # if you don't have sparse installed, use ls instead CHECKFLAGS=-D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise \ diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 8142be88d71..249e2e90bea 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -8,6 +8,7 @@ * Add block mapping tree (simple dm layer) * Add simple tree locking (semaphore per tree) * Make allocator smarter +* Do actual block accounting * Port into the kernel * Add virtual filesystems, mountable snapshots * Get rid of struct ctree_path, limiting tree levels held at one time diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ece8de7f38e..7edfbd46804 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -107,14 +107,14 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) return 1; if (k1.objectid < k2->objectid) return -1; - if (k1.flags > k2->flags) - return 1; - if (k1.flags < k2->flags) - return -1; if (k1.offset > k2->offset) return 1; if (k1.offset < k2->offset) return -1; + if (k1.flags > k2->flags) + return 1; + if (k1.flags < k2->flags) + return -1; return 0; } @@ -1122,8 +1122,8 @@ static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, * Given a key and some data, insert an item into the tree. * This does all the path init required, making room in the tree if needed. */ -int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, - void *data, int data_size) +int btrfs_insert_empty_item(struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *cpu_key, u32 data_size) { int ret = 0; int slot; @@ -1132,7 +1132,6 @@ int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, struct btrfs_buffer *leaf_buf; u32 nritems; unsigned int data_end; - struct btrfs_path path; struct btrfs_disk_key disk_key; btrfs_cpu_key_to_disk(&disk_key, cpu_key); @@ -1140,17 +1139,16 @@ int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, /* create a root if there isn't one */ if (!root->node) BUG(); - btrfs_init_path(&path); - ret = btrfs_search_slot(root, cpu_key, &path, data_size, 1); + ret = btrfs_search_slot(root, cpu_key, path, data_size, 1); if (ret == 0) { - btrfs_release_path(root, &path); + btrfs_release_path(root, path); return -EEXIST; } if (ret < 0) goto out; - slot_orig = path.slots[0]; - leaf_buf = path.nodes[0]; + slot_orig = path->slots[0]; + leaf_buf = path->nodes[0]; leaf = &leaf_buf->leaf; nritems = btrfs_header_nritems(&leaf->header); @@ -1160,7 +1158,7 @@ int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, sizeof(struct btrfs_item) + data_size) BUG(); - slot = path.slots[0]; + slot = path->slots[0]; BUG_ON(slot < 0); if (slot != nritems) { int i; @@ -1186,23 +1184,42 @@ int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, data_end, old_data - data_end); data_end = old_data; } - /* copy the new data in */ + /* setup the item for the new data */ memcpy(&leaf->items[slot].key, &disk_key, sizeof(struct btrfs_disk_key)); btrfs_set_item_offset(leaf->items + slot, data_end - data_size); btrfs_set_item_size(leaf->items + slot, data_size); - memcpy(btrfs_leaf_data(leaf) + data_end - data_size, data, data_size); btrfs_set_header_nritems(&leaf->header, nritems + 1); ret = 0; if (slot == 0) - ret = fixup_low_keys(root, &path, &disk_key, 1); + ret = fixup_low_keys(root, path, &disk_key, 1); BUG_ON(list_empty(&leaf_buf->dirty)); if (btrfs_leaf_free_space(root, leaf) < 0) BUG(); - check_leaf(root, &path, 0); + check_leaf(root, path, 0); out: + return ret; +} + +/* + * Given a key and some data, insert an item into the tree. + * This does all the path init required, making room in the tree if needed. + */ +int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, + void *data, u32 data_size) +{ + int ret = 0; + struct btrfs_path path; + u8 *ptr; + + btrfs_init_path(&path); + ret = btrfs_insert_empty_item(root, &path, cpu_key, data_size); + if (!ret) { + ptr = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], u8); + memcpy(ptr, data, data_size); + } btrfs_release_path(root, &path); return ret; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 73ebc824924..e8a26fd8ea9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -58,39 +58,6 @@ struct btrfs_header { #define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->blocksize)) struct btrfs_buffer; - -struct btrfs_root_item { - __le64 blocknr; - __le32 flags; - __le64 block_limit; - __le64 blocks_used; - __le32 refs; -}; - -/* - * in ram representation of the tree. extent_root is used for all allocations - * and for the extent tree extent_root root. current_insert is used - * only for the extent tree. - */ -struct btrfs_root { - struct btrfs_buffer *node; - struct btrfs_buffer *commit_root; - struct btrfs_root *extent_root; - struct btrfs_root *tree_root; - struct btrfs_key current_insert; - struct btrfs_key last_insert; - int fp; - struct radix_tree_root cache_radix; - struct radix_tree_root pinned_radix; - struct list_head trans; - struct list_head cache; - int cache_size; - int ref_cows; - struct btrfs_root_item root_item; - struct btrfs_key root_key; - u32 blocksize; -}; - /* * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc @@ -108,8 +75,7 @@ struct btrfs_super_block { } __attribute__ ((__packed__)); /* - * A leaf is full of items. The exact type of item is defined by - * the key flags parameter. offset and size tell us where to find + * A leaf is full of items. offset and size tell us where to find * the item in the leaf (relative to the start of the data area) */ struct btrfs_item { @@ -144,15 +110,6 @@ struct btrfs_node { struct btrfs_key_ptr ptrs[]; } __attribute__ ((__packed__)); -/* - * items in the extent btree are used to record the objectid of the - * owner of the block and the number of references - */ -struct btrfs_extent_item { - __le32 refs; - __le64 owner; -} __attribute__ ((__packed__)); - /* * btrfs_paths remember the path taken from the root down to the leaf. * level 0 is always the leaf, and nodes[1...BTRFS_MAX_LEVEL] will point @@ -166,6 +123,94 @@ struct btrfs_path { int slots[BTRFS_MAX_LEVEL]; }; +/* + * items in the extent btree are used to record the objectid of the + * owner of the block and the number of references + */ +struct btrfs_extent_item { + __le32 refs; + __le64 owner; +} __attribute__ ((__packed__)); + +struct btrfs_dir_item { + __le64 objectid; + __le16 flags; + u8 type; +} __attribute__ ((__packed__)); + +struct btrfs_root_item { + __le64 blocknr; + __le32 flags; + __le64 block_limit; + __le64 blocks_used; + __le32 refs; +}; + +/* + * in ram representation of the tree. extent_root is used for all allocations + * and for the extent tree extent_root root. current_insert is used + * only for the extent tree. + */ +struct btrfs_root { + struct btrfs_buffer *node; + struct btrfs_buffer *commit_root; + struct btrfs_root *extent_root; + struct btrfs_root *tree_root; + struct btrfs_key current_insert; + struct btrfs_key last_insert; + int fp; + struct radix_tree_root cache_radix; + struct radix_tree_root pinned_radix; + struct list_head trans; + struct list_head cache; + int cache_size; + int ref_cows; + struct btrfs_root_item root_item; + struct btrfs_key root_key; + u32 blocksize; +}; + + +/* the lower bits in the key flags defines the item type */ +#define BTRFS_KEY_TYPE_MAX 256 +#define BTRFS_KEY_TYPE_MASK (BTRFS_KEY_TYPE_MAX - 1) +#define BTRFS_INODE_ITEM_KEY 1 +#define BTRFS_DIR_ITEM_KEY 2 +#define BTRFS_ROOT_ITEM_KEY 3 +#define BTRFS_EXTENT_ITEM_KEY 4 +#define BTRFS_STRING_ITEM_KEY 5 + +static inline u64 btrfs_dir_objectid(struct btrfs_dir_item *d) +{ + return le64_to_cpu(d->objectid); +} + +static inline void btrfs_set_dir_objectid(struct btrfs_dir_item *d, u64 val) +{ + d->objectid = cpu_to_le64(val); +} + +static inline u16 btrfs_dir_flags(struct btrfs_dir_item *d) +{ + return le16_to_cpu(d->flags); +} + +static inline void btrfs_set_dir_flags(struct btrfs_dir_item *d, u16 val) +{ + d->flags = cpu_to_le16(val); +} + +static inline u8 btrfs_dir_type(struct btrfs_dir_item *d) +{ + return d->type; +} + +static inline void btrfs_set_dir_type(struct btrfs_dir_item *d, u8 val) +{ + d->type = val; +} + + static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) { return le64_to_cpu(ei->owner); @@ -238,39 +283,65 @@ static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk, disk->objectid = cpu_to_le64(cpu->objectid); } -static inline u64 btrfs_key_objectid(struct btrfs_disk_key *disk) +static inline u64 btrfs_disk_key_objectid(struct btrfs_disk_key *disk) { return le64_to_cpu(disk->objectid); } -static inline void btrfs_set_key_objectid(struct btrfs_disk_key *disk, - u64 val) +static inline void btrfs_set_disk_key_objectid(struct btrfs_disk_key *disk, + u64 val) { disk->objectid = cpu_to_le64(val); } -static inline u64 btrfs_key_offset(struct btrfs_disk_key *disk) +static inline u64 btrfs_disk_key_offset(struct btrfs_disk_key *disk) { return le64_to_cpu(disk->offset); } -static inline void btrfs_set_key_offset(struct btrfs_disk_key *disk, - u64 val) +static inline void btrfs_set_disk_key_offset(struct btrfs_disk_key *disk, + u64 val) { disk->offset = cpu_to_le64(val); } -static inline u32 btrfs_key_flags(struct btrfs_disk_key *disk) +static inline u32 btrfs_disk_key_flags(struct btrfs_disk_key *disk) { return le32_to_cpu(disk->flags); } -static inline void btrfs_set_key_flags(struct btrfs_disk_key *disk, - u32 val) +static inline void btrfs_set_disk_key_flags(struct btrfs_disk_key *disk, + u32 val) { disk->flags = cpu_to_le32(val); } +static inline u32 btrfs_key_type(struct btrfs_key *key) +{ + return key->flags & BTRFS_KEY_TYPE_MASK; +} + +static inline u32 btrfs_disk_key_type(struct btrfs_disk_key *key) +{ + return le32_to_cpu(key->flags) & BTRFS_KEY_TYPE_MASK; +} + +static inline void btrfs_set_key_type(struct btrfs_key *key, u32 type) +{ + BUG_ON(type >= BTRFS_KEY_TYPE_MAX); + key->flags = (key->flags & ~((u64)BTRFS_KEY_TYPE_MASK)) | type; +} + +static inline void btrfs_set_disk_key_type(struct btrfs_disk_key *key, u32 type) +{ + u32 flags = btrfs_disk_key_flags(key); + BUG_ON(type >= BTRFS_KEY_TYPE_MAX); + flags = (flags & ~((u64)BTRFS_KEY_TYPE_MASK)) | type; + btrfs_set_disk_key_flags(key, flags); +} + + + static inline u64 btrfs_header_blocknr(struct btrfs_header *h) { return le64_to_cpu(h->blocknr); @@ -407,7 +478,6 @@ static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) { return (u8 *)l->items; } - /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ @@ -422,7 +492,9 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); void btrfs_init_path(struct btrfs_path *p); int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path); int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *key, - void *data, int data_size); + void *data, u32 data_size); +int btrfs_insert_empty_item(struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *cpu_key, u32 data_size); int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf); int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap); diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c new file mode 100644 index 00000000000..2a888e97e1a --- /dev/null +++ b/fs/btrfs/dir-item.c @@ -0,0 +1,102 @@ +#include +#include +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" +#include "hash.h" + +int btrfs_insert_dir_item(struct btrfs_root *root, char *name, int name_len, + u64 dir, u64 objectid, u8 type) +{ + int ret = 0; + struct btrfs_path path; + struct btrfs_dir_item *dir_item; + char *name_ptr; + struct btrfs_key key; + u32 data_size; + + key.objectid = dir; + key.flags = 0; + ret = btrfs_name_hash(name, name_len, &key.offset); + BUG_ON(ret); + btrfs_init_path(&path); + data_size = sizeof(*dir_item) + name_len; + ret = btrfs_insert_empty_item(root, &path, &key, data_size); + if (ret) + goto out; + + dir_item = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], + struct btrfs_dir_item); + btrfs_set_dir_objectid(dir_item, objectid); + btrfs_set_dir_type(dir_item, type); + btrfs_set_dir_flags(dir_item, 0); + name_ptr = (char *)(dir_item + 1); + memcpy(name_ptr, name, name_len); +out: + btrfs_release_path(root, &path); + return ret; +} + +int btrfs_del_dir_item(struct btrfs_root *root, u64 dir, char *name, + int name_len) +{ + int ret = 0; + struct btrfs_path path; + struct btrfs_key key; + + key.objectid = dir; + key.flags = 0; + ret = btrfs_name_hash(name, name_len, &key.offset); + BUG_ON(ret); + btrfs_init_path(&path); + ret = btrfs_search_slot(root, &key, &path, 0, 1); + if (ret) + goto out; + ret = btrfs_del_item(root, &path); +out: + btrfs_release_path(root, &path); + return ret; +} + +int btrfs_lookup_dir_item(struct btrfs_root *root, u64 dir, char *name, + int name_len, u64 *objectid) +{ + int ret = 0; + struct btrfs_path path; + struct btrfs_dir_item *dir_item; + char *name_ptr; + struct btrfs_key key; + u32 item_len; + struct btrfs_item *item; + + key.objectid = dir; + key.flags = 0; + ret = btrfs_name_hash(name, name_len, &key.offset); + BUG_ON(ret); + btrfs_init_path(&path); + ret = btrfs_search_slot(root, &key, &path, 0, 0); + if (ret) + goto out; + + dir_item = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], + struct btrfs_dir_item); + + item = path.nodes[0]->leaf.items + path.slots[0]; + item_len = btrfs_item_size(item); + if (item_len != name_len + sizeof(struct btrfs_dir_item)) { + BUG(); + ret = 1; + goto out; + } + name_ptr = (char *)(dir_item + 1); + if (memcmp(name_ptr, name, name_len)) { + BUG(); + ret = 1; + goto out; + } + *objectid = btrfs_dir_objectid(dir_item); +out: + btrfs_release_path(root, &path); + return ret; +} diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d4f1ec32839..c81e14162ef 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -35,6 +35,7 @@ static int inc_block_ref(struct btrfs_root *root, u64 blocknr) btrfs_init_path(&path); key.objectid = blocknr; key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = 1; ret = btrfs_search_slot(root->extent_root, &key, &path, 0, 1); if (ret != 0) @@ -61,8 +62,9 @@ static int lookup_block_ref(struct btrfs_root *root, u64 blocknr, u32 *refs) struct btrfs_extent_item *item; btrfs_init_path(&path); key.objectid = blocknr; - key.flags = 0; key.offset = 1; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(root->extent_root, &key, &path, 0, 0); if (ret != 0) BUG(); @@ -123,6 +125,7 @@ static int finish_current_insert(struct btrfs_root *extent_root) btrfs_header_parentid(&extent_root->node->node.header)); ins.offset = 1; ins.flags = 0; + btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); for (i = 0; i < extent_root->current_insert.flags; i++) { ins.objectid = extent_root->current_insert.objectid + i; @@ -149,6 +152,7 @@ static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) key.objectid = blocknr; key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; find_free_extent(root, 0, 0, (u64)-1, &ins); @@ -228,7 +232,6 @@ static int run_pending(struct btrfs_root *extent_root) */ int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) { - struct btrfs_key key; struct btrfs_root *extent_root = root->extent_root; struct btrfs_buffer *t; int pending_ret; @@ -240,9 +243,6 @@ int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) CTREE_EXTENT_PENDING_DEL); return 0; } - key.objectid = blocknr; - key.flags = 0; - key.offset = num_blocks; ret = __free_extent(root, blocknr, num_blocks); pending_ret = run_pending(root->extent_root); return ret ? ret : pending_ret; @@ -252,7 +252,7 @@ int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) * walks the btree of allocated extents and find a hole of a given size. * The key ins is changed to record the hole: * ins->objectid == block start - * ins->flags = 0 + * ins->flags = BTRFS_EXTENT_ITEM_KEY * ins->offset == number of blocks * Any available blocks before search_start are skipped. */ @@ -275,11 +275,14 @@ static int find_free_extent(struct btrfs_root *orig_root, u64 num_blocks, total_needed += (btrfs_header_level(&root->node->node.header) + 1) * 3; if (root->last_insert.objectid > search_start) search_start = root->last_insert.objectid; + + ins->flags = 0; + btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); + check_failed: btrfs_init_path(&path); ins->objectid = search_start; ins->offset = 0; - ins->flags = 0; start_found = 0; ret = btrfs_search_slot(root, ins, &path, 0, 0); if (ret < 0) diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c new file mode 100644 index 00000000000..92c37eece1f --- /dev/null +++ b/fs/btrfs/hash.c @@ -0,0 +1,86 @@ +/* + * Original copy from: + * linux/fs/ext3/hash.c + * + * Copyright (C) 2002 by Theodore Ts'o + * + * This file is released under the GPL v2. + * + * This file may be redistributed under the terms of the GNU Public + * License. + */ + +#include "kerncompat.h" +#define DELTA 0x9E3779B9 + +static void TEA_transform(__u32 buf[2], __u32 const in[]) +{ + __u32 sum = 0; + __u32 b0 = buf[0], b1 = buf[1]; + __u32 a = in[0], b = in[1], c = in[2], d = in[3]; + int n = 16; + + do { + sum += DELTA; + b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); + b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); + } while(--n); + + buf[0] += b0; + buf[1] += b1; +} + +static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) +{ + __u32 pad, val; + int i; + + pad = (__u32)len | ((__u32)len << 8); + pad |= pad << 16; + + val = pad; + if (len > num*4) + len = num * 4; + for (i=0; i < len; i++) { + if ((i % 4) == 0) + val = pad; + val = msg[i] + (val << 8); + if ((i % 4) == 3) { + *buf++ = val; + val = pad; + num--; + } + } + if (--num >= 0) + *buf++ = val; + while (--num >= 0) + *buf++ = pad; +} + +int btrfs_name_hash(const char *name, int len, u64 *hash_result) +{ + __u32 hash; + __u32 minor_hash = 0; + const char *p; + __u32 in[8], buf[2]; + + /* Initialize the default seed for the hash checksum functions */ + buf[0] = 0x67452301; + buf[1] = 0xefcdab89; + buf[2] = 0x98badcfe; + buf[3] = 0x10325476; + + p = name; + while (len > 0) { + str2hashbuf(p, len, in, 4); + TEA_transform(buf, in); + len -= 16; + p += 16; + } + hash = buf[0]; + minor_hash = buf[1]; + *hash_result = buf[0]; + *hash_result <<= 32; + *hash_result |= buf[1]; + return 0; +} diff --git a/fs/btrfs/hasher.c b/fs/btrfs/hasher.c new file mode 100644 index 00000000000..96702da4329 --- /dev/null +++ b/fs/btrfs/hasher.c @@ -0,0 +1,23 @@ +#include +#include +#include +#include "kerncompat.h" +#include "hash.h" + +int main() { + u64 result; + int ret; + char line[255]; + char *p; + while(1) { + p = fgets(line, 255, stdin); + if (!p) + break; + if (strlen(line) == 0) + continue; + ret = btrfs_name_hash(line, strlen(line), &result); + BUG_ON(ret); + printf("hash returns %Lu\n", result); + } + return 0; +} diff --git a/fs/btrfs/kerncompat.h b/fs/btrfs/kerncompat.h index 56239b922ba..105d3f58408 100644 --- a/fs/btrfs/kerncompat.h +++ b/fs/btrfs/kerncompat.h @@ -21,6 +21,7 @@ #endif typedef unsigned int u32; +typedef u32 __u32; typedef unsigned long long u64; typedef unsigned char u8; typedef unsigned short u16; diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c index 317d20ce759..9aa900811c3 100644 --- a/fs/btrfs/mkfs.c +++ b/fs/btrfs/mkfs.c @@ -50,9 +50,10 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) itemoff = __BTRFS_LEAF_DATA_SIZE(blocksize) - sizeof(root_item); btrfs_set_item_offset(&item, itemoff); btrfs_set_item_size(&item, sizeof(root_item)); - btrfs_set_key_objectid(&item.key, BTRFS_EXTENT_TREE_OBJECTID); - btrfs_set_key_offset(&item.key, 0); - btrfs_set_key_flags(&item.key, 0); + btrfs_set_disk_key_objectid(&item.key, BTRFS_EXTENT_TREE_OBJECTID); + btrfs_set_disk_key_offset(&item.key, 0); + btrfs_set_disk_key_flags(&item.key, 0); + btrfs_set_disk_key_type(&item.key, BTRFS_ROOT_ITEM_KEY); memcpy(empty_leaf->items, &item, sizeof(item)); memcpy(btrfs_leaf_data(empty_leaf) + itemoff, &root_item, sizeof(root_item)); @@ -60,7 +61,7 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) btrfs_set_root_blocknr(&root_item, start_block + 3); itemoff = itemoff - sizeof(root_item); btrfs_set_item_offset(&item, itemoff); - btrfs_set_key_objectid(&item.key, BTRFS_FS_TREE_OBJECTID); + btrfs_set_disk_key_objectid(&item.key, BTRFS_FS_TREE_OBJECTID); memcpy(empty_leaf->items + 1, &item, sizeof(item)); memcpy(btrfs_leaf_data(empty_leaf) + itemoff, &root_item, sizeof(root_item)); @@ -73,9 +74,10 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) btrfs_set_header_nritems(&empty_leaf->header, 4); /* item1, reserve blocks 0-16 */ - btrfs_set_key_objectid(&item.key, 0); - btrfs_set_key_offset(&item.key, start_block + 1); - btrfs_set_key_flags(&item.key, 0); + btrfs_set_disk_key_objectid(&item.key, 0); + btrfs_set_disk_key_offset(&item.key, start_block + 1); + btrfs_set_disk_key_flags(&item.key, 0); + btrfs_set_disk_key_type(&item.key, BTRFS_EXTENT_ITEM_KEY); itemoff = __BTRFS_LEAF_DATA_SIZE(blocksize) - sizeof(struct btrfs_extent_item); btrfs_set_item_offset(&item, itemoff); @@ -87,8 +89,8 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) &extent_item, btrfs_item_size(&item)); /* item2, give block 17 to the root */ - btrfs_set_key_objectid(&item.key, start_block + 1); - btrfs_set_key_offset(&item.key, 1); + btrfs_set_disk_key_objectid(&item.key, start_block + 1); + btrfs_set_disk_key_offset(&item.key, 1); itemoff = itemoff - sizeof(struct btrfs_extent_item); btrfs_set_item_offset(&item, itemoff); btrfs_set_extent_owner(&extent_item, BTRFS_ROOT_TREE_OBJECTID); @@ -97,8 +99,8 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) &extent_item, btrfs_item_size(&item)); /* item3, give block 18 to the extent root */ - btrfs_set_key_objectid(&item.key, start_block + 2); - btrfs_set_key_offset(&item.key, 1); + btrfs_set_disk_key_objectid(&item.key, start_block + 2); + btrfs_set_disk_key_offset(&item.key, 1); itemoff = itemoff - sizeof(struct btrfs_extent_item); btrfs_set_item_offset(&item, itemoff); btrfs_set_extent_owner(&extent_item, BTRFS_EXTENT_TREE_OBJECTID); @@ -107,8 +109,8 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) &extent_item, btrfs_item_size(&item)); /* item4, give block 19 to the FS root */ - btrfs_set_key_objectid(&item.key, start_block + 3); - btrfs_set_key_offset(&item.key, 1); + btrfs_set_disk_key_objectid(&item.key, start_block + 3); + btrfs_set_disk_key_offset(&item.key, 1); itemoff = itemoff - sizeof(struct btrfs_extent_item); btrfs_set_item_offset(&item, itemoff); btrfs_set_extent_owner(&extent_item, BTRFS_FS_TREE_OBJECTID); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 4c710190343..f2745b24747 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -12,27 +12,41 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_item *item; struct btrfs_extent_item *ei; struct btrfs_root_item *ri; + u32 type; printf("leaf %Lu total ptrs %d free space %d\n", btrfs_header_blocknr(&l->header), nr, btrfs_leaf_free_space(root, l)); fflush(stdout); for (i = 0 ; i < nr ; i++) { item = l->items + i; + type = btrfs_disk_key_type(&item->key); printf("\titem %d key (%Lu %Lu %u) itemoff %d itemsize %d\n", i, - btrfs_key_objectid(&item->key), - btrfs_key_offset(&item->key), - btrfs_key_flags(&item->key), + btrfs_disk_key_objectid(&item->key), + btrfs_disk_key_offset(&item->key), + btrfs_disk_key_flags(&item->key), btrfs_item_offset(item), btrfs_item_size(item)); - printf("\t\titem data %.*s\n", btrfs_item_size(item), - btrfs_leaf_data(l) + btrfs_item_offset(item)); - ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); - printf("\t\textent data refs %u owner %Lu\n", - btrfs_extent_refs(ei), btrfs_extent_owner(ei)); - ri = btrfs_item_ptr(l, i, struct btrfs_root_item); - printf("\t\troot data blocknr %Lu refs %u\n", - btrfs_root_blocknr(ri), btrfs_root_refs(ri)); + switch (type) { + case BTRFS_INODE_ITEM_KEY: + break; + case BTRFS_DIR_ITEM_KEY: + break; + case BTRFS_ROOT_ITEM_KEY: + ri = btrfs_item_ptr(l, i, struct btrfs_root_item); + printf("\t\troot data blocknr %Lu refs %u\n", + btrfs_root_blocknr(ri), btrfs_root_refs(ri)); + break; + case BTRFS_EXTENT_ITEM_KEY: + ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); + printf("\t\textent data refs %u owner %Lu\n", + btrfs_extent_refs(ei), btrfs_extent_owner(ei)); + break; + case BTRFS_STRING_ITEM_KEY: + printf("\t\titem data %.*s\n", btrfs_item_size(item), + btrfs_leaf_data(l) + btrfs_item_offset(item)); + break; + }; fflush(stdout); } } diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c index d9287cad35c..3a257161712 100644 --- a/fs/btrfs/quick-test.c +++ b/fs/btrfs/quick-test.c @@ -30,6 +30,8 @@ int main(int ac, char **av) { root = open_ctree("dbfile", &super); srand(55); + ins.flags = 0; + btrfs_set_key_type(&ins, BTRFS_STRING_ITEM_KEY); for (i = 0; i < run_size; i++) { buf = malloc(64); num = next_key(i, max_key); @@ -39,7 +41,6 @@ int main(int ac, char **av) { fprintf(stderr, "insert %d:%d\n", num, i); ins.objectid = num; ins.offset = 0; - ins.flags = 0; ret = btrfs_insert_item(root, &ins, buf, strlen(buf)); if (!ret) tree_size++; diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index a8ef0478991..f05135f13f9 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -18,6 +18,7 @@ static int setup_key(struct radix_tree_root *root, struct btrfs_key *key, int ret; key->flags = 0; + btrfs_set_key_type(key, BTRFS_STRING_ITEM_KEY); key->offset = 0; again: ret = radix_tree_gang_lookup(root, (void **)res, num, 2); @@ -157,6 +158,7 @@ static int empty_tree(struct btrfs_root *root, struct radix_tree_root *radix, key.offset = 0; key.flags = 0; + btrfs_set_key_type(&key, BTRFS_STRING_ITEM_KEY); key.objectid = (unsigned long)-1; while(nr-- >= 0) { btrfs_init_path(&path); @@ -173,7 +175,8 @@ static int empty_tree(struct btrfs_root *root, struct radix_tree_root *radix, path.slots[0] -= 1; } slot = path.slots[0]; - found=btrfs_key_objectid(&path.nodes[0]->leaf.items[slot].key); + found = btrfs_disk_key_objectid( + &path.nodes[0]->leaf.items[slot].key); ret = btrfs_del_item(root, &path); count++; if (ret) { @@ -257,6 +260,7 @@ static int fill_radix(struct btrfs_root *root, struct radix_tree_root *radix) key.offset = 0; key.flags = 0; + btrfs_set_key_type(&key, BTRFS_STRING_ITEM_KEY); key.objectid = (unsigned long)-1; while(1) { btrfs_init_path(&path); @@ -274,8 +278,8 @@ static int fill_radix(struct btrfs_root *root, struct radix_tree_root *radix) slot -= 1; } for (i = slot; i >= 0; i--) { - found = btrfs_key_objectid(&path.nodes[0]-> - leaf.items[i].key); + found = btrfs_disk_key_objectid(&path.nodes[0]-> + leaf.items[i].key); radix_tree_preload(GFP_KERNEL); ret = radix_tree_insert(radix, found, (void *)found); if (ret) { diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 03aa7c2c633..a24b4727403 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -27,7 +27,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, l = &path.nodes[0]->leaf; BUG_ON(path.slots[0] == 0); slot = path.slots[0] - 1; - if (btrfs_key_objectid(&l->items[slot].key) != objectid) { + if (btrfs_disk_key_objectid(&l->items[slot].key) != objectid) { ret = 1; goto out; } -- cgit v1.2.3-70-g09d2 From 88fd146c27da0f34c512f47e2b3776a0762ecd81 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 16 Mar 2007 08:56:18 -0400 Subject: Btrfs: pin freed blocks from the FS tree too Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 14 +++++++------- fs/btrfs/ctree.h | 3 ++- fs/btrfs/extent-tree.c | 28 +++++++++++++++++----------- 3 files changed, 26 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 68f66b320a8..13128b5ed65 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -55,13 +55,13 @@ static int btrfs_cow_block(struct btrfs_root *root, root->node = cow; cow->count++; if (buf != root->commit_root) - btrfs_free_extent(root, buf->blocknr, 1); + btrfs_free_extent(root, buf->blocknr, 1, 1); btrfs_block_release(root, buf); } else { btrfs_set_node_blockptr(&parent->node, parent_slot, cow->blocknr); BUG_ON(list_empty(&parent->dirty)); - btrfs_free_extent(root, buf->blocknr, 1); + btrfs_free_extent(root, buf->blocknr, 1, 1); } btrfs_block_release(root, buf); return 0; @@ -311,7 +311,7 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, /* once for the root ptr */ btrfs_block_release(root, mid_buf); clean_tree_block(root, mid_buf); - return btrfs_free_extent(root, blocknr, 1); + return btrfs_free_extent(root, blocknr, 1, 1); } parent = &parent_buf->node; @@ -352,7 +352,7 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, wret = del_ptr(root, path, level + 1, pslot + 1); if (wret) ret = wret; - wret = btrfs_free_extent(root, blocknr, 1); + wret = btrfs_free_extent(root, blocknr, 1, 1); if (wret) ret = wret; } else { @@ -388,7 +388,7 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, wret = del_ptr(root, path, level + 1, pslot); if (wret) ret = wret; - wret = btrfs_free_extent(root, blocknr, 1); + wret = btrfs_free_extent(root, blocknr, 1, 1); if (wret) ret = wret; } else { @@ -1310,7 +1310,7 @@ int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path) wret = del_ptr(root, path, 1, path->slots[1]); if (wret) ret = wret; - wret = btrfs_free_extent(root, leaf_buf->blocknr, 1); + wret = btrfs_free_extent(root, leaf_buf->blocknr, 1, 1); if (wret) ret = wret; } @@ -1348,7 +1348,7 @@ int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path) if (wret) ret = wret; btrfs_block_release(root, leaf_buf); - wret = btrfs_free_extent(root, blocknr, 1); + wret = btrfs_free_extent(root, blocknr, 1, 1); if (wret) ret = wret; } else { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7c66b647ea2..d15a2ed9507 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -658,7 +658,8 @@ static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root); int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf); -int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks); +int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks, + int pin); int btrfs_search_slot(struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c81e14162ef..4a40282b45f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -95,6 +95,7 @@ int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf) int btrfs_finish_extent_commit(struct btrfs_root *root) { unsigned long gang[8]; + u64 first = 0; int ret; int i; @@ -104,11 +105,13 @@ int btrfs_finish_extent_commit(struct btrfs_root *root) ARRAY_SIZE(gang)); if (!ret) break; + if (!first) + first = gang[0]; for (i = 0; i < ret; i++) { radix_tree_delete(&root->pinned_radix, gang[i]); } } - root->last_insert.objectid = 0; + root->last_insert.objectid = first; root->last_insert.offset = 0; return 0; } @@ -140,7 +143,8 @@ static int finish_current_insert(struct btrfs_root *extent_root) /* * remove an extent from the root, returns 0 on success */ -static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) +static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks, + int pin) { struct btrfs_path path; struct btrfs_key key; @@ -150,6 +154,7 @@ static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) struct btrfs_key ins; u32 refs; + BUG_ON(pin && num_blocks != 1); key.objectid = blocknr; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -170,7 +175,7 @@ static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) refs = btrfs_extent_refs(ei) - 1; btrfs_set_extent_refs(ei, refs); if (refs == 0) { - if (!root->ref_cows) { + if (pin) { int err; radix_tree_preload(GFP_KERNEL); err = radix_tree_insert(&extent_root->pinned_radix, @@ -179,8 +184,7 @@ static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) radix_tree_preload_end(); } ret = btrfs_del_item(extent_root, &path); - if (root != extent_root && - extent_root->last_insert.objectid > blocknr) + if (!pin && extent_root->last_insert.objectid > blocknr) extent_root->last_insert.objectid = blocknr; if (ret) BUG(); @@ -208,7 +212,8 @@ static int del_pending_extents(struct btrfs_root *extent_root) if (!ret) break; for (i = 0; i < ret; i++) { - ret = __free_extent(extent_root, gang[i]->blocknr, 1); + ret = __free_extent(extent_root, + gang[i]->blocknr, 1, 1); radix_tree_tag_clear(&extent_root->cache_radix, gang[i]->blocknr, CTREE_EXTENT_PENDING_DEL); @@ -230,7 +235,8 @@ static int run_pending(struct btrfs_root *extent_root) /* * remove an extent from the root, returns 0 on success */ -int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) +int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks, + int pin) { struct btrfs_root *extent_root = root->extent_root; struct btrfs_buffer *t; @@ -243,7 +249,7 @@ int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) CTREE_EXTENT_PENDING_DEL); return 0; } - ret = __free_extent(root, blocknr, num_blocks); + ret = __free_extent(root, blocknr, num_blocks, pin); pending_ret = run_pending(root->extent_root); return ret ? ret : pending_ret; } @@ -451,7 +457,7 @@ static int walk_down_tree(struct btrfs_root *root, ret = lookup_block_ref(root, blocknr, &refs); if (refs != 1 || *level == 1) { path->slots[*level]++; - ret = btrfs_free_extent(root, blocknr, 1); + ret = btrfs_free_extent(root, blocknr, 1, 1); BUG_ON(ret); continue; } @@ -464,7 +470,7 @@ static int walk_down_tree(struct btrfs_root *root, path->slots[*level] = 0; } out: - ret = btrfs_free_extent(root, path->nodes[*level]->blocknr, 1); + ret = btrfs_free_extent(root, path->nodes[*level]->blocknr, 1, 1); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -492,7 +498,7 @@ static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, return 0; } else { ret = btrfs_free_extent(root, - path->nodes[*level]->blocknr, 1); + path->nodes[*level]->blocknr, 1, 1); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; -- cgit v1.2.3-70-g09d2 From e089f05c18ab36ed5fa7e2319052e03ab800d518 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 16 Mar 2007 16:20:31 -0400 Subject: Btrfs: transaction handles everywhere Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 4 +- fs/btrfs/TODO | 2 + fs/btrfs/ctree.c | 204 +++++++++++++++++++++++++------------------------ fs/btrfs/ctree.h | 63 +++++++++------ fs/btrfs/debug-tree.c | 1 + fs/btrfs/dir-item.c | 20 +++-- fs/btrfs/dir-test.c | 70 ++++++++++------- fs/btrfs/disk-io.c | 60 +++++++++------ fs/btrfs/disk-io.h | 16 ++-- fs/btrfs/extent-tree.c | 135 +++++++++++++++++--------------- fs/btrfs/inode-item.c | 15 ++-- fs/btrfs/print-tree.h | 4 +- fs/btrfs/quick-test.c | 23 +++--- fs/btrfs/random-test.c | 62 ++++++++------- fs/btrfs/root-tree.c | 23 +++--- fs/btrfs/transaction.h | 27 +++++++ 16 files changed, 426 insertions(+), 303 deletions(-) create mode 100644 fs/btrfs/transaction.h (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 36964f54bb4..76bcd87d857 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -1,5 +1,5 @@ CC=gcc -CFLAGS = -g -Wall +CFLAGS = -g -Wall -Werror headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o \ root-tree.o dir-item.o hash.o file-item.o inode-item.o @@ -14,7 +14,7 @@ check=sparse $(CHECKFLAGS) $(check) $< $(CC) $(CFLAGS) -c $< -all: tester debug-tree quick-test dir-test +all: tester debug-tree quick-test dir-test tags debug-tree: $(objects) debug-tree.o gcc $(CFLAGS) -o debug-tree $(objects) debug-tree.o diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 2ae4b3aae1e..e9894999322 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -8,6 +8,8 @@ * Add block mapping tree (simple dm layer) * Add simple tree locking (semaphore per tree) * Make allocator smarter +* Only pin blocks allocated in this transaction +* Add transaction handles * Do actual block accounting * Check compat and incompat flags on the inode * Port into the kernel diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 13128b5ed65..17a3ff2f182 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -6,17 +6,18 @@ #include "disk-io.h" #include "print-tree.h" -static int split_node(struct btrfs_root *root, struct btrfs_path *path, - int level); -static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, - int data_size); -static int push_node_left(struct btrfs_root *root, struct btrfs_buffer *dst, - struct btrfs_buffer *src); -static int balance_node_right(struct btrfs_root *root, - struct btrfs_buffer *dst_buf, +static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int level); +static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int data_size); +static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_buffer *dst, struct btrfs_buffer + *src); +static int balance_node_right(struct btrfs_trans_handle *trans, struct + btrfs_root *root, struct btrfs_buffer *dst_buf, struct btrfs_buffer *src_buf); -static int del_ptr(struct btrfs_root *root, struct btrfs_path *path, int level, - int slot); +static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_path *path, int level, int slot); inline void btrfs_init_path(struct btrfs_path *p) { @@ -34,11 +35,10 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) memset(p, 0, sizeof(*p)); } -static int btrfs_cow_block(struct btrfs_root *root, - struct btrfs_buffer *buf, - struct btrfs_buffer *parent, - int parent_slot, - struct btrfs_buffer **cow_ret) +static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_buffer *buf, struct btrfs_buffer + *parent, int parent_slot, struct btrfs_buffer + **cow_ret) { struct btrfs_buffer *cow; @@ -46,22 +46,22 @@ static int btrfs_cow_block(struct btrfs_root *root, *cow_ret = buf; return 0; } - cow = btrfs_alloc_free_block(root); + cow = btrfs_alloc_free_block(trans, root); memcpy(&cow->node, &buf->node, root->blocksize); btrfs_set_header_blocknr(&cow->node.header, cow->blocknr); *cow_ret = cow; - btrfs_inc_ref(root, buf); + btrfs_inc_ref(trans, root, buf); if (buf == root->node) { root->node = cow; cow->count++; if (buf != root->commit_root) - btrfs_free_extent(root, buf->blocknr, 1, 1); + btrfs_free_extent(trans, root, buf->blocknr, 1, 1); btrfs_block_release(root, buf); } else { btrfs_set_node_blockptr(&parent->node, parent_slot, cow->blocknr); BUG_ON(list_empty(&parent->dirty)); - btrfs_free_extent(root, buf->blocknr, 1, 1); + btrfs_free_extent(trans, root, buf->blocknr, 1, 1); } btrfs_block_release(root, buf); return 0; @@ -266,8 +266,8 @@ static struct btrfs_buffer *read_node_slot(struct btrfs_root *root, return read_tree_block(root, btrfs_node_blockptr(node, slot)); } -static int balance_level(struct btrfs_root *root, struct btrfs_path *path, - int level) +static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int level) { struct btrfs_buffer *right_buf; struct btrfs_buffer *mid_buf; @@ -310,8 +310,8 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, btrfs_block_release(root, mid_buf); /* once for the root ptr */ btrfs_block_release(root, mid_buf); - clean_tree_block(root, mid_buf); - return btrfs_free_extent(root, blocknr, 1, 1); + clean_tree_block(trans, root, mid_buf); + return btrfs_free_extent(trans, root, blocknr, 1, 1); } parent = &parent_buf->node; @@ -324,11 +324,11 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, /* first, try to make some room in the middle buffer */ if (left_buf) { - btrfs_cow_block(root, left_buf, parent_buf, - pslot - 1, &left_buf); + btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1, + &left_buf); left = &left_buf->node; orig_slot += btrfs_header_nritems(&left->header); - wret = push_node_left(root, left_buf, mid_buf); + wret = push_node_left(trans, root, left_buf, mid_buf); if (wret < 0) ret = wret; } @@ -337,22 +337,23 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, * then try to empty the right most buffer into the middle */ if (right_buf) { - btrfs_cow_block(root, right_buf, parent_buf, - pslot + 1, &right_buf); + btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1, + &right_buf); right = &right_buf->node; - wret = push_node_left(root, mid_buf, right_buf); + wret = push_node_left(trans, root, mid_buf, right_buf); if (wret < 0) ret = wret; if (btrfs_header_nritems(&right->header) == 0) { u64 blocknr = right_buf->blocknr; btrfs_block_release(root, right_buf); - clean_tree_block(root, right_buf); + clean_tree_block(trans, root, right_buf); right_buf = NULL; right = NULL; - wret = del_ptr(root, path, level + 1, pslot + 1); + wret = del_ptr(trans, root, path, level + 1, pslot + + 1); if (wret) ret = wret; - wret = btrfs_free_extent(root, blocknr, 1, 1); + wret = btrfs_free_extent(trans, root, blocknr, 1, 1); if (wret) ret = wret; } else { @@ -373,7 +374,7 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, * right */ BUG_ON(!left_buf); - wret = balance_node_right(root, mid_buf, left_buf); + wret = balance_node_right(trans, root, mid_buf, left_buf); if (wret < 0) ret = wret; BUG_ON(wret == 1); @@ -382,13 +383,13 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, /* we've managed to empty the middle node, drop it */ u64 blocknr = mid_buf->blocknr; btrfs_block_release(root, mid_buf); - clean_tree_block(root, mid_buf); + clean_tree_block(trans, root, mid_buf); mid_buf = NULL; mid = NULL; - wret = del_ptr(root, path, level + 1, pslot); + wret = del_ptr(trans, root, path, level + 1, pslot); if (wret) ret = wret; - wret = btrfs_free_extent(root, blocknr, 1, 1); + wret = btrfs_free_extent(trans, root, blocknr, 1, 1); if (wret) ret = wret; } else { @@ -438,8 +439,9 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if * possible) */ -int btrfs_search_slot(struct btrfs_root *root, struct btrfs_key *key, - struct btrfs_path *p, int ins_len, int cow) +int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *key, struct btrfs_path *p, int + ins_len, int cow) { struct btrfs_buffer *b; struct btrfs_buffer *cow_buf; @@ -455,8 +457,9 @@ again: level = btrfs_header_level(&b->node.header); if (cow) { int wret; - wret = btrfs_cow_block(root, b, p->nodes[level + 1], - p->slots[level + 1], &cow_buf); + wret = btrfs_cow_block(trans, root, b, p->nodes[level + + 1], p->slots[level + 1], + &cow_buf); b = cow_buf; } BUG_ON(!cow && ins_len); @@ -472,7 +475,7 @@ again: p->slots[level] = slot; if (ins_len > 0 && btrfs_header_nritems(&c->header) == BTRFS_NODEPTRS_PER_BLOCK(root)) { - int sret = split_node(root, p, level); + int sret = split_node(trans, root, p, level); BUG_ON(sret > 0); if (sret) return sret; @@ -480,7 +483,8 @@ again: c = &b->node; slot = p->slots[level]; } else if (ins_len < 0) { - int sret = balance_level(root, p, level); + int sret = balance_level(trans, root, p, + level); if (sret) return sret; b = p->nodes[level]; @@ -496,7 +500,7 @@ again: p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, l) < sizeof(struct btrfs_item) + ins_len) { - int sret = split_leaf(root, p, ins_len); + int sret = split_leaf(trans, root, p, ins_len); BUG_ON(sret > 0); if (sret) return sret; @@ -519,9 +523,9 @@ again: * If this fails to write a tree block, it returns -1, but continues * fixing up the blocks in ram so the tree is consistent. */ -static int fixup_low_keys(struct btrfs_root *root, - struct btrfs_path *path, struct btrfs_disk_key *key, - int level) +static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, struct btrfs_disk_key + *key, int level) { int i; int ret = 0; @@ -546,8 +550,9 @@ static int fixup_low_keys(struct btrfs_root *root, * returns 0 if some ptrs were pushed left, < 0 if there was some horrible * error, and > 0 if there was no room in the left hand block. */ -static int push_node_left(struct btrfs_root *root, struct btrfs_buffer *dst_buf, - struct btrfs_buffer *src_buf) +static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_buffer *dst_buf, struct + btrfs_buffer *src_buf) { struct btrfs_node *src = &src_buf->node; struct btrfs_node *dst = &dst_buf->node; @@ -589,8 +594,8 @@ static int push_node_left(struct btrfs_root *root, struct btrfs_buffer *dst_buf, * * this will only push up to 1/2 the contents of the left node over */ -static int balance_node_right(struct btrfs_root *root, - struct btrfs_buffer *dst_buf, +static int balance_node_right(struct btrfs_trans_handle *trans, struct + btrfs_root *root, struct btrfs_buffer *dst_buf, struct btrfs_buffer *src_buf) { struct btrfs_node *src = &src_buf->node; @@ -635,8 +640,8 @@ static int balance_node_right(struct btrfs_root *root, * * returns zero on success or < 0 on failure. */ -static int insert_new_root(struct btrfs_root *root, - struct btrfs_path *path, int level) +static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int level) { struct btrfs_buffer *t; struct btrfs_node *lower; @@ -646,7 +651,7 @@ static int insert_new_root(struct btrfs_root *root, BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - t = btrfs_alloc_free_block(root); + t = btrfs_alloc_free_block(trans, root); c = &t->node; memset(c, 0, root->blocksize); btrfs_set_header_nritems(&c->header, 1); @@ -679,9 +684,9 @@ static int insert_new_root(struct btrfs_root *root, * * returns zero on success and < 0 on any error */ -static int insert_ptr(struct btrfs_root *root, - struct btrfs_path *path, struct btrfs_disk_key *key, - u64 blocknr, int slot, int level) +static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, struct btrfs_disk_key + *key, u64 blocknr, int slot, int level) { struct btrfs_node *lower; int nritems; @@ -713,8 +718,8 @@ static int insert_ptr(struct btrfs_root *root, * * returns 0 on success and < 0 on failure */ -static int split_node(struct btrfs_root *root, struct btrfs_path *path, - int level) +static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int level) { struct btrfs_buffer *t; struct btrfs_node *c; @@ -729,12 +734,12 @@ static int split_node(struct btrfs_root *root, struct btrfs_path *path, c = &t->node; if (t == root->node) { /* trying to split the root, lets make a new one */ - ret = insert_new_root(root, path, level + 1); + ret = insert_new_root(trans, root, path, level + 1); if (ret) return ret; } c_nritems = btrfs_header_nritems(&c->header); - split_buffer = btrfs_alloc_free_block(root); + split_buffer = btrfs_alloc_free_block(trans, root); split = &split_buffer->node; btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); btrfs_set_header_blocknr(&split->header, split_buffer->blocknr); @@ -748,7 +753,7 @@ static int split_node(struct btrfs_root *root, struct btrfs_path *path, ret = 0; BUG_ON(list_empty(&t->dirty)); - wret = insert_ptr(root, path, &split->ptrs[0].key, + wret = insert_ptr(trans, root, path, &split->ptrs[0].key, split_buffer->blocknr, path->slots[level + 1] + 1, level + 1); if (wret) @@ -790,8 +795,8 @@ static int leaf_space_used(struct btrfs_leaf *l, int start, int nr) * returns 1 if the push failed because the other node didn't have enough * room, 0 if everything worked out and < 0 if there were major errors. */ -static int push_leaf_right(struct btrfs_root *root, struct btrfs_path *path, - int data_size) +static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int data_size) { struct btrfs_buffer *left_buf = path->nodes[0]; struct btrfs_leaf *left = &left_buf->leaf; @@ -824,7 +829,7 @@ static int push_leaf_right(struct btrfs_root *root, struct btrfs_path *path, return 1; } /* cow and double check */ - btrfs_cow_block(root, right_buf, upper, slot + 1, &right_buf); + btrfs_cow_block(trans, root, right_buf, upper, slot + 1, &right_buf); right = &right_buf->leaf; free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { @@ -897,8 +902,8 @@ static int push_leaf_right(struct btrfs_root *root, struct btrfs_path *path, * push some data in the path leaf to the left, trying to free up at * least data_size bytes. returns zero if the push worked, nonzero otherwise */ -static int push_leaf_left(struct btrfs_root *root, struct btrfs_path *path, - int data_size) +static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int data_size) { struct btrfs_buffer *right_buf = path->nodes[0]; struct btrfs_leaf *right = &right_buf->leaf; @@ -931,7 +936,7 @@ static int push_leaf_left(struct btrfs_root *root, struct btrfs_path *path, } /* cow and double check */ - btrfs_cow_block(root, t, path->nodes[1], slot - 1, &t); + btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t); left = &t->leaf; free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { @@ -997,7 +1002,7 @@ static int push_leaf_left(struct btrfs_root *root, struct btrfs_path *path, BUG_ON(list_empty(&t->dirty)); BUG_ON(list_empty(&right_buf->dirty)); - wret = fixup_low_keys(root, path, &right->items[0].key, 1); + wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); if (wret) ret = wret; @@ -1021,8 +1026,8 @@ static int push_leaf_left(struct btrfs_root *root, struct btrfs_path *path, * * returns 0 if all went well and < 0 on failure. */ -static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, - int data_size) +static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int data_size) { struct btrfs_buffer *l_buf; struct btrfs_leaf *l; @@ -1038,11 +1043,11 @@ static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, int ret; int wret; - wret = push_leaf_left(root, path, data_size); + wret = push_leaf_left(trans, root, path, data_size); if (wret < 0) return wret; if (wret) { - wret = push_leaf_right(root, path, data_size); + wret = push_leaf_right(trans, root, path, data_size); if (wret < 0) return wret; } @@ -1055,14 +1060,14 @@ static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, return 0; if (!path->nodes[1]) { - ret = insert_new_root(root, path, 1); + ret = insert_new_root(trans, root, path, 1); if (ret) return ret; } slot = path->slots[0]; nritems = btrfs_header_nritems(&l->header); mid = (nritems + 1)/ 2; - right_buffer = btrfs_alloc_free_block(root); + right_buffer = btrfs_alloc_free_block(trans, root); BUG_ON(!right_buffer); BUG_ON(mid == nritems); right = &right_buffer->leaf; @@ -1100,7 +1105,7 @@ static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, btrfs_set_header_nritems(&l->header, mid); ret = 0; - wret = insert_ptr(root, path, &right->items[0].key, + wret = insert_ptr(trans, root, path, &right->items[0].key, right_buffer->blocknr, path->slots[1] + 1, 1); if (wret) ret = wret; @@ -1122,8 +1127,9 @@ static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, * Given a key and some data, insert an item into the tree. * This does all the path init required, making room in the tree if needed. */ -int btrfs_insert_empty_item(struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *cpu_key, u32 data_size) +int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, struct btrfs_key + *cpu_key, u32 data_size) { int ret = 0; int slot; @@ -1139,7 +1145,7 @@ int btrfs_insert_empty_item(struct btrfs_root *root, struct btrfs_path *path, /* create a root if there isn't one */ if (!root->node) BUG(); - ret = btrfs_search_slot(root, cpu_key, path, data_size, 1); + ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1); if (ret == 0) { btrfs_release_path(root, path); return -EEXIST; @@ -1193,7 +1199,7 @@ int btrfs_insert_empty_item(struct btrfs_root *root, struct btrfs_path *path, ret = 0; if (slot == 0) - ret = fixup_low_keys(root, path, &disk_key, 1); + ret = fixup_low_keys(trans, root, path, &disk_key, 1); BUG_ON(list_empty(&leaf_buf->dirty)); if (btrfs_leaf_free_space(root, leaf) < 0) @@ -1207,15 +1213,16 @@ out: * Given a key and some data, insert an item into the tree. * This does all the path init required, making room in the tree if needed. */ -int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, - void *data, u32 data_size) +int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *cpu_key, void *data, u32 + data_size) { int ret = 0; struct btrfs_path path; u8 *ptr; btrfs_init_path(&path); - ret = btrfs_insert_empty_item(root, &path, cpu_key, data_size); + ret = btrfs_insert_empty_item(trans, root, &path, cpu_key, data_size); if (!ret) { ptr = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], u8); memcpy(ptr, data, data_size); @@ -1231,8 +1238,8 @@ int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, * continuing all the way the root if required. The root is converted into * a leaf if all the nodes are emptied. */ -static int del_ptr(struct btrfs_root *root, struct btrfs_path *path, int level, - int slot) +static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_path *path, int level, int slot) { struct btrfs_node *node; struct btrfs_buffer *parent = path->nodes[level]; @@ -1253,7 +1260,7 @@ static int del_ptr(struct btrfs_root *root, struct btrfs_path *path, int level, /* just turn the root into a leaf and break */ btrfs_set_header_level(&root->node->node.header, 0); } else if (slot == 0) { - wret = fixup_low_keys(root, path, &node->ptrs[0].key, + wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key, level + 1); if (wret) ret = wret; @@ -1266,7 +1273,8 @@ static int del_ptr(struct btrfs_root *root, struct btrfs_path *path, int level, * delete the item at the leaf level in path. If that empties * the leaf, remove it from the tree */ -int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path) +int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_path *path) { int slot; struct btrfs_leaf *leaf; @@ -1306,19 +1314,20 @@ int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path) btrfs_set_header_level(&leaf->header, 0); BUG_ON(list_empty(&leaf_buf->dirty)); } else { - clean_tree_block(root, leaf_buf); - wret = del_ptr(root, path, 1, path->slots[1]); + clean_tree_block(trans, root, leaf_buf); + wret = del_ptr(trans, root, path, 1, path->slots[1]); if (wret) ret = wret; - wret = btrfs_free_extent(root, leaf_buf->blocknr, 1, 1); + wret = btrfs_free_extent(trans, root, + leaf_buf->blocknr, 1, 1); if (wret) ret = wret; } } else { int used = leaf_space_used(leaf, 0, nritems); if (slot == 0) { - wret = fixup_low_keys(root, path, - &leaf->items[0].key, 1); + wret = fixup_low_keys(trans, root, path, + &leaf->items[0].key, 1); if (wret) ret = wret; } @@ -1332,23 +1341,24 @@ int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path) */ slot = path->slots[1]; leaf_buf->count++; - wret = push_leaf_left(root, path, 1); + wret = push_leaf_left(trans, root, path, 1); if (wret < 0) ret = wret; if (path->nodes[0] == leaf_buf && btrfs_header_nritems(&leaf->header)) { - wret = push_leaf_right(root, path, 1); + wret = push_leaf_right(trans, root, path, 1); if (wret < 0) ret = wret; } if (btrfs_header_nritems(&leaf->header) == 0) { u64 blocknr = leaf_buf->blocknr; - clean_tree_block(root, leaf_buf); - wret = del_ptr(root, path, 1, slot); + clean_tree_block(trans, root, leaf_buf); + wret = del_ptr(trans, root, path, 1, slot); if (wret) ret = wret; btrfs_block_release(root, leaf_buf); - wret = btrfs_free_extent(root, blocknr, 1, 1); + wret = btrfs_free_extent(trans, root, blocknr, + 1, 1); if (wret) ret = wret; } else { @@ -1401,5 +1411,3 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) } return 0; } - - diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d15a2ed9507..68f0af39777 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -4,6 +4,8 @@ #include "list.h" #include "kerncompat.h" +struct btrfs_trans_handle; + #define BTRFS_MAGIC "_BtRfS_M" #define BTRFS_ROOT_TREE_OBJECTID 1 @@ -200,6 +202,7 @@ struct btrfs_root { struct btrfs_root_item root_item; struct btrfs_key root_key; u32 blocksize; + struct btrfs_trans_handle *running_transaction; }; /* the lower bits in the key flags defines the item type */ @@ -656,34 +659,46 @@ static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) ((type *)(btrfs_leaf_data(leaf) + \ btrfs_item_offset((leaf)->items + (slot)))) -struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root); -int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf); -int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks, - int pin); -int btrfs_search_slot(struct btrfs_root *root, struct btrfs_key *key, - struct btrfs_path *p, int ins_len, int cow); +struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_buffer *buf); +int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root + *root, u64 blocknr, u64 num_blocks, int pin); +int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *key, struct btrfs_path *p, int + ins_len, int cow); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); void btrfs_init_path(struct btrfs_path *p); -int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path); -int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *key, - void *data, u32 data_size); -int btrfs_insert_empty_item(struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *cpu_key, u32 data_size); +int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_path *path); +int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *key, void *data, u32 data_size); +int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, struct btrfs_key + *cpu_key, u32 data_size); int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf); -int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap); -int btrfs_finish_extent_commit(struct btrfs_root *root); -int btrfs_del_root(struct btrfs_root *root, struct btrfs_key *key); -int btrfs_insert_root(struct btrfs_root *root, struct btrfs_key *key, - struct btrfs_root_item *item); -int btrfs_update_root(struct btrfs_root *root, struct btrfs_key *key, - struct btrfs_root_item *item); -int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, - struct btrfs_root_item *item, struct btrfs_key *key); -int btrfs_insert_dir_item(struct btrfs_root *root, char *name, int name_len, - u64 dir, u64 objectid, u8 type); -int btrfs_lookup_dir_item(struct btrfs_root *root, struct btrfs_path *path, - u64 dir, char *name, int name_len, int mod); +int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_buffer *snap); +int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct + btrfs_root *root); +int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_key *key); +int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *key, struct btrfs_root_item + *item); +int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *key, struct btrfs_root_item + *item); +int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct + btrfs_root_item *item, struct btrfs_key *key); +int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, char *name, int name_len, u64 dir, u64 + objectid, u8 type); +int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, u64 dir, char *name, + int name_len, int mod); int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, char *name, int name_len); #endif diff --git a/fs/btrfs/debug-tree.c b/fs/btrfs/debug-tree.c index de45fb4dfdd..91dea7a0a47 100644 --- a/fs/btrfs/debug-tree.c +++ b/fs/btrfs/debug-tree.c @@ -5,6 +5,7 @@ #include "ctree.h" #include "disk-io.h" #include "print-tree.h" +#include "transaction.h" int main(int ac, char **av) { struct btrfs_super_block super; diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index a42a67b9975..949c4e52679 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -5,9 +5,11 @@ #include "ctree.h" #include "disk-io.h" #include "hash.h" +#include "transaction.h" -int btrfs_insert_dir_item(struct btrfs_root *root, char *name, int name_len, - u64 dir, u64 objectid, u8 type) +int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, char *name, int name_len, u64 dir, u64 + objectid, u8 type) { int ret = 0; struct btrfs_path path; @@ -23,7 +25,7 @@ int btrfs_insert_dir_item(struct btrfs_root *root, char *name, int name_len, BUG_ON(ret); btrfs_init_path(&path); data_size = sizeof(*dir_item) + name_len; - ret = btrfs_insert_empty_item(root, &path, &key, data_size); + ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size); if (ret) goto out; @@ -40,8 +42,9 @@ out: return ret; } -int btrfs_lookup_dir_item(struct btrfs_root *root, struct btrfs_path *path, - u64 dir, char *name, int name_len, int mod) +int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, u64 dir, char *name, + int name_len, int mod) { int ret; struct btrfs_key key; @@ -53,12 +56,13 @@ int btrfs_lookup_dir_item(struct btrfs_root *root, struct btrfs_path *path, btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); - ret = btrfs_search_slot(root, &key, path, ins_len, cow); + ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); return ret; } -int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, - char *name, int name_len) +int btrfs_match_dir_item_name(struct btrfs_root *root, + struct btrfs_path *path, char + *name, int name_len) { struct btrfs_dir_item *dir_item; char *name_ptr; diff --git a/fs/btrfs/dir-test.c b/fs/btrfs/dir-test.c index f73aa762339..e908c0c588c 100644 --- a/fs/btrfs/dir-test.c +++ b/fs/btrfs/dir-test.c @@ -8,6 +8,7 @@ #include "disk-io.h" #include "print-tree.h" #include "hash.h" +#include "transaction.h" int keep_running = 1; struct btrfs_super_block super; @@ -38,7 +39,8 @@ again: return 0; } -static int ins_one(struct btrfs_root *root, struct radix_tree_root *radix) +static int ins_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct radix_tree_root *radix) { int ret; char buf[128]; @@ -48,8 +50,8 @@ static int ins_one(struct btrfs_root *root, struct radix_tree_root *radix) find_num(radix, &oid, 0); sprintf(buf, "str-%lu", oid); - ret = btrfs_insert_dir_item(root, buf, strlen(buf), dir_oid, file_oid, - 1); + ret = btrfs_insert_dir_item(trans, root, buf, strlen(buf), dir_oid, + file_oid, 1); if (ret) goto error; @@ -68,7 +70,8 @@ error: * check */ btrfs_init_path(&path); - ret = btrfs_lookup_dir_item(root, &path, dir_oid, buf, strlen(buf), 0); + ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf, + strlen(buf), 0); if (ret) goto fatal_release; if (!btrfs_match_dir_item_name(root, &path, buf, strlen(buf))) { @@ -96,7 +99,8 @@ fatal: return -1; } -static int insert_dup(struct btrfs_root *root, struct radix_tree_root *radix) +static int insert_dup(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct radix_tree_root *radix) { int ret; char buf[128]; @@ -107,8 +111,8 @@ static int insert_dup(struct btrfs_root *root, struct radix_tree_root *radix) return 0; sprintf(buf, "str-%lu", oid); - ret = btrfs_insert_dir_item(root, buf, strlen(buf), dir_oid, file_oid, - 1); + ret = btrfs_insert_dir_item(trans, root, buf, strlen(buf), dir_oid, + file_oid, 1); if (ret != -EEXIST) { printf("insert on %s gave us %d\n", buf, ret); return 1; @@ -116,7 +120,8 @@ static int insert_dup(struct btrfs_root *root, struct radix_tree_root *radix) return 0; } -static int del_one(struct btrfs_root *root, struct radix_tree_root *radix) +static int del_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct radix_tree_root *radix) { int ret; char buf[128]; @@ -129,10 +134,11 @@ static int del_one(struct btrfs_root *root, struct radix_tree_root *radix) return 0; sprintf(buf, "str-%lu", oid); btrfs_init_path(&path); - ret = btrfs_lookup_dir_item(root, &path, dir_oid, buf, strlen(buf), -1); + ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf, + strlen(buf), -1); if (ret) goto out_release; - ret = btrfs_del_item(root, &path); + ret = btrfs_del_item(trans, root, &path); if (ret) goto out_release; btrfs_release_path(root, &path); @@ -149,7 +155,8 @@ out: return -1; } -static int lookup_item(struct btrfs_root *root, struct radix_tree_root *radix) +static int lookup_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct radix_tree_root *radix) { struct btrfs_path path; char buf[128]; @@ -161,7 +168,8 @@ static int lookup_item(struct btrfs_root *root, struct radix_tree_root *radix) return 0; sprintf(buf, "str-%lu", oid); btrfs_init_path(&path); - ret = btrfs_lookup_dir_item(root, &path, dir_oid, buf, strlen(buf), 0); + ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf, + strlen(buf), 0); btrfs_release_path(root, &path); if (ret) { printf("unable to find key %lu\n", oid); @@ -170,7 +178,8 @@ static int lookup_item(struct btrfs_root *root, struct radix_tree_root *radix) return 0; } -static int lookup_enoent(struct btrfs_root *root, struct radix_tree_root *radix) +static int lookup_enoent(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct radix_tree_root *radix) { struct btrfs_path path; char buf[128]; @@ -182,7 +191,8 @@ static int lookup_enoent(struct btrfs_root *root, struct radix_tree_root *radix) return 0; sprintf(buf, "str-%lu", oid); btrfs_init_path(&path); - ret = btrfs_lookup_dir_item(root, &path, dir_oid, buf, strlen(buf), 0); + ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf, + strlen(buf), 0); btrfs_release_path(root, &path); if (!ret) { printf("able to find key that should not exist %lu\n", oid); @@ -191,8 +201,8 @@ static int lookup_enoent(struct btrfs_root *root, struct radix_tree_root *radix) return 0; } -static int empty_tree(struct btrfs_root *root, struct radix_tree_root *radix, - int nr) +static int empty_tree(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct radix_tree_root *radix, int nr) { struct btrfs_path path; struct btrfs_key key; @@ -211,7 +221,7 @@ static int empty_tree(struct btrfs_root *root, struct radix_tree_root *radix, key.objectid = dir_oid; while(nr-- >= 0) { btrfs_init_path(&path); - ret = btrfs_search_slot(root, &key, &path, -1, 1); + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); if (ret < 0) { btrfs_release_path(root, &path); return ret; @@ -231,7 +241,7 @@ static int empty_tree(struct btrfs_root *root, struct radix_tree_root *radix, BUG_ON(found_len > 128); buf[found_len] = '\0'; found = atoi(buf + 4); - ret = btrfs_del_item(root, &path); + ret = btrfs_del_item(trans, root, &path); count++; if (ret) { fprintf(stderr, @@ -252,19 +262,19 @@ error: return -1; } -static int fill_tree(struct btrfs_root *root, struct radix_tree_root *radix, - int count) +static int fill_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct radix_tree_root *radix, int count) { int i; int ret = 0; for (i = 0; i < count; i++) { - ret = ins_one(root, radix); + ret = ins_one(trans, root, radix); if (ret) { fprintf(stderr, "fill failed\n"); goto out; } if (i % 1000 == 0) { - ret = btrfs_commit_transaction(root, &super); + ret = btrfs_commit_transaction(trans, root, &super); if (ret) { fprintf(stderr, "fill commit failed\n"); return ret; @@ -280,7 +290,8 @@ out: return ret; } -static int bulk_op(struct btrfs_root *root, struct radix_tree_root *radix) +static int bulk_op(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct radix_tree_root *radix) { int ret; int nr = rand() % 5000; @@ -289,17 +300,18 @@ static int bulk_op(struct btrfs_root *root, struct radix_tree_root *radix) /* do the bulk op much less frequently */ if (run_nr++ % 100) return 0; - ret = empty_tree(root, radix, nr); + ret = empty_tree(trans, root, radix, nr); if (ret) return ret; - ret = fill_tree(root, radix, nr); + ret = fill_tree(trans, root, radix, nr); if (ret) return ret; return 0; } -int (*ops[])(struct btrfs_root *root, struct radix_tree_root *radix) = +int (*ops[])(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct + radix_tree_root *radix) = { ins_one, insert_dup, del_one, lookup_item, lookup_enoent, bulk_op }; @@ -330,11 +342,13 @@ int main(int ac, char **av) int init_fill_count = 800000; int err = 0; int initial_only = 0; + struct btrfs_trans_handle *trans; radix_tree_init(); printf("removing old tree\n"); unlink("dbfile"); root = open_ctree("dbfile", &super); + trans = btrfs_start_transaction(root, 1); signal(SIGTERM, sigstopper); signal(SIGINT, sigstopper); @@ -353,7 +367,7 @@ int main(int ac, char **av) } } printf("initial fill\n"); - ret = fill_tree(root, &radix, init_fill_count); + ret = fill_tree(trans, root, &radix, init_fill_count); printf("starting run\n"); if (ret) { err = ret; @@ -377,7 +391,7 @@ int main(int ac, char **av) root = open_ctree("dbfile", &super); } while(count--) { - ret = ops[op](root, &radix); + ret = ops[op](trans, root, &radix); if (ret) { fprintf(stderr, "op %d failed %d:%d\n", op, i, iterations); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8d9457b5aef..25ce07908ee 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -9,6 +9,7 @@ #include "radix-tree.h" #include "ctree.h" #include "disk-io.h" +#include "transaction.h" static int allocated_blocks = 0; int cache_max = 10000; @@ -107,7 +108,8 @@ struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) return buf; } -int dirty_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) +int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_buffer *buf) { if (!list_empty(&buf->dirty)) return 0; @@ -116,7 +118,8 @@ int dirty_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) return 0; } -int clean_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) +int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_buffer *buf) { if (!list_empty(&buf->dirty)) { list_del_init(&buf->dirty); @@ -125,7 +128,8 @@ int clean_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) return 0; } -int write_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) +int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_buffer *buf) { u64 blocknr = buf->blocknr; loff_t offset = blocknr * root->blocksize; @@ -139,7 +143,8 @@ int write_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) return 0; } -static int __commit_transaction(struct btrfs_root *root) +static int __commit_transaction(struct btrfs_trans_handle *trans, struct + btrfs_root *root) { struct btrfs_buffer *b; int ret = 0; @@ -147,7 +152,7 @@ static int __commit_transaction(struct btrfs_root *root) while(!list_empty(&root->trans)) { b = list_entry(root->trans.next, struct btrfs_buffer, dirty); list_del_init(&b->dirty); - wret = write_tree_block(root, b); + wret = write_tree_block(trans, root, b); if (wret) ret = wret; btrfs_block_release(root, b); @@ -155,8 +160,9 @@ static int __commit_transaction(struct btrfs_root *root) return ret; } -static int commit_extent_and_tree_roots(struct btrfs_root *tree_root, - struct btrfs_root *extent_root) +static int commit_extent_and_tree_roots(struct btrfs_trans_handle *trans, + struct btrfs_root *tree_root, struct + btrfs_root *extent_root) { int ret; u64 old_extent_block; @@ -167,24 +173,24 @@ static int commit_extent_and_tree_roots(struct btrfs_root *tree_root, break; btrfs_set_root_blocknr(&extent_root->root_item, extent_root->node->blocknr); - ret = btrfs_update_root(tree_root, + ret = btrfs_update_root(trans, tree_root, &extent_root->root_key, &extent_root->root_item); BUG_ON(ret); } - __commit_transaction(extent_root); - __commit_transaction(tree_root); + __commit_transaction(trans, extent_root); + __commit_transaction(trans, tree_root); return 0; } -int btrfs_commit_transaction(struct btrfs_root *root, - struct btrfs_super_block *s) +int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct + btrfs_root *root, struct btrfs_super_block *s) { int ret = 0; struct btrfs_buffer *snap = root->commit_root; struct btrfs_key snap_key; - ret = __commit_transaction(root); + ret = __commit_transaction(trans, root); BUG_ON(ret); if (root->commit_root == root->node) @@ -194,23 +200,24 @@ int btrfs_commit_transaction(struct btrfs_root *root, root->root_key.offset++; btrfs_set_root_blocknr(&root->root_item, root->node->blocknr); - ret = btrfs_insert_root(root->tree_root, &root->root_key, + ret = btrfs_insert_root(trans, root->tree_root, &root->root_key, &root->root_item); BUG_ON(ret); - ret = commit_extent_and_tree_roots(root->tree_root, root->extent_root); + ret = commit_extent_and_tree_roots(trans, root->tree_root, + root->extent_root); BUG_ON(ret); - write_ctree_super(root, s); - btrfs_finish_extent_commit(root->extent_root); - btrfs_finish_extent_commit(root->tree_root); + write_ctree_super(trans, root, s); + btrfs_finish_extent_commit(trans, root->extent_root); + btrfs_finish_extent_commit(trans, root->tree_root); root->commit_root = root->node; root->node->count++; - ret = btrfs_drop_snapshot(root, snap); + ret = btrfs_drop_snapshot(trans, root, snap); BUG_ON(ret); - ret = btrfs_del_root(root->tree_root, &snap_key); + ret = btrfs_del_root(trans, root->tree_root, &snap_key); BUG_ON(ret); return ret; @@ -312,7 +319,8 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) return root; } -int write_ctree_super(struct btrfs_root *root, struct btrfs_super_block *s) +int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_super_block *s) { int ret; btrfs_set_super_root(s, root->tree_root->node->blocknr); @@ -338,10 +346,14 @@ static int drop_cache(struct btrfs_root *root) int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) { int ret; - btrfs_commit_transaction(root, s); - ret = commit_extent_and_tree_roots(root->tree_root, root->extent_root); + struct btrfs_trans_handle *trans; + + trans = root->running_transaction; + btrfs_commit_transaction(trans, root, s); + ret = commit_extent_and_tree_roots(trans, root->tree_root, + root->extent_root); BUG_ON(ret); - write_ctree_super(root, s); + write_ctree_super(trans, root, s); drop_cache(root->extent_root); drop_cache(root->tree_root); drop_cache(root); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 5771bb90acb..24a9e77c831 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -15,15 +15,19 @@ struct btrfs_buffer { struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr); struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr); -int write_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf); -int dirty_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf); -int clean_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf); -int btrfs_commit_transaction(struct btrfs_root *root, - struct btrfs_super_block *s); +int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_buffer *buf); +int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_buffer *buf); +int clean_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_buffer *buf); +int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_super_block *s); struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *s); int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s); void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf); -int write_ctree_super(struct btrfs_root *root, struct btrfs_super_block *s); +int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_super_block *s); int mkfs(int fd, u64 num_blocks, u32 blocksize); #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4a40282b45f..c29b92d440e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5,12 +5,15 @@ #include "ctree.h" #include "disk-io.h" #include "print-tree.h" +#include "transaction.h" -static int find_free_extent(struct btrfs_root *orig_root, u64 num_blocks, - u64 search_start, u64 search_end, - struct btrfs_key *ins); -static int finish_current_insert(struct btrfs_root *extent_root); -static int run_pending(struct btrfs_root *extent_root); +static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root + *orig_root, u64 num_blocks, u64 search_start, u64 + search_end, struct btrfs_key *ins); +static int finish_current_insert(struct btrfs_trans_handle *trans, struct + btrfs_root *extent_root); +static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root + *extent_root); /* * pending extents are blocks that we're trying to allocate in the extent @@ -21,7 +24,8 @@ static int run_pending(struct btrfs_root *extent_root); */ #define CTREE_EXTENT_PENDING_DEL 0 -static int inc_block_ref(struct btrfs_root *root, u64 blocknr) +static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root + *root, u64 blocknr) { struct btrfs_path path; int ret; @@ -31,13 +35,13 @@ static int inc_block_ref(struct btrfs_root *root, u64 blocknr) struct btrfs_key ins; u32 refs; - find_free_extent(root->extent_root, 0, 0, (u64)-1, &ins); + find_free_extent(trans, root->extent_root, 0, 0, (u64)-1, &ins); btrfs_init_path(&path); key.objectid = blocknr; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = 1; - ret = btrfs_search_slot(root->extent_root, &key, &path, 0, 1); + ret = btrfs_search_slot(trans, root->extent_root, &key, &path, 0, 1); if (ret != 0) BUG(); BUG_ON(ret != 0); @@ -48,12 +52,13 @@ static int inc_block_ref(struct btrfs_root *root, u64 blocknr) BUG_ON(list_empty(&path.nodes[0]->dirty)); btrfs_release_path(root->extent_root, &path); - finish_current_insert(root->extent_root); - run_pending(root->extent_root); + finish_current_insert(trans, root->extent_root); + run_pending(trans, root->extent_root); return 0; } -static int lookup_block_ref(struct btrfs_root *root, u64 blocknr, u32 *refs) +static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root + *root, u64 blocknr, u32 *refs) { struct btrfs_path path; int ret; @@ -65,7 +70,7 @@ static int lookup_block_ref(struct btrfs_root *root, u64 blocknr, u32 *refs) key.offset = 1; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - ret = btrfs_search_slot(root->extent_root, &key, &path, 0, 0); + ret = btrfs_search_slot(trans, root->extent_root, &key, &path, 0, 0); if (ret != 0) BUG(); l = &path.nodes[0]->leaf; @@ -75,7 +80,8 @@ static int lookup_block_ref(struct btrfs_root *root, u64 blocknr, u32 *refs) return 0; } -int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf) +int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_buffer *buf) { u64 blocknr; int i; @@ -87,12 +93,13 @@ int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf) for (i = 0; i < btrfs_header_nritems(&buf->node.header); i++) { blocknr = btrfs_node_blockptr(&buf->node, i); - inc_block_ref(root, blocknr); + inc_block_ref(trans, root, blocknr); } return 0; } -int btrfs_finish_extent_commit(struct btrfs_root *root) +int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct + btrfs_root *root) { unsigned long gang[8]; u64 first = 0; @@ -116,7 +123,8 @@ int btrfs_finish_extent_commit(struct btrfs_root *root) return 0; } -static int finish_current_insert(struct btrfs_root *extent_root) +static int finish_current_insert(struct btrfs_trans_handle *trans, struct + btrfs_root *extent_root) { struct btrfs_key ins; struct btrfs_extent_item extent_item; @@ -132,8 +140,8 @@ static int finish_current_insert(struct btrfs_root *extent_root) for (i = 0; i < extent_root->current_insert.flags; i++) { ins.objectid = extent_root->current_insert.objectid + i; - ret = btrfs_insert_item(extent_root, &ins, &extent_item, - sizeof(extent_item)); + ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item, + sizeof(extent_item)); BUG_ON(ret); } extent_root->current_insert.offset = 0; @@ -143,8 +151,8 @@ static int finish_current_insert(struct btrfs_root *extent_root) /* * remove an extent from the root, returns 0 on success */ -static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks, - int pin) +static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root + *root, u64 blocknr, u64 num_blocks, int pin) { struct btrfs_path path; struct btrfs_key key; @@ -160,9 +168,9 @@ static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks, btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; - find_free_extent(root, 0, 0, (u64)-1, &ins); + find_free_extent(trans, root, 0, 0, (u64)-1, &ins); btrfs_init_path(&path); - ret = btrfs_search_slot(extent_root, &key, &path, -1, 1); + ret = btrfs_search_slot(trans, extent_root, &key, &path, -1, 1); if (ret) { printf("failed to find %Lu\n", key.objectid); btrfs_print_tree(extent_root, extent_root->node); @@ -183,14 +191,14 @@ static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks, BUG_ON(err); radix_tree_preload_end(); } - ret = btrfs_del_item(extent_root, &path); + ret = btrfs_del_item(trans, extent_root, &path); if (!pin && extent_root->last_insert.objectid > blocknr) extent_root->last_insert.objectid = blocknr; if (ret) BUG(); } btrfs_release_path(extent_root, &path); - finish_current_insert(extent_root); + finish_current_insert(trans, extent_root); return ret; } @@ -198,7 +206,8 @@ static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks, * find all the blocks marked as pending in the radix tree and remove * them from the extent map */ -static int del_pending_extents(struct btrfs_root *extent_root) +static int del_pending_extents(struct btrfs_trans_handle *trans, struct + btrfs_root *extent_root) { int ret; struct btrfs_buffer *gang[4]; @@ -212,7 +221,7 @@ static int del_pending_extents(struct btrfs_root *extent_root) if (!ret) break; for (i = 0; i < ret; i++) { - ret = __free_extent(extent_root, + ret = __free_extent(trans, extent_root, gang[i]->blocknr, 1, 1); radix_tree_tag_clear(&extent_root->cache_radix, gang[i]->blocknr, @@ -223,11 +232,12 @@ static int del_pending_extents(struct btrfs_root *extent_root) return 0; } -static int run_pending(struct btrfs_root *extent_root) +static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root + *extent_root) { while(radix_tree_tagged(&extent_root->cache_radix, CTREE_EXTENT_PENDING_DEL)) - del_pending_extents(extent_root); + del_pending_extents(trans, extent_root); return 0; } @@ -235,8 +245,8 @@ static int run_pending(struct btrfs_root *extent_root) /* * remove an extent from the root, returns 0 on success */ -int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks, - int pin) +int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root + *root, u64 blocknr, u64 num_blocks, int pin) { struct btrfs_root *extent_root = root->extent_root; struct btrfs_buffer *t; @@ -249,8 +259,8 @@ int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks, CTREE_EXTENT_PENDING_DEL); return 0; } - ret = __free_extent(root, blocknr, num_blocks, pin); - pending_ret = run_pending(root->extent_root); + ret = __free_extent(trans, root, blocknr, num_blocks, pin); + pending_ret = run_pending(trans, root->extent_root); return ret ? ret : pending_ret; } @@ -262,9 +272,9 @@ int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks, * ins->offset == number of blocks * Any available blocks before search_start are skipped. */ -static int find_free_extent(struct btrfs_root *orig_root, u64 num_blocks, - u64 search_start, u64 search_end, - struct btrfs_key *ins) +static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root + *orig_root, u64 num_blocks, u64 search_start, u64 + search_end, struct btrfs_key *ins) { struct btrfs_path path; struct btrfs_key key; @@ -290,7 +300,7 @@ check_failed: ins->objectid = search_start; ins->offset = 0; start_found = 0; - ret = btrfs_search_slot(root, ins, &path, 0, 0); + ret = btrfs_search_slot(trans, root, ins, &path, 0, 0); if (ret < 0) goto error; @@ -367,9 +377,9 @@ error: * * returns 0 if everything worked, non-zero otherwise. */ -static int alloc_extent(struct btrfs_root *root, u64 num_blocks, - u64 search_start, u64 search_end, u64 owner, - struct btrfs_key *ins) +static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root + *root, u64 num_blocks, u64 search_start, u64 + search_end, u64 owner, struct btrfs_key *ins) { int ret; int pending_ret; @@ -389,16 +399,16 @@ static int alloc_extent(struct btrfs_root *root, u64 num_blocks, extent_root->current_insert.flags++; return 0; } - ret = find_free_extent(root, num_blocks, search_start, + ret = find_free_extent(trans, root, num_blocks, search_start, search_end, ins); if (ret) return ret; - ret = btrfs_insert_item(extent_root, ins, &extent_item, - sizeof(extent_item)); + ret = btrfs_insert_item(trans, extent_root, ins, &extent_item, + sizeof(extent_item)); - finish_current_insert(extent_root); - pending_ret = run_pending(extent_root); + finish_current_insert(trans, extent_root); + pending_ret = run_pending(trans, extent_root); if (ret) return ret; if (pending_ret) @@ -410,13 +420,14 @@ static int alloc_extent(struct btrfs_root *root, u64 num_blocks, * helper function to allocate a block for a given tree * returns the tree buffer or NULL. */ -struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root) +struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root) { struct btrfs_key ins; int ret; struct btrfs_buffer *buf; - ret = alloc_extent(root, 1, 0, (unsigned long)-1, + ret = alloc_extent(trans, root, 1, 0, (unsigned long)-1, btrfs_header_parentid(&root->node->node.header), &ins); if (ret) { @@ -424,7 +435,7 @@ struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root) return NULL; } buf = find_tree_block(root, ins.objectid); - dirty_tree_block(root, buf); + dirty_tree_block(trans, root, buf); return buf; } @@ -432,8 +443,8 @@ struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root) * helper function for drop_snapshot, this walks down the tree dropping ref * counts as it goes. */ -static int walk_down_tree(struct btrfs_root *root, - struct btrfs_path *path, int *level) +static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int *level) { struct btrfs_buffer *next; struct btrfs_buffer *cur; @@ -441,7 +452,8 @@ static int walk_down_tree(struct btrfs_root *root, int ret; u32 refs; - ret = lookup_block_ref(root, path->nodes[*level]->blocknr, &refs); + ret = lookup_block_ref(trans, root, path->nodes[*level]->blocknr, + &refs); BUG_ON(ret); if (refs > 1) goto out; @@ -454,10 +466,10 @@ static int walk_down_tree(struct btrfs_root *root, btrfs_header_nritems(&cur->node.header)) break; blocknr = btrfs_node_blockptr(&cur->node, path->slots[*level]); - ret = lookup_block_ref(root, blocknr, &refs); + ret = lookup_block_ref(trans, root, blocknr, &refs); if (refs != 1 || *level == 1) { path->slots[*level]++; - ret = btrfs_free_extent(root, blocknr, 1, 1); + ret = btrfs_free_extent(trans, root, blocknr, 1, 1); BUG_ON(ret); continue; } @@ -470,7 +482,8 @@ static int walk_down_tree(struct btrfs_root *root, path->slots[*level] = 0; } out: - ret = btrfs_free_extent(root, path->nodes[*level]->blocknr, 1, 1); + ret = btrfs_free_extent(trans, root, path->nodes[*level]->blocknr, 1, + 1); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -483,8 +496,8 @@ out: * to find the first node higher up where we haven't yet gone through * all the slots */ -static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, - int *level) +static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int *level) { int i; int slot; @@ -497,8 +510,9 @@ static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, *level = i; return 0; } else { - ret = btrfs_free_extent(root, - path->nodes[*level]->blocknr, 1, 1); + ret = btrfs_free_extent(trans, root, + path->nodes[*level]->blocknr, + 1, 1); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; @@ -513,7 +527,8 @@ static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, * the tree freeing any blocks that have a ref count of zero after being * decremented. */ -int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap) +int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_buffer *snap) { int ret = 0; int wret; @@ -529,13 +544,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap) path.nodes[level] = snap; path.slots[level] = 0; while(1) { - wret = walk_down_tree(root, &path, &level); + wret = walk_down_tree(trans, root, &path, &level); if (wret > 0) break; if (wret < 0) ret = wret; - wret = walk_up_tree(root, &path, &level); + wret = walk_up_tree(trans, root, &path, &level); if (wret > 0) break; if (wret < 0) diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 4908f9afdf8..7caeb11e875 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -4,9 +4,11 @@ #include "radix-tree.h" #include "ctree.h" #include "disk-io.h" +#include "transaction.h" -int btrfs_insert_inode(struct btrfs_root *root, u64 objectid, - struct btrfs_inode_item *inode_item) +int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root + *root, u64 objectid, struct btrfs_inode_item + *inode_item) { struct btrfs_path path; struct btrfs_key key; @@ -17,13 +19,14 @@ int btrfs_insert_inode(struct btrfs_root *root, u64 objectid, key.offset = 0; btrfs_init_path(&path); - ret = btrfs_insert_item(root, &key, inode_item, sizeof(*inode_item)); + ret = btrfs_insert_item(trans, root, &key, inode_item, + sizeof(*inode_item)); btrfs_release_path(root, &path); return ret; } -int btrfs_lookup_inode(struct btrfs_root *root, struct btrfs_path *path, - u64 objectid, int mod) +int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, u64 objectid, int mod) { struct btrfs_key key; int ins_len = mod < 0 ? -1 : 0; @@ -33,5 +36,5 @@ int btrfs_lookup_inode(struct btrfs_root *root, struct btrfs_path *path, key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; - return btrfs_search_slot(root, &key, path, ins_len, cow); + return btrfs_search_slot(trans, root, &key, path, ins_len, cow); } diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 9236abda2a7..0882ca904ec 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -1,3 +1,5 @@ - +#ifndef __PRINT_TREE_ +#define __PRINT_TREE_ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l); void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t); +#endif diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c index 3a257161712..d676577185d 100644 --- a/fs/btrfs/quick-test.c +++ b/fs/btrfs/quick-test.c @@ -5,6 +5,7 @@ #include "ctree.h" #include "disk-io.h" #include "print-tree.h" +#include "transaction.h" /* for testing only */ int next_key(int i, int max_key) { @@ -25,10 +26,12 @@ int main(int ac, char **av) { struct btrfs_path path; struct btrfs_super_block super; struct btrfs_root *root; + struct btrfs_trans_handle *trans; radix_tree_init(); root = open_ctree("dbfile", &super); + trans = btrfs_start_transaction(root, 1); srand(55); ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_STRING_ITEM_KEY); @@ -41,12 +44,12 @@ int main(int ac, char **av) { fprintf(stderr, "insert %d:%d\n", num, i); ins.objectid = num; ins.offset = 0; - ret = btrfs_insert_item(root, &ins, buf, strlen(buf)); + ret = btrfs_insert_item(trans, root, &ins, buf, strlen(buf)); if (!ret) tree_size++; free(buf); if (i == run_size - 5) { - btrfs_commit_transaction(root, &super); + btrfs_commit_transaction(trans, root, &super); } } @@ -61,7 +64,7 @@ int main(int ac, char **av) { btrfs_init_path(&path); if (i % 10000 == 0) fprintf(stderr, "search %d:%d\n", num, i); - ret = btrfs_search_slot(root, &ins, &path, 0, 0); + ret = btrfs_search_slot(trans, root, &ins, &path, 0, 0); if (ret) { btrfs_print_tree(root, root->node); printf("unable to find %d\n", num); @@ -83,11 +86,11 @@ int main(int ac, char **av) { num = next_key(i, max_key); ins.objectid = num; btrfs_init_path(&path); - ret = btrfs_search_slot(root, &ins, &path, -1, 1); + ret = btrfs_search_slot(trans, root, &ins, &path, -1, 1); if (!ret) { if (i % 10000 == 0) fprintf(stderr, "del %d:%d\n", num, i); - ret = btrfs_del_item(root, &path); + ret = btrfs_del_item(trans, root, &path); if (ret != 0) BUG(); tree_size--; @@ -104,7 +107,7 @@ int main(int ac, char **av) { ins.objectid = num; if (i % 10000 == 0) fprintf(stderr, "insert %d:%d\n", num, i); - ret = btrfs_insert_item(root, &ins, buf, strlen(buf)); + ret = btrfs_insert_item(trans, root, &ins, buf, strlen(buf)); if (!ret) tree_size++; free(buf); @@ -119,7 +122,7 @@ int main(int ac, char **av) { btrfs_init_path(&path); if (i % 10000 == 0) fprintf(stderr, "search %d:%d\n", num, i); - ret = btrfs_search_slot(root, &ins, &path, 0, 0); + ret = btrfs_search_slot(trans, root, &ins, &path, 0, 0); if (ret) { btrfs_print_tree(root, root->node); printf("unable to find %d\n", num); @@ -134,7 +137,7 @@ int main(int ac, char **av) { int slot; ins.objectid = (u64)-1; btrfs_init_path(&path); - ret = btrfs_search_slot(root, &ins, &path, -1, 1); + ret = btrfs_search_slot(trans, root, &ins, &path, -1, 1); if (ret == 0) BUG(); @@ -150,7 +153,7 @@ int main(int ac, char **av) { btrfs_disk_key_to_cpu(&last, &leaf->items[slot].key); if (tree_size % 10000 == 0) printf("big del %d:%d\n", tree_size, i); - ret = btrfs_del_item(root, &path); + ret = btrfs_del_item(trans, root, &path); if (ret != 0) { printf("del_item returned %d\n", ret); BUG(); @@ -165,7 +168,7 @@ int main(int ac, char **av) { printf("map before commit\n"); btrfs_print_tree(root->extent_root, root->extent_root->node); */ - btrfs_commit_transaction(root, &super); + btrfs_commit_transaction(trans, root, &super); printf("tree size is now %d\n", tree_size); printf("root %p commit root %p\n", root->node, root->commit_root); printf("map tree\n"); diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index f05135f13f9..3a38ae7a886 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -6,6 +6,7 @@ #include "ctree.h" #include "disk-io.h" #include "print-tree.h" +#include "transaction.h" int keep_running = 1; struct btrfs_super_block super; @@ -37,7 +38,8 @@ again: return 0; } -static int ins_one(struct btrfs_root *root, struct radix_tree_root *radix) +static int ins_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct radix_tree_root *radix) { struct btrfs_path path; struct btrfs_key key; @@ -47,7 +49,7 @@ static int ins_one(struct btrfs_root *root, struct radix_tree_root *radix) btrfs_init_path(&path); ret = setup_key(radix, &key, 0); sprintf(buf, "str-%Lu\n", key.objectid); - ret = btrfs_insert_item(root, &key, buf, strlen(buf)); + ret = btrfs_insert_item(trans, root, &key, buf, strlen(buf)); if (ret) goto error; oid = (unsigned long)key.objectid; @@ -62,7 +64,8 @@ error: return -1; } -static int insert_dup(struct btrfs_root *root, struct radix_tree_root *radix) +static int insert_dup(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct radix_tree_root *radix) { struct btrfs_path path; struct btrfs_key key; @@ -73,7 +76,7 @@ static int insert_dup(struct btrfs_root *root, struct radix_tree_root *radix) if (ret < 0) return 0; sprintf(buf, "str-%Lu\n", key.objectid); - ret = btrfs_insert_item(root, &key, buf, strlen(buf)); + ret = btrfs_insert_item(trans, root, &key, buf, strlen(buf)); if (ret != -EEXIST) { printf("insert on %Lu gave us %d\n", key.objectid, ret); return 1; @@ -81,7 +84,8 @@ static int insert_dup(struct btrfs_root *root, struct radix_tree_root *radix) return 0; } -static int del_one(struct btrfs_root *root, struct radix_tree_root *radix) +static int del_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct radix_tree_root *radix) { struct btrfs_path path; struct btrfs_key key; @@ -91,10 +95,10 @@ static int del_one(struct btrfs_root *root, struct radix_tree_root *radix) ret = setup_key(radix, &key, 1); if (ret < 0) return 0; - ret = btrfs_search_slot(root, &key, &path, -1, 1); + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); if (ret) goto error; - ret = btrfs_del_item(root, &path); + ret = btrfs_del_item(trans, root, &path); btrfs_release_path(root, &path); if (ret != 0) goto error; @@ -107,7 +111,8 @@ error: return -1; } -static int lookup_item(struct btrfs_root *root, struct radix_tree_root *radix) +static int lookup_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct radix_tree_root *radix) { struct btrfs_path path; struct btrfs_key key; @@ -116,7 +121,7 @@ static int lookup_item(struct btrfs_root *root, struct radix_tree_root *radix) ret = setup_key(radix, &key, 1); if (ret < 0) return 0; - ret = btrfs_search_slot(root, &key, &path, 0, 1); + ret = btrfs_search_slot(trans, root, &key, &path, 0, 1); btrfs_release_path(root, &path); if (ret) goto error; @@ -126,7 +131,8 @@ error: return -1; } -static int lookup_enoent(struct btrfs_root *root, struct radix_tree_root *radix) +static int lookup_enoent(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct radix_tree_root *radix) { struct btrfs_path path; struct btrfs_key key; @@ -135,7 +141,7 @@ static int lookup_enoent(struct btrfs_root *root, struct radix_tree_root *radix) ret = setup_key(radix, &key, 0); if (ret < 0) return ret; - ret = btrfs_search_slot(root, &key, &path, 0, 0); + ret = btrfs_search_slot(trans, root, &key, &path, 0, 0); btrfs_release_path(root, &path); if (ret <= 0) goto error; @@ -145,8 +151,8 @@ error: return -1; } -static int empty_tree(struct btrfs_root *root, struct radix_tree_root *radix, - int nr) +static int empty_tree(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct radix_tree_root *radix, int nr) { struct btrfs_path path; struct btrfs_key key; @@ -162,7 +168,7 @@ static int empty_tree(struct btrfs_root *root, struct radix_tree_root *radix, key.objectid = (unsigned long)-1; while(nr-- >= 0) { btrfs_init_path(&path); - ret = btrfs_search_slot(root, &key, &path, -1, 1); + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); if (ret < 0) { btrfs_release_path(root, &path); return ret; @@ -177,7 +183,7 @@ static int empty_tree(struct btrfs_root *root, struct radix_tree_root *radix, slot = path.slots[0]; found = btrfs_disk_key_objectid( &path.nodes[0]->leaf.items[slot].key); - ret = btrfs_del_item(root, &path); + ret = btrfs_del_item(trans, root, &path); count++; if (ret) { fprintf(stderr, @@ -198,19 +204,19 @@ error: return -1; } -static int fill_tree(struct btrfs_root *root, struct radix_tree_root *radix, - int count) +static int fill_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct radix_tree_root *radix, int count) { int i; int ret = 0; for (i = 0; i < count; i++) { - ret = ins_one(root, radix); + ret = ins_one(trans, root, radix); if (ret) { fprintf(stderr, "fill failed\n"); goto out; } if (i % 1000 == 0) { - ret = btrfs_commit_transaction(root, &super); + ret = btrfs_commit_transaction(trans, root, &super); if (ret) { fprintf(stderr, "fill commit failed\n"); return ret; @@ -226,7 +232,8 @@ out: return ret; } -static int bulk_op(struct btrfs_root *root, struct radix_tree_root *radix) +static int bulk_op(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct radix_tree_root *radix) { int ret; int nr = rand() % 5000; @@ -235,17 +242,18 @@ static int bulk_op(struct btrfs_root *root, struct radix_tree_root *radix) /* do the bulk op much less frequently */ if (run_nr++ % 100) return 0; - ret = empty_tree(root, radix, nr); + ret = empty_tree(trans, root, radix, nr); if (ret) return ret; - ret = fill_tree(root, radix, nr); + ret = fill_tree(trans, root, radix, nr); if (ret) return ret; return 0; } -int (*ops[])(struct btrfs_root *root, struct radix_tree_root *radix) = +int (*ops[])(struct btrfs_trans_handle *, + struct btrfs_root *root, struct radix_tree_root *radix) = { ins_one, insert_dup, del_one, lookup_item, lookup_enoent, bulk_op }; @@ -264,7 +272,7 @@ static int fill_radix(struct btrfs_root *root, struct radix_tree_root *radix) key.objectid = (unsigned long)-1; while(1) { btrfs_init_path(&path); - ret = btrfs_search_slot(root, &key, &path, 0, 0); + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); if (ret < 0) { btrfs_release_path(root, &path); return ret; @@ -325,6 +333,7 @@ int main(int ac, char **av) int init_fill_count = 800000; int err = 0; int initial_only = 0; + struct btrfs_trans_handle *trans; radix_tree_init(); root = open_ctree("dbfile", &super); fill_radix(root, &radix); @@ -346,7 +355,8 @@ int main(int ac, char **av) } } printf("initial fill\n"); - ret = fill_tree(root, &radix, init_fill_count); + trans = btrfs_start_transaction(root, 1); + ret = fill_tree(trans, root, &radix, init_fill_count); printf("starting run\n"); if (ret) { err = ret; @@ -370,7 +380,7 @@ int main(int ac, char **av) root = open_ctree("dbfile", &super); } while(count--) { - ret = ops[op](root, &radix); + ret = ops[op](trans, root, &radix); if (ret) { fprintf(stderr, "op %d failed %d:%d\n", op, i, iterations); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index a24b4727403..9cccecc0f43 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -20,7 +20,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, search_key.offset = (u32)-1; btrfs_init_path(&path); - ret = btrfs_search_slot(root, &search_key, &path, 0, 0); + ret = btrfs_search_slot(NULL, root, &search_key, &path, 0, 0); if (ret < 0) goto out; BUG_ON(ret == 0); @@ -40,8 +40,9 @@ out: return ret; } -int btrfs_update_root(struct btrfs_root *root, struct btrfs_key *key, - struct btrfs_root_item *item) +int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *key, struct btrfs_root_item + *item) { struct btrfs_path path; struct btrfs_leaf *l; @@ -49,7 +50,7 @@ int btrfs_update_root(struct btrfs_root *root, struct btrfs_key *key, int slot; btrfs_init_path(&path); - ret = btrfs_search_slot(root, key, &path, 0, 1); + ret = btrfs_search_slot(trans, root, key, &path, 0, 1); if (ret < 0) goto out; BUG_ON(ret != 0); @@ -62,26 +63,28 @@ out: return ret; } -int btrfs_insert_root(struct btrfs_root *root, struct btrfs_key *key, - struct btrfs_root_item *item) +int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *key, struct btrfs_root_item + *item) { int ret; - ret = btrfs_insert_item(root, key, item, sizeof(*item)); + ret = btrfs_insert_item(trans, root, key, item, sizeof(*item)); BUG_ON(ret); return ret; } -int btrfs_del_root(struct btrfs_root *root, struct btrfs_key *key) +int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_key *key) { struct btrfs_path path; int ret; btrfs_init_path(&path); - ret = btrfs_search_slot(root, key, &path, -1, 1); + ret = btrfs_search_slot(trans, root, key, &path, -1, 1); if (ret < 0) goto out; BUG_ON(ret != 0); - ret = btrfs_del_item(root, &path); + ret = btrfs_del_item(trans, root, &path); out: btrfs_release_path(root, &path); return ret; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h new file mode 100644 index 00000000000..3adb6e69fe4 --- /dev/null +++ b/fs/btrfs/transaction.h @@ -0,0 +1,27 @@ +#ifndef __TRANSACTION__ +#define __TRANSACTION__ + +struct btrfs_trans_handle { + u64 transid; + unsigned long blocks_reserved; + unsigned long blocks_used; +}; + +static inline struct btrfs_trans_handle * +btrfs_start_transaction(struct btrfs_root *root, int num_blocks) +{ + struct btrfs_trans_handle *h = malloc(sizeof(*h)); + h->transid = root->root_key.offset; + h->blocks_reserved = num_blocks; + h->blocks_used = 0; + return h; +} + +static inline void btrfs_free_transaction(struct btrfs_root *root, + struct btrfs_trans_handle *handle) +{ + memset(handle, 0, sizeof(*handle)); + free(handle); +} + +#endif -- cgit v1.2.3-70-g09d2 From 9f5fae2fe6dc35b46bf56183f11398451851cb3f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 20 Mar 2007 14:38:32 -0400 Subject: Btrfs: Add inode map, and the start of file extent items Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 3 +- fs/btrfs/ctree.h | 129 ++++++++++++++++++++++++++++++++---- fs/btrfs/debug-tree.c | 9 ++- fs/btrfs/dir-test.c | 91 ++++++++++++++++++++----- fs/btrfs/disk-io.c | 176 +++++++++++++++++++++++++++---------------------- fs/btrfs/extent-tree.c | 105 +++++++++++++++-------------- fs/btrfs/file-item.c | 6 ++ fs/btrfs/inode-map.c | 136 ++++++++++++++++++++++++++++++++++++++ fs/btrfs/mkfs.c | 44 +++++++++++-- fs/btrfs/print-tree.c | 13 ++-- fs/btrfs/quick-test.c | 3 +- 11 files changed, 542 insertions(+), 173 deletions(-) create mode 100644 fs/btrfs/inode-map.c (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 7953b41848c..636c63efde6 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -3,7 +3,8 @@ CFLAGS = -g -Wall -Werror headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h \ transaction.h objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o \ - root-tree.o dir-item.o hash.o file-item.o inode-item.o + root-tree.o dir-item.o hash.o file-item.o inode-item.o \ + inode-map.o \ # if you don't have sparse installed, use ls instead CHECKFLAGS=-D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise \ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 68f0af39777..68e3da274f1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -10,7 +10,8 @@ struct btrfs_trans_handle; #define BTRFS_ROOT_TREE_OBJECTID 1 #define BTRFS_EXTENT_TREE_OBJECTID 2 -#define BTRFS_FS_TREE_OBJECTID 3 +#define BTRFS_INODE_MAP_OBJECTID 3 +#define BTRFS_FS_TREE_OBJECTID 4 /* * the key defines the order in the tree, and so it also defines (optimal) @@ -178,31 +179,65 @@ struct btrfs_root_item { __le64 block_limit; __le64 blocks_used; __le32 refs; -}; +} __attribute__ ((__packed__)); -/* - * in ram representation of the tree. extent_root is used for all allocations - * and for the extent tree extent_root root. current_insert is used - * only for the extent tree. - */ -struct btrfs_root { - struct btrfs_buffer *node; - struct btrfs_buffer *commit_root; +struct btrfs_file_extent_item { + /* + * disk space consumed by the extent, checksum blocks are included + * in these numbers + */ + __le64 disk_blocknr; + __le64 disk_num_blocks; + /* + * the logical offset in file bytes (no csums) + * this extent record is for. This allows a file extent to point + * into the middle of an existing extent on disk, sharing it + * between two snapshots (useful if some bytes in the middle of the + * extent have changed + */ + __le64 offset; + /* + * the logical number of file blocks (no csums included) + */ + __le64 num_blocks; +} __attribute__ ((__packed__)); + +struct btrfs_inode_map_item { + struct btrfs_disk_key key; +} __attribute__ ((__packed__)); + +struct btrfs_fs_info { + struct btrfs_root *fs_root; struct btrfs_root *extent_root; struct btrfs_root *tree_root; + struct btrfs_root *inode_root; struct btrfs_key current_insert; struct btrfs_key last_insert; - int fp; struct radix_tree_root cache_radix; struct radix_tree_root pinned_radix; struct list_head trans; struct list_head cache; + u64 last_inode_alloc; + u64 last_inode_alloc_dirid; int cache_size; - int ref_cows; + int fp; + struct btrfs_trans_handle *running_transaction; +}; + +/* + * in ram representation of the tree. extent_root is used for all allocations + * and for the extent tree extent_root root. current_insert is used + * only for the extent tree. + */ +struct btrfs_root { + struct btrfs_buffer *node; + struct btrfs_buffer *commit_root; struct btrfs_root_item root_item; struct btrfs_key root_key; + struct btrfs_fs_info *fs_info; u32 blocksize; - struct btrfs_trans_handle *running_transaction; + int ref_cows; + u32 type; }; /* the lower bits in the key flags defines the item type */ @@ -240,11 +275,17 @@ struct btrfs_root { * are used, and how many references there are to each block */ #define BTRFS_EXTENT_ITEM_KEY 6 + +/* + * the inode map records which inode numbers are in use and where + * they actually live on disk + */ +#define BTRFS_INODE_MAP_ITEM_KEY 7 /* * string items are for debugging. They just store a short string of * data in the FS */ -#define BTRFS_STRING_ITEM_KEY 7 +#define BTRFS_STRING_ITEM_KEY 8 static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i) { @@ -654,6 +695,57 @@ static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) { return (u8 *)l->items; } + +static inline u64 btrfs_file_extent_disk_blocknr(struct btrfs_file_extent_item + *e) +{ + return le64_to_cpu(e->disk_blocknr); +} + +static inline void btrfs_set_file_extent_disk_blocknr(struct + btrfs_file_extent_item + *e, u64 val) +{ + e->disk_blocknr = cpu_to_le64(val); +} + +static inline u64 btrfs_file_extent_disk_num_blocks(struct + btrfs_file_extent_item *e) +{ + return le64_to_cpu(e->disk_num_blocks); +} + +static inline void btrfs_set_file_extent_disk_num_blocks(struct + btrfs_file_extent_item + *e, u64 val) +{ + e->disk_num_blocks = cpu_to_le64(val); +} + +static inline u64 btrfs_file_extent_offset(struct btrfs_file_extent_item *e) +{ + return le64_to_cpu(e->offset); +} + +static inline void btrfs_set_file_extent_offset(struct btrfs_file_extent_item + *e, u64 val) +{ + e->offset = cpu_to_le64(val); +} + +static inline u64 btrfs_file_extent_num_blocks(struct btrfs_file_extent_item + *e) +{ + return le64_to_cpu(e->num_blocks); +} + +static inline void btrfs_set_file_extent_num_blocks(struct + btrfs_file_extent_item *e, + u64 val) +{ + e->num_blocks = cpu_to_le64(val); +} + /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ @@ -701,4 +793,13 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root int name_len, int mod); int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, char *name, int name_len); +int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, + struct btrfs_root *fs_root, + u64 dirid, u64 *objectid); +int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 objectid, struct btrfs_key *location); +int btrfs_lookup_inode_map(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + u64 objectid, int mod); #endif diff --git a/fs/btrfs/debug-tree.c b/fs/btrfs/debug-tree.c index 91dea7a0a47..d9f36efc3dd 100644 --- a/fs/btrfs/debug-tree.c +++ b/fs/btrfs/debug-tree.c @@ -15,8 +15,13 @@ int main(int ac, char **av) { printf("fs tree\n"); btrfs_print_tree(root, root->node); printf("map tree\n"); - btrfs_print_tree(root->extent_root, root->extent_root->node); + btrfs_print_tree(root->fs_info->extent_root, + root->fs_info->extent_root->node); + printf("inode tree\n"); + btrfs_print_tree(root->fs_info->inode_root, + root->fs_info->inode_root->node); printf("root tree\n"); - btrfs_print_tree(root->tree_root, root->tree_root->node); + btrfs_print_tree(root->fs_info->tree_root, + root->fs_info->tree_root->node); return 0; } diff --git a/fs/btrfs/dir-test.c b/fs/btrfs/dir-test.c index e908c0c588c..56f06c3ba57 100644 --- a/fs/btrfs/dir-test.c +++ b/fs/btrfs/dir-test.c @@ -45,13 +45,26 @@ static int ins_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, int ret; char buf[128]; unsigned long oid; + u64 objectid; struct btrfs_path path; + struct btrfs_key inode_map; find_num(radix, &oid, 0); sprintf(buf, "str-%lu", oid); + ret = btrfs_find_free_objectid(trans, root, dir_oid + 1, &objectid); + if (ret) + goto error; + + inode_map.objectid = objectid; + inode_map.flags = 0; + inode_map.offset = 0; + + ret = btrfs_insert_inode_map(trans, root, objectid, &inode_map); + if (ret) + goto error; ret = btrfs_insert_dir_item(trans, root, buf, strlen(buf), dir_oid, - file_oid, 1); + objectid, 1); if (ret) goto error; @@ -120,6 +133,53 @@ static int insert_dup(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } +static int del_dir_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct radix_tree_root *radix, + unsigned long radix_index, + struct btrfs_path *path) +{ + int ret; + unsigned long *ptr; + u64 file_objectid; + struct btrfs_dir_item *di; + struct btrfs_path map_path; + + /* find the inode number of the file */ + di = btrfs_item_ptr(&path->nodes[0]->leaf, path->slots[0], + struct btrfs_dir_item); + file_objectid = btrfs_dir_objectid(di); + + /* delete the directory item */ + ret = btrfs_del_item(trans, root, path); + if (ret) + goto out; + + /* delete the inode mapping */ + btrfs_init_path(&map_path); + ret = btrfs_lookup_inode_map(trans, root, &map_path, file_objectid, -1); + if (ret) + goto out_release; + ret = btrfs_del_item(trans, root->fs_info->inode_root, &map_path); + if (ret) + goto out_release; + + if (root->fs_info->last_inode_alloc > file_objectid) + root->fs_info->last_inode_alloc = file_objectid; + btrfs_release_path(root, &map_path); + ptr = radix_tree_delete(radix, radix_index); + if (!ptr) { + ret = -5555; + goto out; + } + return 0; +out_release: + btrfs_release_path(root, &map_path); +out: + printf("failed to delete %lu %d\n", radix_index, ret); + return -1; +} + static int del_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct radix_tree_root *radix) { @@ -127,7 +187,6 @@ static int del_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, char buf[128]; unsigned long oid; struct btrfs_path path; - unsigned long *ptr; ret = find_num(radix, &oid, 1); if (ret < 0) @@ -138,19 +197,14 @@ static int del_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, strlen(buf), -1); if (ret) goto out_release; - ret = btrfs_del_item(trans, root, &path); + + ret = del_dir_item(trans, root, radix, oid, &path); if (ret) goto out_release; btrfs_release_path(root, &path); - ptr = radix_tree_delete(radix, oid); - if (!ptr) { - ret = -5555; - goto out; - } - return 0; + return ret; out_release: btrfs_release_path(root, &path); -out: printf("failed to delete %lu %d\n", oid, ret); return -1; } @@ -162,6 +216,8 @@ static int lookup_item(struct btrfs_trans_handle *trans, struct btrfs_root char buf[128]; int ret; unsigned long oid; + u64 objectid; + struct btrfs_dir_item *di; ret = find_num(radix, &oid, 1); if (ret < 0) @@ -170,6 +226,14 @@ static int lookup_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_init_path(&path); ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf, strlen(buf), 0); + if (!ret) { + di = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], + struct btrfs_dir_item); + objectid = btrfs_dir_objectid(di); + btrfs_release_path(root, &path); + btrfs_init_path(&path); + ret = btrfs_lookup_inode_map(trans, root, &path, objectid, 0); + } btrfs_release_path(root, &path); if (ret) { printf("unable to find key %lu\n", oid); @@ -210,7 +274,6 @@ static int empty_tree(struct btrfs_trans_handle *trans, struct btrfs_root u32 found_len; int ret; int slot; - int *ptr; int count = 0; char buf[128]; struct btrfs_dir_item *di; @@ -241,7 +304,7 @@ static int empty_tree(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(found_len > 128); buf[found_len] = '\0'; found = atoi(buf + 4); - ret = btrfs_del_item(trans, root, &path); + ret = del_dir_item(trans, root, radix, found, &path); count++; if (ret) { fprintf(stderr, @@ -250,14 +313,10 @@ static int empty_tree(struct btrfs_trans_handle *trans, struct btrfs_root return -1; } btrfs_release_path(root, &path); - ptr = radix_tree_delete(radix, found); - if (!ptr) - goto error; if (!keep_running) break; } return 0; -error: fprintf(stderr, "failed to delete from the radix %lu\n", found); return -1; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 25ce07908ee..1849a99690c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -28,15 +28,15 @@ static int free_some_buffers(struct btrfs_root *root) { struct list_head *node, *next; struct btrfs_buffer *b; - if (root->cache_size < cache_max) + if (root->fs_info->cache_size < cache_max) return 0; - list_for_each_safe(node, next, &root->cache) { + list_for_each_safe(node, next, &root->fs_info->cache) { b = list_entry(node, struct btrfs_buffer, cache); if (b->count == 1) { BUG_ON(!list_empty(&b->dirty)); list_del_init(&b->cache); btrfs_block_release(root, b); - if (root->cache_size < cache_max) + if (root->fs_info->cache_size < cache_max) break; } } @@ -57,10 +57,10 @@ struct btrfs_buffer *alloc_tree_block(struct btrfs_root *root, u64 blocknr) INIT_LIST_HEAD(&buf->dirty); free_some_buffers(root); radix_tree_preload(GFP_KERNEL); - ret = radix_tree_insert(&root->cache_radix, blocknr, buf); + ret = radix_tree_insert(&root->fs_info->cache_radix, blocknr, buf); radix_tree_preload_end(); - list_add_tail(&buf->cache, &root->cache); - root->cache_size++; + list_add_tail(&buf->cache, &root->fs_info->cache); + root->fs_info->cache_size++; if (ret) { free(buf); return NULL; @@ -71,7 +71,7 @@ struct btrfs_buffer *alloc_tree_block(struct btrfs_root *root, u64 blocknr) struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr) { struct btrfs_buffer *buf; - buf = radix_tree_lookup(&root->cache_radix, blocknr); + buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr); if (buf) { buf->count++; } else { @@ -90,14 +90,15 @@ struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) struct btrfs_buffer *buf; int ret; - buf = radix_tree_lookup(&root->cache_radix, blocknr); + buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr); if (buf) { buf->count++; } else { buf = alloc_tree_block(root, blocknr); if (!buf) return NULL; - ret = pread(root->fp, &buf->node, root->blocksize, offset); + ret = pread(root->fs_info->fp, &buf->node, root->blocksize, + offset); if (ret != root->blocksize) { free(buf); return NULL; @@ -113,7 +114,7 @@ int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, { if (!list_empty(&buf->dirty)) return 0; - list_add_tail(&buf->dirty, &root->trans); + list_add_tail(&buf->dirty, &root->fs_info->trans); buf->count++; return 0; } @@ -137,7 +138,7 @@ int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) BUG(); - ret = pwrite(root->fp, &buf->node, root->blocksize, offset); + ret = pwrite(root->fs_info->fp, &buf->node, root->blocksize, offset); if (ret != root->blocksize) return ret; return 0; @@ -149,8 +150,9 @@ static int __commit_transaction(struct btrfs_trans_handle *trans, struct struct btrfs_buffer *b; int ret = 0; int wret; - while(!list_empty(&root->trans)) { - b = list_entry(root->trans.next, struct btrfs_buffer, dirty); + while(!list_empty(&root->fs_info->trans)) { + b = list_entry(root->fs_info->trans.next, struct btrfs_buffer, + dirty); list_del_init(&b->dirty); wret = write_tree_block(trans, root, b); if (wret) @@ -160,13 +162,21 @@ static int __commit_transaction(struct btrfs_trans_handle *trans, struct return ret; } -static int commit_extent_and_tree_roots(struct btrfs_trans_handle *trans, - struct btrfs_root *tree_root, struct - btrfs_root *extent_root) +static int commit_tree_roots(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) { int ret; u64 old_extent_block; - + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_root *inode_root = fs_info->inode_root; + + btrfs_set_root_blocknr(&inode_root->root_item, + inode_root->node->blocknr); + ret = btrfs_update_root(trans, tree_root, + &inode_root->root_key, + &inode_root->root_item); + BUG_ON(ret); while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); if (old_extent_block == extent_root->node->blocknr) @@ -178,8 +188,6 @@ static int commit_extent_and_tree_roots(struct btrfs_trans_handle *trans, &extent_root->root_item); BUG_ON(ret); } - __commit_transaction(trans, extent_root); - __commit_transaction(trans, tree_root); return 0; } @@ -190,9 +198,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct struct btrfs_buffer *snap = root->commit_root; struct btrfs_key snap_key; - ret = __commit_transaction(trans, root); - BUG_ON(ret); - if (root->commit_root == root->node) return 0; @@ -200,54 +205,55 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct root->root_key.offset++; btrfs_set_root_blocknr(&root->root_item, root->node->blocknr); - ret = btrfs_insert_root(trans, root->tree_root, &root->root_key, - &root->root_item); + ret = btrfs_insert_root(trans, root->fs_info->tree_root, + &root->root_key, &root->root_item); + BUG_ON(ret); + + ret = commit_tree_roots(trans, root->fs_info); BUG_ON(ret); - ret = commit_extent_and_tree_roots(trans, root->tree_root, - root->extent_root); + ret = __commit_transaction(trans, root); BUG_ON(ret); write_ctree_super(trans, root, s); - btrfs_finish_extent_commit(trans, root->extent_root); - btrfs_finish_extent_commit(trans, root->tree_root); + btrfs_finish_extent_commit(trans, root->fs_info->extent_root); + btrfs_finish_extent_commit(trans, root->fs_info->tree_root); root->commit_root = root->node; root->node->count++; ret = btrfs_drop_snapshot(trans, root, snap); BUG_ON(ret); - ret = btrfs_del_root(trans, root->tree_root, &snap_key); + ret = btrfs_del_root(trans, root->fs_info->tree_root, &snap_key); BUG_ON(ret); return ret; } static int __setup_root(struct btrfs_super_block *super, - struct btrfs_root *root, u64 objectid, int fp) + struct btrfs_root *root, + struct btrfs_fs_info *fs_info, + u64 objectid, int fp) { - INIT_LIST_HEAD(&root->trans); - INIT_LIST_HEAD(&root->cache); - root->cache_size = 0; - root->fp = fp; root->node = NULL; root->commit_root = NULL; root->blocksize = btrfs_super_blocksize(super); root->ref_cows = 0; - memset(&root->current_insert, 0, sizeof(root->current_insert)); - memset(&root->last_insert, 0, sizeof(root->last_insert)); + root->fs_info = fs_info; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); return 0; } static int find_and_setup_root(struct btrfs_super_block *super, - struct btrfs_root *tree_root, u64 objectid, + struct btrfs_root *tree_root, + struct btrfs_fs_info *fs_info, + u64 objectid, struct btrfs_root *root, int fp) { int ret; - __setup_root(super, root, objectid, fp); + __setup_root(super, root, fs_info, objectid, fp); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); @@ -263,29 +269,31 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) struct btrfs_root *root = malloc(sizeof(struct btrfs_root)); struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root)); struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root)); + struct btrfs_root *inode_root = malloc(sizeof(struct btrfs_root)); + struct btrfs_fs_info *fs_info = malloc(sizeof(*fs_info)); int fp; int ret; - root->extent_root = extent_root; - root->tree_root = tree_root; - - extent_root->extent_root = extent_root; - extent_root->tree_root = tree_root; - - tree_root->extent_root = extent_root; - tree_root->tree_root = tree_root; - fp = open(filename, O_CREAT | O_RDWR, 0600); if (fp < 0) { free(root); return NULL; } - INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL); - INIT_RADIX_TREE(&root->pinned_radix, GFP_KERNEL); - INIT_RADIX_TREE(&extent_root->pinned_radix, GFP_KERNEL); - INIT_RADIX_TREE(&extent_root->cache_radix, GFP_KERNEL); - INIT_RADIX_TREE(&tree_root->pinned_radix, GFP_KERNEL); - INIT_RADIX_TREE(&tree_root->cache_radix, GFP_KERNEL); + INIT_RADIX_TREE(&fs_info->cache_radix, GFP_KERNEL); + INIT_RADIX_TREE(&fs_info->pinned_radix, GFP_KERNEL); + INIT_LIST_HEAD(&fs_info->trans); + INIT_LIST_HEAD(&fs_info->cache); + fs_info->cache_size = 0; + fs_info->fp = fp; + fs_info->running_transaction = NULL; + fs_info->fs_root = root; + fs_info->tree_root = tree_root; + fs_info->extent_root = extent_root; + fs_info->inode_root = inode_root; + fs_info->last_inode_alloc = 0; + fs_info->last_inode_alloc_dirid = 0; + memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); + memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); ret = pread(fp, super, sizeof(struct btrfs_super_block), BTRFS_SUPER_INFO_OFFSET); @@ -301,16 +309,20 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) } BUG_ON(ret < 0); - __setup_root(super, tree_root, BTRFS_ROOT_TREE_OBJECTID, fp); + __setup_root(super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID, fp); tree_root->node = read_tree_block(tree_root, btrfs_super_root(super)); BUG_ON(!tree_root->node); - ret = find_and_setup_root(super, tree_root, BTRFS_EXTENT_TREE_OBJECTID, - extent_root, fp); + ret = find_and_setup_root(super, tree_root, fs_info, + BTRFS_EXTENT_TREE_OBJECTID, extent_root, fp); BUG_ON(ret); - ret = find_and_setup_root(super, tree_root, BTRFS_FS_TREE_OBJECTID, - root, fp); + ret = find_and_setup_root(super, tree_root, fs_info, + BTRFS_INODE_MAP_OBJECTID, inode_root, fp); + BUG_ON(ret); + + ret = find_and_setup_root(super, tree_root, fs_info, + BTRFS_FS_TREE_OBJECTID, root, fp); BUG_ON(ret); root->commit_root = root->node; @@ -323,8 +335,8 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_super_block *s) { int ret; - btrfs_set_super_root(s, root->tree_root->node->blocknr); - ret = pwrite(root->fp, s, sizeof(*s), + btrfs_set_super_root(s, root->fs_info->tree_root->node->blocknr); + ret = pwrite(root->fs_info->fp, s, sizeof(*s), BTRFS_SUPER_INFO_OFFSET); if (ret != sizeof(*s)) { fprintf(stderr, "failed to write new super block err %d\n", ret); @@ -335,9 +347,10 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root static int drop_cache(struct btrfs_root *root) { - while(!list_empty(&root->cache)) { - struct btrfs_buffer *b = list_entry(root->cache.next, - struct btrfs_buffer, cache); + while(!list_empty(&root->fs_info->cache)) { + struct btrfs_buffer *b = list_entry(root->fs_info->cache.next, + struct btrfs_buffer, + cache); list_del_init(&b->cache); btrfs_block_release(root, b); } @@ -348,26 +361,28 @@ int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) int ret; struct btrfs_trans_handle *trans; - trans = root->running_transaction; + trans = root->fs_info->running_transaction; btrfs_commit_transaction(trans, root, s); - ret = commit_extent_and_tree_roots(trans, root->tree_root, - root->extent_root); + ret = commit_tree_roots(trans, root->fs_info); + BUG_ON(ret); + ret = __commit_transaction(trans, root); BUG_ON(ret); write_ctree_super(trans, root, s); - drop_cache(root->extent_root); - drop_cache(root->tree_root); drop_cache(root); - BUG_ON(!list_empty(&root->trans)); - BUG_ON(!list_empty(&root->extent_root->trans)); - BUG_ON(!list_empty(&root->tree_root->trans)); + BUG_ON(!list_empty(&root->fs_info->trans)); - close(root->fp); + close(root->fs_info->fp); if (root->node) btrfs_block_release(root, root->node); - if (root->extent_root->node) - btrfs_block_release(root->extent_root, root->extent_root->node); - if (root->tree_root->node) - btrfs_block_release(root->tree_root, root->tree_root->node); + if (root->fs_info->extent_root->node) + btrfs_block_release(root->fs_info->extent_root, + root->fs_info->extent_root->node); + if (root->fs_info->inode_root->node) + btrfs_block_release(root->fs_info->inode_root, + root->fs_info->inode_root->node); + if (root->fs_info->tree_root->node) + btrfs_block_release(root->fs_info->tree_root, + root->fs_info->tree_root->node); btrfs_block_release(root, root->commit_root); free(root); printf("on close %d blocks are allocated\n", allocated_blocks); @@ -382,15 +397,16 @@ void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf) if (buf->count == 0) { BUG_ON(!list_empty(&buf->cache)); BUG_ON(!list_empty(&buf->dirty)); - if (!radix_tree_lookup(&root->cache_radix, buf->blocknr)) + if (!radix_tree_lookup(&root->fs_info->cache_radix, + buf->blocknr)) BUG(); - radix_tree_delete(&root->cache_radix, buf->blocknr); + radix_tree_delete(&root->fs_info->cache_radix, buf->blocknr); memset(buf, 0, sizeof(*buf)); free(buf); BUG_ON(allocated_blocks == 0); allocated_blocks--; - BUG_ON(root->cache_size == 0); - root->cache_size--; + BUG_ON(root->fs_info->cache_size == 0); + root->fs_info->cache_size--; } } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c29b92d440e..09eeeb4d9d2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -35,13 +35,15 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key ins; u32 refs; - find_free_extent(trans, root->extent_root, 0, 0, (u64)-1, &ins); + find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1, + &ins); btrfs_init_path(&path); key.objectid = blocknr; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = 1; - ret = btrfs_search_slot(trans, root->extent_root, &key, &path, 0, 1); + ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path, + 0, 1); if (ret != 0) BUG(); BUG_ON(ret != 0); @@ -51,9 +53,9 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_extent_refs(item, refs + 1); BUG_ON(list_empty(&path.nodes[0]->dirty)); - btrfs_release_path(root->extent_root, &path); - finish_current_insert(trans, root->extent_root); - run_pending(trans, root->extent_root); + btrfs_release_path(root->fs_info->extent_root, &path); + finish_current_insert(trans, root->fs_info->extent_root); + run_pending(trans, root->fs_info->extent_root); return 0; } @@ -70,13 +72,14 @@ static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root key.offset = 1; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - ret = btrfs_search_slot(trans, root->extent_root, &key, &path, 0, 0); + ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path, + 0, 0); if (ret != 0) BUG(); l = &path.nodes[0]->leaf; item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); *refs = btrfs_extent_refs(item); - btrfs_release_path(root->extent_root, &path); + btrfs_release_path(root->fs_info->extent_root, &path); return 0; } @@ -107,19 +110,20 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct int i; while(1) { - ret = radix_tree_gang_lookup(&root->pinned_radix, - (void **)gang, 0, - ARRAY_SIZE(gang)); + ret = radix_tree_gang_lookup(&root->fs_info->pinned_radix, + (void **)gang, 0, + ARRAY_SIZE(gang)); if (!ret) break; if (!first) first = gang[0]; for (i = 0; i < ret; i++) { - radix_tree_delete(&root->pinned_radix, gang[i]); + radix_tree_delete(&root->fs_info->pinned_radix, + gang[i]); } } - root->last_insert.objectid = first; - root->last_insert.offset = 0; + root->fs_info->last_insert.objectid = first; + root->fs_info->last_insert.offset = 0; return 0; } @@ -138,13 +142,14 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); - for (i = 0; i < extent_root->current_insert.flags; i++) { - ins.objectid = extent_root->current_insert.objectid + i; + for (i = 0; i < extent_root->fs_info->current_insert.flags; i++) { + ins.objectid = extent_root->fs_info->current_insert.objectid + + i; ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item, sizeof(extent_item)); BUG_ON(ret); } - extent_root->current_insert.offset = 0; + extent_root->fs_info->current_insert.offset = 0; return 0; } @@ -156,7 +161,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root { struct btrfs_path path; struct btrfs_key key; - struct btrfs_root *extent_root = root->extent_root; + struct btrfs_root *extent_root = root->fs_info->extent_root; int ret; struct btrfs_extent_item *ei; struct btrfs_key ins; @@ -186,14 +191,16 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (pin) { int err; radix_tree_preload(GFP_KERNEL); - err = radix_tree_insert(&extent_root->pinned_radix, - blocknr, (void *)blocknr); + err = radix_tree_insert( + &extent_root->fs_info->pinned_radix, + blocknr, (void *)blocknr); BUG_ON(err); radix_tree_preload_end(); } ret = btrfs_del_item(trans, extent_root, &path); - if (!pin && extent_root->last_insert.objectid > blocknr) - extent_root->last_insert.objectid = blocknr; + if (!pin && extent_root->fs_info->last_insert.objectid > + blocknr) + extent_root->fs_info->last_insert.objectid = blocknr; if (ret) BUG(); } @@ -214,18 +221,19 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct int i; while(1) { - ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix, - (void **)gang, 0, - ARRAY_SIZE(gang), - CTREE_EXTENT_PENDING_DEL); + ret = radix_tree_gang_lookup_tag( + &extent_root->fs_info->cache_radix, + (void **)gang, 0, + ARRAY_SIZE(gang), + CTREE_EXTENT_PENDING_DEL); if (!ret) break; for (i = 0; i < ret; i++) { ret = __free_extent(trans, extent_root, gang[i]->blocknr, 1, 1); - radix_tree_tag_clear(&extent_root->cache_radix, - gang[i]->blocknr, - CTREE_EXTENT_PENDING_DEL); + radix_tree_tag_clear(&extent_root->fs_info->cache_radix, + gang[i]->blocknr, + CTREE_EXTENT_PENDING_DEL); btrfs_block_release(extent_root, gang[i]); } } @@ -235,8 +243,8 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root) { - while(radix_tree_tagged(&extent_root->cache_radix, - CTREE_EXTENT_PENDING_DEL)) + while(radix_tree_tagged(&extent_root->fs_info->cache_radix, + CTREE_EXTENT_PENDING_DEL)) del_pending_extents(trans, extent_root); return 0; } @@ -248,19 +256,19 @@ static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin) { - struct btrfs_root *extent_root = root->extent_root; + struct btrfs_root *extent_root = root->fs_info->extent_root; struct btrfs_buffer *t; int pending_ret; int ret; if (root == extent_root) { t = find_tree_block(root, blocknr); - radix_tree_tag_set(&root->cache_radix, blocknr, + radix_tree_tag_set(&root->fs_info->cache_radix, blocknr, CTREE_EXTENT_PENDING_DEL); return 0; } ret = __free_extent(trans, root, blocknr, num_blocks, pin); - pending_ret = run_pending(trans, root->extent_root); + pending_ret = run_pending(trans, root->fs_info->extent_root); return ret ? ret : pending_ret; } @@ -285,12 +293,12 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root u64 test_block; int start_found; struct btrfs_leaf *l; - struct btrfs_root * root = orig_root->extent_root; + struct btrfs_root * root = orig_root->fs_info->extent_root; int total_needed = num_blocks; total_needed += (btrfs_header_level(&root->node->node.header) + 1) * 3; - if (root->last_insert.objectid > search_start) - search_start = root->last_insert.objectid; + if (root->fs_info->last_insert.objectid > search_start) + search_start = root->fs_info->last_insert.objectid; ins->flags = 0; btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); @@ -353,16 +361,17 @@ check_pending: BUG_ON(ins->objectid < search_start); for (test_block = ins->objectid; test_block < ins->objectid + total_needed; test_block++) { - if (radix_tree_lookup(&root->pinned_radix, test_block)) { + if (radix_tree_lookup(&root->fs_info->pinned_radix, + test_block)) { search_start = test_block + 1; goto check_failed; } } - BUG_ON(root->current_insert.offset); - root->current_insert.offset = total_needed - num_blocks; - root->current_insert.objectid = ins->objectid + num_blocks; - root->current_insert.flags = 0; - root->last_insert.objectid = ins->objectid; + BUG_ON(root->fs_info->current_insert.offset); + root->fs_info->current_insert.offset = total_needed - num_blocks; + root->fs_info->current_insert.objectid = ins->objectid + num_blocks; + root->fs_info->current_insert.flags = 0; + root->fs_info->last_insert.objectid = ins->objectid; ins->offset = num_blocks; return 0; error: @@ -383,20 +392,20 @@ static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root { int ret; int pending_ret; - struct btrfs_root *extent_root = root->extent_root; + struct btrfs_root *extent_root = root->fs_info->extent_root; struct btrfs_extent_item extent_item; btrfs_set_extent_refs(&extent_item, 1); btrfs_set_extent_owner(&extent_item, owner); if (root == extent_root) { - BUG_ON(extent_root->current_insert.offset == 0); + BUG_ON(extent_root->fs_info->current_insert.offset == 0); BUG_ON(num_blocks != 1); - BUG_ON(extent_root->current_insert.flags == - extent_root->current_insert.offset); + BUG_ON(extent_root->fs_info->current_insert.flags == + extent_root->fs_info->current_insert.offset); ins->offset = 1; - ins->objectid = extent_root->current_insert.objectid + - extent_root->current_insert.flags++; + ins->objectid = extent_root->fs_info->current_insert.objectid + + extent_root->fs_info->current_insert.flags++; return 0; } ret = find_free_extent(trans, root, num_blocks, search_start, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index f5b36c732c5..24cfd6d8524 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -4,4 +4,10 @@ #include "radix-tree.h" #include "ctree.h" #include "disk-io.h" +#include "transaction.h" +int btrfs_create_file(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 dirid, u64 *objectid) +{ + return 0; +} diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c new file mode 100644 index 00000000000..f412b339213 --- /dev/null +++ b/fs/btrfs/inode-map.c @@ -0,0 +1,136 @@ +#include +#include +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" +#include "transaction.h" + +/* + * walks the btree of allocated inodes and find a hole. + */ +int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, + struct btrfs_root *fs_root, + u64 dirid, u64 *objectid) +{ + struct btrfs_path path; + struct btrfs_key key; + int ret; + u64 hole_size = 0; + int slot = 0; + u64 last_ino; + int start_found; + struct btrfs_leaf *l; + struct btrfs_root *root = fs_root->fs_info->inode_root; + struct btrfs_key search_key; + u64 search_start = dirid; + + if (fs_root->fs_info->last_inode_alloc_dirid == dirid) + search_start = fs_root->fs_info->last_inode_alloc; + + search_key.objectid = search_start; + search_key.flags = 0; + btrfs_set_key_type(&search_key, BTRFS_INODE_MAP_ITEM_KEY); + search_key.offset = 0; + + btrfs_init_path(&path); + start_found = 0; + ret = btrfs_search_slot(trans, root, &search_key, &path, 0, 0); + if (ret < 0) + goto error; + + if (path.slots[0] > 0) + path.slots[0]--; + + while (1) { + l = &path.nodes[0]->leaf; + slot = path.slots[0]; + if (slot >= btrfs_header_nritems(&l->header)) { + ret = btrfs_next_leaf(root, &path); + if (ret == 0) + continue; + if (ret < 0) + goto error; + if (!start_found) { + *objectid = search_start; + start_found = 1; + goto found; + } + *objectid = last_ino > search_start ? + last_ino : search_start; + goto found; + } + btrfs_disk_key_to_cpu(&key, &l->items[slot].key); + if (key.objectid >= search_start) { + if (start_found) { + if (last_ino < search_start) + last_ino = search_start; + hole_size = key.objectid - last_ino; + if (hole_size > 0) { + *objectid = last_ino; + goto found; + } + } + } + start_found = 1; + last_ino = key.objectid + 1; + path.slots[0]++; + } + // FIXME -ENOSPC +found: + root->fs_info->last_inode_alloc = *objectid; + root->fs_info->last_inode_alloc_dirid = dirid; + btrfs_release_path(root, &path); + BUG_ON(*objectid < search_start); + return 0; +error: + btrfs_release_path(root, &path); + return ret; +} + +int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, + struct btrfs_root *fs_root, + u64 objectid, struct btrfs_key *location) +{ + int ret = 0; + struct btrfs_path path; + struct btrfs_inode_map_item *inode_item; + struct btrfs_key key; + struct btrfs_root *inode_root = fs_root->fs_info->inode_root; + + key.objectid = objectid; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_INODE_MAP_ITEM_KEY); + key.offset = 0; + btrfs_init_path(&path); + ret = btrfs_insert_empty_item(trans, inode_root, &path, &key, + sizeof(struct btrfs_inode_map_item)); + if (ret) + goto out; + + inode_item = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], + struct btrfs_inode_map_item); + btrfs_cpu_key_to_disk(&inode_item->key, location); +out: + btrfs_release_path(inode_root, &path); + return ret; +} + +int btrfs_lookup_inode_map(struct btrfs_trans_handle *trans, + struct btrfs_root *fs_root, struct btrfs_path *path, + u64 objectid, int mod) +{ + int ret; + struct btrfs_key key; + int ins_len = mod < 0 ? -1 : 0; + int cow = mod != 0; + struct btrfs_root *inode_root = fs_root->fs_info->inode_root; + + key.objectid = objectid; + key.flags = 0; + key.offset = 0; + btrfs_set_key_type(&key, BTRFS_INODE_MAP_ITEM_KEY); + ret = btrfs_search_slot(trans, inode_root, &key, path, ins_len, cow); + return ret; +} + diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c index 9aa900811c3..1cac5ab114d 100644 --- a/fs/btrfs/mkfs.c +++ b/fs/btrfs/mkfs.c @@ -42,7 +42,7 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) btrfs_set_header_parentid(&empty_leaf->header, BTRFS_ROOT_TREE_OBJECTID); btrfs_set_header_blocknr(&empty_leaf->header, start_block + 1); - btrfs_set_header_nritems(&empty_leaf->header, 2); + btrfs_set_header_nritems(&empty_leaf->header, 3); /* create the items for the root tree */ btrfs_set_root_blocknr(&root_item, start_block + 2); @@ -61,8 +61,16 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) btrfs_set_root_blocknr(&root_item, start_block + 3); itemoff = itemoff - sizeof(root_item); btrfs_set_item_offset(&item, itemoff); - btrfs_set_disk_key_objectid(&item.key, BTRFS_FS_TREE_OBJECTID); + btrfs_set_disk_key_objectid(&item.key, BTRFS_INODE_MAP_OBJECTID); memcpy(empty_leaf->items + 1, &item, sizeof(item)); + memcpy(btrfs_leaf_data(empty_leaf) + itemoff, + &root_item, sizeof(root_item)); + + btrfs_set_root_blocknr(&root_item, start_block + 4); + itemoff = itemoff - sizeof(root_item); + btrfs_set_item_offset(&item, itemoff); + btrfs_set_disk_key_objectid(&item.key, BTRFS_FS_TREE_OBJECTID); + memcpy(empty_leaf->items + 2, &item, sizeof(item)); memcpy(btrfs_leaf_data(empty_leaf) + itemoff, &root_item, sizeof(root_item)); ret = pwrite(fd, empty_leaf, blocksize, (start_block + 1) * blocksize); @@ -71,7 +79,7 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) btrfs_set_header_parentid(&empty_leaf->header, BTRFS_EXTENT_TREE_OBJECTID); btrfs_set_header_blocknr(&empty_leaf->header, start_block + 2); - btrfs_set_header_nritems(&empty_leaf->header, 4); + btrfs_set_header_nritems(&empty_leaf->header, 5); /* item1, reserve blocks 0-16 */ btrfs_set_disk_key_objectid(&item.key, 0); @@ -108,12 +116,12 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), &extent_item, btrfs_item_size(&item)); - /* item4, give block 19 to the FS root */ + /* item4, give block 19 to the inode map */ btrfs_set_disk_key_objectid(&item.key, start_block + 3); btrfs_set_disk_key_offset(&item.key, 1); itemoff = itemoff - sizeof(struct btrfs_extent_item); btrfs_set_item_offset(&item, itemoff); - btrfs_set_extent_owner(&extent_item, BTRFS_FS_TREE_OBJECTID); + btrfs_set_extent_owner(&extent_item, BTRFS_INODE_MAP_OBJECTID); memcpy(empty_leaf->items + 3, &item, sizeof(item)); memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), &extent_item, btrfs_item_size(&item)); @@ -121,11 +129,33 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) if (ret != blocksize) return -1; - /* finally create the FS root */ - btrfs_set_header_parentid(&empty_leaf->header, BTRFS_FS_TREE_OBJECTID); + /* item5, give block 20 to the FS root */ + btrfs_set_disk_key_objectid(&item.key, start_block + 4); + btrfs_set_disk_key_offset(&item.key, 1); + itemoff = itemoff - sizeof(struct btrfs_extent_item); + btrfs_set_item_offset(&item, itemoff); + btrfs_set_extent_owner(&extent_item, BTRFS_FS_TREE_OBJECTID); + memcpy(empty_leaf->items + 4, &item, sizeof(item)); + memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), + &extent_item, btrfs_item_size(&item)); + ret = pwrite(fd, empty_leaf, blocksize, (start_block + 2) * blocksize); + if (ret != blocksize) + return -1; + + /* create the inode map */ + btrfs_set_header_parentid(&empty_leaf->header, + BTRFS_INODE_MAP_OBJECTID); btrfs_set_header_blocknr(&empty_leaf->header, start_block + 3); btrfs_set_header_nritems(&empty_leaf->header, 0); ret = pwrite(fd, empty_leaf, blocksize, (start_block + 3) * blocksize); + if (ret != blocksize) + return -1; + + /* finally create the FS root */ + btrfs_set_header_parentid(&empty_leaf->header, BTRFS_FS_TREE_OBJECTID); + btrfs_set_header_blocknr(&empty_leaf->header, start_block + 4); + btrfs_set_header_nritems(&empty_leaf->header, 0); + ret = pwrite(fd, empty_leaf, blocksize, (start_block + 4) * blocksize); if (ret != blocksize) return -1; return 0; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 0bb5c38427c..f250e5fad77 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -13,8 +13,8 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_extent_item *ei; struct btrfs_root_item *ri; struct btrfs_dir_item *di; + struct btrfs_inode_map_item *mi; u32 type; - u32 namelen; printf("leaf %Lu total ptrs %d free space %d\n", btrfs_header_blocknr(&l->header), nr, @@ -34,15 +34,13 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) case BTRFS_INODE_ITEM_KEY: break; case BTRFS_DIR_ITEM_KEY: - namelen = btrfs_item_size(l->items + i) - sizeof(*di); di = btrfs_item_ptr(l, i, struct btrfs_dir_item); printf("\t\tdir oid %Lu flags %u type %u\n", btrfs_dir_objectid(di), btrfs_dir_flags(di), btrfs_dir_type(di)); printf("\t\tname %.*s\n", - namelen, (char *)(di + 1)); - + btrfs_dir_name_len(di),(char *)(di + 1)); break; case BTRFS_ROOT_ITEM_KEY: ri = btrfs_item_ptr(l, i, struct btrfs_root_item); @@ -54,6 +52,13 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) printf("\t\textent data refs %u owner %Lu\n", btrfs_extent_refs(ei), btrfs_extent_owner(ei)); break; + case BTRFS_INODE_MAP_ITEM_KEY: + mi = btrfs_item_ptr(l, i, struct btrfs_inode_map_item); + printf("\t\tinode map key %Lu %u %Lu\n", + btrfs_disk_key_objectid(&mi->key), + btrfs_disk_key_flags(&mi->key), + btrfs_disk_key_offset(&mi->key)); + break; case BTRFS_STRING_ITEM_KEY: printf("\t\titem data %.*s\n", btrfs_item_size(item), btrfs_leaf_data(l) + btrfs_item_offset(item)); diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c index d676577185d..ef12cf88878 100644 --- a/fs/btrfs/quick-test.c +++ b/fs/btrfs/quick-test.c @@ -172,7 +172,8 @@ int main(int ac, char **av) { printf("tree size is now %d\n", tree_size); printf("root %p commit root %p\n", root->node, root->commit_root); printf("map tree\n"); - btrfs_print_tree(root->extent_root, root->extent_root->node); + btrfs_print_tree(root->fs_info->extent_root, + root->fs_info->extent_root->node); close_ctree(root, &super); return 0; } -- cgit v1.2.3-70-g09d2 From 1261ec42b3d3a3ad878bd172144940e3ac710749 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 20 Mar 2007 20:35:03 -0400 Subject: Btrfs: Better block record keeping, real mkfs Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 7 ++-- fs/btrfs/ctree.h | 1 + fs/btrfs/debug-tree.c | 13 ++++++- fs/btrfs/dir-test.c | 2 -- fs/btrfs/disk-io.c | 11 ++---- fs/btrfs/extent-tree.c | 24 ++++++++++--- fs/btrfs/mkfs.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++++- 7 files changed, 134 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 636c63efde6..0720169b6d6 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -2,7 +2,7 @@ CC=gcc CFLAGS = -g -Wall -Werror headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h \ transaction.h -objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o \ +objects = ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ root-tree.o dir-item.o hash.o file-item.o inode-item.o \ inode-map.o \ @@ -16,7 +16,10 @@ check=sparse $(CHECKFLAGS) $(check) $< $(CC) $(CFLAGS) -c $< -all: tester debug-tree quick-test dir-test tags +all: tester debug-tree quick-test dir-test tags mkfs.btrfs + +mkfs.btrfs: $(objects) mkfs.o + gcc $(CFLAGS) -o mkfs.btrfs $(objects) mkfs.o debug-tree: $(objects) debug-tree.o gcc $(CFLAGS) -o debug-tree $(objects) debug-tree.o diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b30b2ce7245..1a4d1d6fa40 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -223,6 +223,7 @@ struct btrfs_fs_info { int cache_size; int fp; struct btrfs_trans_handle *running_transaction; + struct btrfs_super_block *disk_super; }; /* diff --git a/fs/btrfs/debug-tree.c b/fs/btrfs/debug-tree.c index d9f36efc3dd..fd07969600c 100644 --- a/fs/btrfs/debug-tree.c +++ b/fs/btrfs/debug-tree.c @@ -10,8 +10,17 @@ int main(int ac, char **av) { struct btrfs_super_block super; struct btrfs_root *root; + + if (ac != 2) { + fprintf(stderr, "usage: %s device\n", av[0]); + exit(1); + } radix_tree_init(); - root = open_ctree("dbfile", &super); + root = open_ctree(av[1], &super); + if (!root) { + fprintf(stderr, "unable to open %s\n", av[1]); + exit(1); + } printf("fs tree\n"); btrfs_print_tree(root, root->node); printf("map tree\n"); @@ -23,5 +32,7 @@ int main(int ac, char **av) { printf("root tree\n"); btrfs_print_tree(root->fs_info->tree_root, root->fs_info->tree_root->node); + printf("total blocks %Lu\n", btrfs_super_total_blocks(&super)); + printf("blocks used %Lu\n", btrfs_super_blocks_used(&super)); return 0; } diff --git a/fs/btrfs/dir-test.c b/fs/btrfs/dir-test.c index 8fc77c83a35..b673982a1f3 100644 --- a/fs/btrfs/dir-test.c +++ b/fs/btrfs/dir-test.c @@ -425,8 +425,6 @@ int main(int ac, char **av) struct btrfs_trans_handle *trans; radix_tree_init(); - printf("removing old tree\n"); - unlink("dbfile"); root = open_ctree("dbfile", &super); trans = btrfs_start_transaction(root, 1); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bacaa38ea82..0322c55162c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -293,20 +293,15 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) fs_info->inode_root = inode_root; fs_info->last_inode_alloc = 0; fs_info->last_inode_alloc_dirid = 0; + fs_info->disk_super = super; memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); ret = pread(fp, super, sizeof(struct btrfs_super_block), BTRFS_SUPER_INFO_OFFSET); if (ret == 0 || btrfs_super_root(super) == 0) { - printf("making new FS!\n"); - ret = mkfs(fp, 0, 1024); - if (ret) - return NULL; - ret = pread(fp, super, sizeof(struct btrfs_super_block), - BTRFS_SUPER_INFO_OFFSET); - if (ret != sizeof(struct btrfs_super_block)) - return NULL; + BUG(); + return NULL; } BUG_ON(ret < 0); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 09eeeb4d9d2..9bc4ad38876 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -134,6 +134,8 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct struct btrfs_extent_item extent_item; int i; int ret; + u64 super_blocks_used; + struct btrfs_fs_info *info = extent_root->fs_info; btrfs_set_extent_refs(&extent_item, 1); btrfs_set_extent_owner(&extent_item, @@ -145,6 +147,9 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct for (i = 0; i < extent_root->fs_info->current_insert.flags; i++) { ins.objectid = extent_root->fs_info->current_insert.objectid + i; + super_blocks_used = btrfs_super_blocks_used(info->disk_super); + btrfs_set_super_blocks_used(info->disk_super, + super_blocks_used + 1); ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item, sizeof(extent_item)); BUG_ON(ret); @@ -161,7 +166,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root { struct btrfs_path path; struct btrfs_key key; - struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_root *extent_root = info->extent_root; int ret; struct btrfs_extent_item *ei; struct btrfs_key ins; @@ -188,15 +194,18 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root refs = btrfs_extent_refs(ei) - 1; btrfs_set_extent_refs(ei, refs); if (refs == 0) { + u64 super_blocks_used; if (pin) { int err; radix_tree_preload(GFP_KERNEL); - err = radix_tree_insert( - &extent_root->fs_info->pinned_radix, - blocknr, (void *)blocknr); + err = radix_tree_insert(&info->pinned_radix, + blocknr, (void *)blocknr); BUG_ON(err); radix_tree_preload_end(); } + super_blocks_used = btrfs_super_blocks_used(info->disk_super); + btrfs_set_super_blocks_used(info->disk_super, + super_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, &path); if (!pin && extent_root->fs_info->last_insert.objectid > blocknr) @@ -392,7 +401,9 @@ static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root { int ret; int pending_ret; - struct btrfs_root *extent_root = root->fs_info->extent_root; + u64 super_blocks_used; + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_root *extent_root = info->extent_root; struct btrfs_extent_item extent_item; btrfs_set_extent_refs(&extent_item, 1); @@ -413,6 +424,9 @@ static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (ret) return ret; + super_blocks_used = btrfs_super_blocks_used(info->disk_super); + btrfs_set_super_blocks_used(info->disk_super, super_blocks_used + + num_blocks); ret = btrfs_insert_item(trans, extent_root, ins, &extent_item, sizeof(extent_item)); diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c index 1cac5ab114d..f7efc8a5fb1 100644 --- a/fs/btrfs/mkfs.c +++ b/fs/btrfs/mkfs.c @@ -1,4 +1,8 @@ #define _XOPEN_SOURCE 500 +#ifndef __CHECKER__ +#include +#include +#endif #include #include #include @@ -10,6 +14,17 @@ #include "ctree.h" #include "disk-io.h" +#ifdef __CHECKER__ +#define BLKGETSIZE64 0 +static inline int ioctl(int fd, int define, u64 *size) { return 0; } +#endif + +#if 0 +#if defined(__linux__) && defined(_IOR) && !defined(BLKGETSIZE64) +# define BLKGETSIZE64 _IOR(0x12, 114, __u64) +#endif +#endif + int mkfs(int fd, u64 num_blocks, u32 blocksize) { struct btrfs_super_block super; @@ -27,7 +42,7 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) strcpy((char *)(&super.magic), BTRFS_MAGIC); btrfs_set_super_blocksize(&super, blocksize); btrfs_set_super_total_blocks(&super, num_blocks); - btrfs_set_super_blocks_used(&super, 0); + btrfs_set_super_blocks_used(&super, start_block + 5); block = malloc(blocksize); memset(block, 0, blocksize); @@ -160,3 +175,81 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) return -1; return 0; } + +u64 device_size(int fd, struct stat *st) +{ + u64 size; + if (S_ISREG(st->st_mode)) { + return st->st_size; + } + if (!S_ISBLK(st->st_mode)) { + return 0; + } + if (ioctl(fd, BLKGETSIZE64, &size) >= 0) { + return size; + } + return 0; +} + +int main(int ac, char **av) +{ + char *file; + u64 block_count = 0; + int fd; + struct stat st; + int ret; + int i; + char *buf = malloc(4096); + if (ac >= 2) { + file = av[1]; + if (ac == 3) { + block_count = atoi(av[2]); + if (!block_count) { + fprintf(stderr, "error finding block count\n"); + exit(1); + } + } + } else { + fprintf(stderr, "usage: mkfs.btrfs file [block count]\n"); + exit(1); + } + fd = open(file, O_RDWR); + if (fd < 0) { + fprintf(stderr, "unable to open %s\n", file); + exit(1); + } + ret = fstat(fd, &st); + if (ret < 0) { + fprintf(stderr, "unable to stat %s\n", file); + exit(1); + } + if (block_count == 0) { + block_count = device_size(fd, &st); + if (block_count == 0) { + fprintf(stderr, "unable to find %s size\n", file); + exit(1); + } + } + block_count /= 4096; + if (block_count < 256) { + fprintf(stderr, "device %s is too small\n", file); + exit(1); + } + memset(buf, 0, 4096); + for(i = 0; i < 6; i++) { + ret = write(fd, buf, 4096); + if (ret != 4096) { + fprintf(stderr, "unable to zero fill device\n"); + exit(1); + } + } + ret = mkfs(fd, block_count, 4096); + if (ret) { + fprintf(stderr, "error during mkfs %d\n", ret); + exit(1); + } + printf("fs created on %s blocksize %d blocks %Lu\n", + file, 4096, block_count); + return 0; +} + -- cgit v1.2.3-70-g09d2 From 2e635a278354a1a7951e16cfea4c247d6d0e7c99 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 21 Mar 2007 11:12:56 -0400 Subject: Btrfs: initial move to kernel module land Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 50 +-- fs/btrfs/ctree.c | 6 +- fs/btrfs/ctree.h | 15 +- fs/btrfs/debug-tree.c | 38 --- fs/btrfs/dir-item.c | 12 +- fs/btrfs/dir-test.c | 494 ----------------------------- fs/btrfs/disk-io.c | 17 +- fs/btrfs/disk-io.h | 1 + fs/btrfs/extent-tree.c | 10 +- fs/btrfs/file-item.c | 6 +- fs/btrfs/hash.c | 1 - fs/btrfs/hasher.c | 23 -- fs/btrfs/inode-item.c | 5 +- fs/btrfs/inode-map.c | 5 +- fs/btrfs/kerncompat.h | 96 ------ fs/btrfs/list.h | 418 ------------------------- fs/btrfs/mkfs.c | 255 --------------- fs/btrfs/print-tree.c | 30 +- fs/btrfs/quick-test.c | 179 ----------- fs/btrfs/radix-tree.c | 836 ------------------------------------------------- fs/btrfs/radix-tree.h | 73 ----- fs/btrfs/random-test.c | 405 ------------------------ fs/btrfs/root-tree.c | 5 +- fs/btrfs/super.c | 205 ++++++++++++ 24 files changed, 274 insertions(+), 2911 deletions(-) delete mode 100644 fs/btrfs/debug-tree.c delete mode 100644 fs/btrfs/dir-test.c delete mode 100644 fs/btrfs/hasher.c delete mode 100644 fs/btrfs/kerncompat.h delete mode 100644 fs/btrfs/list.h delete mode 100644 fs/btrfs/mkfs.c delete mode 100644 fs/btrfs/quick-test.c delete mode 100644 fs/btrfs/radix-tree.c delete mode 100644 fs/btrfs/radix-tree.h delete mode 100644 fs/btrfs/random-test.c create mode 100644 fs/btrfs/super.c (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 0720169b6d6..99e45a54ebd 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -1,40 +1,20 @@ -CC=gcc -CFLAGS = -g -Wall -Werror -headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h \ - transaction.h -objects = ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ - root-tree.o dir-item.o hash.o file-item.o inode-item.o \ - inode-map.o \ +ifneq ($(KERNELRELEASE),) +# kbuild part of makefile -# if you don't have sparse installed, use ls instead -CHECKFLAGS=-D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise \ - -Wcontext -Wcast-truncate -Wuninitialized -Wshadow -Wundef -check=sparse $(CHECKFLAGS) -#check=ls +obj-m := btrfs.o +btrfs-y := super.o -.c.o: - $(check) $< - $(CC) $(CFLAGS) -c $< +#btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ +# root-tree.o dir-item.o hash.o file-item.o inode-item.o \ +# inode-map.o \ -all: tester debug-tree quick-test dir-test tags mkfs.btrfs - -mkfs.btrfs: $(objects) mkfs.o - gcc $(CFLAGS) -o mkfs.btrfs $(objects) mkfs.o - -debug-tree: $(objects) debug-tree.o - gcc $(CFLAGS) -o debug-tree $(objects) debug-tree.o - -tester: $(objects) random-test.o - gcc $(CFLAGS) -o tester $(objects) random-test.o - -dir-test: $(objects) dir-test.o - gcc $(CFLAGS) -o dir-test $(objects) dir-test.o -quick-test: $(objects) quick-test.o - gcc $(CFLAGS) -o quick-test $(objects) quick-test.o - -$(objects): $(headers) - -clean : - rm debug-tree tester *.o +else +# Normal Makefile +KERNELDIR := /lib/modules/`uname -r`/build +all:: + $(MAKE) -C $(KERNELDIR) M=`pwd` modules +clean:: + rm *.o btrfs.ko +endif diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 32922643b5b..9fbd07c37fd 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1,10 +1,6 @@ -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" +#include #include "ctree.h" #include "disk-io.h" -#include "print-tree.h" static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1a4d1d6fa40..ae8518cb94b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1,9 +1,6 @@ #ifndef __BTRFS__ #define __BTRFS__ -#include "list.h" -#include "kerncompat.h" - struct btrfs_trans_handle; #define BTRFS_MAGIC "_BtRfS_M" @@ -75,6 +72,7 @@ struct btrfs_super_block { __le64 root; __le64 total_blocks; __le64 blocks_used; + __le64 root_dir_objectid; } __attribute__ ((__packed__)); /* @@ -693,6 +691,17 @@ static inline void btrfs_set_super_blocksize(struct btrfs_super_block *s, s->blocksize = cpu_to_le32(val); } +static inline u64 btrfs_super_root_dir(struct btrfs_super_block *s) +{ + return le64_to_cpu(s->root_dir_objectid); +} + +static inline void btrfs_set_super_root_dir(struct btrfs_super_block *s, u64 + val) +{ + s->root_dir_objectid = cpu_to_le64(val); +} + static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) { return (u8 *)l->items; diff --git a/fs/btrfs/debug-tree.c b/fs/btrfs/debug-tree.c deleted file mode 100644 index fd07969600c..00000000000 --- a/fs/btrfs/debug-tree.c +++ /dev/null @@ -1,38 +0,0 @@ -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" -#include "ctree.h" -#include "disk-io.h" -#include "print-tree.h" -#include "transaction.h" - -int main(int ac, char **av) { - struct btrfs_super_block super; - struct btrfs_root *root; - - if (ac != 2) { - fprintf(stderr, "usage: %s device\n", av[0]); - exit(1); - } - radix_tree_init(); - root = open_ctree(av[1], &super); - if (!root) { - fprintf(stderr, "unable to open %s\n", av[1]); - exit(1); - } - printf("fs tree\n"); - btrfs_print_tree(root, root->node); - printf("map tree\n"); - btrfs_print_tree(root->fs_info->extent_root, - root->fs_info->extent_root->node); - printf("inode tree\n"); - btrfs_print_tree(root->fs_info->inode_root, - root->fs_info->inode_root->node); - printf("root tree\n"); - btrfs_print_tree(root->fs_info->tree_root, - root->fs_info->tree_root->node); - printf("total blocks %Lu\n", btrfs_super_total_blocks(&super)); - printf("blocks used %Lu\n", btrfs_super_blocks_used(&super)); - return 0; -} diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 949c4e52679..4d8083d92fa 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -1,7 +1,4 @@ -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" +#include #include "ctree.h" #include "disk-io.h" #include "hash.h" @@ -21,7 +18,12 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root key.objectid = dir; key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); - ret = btrfs_name_hash(name, name_len, &key.offset); + if (name_len == 1 && *name == '.') + key.offset = 1; + else if (name_len == 2 && name[0] == '.' && name[1] == '.') + key.offset = 2; + else + ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); btrfs_init_path(&path); data_size = sizeof(*dir_item) + name_len; diff --git a/fs/btrfs/dir-test.c b/fs/btrfs/dir-test.c deleted file mode 100644 index b673982a1f3..00000000000 --- a/fs/btrfs/dir-test.c +++ /dev/null @@ -1,494 +0,0 @@ -#include -#include -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" -#include "ctree.h" -#include "disk-io.h" -#include "print-tree.h" -#include "hash.h" -#include "transaction.h" - -int keep_running = 1; -struct btrfs_super_block super; -static u64 dir_oid = 44556; -static u64 file_oid = 33778; - -static int find_num(struct radix_tree_root *root, unsigned long *num_ret, - int exists) -{ - unsigned long num = rand(); - unsigned long res[2]; - int ret; - -again: - ret = radix_tree_gang_lookup(root, (void **)res, num, 2); - if (exists) { - if (ret == 0) - return -1; - num = res[0]; - } else if (ret != 0 && num == res[0]) { - num++; - if (ret > 1 && num == res[1]) { - num++; - goto again; - } - } - *num_ret = num; - return 0; -} - -static void initial_inode_init(struct btrfs_root *root, - struct btrfs_inode_item *inode_item) -{ - memset(inode_item, 0, sizeof(*inode_item)); - btrfs_set_inode_generation(inode_item, root->fs_info->generation); -} - -static int ins_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct radix_tree_root *radix) -{ - int ret; - char buf[128]; - unsigned long oid; - u64 objectid; - struct btrfs_path path; - struct btrfs_key inode_map; - struct btrfs_inode_item inode_item; - - find_num(radix, &oid, 0); - sprintf(buf, "str-%lu", oid); - - ret = btrfs_find_free_objectid(trans, root, dir_oid + 1, &objectid); - if (ret) - goto error; - - inode_map.objectid = objectid; - inode_map.flags = 0; - inode_map.offset = 0; - - ret = btrfs_insert_inode_map(trans, root, objectid, &inode_map); - if (ret) - goto error; - - initial_inode_init(root, &inode_item); - ret = btrfs_insert_inode(trans, root, objectid, &inode_item); - if (ret) - goto error; - ret = btrfs_insert_dir_item(trans, root, buf, strlen(buf), dir_oid, - objectid, 1); - if (ret) - goto error; - - radix_tree_preload(GFP_KERNEL); - ret = radix_tree_insert(radix, oid, (void *)oid); - radix_tree_preload_end(); - if (ret) - goto error; - return ret; -error: - if (ret != -EEXIST) - goto fatal; - - /* - * if we got an EEXIST, it may be due to hash collision, double - * check - */ - btrfs_init_path(&path); - ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf, - strlen(buf), 0); - if (ret) - goto fatal_release; - if (!btrfs_match_dir_item_name(root, &path, buf, strlen(buf))) { - struct btrfs_dir_item *di; - char *found; - u32 found_len; - u64 myhash; - u64 foundhash; - - di = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], - struct btrfs_dir_item); - found = (char *)(di + 1); - found_len = btrfs_dir_name_len(di); - btrfs_name_hash(buf, strlen(buf), &myhash); - btrfs_name_hash(found, found_len, &foundhash); - if (myhash != foundhash) - goto fatal_release; - btrfs_release_path(root, &path); - return 0; - } -fatal_release: - btrfs_release_path(root, &path); -fatal: - printf("failed to insert %lu ret %d\n", oid, ret); - return -1; -} - -static int insert_dup(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct radix_tree_root *radix) -{ - int ret; - char buf[128]; - unsigned long oid; - - ret = find_num(radix, &oid, 1); - if (ret < 0) - return 0; - sprintf(buf, "str-%lu", oid); - - ret = btrfs_insert_dir_item(trans, root, buf, strlen(buf), dir_oid, - file_oid, 1); - if (ret != -EEXIST) { - printf("insert on %s gave us %d\n", buf, ret); - return 1; - } - return 0; -} - -static int del_dir_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct radix_tree_root *radix, - unsigned long radix_index, - struct btrfs_path *path) -{ - int ret; - unsigned long *ptr; - u64 file_objectid; - struct btrfs_dir_item *di; - - /* find the inode number of the file */ - di = btrfs_item_ptr(&path->nodes[0]->leaf, path->slots[0], - struct btrfs_dir_item); - file_objectid = btrfs_dir_objectid(di); - - /* delete the directory item */ - ret = btrfs_del_item(trans, root, path); - if (ret) - goto out_release; - btrfs_release_path(root, path); - - /* delete the inode */ - btrfs_init_path(path); - ret = btrfs_lookup_inode(trans, root, path, file_objectid, -1); - if (ret) - goto out_release; - ret = btrfs_del_item(trans, root, path); - if (ret) - goto out_release; - btrfs_release_path(root, path); - - /* delete the inode mapping */ - btrfs_init_path(path); - ret = btrfs_lookup_inode_map(trans, root, path, file_objectid, -1); - if (ret) - goto out_release; - ret = btrfs_del_item(trans, root->fs_info->inode_root, path); - if (ret) - goto out_release; - - if (root->fs_info->last_inode_alloc > file_objectid) - root->fs_info->last_inode_alloc = file_objectid; - btrfs_release_path(root, path); - ptr = radix_tree_delete(radix, radix_index); - if (!ptr) { - ret = -5555; - goto out; - } - return 0; -out_release: - btrfs_release_path(root, path); -out: - printf("failed to delete %lu %d\n", radix_index, ret); - return -1; -} - -static int del_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct radix_tree_root *radix) -{ - int ret; - char buf[128]; - unsigned long oid; - struct btrfs_path path; - - ret = find_num(radix, &oid, 1); - if (ret < 0) - return 0; - sprintf(buf, "str-%lu", oid); - btrfs_init_path(&path); - ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf, - strlen(buf), -1); - if (ret) - goto out_release; - - ret = del_dir_item(trans, root, radix, oid, &path); - if (ret) - goto out_release; - return ret; -out_release: - btrfs_release_path(root, &path); - printf("failed to delete %lu %d\n", oid, ret); - return -1; -} - -static int lookup_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct radix_tree_root *radix) -{ - struct btrfs_path path; - char buf[128]; - int ret; - unsigned long oid; - u64 objectid; - struct btrfs_dir_item *di; - - ret = find_num(radix, &oid, 1); - if (ret < 0) - return 0; - sprintf(buf, "str-%lu", oid); - btrfs_init_path(&path); - ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf, - strlen(buf), 0); - if (!ret) { - di = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], - struct btrfs_dir_item); - objectid = btrfs_dir_objectid(di); - btrfs_release_path(root, &path); - btrfs_init_path(&path); - ret = btrfs_lookup_inode_map(trans, root, &path, objectid, 0); - } - btrfs_release_path(root, &path); - if (ret) { - printf("unable to find key %lu\n", oid); - return -1; - } - return 0; -} - -static int lookup_enoent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct radix_tree_root *radix) -{ - struct btrfs_path path; - char buf[128]; - int ret; - unsigned long oid; - - ret = find_num(radix, &oid, 0); - if (ret < 0) - return 0; - sprintf(buf, "str-%lu", oid); - btrfs_init_path(&path); - ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf, - strlen(buf), 0); - btrfs_release_path(root, &path); - if (!ret) { - printf("able to find key that should not exist %lu\n", oid); - return -1; - } - return 0; -} - -static int empty_tree(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct radix_tree_root *radix, int nr) -{ - struct btrfs_path path; - struct btrfs_key key; - unsigned long found = 0; - u32 found_len; - int ret; - int slot; - int count = 0; - char buf[128]; - struct btrfs_dir_item *di; - - key.offset = (u64)-1; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); - key.objectid = dir_oid; - while(nr-- >= 0) { - btrfs_init_path(&path); - ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); - if (ret < 0) { - btrfs_release_path(root, &path); - return ret; - } - if (ret != 0) { - if (path.slots[0] == 0) { - btrfs_release_path(root, &path); - break; - } - path.slots[0] -= 1; - } - slot = path.slots[0]; - di = btrfs_item_ptr(&path.nodes[0]->leaf, slot, - struct btrfs_dir_item); - found_len = btrfs_dir_name_len(di); - memcpy(buf, (char *)(di + 1), found_len); - BUG_ON(found_len > 128); - buf[found_len] = '\0'; - found = atoi(buf + 4); - ret = del_dir_item(trans, root, radix, found, &path); - count++; - if (ret) { - fprintf(stderr, - "failed to remove %lu from tree\n", - found); - return -1; - } - if (!keep_running) - break; - } - return 0; - fprintf(stderr, "failed to delete from the radix %lu\n", found); - return -1; -} - -static int fill_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct radix_tree_root *radix, int count) -{ - int i; - int ret = 0; - for (i = 0; i < count; i++) { - ret = ins_one(trans, root, radix); - if (ret) { - fprintf(stderr, "fill failed\n"); - goto out; - } - if (i % 1000 == 0) { - ret = btrfs_commit_transaction(trans, root, &super); - if (ret) { - fprintf(stderr, "fill commit failed\n"); - return ret; - } - } - if (i && i % 10000 == 0) { - printf("bigfill %d\n", i); - } - if (!keep_running) - break; - } -out: - return ret; -} - -static int bulk_op(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct radix_tree_root *radix) -{ - int ret; - int nr = rand() % 5000; - static int run_nr = 0; - - /* do the bulk op much less frequently */ - if (run_nr++ % 100) - return 0; - ret = empty_tree(trans, root, radix, nr); - if (ret) - return ret; - ret = fill_tree(trans, root, radix, nr); - if (ret) - return ret; - return 0; -} - - -int (*ops[])(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct - radix_tree_root *radix) = - { ins_one, insert_dup, del_one, lookup_item, - lookup_enoent, bulk_op }; - -void sigstopper(int ignored) -{ - keep_running = 0; - fprintf(stderr, "caught exit signal, stopping\n"); -} - -int print_usage(void) -{ - printf("usage: tester [-ih] [-c count] [-f count]\n"); - printf("\t -c count -- iteration count after filling\n"); - printf("\t -f count -- run this many random inserts before starting\n"); - printf("\t -i -- only do initial fill\n"); - printf("\t -h -- this help text\n"); - exit(1); -} -int main(int ac, char **av) -{ - RADIX_TREE(radix, GFP_KERNEL); - struct btrfs_root *root; - int i; - int ret; - int count; - int op; - int iterations = 20000; - int init_fill_count = 800000; - int err = 0; - int initial_only = 0; - struct btrfs_trans_handle *trans; - radix_tree_init(); - - root = open_ctree("dbfile", &super); - trans = btrfs_start_transaction(root, 1); - - signal(SIGTERM, sigstopper); - signal(SIGINT, sigstopper); - - for (i = 1 ; i < ac ; i++) { - if (strcmp(av[i], "-i") == 0) { - initial_only = 1; - } else if (strcmp(av[i], "-c") == 0) { - iterations = atoi(av[i+1]); - i++; - } else if (strcmp(av[i], "-f") == 0) { - init_fill_count = atoi(av[i+1]); - i++; - } else { - print_usage(); - } - } - printf("initial fill\n"); - ret = fill_tree(trans, root, &radix, init_fill_count); - printf("starting run\n"); - if (ret) { - err = ret; - goto out; - } - if (initial_only == 1) { - goto out; - } - for (i = 0; i < iterations; i++) { - op = rand() % ARRAY_SIZE(ops); - count = rand() % 128; - if (i % 2000 == 0) { - printf("%d\n", i); - fflush(stdout); - } - if (i && i % 5000 == 0) { - printf("open & close, root level %d nritems %d\n", - btrfs_header_level(&root->node->node.header), - btrfs_header_nritems(&root->node->node.header)); - close_ctree(root, &super); - root = open_ctree("dbfile", &super); - } - while(count--) { - ret = ops[op](trans, root, &radix); - if (ret) { - fprintf(stderr, "op %d failed %d:%d\n", - op, i, iterations); - btrfs_print_tree(root, root->node); - fprintf(stderr, "op %d failed %d:%d\n", - op, i, iterations); - err = ret; - goto out; - } - if (ops[op] == bulk_op) - break; - if (keep_running == 0) { - err = 0; - goto out; - } - } - } -out: - close_ctree(root, &super); - return err; -} - diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0322c55162c..05637f9fd7c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -266,20 +266,25 @@ static int find_and_setup_root(struct btrfs_super_block *super, } struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) +{ + int fp; + + fp = open(filename, O_CREAT | O_RDWR, 0600); + if (fp < 0) { + return NULL; + } + return open_ctree_fd(fp, super); +} + +struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super) { struct btrfs_root *root = malloc(sizeof(struct btrfs_root)); struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root)); struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root)); struct btrfs_root *inode_root = malloc(sizeof(struct btrfs_root)); struct btrfs_fs_info *fs_info = malloc(sizeof(*fs_info)); - int fp; int ret; - fp = open(filename, O_CREAT | O_RDWR, 0600); - if (fp < 0) { - free(root); - return NULL; - } INIT_RADIX_TREE(&fs_info->cache_radix, GFP_KERNEL); INIT_RADIX_TREE(&fs_info->pinned_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 24a9e77c831..d888cf5c350 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -24,6 +24,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_super_block *s); struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *s); +struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super); int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s); void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9bc4ad38876..53a7550b5c1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1,7 +1,5 @@ -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" +#include +#include #include "ctree.h" #include "disk-io.h" #include "print-tree.h" @@ -183,9 +181,9 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_init_path(&path); ret = btrfs_search_slot(trans, extent_root, &key, &path, -1, 1); if (ret) { - printf("failed to find %Lu\n", key.objectid); + printk("failed to find %Lu\n", key.objectid); btrfs_print_tree(extent_root, extent_root->node); - printf("failed to find %Lu\n", key.objectid); + printk("failed to find %Lu\n", key.objectid); BUG(); } ei = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 24cfd6d8524..8e1e5b4ccfa 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -1,9 +1,5 @@ -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" +#include #include "ctree.h" -#include "disk-io.h" #include "transaction.h" int btrfs_create_file(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index 92c37eece1f..6c2a71a46c7 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c @@ -10,7 +10,6 @@ * License. */ -#include "kerncompat.h" #define DELTA 0x9E3779B9 static void TEA_transform(__u32 buf[2], __u32 const in[]) diff --git a/fs/btrfs/hasher.c b/fs/btrfs/hasher.c deleted file mode 100644 index 96702da4329..00000000000 --- a/fs/btrfs/hasher.c +++ /dev/null @@ -1,23 +0,0 @@ -#include -#include -#include -#include "kerncompat.h" -#include "hash.h" - -int main() { - u64 result; - int ret; - char line[255]; - char *p; - while(1) { - p = fgets(line, 255, stdin); - if (!p) - break; - if (strlen(line) == 0) - continue; - ret = btrfs_name_hash(line, strlen(line), &result); - BUG_ON(ret); - printf("hash returns %Lu\n", result); - } - return 0; -} diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 7caeb11e875..8d8c26a6c1a 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -1,7 +1,4 @@ -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index f412b339213..c7fda3bf7b2 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -1,7 +1,4 @@ -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/kerncompat.h b/fs/btrfs/kerncompat.h deleted file mode 100644 index 105d3f58408..00000000000 --- a/fs/btrfs/kerncompat.h +++ /dev/null @@ -1,96 +0,0 @@ -#ifndef __KERNCOMPAT -#define __KERNCOMPAT -#define gfp_t int -#define get_cpu_var(p) (p) -#define __get_cpu_var(p) (p) -#define BITS_PER_LONG 64 -#define __GFP_BITS_SHIFT 20 -#define __GFP_BITS_MASK ((int)((1 << __GFP_BITS_SHIFT) - 1)) -#define GFP_KERNEL 0 -#define __read_mostly -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#define PAGE_SHIFT 12 -#define ULONG_MAX (~0UL) -#define BUG() abort() -#ifdef __CHECKER__ -#define __force __attribute__((force)) -#define __bitwise__ __attribute__((bitwise)) -#else -#define __force -#define __bitwise__ -#endif - -typedef unsigned int u32; -typedef u32 __u32; -typedef unsigned long long u64; -typedef unsigned char u8; -typedef unsigned short u16; - -typedef unsigned long pgoff_t; - -#include -#include -#include - -struct vma_shared { int prio_tree_node; }; -struct vm_area_struct { - unsigned long vm_pgoff; - unsigned long vm_start; - unsigned long vm_end; - struct vma_shared shared; -}; - -struct page { - unsigned long index; -}; - -static inline void preempt_enable(void) { do {; } while(0);} -static inline void preempt_disable(void) { do {; } while(0);} - -static inline void __set_bit(int bit, unsigned long *map) { - unsigned long *p = map + bit / BITS_PER_LONG; - bit = bit & (BITS_PER_LONG -1); - *p |= 1UL << bit; -} - -static inline int test_bit(int bit, unsigned long *map) { - unsigned long *p = map + bit / BITS_PER_LONG; - bit = bit & (BITS_PER_LONG -1); - return *p & (1UL << bit) ? 1 : 0; -} - -static inline void __clear_bit(int bit, unsigned long *map) { - unsigned long *p = map + bit / BITS_PER_LONG; - bit = bit & (BITS_PER_LONG -1); - *p &= ~(1UL << bit); -} -#define BUG_ON(c) do { if (c) abort(); } while (0) - -#define container_of(ptr, type, member) ({ \ - const typeof( ((type *)0)->member ) *__mptr = (ptr); \ - (type *)( (char *)__mptr - __builtin_offsetof(type,member) );}) - -#define ENOMEM 5 -#define EEXIST 6 - -#define __CHECK_ENDIAN__ -#ifdef __CHECK_ENDIAN__ -#define __bitwise __bitwise__ -#else -#define __bitwise -#endif - -typedef u16 __bitwise __le16; -typedef u16 __bitwise __be16; -typedef u32 __bitwise __le32; -typedef u32 __bitwise __be32; -typedef u64 __bitwise __le64; -typedef u64 __bitwise __be64; - -#define cpu_to_le64(x) ((__force __le64)(u64)(x)) -#define le64_to_cpu(x) ((__force u64)(__le64)(x)) -#define cpu_to_le32(x) ((__force __le32)(u32)(x)) -#define le32_to_cpu(x) ((__force u32)(__le32)(x)) -#define cpu_to_le16(x) ((__force __le16)(u16)(x)) -#define le16_to_cpu(x) ((__force u16)(__le16)(x)) -#endif diff --git a/fs/btrfs/list.h b/fs/btrfs/list.h deleted file mode 100644 index 1aafafb1337..00000000000 --- a/fs/btrfs/list.h +++ /dev/null @@ -1,418 +0,0 @@ -#ifndef _LINUX_LIST_H -#define _LINUX_LIST_H - -#define LIST_POISON1 ((void *) 0x00100100) -#define LIST_POISON2 ((void *) 0x00200200) - -/* - * Simple doubly linked list implementation. - * - * Some of the internal functions ("__xxx") are useful when - * manipulating whole lists rather than single entries, as - * sometimes we already know the next/prev entries and we can - * generate better code by using them directly rather than - * using the generic single-entry routines. - */ - -struct list_head { - struct list_head *next, *prev; -}; - -#define LIST_HEAD_INIT(name) { &(name), &(name) } - -#define LIST_HEAD(name) \ - struct list_head name = LIST_HEAD_INIT(name) - -static inline void INIT_LIST_HEAD(struct list_head *list) -{ - list->next = list; - list->prev = list; -} - -/* - * Insert a new entry between two known consecutive entries. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -#ifndef CONFIG_DEBUG_LIST -static inline void __list_add(struct list_head *new, - struct list_head *prev, - struct list_head *next) -{ - next->prev = new; - new->next = next; - new->prev = prev; - prev->next = new; -} -#else -extern void __list_add(struct list_head *new, - struct list_head *prev, - struct list_head *next); -#endif - -/** - * list_add - add a new entry - * @new: new entry to be added - * @head: list head to add it after - * - * Insert a new entry after the specified head. - * This is good for implementing stacks. - */ -#ifndef CONFIG_DEBUG_LIST -static inline void list_add(struct list_head *new, struct list_head *head) -{ - __list_add(new, head, head->next); -} -#else -extern void list_add(struct list_head *new, struct list_head *head); -#endif - - -/** - * list_add_tail - add a new entry - * @new: new entry to be added - * @head: list head to add it before - * - * Insert a new entry before the specified head. - * This is useful for implementing queues. - */ -static inline void list_add_tail(struct list_head *new, struct list_head *head) -{ - __list_add(new, head->prev, head); -} - -/* - * Delete a list entry by making the prev/next entries - * point to each other. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_del(struct list_head * prev, struct list_head * next) -{ - next->prev = prev; - prev->next = next; -} - -/** - * list_del - deletes entry from list. - * @entry: the element to delete from the list. - * Note: list_empty on entry does not return true after this, the entry is - * in an undefined state. - */ -#ifndef CONFIG_DEBUG_LIST -static inline void list_del(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - entry->next = LIST_POISON1; - entry->prev = LIST_POISON2; -} -#else -extern void list_del(struct list_head *entry); -#endif - -/** - * list_replace - replace old entry by new one - * @old : the element to be replaced - * @new : the new element to insert - * Note: if 'old' was empty, it will be overwritten. - */ -static inline void list_replace(struct list_head *old, - struct list_head *new) -{ - new->next = old->next; - new->next->prev = new; - new->prev = old->prev; - new->prev->next = new; -} - -static inline void list_replace_init(struct list_head *old, - struct list_head *new) -{ - list_replace(old, new); - INIT_LIST_HEAD(old); -} -/** - * list_del_init - deletes entry from list and reinitialize it. - * @entry: the element to delete from the list. - */ -static inline void list_del_init(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - INIT_LIST_HEAD(entry); -} - -/** - * list_move - delete from one list and add as another's head - * @list: the entry to move - * @head: the head that will precede our entry - */ -static inline void list_move(struct list_head *list, struct list_head *head) -{ - __list_del(list->prev, list->next); - list_add(list, head); -} - -/** - * list_move_tail - delete from one list and add as another's tail - * @list: the entry to move - * @head: the head that will follow our entry - */ -static inline void list_move_tail(struct list_head *list, - struct list_head *head) -{ - __list_del(list->prev, list->next); - list_add_tail(list, head); -} - -/** - * list_is_last - tests whether @list is the last entry in list @head - * @list: the entry to test - * @head: the head of the list - */ -static inline int list_is_last(const struct list_head *list, - const struct list_head *head) -{ - return list->next == head; -} - -/** - * list_empty - tests whether a list is empty - * @head: the list to test. - */ -static inline int list_empty(const struct list_head *head) -{ - return head->next == head; -} - -/** - * list_empty_careful - tests whether a list is empty and not being modified - * @head: the list to test - * - * Description: - * tests whether a list is empty _and_ checks that no other CPU might be - * in the process of modifying either member (next or prev) - * - * NOTE: using list_empty_careful() without synchronization - * can only be safe if the only activity that can happen - * to the list entry is list_del_init(). Eg. it cannot be used - * if another CPU could re-list_add() it. - */ -static inline int list_empty_careful(const struct list_head *head) -{ - struct list_head *next = head->next; - return (next == head) && (next == head->prev); -} - -static inline void __list_splice(struct list_head *list, - struct list_head *head) -{ - struct list_head *first = list->next; - struct list_head *last = list->prev; - struct list_head *at = head->next; - - first->prev = head; - head->next = first; - - last->next = at; - at->prev = last; -} - -/** - * list_splice - join two lists - * @list: the new list to add. - * @head: the place to add it in the first list. - */ -static inline void list_splice(struct list_head *list, struct list_head *head) -{ - if (!list_empty(list)) - __list_splice(list, head); -} - -/** - * list_splice_init - join two lists and reinitialise the emptied list. - * @list: the new list to add. - * @head: the place to add it in the first list. - * - * The list at @list is reinitialised - */ -static inline void list_splice_init(struct list_head *list, - struct list_head *head) -{ - if (!list_empty(list)) { - __list_splice(list, head); - INIT_LIST_HEAD(list); - } -} - -/** - * list_entry - get the struct for this entry - * @ptr: the &struct list_head pointer. - * @type: the type of the struct this is embedded in. - * @member: the name of the list_struct within the struct. - */ -#define list_entry(ptr, type, member) \ - container_of(ptr, type, member) - -/** - * list_for_each - iterate over a list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - */ -#define list_for_each(pos, head) \ - for (pos = (head)->next; prefetch(pos->next), pos != (head); \ - pos = pos->next) - -/** - * __list_for_each - iterate over a list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - * - * This variant differs from list_for_each() in that it's the - * simplest possible list iteration code, no prefetching is done. - * Use this for code that knows the list to be very short (empty - * or 1 entry) most of the time. - */ -#define __list_for_each(pos, head) \ - for (pos = (head)->next; pos != (head); pos = pos->next) - -/** - * list_for_each_prev - iterate over a list backwards - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - */ -#define list_for_each_prev(pos, head) \ - for (pos = (head)->prev; prefetch(pos->prev), pos != (head); \ - pos = pos->prev) - -/** - * list_for_each_safe - iterate over a list safe against removal of list entry - * @pos: the &struct list_head to use as a loop cursor. - * @n: another &struct list_head to use as temporary storage - * @head: the head for your list. - */ -#define list_for_each_safe(pos, n, head) \ - for (pos = (head)->next, n = pos->next; pos != (head); \ - pos = n, n = pos->next) - -/** - * list_for_each_entry - iterate over list of given type - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry(pos, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) - -/** - * list_for_each_entry_reverse - iterate backwards over list of given type. - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry_reverse(pos, head, member) \ - for (pos = list_entry((head)->prev, typeof(*pos), member); \ - prefetch(pos->member.prev), &pos->member != (head); \ - pos = list_entry(pos->member.prev, typeof(*pos), member)) - -/** - * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue - * @pos: the type * to use as a start point - * @head: the head of the list - * @member: the name of the list_struct within the struct. - * - * Prepares a pos entry for use as a start point in list_for_each_entry_continue. - */ -#define list_prepare_entry(pos, head, member) \ - ((pos) ? : list_entry(head, typeof(*pos), member)) - -/** - * list_for_each_entry_continue - continue iteration over list of given type - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Continue to iterate over list of given type, continuing after - * the current position. - */ -#define list_for_each_entry_continue(pos, head, member) \ - for (pos = list_entry(pos->member.next, typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) - -/** - * list_for_each_entry_from - iterate over list of given type from the current point - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Iterate over list of given type, continuing from current position. - */ -#define list_for_each_entry_from(pos, head, member) \ - for (; prefetch(pos->member.next), &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) - -/** - * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry_safe(pos, n, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member), \ - n = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.next, typeof(*n), member)) - -/** - * list_for_each_entry_safe_continue - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Iterate over list of given type, continuing after current point, - * safe against removal of list entry. - */ -#define list_for_each_entry_safe_continue(pos, n, head, member) \ - for (pos = list_entry(pos->member.next, typeof(*pos), member), \ - n = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.next, typeof(*n), member)) - -/** - * list_for_each_entry_safe_from - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Iterate over list of given type from current point, safe against - * removal of list entry. - */ -#define list_for_each_entry_safe_from(pos, n, head, member) \ - for (n = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.next, typeof(*n), member)) - -/** - * list_for_each_entry_safe_reverse - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Iterate backwards over list of given type, safe against removal - * of list entry. - */ -#define list_for_each_entry_safe_reverse(pos, n, head, member) \ - for (pos = list_entry((head)->prev, typeof(*pos), member), \ - n = list_entry(pos->member.prev, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.prev, typeof(*n), member)) - -#endif diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c deleted file mode 100644 index f7efc8a5fb1..00000000000 --- a/fs/btrfs/mkfs.c +++ /dev/null @@ -1,255 +0,0 @@ -#define _XOPEN_SOURCE 500 -#ifndef __CHECKER__ -#include -#include -#endif -#include -#include -#include -#include -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" -#include "ctree.h" -#include "disk-io.h" - -#ifdef __CHECKER__ -#define BLKGETSIZE64 0 -static inline int ioctl(int fd, int define, u64 *size) { return 0; } -#endif - -#if 0 -#if defined(__linux__) && defined(_IOR) && !defined(BLKGETSIZE64) -# define BLKGETSIZE64 _IOR(0x12, 114, __u64) -#endif -#endif - -int mkfs(int fd, u64 num_blocks, u32 blocksize) -{ - struct btrfs_super_block super; - struct btrfs_leaf *empty_leaf; - struct btrfs_root_item root_item; - struct btrfs_item item; - struct btrfs_extent_item extent_item; - char *block; - int ret; - u32 itemoff; - u32 start_block = BTRFS_SUPER_INFO_OFFSET / blocksize; - - btrfs_set_super_blocknr(&super, start_block); - btrfs_set_super_root(&super, start_block + 1); - strcpy((char *)(&super.magic), BTRFS_MAGIC); - btrfs_set_super_blocksize(&super, blocksize); - btrfs_set_super_total_blocks(&super, num_blocks); - btrfs_set_super_blocks_used(&super, start_block + 5); - - block = malloc(blocksize); - memset(block, 0, blocksize); - BUG_ON(sizeof(super) > blocksize); - memcpy(block, &super, sizeof(super)); - ret = pwrite(fd, block, blocksize, BTRFS_SUPER_INFO_OFFSET); - BUG_ON(ret != blocksize); - - /* create the tree of root objects */ - empty_leaf = malloc(blocksize); - memset(empty_leaf, 0, blocksize); - btrfs_set_header_parentid(&empty_leaf->header, - BTRFS_ROOT_TREE_OBJECTID); - btrfs_set_header_blocknr(&empty_leaf->header, start_block + 1); - btrfs_set_header_nritems(&empty_leaf->header, 3); - - /* create the items for the root tree */ - btrfs_set_root_blocknr(&root_item, start_block + 2); - btrfs_set_root_refs(&root_item, 1); - itemoff = __BTRFS_LEAF_DATA_SIZE(blocksize) - sizeof(root_item); - btrfs_set_item_offset(&item, itemoff); - btrfs_set_item_size(&item, sizeof(root_item)); - btrfs_set_disk_key_objectid(&item.key, BTRFS_EXTENT_TREE_OBJECTID); - btrfs_set_disk_key_offset(&item.key, 0); - btrfs_set_disk_key_flags(&item.key, 0); - btrfs_set_disk_key_type(&item.key, BTRFS_ROOT_ITEM_KEY); - memcpy(empty_leaf->items, &item, sizeof(item)); - memcpy(btrfs_leaf_data(empty_leaf) + itemoff, - &root_item, sizeof(root_item)); - - btrfs_set_root_blocknr(&root_item, start_block + 3); - itemoff = itemoff - sizeof(root_item); - btrfs_set_item_offset(&item, itemoff); - btrfs_set_disk_key_objectid(&item.key, BTRFS_INODE_MAP_OBJECTID); - memcpy(empty_leaf->items + 1, &item, sizeof(item)); - memcpy(btrfs_leaf_data(empty_leaf) + itemoff, - &root_item, sizeof(root_item)); - - btrfs_set_root_blocknr(&root_item, start_block + 4); - itemoff = itemoff - sizeof(root_item); - btrfs_set_item_offset(&item, itemoff); - btrfs_set_disk_key_objectid(&item.key, BTRFS_FS_TREE_OBJECTID); - memcpy(empty_leaf->items + 2, &item, sizeof(item)); - memcpy(btrfs_leaf_data(empty_leaf) + itemoff, - &root_item, sizeof(root_item)); - ret = pwrite(fd, empty_leaf, blocksize, (start_block + 1) * blocksize); - - /* create the items for the extent tree */ - btrfs_set_header_parentid(&empty_leaf->header, - BTRFS_EXTENT_TREE_OBJECTID); - btrfs_set_header_blocknr(&empty_leaf->header, start_block + 2); - btrfs_set_header_nritems(&empty_leaf->header, 5); - - /* item1, reserve blocks 0-16 */ - btrfs_set_disk_key_objectid(&item.key, 0); - btrfs_set_disk_key_offset(&item.key, start_block + 1); - btrfs_set_disk_key_flags(&item.key, 0); - btrfs_set_disk_key_type(&item.key, BTRFS_EXTENT_ITEM_KEY); - itemoff = __BTRFS_LEAF_DATA_SIZE(blocksize) - - sizeof(struct btrfs_extent_item); - btrfs_set_item_offset(&item, itemoff); - btrfs_set_item_size(&item, sizeof(struct btrfs_extent_item)); - btrfs_set_extent_refs(&extent_item, 1); - btrfs_set_extent_owner(&extent_item, 0); - memcpy(empty_leaf->items, &item, sizeof(item)); - memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), - &extent_item, btrfs_item_size(&item)); - - /* item2, give block 17 to the root */ - btrfs_set_disk_key_objectid(&item.key, start_block + 1); - btrfs_set_disk_key_offset(&item.key, 1); - itemoff = itemoff - sizeof(struct btrfs_extent_item); - btrfs_set_item_offset(&item, itemoff); - btrfs_set_extent_owner(&extent_item, BTRFS_ROOT_TREE_OBJECTID); - memcpy(empty_leaf->items + 1, &item, sizeof(item)); - memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), - &extent_item, btrfs_item_size(&item)); - - /* item3, give block 18 to the extent root */ - btrfs_set_disk_key_objectid(&item.key, start_block + 2); - btrfs_set_disk_key_offset(&item.key, 1); - itemoff = itemoff - sizeof(struct btrfs_extent_item); - btrfs_set_item_offset(&item, itemoff); - btrfs_set_extent_owner(&extent_item, BTRFS_EXTENT_TREE_OBJECTID); - memcpy(empty_leaf->items + 2, &item, sizeof(item)); - memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), - &extent_item, btrfs_item_size(&item)); - - /* item4, give block 19 to the inode map */ - btrfs_set_disk_key_objectid(&item.key, start_block + 3); - btrfs_set_disk_key_offset(&item.key, 1); - itemoff = itemoff - sizeof(struct btrfs_extent_item); - btrfs_set_item_offset(&item, itemoff); - btrfs_set_extent_owner(&extent_item, BTRFS_INODE_MAP_OBJECTID); - memcpy(empty_leaf->items + 3, &item, sizeof(item)); - memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), - &extent_item, btrfs_item_size(&item)); - ret = pwrite(fd, empty_leaf, blocksize, (start_block + 2) * blocksize); - if (ret != blocksize) - return -1; - - /* item5, give block 20 to the FS root */ - btrfs_set_disk_key_objectid(&item.key, start_block + 4); - btrfs_set_disk_key_offset(&item.key, 1); - itemoff = itemoff - sizeof(struct btrfs_extent_item); - btrfs_set_item_offset(&item, itemoff); - btrfs_set_extent_owner(&extent_item, BTRFS_FS_TREE_OBJECTID); - memcpy(empty_leaf->items + 4, &item, sizeof(item)); - memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), - &extent_item, btrfs_item_size(&item)); - ret = pwrite(fd, empty_leaf, blocksize, (start_block + 2) * blocksize); - if (ret != blocksize) - return -1; - - /* create the inode map */ - btrfs_set_header_parentid(&empty_leaf->header, - BTRFS_INODE_MAP_OBJECTID); - btrfs_set_header_blocknr(&empty_leaf->header, start_block + 3); - btrfs_set_header_nritems(&empty_leaf->header, 0); - ret = pwrite(fd, empty_leaf, blocksize, (start_block + 3) * blocksize); - if (ret != blocksize) - return -1; - - /* finally create the FS root */ - btrfs_set_header_parentid(&empty_leaf->header, BTRFS_FS_TREE_OBJECTID); - btrfs_set_header_blocknr(&empty_leaf->header, start_block + 4); - btrfs_set_header_nritems(&empty_leaf->header, 0); - ret = pwrite(fd, empty_leaf, blocksize, (start_block + 4) * blocksize); - if (ret != blocksize) - return -1; - return 0; -} - -u64 device_size(int fd, struct stat *st) -{ - u64 size; - if (S_ISREG(st->st_mode)) { - return st->st_size; - } - if (!S_ISBLK(st->st_mode)) { - return 0; - } - if (ioctl(fd, BLKGETSIZE64, &size) >= 0) { - return size; - } - return 0; -} - -int main(int ac, char **av) -{ - char *file; - u64 block_count = 0; - int fd; - struct stat st; - int ret; - int i; - char *buf = malloc(4096); - if (ac >= 2) { - file = av[1]; - if (ac == 3) { - block_count = atoi(av[2]); - if (!block_count) { - fprintf(stderr, "error finding block count\n"); - exit(1); - } - } - } else { - fprintf(stderr, "usage: mkfs.btrfs file [block count]\n"); - exit(1); - } - fd = open(file, O_RDWR); - if (fd < 0) { - fprintf(stderr, "unable to open %s\n", file); - exit(1); - } - ret = fstat(fd, &st); - if (ret < 0) { - fprintf(stderr, "unable to stat %s\n", file); - exit(1); - } - if (block_count == 0) { - block_count = device_size(fd, &st); - if (block_count == 0) { - fprintf(stderr, "unable to find %s size\n", file); - exit(1); - } - } - block_count /= 4096; - if (block_count < 256) { - fprintf(stderr, "device %s is too small\n", file); - exit(1); - } - memset(buf, 0, 4096); - for(i = 0; i < 6; i++) { - ret = write(fd, buf, 4096); - if (ret != 4096) { - fprintf(stderr, "unable to zero fill device\n"); - exit(1); - } - } - ret = mkfs(fd, block_count, 4096); - if (ret) { - fprintf(stderr, "error during mkfs %d\n", ret); - exit(1); - } - printf("fs created on %s blocksize %d blocks %Lu\n", - file, 4096, block_count); - return 0; -} - diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index f53b99da12f..aa2d3fac880 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -1,7 +1,4 @@ -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" +#include #include "ctree.h" #include "disk-io.h" @@ -17,14 +14,14 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_inode_item *ii; u32 type; - printf("leaf %Lu total ptrs %d free space %d\n", + printk("leaf %Lu total ptrs %d free space %d\n", btrfs_header_blocknr(&l->header), nr, btrfs_leaf_free_space(root, l)); fflush(stdout); for (i = 0 ; i < nr ; i++) { item = l->items + i; type = btrfs_disk_key_type(&item->key); - printf("\titem %d key (%Lu %u %Lu) itemoff %d itemsize %d\n", + printk("\titem %d key (%Lu %u %Lu) itemoff %d itemsize %d\n", i, btrfs_disk_key_objectid(&item->key), btrfs_disk_key_flags(&item->key), @@ -34,38 +31,39 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) switch (type) { case BTRFS_INODE_ITEM_KEY: ii = btrfs_item_ptr(l, i, struct btrfs_inode_item); - printf("\t\tinode generation %Lu size %Lu\n", + printk("\t\tinode generation %Lu size %Lu mode %o\n", btrfs_inode_generation(ii), - btrfs_inode_size(ii)); + btrfs_inode_size(ii), + btrfs_inode_mode(ii)); break; case BTRFS_DIR_ITEM_KEY: di = btrfs_item_ptr(l, i, struct btrfs_dir_item); - printf("\t\tdir oid %Lu flags %u type %u\n", + printk("\t\tdir oid %Lu flags %u type %u\n", btrfs_dir_objectid(di), btrfs_dir_flags(di), btrfs_dir_type(di)); - printf("\t\tname %.*s\n", + printk("\t\tname %.*s\n", btrfs_dir_name_len(di),(char *)(di + 1)); break; case BTRFS_ROOT_ITEM_KEY: ri = btrfs_item_ptr(l, i, struct btrfs_root_item); - printf("\t\troot data blocknr %Lu refs %u\n", + printk("\t\troot data blocknr %Lu refs %u\n", btrfs_root_blocknr(ri), btrfs_root_refs(ri)); break; case BTRFS_EXTENT_ITEM_KEY: ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); - printf("\t\textent data refs %u owner %Lu\n", + printk("\t\textent data refs %u owner %Lu\n", btrfs_extent_refs(ei), btrfs_extent_owner(ei)); break; case BTRFS_INODE_MAP_ITEM_KEY: mi = btrfs_item_ptr(l, i, struct btrfs_inode_map_item); - printf("\t\tinode map key %Lu %u %Lu\n", + printk("\t\tinode map key %Lu %u %Lu\n", btrfs_disk_key_objectid(&mi->key), btrfs_disk_key_flags(&mi->key), btrfs_disk_key_offset(&mi->key)); break; case BTRFS_STRING_ITEM_KEY: - printf("\t\titem data %.*s\n", btrfs_item_size(item), + printk("\t\titem data %.*s\n", btrfs_item_size(item), btrfs_leaf_data(l) + btrfs_item_offset(item)); break; }; @@ -86,12 +84,12 @@ void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t) btrfs_print_leaf(root, (struct btrfs_leaf *)c); return; } - printf("node %Lu level %d total ptrs %d free spc %u\n", t->blocknr, + printk("node %Lu level %d total ptrs %d free spc %u\n", t->blocknr, btrfs_header_level(&c->header), nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); fflush(stdout); for (i = 0; i < nr; i++) { - printf("\tkey %d (%Lu %u %Lu) block %Lu\n", + printk("\tkey %d (%Lu %u %Lu) block %Lu\n", i, c->ptrs[i].key.objectid, c->ptrs[i].key.flags, diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c deleted file mode 100644 index ef12cf88878..00000000000 --- a/fs/btrfs/quick-test.c +++ /dev/null @@ -1,179 +0,0 @@ -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" -#include "ctree.h" -#include "disk-io.h" -#include "print-tree.h" -#include "transaction.h" - -/* for testing only */ -int next_key(int i, int max_key) { - return rand() % max_key; - // return i; -} - -int main(int ac, char **av) { - struct btrfs_key ins; - struct btrfs_key last = { (u64)-1, 0, 0}; - char *buf; - int i; - int num; - int ret; - int run_size = 100000; - int max_key = 100000000; - int tree_size = 0; - struct btrfs_path path; - struct btrfs_super_block super; - struct btrfs_root *root; - struct btrfs_trans_handle *trans; - - radix_tree_init(); - - root = open_ctree("dbfile", &super); - trans = btrfs_start_transaction(root, 1); - srand(55); - ins.flags = 0; - btrfs_set_key_type(&ins, BTRFS_STRING_ITEM_KEY); - for (i = 0; i < run_size; i++) { - buf = malloc(64); - num = next_key(i, max_key); - // num = i; - sprintf(buf, "string-%d", num); - if (i % 10000 == 0) - fprintf(stderr, "insert %d:%d\n", num, i); - ins.objectid = num; - ins.offset = 0; - ret = btrfs_insert_item(trans, root, &ins, buf, strlen(buf)); - if (!ret) - tree_size++; - free(buf); - if (i == run_size - 5) { - btrfs_commit_transaction(trans, root, &super); - } - - } - close_ctree(root, &super); - - root = open_ctree("dbfile", &super); - printf("starting search\n"); - srand(55); - for (i = 0; i < run_size; i++) { - num = next_key(i, max_key); - ins.objectid = num; - btrfs_init_path(&path); - if (i % 10000 == 0) - fprintf(stderr, "search %d:%d\n", num, i); - ret = btrfs_search_slot(trans, root, &ins, &path, 0, 0); - if (ret) { - btrfs_print_tree(root, root->node); - printf("unable to find %d\n", num); - exit(1); - } - btrfs_release_path(root, &path); - } - close_ctree(root, &super); - root = open_ctree("dbfile", &super); - printf("node %p level %d total ptrs %d free spc %lu\n", root->node, - btrfs_header_level(&root->node->node.header), - btrfs_header_nritems(&root->node->node.header), - BTRFS_NODEPTRS_PER_BLOCK(root) - - btrfs_header_nritems(&root->node->node.header)); - printf("all searches good, deleting some items\n"); - i = 0; - srand(55); - for (i = 0 ; i < run_size/4; i++) { - num = next_key(i, max_key); - ins.objectid = num; - btrfs_init_path(&path); - ret = btrfs_search_slot(trans, root, &ins, &path, -1, 1); - if (!ret) { - if (i % 10000 == 0) - fprintf(stderr, "del %d:%d\n", num, i); - ret = btrfs_del_item(trans, root, &path); - if (ret != 0) - BUG(); - tree_size--; - } - btrfs_release_path(root, &path); - } - close_ctree(root, &super); - root = open_ctree("dbfile", &super); - srand(128); - for (i = 0; i < run_size; i++) { - buf = malloc(64); - num = next_key(i, max_key); - sprintf(buf, "string-%d", num); - ins.objectid = num; - if (i % 10000 == 0) - fprintf(stderr, "insert %d:%d\n", num, i); - ret = btrfs_insert_item(trans, root, &ins, buf, strlen(buf)); - if (!ret) - tree_size++; - free(buf); - } - close_ctree(root, &super); - root = open_ctree("dbfile", &super); - srand(128); - printf("starting search2\n"); - for (i = 0; i < run_size; i++) { - num = next_key(i, max_key); - ins.objectid = num; - btrfs_init_path(&path); - if (i % 10000 == 0) - fprintf(stderr, "search %d:%d\n", num, i); - ret = btrfs_search_slot(trans, root, &ins, &path, 0, 0); - if (ret) { - btrfs_print_tree(root, root->node); - printf("unable to find %d\n", num); - exit(1); - } - btrfs_release_path(root, &path); - } - printf("starting big long delete run\n"); - while(root->node && - btrfs_header_nritems(&root->node->node.header) > 0) { - struct btrfs_leaf *leaf; - int slot; - ins.objectid = (u64)-1; - btrfs_init_path(&path); - ret = btrfs_search_slot(trans, root, &ins, &path, -1, 1); - if (ret == 0) - BUG(); - - leaf = &path.nodes[0]->leaf; - slot = path.slots[0]; - if (slot != btrfs_header_nritems(&leaf->header)) - BUG(); - while(path.slots[0] > 0) { - path.slots[0] -= 1; - slot = path.slots[0]; - leaf = &path.nodes[0]->leaf; - - btrfs_disk_key_to_cpu(&last, &leaf->items[slot].key); - if (tree_size % 10000 == 0) - printf("big del %d:%d\n", tree_size, i); - ret = btrfs_del_item(trans, root, &path); - if (ret != 0) { - printf("del_item returned %d\n", ret); - BUG(); - } - tree_size--; - } - btrfs_release_path(root, &path); - } - /* - printf("previous tree:\n"); - btrfs_print_tree(root, root->commit_root); - printf("map before commit\n"); - btrfs_print_tree(root->extent_root, root->extent_root->node); - */ - btrfs_commit_transaction(trans, root, &super); - printf("tree size is now %d\n", tree_size); - printf("root %p commit root %p\n", root->node, root->commit_root); - printf("map tree\n"); - btrfs_print_tree(root->fs_info->extent_root, - root->fs_info->extent_root->node); - close_ctree(root, &super); - return 0; -} diff --git a/fs/btrfs/radix-tree.c b/fs/btrfs/radix-tree.c deleted file mode 100644 index baa25ca1c2a..00000000000 --- a/fs/btrfs/radix-tree.c +++ /dev/null @@ -1,836 +0,0 @@ -/* - * Copyright (C) 2001 Momchil Velikov - * Portions Copyright (C) 2001 Christoph Hellwig - * Copyright (C) 2005 SGI, Christoph Lameter - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include "kerncompat.h" -#include "radix-tree.h" -#ifdef __KERNEL__ -#define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) -#else -#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ -#endif - -#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) -#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) - -#define RADIX_TREE_TAG_LONGS \ - ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) - -struct radix_tree_node { - unsigned int count; - void *slots[RADIX_TREE_MAP_SIZE]; - unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; -}; - -struct radix_tree_path { - struct radix_tree_node *node; - int offset; -}; - -#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) -#define RADIX_TREE_MAX_PATH (RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 2) - -static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH] __read_mostly; - -/* - * Per-cpu pool of preloaded nodes - */ -struct radix_tree_preload { - int nr; - struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH]; -}; -struct radix_tree_preload radix_tree_preloads = { 0, }; - -static inline gfp_t root_gfp_mask(struct radix_tree_root *root) -{ - return root->gfp_mask & __GFP_BITS_MASK; -} - -static int internal_nodes = 0; -/* - * This assumes that the caller has performed appropriate preallocation, and - * that the caller has pinned this thread of control to the current CPU. - */ -static struct radix_tree_node * -radix_tree_node_alloc(struct radix_tree_root *root) -{ - struct radix_tree_node *ret; - ret = malloc(sizeof(struct radix_tree_node)); - if (ret) { - memset(ret, 0, sizeof(struct radix_tree_node)); - internal_nodes++; - } - return ret; -} - -static inline void -radix_tree_node_free(struct radix_tree_node *node) -{ - internal_nodes--; - free(node); -} - -/* - * Load up this CPU's radix_tree_node buffer with sufficient objects to - * ensure that the addition of a single element in the tree cannot fail. On - * success, return zero, with preemption disabled. On error, return -ENOMEM - * with preemption not disabled. - */ -int radix_tree_preload(gfp_t gfp_mask) -{ - struct radix_tree_preload *rtp; - struct radix_tree_node *node; - int ret = -ENOMEM; - - preempt_disable(); - rtp = &__get_cpu_var(radix_tree_preloads); - while (rtp->nr < ARRAY_SIZE(rtp->nodes)) { - preempt_enable(); - node = radix_tree_node_alloc(NULL); - if (node == NULL) - goto out; - preempt_disable(); - rtp = &__get_cpu_var(radix_tree_preloads); - if (rtp->nr < ARRAY_SIZE(rtp->nodes)) - rtp->nodes[rtp->nr++] = node; - else - radix_tree_node_free(node); - } - ret = 0; -out: - return ret; -} - -static inline void tag_set(struct radix_tree_node *node, unsigned int tag, - int offset) -{ - __set_bit(offset, node->tags[tag]); -} - -static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, - int offset) -{ - __clear_bit(offset, node->tags[tag]); -} - -static inline int tag_get(struct radix_tree_node *node, unsigned int tag, - int offset) -{ - return test_bit(offset, node->tags[tag]); -} - -static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag) -{ - root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); -} - - -static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag) -{ - root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); -} - -static inline void root_tag_clear_all(struct radix_tree_root *root) -{ - root->gfp_mask &= __GFP_BITS_MASK; -} - -static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) -{ - return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); -} - -/* - * Returns 1 if any slot in the node has this tag set. - * Otherwise returns 0. - */ -static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) -{ - int idx; - for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { - if (node->tags[tag][idx]) - return 1; - } - return 0; -} - -/* - * Return the maximum key which can be store into a - * radix tree with height HEIGHT. - */ -static inline unsigned long radix_tree_maxindex(unsigned int height) -{ - return height_to_maxindex[height]; -} - -/* - * Extend a radix tree so it can store key @index. - */ -static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) -{ - struct radix_tree_node *node; - unsigned int height; - int tag; - - /* Figure out what the height should be. */ - height = root->height + 1; - while (index > radix_tree_maxindex(height)) - height++; - - if (root->rnode == NULL) { - root->height = height; - goto out; - } - - do { - if (!(node = radix_tree_node_alloc(root))) - return -ENOMEM; - - /* Increase the height. */ - node->slots[0] = root->rnode; - - /* Propagate the aggregated tag info into the new root */ - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { - if (root_tag_get(root, tag)) - tag_set(node, tag, 0); - } - - node->count = 1; - root->rnode = node; - root->height++; - } while (height > root->height); -out: - return 0; -} - -/** - * radix_tree_insert - insert into a radix tree - * @root: radix tree root - * @index: index key - * @item: item to insert - * - * Insert an item into the radix tree at position @index. - */ -int radix_tree_insert(struct radix_tree_root *root, - unsigned long index, void *item) -{ - struct radix_tree_node *node = NULL, *slot; - unsigned int height, shift; - int offset; - int error; - - /* Make sure the tree is high enough. */ - if (index > radix_tree_maxindex(root->height)) { - error = radix_tree_extend(root, index); - if (error) - return error; - } - - slot = root->rnode; - height = root->height; - shift = (height-1) * RADIX_TREE_MAP_SHIFT; - - offset = 0; /* uninitialised var warning */ - while (height > 0) { - if (slot == NULL) { - /* Have to add a child node. */ - if (!(slot = radix_tree_node_alloc(root))) - return -ENOMEM; - if (node) { - node->slots[offset] = slot; - node->count++; - } else - root->rnode = slot; - } - - /* Go a level down */ - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - node = slot; - slot = node->slots[offset]; - shift -= RADIX_TREE_MAP_SHIFT; - height--; - } - - if (slot != NULL) - return -EEXIST; - - if (node) { - node->count++; - node->slots[offset] = item; - BUG_ON(tag_get(node, 0, offset)); - BUG_ON(tag_get(node, 1, offset)); - } else { - root->rnode = item; - BUG_ON(root_tag_get(root, 0)); - BUG_ON(root_tag_get(root, 1)); - } - - return 0; -} - -static inline void **__lookup_slot(struct radix_tree_root *root, - unsigned long index) -{ - unsigned int height, shift; - struct radix_tree_node **slot; - - height = root->height; - - if (index > radix_tree_maxindex(height)) - return NULL; - - if (height == 0 && root->rnode) - return (void **)&root->rnode; - - shift = (height-1) * RADIX_TREE_MAP_SHIFT; - slot = &root->rnode; - - while (height > 0) { - if (*slot == NULL) - return NULL; - - slot = (struct radix_tree_node **) - ((*slot)->slots + - ((index >> shift) & RADIX_TREE_MAP_MASK)); - shift -= RADIX_TREE_MAP_SHIFT; - height--; - } - - return (void **)slot; -} - -/** - * radix_tree_lookup_slot - lookup a slot in a radix tree - * @root: radix tree root - * @index: index key - * - * Lookup the slot corresponding to the position @index in the radix tree - * @root. This is useful for update-if-exists operations. - */ -void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) -{ - return __lookup_slot(root, index); -} - -/** - * radix_tree_lookup - perform lookup operation on a radix tree - * @root: radix tree root - * @index: index key - * - * Lookup the item at the position @index in the radix tree @root. - */ -void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) -{ - void **slot; - - slot = __lookup_slot(root, index); - return slot != NULL ? *slot : NULL; -} - -/** - * radix_tree_tag_set - set a tag on a radix tree node - * @root: radix tree root - * @index: index key - * @tag: tag index - * - * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) - * corresponding to @index in the radix tree. From - * the root all the way down to the leaf node. - * - * Returns the address of the tagged item. Setting a tag on a not-present - * item is a bug. - */ -void *radix_tree_tag_set(struct radix_tree_root *root, - unsigned long index, unsigned int tag) -{ - unsigned int height, shift; - struct radix_tree_node *slot; - - height = root->height; - BUG_ON(index > radix_tree_maxindex(height)); - - slot = root->rnode; - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - - while (height > 0) { - int offset; - - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - if (!tag_get(slot, tag, offset)) - tag_set(slot, tag, offset); - slot = slot->slots[offset]; - BUG_ON(slot == NULL); - shift -= RADIX_TREE_MAP_SHIFT; - height--; - } - - /* set the root's tag bit */ - if (slot && !root_tag_get(root, tag)) - root_tag_set(root, tag); - - return slot; -} - -/** - * radix_tree_tag_clear - clear a tag on a radix tree node - * @root: radix tree root - * @index: index key - * @tag: tag index - * - * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) - * corresponding to @index in the radix tree. If - * this causes the leaf node to have no tags set then clear the tag in the - * next-to-leaf node, etc. - * - * Returns the address of the tagged item on success, else NULL. ie: - * has the same return value and semantics as radix_tree_lookup(). - */ -void *radix_tree_tag_clear(struct radix_tree_root *root, - unsigned long index, unsigned int tag) -{ - struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path; - struct radix_tree_node *slot = NULL; - unsigned int height, shift; - - height = root->height; - if (index > radix_tree_maxindex(height)) - goto out; - - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - pathp->node = NULL; - slot = root->rnode; - - while (height > 0) { - int offset; - - if (slot == NULL) - goto out; - - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - pathp[1].offset = offset; - pathp[1].node = slot; - slot = slot->slots[offset]; - pathp++; - shift -= RADIX_TREE_MAP_SHIFT; - height--; - } - - if (slot == NULL) - goto out; - - while (pathp->node) { - if (!tag_get(pathp->node, tag, pathp->offset)) - goto out; - tag_clear(pathp->node, tag, pathp->offset); - if (any_tag_set(pathp->node, tag)) - goto out; - pathp--; - } - - /* clear the root's tag bit */ - if (root_tag_get(root, tag)) - root_tag_clear(root, tag); - -out: - return slot; -} - -#ifndef __KERNEL__ /* Only the test harness uses this at present */ -/** - * radix_tree_tag_get - get a tag on a radix tree node - * @root: radix tree root - * @index: index key - * @tag: tag index (< RADIX_TREE_MAX_TAGS) - * - * Return values: - * - * 0: tag not present or not set - * 1: tag set - */ -int radix_tree_tag_get(struct radix_tree_root *root, - unsigned long index, unsigned int tag) -{ - unsigned int height, shift; - struct radix_tree_node *slot; - int saw_unset_tag = 0; - - height = root->height; - if (index > radix_tree_maxindex(height)) - return 0; - - /* check the root's tag bit */ - if (!root_tag_get(root, tag)) - return 0; - - if (height == 0) - return 1; - - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - slot = root->rnode; - - for ( ; ; ) { - int offset; - - if (slot == NULL) - return 0; - - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - - /* - * This is just a debug check. Later, we can bale as soon as - * we see an unset tag. - */ - if (!tag_get(slot, tag, offset)) - saw_unset_tag = 1; - if (height == 1) { - int ret = tag_get(slot, tag, offset); - - BUG_ON(ret && saw_unset_tag); - return !!ret; - } - slot = slot->slots[offset]; - shift -= RADIX_TREE_MAP_SHIFT; - height--; - } -} -#endif - -static unsigned int -__lookup(struct radix_tree_root *root, void **results, unsigned long index, - unsigned int max_items, unsigned long *next_index) -{ - unsigned int nr_found = 0; - unsigned int shift, height; - struct radix_tree_node *slot; - unsigned long i; - - height = root->height; - if (height == 0) { - if (root->rnode && index == 0) - results[nr_found++] = root->rnode; - goto out; - } - - shift = (height-1) * RADIX_TREE_MAP_SHIFT; - slot = root->rnode; - - for ( ; height > 1; height--) { - - for (i = (index >> shift) & RADIX_TREE_MAP_MASK ; - i < RADIX_TREE_MAP_SIZE; i++) { - if (slot->slots[i] != NULL) - break; - index &= ~((1UL << shift) - 1); - index += 1UL << shift; - if (index == 0) - goto out; /* 32-bit wraparound */ - } - if (i == RADIX_TREE_MAP_SIZE) - goto out; - - shift -= RADIX_TREE_MAP_SHIFT; - slot = slot->slots[i]; - } - - /* Bottom level: grab some items */ - for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { - index++; - if (slot->slots[i]) { - results[nr_found++] = slot->slots[i]; - if (nr_found == max_items) - goto out; - } - } -out: - *next_index = index; - return nr_found; -} - -/** - * radix_tree_gang_lookup - perform multiple lookup on a radix tree - * @root: radix tree root - * @results: where the results of the lookup are placed - * @first_index: start the lookup from this key - * @max_items: place up to this many items at *results - * - * Performs an index-ascending scan of the tree for present items. Places - * them at *@results and returns the number of items which were placed at - * *@results. - * - * The implementation is naive. - */ -unsigned int -radix_tree_gang_lookup(struct radix_tree_root *root, void **results, - unsigned long first_index, unsigned int max_items) -{ - const unsigned long max_index = radix_tree_maxindex(root->height); - unsigned long cur_index = first_index; - unsigned int ret = 0; - - while (ret < max_items) { - unsigned int nr_found; - unsigned long next_index; /* Index of next search */ - - if (cur_index > max_index) - break; - nr_found = __lookup(root, results + ret, cur_index, - max_items - ret, &next_index); - ret += nr_found; - if (next_index == 0) - break; - cur_index = next_index; - } - return ret; -} - -/* - * FIXME: the two tag_get()s here should use find_next_bit() instead of - * open-coding the search. - */ -static unsigned int -__lookup_tag(struct radix_tree_root *root, void **results, unsigned long index, - unsigned int max_items, unsigned long *next_index, unsigned int tag) -{ - unsigned int nr_found = 0; - unsigned int shift; - unsigned int height = root->height; - struct radix_tree_node *slot; - - if (height == 0) { - if (root->rnode && index == 0) - results[nr_found++] = root->rnode; - goto out; - } - - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - slot = root->rnode; - - do { - unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK; - - for ( ; i < RADIX_TREE_MAP_SIZE; i++) { - if (tag_get(slot, tag, i)) { - BUG_ON(slot->slots[i] == NULL); - break; - } - index &= ~((1UL << shift) - 1); - index += 1UL << shift; - if (index == 0) - goto out; /* 32-bit wraparound */ - } - if (i == RADIX_TREE_MAP_SIZE) - goto out; - height--; - if (height == 0) { /* Bottom level: grab some items */ - unsigned long j = index & RADIX_TREE_MAP_MASK; - - for ( ; j < RADIX_TREE_MAP_SIZE; j++) { - index++; - if (tag_get(slot, tag, j)) { - BUG_ON(slot->slots[j] == NULL); - results[nr_found++] = slot->slots[j]; - if (nr_found == max_items) - goto out; - } - } - } - shift -= RADIX_TREE_MAP_SHIFT; - slot = slot->slots[i]; - } while (height > 0); -out: - *next_index = index; - return nr_found; -} - -/** - * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree - * based on a tag - * @root: radix tree root - * @results: where the results of the lookup are placed - * @first_index: start the lookup from this key - * @max_items: place up to this many items at *results - * @tag: the tag index (< RADIX_TREE_MAX_TAGS) - * - * Performs an index-ascending scan of the tree for present items which - * have the tag indexed by @tag set. Places the items at *@results and - * returns the number of items which were placed at *@results. - */ -unsigned int -radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, - unsigned long first_index, unsigned int max_items, - unsigned int tag) -{ - const unsigned long max_index = radix_tree_maxindex(root->height); - unsigned long cur_index = first_index; - unsigned int ret = 0; - - /* check the root's tag bit */ - if (!root_tag_get(root, tag)) - return 0; - - while (ret < max_items) { - unsigned int nr_found; - unsigned long next_index; /* Index of next search */ - - if (cur_index > max_index) - break; - nr_found = __lookup_tag(root, results + ret, cur_index, - max_items - ret, &next_index, tag); - ret += nr_found; - if (next_index == 0) - break; - cur_index = next_index; - } - return ret; -} - -/** - * radix_tree_shrink - shrink height of a radix tree to minimal - * @root radix tree root - */ -static inline void radix_tree_shrink(struct radix_tree_root *root) -{ - /* try to shrink tree height */ - while (root->height > 0 && - root->rnode->count == 1 && - root->rnode->slots[0]) { - struct radix_tree_node *to_free = root->rnode; - - root->rnode = to_free->slots[0]; - root->height--; - /* must only free zeroed nodes into the slab */ - tag_clear(to_free, 0, 0); - tag_clear(to_free, 1, 0); - to_free->slots[0] = NULL; - to_free->count = 0; - radix_tree_node_free(to_free); - } -} - -/** - * radix_tree_delete - delete an item from a radix tree - * @root: radix tree root - * @index: index key - * - * Remove the item at @index from the radix tree rooted at @root. - * - * Returns the address of the deleted item, or NULL if it was not present. - */ -void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) -{ - struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path; - struct radix_tree_node *slot = NULL; - unsigned int height, shift; - int tag; - int offset; - - height = root->height; - if (index > radix_tree_maxindex(height)) - goto out; - - slot = root->rnode; - if (height == 0 && root->rnode) { - root_tag_clear_all(root); - root->rnode = NULL; - goto out; - } - - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - pathp->node = NULL; - - do { - if (slot == NULL) - goto out; - - pathp++; - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - pathp->offset = offset; - pathp->node = slot; - slot = slot->slots[offset]; - shift -= RADIX_TREE_MAP_SHIFT; - height--; - } while (height > 0); - - if (slot == NULL) - goto out; - - /* - * Clear all tags associated with the just-deleted item - */ - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { - if (tag_get(pathp->node, tag, pathp->offset)) - radix_tree_tag_clear(root, index, tag); - } - - /* Now free the nodes we do not need anymore */ - while (pathp->node) { - pathp->node->slots[pathp->offset] = NULL; - pathp->node->count--; - - if (pathp->node->count) { - if (pathp->node == root->rnode) - radix_tree_shrink(root); - goto out; - } - - /* Node with zero slots in use so free it */ - radix_tree_node_free(pathp->node); - - pathp--; - } - root_tag_clear_all(root); - root->height = 0; - root->rnode = NULL; - -out: - return slot; -} - -/** - * radix_tree_tagged - test whether any items in the tree are tagged - * @root: radix tree root - * @tag: tag to test - */ -int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) -{ - return root_tag_get(root, tag); -} - -static unsigned long __maxindex(unsigned int height) -{ - unsigned int tmp = height * RADIX_TREE_MAP_SHIFT; - unsigned long index = (~0UL >> (RADIX_TREE_INDEX_BITS - tmp - 1)) >> 1; - - if (tmp >= RADIX_TREE_INDEX_BITS) - index = ~0UL; - return index; -} - -static void radix_tree_init_maxindex(void) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) - height_to_maxindex[i] = __maxindex(i); -} - -void radix_tree_init(void) -{ - radix_tree_init_maxindex(); -} diff --git a/fs/btrfs/radix-tree.h b/fs/btrfs/radix-tree.h deleted file mode 100644 index c3ce88137f7..00000000000 --- a/fs/btrfs/radix-tree.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (C) 2001 Momchil Velikov - * Portions Copyright (C) 2001 Christoph Hellwig - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#ifndef _LINUX_RADIX_TREE_H -#define _LINUX_RADIX_TREE_H - -#define RADIX_TREE_MAX_TAGS 2 - -/* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */ -struct radix_tree_root { - unsigned int height; - gfp_t gfp_mask; - struct radix_tree_node *rnode; -}; - -#define RADIX_TREE_INIT(mask) { \ - .height = 0, \ - .gfp_mask = (mask), \ - .rnode = NULL, \ -} - -#define RADIX_TREE(name, mask) \ - struct radix_tree_root name = RADIX_TREE_INIT(mask) - -#define INIT_RADIX_TREE(root, mask) \ -do { \ - (root)->height = 0; \ - (root)->gfp_mask = (mask); \ - (root)->rnode = NULL; \ -} while (0) - -int radix_tree_insert(struct radix_tree_root *, unsigned long, void *); -void *radix_tree_lookup(struct radix_tree_root *, unsigned long); -void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long); -void *radix_tree_delete(struct radix_tree_root *, unsigned long); -unsigned int -radix_tree_gang_lookup(struct radix_tree_root *root, void **results, - unsigned long first_index, unsigned int max_items); -int radix_tree_preload(gfp_t gfp_mask); -void radix_tree_init(void); -void *radix_tree_tag_set(struct radix_tree_root *root, - unsigned long index, unsigned int tag); -void *radix_tree_tag_clear(struct radix_tree_root *root, - unsigned long index, unsigned int tag); -int radix_tree_tag_get(struct radix_tree_root *root, - unsigned long index, unsigned int tag); -unsigned int -radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, - unsigned long first_index, unsigned int max_items, - unsigned int tag); -int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); - -static inline void radix_tree_preload_end(void) -{ - preempt_enable(); -} - -#endif /* _LINUX_RADIX_TREE_H */ diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c deleted file mode 100644 index 3a38ae7a886..00000000000 --- a/fs/btrfs/random-test.c +++ /dev/null @@ -1,405 +0,0 @@ -#include -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" -#include "ctree.h" -#include "disk-io.h" -#include "print-tree.h" -#include "transaction.h" - -int keep_running = 1; -struct btrfs_super_block super; - -static int setup_key(struct radix_tree_root *root, struct btrfs_key *key, - int exists) -{ - int num = rand(); - unsigned long res[2]; - int ret; - - key->flags = 0; - btrfs_set_key_type(key, BTRFS_STRING_ITEM_KEY); - key->offset = 0; -again: - ret = radix_tree_gang_lookup(root, (void **)res, num, 2); - if (exists) { - if (ret == 0) - return -1; - num = res[0]; - } else if (ret != 0 && num == res[0]) { - num++; - if (ret > 1 && num == res[1]) { - num++; - goto again; - } - } - key->objectid = num; - return 0; -} - -static int ins_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct radix_tree_root *radix) -{ - struct btrfs_path path; - struct btrfs_key key; - int ret; - char buf[128]; - unsigned long oid; - btrfs_init_path(&path); - ret = setup_key(radix, &key, 0); - sprintf(buf, "str-%Lu\n", key.objectid); - ret = btrfs_insert_item(trans, root, &key, buf, strlen(buf)); - if (ret) - goto error; - oid = (unsigned long)key.objectid; - radix_tree_preload(GFP_KERNEL); - ret = radix_tree_insert(radix, oid, (void *)oid); - radix_tree_preload_end(); - if (ret) - goto error; - return ret; -error: - printf("failed to insert %Lu\n", key.objectid); - return -1; -} - -static int insert_dup(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct radix_tree_root *radix) -{ - struct btrfs_path path; - struct btrfs_key key; - int ret; - char buf[128]; - btrfs_init_path(&path); - ret = setup_key(radix, &key, 1); - if (ret < 0) - return 0; - sprintf(buf, "str-%Lu\n", key.objectid); - ret = btrfs_insert_item(trans, root, &key, buf, strlen(buf)); - if (ret != -EEXIST) { - printf("insert on %Lu gave us %d\n", key.objectid, ret); - return 1; - } - return 0; -} - -static int del_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct radix_tree_root *radix) -{ - struct btrfs_path path; - struct btrfs_key key; - int ret; - unsigned long *ptr; - btrfs_init_path(&path); - ret = setup_key(radix, &key, 1); - if (ret < 0) - return 0; - ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); - if (ret) - goto error; - ret = btrfs_del_item(trans, root, &path); - btrfs_release_path(root, &path); - if (ret != 0) - goto error; - ptr = radix_tree_delete(radix, key.objectid); - if (!ptr) - goto error; - return 0; -error: - printf("failed to delete %Lu\n", key.objectid); - return -1; -} - -static int lookup_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct radix_tree_root *radix) -{ - struct btrfs_path path; - struct btrfs_key key; - int ret; - btrfs_init_path(&path); - ret = setup_key(radix, &key, 1); - if (ret < 0) - return 0; - ret = btrfs_search_slot(trans, root, &key, &path, 0, 1); - btrfs_release_path(root, &path); - if (ret) - goto error; - return 0; -error: - printf("unable to find key %Lu\n", key.objectid); - return -1; -} - -static int lookup_enoent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct radix_tree_root *radix) -{ - struct btrfs_path path; - struct btrfs_key key; - int ret; - btrfs_init_path(&path); - ret = setup_key(radix, &key, 0); - if (ret < 0) - return ret; - ret = btrfs_search_slot(trans, root, &key, &path, 0, 0); - btrfs_release_path(root, &path); - if (ret <= 0) - goto error; - return 0; -error: - printf("able to find key that should not exist %Lu\n", key.objectid); - return -1; -} - -static int empty_tree(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct radix_tree_root *radix, int nr) -{ - struct btrfs_path path; - struct btrfs_key key; - unsigned long found = 0; - int ret; - int slot; - int *ptr; - int count = 0; - - key.offset = 0; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_STRING_ITEM_KEY); - key.objectid = (unsigned long)-1; - while(nr-- >= 0) { - btrfs_init_path(&path); - ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); - if (ret < 0) { - btrfs_release_path(root, &path); - return ret; - } - if (ret != 0) { - if (path.slots[0] == 0) { - btrfs_release_path(root, &path); - break; - } - path.slots[0] -= 1; - } - slot = path.slots[0]; - found = btrfs_disk_key_objectid( - &path.nodes[0]->leaf.items[slot].key); - ret = btrfs_del_item(trans, root, &path); - count++; - if (ret) { - fprintf(stderr, - "failed to remove %lu from tree\n", - found); - return -1; - } - btrfs_release_path(root, &path); - ptr = radix_tree_delete(radix, found); - if (!ptr) - goto error; - if (!keep_running) - break; - } - return 0; -error: - fprintf(stderr, "failed to delete from the radix %lu\n", found); - return -1; -} - -static int fill_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct radix_tree_root *radix, int count) -{ - int i; - int ret = 0; - for (i = 0; i < count; i++) { - ret = ins_one(trans, root, radix); - if (ret) { - fprintf(stderr, "fill failed\n"); - goto out; - } - if (i % 1000 == 0) { - ret = btrfs_commit_transaction(trans, root, &super); - if (ret) { - fprintf(stderr, "fill commit failed\n"); - return ret; - } - } - if (i && i % 10000 == 0) { - printf("bigfill %d\n", i); - } - if (!keep_running) - break; - } -out: - return ret; -} - -static int bulk_op(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct radix_tree_root *radix) -{ - int ret; - int nr = rand() % 5000; - static int run_nr = 0; - - /* do the bulk op much less frequently */ - if (run_nr++ % 100) - return 0; - ret = empty_tree(trans, root, radix, nr); - if (ret) - return ret; - ret = fill_tree(trans, root, radix, nr); - if (ret) - return ret; - return 0; -} - - -int (*ops[])(struct btrfs_trans_handle *, - struct btrfs_root *root, struct radix_tree_root *radix) = - { ins_one, insert_dup, del_one, lookup_item, - lookup_enoent, bulk_op }; - -static int fill_radix(struct btrfs_root *root, struct radix_tree_root *radix) -{ - struct btrfs_path path; - struct btrfs_key key; - unsigned long found; - int ret; - int slot; - int i; - - key.offset = 0; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_STRING_ITEM_KEY); - key.objectid = (unsigned long)-1; - while(1) { - btrfs_init_path(&path); - ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); - if (ret < 0) { - btrfs_release_path(root, &path); - return ret; - } - slot = path.slots[0]; - if (ret != 0) { - if (slot == 0) { - btrfs_release_path(root, &path); - break; - } - slot -= 1; - } - for (i = slot; i >= 0; i--) { - found = btrfs_disk_key_objectid(&path.nodes[0]-> - leaf.items[i].key); - radix_tree_preload(GFP_KERNEL); - ret = radix_tree_insert(radix, found, (void *)found); - if (ret) { - fprintf(stderr, - "failed to insert %lu into radix\n", - found); - exit(1); - } - - radix_tree_preload_end(); - } - btrfs_release_path(root, &path); - key.objectid = found - 1; - if (key.objectid > found) - break; - } - return 0; -} -void sigstopper(int ignored) -{ - keep_running = 0; - fprintf(stderr, "caught exit signal, stopping\n"); -} - -int print_usage(void) -{ - printf("usage: tester [-ih] [-c count] [-f count]\n"); - printf("\t -c count -- iteration count after filling\n"); - printf("\t -f count -- run this many random inserts before starting\n"); - printf("\t -i -- only do initial fill\n"); - printf("\t -h -- this help text\n"); - exit(1); -} -int main(int ac, char **av) -{ - RADIX_TREE(radix, GFP_KERNEL); - struct btrfs_root *root; - int i; - int ret; - int count; - int op; - int iterations = 20000; - int init_fill_count = 800000; - int err = 0; - int initial_only = 0; - struct btrfs_trans_handle *trans; - radix_tree_init(); - root = open_ctree("dbfile", &super); - fill_radix(root, &radix); - - signal(SIGTERM, sigstopper); - signal(SIGINT, sigstopper); - - for (i = 1 ; i < ac ; i++) { - if (strcmp(av[i], "-i") == 0) { - initial_only = 1; - } else if (strcmp(av[i], "-c") == 0) { - iterations = atoi(av[i+1]); - i++; - } else if (strcmp(av[i], "-f") == 0) { - init_fill_count = atoi(av[i+1]); - i++; - } else { - print_usage(); - } - } - printf("initial fill\n"); - trans = btrfs_start_transaction(root, 1); - ret = fill_tree(trans, root, &radix, init_fill_count); - printf("starting run\n"); - if (ret) { - err = ret; - goto out; - } - if (initial_only == 1) { - goto out; - } - for (i = 0; i < iterations; i++) { - op = rand() % ARRAY_SIZE(ops); - count = rand() % 128; - if (i % 2000 == 0) { - printf("%d\n", i); - fflush(stdout); - } - if (i && i % 5000 == 0) { - printf("open & close, root level %d nritems %d\n", - btrfs_header_level(&root->node->node.header), - btrfs_header_nritems(&root->node->node.header)); - close_ctree(root, &super); - root = open_ctree("dbfile", &super); - } - while(count--) { - ret = ops[op](trans, root, &radix); - if (ret) { - fprintf(stderr, "op %d failed %d:%d\n", - op, i, iterations); - btrfs_print_tree(root, root->node); - fprintf(stderr, "op %d failed %d:%d\n", - op, i, iterations); - err = ret; - goto out; - } - if (ops[op] == bulk_op) - break; - if (keep_running == 0) { - err = 0; - goto out; - } - } - } -out: - close_ctree(root, &super); - return err; -} - diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 9cccecc0f43..52c83be4b30 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -1,7 +1,4 @@ -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" +#include #include "ctree.h" #include "disk-io.h" #include "print-tree.h" diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c new file mode 100644 index 00000000000..4ae76044aea --- /dev/null +++ b/fs/btrfs/super.c @@ -0,0 +1,205 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ctree.h" + +#define BTRFS_SUPER_MAGIC 0x9123682E +#if 0 +/* some random number */ + +static struct super_operations ramfs_ops; +static struct inode_operations ramfs_dir_inode_operations; + +static struct backing_dev_info ramfs_backing_dev_info = { + .ra_pages = 0, /* No readahead */ + .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | + BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | + BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP, +}; + +struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev) +{ + struct inode * inode = new_inode(sb); + + if (inode) { + inode->i_mode = mode; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_blocks = 0; + inode->i_mapping->a_ops = &ramfs_aops; + inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + switch (mode & S_IFMT) { + default: + init_special_inode(inode, mode, dev); + break; + case S_IFREG: + inode->i_op = &ramfs_file_inode_operations; + inode->i_fop = &ramfs_file_operations; + break; + case S_IFDIR: + inode->i_op = &ramfs_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + + /* directory inodes start off with i_nlink == 2 (for "." entry) */ + inc_nlink(inode); + break; + case S_IFLNK: + inode->i_op = &page_symlink_inode_operations; + break; + } + } + return inode; +} + +/* + * File creation. Allocate an inode, and we're done.. + */ +/* SMP-safe */ +static int +ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +{ + struct inode * inode = ramfs_get_inode(dir->i_sb, mode, dev); + int error = -ENOSPC; + + if (inode) { + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + inode->i_mode |= S_ISGID; + } + d_instantiate(dentry, inode); + dget(dentry); /* Extra count - pin the dentry in core */ + error = 0; + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + } + return error; +} + +static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, int mode) +{ + int retval = ramfs_mknod(dir, dentry, mode | S_IFDIR, 0); + if (!retval) + inc_nlink(dir); + return retval; +} + +static int ramfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) +{ + return ramfs_mknod(dir, dentry, mode | S_IFREG, 0); +} + +static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char * symname) +{ + struct inode *inode; + int error = -ENOSPC; + + inode = ramfs_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); + if (inode) { + int l = strlen(symname)+1; + error = page_symlink(inode, symname, l); + if (!error) { + if (dir->i_mode & S_ISGID) + inode->i_gid = dir->i_gid; + d_instantiate(dentry, inode); + dget(dentry); + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + } else + iput(inode); + } + return error; +} + +static struct inode_operations ramfs_dir_inode_operations = { + .create = ramfs_create, + .lookup = simple_lookup, + .link = simple_link, + .unlink = simple_unlink, + .symlink = ramfs_symlink, + .mkdir = ramfs_mkdir, + .rmdir = simple_rmdir, + .mknod = ramfs_mknod, + .rename = simple_rename, +}; +#endif + +struct inode *btrfs_get_inode(struct super_block *sb, int mode, dev_t dev) +{ + struct inode * inode = new_inode(sb); + + if (inode) { + inode->i_mode = mode; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_blocks = 0; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + } + return inode; +} + +static struct super_operations btrfs_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, +}; + +static int btrfs_fill_super(struct super_block * sb, void * data, int silent) +{ + struct inode * inode; + struct dentry * root; + + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = BTRFS_SUPER_MAGIC; + sb->s_op = &btrfs_ops; + sb->s_time_gran = 1; + inode = btrfs_get_inode(sb, S_IFDIR | 0755, 0); + if (!inode) + return -ENOMEM; + + root = d_alloc_root(inode); + if (!root) { + iput(inode); + return -ENOMEM; + } + sb->s_root = root; + return 0; +} + +static int btrfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) +{ + return get_sb_bdev(fs_type, flags, dev_name, data, + btrfs_fill_super, mnt); +} + +static struct file_system_type btrfs_fs_type = { + .owner = THIS_MODULE, + .name = "btrfs", + .get_sb = btrfs_get_sb, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static int __init init_btrfs_fs(void) +{ + printk("btrfs loaded!\n"); + return register_filesystem(&btrfs_fs_type); +} + +static void __exit exit_btrfs_fs(void) +{ + unregister_filesystem(&btrfs_fs_type); + printk("btrfs unloaded\n"); +} + +module_init(init_btrfs_fs) +module_exit(exit_btrfs_fs) + +MODULE_LICENSE("GPL"); -- cgit v1.2.3-70-g09d2 From e20d96d64f9cf9288ffecc9ad4714e91c3b97ca8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 22 Mar 2007 12:13:20 -0400 Subject: Mountable btrfs, with readdir Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 5 +- fs/btrfs/ctree.c | 273 ++++++++++++++++++++++++----------------------- fs/btrfs/ctree.h | 62 ++++++++--- fs/btrfs/dir-item.c | 17 ++- fs/btrfs/disk-io.c | 281 +++++++++++++++---------------------------------- fs/btrfs/disk-io.h | 51 +++++---- fs/btrfs/extent-tree.c | 144 +++++++++++++------------ fs/btrfs/hash.c | 9 ++ fs/btrfs/inode-map.c | 8 +- fs/btrfs/print-tree.c | 20 ++-- fs/btrfs/print-tree.h | 2 +- fs/btrfs/root-tree.c | 4 +- fs/btrfs/super.c | 261 +++++++++++++++++++++++++++++++++++++++++---- fs/btrfs/transaction.h | 4 +- 14 files changed, 651 insertions(+), 490 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 99e45a54ebd..1a0fb742239 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -2,7 +2,8 @@ ifneq ($(KERNELRELEASE),) # kbuild part of makefile obj-m := btrfs.o -btrfs-y := super.o +btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ + hash.o file-item.o inode-item.o inode-map.o disk-io.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ @@ -14,7 +15,7 @@ else KERNELDIR := /lib/modules/`uname -r`/build all:: - $(MAKE) -C $(KERNELDIR) M=`pwd` modules + $(MAKE) C=1 -C $(KERNELDIR) M=`pwd` modules clean:: rm *.o btrfs.ko endif diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9fbd07c37fd..e690e2bb47d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -7,11 +7,11 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size); static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_buffer *dst, struct btrfs_buffer + *root, struct buffer_head *dst, struct buffer_head *src); static int balance_node_right(struct btrfs_trans_handle *trans, struct - btrfs_root *root, struct btrfs_buffer *dst_buf, - struct btrfs_buffer *src_buf); + btrfs_root *root, struct buffer_head *dst_buf, + struct buffer_head *src_buf); static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot); @@ -32,32 +32,34 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) } static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_buffer *buf, struct btrfs_buffer - *parent, int parent_slot, struct btrfs_buffer + *root, struct buffer_head *buf, struct buffer_head + *parent, int parent_slot, struct buffer_head **cow_ret) { - struct btrfs_buffer *cow; + struct buffer_head *cow; + struct btrfs_node *cow_node; - if (!list_empty(&buf->dirty)) { + if (!buffer_dirty(buf)) { *cow_ret = buf; return 0; } cow = btrfs_alloc_free_block(trans, root); - memcpy(&cow->node, &buf->node, root->blocksize); - btrfs_set_header_blocknr(&cow->node.header, cow->blocknr); + cow_node = btrfs_buffer_node(cow); + memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); + btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr); *cow_ret = cow; btrfs_inc_ref(trans, root, buf); if (buf == root->node) { root->node = cow; - cow->count++; + get_bh(cow); if (buf != root->commit_root) - btrfs_free_extent(trans, root, buf->blocknr, 1, 1); + btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); btrfs_block_release(root, buf); } else { - btrfs_set_node_blockptr(&parent->node, parent_slot, - cow->blocknr); - BUG_ON(list_empty(&parent->dirty)); - btrfs_free_extent(trans, root, buf->blocknr, 1, 1); + btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, + cow->b_blocknr); + BUG_ON(!buffer_dirty(parent)); + btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); } btrfs_block_release(root, buf); return 0; @@ -119,12 +121,12 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, { int i; struct btrfs_node *parent = NULL; - struct btrfs_node *node = &path->nodes[level]->node; + struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]); int parent_slot; u32 nritems = btrfs_header_nritems(&node->header); if (path->nodes[level + 1]) - parent = &path->nodes[level + 1]->node; + parent = btrfs_buffer_node(path->nodes[level + 1]); parent_slot = path->slots[level + 1]; BUG_ON(nritems == 0); if (parent) { @@ -148,13 +150,13 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, int level) { int i; - struct btrfs_leaf *leaf = &path->nodes[level]->leaf; + struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[level]); struct btrfs_node *parent = NULL; int parent_slot; u32 nritems = btrfs_header_nritems(&leaf->header); if (path->nodes[level + 1]) - parent = &path->nodes[level + 1]->node; + parent = btrfs_buffer_node(path->nodes[level + 1]); parent_slot = path->slots[level + 1]; BUG_ON(btrfs_leaf_free_space(root, leaf) < 0); @@ -250,11 +252,11 @@ static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot) return -1; } -static struct btrfs_buffer *read_node_slot(struct btrfs_root *root, - struct btrfs_buffer *parent_buf, +static struct buffer_head *read_node_slot(struct btrfs_root *root, + struct buffer_head *parent_buf, int slot) { - struct btrfs_node *node = &parent_buf->node; + struct btrfs_node *node = btrfs_buffer_node(parent_buf); if (slot < 0) return NULL; if (slot >= btrfs_header_nritems(&node->header)) @@ -265,10 +267,10 @@ static struct btrfs_buffer *read_node_slot(struct btrfs_root *root, static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_buffer *right_buf; - struct btrfs_buffer *mid_buf; - struct btrfs_buffer *left_buf; - struct btrfs_buffer *parent_buf = NULL; + struct buffer_head *right_buf; + struct buffer_head *mid_buf; + struct buffer_head *left_buf; + struct buffer_head *parent_buf = NULL; struct btrfs_node *right = NULL; struct btrfs_node *mid; struct btrfs_node *left = NULL; @@ -283,7 +285,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root return 0; mid_buf = path->nodes[level]; - mid = &mid_buf->node; + mid = btrfs_buffer_node(mid_buf); orig_ptr = btrfs_node_blockptr(mid, orig_slot); if (level < BTRFS_MAX_LEVEL - 1) @@ -295,8 +297,8 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root * by promoting the node below to a root */ if (!parent_buf) { - struct btrfs_buffer *child; - u64 blocknr = mid_buf->blocknr; + struct buffer_head *child; + u64 blocknr = mid_buf->b_blocknr; if (btrfs_header_nritems(&mid->header) != 1) return 0; @@ -313,7 +315,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root clean_tree_block(trans, root, mid_buf); return btrfs_free_extent(trans, root, blocknr, 1, 1); } - parent = &parent_buf->node; + parent = btrfs_buffer_node(parent_buf); if (btrfs_header_nritems(&mid->header) > BTRFS_NODEPTRS_PER_BLOCK(root) / 4) @@ -326,7 +328,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (left_buf) { btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1, &left_buf); - left = &left_buf->node; + left = btrfs_buffer_node(left_buf); orig_slot += btrfs_header_nritems(&left->header); wret = push_node_left(trans, root, left_buf, mid_buf); if (wret < 0) @@ -339,12 +341,12 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (right_buf) { btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1, &right_buf); - right = &right_buf->node; + right = btrfs_buffer_node(right_buf); wret = push_node_left(trans, root, mid_buf, right_buf); if (wret < 0) ret = wret; if (btrfs_header_nritems(&right->header) == 0) { - u64 blocknr = right_buf->blocknr; + u64 blocknr = right_buf->b_blocknr; btrfs_block_release(root, right_buf); clean_tree_block(trans, root, right_buf); right_buf = NULL; @@ -360,7 +362,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root memcpy(&parent->ptrs[pslot + 1].key, &right->ptrs[0].key, sizeof(struct btrfs_disk_key)); - BUG_ON(list_empty(&parent_buf->dirty)); + BUG_ON(!buffer_dirty(parent_buf)); } } if (btrfs_header_nritems(&mid->header) == 1) { @@ -381,7 +383,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root } if (btrfs_header_nritems(&mid->header) == 0) { /* we've managed to empty the middle node, drop it */ - u64 blocknr = mid_buf->blocknr; + u64 blocknr = mid_buf->b_blocknr; btrfs_block_release(root, mid_buf); clean_tree_block(trans, root, mid_buf); mid_buf = NULL; @@ -396,13 +398,13 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root /* update the parent key to reflect our changes */ memcpy(&parent->ptrs[pslot].key, &mid->ptrs[0].key, sizeof(struct btrfs_disk_key)); - BUG_ON(list_empty(&parent_buf->dirty)); + BUG_ON(!buffer_dirty(parent_buf)); } /* update the path */ if (left_buf) { if (btrfs_header_nritems(&left->header) > orig_slot) { - left_buf->count++; // released below + get_bh(left_buf); path->nodes[level] = left_buf; path->slots[level + 1] -= 1; path->slots[level] = orig_slot; @@ -415,8 +417,9 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root } /* double check we haven't messed things up */ check_block(root, path, level); - if (orig_ptr != btrfs_node_blockptr(&path->nodes[level]->node, - path->slots[level])) + if (orig_ptr != + btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]), + path->slots[level])) BUG(); if (right_buf) @@ -443,8 +446,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow) { - struct btrfs_buffer *b; - struct btrfs_buffer *cow_buf; + struct buffer_head *b; + struct buffer_head *cow_buf; struct btrfs_node *c; int slot; int ret; @@ -452,18 +455,20 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root again: b = root->node; - b->count++; + get_bh(b); while (b) { - level = btrfs_header_level(&b->node.header); + c = btrfs_buffer_node(b); + level = btrfs_header_level(&c->header); if (cow) { int wret; - wret = btrfs_cow_block(trans, root, b, p->nodes[level + - 1], p->slots[level + 1], + wret = btrfs_cow_block(trans, root, b, + p->nodes[level + 1], + p->slots[level + 1], &cow_buf); b = cow_buf; } BUG_ON(!cow && ins_len); - c = &b->node; + c = btrfs_buffer_node(b); p->nodes[level] = b; ret = check_block(root, p, level); if (ret) @@ -480,7 +485,7 @@ again: if (sret) return sret; b = p->nodes[level]; - c = &b->node; + c = btrfs_buffer_node(b); slot = p->slots[level]; } else if (ins_len < 0) { int sret = balance_level(trans, root, p, @@ -490,7 +495,7 @@ again: b = p->nodes[level]; if (!b) goto again; - c = &b->node; + c = btrfs_buffer_node(b); slot = p->slots[level]; BUG_ON(btrfs_header_nritems(&c->header) == 1); } @@ -505,11 +510,9 @@ again: if (sret) return sret; } - BUG_ON(root->node->count == 1); return ret; } } - BUG_ON(root->node->count == 1); return 1; } @@ -534,9 +537,9 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root int tslot = path->slots[i]; if (!path->nodes[i]) break; - t = &path->nodes[i]->node; + t = btrfs_buffer_node(path->nodes[i]); memcpy(&t->ptrs[tslot].key, key, sizeof(*key)); - BUG_ON(list_empty(&path->nodes[i]->dirty)); + BUG_ON(!buffer_dirty(path->nodes[i])); if (tslot != 0) break; } @@ -551,11 +554,11 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root * error, and > 0 if there was no room in the left hand block. */ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_buffer *dst_buf, struct - btrfs_buffer *src_buf) + *root, struct buffer_head *dst_buf, struct + buffer_head *src_buf) { - struct btrfs_node *src = &src_buf->node; - struct btrfs_node *dst = &dst_buf->node; + struct btrfs_node *src = btrfs_buffer_node(src_buf); + struct btrfs_node *dst = btrfs_buffer_node(dst_buf); int push_items = 0; int src_nritems; int dst_nritems; @@ -580,8 +583,8 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root } btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - BUG_ON(list_empty(&src_buf->dirty)); - BUG_ON(list_empty(&dst_buf->dirty)); + BUG_ON(!buffer_dirty(src_buf)); + BUG_ON(!buffer_dirty(dst_buf)); return ret; } @@ -595,11 +598,11 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root * this will only push up to 1/2 the contents of the left node over */ static int balance_node_right(struct btrfs_trans_handle *trans, struct - btrfs_root *root, struct btrfs_buffer *dst_buf, - struct btrfs_buffer *src_buf) + btrfs_root *root, struct buffer_head *dst_buf, + struct buffer_head *src_buf) { - struct btrfs_node *src = &src_buf->node; - struct btrfs_node *dst = &dst_buf->node; + struct btrfs_node *src = btrfs_buffer_node(src_buf); + struct btrfs_node *dst = btrfs_buffer_node(dst_buf); int push_items = 0; int max_push; int src_nritems; @@ -628,8 +631,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - BUG_ON(list_empty(&src_buf->dirty)); - BUG_ON(list_empty(&dst_buf->dirty)); + BUG_ON(!buffer_dirty(src_buf)); + BUG_ON(!buffer_dirty(dst_buf)); return ret; } @@ -643,7 +646,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_buffer *t; + struct buffer_head *t; struct btrfs_node *lower; struct btrfs_node *c; struct btrfs_disk_key *lower_key; @@ -652,24 +655,24 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(path->nodes[level-1] != root->node); t = btrfs_alloc_free_block(trans, root); - c = &t->node; + c = btrfs_buffer_node(t); memset(c, 0, root->blocksize); btrfs_set_header_nritems(&c->header, 1); btrfs_set_header_level(&c->header, level); - btrfs_set_header_blocknr(&c->header, t->blocknr); + btrfs_set_header_blocknr(&c->header, t->b_blocknr); btrfs_set_header_parentid(&c->header, - btrfs_header_parentid(&root->node->node.header)); - lower = &path->nodes[level-1]->node; + btrfs_header_parentid(btrfs_buffer_header(root->node))); + lower = btrfs_buffer_node(path->nodes[level-1]); if (btrfs_is_leaf(lower)) lower_key = &((struct btrfs_leaf *)lower)->items[0].key; else lower_key = &lower->ptrs[0].key; memcpy(&c->ptrs[0].key, lower_key, sizeof(struct btrfs_disk_key)); - btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->blocknr); + btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->b_blocknr); /* the super has an extra ref to root->node */ btrfs_block_release(root, root->node); root->node = t; - t->count++; + get_bh(t); path->nodes[level] = t; path->slots[level] = 0; return 0; @@ -692,7 +695,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root int nritems; BUG_ON(!path->nodes[level]); - lower = &path->nodes[level]->node; + lower = btrfs_buffer_node(path->nodes[level]); nritems = btrfs_header_nritems(&lower->header); if (slot > nritems) BUG(); @@ -705,7 +708,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root memcpy(&lower->ptrs[slot].key, key, sizeof(struct btrfs_disk_key)); btrfs_set_node_blockptr(lower, slot, blocknr); btrfs_set_header_nritems(&lower->header, nritems + 1); - BUG_ON(list_empty(&path->nodes[level]->dirty)); + BUG_ON(!buffer_dirty(path->nodes[level])); return 0; } @@ -721,9 +724,9 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_buffer *t; + struct buffer_head *t; struct btrfs_node *c; - struct btrfs_buffer *split_buffer; + struct buffer_head *split_buffer; struct btrfs_node *split; int mid; int ret; @@ -731,7 +734,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root u32 c_nritems; t = path->nodes[level]; - c = &t->node; + c = btrfs_buffer_node(t); if (t == root->node) { /* trying to split the root, lets make a new one */ ret = insert_new_root(trans, root, path, level + 1); @@ -740,11 +743,11 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root } c_nritems = btrfs_header_nritems(&c->header); split_buffer = btrfs_alloc_free_block(trans, root); - split = &split_buffer->node; + split = btrfs_buffer_node(split_buffer); btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); - btrfs_set_header_blocknr(&split->header, split_buffer->blocknr); + btrfs_set_header_blocknr(&split->header, split_buffer->b_blocknr); btrfs_set_header_parentid(&split->header, - btrfs_header_parentid(&root->node->node.header)); + btrfs_header_parentid(btrfs_buffer_header(root->node))); mid = (c_nritems + 1) / 2; memcpy(split->ptrs, c->ptrs + mid, (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); @@ -752,9 +755,9 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&c->header, mid); ret = 0; - BUG_ON(list_empty(&t->dirty)); + BUG_ON(!buffer_dirty(t)); wret = insert_ptr(trans, root, path, &split->ptrs[0].key, - split_buffer->blocknr, path->slots[level + 1] + 1, + split_buffer->b_blocknr, path->slots[level + 1] + 1, level + 1); if (wret) ret = wret; @@ -798,11 +801,12 @@ static int leaf_space_used(struct btrfs_leaf *l, int start, int nr) static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct btrfs_buffer *left_buf = path->nodes[0]; - struct btrfs_leaf *left = &left_buf->leaf; + struct buffer_head *left_buf = path->nodes[0]; + struct btrfs_leaf *left = btrfs_buffer_leaf(left_buf); struct btrfs_leaf *right; - struct btrfs_buffer *right_buf; - struct btrfs_buffer *upper; + struct buffer_head *right_buf; + struct buffer_head *upper; + struct btrfs_node *upper_node; int slot; int i; int free_space; @@ -817,12 +821,13 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } upper = path->nodes[1]; - if (slot >= btrfs_header_nritems(&upper->node.header) - 1) { + upper_node = btrfs_buffer_node(upper); + if (slot >= btrfs_header_nritems(&upper_node->header) - 1) { return 1; } - right_buf = read_tree_block(root, btrfs_node_blockptr(&upper->node, - slot + 1)); - right = &right_buf->leaf; + right_buf = read_tree_block(root, + btrfs_node_blockptr(btrfs_buffer_node(upper), slot + 1)); + right = btrfs_buffer_leaf(right_buf); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, right_buf); @@ -830,7 +835,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root } /* cow and double check */ btrfs_cow_block(trans, root, right_buf, upper, slot + 1, &right_buf); - right = &right_buf->leaf; + right = btrfs_buffer_leaf(right_buf); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, right_buf); @@ -881,11 +886,11 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root left_nritems -= push_items; btrfs_set_header_nritems(&left->header, left_nritems); - BUG_ON(list_empty(&left_buf->dirty)); - BUG_ON(list_empty(&right_buf->dirty)); - memcpy(&upper->node.ptrs[slot + 1].key, + BUG_ON(!buffer_dirty(left_buf)); + BUG_ON(!buffer_dirty(right_buf)); + memcpy(&upper_node->ptrs[slot + 1].key, &right->items[0].key, sizeof(struct btrfs_disk_key)); - BUG_ON(list_empty(&upper->dirty)); + BUG_ON(!buffer_dirty(upper)); /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left_nritems) { @@ -905,9 +910,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct btrfs_buffer *right_buf = path->nodes[0]; - struct btrfs_leaf *right = &right_buf->leaf; - struct btrfs_buffer *t; + struct buffer_head *right_buf = path->nodes[0]; + struct btrfs_leaf *right = btrfs_buffer_leaf(right_buf); + struct buffer_head *t; struct btrfs_leaf *left; int slot; int i; @@ -926,9 +931,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root if (!path->nodes[1]) { return 1; } - t = read_tree_block(root, btrfs_node_blockptr(&path->nodes[1]->node, - slot - 1)); - left = &t->leaf; + t = read_tree_block(root, + btrfs_node_blockptr(btrfs_buffer_node(path->nodes[1]), slot - 1)); + left = btrfs_buffer_leaf(t); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, t); @@ -937,7 +942,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root /* cow and double check */ btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t); - left = &t->leaf; + left = btrfs_buffer_leaf(t); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, t); @@ -999,8 +1004,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root push_space = btrfs_item_offset(right->items + i); } - BUG_ON(list_empty(&t->dirty)); - BUG_ON(list_empty(&right_buf->dirty)); + BUG_ON(!buffer_dirty(t)); + BUG_ON(!buffer_dirty(right_buf)); wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); if (wret) @@ -1029,13 +1034,13 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct btrfs_buffer *l_buf; + struct buffer_head *l_buf; struct btrfs_leaf *l; u32 nritems; int mid; int slot; struct btrfs_leaf *right; - struct btrfs_buffer *right_buffer; + struct buffer_head *right_buffer; int space_needed = data_size + sizeof(struct btrfs_item); int data_copy_size; int rt_data_off; @@ -1053,7 +1058,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root return wret; } l_buf = path->nodes[0]; - l = &l_buf->leaf; + l = btrfs_buffer_leaf(l_buf); /* did the pushes work? */ if (btrfs_leaf_free_space(root, l) >= @@ -1071,7 +1076,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root right_buffer = btrfs_alloc_free_block(trans, root); BUG_ON(!right_buffer); BUG_ON(mid == nritems); - right = &right_buffer->leaf; + right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); if (mid <= slot) { /* FIXME, just alloc a new leaf here */ @@ -1085,10 +1090,10 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root BUG(); } btrfs_set_header_nritems(&right->header, nritems - mid); - btrfs_set_header_blocknr(&right->header, right_buffer->blocknr); + btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr); btrfs_set_header_level(&right->header, 0); btrfs_set_header_parentid(&right->header, - btrfs_header_parentid(&root->node->node.header)); + btrfs_header_parentid(btrfs_buffer_header(root->node))); data_copy_size = btrfs_item_end(l->items + mid) - leaf_data_end(root, l); memcpy(right->items, l->items + mid, @@ -1107,11 +1112,11 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&l->header, mid); ret = 0; wret = insert_ptr(trans, root, path, &right->items[0].key, - right_buffer->blocknr, path->slots[1] + 1, 1); + right_buffer->b_blocknr, path->slots[1] + 1, 1); if (wret) ret = wret; - BUG_ON(list_empty(&right_buffer->dirty)); - BUG_ON(list_empty(&l_buf->dirty)); + BUG_ON(!buffer_dirty(right_buffer)); + BUG_ON(!buffer_dirty(l_buf)); BUG_ON(path->slots[0] != slot); if (mid <= slot) { btrfs_block_release(root, path->nodes[0]); @@ -1136,7 +1141,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root int slot; int slot_orig; struct btrfs_leaf *leaf; - struct btrfs_buffer *leaf_buf; + struct buffer_head *leaf_buf; u32 nritems; unsigned int data_end; struct btrfs_disk_key disk_key; @@ -1156,7 +1161,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root slot_orig = path->slots[0]; leaf_buf = path->nodes[0]; - leaf = &leaf_buf->leaf; + leaf = btrfs_buffer_leaf(leaf_buf); nritems = btrfs_header_nritems(&leaf->header); data_end = leaf_data_end(root, leaf); @@ -1202,7 +1207,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root if (slot == 0) ret = fixup_low_keys(trans, root, path, &disk_key, 1); - BUG_ON(list_empty(&leaf_buf->dirty)); + BUG_ON(!buffer_dirty(leaf_buf)); if (btrfs_leaf_free_space(root, leaf) < 0) BUG(); check_leaf(root, path, 0); @@ -1225,7 +1230,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_init_path(&path); ret = btrfs_insert_empty_item(trans, root, &path, cpu_key, data_size); if (!ret) { - ptr = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], u8); + ptr = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], u8); memcpy(ptr, data, data_size); } btrfs_release_path(root, &path); @@ -1243,12 +1249,12 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { struct btrfs_node *node; - struct btrfs_buffer *parent = path->nodes[level]; + struct buffer_head *parent = path->nodes[level]; u32 nritems; int ret = 0; int wret; - node = &parent->node; + node = btrfs_buffer_node(parent); nritems = btrfs_header_nritems(&node->header); if (slot != nritems -1) { memmove(node->ptrs + slot, node->ptrs + slot + 1, @@ -1257,16 +1263,17 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, nritems--; btrfs_set_header_nritems(&node->header, nritems); if (nritems == 0 && parent == root->node) { - BUG_ON(btrfs_header_level(&root->node->node.header) != 1); + struct btrfs_header *header = btrfs_buffer_header(root->node); + BUG_ON(btrfs_header_level(header) != 1); /* just turn the root into a leaf and break */ - btrfs_set_header_level(&root->node->node.header, 0); + btrfs_set_header_level(header, 0); } else if (slot == 0) { wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key, level + 1); if (wret) ret = wret; } - BUG_ON(list_empty(&parent->dirty)); + BUG_ON(!buffer_dirty(parent)); return ret; } @@ -1279,7 +1286,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, { int slot; struct btrfs_leaf *leaf; - struct btrfs_buffer *leaf_buf; + struct buffer_head *leaf_buf; int doff; int dsize; int ret = 0; @@ -1287,7 +1294,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 nritems; leaf_buf = path->nodes[0]; - leaf = &leaf_buf->leaf; + leaf = btrfs_buffer_leaf(leaf_buf); slot = path->slots[0]; doff = btrfs_item_offset(leaf->items + slot); dsize = btrfs_item_size(leaf->items + slot); @@ -1313,14 +1320,13 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (nritems == 0) { if (leaf_buf == root->node) { btrfs_set_header_level(&leaf->header, 0); - BUG_ON(list_empty(&leaf_buf->dirty)); } else { clean_tree_block(trans, root, leaf_buf); wret = del_ptr(trans, root, path, 1, path->slots[1]); if (wret) ret = wret; wret = btrfs_free_extent(trans, root, - leaf_buf->blocknr, 1, 1); + leaf_buf->b_blocknr, 1, 1); if (wret) ret = wret; } @@ -1332,7 +1338,6 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; } - BUG_ON(list_empty(&leaf_buf->dirty)); /* delete the leaf if it is mostly empty */ if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) { @@ -1341,7 +1346,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, * for possible call to del_ptr below */ slot = path->slots[1]; - leaf_buf->count++; + get_bh(leaf_buf); wret = push_leaf_left(trans, root, path, 1); if (wret < 0) ret = wret; @@ -1352,7 +1357,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = wret; } if (btrfs_header_nritems(&leaf->header) == 0) { - u64 blocknr = leaf_buf->blocknr; + u64 blocknr = leaf_buf->b_blocknr; clean_tree_block(trans, root, leaf_buf); wret = del_ptr(trans, root, path, 1, slot); if (wret) @@ -1380,19 +1385,21 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) int slot; int level = 1; u64 blocknr; - struct btrfs_buffer *c; - struct btrfs_buffer *next = NULL; + struct buffer_head *c; + struct btrfs_node *c_node; + struct buffer_head *next = NULL; while(level < BTRFS_MAX_LEVEL) { if (!path->nodes[level]) return 1; slot = path->slots[level] + 1; c = path->nodes[level]; - if (slot >= btrfs_header_nritems(&c->node.header)) { + c_node = btrfs_buffer_node(c); + if (slot >= btrfs_header_nritems(&c_node->header)) { level++; continue; } - blocknr = btrfs_node_blockptr(&c->node, slot); + blocknr = btrfs_node_blockptr(c_node, slot); if (next) btrfs_block_release(root, next); next = read_tree_block(root, blocknr); @@ -1408,7 +1415,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) if (!level) break; next = read_tree_block(root, - btrfs_node_blockptr(&next->node, 0)); + btrfs_node_blockptr(btrfs_buffer_node(next), 0)); } return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ae8518cb94b..7748eecd930 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1,6 +1,9 @@ #ifndef __BTRFS__ #define __BTRFS__ +#include +#include + struct btrfs_trans_handle; #define BTRFS_MAGIC "_BtRfS_M" @@ -10,6 +13,12 @@ struct btrfs_trans_handle; #define BTRFS_INODE_MAP_OBJECTID 3 #define BTRFS_FS_TREE_OBJECTID 4 +/* + * we can actually store much bigger names, but lets not confuse the rest + * of linux + */ +#define BTRFS_NAME_LEN 255 + /* * the key defines the order in the tree, and so it also defines (optimal) * block layout. objectid corresonds to the inode number. The flags @@ -57,7 +66,7 @@ struct btrfs_header { #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) #define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->blocksize)) -struct btrfs_buffer; +struct buffer_head; /* * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc @@ -120,7 +129,7 @@ struct btrfs_node { * used while walking the tree. */ struct btrfs_path { - struct btrfs_buffer *nodes[BTRFS_MAX_LEVEL]; + struct buffer_head *nodes[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL]; }; @@ -211,17 +220,14 @@ struct btrfs_fs_info { struct btrfs_root *inode_root; struct btrfs_key current_insert; struct btrfs_key last_insert; - struct radix_tree_root cache_radix; struct radix_tree_root pinned_radix; - struct list_head trans; - struct list_head cache; u64 last_inode_alloc; u64 last_inode_alloc_dirid; u64 generation; - int cache_size; - int fp; struct btrfs_trans_handle *running_transaction; struct btrfs_super_block *disk_super; + struct buffer_head *sb_buffer; + struct super_block *sb; }; /* @@ -230,8 +236,8 @@ struct btrfs_fs_info { * only for the extent tree. */ struct btrfs_root { - struct btrfs_buffer *node; - struct btrfs_buffer *commit_root; + struct buffer_head *node; + struct buffer_head *commit_root; struct btrfs_root_item root_item; struct btrfs_key root_key; struct btrfs_fs_info *fs_info; @@ -389,6 +395,29 @@ static inline void btrfs_set_inode_compat_flags(struct btrfs_inode_item *i, i->compat_flags = cpu_to_le16(val); } +static inline u32 btrfs_timespec_sec(struct btrfs_inode_timespec *ts) +{ + return le32_to_cpu(ts->sec); +} + +static inline void btrfs_set_timespec_sec(struct btrfs_inode_timespec *ts, + u32 val) +{ + ts->sec = cpu_to_le32(val); +} + +static inline u32 btrfs_timespec_nsec(struct btrfs_inode_timespec *ts) +{ + return le32_to_cpu(ts->nsec); +} + +static inline void btrfs_set_timespec_nsec(struct btrfs_inode_timespec *ts, + u32 val) +{ + ts->nsec = cpu_to_le32(val); +} + + static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) { @@ -757,15 +786,20 @@ static inline void btrfs_set_file_extent_num_blocks(struct e->num_blocks = cpu_to_le64(val); } +static inline struct btrfs_root *btrfs_sb(struct super_block *sb) +{ + return sb->s_fs_info; +} + /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ btrfs_item_offset((leaf)->items + (slot)))) -struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, +struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf); + struct buffer_head *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin); int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root @@ -783,7 +817,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf); int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_buffer *snap); + *root, struct buffer_head *snap); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -800,8 +834,8 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, char *name, int name_len, u64 dir, u64 objectid, u8 type); int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, u64 dir, char *name, - int name_len, int mod); + *root, struct btrfs_path *path, u64 dir, + const char *name, int name_len, int mod); int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, char *name, int name_len); int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 4d8083d92fa..75d6e373e98 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -18,12 +18,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root key.objectid = dir; key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); - if (name_len == 1 && *name == '.') - key.offset = 1; - else if (name_len == 2 && name[0] == '.' && name[1] == '.') - key.offset = 2; - else - ret = btrfs_name_hash(name, name_len, &key.offset); + ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); btrfs_init_path(&path); data_size = sizeof(*dir_item) + name_len; @@ -31,7 +26,8 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root if (ret) goto out; - dir_item = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], + dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], struct btrfs_dir_item); btrfs_set_dir_objectid(dir_item, objectid); btrfs_set_dir_type(dir_item, type); @@ -45,8 +41,8 @@ out: } int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, u64 dir, char *name, - int name_len, int mod) + *root, struct btrfs_path *path, u64 dir, + const char *name, int name_len, int mod) { int ret; struct btrfs_key key; @@ -69,7 +65,8 @@ int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_dir_item *dir_item; char *name_ptr; - dir_item = btrfs_item_ptr(&path->nodes[0]->leaf, path->slots[0], + dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_dir_item); if (btrfs_dir_name_len(dir_item) != name_len) return 0; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 05637f9fd7c..df2061a735c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1,165 +1,67 @@ -#define _XOPEN_SOURCE 500 -#include -#include -#include -#include -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" +#include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" -static int allocated_blocks = 0; -int cache_max = 10000; - -static int check_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) +static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { - if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) + struct btrfs_node *node = btrfs_buffer_node(buf); + if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) BUG(); - if (root->node && btrfs_header_parentid(&buf->node.header) != - btrfs_header_parentid(&root->node->node.header)) + if (root->node && btrfs_header_parentid(&node->header) != + btrfs_header_parentid(btrfs_buffer_header(root->node))) BUG(); return 0; } -static int free_some_buffers(struct btrfs_root *root) +struct buffer_head *alloc_tree_block(struct btrfs_root *root, u64 blocknr) { - struct list_head *node, *next; - struct btrfs_buffer *b; - if (root->fs_info->cache_size < cache_max) - return 0; - list_for_each_safe(node, next, &root->fs_info->cache) { - b = list_entry(node, struct btrfs_buffer, cache); - if (b->count == 1) { - BUG_ON(!list_empty(&b->dirty)); - list_del_init(&b->cache); - btrfs_block_release(root, b); - if (root->fs_info->cache_size < cache_max) - break; - } - } - return 0; + return sb_getblk(root->fs_info->sb, blocknr); } -struct btrfs_buffer *alloc_tree_block(struct btrfs_root *root, u64 blocknr) +struct buffer_head *find_tree_block(struct btrfs_root *root, u64 blocknr) { - struct btrfs_buffer *buf; - int ret; - - buf = malloc(sizeof(struct btrfs_buffer) + root->blocksize); - if (!buf) - return buf; - allocated_blocks++; - buf->blocknr = blocknr; - buf->count = 2; - INIT_LIST_HEAD(&buf->dirty); - free_some_buffers(root); - radix_tree_preload(GFP_KERNEL); - ret = radix_tree_insert(&root->fs_info->cache_radix, blocknr, buf); - radix_tree_preload_end(); - list_add_tail(&buf->cache, &root->fs_info->cache); - root->fs_info->cache_size++; - if (ret) { - free(buf); - return NULL; - } - return buf; + return sb_getblk(root->fs_info->sb, blocknr); } -struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr) +struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) { - struct btrfs_buffer *buf; - buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr); - if (buf) { - buf->count++; - } else { - buf = alloc_tree_block(root, blocknr); - if (!buf) { - BUG(); - return NULL; - } - } - return buf; -} - -struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) -{ - loff_t offset = blocknr * root->blocksize; - struct btrfs_buffer *buf; - int ret; + struct buffer_head *buf = sb_bread(root->fs_info->sb, blocknr); - buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr); - if (buf) { - buf->count++; - } else { - buf = alloc_tree_block(root, blocknr); - if (!buf) - return NULL; - ret = pread(root->fs_info->fp, &buf->node, root->blocksize, - offset); - if (ret != root->blocksize) { - free(buf); - return NULL; - } - } + if (!buf) + return buf; if (check_tree_block(root, buf)) BUG(); return buf; } int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf) + struct buffer_head *buf) { - if (!list_empty(&buf->dirty)) - return 0; - list_add_tail(&buf->dirty, &root->fs_info->trans); - buf->count++; + mark_buffer_dirty(buf); return 0; } int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf) + struct buffer_head *buf) { - if (!list_empty(&buf->dirty)) { - list_del_init(&buf->dirty); - btrfs_block_release(root, buf); - } + clear_buffer_dirty(buf); return 0; } int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf) + struct buffer_head *buf) { - u64 blocknr = buf->blocknr; - loff_t offset = blocknr * root->blocksize; - int ret; - - if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) - BUG(); - ret = pwrite(root->fs_info->fp, &buf->node, root->blocksize, offset); - if (ret != root->blocksize) - return ret; + mark_buffer_dirty(buf); return 0; } static int __commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct btrfs_buffer *b; - int ret = 0; - int wret; - while(!list_empty(&root->fs_info->trans)) { - b = list_entry(root->fs_info->trans.next, struct btrfs_buffer, - dirty); - list_del_init(&b->dirty); - wret = write_tree_block(trans, root, b); - if (wret) - ret = wret; - btrfs_block_release(root, b); - } - return ret; + filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); + return 0; } static int commit_tree_roots(struct btrfs_trans_handle *trans, @@ -172,17 +74,17 @@ static int commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *inode_root = fs_info->inode_root; btrfs_set_root_blocknr(&inode_root->root_item, - inode_root->node->blocknr); + inode_root->node->b_blocknr); ret = btrfs_update_root(trans, tree_root, &inode_root->root_key, &inode_root->root_item); BUG_ON(ret); while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); - if (old_extent_block == extent_root->node->blocknr) + if (old_extent_block == extent_root->node->b_blocknr) break; btrfs_set_root_blocknr(&extent_root->root_item, - extent_root->node->blocknr); + extent_root->node->b_blocknr); ret = btrfs_update_root(trans, tree_root, &extent_root->root_key, &extent_root->root_item); @@ -195,7 +97,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_super_block *s) { int ret = 0; - struct btrfs_buffer *snap = root->commit_root; + struct buffer_head *snap = root->commit_root; struct btrfs_key snap_key; if (root->commit_root == root->node) @@ -204,7 +106,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct memcpy(&snap_key, &root->root_key, sizeof(snap_key)); root->root_key.offset++; - btrfs_set_root_blocknr(&root->root_item, root->node->blocknr); + btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); BUG_ON(ret); @@ -220,7 +122,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_finish_extent_commit(trans, root->fs_info->tree_root); root->commit_root = root->node; - root->node->count++; + get_bh(root->node); ret = btrfs_drop_snapshot(trans, root, snap); BUG_ON(ret); @@ -234,7 +136,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct static int __setup_root(struct btrfs_super_block *super, struct btrfs_root *root, struct btrfs_fs_info *fs_info, - u64 objectid, int fp) + u64 objectid) { root->node = NULL; root->commit_root = NULL; @@ -250,11 +152,11 @@ static int find_and_setup_root(struct btrfs_super_block *super, struct btrfs_root *tree_root, struct btrfs_fs_info *fs_info, u64 objectid, - struct btrfs_root *root, int fp) + struct btrfs_root *root) { int ret; - __setup_root(super, root, fs_info, objectid, fp); + __setup_root(super, root, fs_info, objectid); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); @@ -265,32 +167,26 @@ static int find_and_setup_root(struct btrfs_super_block *super, return 0; } -struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) +struct btrfs_root *open_ctree(struct super_block *sb, + struct buffer_head *sb_buffer, + struct btrfs_super_block *disk_super) { - int fp; - - fp = open(filename, O_CREAT | O_RDWR, 0600); - if (fp < 0) { - return NULL; - } - return open_ctree_fd(fp, super); -} - -struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super) -{ - struct btrfs_root *root = malloc(sizeof(struct btrfs_root)); - struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root)); - struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root)); - struct btrfs_root *inode_root = malloc(sizeof(struct btrfs_root)); - struct btrfs_fs_info *fs_info = malloc(sizeof(*fs_info)); + struct btrfs_root *root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); + struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); + struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); + struct btrfs_root *inode_root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); + struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), + GFP_NOFS); int ret; - INIT_RADIX_TREE(&fs_info->cache_radix, GFP_KERNEL); + /* FIXME: don't be stupid */ + if (!btrfs_super_root(disk_super)) + return NULL; INIT_RADIX_TREE(&fs_info->pinned_radix, GFP_KERNEL); - INIT_LIST_HEAD(&fs_info->trans); - INIT_LIST_HEAD(&fs_info->cache); - fs_info->cache_size = 0; - fs_info->fp = fp; fs_info->running_transaction = NULL; fs_info->fs_root = root; fs_info->tree_root = tree_root; @@ -298,36 +194,31 @@ struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super) fs_info->inode_root = inode_root; fs_info->last_inode_alloc = 0; fs_info->last_inode_alloc_dirid = 0; - fs_info->disk_super = super; + fs_info->disk_super = disk_super; + fs_info->sb_buffer = sb_buffer; + fs_info->sb = sb; memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); - ret = pread(fp, super, sizeof(struct btrfs_super_block), - BTRFS_SUPER_INFO_OFFSET); - if (ret == 0 || btrfs_super_root(super) == 0) { - BUG(); - return NULL; - } - BUG_ON(ret < 0); - - __setup_root(super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID, fp); - tree_root->node = read_tree_block(tree_root, btrfs_super_root(super)); + __setup_root(disk_super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); + tree_root->node = read_tree_block(tree_root, + btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); - ret = find_and_setup_root(super, tree_root, fs_info, - BTRFS_EXTENT_TREE_OBJECTID, extent_root, fp); + ret = find_and_setup_root(disk_super, tree_root, fs_info, + BTRFS_EXTENT_TREE_OBJECTID, extent_root); BUG_ON(ret); - ret = find_and_setup_root(super, tree_root, fs_info, - BTRFS_INODE_MAP_OBJECTID, inode_root, fp); + ret = find_and_setup_root(disk_super, tree_root, fs_info, + BTRFS_INODE_MAP_OBJECTID, inode_root); BUG_ON(ret); - ret = find_and_setup_root(super, tree_root, fs_info, - BTRFS_FS_TREE_OBJECTID, root, fp); + ret = find_and_setup_root(disk_super, tree_root, fs_info, + BTRFS_FS_TREE_OBJECTID, root); BUG_ON(ret); root->commit_root = root->node; - root->node->count++; + get_bh(root->node); root->ref_cows = 1; root->fs_info->generation = root->root_key.offset + 1; return root; @@ -336,8 +227,11 @@ struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super) int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_super_block *s) { + return 0; +#if 0 int ret; - btrfs_set_super_root(s, root->fs_info->tree_root->node->blocknr); + btrfs_set_super_root(s, root->fs_info->tree_root->node->b_blocknr); + ret = pwrite(root->fs_info->fp, s, sizeof(*s), BTRFS_SUPER_INFO_OFFSET); if (ret != sizeof(*s)) { @@ -345,35 +239,38 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } return 0; +#endif } static int drop_cache(struct btrfs_root *root) { + return 0; +#if 0 while(!list_empty(&root->fs_info->cache)) { - struct btrfs_buffer *b = list_entry(root->fs_info->cache.next, - struct btrfs_buffer, + struct buffer_head *b = list_entry(root->fs_info->cache.next, + struct buffer_head, cache); list_del_init(&b->cache); btrfs_block_release(root, b); } return 0; +#endif } -int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) + +int close_ctree(struct btrfs_root *root) { int ret; struct btrfs_trans_handle *trans; trans = root->fs_info->running_transaction; - btrfs_commit_transaction(trans, root, s); + btrfs_commit_transaction(trans, root, root->fs_info->disk_super); ret = commit_tree_roots(trans, root->fs_info); BUG_ON(ret); ret = __commit_transaction(trans, root); BUG_ON(ret); - write_ctree_super(trans, root, s); + write_ctree_super(trans, root, root->fs_info->disk_super); drop_cache(root); - BUG_ON(!list_empty(&root->fs_info->trans)); - close(root->fs_info->fp); if (root->node) btrfs_block_release(root, root->node); if (root->fs_info->extent_root->node) @@ -386,29 +283,17 @@ int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) btrfs_block_release(root->fs_info->tree_root, root->fs_info->tree_root->node); btrfs_block_release(root, root->commit_root); - free(root); - printf("on close %d blocks are allocated\n", allocated_blocks); + btrfs_block_release(root, root->fs_info->sb_buffer); + kfree(root->fs_info->extent_root); + kfree(root->fs_info->inode_root); + kfree(root->fs_info->tree_root); + kfree(root->fs_info); + kfree(root); return 0; } -void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf) +void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) { - buf->count--; - if (buf->count < 0) - BUG(); - if (buf->count == 0) { - BUG_ON(!list_empty(&buf->cache)); - BUG_ON(!list_empty(&buf->dirty)); - if (!radix_tree_lookup(&root->fs_info->cache_radix, - buf->blocknr)) - BUG(); - radix_tree_delete(&root->fs_info->cache_radix, buf->blocknr); - memset(buf, 0, sizeof(*buf)); - free(buf); - BUG_ON(allocated_blocks == 0); - allocated_blocks--; - BUG_ON(root->fs_info->cache_size == 0); - root->fs_info->cache_size--; - } + brelse(buf); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index d888cf5c350..7f4bb729b73 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -1,36 +1,41 @@ #ifndef __DISKIO__ #define __DISKIO__ -#include "list.h" -struct btrfs_buffer { - u64 blocknr; - int count; - struct list_head dirty; - struct list_head cache; - union { - struct btrfs_node node; - struct btrfs_leaf leaf; - }; -}; +#include -struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr); -struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr); +#define BTRFS_SUPER_INFO_OFFSET (16 * 1024) + +static inline struct btrfs_node *btrfs_buffer_node(struct buffer_head *bh) +{ + return (struct btrfs_node *)bh->b_data; +} + +static inline struct btrfs_leaf *btrfs_buffer_leaf(struct buffer_head *bh) +{ + return (struct btrfs_leaf *)bh->b_data; +} + +static inline struct btrfs_header *btrfs_buffer_header(struct buffer_head *bh) +{ + return &((struct btrfs_node *)bh->b_data)->header; +} + +struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr); +struct buffer_head *find_tree_block(struct btrfs_root *root, u64 blocknr); int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf); + struct buffer_head *buf); int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf); + struct buffer_head *buf); int clean_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_buffer *buf); + struct btrfs_root *root, struct buffer_head *buf); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_super_block *s); -struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *s); -struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super); -int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s); -void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf); +struct btrfs_root *open_ctree(struct super_block *sb, + struct buffer_head *sb_buffer, + struct btrfs_super_block *disk_super); +int close_ctree(struct btrfs_root *root); +void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_super_block *s); int mkfs(int fd, u64 num_blocks, u32 blocksize); - -#define BTRFS_SUPER_INFO_OFFSET (16 * 1024) - #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 53a7550b5c1..e3af2c03568 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10,9 +10,8 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root search_end, struct btrfs_key *ins); static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); -static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root - *extent_root); - +static int del_pending_extents(struct btrfs_trans_handle *trans, struct + btrfs_root *extent_root); /* * pending extents are blocks that we're trying to allocate in the extent * map while trying to grow the map because of other allocations. To avoid @@ -21,6 +20,7 @@ static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root * manner for deletes. */ #define CTREE_EXTENT_PENDING_DEL 0 +#define CTREE_EXTENT_PINNED 1 static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr) @@ -45,15 +45,14 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root if (ret != 0) BUG(); BUG_ON(ret != 0); - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(item); btrfs_set_extent_refs(item, refs + 1); - BUG_ON(list_empty(&path.nodes[0]->dirty)); btrfs_release_path(root->fs_info->extent_root, &path); finish_current_insert(trans, root->fs_info->extent_root); - run_pending(trans, root->fs_info->extent_root); + del_pending_extents(trans, root->fs_info->extent_root); return 0; } @@ -74,7 +73,7 @@ static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root 0, 0); if (ret != 0) BUG(); - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); *refs = btrfs_extent_refs(item); btrfs_release_path(root->fs_info->extent_root, &path); @@ -82,18 +81,20 @@ static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf) + struct buffer_head *buf) { u64 blocknr; + struct btrfs_node *buf_node; int i; if (!root->ref_cows) return 0; - if (btrfs_is_leaf(&buf->node)) + buf_node = btrfs_buffer_node(buf); + if (btrfs_is_leaf(buf_node)) return 0; - for (i = 0; i < btrfs_header_nritems(&buf->node.header); i++) { - blocknr = btrfs_node_blockptr(&buf->node, i); + for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) { + blocknr = btrfs_node_blockptr(buf_node, i); inc_block_ref(trans, root, blocknr); } return 0; @@ -108,9 +109,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct int i; while(1) { - ret = radix_tree_gang_lookup(&root->fs_info->pinned_radix, + ret = radix_tree_gang_lookup_tag(&root->fs_info->pinned_radix, (void **)gang, 0, - ARRAY_SIZE(gang)); + ARRAY_SIZE(gang), + CTREE_EXTENT_PINNED); if (!ret) break; if (!first) @@ -137,7 +139,7 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_set_extent_refs(&extent_item, 1); btrfs_set_extent_owner(&extent_item, - btrfs_header_parentid(&extent_root->node->node.header)); + btrfs_header_parentid(btrfs_buffer_header(extent_root->node))); ins.offset = 1; ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); @@ -156,11 +158,24 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct return 0; } +static int pin_down_block(struct btrfs_root *root, u64 blocknr, int tag) +{ + int err; + err = radix_tree_insert(&root->fs_info->pinned_radix, + blocknr, (void *)blocknr); + BUG_ON(err); + if (err) + return err; + radix_tree_tag_set(&root->fs_info->pinned_radix, blocknr, + tag); + return 0; +} + /* * remove an extent from the root, returns 0 on success */ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks, int pin) + *root, u64 blocknr, u64 num_blocks) { struct btrfs_path path; struct btrfs_key key; @@ -171,7 +186,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key ins; u32 refs; - BUG_ON(pin && num_blocks != 1); key.objectid = blocknr; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -186,26 +200,18 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root printk("failed to find %Lu\n", key.objectid); BUG(); } - ei = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], + ei = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], struct btrfs_extent_item); BUG_ON(ei->refs == 0); refs = btrfs_extent_refs(ei) - 1; btrfs_set_extent_refs(ei, refs); if (refs == 0) { u64 super_blocks_used; - if (pin) { - int err; - radix_tree_preload(GFP_KERNEL); - err = radix_tree_insert(&info->pinned_radix, - blocknr, (void *)blocknr); - BUG_ON(err); - radix_tree_preload_end(); - } super_blocks_used = btrfs_super_blocks_used(info->disk_super); btrfs_set_super_blocks_used(info->disk_super, super_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, &path); - if (!pin && extent_root->fs_info->last_insert.objectid > + if (extent_root->fs_info->last_insert.objectid > blocknr) extent_root->fs_info->last_insert.objectid = blocknr; if (ret) @@ -224,39 +230,32 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root) { int ret; - struct btrfs_buffer *gang[4]; + int wret; + int err = 0; + unsigned long gang[4]; int i; + struct radix_tree_root *radix = &extent_root->fs_info->pinned_radix; while(1) { ret = radix_tree_gang_lookup_tag( - &extent_root->fs_info->cache_radix, + &extent_root->fs_info->pinned_radix, (void **)gang, 0, ARRAY_SIZE(gang), CTREE_EXTENT_PENDING_DEL); if (!ret) break; for (i = 0; i < ret; i++) { - ret = __free_extent(trans, extent_root, - gang[i]->blocknr, 1, 1); - radix_tree_tag_clear(&extent_root->fs_info->cache_radix, - gang[i]->blocknr, + radix_tree_tag_set(radix, gang[i], CTREE_EXTENT_PINNED); + radix_tree_tag_clear(radix, gang[i], CTREE_EXTENT_PENDING_DEL); - btrfs_block_release(extent_root, gang[i]); + wret = __free_extent(trans, extent_root, gang[i], 1); + if (wret) + err = wret; } } - return 0; + return err; } -static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root - *extent_root) -{ - while(radix_tree_tagged(&extent_root->fs_info->cache_radix, - CTREE_EXTENT_PENDING_DEL)) - del_pending_extents(trans, extent_root); - return 0; -} - - /* * remove an extent from the root, returns 0 on success */ @@ -264,18 +263,21 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin) { struct btrfs_root *extent_root = root->fs_info->extent_root; - struct btrfs_buffer *t; + struct buffer_head *t; int pending_ret; int ret; if (root == extent_root) { t = find_tree_block(root, blocknr); - radix_tree_tag_set(&root->fs_info->cache_radix, blocknr, - CTREE_EXTENT_PENDING_DEL); + pin_down_block(root, blocknr, CTREE_EXTENT_PENDING_DEL); return 0; } - ret = __free_extent(trans, root, blocknr, num_blocks, pin); - pending_ret = run_pending(trans, root->fs_info->extent_root); + if (pin) { + ret = pin_down_block(root, blocknr, CTREE_EXTENT_PINNED); + BUG_ON(ret); + } + ret = __free_extent(trans, root, blocknr, num_blocks); + pending_ret = del_pending_extents(trans, root->fs_info->extent_root); return ret ? ret : pending_ret; } @@ -296,14 +298,16 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int ret; u64 hole_size = 0; int slot = 0; - u64 last_block; + u64 last_block = 0; u64 test_block; int start_found; struct btrfs_leaf *l; struct btrfs_root * root = orig_root->fs_info->extent_root; int total_needed = num_blocks; + int level; - total_needed += (btrfs_header_level(&root->node->node.header) + 1) * 3; + level = btrfs_header_level(btrfs_buffer_header(root->node)); + total_needed += (level + 1) * 3; if (root->fs_info->last_insert.objectid > search_start) search_start = root->fs_info->last_insert.objectid; @@ -323,7 +327,7 @@ check_failed: path.slots[0]--; while (1) { - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); slot = path.slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { ret = btrfs_next_leaf(root, &path); @@ -429,7 +433,7 @@ static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root sizeof(extent_item)); finish_current_insert(trans, extent_root); - pending_ret = run_pending(trans, extent_root); + pending_ret = del_pending_extents(trans, extent_root); if (ret) return ret; if (pending_ret) @@ -441,16 +445,15 @@ static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root * helper function to allocate a block for a given tree * returns the tree buffer or NULL. */ -struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, +struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_key ins; int ret; - struct btrfs_buffer *buf; + struct buffer_head *buf; ret = alloc_extent(trans, root, 1, 0, (unsigned long)-1, - btrfs_header_parentid(&root->node->node.header), - &ins); + btrfs_header_parentid(btrfs_buffer_header(root->node)), &ins); if (ret) { BUG(); return NULL; @@ -467,13 +470,13 @@ struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level) { - struct btrfs_buffer *next; - struct btrfs_buffer *cur; + struct buffer_head *next; + struct buffer_head *cur; u64 blocknr; int ret; u32 refs; - ret = lookup_block_ref(trans, root, path->nodes[*level]->blocknr, + ret = lookup_block_ref(trans, root, path->nodes[*level]->b_blocknr, &refs); BUG_ON(ret); if (refs > 1) @@ -484,9 +487,10 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root while(*level > 0) { cur = path->nodes[*level]; if (path->slots[*level] >= - btrfs_header_nritems(&cur->node.header)) + btrfs_header_nritems(btrfs_buffer_header(cur))) break; - blocknr = btrfs_node_blockptr(&cur->node, path->slots[*level]); + blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), + path->slots[*level]); ret = lookup_block_ref(trans, root, blocknr, &refs); if (refs != 1 || *level == 1) { path->slots[*level]++; @@ -499,12 +503,12 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root if (path->nodes[*level-1]) btrfs_block_release(root, path->nodes[*level-1]); path->nodes[*level-1] = next; - *level = btrfs_header_level(&next->node.header); + *level = btrfs_header_level(btrfs_buffer_header(next)); path->slots[*level] = 0; } out: - ret = btrfs_free_extent(trans, root, path->nodes[*level]->blocknr, 1, - 1); + ret = btrfs_free_extent(trans, root, path->nodes[*level]->b_blocknr, + 1, 1); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -525,14 +529,14 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root int ret; for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; - if (slot < - btrfs_header_nritems(&path->nodes[i]->node.header)- 1) { + if (slot < btrfs_header_nritems( + btrfs_buffer_header(path->nodes[i])) - 1) { path->slots[i]++; *level = i; return 0; } else { ret = btrfs_free_extent(trans, root, - path->nodes[*level]->blocknr, + path->nodes[*level]->b_blocknr, 1, 1); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; @@ -549,7 +553,7 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root * decremented. */ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_buffer *snap) + *root, struct buffer_head *snap) { int ret = 0; int wret; @@ -560,7 +564,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_init_path(&path); - level = btrfs_header_level(&snap->node.header); + level = btrfs_header_level(btrfs_buffer_header(snap)); orig_level = level; path.nodes[level] = snap; path.slots[level] = 0; diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index 6c2a71a46c7..22519b8e0cf 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c @@ -10,6 +10,7 @@ * License. */ +#include #define DELTA 0x9E3779B9 static void TEA_transform(__u32 buf[2], __u32 const in[]) @@ -63,6 +64,14 @@ int btrfs_name_hash(const char *name, int len, u64 *hash_result) const char *p; __u32 in[8], buf[2]; + if (len == 1 && *name == '.') { + *hash_result = 1; + return 0; + } else if (len == 2 && name[0] == '.' && name[1] == '.') { + *hash_result = 2; + return 0; + } + /* Initialize the default seed for the hash checksum functions */ buf[0] = 0x67452301; buf[1] = 0xefcdab89; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index c7fda3bf7b2..c45aec258bd 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -15,7 +15,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, int ret; u64 hole_size = 0; int slot = 0; - u64 last_ino; + u64 last_ino = 0; int start_found; struct btrfs_leaf *l; struct btrfs_root *root = fs_root->fs_info->inode_root; @@ -40,7 +40,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, path.slots[0]--; while (1) { - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); slot = path.slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { ret = btrfs_next_leaf(root, &path); @@ -105,8 +105,8 @@ int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, if (ret) goto out; - inode_item = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], - struct btrfs_inode_map_item); + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], struct btrfs_inode_map_item); btrfs_cpu_key_to_disk(&inode_item->key, location); out: btrfs_release_path(inode_root, &path); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index aa2d3fac880..c8ee938c125 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -17,7 +17,6 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) printk("leaf %Lu total ptrs %d free space %d\n", btrfs_header_blocknr(&l->header), nr, btrfs_leaf_free_space(root, l)); - fflush(stdout); for (i = 0 ; i < nr ; i++) { item = l->items + i; type = btrfs_disk_key_type(&item->key); @@ -67,10 +66,10 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) btrfs_leaf_data(l) + btrfs_item_offset(item)); break; }; - fflush(stdout); } } -void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t) + +void btrfs_print_tree(struct btrfs_root *root, struct buffer_head *t) { int i; u32 nr; @@ -78,16 +77,16 @@ void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t) if (!t) return; - c = &t->node; + c = btrfs_buffer_node(t); nr = btrfs_header_nritems(&c->header); if (btrfs_is_leaf(c)) { btrfs_print_leaf(root, (struct btrfs_leaf *)c); return; } - printk("node %Lu level %d total ptrs %d free spc %u\n", t->blocknr, - btrfs_header_level(&c->header), nr, - (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); - fflush(stdout); + printk("node %Lu level %d total ptrs %d free spc %u\n", + btrfs_header_blocknr(&c->header), + btrfs_header_level(&c->header), nr, + (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); for (i = 0; i < nr; i++) { printk("\tkey %d (%Lu %u %Lu) block %Lu\n", i, @@ -95,12 +94,11 @@ void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t) c->ptrs[i].key.flags, c->ptrs[i].key.offset, btrfs_node_blockptr(c, i)); - fflush(stdout); } for (i = 0; i < nr; i++) { - struct btrfs_buffer *next_buf = read_tree_block(root, + struct buffer_head *next_buf = read_tree_block(root, btrfs_node_blockptr(c, i)); - struct btrfs_node *next = &next_buf->node; + struct btrfs_node *next = btrfs_buffer_node(next_buf); if (btrfs_is_leaf(next) && btrfs_header_level(&c->header) != 1) BUG(); diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 0882ca904ec..396041a05cf 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -1,5 +1,5 @@ #ifndef __PRINT_TREE_ #define __PRINT_TREE_ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l); -void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t); +void btrfs_print_tree(struct btrfs_root *root, struct buffer_head *t); #endif diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 52c83be4b30..a4554c007ef 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -21,7 +21,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, if (ret < 0) goto out; BUG_ON(ret == 0); - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); BUG_ON(path.slots[0] == 0); slot = path.slots[0] - 1; if (btrfs_disk_key_objectid(&l->items[slot].key) != objectid) { @@ -51,7 +51,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root if (ret < 0) goto out; BUG_ON(ret != 0); - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); slot = path.slots[0]; memcpy(btrfs_item_ptr(l, slot, struct btrfs_root_item), item, sizeof(*item)); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4ae76044aea..ccc056aad69 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -8,13 +9,18 @@ #include #include #include "ctree.h" +#include "disk-io.h" #define BTRFS_SUPER_MAGIC 0x9123682E + +static struct inode_operations btrfs_dir_inode_operations; +static struct super_operations btrfs_super_ops; +static struct file_operations btrfs_dir_file_operations; + #if 0 /* some random number */ static struct super_operations ramfs_ops; -static struct inode_operations ramfs_dir_inode_operations; static struct backing_dev_info ramfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ @@ -129,46 +135,243 @@ static struct inode_operations ramfs_dir_inode_operations = { }; #endif -struct inode *btrfs_get_inode(struct super_block *sb, int mode, dev_t dev) +static void btrfs_read_locked_inode(struct inode *inode) { - struct inode * inode = new_inode(sb); + struct btrfs_path path; + struct btrfs_inode_item *inode_item; + struct btrfs_root *root = btrfs_sb(inode->i_sb); + int ret; +printk("read locked inode %lu\n", inode->i_ino); + btrfs_init_path(&path); + ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0); + if (ret) { + make_bad_inode(inode); + return; + } + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], + struct btrfs_inode_item); - if (inode) { - inode->i_mode = mode; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; - inode->i_blocks = 0; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; +printk("found locked inode %lu\n", inode->i_ino); + inode->i_mode = btrfs_inode_mode(inode_item); + inode->i_nlink = btrfs_inode_nlink(inode_item); + inode->i_uid = btrfs_inode_uid(inode_item); + inode->i_gid = btrfs_inode_gid(inode_item); + inode->i_size = btrfs_inode_size(inode_item); + inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime); + inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime); + inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime); + inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime); + inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime); + inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); + inode->i_blocks = btrfs_inode_nblocks(inode_item); + inode->i_generation = btrfs_inode_generation(inode_item); +printk("about to release\n"); + btrfs_release_path(root, &path); + switch (inode->i_mode & S_IFMT) { +#if 0 + default: + init_special_inode(inode, inode->i_mode, + btrfs_inode_rdev(inode_item)); + break; +#endif + case S_IFREG: +printk("inode %lu now a file\n", inode->i_ino); + break; + case S_IFDIR: +printk("inode %lu now a directory\n", inode->i_ino); + inode->i_op = &btrfs_dir_inode_operations; + inode->i_fop = &btrfs_dir_file_operations; + break; + case S_IFLNK: +printk("inode %lu now a link\n", inode->i_ino); + // inode->i_op = &page_symlink_inode_operations; + break; } - return inode; +printk("returning!\n"); + return; } -static struct super_operations btrfs_ops = { - .statfs = simple_statfs, - .drop_inode = generic_delete_inode, -}; +static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, + ino_t *ino) +{ + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + struct btrfs_dir_item *di; + struct btrfs_path path; + struct btrfs_root *root = btrfs_sb(dir->i_sb); + int ret; + + btrfs_init_path(&path); + ret = btrfs_lookup_dir_item(NULL, root, &path, dir->i_ino, name, + namelen, 0); + if (ret) { + *ino = 0; + goto out; + } + di = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + struct btrfs_dir_item); + *ino = btrfs_dir_objectid(di); +out: + btrfs_release_path(root, &path); + return ret; +} + +static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct inode * inode; + ino_t ino; + int ret; + + if (dentry->d_name.len > BTRFS_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + + ret = btrfs_inode_by_name(dir, dentry, &ino); + if (ret < 0) + return ERR_PTR(ret); + inode = NULL; + if (ino) { +printk("lookup on %.*s returns %lu\n", dentry->d_name.len, dentry->d_name.name, ino); + inode = iget(dir->i_sb, ino); + if (!inode) + return ERR_PTR(-EACCES); + } + return d_splice_alias(inode, dentry); +} + +static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_item *item; + struct btrfs_dir_item *di; + struct btrfs_key key; + struct btrfs_path path; + int ret; + u32 nritems; + struct btrfs_leaf *leaf; + int slot; + int advance; + unsigned char d_type = DT_UNKNOWN; + int over; + + key.objectid = inode->i_ino; +printk("readdir on dir %Lu pos %Lu\n", key.objectid, filp->f_pos); + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); + key.offset = filp->f_pos; + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) { + goto err; + } +printk("first ret %d\n", ret); + advance = filp->f_pos > 0 && ret != 0; + while(1) { + leaf = btrfs_buffer_leaf(path.nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + slot = path.slots[0]; +printk("leaf %Lu nritems %lu slot %d\n", path.nodes[0]->b_blocknr, nritems, slot); + if (advance) { +printk("advancing!\n"); + if (slot == nritems -1) { + ret = btrfs_next_leaf(root, &path); + if (ret) + break; + leaf = btrfs_buffer_leaf(path.nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + slot = path.slots[0]; +printk("2leaf %Lu nritems %lu slot %d\n", path.nodes[0]->b_blocknr, nritems, slot); + } else { + slot++; + path.slots[0]++; + } + } + advance = 1; + item = leaf->items + slot; +printk("item key %Lu %u %Lu\n", btrfs_disk_key_objectid(&item->key), + btrfs_disk_key_flags(&item->key), btrfs_disk_key_offset(&item->key)); + if (btrfs_disk_key_objectid(&item->key) != key.objectid) + break; + if (btrfs_disk_key_type(&item->key) != BTRFS_DIR_ITEM_KEY) + continue; + di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); +printk("filldir name %.*s, objectid %Lu\n", btrfs_dir_name_len(di), + (const char *)(di + 1), btrfs_dir_objectid(di)); + over = filldir(dirent, (const char *)(di + 1), + btrfs_dir_name_len(di), + btrfs_disk_key_offset(&item->key), + btrfs_dir_objectid(di), d_type); + if (over) + break; + filp->f_pos = btrfs_disk_key_offset(&item->key) + 1; + } +printk("filldir all done\n"); + ret = 0; +err: + btrfs_release_path(root, &path); + return ret; +} + +static void btrfs_put_super (struct super_block * sb) +{ + struct btrfs_root *root = btrfs_sb(sb); + int ret; + + ret = close_ctree(root); + if (ret) { + printk("close ctree returns %d\n", ret); + } + sb->s_fs_info = NULL; +} static int btrfs_fill_super(struct super_block * sb, void * data, int silent) { struct inode * inode; - struct dentry * root; + struct dentry * root_dentry; + struct btrfs_super_block *disk_super; + struct buffer_head *bh; + struct btrfs_root *root; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = BTRFS_SUPER_MAGIC; - sb->s_op = &btrfs_ops; + sb->s_op = &btrfs_super_ops; sb->s_time_gran = 1; - inode = btrfs_get_inode(sb, S_IFDIR | 0755, 0); + + bh = sb_bread(sb, BTRFS_SUPER_INFO_OFFSET / sb->s_blocksize); + if (!bh) { + printk("btrfs: unable to read on disk super\n"); + return -EIO; + } + disk_super = (struct btrfs_super_block *)bh->b_data; + root = open_ctree(sb, bh, disk_super); + sb->s_fs_info = root; + if (!root) { + printk("btrfs: open_ctree failed\n"); + return -EIO; + } + printk("read in super total blocks %Lu root %Lu\n", + btrfs_super_total_blocks(disk_super), + btrfs_super_root_dir(disk_super)); + + inode = iget_locked(sb, btrfs_super_root_dir(disk_super)); if (!inode) return -ENOMEM; + if (inode->i_state & I_NEW) { + btrfs_read_locked_inode(inode); + unlock_new_inode(inode); + } - root = d_alloc_root(inode); - if (!root) { + root_dentry = d_alloc_root(inode); + if (!root_dentry) { iput(inode); return -ENOMEM; } - sb->s_root = root; + sb->s_root = root_dentry; + return 0; } @@ -187,6 +390,24 @@ static struct file_system_type btrfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; +static struct super_operations btrfs_super_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, + .put_super = btrfs_put_super, + .read_inode = btrfs_read_locked_inode, +}; + +static struct inode_operations btrfs_dir_inode_operations = { + .lookup = btrfs_lookup, +}; + +static struct file_operations btrfs_dir_file_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = btrfs_readdir, +}; + + static int __init init_btrfs_fs(void) { printk("btrfs loaded!\n"); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 3adb6e69fe4..9ab27b7491c 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -10,7 +10,7 @@ struct btrfs_trans_handle { static inline struct btrfs_trans_handle * btrfs_start_transaction(struct btrfs_root *root, int num_blocks) { - struct btrfs_trans_handle *h = malloc(sizeof(*h)); + struct btrfs_trans_handle *h = kmalloc(sizeof(*h), GFP_NOFS); h->transid = root->root_key.offset; h->blocks_reserved = num_blocks; h->blocks_used = 0; @@ -21,7 +21,7 @@ static inline void btrfs_free_transaction(struct btrfs_root *root, struct btrfs_trans_handle *handle) { memset(handle, 0, sizeof(*handle)); - free(handle); + kfree(handle); } #endif -- cgit v1.2.3-70-g09d2 From d571976292839cec05a2820b08f7629b145ed157 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 23 Mar 2007 10:01:08 -0400 Subject: btrfs_create, btrfs_write_super, btrfs_sync_fs Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 + fs/btrfs/ctree.c | 49 +++++++++++-------- fs/btrfs/ctree.h | 2 +- fs/btrfs/dir-item.c | 3 +- fs/btrfs/disk-io.c | 23 ++++----- fs/btrfs/extent-tree.c | 26 ++++++---- fs/btrfs/inode-map.c | 1 + fs/btrfs/root-tree.c | 1 + fs/btrfs/super.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/transaction.c | 13 +++-- 10 files changed, 201 insertions(+), 47 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index c5d67bbd4d9..4b5bd05de94 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -3,6 +3,7 @@ * Fix ENOSPC handling * make a real mkfs and superblock * Do checksumming +* Use a real check instead of mark_buffer_dirty * Define FS objects in terms of different item types * Add block mapping tree (simple dm layer) * Add simple tree locking (semaphore per tree) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e690e2bb47d..052434a328e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -39,7 +39,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root struct buffer_head *cow; struct btrfs_node *cow_node; - if (!buffer_dirty(buf)) { + if (buffer_dirty(buf)) { *cow_ret = buf; return 0; } @@ -48,6 +48,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr); *cow_ret = cow; + mark_buffer_dirty(cow); btrfs_inc_ref(trans, root, buf); if (buf == root->node) { root->node = cow; @@ -58,7 +59,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root } else { btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, cow->b_blocknr); - BUG_ON(!buffer_dirty(parent)); + mark_buffer_dirty(parent); btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); } btrfs_block_release(root, buf); @@ -362,7 +363,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root memcpy(&parent->ptrs[pslot + 1].key, &right->ptrs[0].key, sizeof(struct btrfs_disk_key)); - BUG_ON(!buffer_dirty(parent_buf)); + mark_buffer_dirty(parent_buf); } } if (btrfs_header_nritems(&mid->header) == 1) { @@ -398,7 +399,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root /* update the parent key to reflect our changes */ memcpy(&parent->ptrs[pslot].key, &mid->ptrs[0].key, sizeof(struct btrfs_disk_key)); - BUG_ON(!buffer_dirty(parent_buf)); + mark_buffer_dirty(parent_buf); } /* update the path */ @@ -539,7 +540,7 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root break; t = btrfs_buffer_node(path->nodes[i]); memcpy(&t->ptrs[tslot].key, key, sizeof(*key)); - BUG_ON(!buffer_dirty(path->nodes[i])); + mark_buffer_dirty(path->nodes[i]); if (tslot != 0) break; } @@ -583,8 +584,8 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root } btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - BUG_ON(!buffer_dirty(src_buf)); - BUG_ON(!buffer_dirty(dst_buf)); + mark_buffer_dirty(src_buf); + mark_buffer_dirty(dst_buf); return ret; } @@ -631,8 +632,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - BUG_ON(!buffer_dirty(src_buf)); - BUG_ON(!buffer_dirty(dst_buf)); + mark_buffer_dirty(src_buf); + mark_buffer_dirty(dst_buf); return ret; } @@ -669,6 +670,9 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root lower_key = &lower->ptrs[0].key; memcpy(&c->ptrs[0].key, lower_key, sizeof(struct btrfs_disk_key)); btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->b_blocknr); + + mark_buffer_dirty(t); + /* the super has an extra ref to root->node */ btrfs_block_release(root, root->node); root->node = t; @@ -708,7 +712,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root memcpy(&lower->ptrs[slot].key, key, sizeof(struct btrfs_disk_key)); btrfs_set_node_blockptr(lower, slot, blocknr); btrfs_set_header_nritems(&lower->header, nritems + 1); - BUG_ON(!buffer_dirty(path->nodes[level])); + mark_buffer_dirty(path->nodes[level]); return 0; } @@ -755,7 +759,8 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&c->header, mid); ret = 0; - BUG_ON(!buffer_dirty(t)); + mark_buffer_dirty(t); + mark_buffer_dirty(split_buffer); wret = insert_ptr(trans, root, path, &split->ptrs[0].key, split_buffer->b_blocknr, path->slots[level + 1] + 1, level + 1); @@ -886,11 +891,11 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root left_nritems -= push_items; btrfs_set_header_nritems(&left->header, left_nritems); - BUG_ON(!buffer_dirty(left_buf)); - BUG_ON(!buffer_dirty(right_buf)); + mark_buffer_dirty(left_buf); + mark_buffer_dirty(right_buf); memcpy(&upper_node->ptrs[slot + 1].key, &right->items[0].key, sizeof(struct btrfs_disk_key)); - BUG_ON(!buffer_dirty(upper)); + mark_buffer_dirty(upper); /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left_nritems) { @@ -1004,8 +1009,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root push_space = btrfs_item_offset(right->items + i); } - BUG_ON(!buffer_dirty(t)); - BUG_ON(!buffer_dirty(right_buf)); + mark_buffer_dirty(t); + mark_buffer_dirty(right_buf); wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); if (wret) @@ -1115,8 +1120,8 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root right_buffer->b_blocknr, path->slots[1] + 1, 1); if (wret) ret = wret; - BUG_ON(!buffer_dirty(right_buffer)); - BUG_ON(!buffer_dirty(l_buf)); + mark_buffer_dirty(right_buffer); + mark_buffer_dirty(l_buf); BUG_ON(path->slots[0] != slot); if (mid <= slot) { btrfs_block_release(root, path->nodes[0]); @@ -1202,12 +1207,12 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_item_offset(leaf->items + slot, data_end - data_size); btrfs_set_item_size(leaf->items + slot, data_size); btrfs_set_header_nritems(&leaf->header, nritems + 1); + mark_buffer_dirty(leaf_buf); ret = 0; if (slot == 0) ret = fixup_low_keys(trans, root, path, &disk_key, 1); - BUG_ON(!buffer_dirty(leaf_buf)); if (btrfs_leaf_free_space(root, leaf) < 0) BUG(); check_leaf(root, path, 0); @@ -1233,6 +1238,7 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root ptr = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], u8); memcpy(ptr, data, data_size); + mark_buffer_dirty(path.nodes[0]); } btrfs_release_path(root, &path); return ret; @@ -1273,7 +1279,7 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; } - BUG_ON(!buffer_dirty(parent)); + mark_buffer_dirty(parent); return ret; } @@ -1368,8 +1374,11 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; } else { + mark_buffer_dirty(leaf_buf); btrfs_block_release(root, leaf_buf); } + } else { + mark_buffer_dirty(leaf_buf); } } return ret; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4c19a3f12af..983e3cc9ae9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -833,7 +833,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key); int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, char *name, int name_len, u64 dir, u64 + *root, const char *name, int name_len, u64 dir, u64 objectid, u8 type); int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 75d6e373e98..f81cbcc83b6 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -5,7 +5,7 @@ #include "transaction.h" int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, char *name, int name_len, u64 dir, u64 + *root, const char *name, int name_len, u64 dir, u64 objectid, u8 type) { int ret = 0; @@ -35,6 +35,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_dir_name_len(dir_item, name_len); name_ptr = (char *)(dir_item + 1); memcpy(name_ptr, name, name_len); + mark_buffer_dirty(path.nodes[0]); out: btrfs_release_path(root, &path); return ret; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9cacca0c525..8e1dcda0839 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -145,19 +145,20 @@ struct btrfs_root *open_ctree(struct super_block *sb, int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - return 0; -#if 0 - int ret; - btrfs_set_super_root(s, root->fs_info->tree_root->node->b_blocknr); - - ret = pwrite(root->fs_info->fp, s, sizeof(*s), - BTRFS_SUPER_INFO_OFFSET); - if (ret != sizeof(*s)) { - fprintf(stderr, "failed to write new super block err %d\n", ret); - return ret; + struct buffer_head *bh = root->fs_info->sb_buffer; + btrfs_set_super_root(root->fs_info->disk_super, + root->fs_info->tree_root->node->b_blocknr); + lock_buffer(bh); + clear_buffer_dirty(bh); + bh->b_end_io = end_buffer_write_sync; + get_bh(bh); + submit_bh(WRITE, bh); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) { + WARN_ON(1); + return -EIO; } return 0; -#endif } int close_ctree(struct btrfs_root *root) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e3af2c03568..2818f1c5717 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -49,6 +49,7 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(item); btrfs_set_extent_refs(item, refs + 1); + mark_buffer_dirty(path.nodes[0]); btrfs_release_path(root->fs_info->extent_root, &path); finish_current_insert(trans, root->fs_info->extent_root); @@ -103,7 +104,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - unsigned long gang[8]; + struct buffer_head *gang[8]; u64 first = 0; int ret; int i; @@ -116,13 +117,15 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct if (!ret) break; if (!first) - first = gang[0]; + first = gang[0]->b_blocknr; for (i = 0; i < ret; i++) { radix_tree_delete(&root->fs_info->pinned_radix, - gang[i]); + gang[i]->b_blocknr); + brelse(gang[i]); } } - root->fs_info->last_insert.objectid = first; + if (root->fs_info->last_insert.objectid > first) + root->fs_info->last_insert.objectid = first; root->fs_info->last_insert.offset = 0; return 0; } @@ -161,8 +164,10 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int pin_down_block(struct btrfs_root *root, u64 blocknr, int tag) { int err; + struct buffer_head *bh = sb_getblk(root->fs_info->sb, blocknr); + BUG_ON(!bh); err = radix_tree_insert(&root->fs_info->pinned_radix, - blocknr, (void *)blocknr); + blocknr, bh); BUG_ON(err); if (err) return err; @@ -217,6 +222,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (ret) BUG(); } + mark_buffer_dirty(path.nodes[0]); btrfs_release_path(extent_root, &path); finish_current_insert(trans, extent_root); return ret; @@ -232,7 +238,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct int ret; int wret; int err = 0; - unsigned long gang[4]; + struct buffer_head *gang[4]; int i; struct radix_tree_root *radix = &extent_root->fs_info->pinned_radix; @@ -245,10 +251,12 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct if (!ret) break; for (i = 0; i < ret; i++) { - radix_tree_tag_set(radix, gang[i], CTREE_EXTENT_PINNED); - radix_tree_tag_clear(radix, gang[i], + radix_tree_tag_set(radix, gang[i]->b_blocknr, + CTREE_EXTENT_PINNED); + radix_tree_tag_clear(radix, gang[i]->b_blocknr, CTREE_EXTENT_PENDING_DEL); - wret = __free_extent(trans, extent_root, gang[i], 1); + wret = __free_extent(trans, extent_root, + gang[i]->b_blocknr, 1); if (wret) err = wret; } diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index c45aec258bd..ad2d375b830 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -108,6 +108,7 @@ int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], struct btrfs_inode_map_item); btrfs_cpu_key_to_disk(&inode_item->key, location); + mark_buffer_dirty(path.nodes[0]); out: btrfs_release_path(inode_root, &path); return ret; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index a4554c007ef..a821b5d1e23 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -55,6 +55,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root slot = path.slots[0]; memcpy(btrfs_item_ptr(l, slot, struct btrfs_root_item), item, sizeof(*item)); + mark_buffer_dirty(path.nodes[0]); out: btrfs_release_path(root, &path); return ret; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 62043082584..6080a8133d7 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -10,6 +10,7 @@ #include #include "ctree.h" #include "disk-io.h" +#include "transaction.h" #define BTRFS_SUPER_MAGIC 0x9123682E @@ -357,6 +358,131 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) return 0; } +static void fill_inode_item(struct btrfs_inode_item *item, + struct inode *inode) +{ + btrfs_set_inode_uid(item, inode->i_uid); + btrfs_set_inode_gid(item, inode->i_gid); + btrfs_set_inode_size(item, inode->i_size); + btrfs_set_inode_mode(item, inode->i_mode); + btrfs_set_inode_nlink(item, inode->i_nlink); + btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec); + btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec); + btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec); + btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec); + btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec); + btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); + btrfs_set_inode_nblocks(item, inode->i_blocks); + btrfs_set_inode_generation(item, inode->i_generation); +} + +static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, + struct inode *dir, int mode) +{ + struct inode *inode; + struct btrfs_inode_item inode_item; + struct btrfs_root *root = btrfs_sb(dir->i_sb); + struct btrfs_key key; + int ret; + u64 objectid; + + inode = new_inode(dir->i_sb); + if (!inode) + return ERR_PTR(-ENOMEM); + + ret = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); + BUG_ON(ret); + + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_mode = mode; + inode->i_ino = objectid; + inode->i_blocks = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; + fill_inode_item(&inode_item, inode); + + + key.objectid = objectid; + key.flags = 0; + key.offset = 0; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + ret = btrfs_insert_inode_map(trans, root, objectid, &key); + BUG_ON(ret); + + ret = btrfs_insert_inode(trans, root, objectid, &inode_item); + BUG_ON(ret); + + insert_inode_hash(inode); + // FIXME mark_inode_dirty(inode) + return inode; +} + +static int btrfs_add_link(struct btrfs_trans_handle *trans, + struct dentry *dentry, struct inode *inode) +{ + int ret; + ret = btrfs_insert_dir_item(trans, btrfs_sb(inode->i_sb), + dentry->d_name.name, dentry->d_name.len, + dentry->d_parent->d_inode->i_ino, + inode->i_ino, 0); + BUG_ON(ret); + return ret; +} + +static int btrfs_add_nondir(struct btrfs_trans_handle *trans, + struct dentry *dentry, struct inode *inode) +{ + int err = btrfs_add_link(trans, dentry, inode); + if (!err) { + d_instantiate(dentry, inode); + return 0; + } + inode_dec_link_count(inode); + iput(inode); + return err; +} + +static int btrfs_create(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = btrfs_sb(dir->i_sb); + struct inode *inode; + int err; + + trans = btrfs_start_transaction(root, 1); + inode = btrfs_new_inode(trans, dir, mode); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + return err; + // FIXME mark the inode dirty + err = btrfs_add_nondir(trans, dentry, inode); + dir->i_sb->s_dirt = 1; + btrfs_end_transaction(trans, root); + return err; +} + +static void btrfs_write_super(struct super_block *sb) +{ + sb->s_dirt = 0; +printk("btrfs write_super!\n"); +} + +static int btrfs_sync_fs(struct super_block *sb, int wait) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root; + int ret; + sb->s_dirt = 0; + root = btrfs_sb(sb); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_commit_transaction(trans, root); + sb->s_dirt = 0; + BUG_ON(ret); +printk("btrfs sync_fs\n"); + return 0; +} + static int btrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { @@ -377,10 +503,13 @@ static struct super_operations btrfs_super_ops = { .drop_inode = generic_delete_inode, .put_super = btrfs_put_super, .read_inode = btrfs_read_locked_inode, + .write_super = btrfs_write_super, + .sync_fs = btrfs_sync_fs, }; static struct inode_operations btrfs_dir_inode_operations = { .lookup = btrfs_lookup, + .create = btrfs_create, }; static struct file_operations btrfs_dir_file_operations = { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8dc1c170f10..4903b47c978 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -25,7 +25,7 @@ static int join_transaction(struct btrfs_root *root) init_waitqueue_head(&cur_trans->writer_wait); init_waitqueue_head(&cur_trans->commit_wait); cur_trans->in_commit = 0; - cur_trans->use_count = 0; + cur_trans->use_count = 1; cur_trans->commit_done = 0; } cur_trans->num_writers++; @@ -56,7 +56,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans; mutex_lock(&root->fs_info->trans_mutex); cur_trans = root->fs_info->running_transaction; - WARN_ON(cur_trans->num_writers <= 1); + WARN_ON(cur_trans->num_writers < 1); if (waitqueue_active(&cur_trans->writer_wait)) wake_up(&cur_trans->writer_wait); cur_trans->num_writers--; @@ -155,10 +155,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, cur_trans = root->fs_info->running_transaction; root->fs_info->running_transaction = NULL; - mutex_unlock(&root->fs_info->trans_mutex); - memcpy(&snap_key, &root->root_key, sizeof(snap_key)); - root->root_key.offset++; + if (root->node != root->commit_root) { + memcpy(&snap_key, &root->root_key, sizeof(snap_key)); + root->root_key.offset++; + } + + mutex_unlock(&root->fs_info->trans_mutex); if (btrfs_root_blocknr(&root->root_item) != root->node->b_blocknr) { btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); -- cgit v1.2.3-70-g09d2 From df2ce34c8801af8e00c7f82435fcb1e4b45a2759 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 23 Mar 2007 11:00:45 -0400 Subject: Btrfs: properly set new buffers for new blocks up to date Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 9 +++++++-- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/super.c | 3 +++ 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8e1dcda0839..2f1eb4ae4e1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -10,8 +10,13 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) BUG(); if (root->node && btrfs_header_parentid(&node->header) != - btrfs_header_parentid(btrfs_buffer_header(root->node))) - BUG(); + btrfs_header_parentid(btrfs_buffer_header(root->node))) { + printk("block %Lu parentids don't match buf %Lu, root %Lu\n", + buf->b_blocknr, + btrfs_header_parentid(&node->header), + btrfs_header_parentid(btrfs_buffer_header(root->node))); + WARN_ON(1); + } return 0; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2818f1c5717..c86f0e6152f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -467,7 +467,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return NULL; } buf = find_tree_block(root, ins.objectid); - dirty_tree_block(trans, root, buf); + set_buffer_uptodate(buf); return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6080a8133d7..8970e9a8215 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -473,7 +473,10 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) struct btrfs_trans_handle *trans; struct btrfs_root *root; int ret; + sb->s_dirt = 0; + return 0; + root = btrfs_sb(sb); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); -- cgit v1.2.3-70-g09d2 From d561c025ee3a0aee53a225bedce0ee35fe12f335 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 23 Mar 2007 19:47:49 -0400 Subject: Btrfs: very minimal locking Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 5 +++-- fs/btrfs/super.c | 26 +++++++++++++++++--------- 4 files changed, 22 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 96cec6352f1..68cafae6a85 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -231,6 +231,7 @@ struct btrfs_fs_info { struct buffer_head *sb_buffer; struct super_block *sb; struct mutex trans_mutex; + struct mutex fs_mutex; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3dea757d4cb..e32ddff55b0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -116,6 +116,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->sb_buffer = sb_buffer; fs_info->sb = sb; mutex_init(&fs_info->trans_mutex); + mutex_init(&fs_info->fs_mutex); memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c86f0e6152f..9583a9ae8b7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -168,9 +168,10 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int tag) BUG_ON(!bh); err = radix_tree_insert(&root->fs_info->pinned_radix, blocknr, bh); - BUG_ON(err); - if (err) + if (err && err != -EEXIST) { + BUG(); return err; + } radix_tree_tag_set(&root->fs_info->pinned_radix, blocknr, tag); return 0; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a2db0559469..0ca1080e019 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -453,25 +453,21 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, struct inode *inode; int err; + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); inode = btrfs_new_inode(trans, dir, mode); err = PTR_ERR(inode); if (IS_ERR(inode)) - return err; + goto out_unlock; // FIXME mark the inode dirty err = btrfs_add_nondir(trans, dentry, inode); dir->i_sb->s_dirt = 1; btrfs_end_transaction(trans, root); +out_unlock: + mutex_unlock(&root->fs_info->fs_mutex); return err; } -static void btrfs_write_super(struct super_block *sb) -{ - sb->s_dirt = 0; -printk("btrfs write_super!\n"); - filemap_flush(sb->s_bdev->bd_inode->i_mapping); -} - static int btrfs_sync_fs(struct super_block *sb, int wait) { struct btrfs_trans_handle *trans; @@ -479,17 +475,29 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) int ret; sb->s_dirt = 0; - return 0; + if (!wait) { + filemap_flush(sb->s_bdev->bd_inode->i_mapping); + return 0; + } + filemap_write_and_wait(sb->s_bdev->bd_inode->i_mapping); root = btrfs_sb(sb); + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sb->s_dirt = 0; BUG_ON(ret); printk("btrfs sync_fs\n"); + mutex_unlock(&root->fs_info->fs_mutex); return 0; } +static void btrfs_write_super(struct super_block *sb) +{ + btrfs_sync_fs(sb, 1); +} + + static int btrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { -- cgit v1.2.3-70-g09d2 From 78fae27ebf5bd35fb9b2e4213e486635eacfc0ad Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sun, 25 Mar 2007 11:35:08 -0400 Subject: Btrfs: leak fixes, pinning fixes Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 30 +++++++++++++++++++++--------- fs/btrfs/super.c | 1 + fs/btrfs/transaction.c | 32 ++++++++++++++++++-------------- 3 files changed, 40 insertions(+), 23 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9583a9ae8b7..369b960fce4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -165,13 +165,23 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int tag) { int err; struct buffer_head *bh = sb_getblk(root->fs_info->sb, blocknr); + struct btrfs_header *header; BUG_ON(!bh); + + header = btrfs_buffer_header(bh); + if (btrfs_header_generation(header) == + root->fs_info->running_transaction->transid) { + return 0; + } + err = radix_tree_insert(&root->fs_info->pinned_radix, blocknr, bh); if (err && err != -EEXIST) { BUG(); return err; } + if (err == -EEXIST) + brelse(bh); radix_tree_tag_set(&root->fs_info->pinned_radix, blocknr, tag); return 0; @@ -181,7 +191,7 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int tag) * remove an extent from the root, returns 0 on success */ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks) + *root, u64 blocknr, u64 num_blocks, int pin) { struct btrfs_path path; struct btrfs_key key; @@ -213,12 +223,18 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_extent_refs(ei, refs); if (refs == 0) { u64 super_blocks_used; + + if (pin) { + ret = pin_down_block(root, blocknr, + CTREE_EXTENT_PINNED); + BUG_ON(ret); + } + super_blocks_used = btrfs_super_blocks_used(info->disk_super); btrfs_set_super_blocks_used(info->disk_super, super_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, &path); - if (extent_root->fs_info->last_insert.objectid > - blocknr) + if (extent_root->fs_info->last_insert.objectid > blocknr) extent_root->fs_info->last_insert.objectid = blocknr; if (ret) BUG(); @@ -257,7 +273,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct radix_tree_tag_clear(radix, gang[i]->b_blocknr, CTREE_EXTENT_PENDING_DEL); wret = __free_extent(trans, extent_root, - gang[i]->b_blocknr, 1); + gang[i]->b_blocknr, 1, 0); if (wret) err = wret; } @@ -281,11 +297,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root pin_down_block(root, blocknr, CTREE_EXTENT_PENDING_DEL); return 0; } - if (pin) { - ret = pin_down_block(root, blocknr, CTREE_EXTENT_PINNED); - BUG_ON(ret); - } - ret = __free_extent(trans, root, blocknr, num_blocks); + ret = __free_extent(trans, root, blocknr, num_blocks, pin); pending_ret = del_pending_extents(trans, root->fs_info->extent_root); return ret ? ret : pending_ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0ca1080e019..094a66c267b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -146,6 +146,7 @@ static void btrfs_read_locked_inode(struct inode *inode) ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0); if (ret) { make_bad_inode(inode); + btrfs_release_path(root, &path); return; } inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4903b47c978..46a596e345f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -4,12 +4,15 @@ #include "disk-io.h" #include "transaction.h" - +static int total_trans = 0; static void put_transaction(struct btrfs_transaction *transaction) { transaction->use_count--; - if (transaction->use_count == 0) + if (transaction->use_count == 0) { + WARN_ON(total_trans == 0); + total_trans--; kfree(transaction); + } } static int join_transaction(struct btrfs_root *root) @@ -18,6 +21,7 @@ static int join_transaction(struct btrfs_root *root) cur_trans = root->fs_info->running_transaction; if (!cur_trans) { cur_trans = kmalloc(sizeof(*cur_trans), GFP_NOFS); + total_trans++; BUG_ON(!cur_trans); root->fs_info->running_transaction = cur_trans; cur_trans->num_writers = 0; @@ -108,7 +112,6 @@ static int wait_for_commit(struct btrfs_root *root, struct btrfs_transaction *commit) { DEFINE_WAIT(wait); - commit->use_count++; while(!commit->commit_done) { prepare_to_wait(&commit->commit_wait, &wait, TASK_UNINTERRUPTIBLE); @@ -126,7 +129,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { int ret = 0; - struct buffer_head *snap = root->commit_root; + struct buffer_head *snap; struct btrfs_key snap_key; struct btrfs_transaction *cur_trans; DEFINE_WAIT(wait); @@ -153,15 +156,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, } finish_wait(&trans->transaction->writer_wait, &wait); - cur_trans = root->fs_info->running_transaction; - root->fs_info->running_transaction = NULL; - if (root->node != root->commit_root) { memcpy(&snap_key, &root->root_key, sizeof(snap_key)); root->root_key.offset++; } - mutex_unlock(&root->fs_info->trans_mutex); if (btrfs_root_blocknr(&root->root_item) != root->node->b_blocknr) { btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); @@ -173,17 +172,24 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = btrfs_commit_tree_roots(trans, root); BUG_ON(ret); + cur_trans = root->fs_info->running_transaction; + root->fs_info->running_transaction = NULL; + mutex_unlock(&root->fs_info->trans_mutex); + ret = btrfs_write_and_wait_transaction(trans, root); BUG_ON(ret); write_ctree_super(trans, root); - btrfs_finish_extent_commit(trans, root->fs_info->extent_root); - btrfs_finish_extent_commit(trans, root->fs_info->tree_root); + btrfs_finish_extent_commit(trans, root); + mutex_lock(&root->fs_info->trans_mutex); + put_transaction(cur_trans); put_transaction(cur_trans); + mutex_unlock(&root->fs_info->trans_mutex); kfree(trans); if (root->node != root->commit_root) { trans = btrfs_start_transaction(root, 1); + snap = root->commit_root; root->commit_root = root->node; get_bh(root->node); ret = btrfs_drop_snapshot(trans, root, snap); @@ -191,10 +197,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = btrfs_del_root(trans, root->fs_info->tree_root, &snap_key); - BUG_ON(ret); - root->fs_info->generation = root->root_key.offset + 1; - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); + BUG_ON(ret); root->fs_info->generation = root->root_key.offset + 1; ret = btrfs_end_transaction(trans, root); BUG_ON(ret); + printk("at free, total trans %d\n", total_trans); } return ret; -- cgit v1.2.3-70-g09d2 From 8ef97622caa2d5f78d1dc58ab918e2fbfa9b357a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Mar 2007 10:15:30 -0400 Subject: Btrfs: add a radix back bit tree Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 3 +- fs/btrfs/bit-radix.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/bit-radix.h | 15 +++++++ fs/btrfs/ctree.h | 3 +- fs/btrfs/disk-io.c | 3 +- fs/btrfs/extent-tree.c | 93 ++++++++++++++++++------------------------ 6 files changed, 167 insertions(+), 57 deletions(-) create mode 100644 fs/btrfs/bit-radix.c create mode 100644 fs/btrfs/bit-radix.h (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index be7d74cdca0..5346f706b2c 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -3,7 +3,8 @@ ifneq ($(KERNELRELEASE),) obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ - hash.o file-item.o inode-item.o inode-map.o disk-io.o transaction.o + hash.o file-item.o inode-item.o inode-map.o disk-io.o \ + transaction.o bit-radix.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c new file mode 100644 index 00000000000..43807cc3cce --- /dev/null +++ b/fs/btrfs/bit-radix.c @@ -0,0 +1,107 @@ +#include +#include "bit-radix.h" + +#define BIT_ARRAY_BYTES 256 +#define BIT_RADIX_BITS_PER_ARRAY ((BIT_ARRAY_BYTES - sizeof(unsigned long)) * 8) + +int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) +{ + unsigned long *bits; + unsigned long slot; + int bit_slot; + int ret; + + slot = bit / BIT_RADIX_BITS_PER_ARRAY; + bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY; + + bits = radix_tree_lookup(radix, slot); + if (!bits) { + bits = kmalloc(BIT_ARRAY_BYTES, GFP_NOIO); + if (!bits) + return -ENOMEM; + memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long)); + bits[0] = slot; + ret = radix_tree_insert(radix, slot, bits); + if (ret) + return ret; + } + set_bit(bit_slot, bits + 1); + return 0; +} + +int test_radix_bit(struct radix_tree_root *radix, unsigned long bit) +{ + unsigned long *bits; + unsigned long slot; + int bit_slot; + + slot = bit / BIT_RADIX_BITS_PER_ARRAY; + bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY; + + bits = radix_tree_lookup(radix, slot); + if (!bits) + return 0; + return test_bit(bit_slot, bits + 1); +} + +int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) +{ + unsigned long *bits; + unsigned long slot; + int bit_slot; + int i; + int empty = 1; + + slot = bit / BIT_RADIX_BITS_PER_ARRAY; + bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY; + + bits = radix_tree_lookup(radix, slot); + if (!bits) + return 0; + clear_bit(bit_slot, bits + 1); + + for (i = 1; i < BIT_ARRAY_BYTES / sizeof(unsigned long); i++) { + if (bits[i]) { + empty = 0; + break; + } + } + + if (empty) { + bits = radix_tree_delete(radix, slot); + BUG_ON(!bits); + } + return 0; +} + +int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, + int nr) +{ + unsigned long *bits; + unsigned long *gang[4]; + int found; + int ret; + int i; + int total_found = 0; + + ret = radix_tree_gang_lookup(radix, (void *)&gang, 0, ARRAY_SIZE(gang)); + for (i = 0; i < ret && nr > 0; i++) { + found = 0; + bits = gang[i]; + while(nr > 0) { + found = find_next_bit(bits + 1, + BIT_RADIX_BITS_PER_ARRAY, + found); + if (found < BIT_RADIX_BITS_PER_ARRAY) { + *retbits = bits[0] * + BIT_RADIX_BITS_PER_ARRAY + found; + retbits++; + nr--; + total_found++; + found++; + } else + break; + } + } + return total_found; +} diff --git a/fs/btrfs/bit-radix.h b/fs/btrfs/bit-radix.h new file mode 100644 index 00000000000..56aad4c7d7f --- /dev/null +++ b/fs/btrfs/bit-radix.h @@ -0,0 +1,15 @@ +#ifndef __BIT_RADIX__ +#define __BIT_RADIX__ +#include + +int set_radix_bit(struct radix_tree_root *radix, unsigned long bit); +int test_radix_bit(struct radix_tree_root *radix, unsigned long bit); +int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit); +int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, + int nr); + +static inline void init_bit_radix(struct radix_tree_root *radix) +{ + INIT_RADIX_TREE(radix, GFP_NOFS); +} +#endif diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 68cafae6a85..0aa1052d9f6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1,8 +1,8 @@ #ifndef __BTRFS__ #define __BTRFS__ -#include #include +#include "bit-radix.h" struct btrfs_trans_handle; struct btrfs_transaction; @@ -222,6 +222,7 @@ struct btrfs_fs_info { struct btrfs_root *inode_root; struct btrfs_key current_insert; struct btrfs_key last_insert; + struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; u64 last_inode_alloc; u64 last_inode_alloc_dirid; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e32ddff55b0..758a62aba06 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -104,7 +104,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, /* FIXME: don't be stupid */ if (!btrfs_super_root(disk_super)) return NULL; - INIT_RADIX_TREE(&fs_info->pinned_radix, GFP_KERNEL); + init_bit_radix(&fs_info->pinned_radix); + init_bit_radix(&fs_info->pending_del_radix); fs_info->running_transaction = NULL; fs_info->fs_root = root; fs_info->tree_root = tree_root; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 369b960fce4..b14104276ee 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1,5 +1,4 @@ #include -#include #include "ctree.h" #include "disk-io.h" #include "print-tree.h" @@ -12,15 +11,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); -/* - * pending extents are blocks that we're trying to allocate in the extent - * map while trying to grow the map because of other allocations. To avoid - * recursing, they are tagged in the radix tree and cleaned up after - * other allocations are done. The pending tag is also used in the same - * manner for deletes. - */ -#define CTREE_EXTENT_PENDING_DEL 0 -#define CTREE_EXTENT_PINNED 1 static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr) @@ -104,24 +94,21 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct buffer_head *gang[8]; + unsigned long gang[8]; u64 first = 0; int ret; int i; + struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix; while(1) { - ret = radix_tree_gang_lookup_tag(&root->fs_info->pinned_radix, - (void **)gang, 0, - ARRAY_SIZE(gang), - CTREE_EXTENT_PINNED); + ret = find_first_radix_bit(pinned_radix, gang, + ARRAY_SIZE(gang)); if (!ret) break; if (!first) - first = gang[0]->b_blocknr; + first = gang[0]; for (i = 0; i < ret; i++) { - radix_tree_delete(&root->fs_info->pinned_radix, - gang[i]->b_blocknr); - brelse(gang[i]); + clear_radix_bit(pinned_radix, gang[i]); } } if (root->fs_info->last_insert.objectid > first) @@ -161,29 +148,27 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct return 0; } -static int pin_down_block(struct btrfs_root *root, u64 blocknr, int tag) +static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) { int err; - struct buffer_head *bh = sb_getblk(root->fs_info->sb, blocknr); struct btrfs_header *header; - BUG_ON(!bh); - - header = btrfs_buffer_header(bh); - if (btrfs_header_generation(header) == - root->fs_info->running_transaction->transid) { - return 0; - } - - err = radix_tree_insert(&root->fs_info->pinned_radix, - blocknr, bh); - if (err && err != -EEXIST) { - BUG(); - return err; - } - if (err == -EEXIST) + struct buffer_head *bh; + + bh = sb_find_get_block(root->fs_info->sb, blocknr); + if (bh) { + header = btrfs_buffer_header(bh); + if (btrfs_header_generation(header) == + root->fs_info->running_transaction->transid) { + brelse(bh); + return 0; + } brelse(bh); - radix_tree_tag_set(&root->fs_info->pinned_radix, blocknr, - tag); + } + if (pending) + err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr); + else + err = set_radix_bit(&root->fs_info->pinned_radix, blocknr); + BUG_ON(err); return 0; } @@ -225,8 +210,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root u64 super_blocks_used; if (pin) { - ret = pin_down_block(root, blocknr, - CTREE_EXTENT_PINNED); + ret = pin_down_block(root, blocknr, 0); BUG_ON(ret); } @@ -255,25 +239,26 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct int ret; int wret; int err = 0; - struct buffer_head *gang[4]; + unsigned long gang[4]; int i; - struct radix_tree_root *radix = &extent_root->fs_info->pinned_radix; + struct radix_tree_root *pending_radix; + struct radix_tree_root *pinned_radix; + + pending_radix = &extent_root->fs_info->pending_del_radix; + pinned_radix = &extent_root->fs_info->pinned_radix; while(1) { - ret = radix_tree_gang_lookup_tag( - &extent_root->fs_info->pinned_radix, - (void **)gang, 0, - ARRAY_SIZE(gang), - CTREE_EXTENT_PENDING_DEL); + ret = find_first_radix_bit(pending_radix, gang, + ARRAY_SIZE(gang)); if (!ret) break; for (i = 0; i < ret; i++) { - radix_tree_tag_set(radix, gang[i]->b_blocknr, - CTREE_EXTENT_PINNED); - radix_tree_tag_clear(radix, gang[i]->b_blocknr, - CTREE_EXTENT_PENDING_DEL); + wret = set_radix_bit(pinned_radix, gang[i]); + BUG_ON(wret); + wret = clear_radix_bit(pending_radix, gang[i]); + BUG_ON(wret); wret = __free_extent(trans, extent_root, - gang[i]->b_blocknr, 1, 0); + gang[i], 1, 0); if (wret) err = wret; } @@ -294,7 +279,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (root == extent_root) { t = find_tree_block(root, blocknr); - pin_down_block(root, blocknr, CTREE_EXTENT_PENDING_DEL); + pin_down_block(root, blocknr, 1); return 0; } ret = __free_extent(trans, root, blocknr, num_blocks, pin); @@ -393,7 +378,7 @@ check_pending: BUG_ON(ins->objectid < search_start); for (test_block = ins->objectid; test_block < ins->objectid + total_needed; test_block++) { - if (radix_tree_lookup(&root->fs_info->pinned_radix, + if (test_radix_bit(&root->fs_info->pinned_radix, test_block)) { search_start = test_block + 1; goto check_failed; -- cgit v1.2.3-70-g09d2 From dee26a9f7aab7ffe1193cd1415b23a69426acc9f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Mar 2007 16:00:06 -0400 Subject: btrfs_get_block, file read/write Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 + fs/btrfs/ctree.h | 26 ++++++- fs/btrfs/extent-tree.c | 4 +- fs/btrfs/file-item.c | 52 +++++++++++++- fs/btrfs/super.c | 188 ++++++++++++++++++++++++++++++++++++++++--------- 5 files changed, 230 insertions(+), 41 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 454028b6bd4..2ca301b289a 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -8,6 +8,7 @@ * Add block mapping tree (simple dm layer) * Add simple tree locking (semaphore per tree) * Make allocator smarter +* Make directory hashing work on 32 bit * Do actual block accounting * Check compat and incompat flags on the inode * Add virtual filesystems, mountable snapshots diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0aa1052d9f6..7b7120d3ab4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -198,7 +198,7 @@ struct btrfs_file_extent_item { __le64 disk_blocknr; __le64 disk_num_blocks; /* - * the logical offset in file bytes (no csums) + * the logical offset in file blocks (no csums) * this extent record is for. This allows a file extent to point * into the middle of an existing extent on disk, sharing it * between two snapshots (useful if some bytes in the middle of the @@ -812,12 +812,19 @@ static inline struct btrfs_root *btrfs_sb(struct super_block *sb) ((type *)(btrfs_leaf_data(leaf) + \ btrfs_item_offset((leaf)->items + (slot)))) +/* extent-item.c */ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root + *root, u64 num_blocks, u64 search_start, u64 + search_end, u64 owner, struct btrfs_key *ins); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin); +int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct + btrfs_root *root); +/* ctree.c */ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); @@ -834,8 +841,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf); int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *snap); -int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct - btrfs_root *root); +/* root-item.c */ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key); int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root @@ -846,6 +852,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *item); int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key); +/* dir-item.c */ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, u64 dir, u64 objectid, u8 type); @@ -854,6 +861,7 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root const char *name, int name_len, int mod); int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, const char *name, int name_len); +/* inode-map.c */ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, struct btrfs_root *fs_root, u64 dirid, u64 *objectid); @@ -863,9 +871,21 @@ int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, int btrfs_lookup_inode_map(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, int mod); +/* inode-item.c */ int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, struct btrfs_inode_item *inode_item); int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, int mod); + +/* file-item.c */ +int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 objectid, u64 offset, + u64 num_blocks, u64 hint_block, + u64 *result); +int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 objectid, + u64 blocknr, u64 num_blocks, int mod); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b14104276ee..82f6e9eed1d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -403,7 +403,7 @@ error: * * returns 0 if everything worked, non-zero otherwise. */ -static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root +int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_blocks, u64 search_start, u64 search_end, u64 owner, struct btrfs_key *ins) { @@ -458,7 +458,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, int ret; struct buffer_head *buf; - ret = alloc_extent(trans, root, 1, 0, (unsigned long)-1, + ret = btrfs_alloc_extent(trans, root, 1, 0, (unsigned long)-1, btrfs_header_parentid(btrfs_buffer_header(root->node)), &ins); if (ret) { BUG(); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 8e1e5b4ccfa..4a0367d702b 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -1,9 +1,57 @@ #include #include "ctree.h" +#include "disk-io.h" #include "transaction.h" -int btrfs_create_file(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 dirid, u64 *objectid) +int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 objectid, u64 offset, + u64 num_blocks, u64 hint_block, + u64 *result) { + struct btrfs_key ins; + int ret = 0; + struct btrfs_file_extent_item *item; + struct btrfs_key file_key; + struct btrfs_path path; + + btrfs_init_path(&path); + ret = btrfs_alloc_extent(trans, root, num_blocks, hint_block, + (u64)-1, objectid, &ins); + BUG_ON(ret); + file_key.objectid = objectid; + file_key.offset = offset; + file_key.flags = 0; + btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); + + ret = btrfs_insert_empty_item(trans, root, &path, &file_key, + sizeof(*item)); + item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_disk_blocknr(item, ins.objectid); + btrfs_set_file_extent_disk_num_blocks(item, ins.offset); + btrfs_set_file_extent_offset(item, 0); + btrfs_set_file_extent_num_blocks(item, ins.offset); + mark_buffer_dirty(path.nodes[0]); + *result = ins.objectid; + btrfs_release_path(root, &path); return 0; } + +int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 objectid, + u64 blocknr, u64 num_blocks, int mod) +{ + int ret; + struct btrfs_key file_key; + int ins_len = mod < 0 ? -1 : 0; + int cow = mod != 0; + + file_key.objectid = objectid; + file_key.offset = blocknr; + file_key.flags = 0; + btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); + ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); + return ret; +} diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 905b093a85f..2c2883f2856 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -17,6 +18,9 @@ static struct inode_operations btrfs_dir_inode_operations; static struct super_operations btrfs_super_ops; static struct file_operations btrfs_dir_file_operations; +static struct inode_operations btrfs_file_inode_operations; +static struct address_space_operations btrfs_aops; +static struct file_operations btrfs_file_operations; static void btrfs_read_locked_inode(struct inode *inode) { @@ -57,6 +61,9 @@ static void btrfs_read_locked_inode(struct inode *inode) break; #endif case S_IFREG: + inode->i_mapping->a_ops = &btrfs_aops; + inode->i_fop = &btrfs_file_operations; + inode->i_op = &btrfs_file_inode_operations; break; case S_IFDIR: inode->i_op = &btrfs_dir_inode_operations; @@ -214,35 +221,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return d_splice_alias(inode, dentry); } -static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path) -{ - struct buffer_head *bh; - struct btrfs_node *node; - int i; - int nritems; - u64 objectid; - u64 item_objectid; - u64 blocknr; - int slot; - - if (!path->nodes[1]) - return; - node = btrfs_buffer_node(path->nodes[1]); - slot = path->slots[1]; - objectid = btrfs_disk_key_objectid(&node->ptrs[slot].key); - nritems = btrfs_header_nritems(&node->header); - for (i = slot; i < nritems; i++) { - item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); - if (item_objectid != objectid) - break; - blocknr = btrfs_node_blockptr(node, i); - bh = sb_getblk(root->fs_info->sb, blocknr); - ll_rw_block(READ, 1, &bh); - brelse(bh); - } - -} - static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { struct inode *inode = filp->f_path.dentry->d_inode; @@ -269,21 +247,18 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) goto err; } advance = 0; - reada_leaves(root, &path); while(1) { leaf = btrfs_buffer_leaf(path.nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path.slots[0]; - if (advance) { - if (slot == nritems -1) { + if (advance || slot >= nritems) { + if (slot >= nritems -1) { ret = btrfs_next_leaf(root, &path); if (ret) break; leaf = btrfs_buffer_leaf(path.nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path.slots[0]; - if (path.nodes[1] && path.slots[1] == 0) - reada_leaves(root, &path); } else { slot++; path.slots[0]++; @@ -297,6 +272,8 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) continue; if (btrfs_disk_key_offset(&item->key) < filp->f_pos) continue; + + advance = 1; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); over = filldir(dirent, (const char *)(di + 1), btrfs_dir_name_len(di), @@ -524,6 +501,11 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, err = btrfs_add_nondir(trans, dentry, inode); if (err) drop_inode = 1; + else { + inode->i_mapping->a_ops = &btrfs_aops; + inode->i_fop = &btrfs_file_operations; + inode->i_op = &btrfs_file_inode_operations; + } dir->i_sb->s_dirt = 1; btrfs_end_transaction(trans, root); out_unlock: @@ -623,11 +605,124 @@ printk("btrfs sync_fs\n"); return 0; } +static int btrfs_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create) +{ + int ret; + int err = 0; + u64 blocknr; + u64 extent_start = 0; + u64 extent_end = 0; + u64 objectid = inode->i_ino; + struct btrfs_path path; + struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_trans_handle *trans = NULL; + struct btrfs_file_extent_item *item; + struct btrfs_leaf *leaf; + struct btrfs_disk_key *found_key; + + btrfs_init_path(&path); + mutex_lock(&root->fs_info->fs_mutex); + if (create) + trans = btrfs_start_transaction(root, 1); + + + ret = btrfs_lookup_file_extent(trans, root, &path, + inode->i_ino, iblock, 1, 0); + if (ret < 0) { + btrfs_release_path(root, &path); + err = ret; + goto out; + } + + if (ret != 0) { + if (path.slots[0] == 0) { + btrfs_release_path(root, &path); + goto allocate; + } + path.slots[0]--; + } + + item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + struct btrfs_file_extent_item); + leaf = btrfs_buffer_leaf(path.nodes[0]); + blocknr = btrfs_file_extent_disk_blocknr(item); + blocknr += btrfs_file_extent_offset(item); + + /* exact match found, use it */ + if (ret == 0) { + err = 0; + map_bh(result, inode->i_sb, blocknr); + btrfs_release_path(root, &path); + goto out; + } + + /* are we inside the extent that was found? */ + found_key = &leaf->items[path.slots[0]].key; + if (btrfs_disk_key_objectid(found_key) != objectid || + btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) { + extent_end = 0; + extent_start = 0; + btrfs_release_path(root, &path); + goto allocate; + } + + extent_start = btrfs_disk_key_offset(&leaf->items[path.slots[0]].key); + extent_start += btrfs_file_extent_offset(item); + extent_end = extent_start + btrfs_file_extent_num_blocks(item); + btrfs_release_path(root, &path); + if (iblock >= extent_start && iblock < extent_end) { + err = 0; + map_bh(result, inode->i_sb, blocknr + iblock - extent_start); + goto out; + } +allocate: + /* ok, create a new extent */ + if (!create) { + err = 0; + goto out; + } + ret = btrfs_alloc_file_extent(trans, root, objectid, iblock, + 1, extent_end, &blocknr); + if (ret) { + err = ret; + goto out; + } + map_bh(result, inode->i_sb, blocknr); + +out: + if (trans) + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + return err; +} + +static int btrfs_prepare_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + return block_prepare_write(page, from, to, btrfs_get_block); +} + static void btrfs_write_super(struct super_block *sb) { btrfs_sync_fs(sb, 1); } +static int btrfs_readpage(struct file *file, struct page *page) +{ + return mpage_readpage(page, btrfs_get_block); +} + +static int btrfs_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, btrfs_get_block); +} + +static int btrfs_writepage(struct page *page, struct writeback_control *wbc) +{ + return block_write_full_page(page, btrfs_get_block, wbc); +} static int btrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) @@ -667,6 +762,31 @@ static struct file_operations btrfs_dir_file_operations = { .readdir = btrfs_readdir, }; +static struct address_space_operations btrfs_aops = { + .readpage = btrfs_readpage, + .readpages = btrfs_readpages, + .writepage = btrfs_writepage, + .sync_page = block_sync_page, + .prepare_write = btrfs_prepare_write, + .commit_write = generic_commit_write, +}; + +static struct inode_operations btrfs_file_inode_operations = { + .truncate = NULL, +}; + +static struct file_operations btrfs_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = generic_file_aio_read, + .aio_write = generic_file_aio_write, + .mmap = generic_file_mmap, + .open = generic_file_open, + .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, +}; static int __init init_btrfs_fs(void) { -- cgit v1.2.3-70-g09d2 From 6407bf6d7c449cbfb0a39d985194e265eda3baf4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 27 Mar 2007 06:33:00 -0400 Subject: Btrfs: reference counts on data extents Signed-off-by: Chris Mason --- fs/btrfs/TODO | 3 ++ fs/btrfs/ctree.h | 9 +++--- fs/btrfs/extent-tree.c | 87 ++++++++++++++++++++++++++++++++++++++++---------- fs/btrfs/inode-map.c | 1 + fs/btrfs/super.c | 6 ++-- 5 files changed, 82 insertions(+), 24 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 2ca301b289a..7aec75e9a37 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -8,7 +8,10 @@ * Add block mapping tree (simple dm layer) * Add simple tree locking (semaphore per tree) * Make allocator smarter +* make level a field in header +* add a block group to struct inode * Make directory hashing work on 32 bit +* Make sure nobh stuff is working properly for cows * Do actual block accounting * Check compat and incompat flags on the inode * Add virtual filesystems, mountable snapshots diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7b7120d3ab4..1a98952e0fa 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -9,10 +9,11 @@ struct btrfs_transaction; #define BTRFS_MAGIC "_BtRfS_M" -#define BTRFS_ROOT_TREE_OBJECTID 1 -#define BTRFS_EXTENT_TREE_OBJECTID 2 -#define BTRFS_INODE_MAP_OBJECTID 3 -#define BTRFS_FS_TREE_OBJECTID 4 +#define BTRFS_ROOT_TREE_OBJECTID 1ULL +#define BTRFS_EXTENT_TREE_OBJECTID 2ULL +#define BTRFS_INODE_MAP_OBJECTID 3ULL +#define BTRFS_FS_TREE_OBJECTID 4ULL +#define BTRFS_FIRST_FREE_OBJECTID 5ULL /* * we can actually store much bigger names, but lets not confuse the rest diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 82f6e9eed1d..4d4fc48c0a3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -13,7 +13,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr) + *root, u64 blocknr, u64 num_blocks) { struct btrfs_path path; int ret; @@ -29,7 +29,7 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root key.objectid = blocknr; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - key.offset = 1; + key.offset = num_blocks; ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path, 0, 1); if (ret != 0) @@ -48,7 +48,7 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root } static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u32 *refs) + *root, u64 blocknr, u64 num_blocks, u32 *refs) { struct btrfs_path path; int ret; @@ -57,7 +57,7 @@ static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_extent_item *item; btrfs_init_path(&path); key.objectid = blocknr; - key.offset = 1; + key.offset = num_blocks; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path, @@ -76,17 +76,34 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, { u64 blocknr; struct btrfs_node *buf_node; + struct btrfs_leaf *buf_leaf; + struct btrfs_disk_key *key; + struct btrfs_file_extent_item *fi; int i; + int leaf; + int ret; if (!root->ref_cows) return 0; buf_node = btrfs_buffer_node(buf); - if (btrfs_is_leaf(buf_node)) - return 0; - + leaf = btrfs_is_leaf(buf_node); + buf_leaf = btrfs_buffer_leaf(buf); for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) { - blocknr = btrfs_node_blockptr(buf_node, i); - inc_block_ref(trans, root, blocknr); + if (leaf) { + key = &buf_leaf->items[i].key; + if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(buf_leaf, i, + struct btrfs_file_extent_item); + ret = inc_block_ref(trans, root, + btrfs_file_extent_disk_blocknr(fi), + btrfs_file_extent_disk_num_blocks(fi)); + BUG_ON(ret); + } else { + blocknr = btrfs_node_blockptr(buf_node, i); + ret = inc_block_ref(trans, root, blocknr, 1); + BUG_ON(ret); + } } return 0; } @@ -469,6 +486,37 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return buf; } +static int drop_leaf_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct buffer_head *cur) +{ + struct btrfs_disk_key *key; + struct btrfs_leaf *leaf; + struct btrfs_file_extent_item *fi; + int i; + int nritems; + int ret; + + BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur))); + leaf = btrfs_buffer_leaf(cur); + nritems = btrfs_header_nritems(&leaf->header); + for (i = 0; i < nritems; i++) { + key = &leaf->items[i].key; + if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); + /* + * FIXME make sure to insert a trans record that + * repeats the snapshot del on crash + */ + ret = btrfs_free_extent(trans, root, + btrfs_file_extent_disk_blocknr(fi), + btrfs_file_extent_disk_num_blocks(fi), + 0); + BUG_ON(ret); + } + return 0; +} + /* * helper function for drop_snapshot, this walks down the tree dropping ref * counts as it goes. @@ -483,28 +531,33 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root u32 refs; ret = lookup_block_ref(trans, root, path->nodes[*level]->b_blocknr, - &refs); + 1, &refs); BUG_ON(ret); if (refs > 1) goto out; /* * walk down to the last node level and free all the leaves */ - while(*level > 0) { + while(*level >= 0) { cur = path->nodes[*level]; if (path->slots[*level] >= btrfs_header_nritems(btrfs_buffer_header(cur))) break; + if (*level == 0) { + ret = drop_leaf_ref(trans, root, cur); + BUG_ON(ret); + break; + } blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), path->slots[*level]); - ret = lookup_block_ref(trans, root, blocknr, &refs); - if (refs != 1 || *level == 1) { + ret = lookup_block_ref(trans, root, blocknr, 1, &refs); + BUG_ON(ret); + if (refs != 1) { path->slots[*level]++; ret = btrfs_free_extent(trans, root, blocknr, 1, 1); BUG_ON(ret); continue; } - BUG_ON(ret); next = read_tree_block(root, blocknr); if (path->nodes[*level-1]) btrfs_block_release(root, path->nodes[*level-1]); @@ -513,8 +566,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root path->slots[*level] = 0; } out: - ret = btrfs_free_extent(trans, root, path->nodes[*level]->b_blocknr, - 1, 1); + ret = btrfs_free_extent(trans, root, + path->nodes[*level]->b_blocknr, 1, 1); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -544,10 +597,10 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root ret = btrfs_free_extent(trans, root, path->nodes[*level]->b_blocknr, 1, 1); + BUG_ON(ret); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; - BUG_ON(ret); } } return 1; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index ad2d375b830..1b2c5e04350 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -25,6 +25,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, if (fs_root->fs_info->last_inode_alloc_dirid == dirid) search_start = fs_root->fs_info->last_inode_alloc; + search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); search_key.objectid = search_start; search_key.flags = 0; btrfs_set_key_type(&search_key, BTRFS_INODE_MAP_ITEM_KEY); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2c2883f2856..487da5a213c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -700,7 +700,7 @@ out: static int btrfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { - return block_prepare_write(page, from, to, btrfs_get_block); + return nobh_prepare_write(page, from, to, btrfs_get_block); } static void btrfs_write_super(struct super_block *sb) @@ -721,7 +721,7 @@ static int btrfs_readpages(struct file *file, struct address_space *mapping, static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { - return block_write_full_page(page, btrfs_get_block, wbc); + return nobh_writepage(page, btrfs_get_block, wbc); } static int btrfs_get_sb(struct file_system_type *fs_type, @@ -768,7 +768,7 @@ static struct address_space_operations btrfs_aops = { .writepage = btrfs_writepage, .sync_page = block_sync_page, .prepare_write = btrfs_prepare_write, - .commit_write = generic_commit_write, + .commit_write = nobh_commit_write, }; static struct inode_operations btrfs_file_inode_operations = { -- cgit v1.2.3-70-g09d2 From f4b9aa8d3b877d0a6044a6d6d9a44b29cab9e265 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 27 Mar 2007 11:05:53 -0400 Subject: btrfs_truncate Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 25 ++++++------ fs/btrfs/super.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 114 insertions(+), 18 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4d4fc48c0a3..176c6dc534b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -171,20 +171,21 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) struct btrfs_header *header; struct buffer_head *bh; - bh = sb_find_get_block(root->fs_info->sb, blocknr); - if (bh) { - header = btrfs_buffer_header(bh); - if (btrfs_header_generation(header) == - root->fs_info->running_transaction->transid) { + if (!pending) { + bh = sb_find_get_block(root->fs_info->sb, blocknr); + if (bh) { + header = btrfs_buffer_header(bh); + if (btrfs_header_generation(header) == + root->fs_info->running_transaction->transid) { + brelse(bh); + return 0; + } brelse(bh); - return 0; } - brelse(bh); - } - if (pending) - err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr); - else err = set_radix_bit(&root->fs_info->pinned_radix, blocknr); + } else { + err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr); + } BUG_ON(err); return 0; } @@ -223,6 +224,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ei->refs == 0); refs = btrfs_extent_refs(ei) - 1; btrfs_set_extent_refs(ei, refs); + mark_buffer_dirty(path.nodes[0]); if (refs == 0) { u64 super_blocks_used; @@ -240,7 +242,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (ret) BUG(); } - mark_buffer_dirty(path.nodes[0]); btrfs_release_path(extent_root, &path); finish_current_insert(trans, extent_root); return ret; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 487da5a213c..fd3d9d616ff 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -28,11 +28,15 @@ static void btrfs_read_locked_inode(struct inode *inode) struct btrfs_inode_item *inode_item; struct btrfs_root *root = btrfs_sb(inode->i_sb); int ret; + btrfs_init_path(&path); + mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0); if (ret) { - make_bad_inode(inode); btrfs_release_path(root, &path); + mutex_unlock(&root->fs_info->fs_mutex); + make_bad_inode(inode); return; } inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), @@ -53,6 +57,7 @@ static void btrfs_read_locked_inode(struct inode *inode) inode->i_blocks = btrfs_inode_nblocks(inode_item); inode->i_generation = btrfs_inode_generation(inode_item); btrfs_release_path(root, &path); + mutex_unlock(&root->fs_info->fs_mutex); switch (inode->i_mode & S_IFMT) { #if 0 default: @@ -151,20 +156,85 @@ error: return ret; } +static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode) +{ + int ret; + struct btrfs_path path; + struct btrfs_key key; + struct btrfs_disk_key *found_key; + struct btrfs_leaf *leaf; + struct btrfs_file_extent_item *fi; + u64 extent_start; + u64 extent_num_blocks; + + /* FIXME, add redo link to tree so we don't leak on crash */ + key.objectid = inode->i_ino; + key.offset = (u64)-1; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + while(1) { + btrfs_init_path(&path); + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); + if (ret < 0) { + btrfs_release_path(root, &path); + goto error; + } + if (ret > 0) { + BUG_ON(path.slots[0] == 0); + path.slots[0]--; + } + leaf = btrfs_buffer_leaf(path.nodes[0]); + found_key = &leaf->items[path.slots[0]].key; + if (btrfs_disk_key_objectid(found_key) != inode->i_ino) + break; + if (btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) + break; + if (btrfs_disk_key_offset(found_key) < inode->i_size) + break; + /* FIXME: add extent truncation */ + if (btrfs_disk_key_offset(found_key) < inode->i_size) + break; + fi = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], + struct btrfs_file_extent_item); + extent_start = btrfs_file_extent_disk_blocknr(fi); + extent_num_blocks = btrfs_file_extent_disk_num_blocks(fi); + key.offset = btrfs_disk_key_offset(found_key) - 1; + ret = btrfs_del_item(trans, root, &path); + BUG_ON(ret); + inode->i_blocks -= btrfs_file_extent_num_blocks(fi) >> 9; + btrfs_release_path(root, &path); + ret = btrfs_free_extent(trans, root, extent_start, + extent_num_blocks, 0); + BUG_ON(ret); + if (btrfs_disk_key_offset(found_key) == 0) + break; + } + btrfs_release_path(root, &path); + ret = 0; +error: + return ret; +} + static void btrfs_delete_inode(struct inode *inode) { struct btrfs_trans_handle *trans; struct btrfs_root *root = btrfs_sb(inode->i_sb); + int ret; + truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) { goto no_delete; } inode->i_size = 0; - if (inode->i_blocks) - WARN_ON(1); - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + if (S_ISREG(inode->i_mode)) { + ret = btrfs_truncate_in_trans(trans, root, inode); + BUG_ON(ret); + } btrfs_free_inode(trans, root, inode); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -173,7 +243,6 @@ no_delete: clear_inode(inode); } - static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, ino_t *ino) { @@ -688,6 +757,8 @@ allocate: err = ret; goto out; } + inode->i_blocks += inode->i_sb->s_blocksize >> 9; + set_buffer_new(result); map_bh(result, inode->i_sb, blocknr); out: @@ -724,6 +795,30 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) return nobh_writepage(page, btrfs_get_block, wbc); } +static void btrfs_truncate(struct inode *inode) +{ + struct btrfs_root *root = btrfs_sb(inode->i_sb); + int ret; + struct btrfs_trans_handle *trans; + + if (!S_ISREG(inode->i_mode)) + return; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + + nobh_truncate_page(inode->i_mapping, inode->i_size); + + /* FIXME, add redo link to tree so we don't leak on crash */ + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_truncate_in_trans(trans, root, inode); + BUG_ON(ret); + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); + mark_inode_dirty(inode); +} + static int btrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { @@ -772,7 +867,7 @@ static struct address_space_operations btrfs_aops = { }; static struct inode_operations btrfs_file_inode_operations = { - .truncate = NULL, + .truncate = btrfs_truncate, }; static struct file_operations btrfs_file_operations = { -- cgit v1.2.3-70-g09d2 From d98237b3ede7ab98892f7fa62201a13694c526e2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 28 Mar 2007 13:57:48 -0400 Subject: Btrfs: use a btree inode instead of sb_getblk Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 174 +++++++++++++++++++++++++++++++++++++++++++++---- fs/btrfs/disk-io.h | 5 +- fs/btrfs/extent-tree.c | 8 +-- fs/btrfs/super.c | 8 ++- fs/btrfs/transaction.c | 2 +- 6 files changed, 174 insertions(+), 24 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1897f3a65b4..b2ebed756c1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -233,6 +233,7 @@ struct btrfs_fs_info { struct btrfs_super_block *disk_super; struct buffer_head *sb_buffer; struct super_block *sb; + struct inode *btree_inode; struct mutex trans_mutex; struct mutex fs_mutex; }; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 758a62aba06..970103f2cac 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1,14 +1,17 @@ #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" + static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { struct btrfs_node *node = btrfs_buffer_node(buf); - if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) + if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) { BUG(); + } if (root->node && btrfs_header_parentid(&node->header) != btrfs_header_parentid(btrfs_buffer_header(root->node))) { BUG(); @@ -16,25 +19,154 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) return 0; } -struct buffer_head *alloc_tree_block(struct btrfs_root *root, u64 blocknr) +struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) +{ + struct address_space *mapping = root->fs_info->btree_inode->i_mapping; + int blockbits = root->fs_info->sb->s_blocksize_bits; + unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); + struct page *page; + struct buffer_head *bh; + struct buffer_head *head; + struct buffer_head *ret = NULL; + + page = find_lock_page(mapping, index); + if (!page) + return NULL; + + if (!page_has_buffers(page)) + goto out_unlock; + + head = page_buffers(page); + bh = head; + do { + if (buffer_mapped(bh) && bh->b_blocknr == blocknr) { + ret = bh; + get_bh(bh); + goto out_unlock; + } + bh = bh->b_this_page; + } while (bh != head); +out_unlock: + unlock_page(page); + page_cache_release(page); + return ret; +} + +struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, + u64 blocknr) +{ + struct address_space *mapping = root->fs_info->btree_inode->i_mapping; + int blockbits = root->fs_info->sb->s_blocksize_bits; + unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); + struct page *page; + struct buffer_head *bh; + struct buffer_head *head; + struct buffer_head *ret = NULL; + u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits); + page = grab_cache_page(mapping, index); + if (!page) + return NULL; + + wait_on_page_writeback(page); + if (!page_has_buffers(page)) + create_empty_buffers(page, root->fs_info->sb->s_blocksize, 0); + head = page_buffers(page); + bh = head; + do { + if (!buffer_mapped(bh)) { + bh->b_bdev = root->fs_info->sb->s_bdev; + bh->b_blocknr = first_block; + set_buffer_mapped(bh); + } + if (bh->b_blocknr == blocknr) { + ret = bh; + get_bh(bh); + goto out_unlock; + } + bh = bh->b_this_page; + first_block++; + } while (bh != head); +out_unlock: + unlock_page(page); + page_cache_release(page); + return ret; +} + +static sector_t max_block(struct block_device *bdev) +{ + sector_t retval = ~((sector_t)0); + loff_t sz = i_size_read(bdev->bd_inode); + + if (sz) { + unsigned int size = block_size(bdev); + unsigned int sizebits = blksize_bits(size); + retval = (sz >> sizebits); + } + return retval; +} + +static int btree_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int create) +{ + if (iblock >= max_block(inode->i_sb->s_bdev)) { + if (create) + return -EIO; + + /* + * for reads, we're just trying to fill a partial page. + * return a hole, they will have to call get_block again + * before they can fill it, and they will get -EIO at that + * time + */ + return 0; + } + bh->b_bdev = inode->i_sb->s_bdev; + bh->b_blocknr = iblock; + set_buffer_mapped(bh); + return 0; +} + +static int btree_writepage(struct page *page, struct writeback_control *wbc) { - return sb_getblk(root->fs_info->sb, blocknr); + return block_write_full_page(page, btree_get_block, wbc); } -struct buffer_head *find_tree_block(struct btrfs_root *root, u64 blocknr) +static int btree_readpage(struct file * file, struct page * page) { - return sb_getblk(root->fs_info->sb, blocknr); + return block_read_full_page(page, btree_get_block); } +static struct address_space_operations btree_aops = { + .readpage = btree_readpage, + .writepage = btree_writepage, + .sync_page = block_sync_page, +}; + struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) { - struct buffer_head *buf = sb_bread(root->fs_info->sb, blocknr); + struct buffer_head *bh = NULL; - if (!buf) - return buf; - if (check_tree_block(root, buf)) + bh = btrfs_find_create_tree_block(root, blocknr); + if (!bh) + return bh; + lock_buffer(bh); + if (!buffer_uptodate(bh)) { + get_bh(bh); + bh->b_end_io = end_buffer_read_sync; + submit_bh(READ, bh); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) + goto fail; + } else { + unlock_buffer(bh); + } + if (check_tree_block(root, bh)) BUG(); - return buf; + return bh; +fail: + brelse(bh); + return NULL; + } int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -101,11 +233,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, GFP_NOFS); int ret; - /* FIXME: don't be stupid */ if (!btrfs_super_root(disk_super)) return NULL; init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); + sb_set_blocksize(sb, sb_buffer->b_size); fs_info->running_transaction = NULL; fs_info->fs_root = root; fs_info->tree_root = tree_root; @@ -114,14 +246,30 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->last_inode_alloc = 0; fs_info->last_inode_alloc_dirid = 0; fs_info->disk_super = disk_super; - fs_info->sb_buffer = sb_buffer; fs_info->sb = sb; + fs_info->btree_inode = new_inode(sb); + fs_info->btree_inode->i_ino = 1; + fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; + fs_info->btree_inode->i_mapping->a_ops = &btree_aops; + mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); + mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); __setup_root(disk_super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); + + fs_info->sb_buffer = read_tree_block(tree_root, sb_buffer->b_blocknr); + + if (!fs_info->sb_buffer) + return NULL; + + brelse(sb_buffer); + sb_buffer = NULL; + disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; + fs_info->disk_super = disk_super; + tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); @@ -137,7 +285,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, ret = find_and_setup_root(disk_super, tree_root, fs_info, BTRFS_FS_TREE_OBJECTID, root); BUG_ON(ret); - root->commit_root = root->node; get_bh(root->node); root->ref_cows = 1; @@ -191,6 +338,7 @@ int close_ctree(struct btrfs_root *root) root->fs_info->tree_root->node); btrfs_block_release(root, root->commit_root); btrfs_block_release(root, root->fs_info->sb_buffer); + iput(root->fs_info->btree_inode); kfree(root->fs_info->extent_root); kfree(root->fs_info->inode_root); kfree(root->fs_info->tree_root); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 099f7eea0ec..c2c38bda704 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -21,7 +21,8 @@ static inline struct btrfs_header *btrfs_buffer_header(struct buffer_head *bh) } struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr); -struct buffer_head *find_tree_block(struct btrfs_root *root, u64 blocknr); +struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, + u64 blocknr); int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -37,5 +38,5 @@ int close_ctree(struct btrfs_root *root); void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int mkfs(int fd, u64 num_blocks, u32 blocksize); +struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 176c6dc534b..b2ae8e768b6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -172,8 +172,8 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) struct buffer_head *bh; if (!pending) { - bh = sb_find_get_block(root->fs_info->sb, blocknr); - if (bh) { + bh = btrfs_find_tree_block(root, blocknr); + if (bh && buffer_uptodate(bh)) { header = btrfs_buffer_header(bh); if (btrfs_header_generation(header) == root->fs_info->running_transaction->transid) { @@ -291,12 +291,10 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin) { struct btrfs_root *extent_root = root->fs_info->extent_root; - struct buffer_head *t; int pending_ret; int ret; if (root == extent_root) { - t = find_tree_block(root, blocknr); pin_down_block(root, blocknr, 1); return 0; } @@ -482,7 +480,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, BUG(); return NULL; } - buf = find_tree_block(root, ins.objectid); + buf = btrfs_find_create_tree_block(root, ins.objectid); set_buffer_uptodate(buf); return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 67659b6ce96..8f07f462236 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -474,6 +474,8 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) disk_super = (struct btrfs_super_block *)bh->b_data; root = open_ctree(sb, bh, disk_super); sb->s_fs_info = root; + disk_super = root->fs_info->disk_super; + if (!root) { printk("btrfs: open_ctree failed\n"); return -EIO; @@ -734,15 +736,15 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) struct btrfs_trans_handle *trans; struct btrfs_root *root; int ret; + root = btrfs_sb(sb); sb->s_dirt = 0; if (!wait) { - filemap_flush(sb->s_bdev->bd_inode->i_mapping); + filemap_flush(root->fs_info->btree_inode->i_mapping); return 0; } - filemap_write_and_wait(sb->s_bdev->bd_inode->i_mapping); + filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); - root = btrfs_sb(sb); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 46a596e345f..b20fb53a0d2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -74,7 +74,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); + filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); return 0; } -- cgit v1.2.3-70-g09d2 From d6025579531b7ea170ba283b171ff7a6bf7d0e12 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 30 Mar 2007 14:27:56 -0400 Subject: Btrfs: corruption hunt continues Signed-off-by: Chris Mason --- fs/btrfs/bit-radix.c | 9 ++- fs/btrfs/ctree.c | 201 ++++++++++++++++++++++++++++--------------------- fs/btrfs/ctree.h | 32 ++++++++ fs/btrfs/dir-item.c | 6 +- fs/btrfs/disk-io.c | 20 ++++- fs/btrfs/extent-tree.c | 8 +- fs/btrfs/file-item.c | 4 +- fs/btrfs/inode-map.c | 2 +- fs/btrfs/root-tree.c | 7 +- fs/btrfs/super.c | 2 +- fs/btrfs/transaction.c | 1 + 11 files changed, 187 insertions(+), 105 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index 43807cc3cce..845422368f4 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -16,12 +16,14 @@ int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) bits = radix_tree_lookup(radix, slot); if (!bits) { - bits = kmalloc(BIT_ARRAY_BYTES, GFP_NOIO); + bits = kmalloc(BIT_ARRAY_BYTES, GFP_NOFS); if (!bits) return -ENOMEM; memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long)); bits[0] = slot; + radix_tree_preload(GFP_NOFS); ret = radix_tree_insert(radix, slot, bits); + radix_tree_preload_end(); if (ret) return ret; } @@ -59,7 +61,7 @@ int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) if (!bits) return 0; clear_bit(bit_slot, bits + 1); - +#if 0 for (i = 1; i < BIT_ARRAY_BYTES / sizeof(unsigned long); i++) { if (bits[i]) { empty = 0; @@ -69,8 +71,11 @@ int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) if (empty) { bits = radix_tree_delete(radix, slot); + synchronize_rcu(); BUG_ON(!bits); + kfree(bits); } +#endif return 0; } diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b7f3fcb72c6..a0dfa2d6cb9 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -51,7 +51,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr); btrfs_set_header_generation(&cow_node->header, trans->transid); *cow_ret = cow; - mark_buffer_dirty(cow); + btrfs_mark_buffer_dirty(cow); btrfs_inc_ref(trans, root, buf); if (buf == root->node) { root->node = cow; @@ -62,7 +62,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root } else { btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, cow->b_blocknr); - mark_buffer_dirty(parent); + btrfs_mark_buffer_dirty(parent); btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); } btrfs_block_release(root, buf); @@ -312,11 +312,12 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(!child); root->node = child; path->nodes[level] = NULL; + clean_tree_block(trans, root, mid_buf); + wait_on_buffer(mid_buf); /* once for the path */ btrfs_block_release(root, mid_buf); /* once for the root ptr */ btrfs_block_release(root, mid_buf); - clean_tree_block(trans, root, mid_buf); return btrfs_free_extent(trans, root, blocknr, 1, 1); } parent = btrfs_buffer_node(parent_buf); @@ -351,8 +352,9 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; if (btrfs_header_nritems(&right->header) == 0) { u64 blocknr = right_buf->b_blocknr; - btrfs_block_release(root, right_buf); clean_tree_block(trans, root, right_buf); + wait_on_buffer(right_buf); + btrfs_block_release(root, right_buf); right_buf = NULL; right = NULL; wret = del_ptr(trans, root, path, level + 1, pslot + @@ -363,10 +365,11 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (wret) ret = wret; } else { - memcpy(&parent->ptrs[pslot + 1].key, - &right->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - mark_buffer_dirty(parent_buf); + btrfs_memcpy(root, parent, + &parent->ptrs[pslot + 1].key, + &right->ptrs[0].key, + sizeof(struct btrfs_disk_key)); + btrfs_mark_buffer_dirty(parent_buf); } } if (btrfs_header_nritems(&mid->header) == 1) { @@ -388,8 +391,9 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (btrfs_header_nritems(&mid->header) == 0) { /* we've managed to empty the middle node, drop it */ u64 blocknr = mid_buf->b_blocknr; - btrfs_block_release(root, mid_buf); clean_tree_block(trans, root, mid_buf); + wait_on_buffer(mid_buf); + btrfs_block_release(root, mid_buf); mid_buf = NULL; mid = NULL; wret = del_ptr(trans, root, path, level + 1, pslot); @@ -400,9 +404,10 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; } else { /* update the parent key to reflect our changes */ - memcpy(&parent->ptrs[pslot].key, &mid->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - mark_buffer_dirty(parent_buf); + btrfs_memcpy(root, parent, + &parent->ptrs[pslot].key, &mid->ptrs[0].key, + sizeof(struct btrfs_disk_key)); + btrfs_mark_buffer_dirty(parent_buf); } /* update the path */ @@ -544,8 +549,8 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root if (!path->nodes[i]) break; t = btrfs_buffer_node(path->nodes[i]); - memcpy(&t->ptrs[tslot].key, key, sizeof(*key)); - mark_buffer_dirty(path->nodes[i]); + btrfs_memcpy(root, t, &t->ptrs[tslot].key, key, sizeof(*key)); + btrfs_mark_buffer_dirty(path->nodes[i]); if (tslot != 0) break; } @@ -580,17 +585,17 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root if (src_nritems < push_items) push_items = src_nritems; - memcpy(dst->ptrs + dst_nritems, src->ptrs, - push_items * sizeof(struct btrfs_key_ptr)); + btrfs_memcpy(root, dst, dst->ptrs + dst_nritems, src->ptrs, + push_items * sizeof(struct btrfs_key_ptr)); if (push_items < src_nritems) { - memmove(src->ptrs, src->ptrs + push_items, + btrfs_memmove(root, src, src->ptrs, src->ptrs + push_items, (src_nritems - push_items) * sizeof(struct btrfs_key_ptr)); } btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - mark_buffer_dirty(src_buf); - mark_buffer_dirty(dst_buf); + btrfs_mark_buffer_dirty(src_buf); + btrfs_mark_buffer_dirty(dst_buf); return ret; } @@ -629,16 +634,18 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct if (max_push < push_items) push_items = max_push; - memmove(dst->ptrs + push_items, dst->ptrs, - dst_nritems * sizeof(struct btrfs_key_ptr)); - memcpy(dst->ptrs, src->ptrs + src_nritems - push_items, - push_items * sizeof(struct btrfs_key_ptr)); + btrfs_memmove(root, dst, dst->ptrs + push_items, dst->ptrs, + dst_nritems * sizeof(struct btrfs_key_ptr)); + + btrfs_memcpy(root, dst, dst->ptrs, + src->ptrs + src_nritems - push_items, + push_items * sizeof(struct btrfs_key_ptr)); btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - mark_buffer_dirty(src_buf); - mark_buffer_dirty(dst_buf); + btrfs_mark_buffer_dirty(src_buf); + btrfs_mark_buffer_dirty(dst_buf); return ret; } @@ -674,10 +681,11 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root lower_key = &((struct btrfs_leaf *)lower)->items[0].key; else lower_key = &lower->ptrs[0].key; - memcpy(&c->ptrs[0].key, lower_key, sizeof(struct btrfs_disk_key)); + btrfs_memcpy(root, c, &c->ptrs[0].key, lower_key, + sizeof(struct btrfs_disk_key)); btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->b_blocknr); - mark_buffer_dirty(t); + btrfs_mark_buffer_dirty(t); /* the super has an extra ref to root->node */ btrfs_block_release(root, root->node); @@ -712,13 +720,15 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) BUG(); if (slot != nritems) { - memmove(lower->ptrs + slot + 1, lower->ptrs + slot, - (nritems - slot) * sizeof(struct btrfs_key_ptr)); + btrfs_memmove(root, lower, lower->ptrs + slot + 1, + lower->ptrs + slot, + (nritems - slot) * sizeof(struct btrfs_key_ptr)); } - memcpy(&lower->ptrs[slot].key, key, sizeof(struct btrfs_disk_key)); + btrfs_memcpy(root, lower, &lower->ptrs[slot].key, + key, sizeof(struct btrfs_disk_key)); btrfs_set_node_blockptr(lower, slot, blocknr); btrfs_set_header_nritems(&lower->header, nritems + 1); - mark_buffer_dirty(path->nodes[level]); + btrfs_mark_buffer_dirty(path->nodes[level]); return 0; } @@ -761,14 +771,14 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_parentid(&split->header, btrfs_header_parentid(btrfs_buffer_header(root->node))); mid = (c_nritems + 1) / 2; - memcpy(split->ptrs, c->ptrs + mid, - (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); + btrfs_memcpy(root, split, split->ptrs, c->ptrs + mid, + (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); btrfs_set_header_nritems(&split->header, c_nritems - mid); btrfs_set_header_nritems(&c->header, mid); ret = 0; - mark_buffer_dirty(t); - mark_buffer_dirty(split_buffer); + btrfs_mark_buffer_dirty(t); + btrfs_mark_buffer_dirty(split_buffer); wret = insert_ptr(trans, root, path, &split->ptrs[0].key, split_buffer->b_blocknr, path->slots[level + 1] + 1, level + 1); @@ -875,17 +885,22 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root push_space = btrfs_item_end(left->items + left_nritems - push_items); push_space -= leaf_data_end(root, left); /* make room in the right data area */ - memmove(btrfs_leaf_data(right) + leaf_data_end(root, right) - - push_space, btrfs_leaf_data(right) + leaf_data_end(root, right), - BTRFS_LEAF_DATA_SIZE(root) - leaf_data_end(root, right)); + btrfs_memmove(root, right, btrfs_leaf_data(right) + + leaf_data_end(root, right) - push_space, + btrfs_leaf_data(right) + + leaf_data_end(root, right), BTRFS_LEAF_DATA_SIZE(root) - + leaf_data_end(root, right)); /* copy from the left data area */ - memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - push_space, - btrfs_leaf_data(left) + leaf_data_end(root, left), push_space); - memmove(right->items + push_items, right->items, + btrfs_memcpy(root, right, btrfs_leaf_data(right) + + BTRFS_LEAF_DATA_SIZE(root) - push_space, + btrfs_leaf_data(left) + leaf_data_end(root, left), + push_space); + btrfs_memmove(root, right, right->items + push_items, right->items, right_nritems * sizeof(struct btrfs_item)); /* copy the items from left to right */ - memcpy(right->items, left->items + left_nritems - push_items, - push_items * sizeof(struct btrfs_item)); + btrfs_memcpy(root, right, right->items, left->items + + left_nritems - push_items, + push_items * sizeof(struct btrfs_item)); /* update the item pointers */ right_nritems += push_items; @@ -899,11 +914,11 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root left_nritems -= push_items; btrfs_set_header_nritems(&left->header, left_nritems); - mark_buffer_dirty(left_buf); - mark_buffer_dirty(right_buf); - memcpy(&upper_node->ptrs[slot + 1].key, + btrfs_mark_buffer_dirty(left_buf); + btrfs_mark_buffer_dirty(right_buf); + btrfs_memcpy(root, upper_node, &upper_node->ptrs[slot + 1].key, &right->items[0].key, sizeof(struct btrfs_disk_key)); - mark_buffer_dirty(upper); + btrfs_mark_buffer_dirty(upper); /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left_nritems) { @@ -977,14 +992,16 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } /* push data from right to left */ - memcpy(left->items + btrfs_header_nritems(&left->header), - right->items, push_items * sizeof(struct btrfs_item)); + btrfs_memcpy(root, left, left->items + + btrfs_header_nritems(&left->header), + right->items, push_items * sizeof(struct btrfs_item)); push_space = BTRFS_LEAF_DATA_SIZE(root) - btrfs_item_offset(right->items + push_items -1); - memcpy(btrfs_leaf_data(left) + leaf_data_end(root, left) - push_space, - btrfs_leaf_data(right) + - btrfs_item_offset(right->items + push_items - 1), - push_space); + btrfs_memcpy(root, left, btrfs_leaf_data(left) + + leaf_data_end(root, left) - push_space, + btrfs_leaf_data(right) + + btrfs_item_offset(right->items + push_items - 1), + push_space); old_left_nritems = btrfs_header_nritems(&left->header); BUG_ON(old_left_nritems < 0); @@ -1000,10 +1017,11 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root /* fixup right node */ push_space = btrfs_item_offset(right->items + push_items - 1) - leaf_data_end(root, right); - memmove(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - - push_space, btrfs_leaf_data(right) + - leaf_data_end(root, right), push_space); - memmove(right->items, right->items + push_items, + btrfs_memmove(root, right, btrfs_leaf_data(right) + + BTRFS_LEAF_DATA_SIZE(root) - push_space, + btrfs_leaf_data(right) + + leaf_data_end(root, right), push_space); + btrfs_memmove(root, right, right->items, right->items + push_items, (btrfs_header_nritems(&right->header) - push_items) * sizeof(struct btrfs_item)); btrfs_set_header_nritems(&right->header, @@ -1017,8 +1035,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root push_space = btrfs_item_offset(right->items + i); } - mark_buffer_dirty(t); - mark_buffer_dirty(right_buf); + btrfs_mark_buffer_dirty(t); + btrfs_mark_buffer_dirty(right_buf); wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); if (wret) @@ -1110,11 +1128,12 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_header_parentid(btrfs_buffer_header(root->node))); data_copy_size = btrfs_item_end(l->items + mid) - leaf_data_end(root, l); - memcpy(right->items, l->items + mid, - (nritems - mid) * sizeof(struct btrfs_item)); - memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - - data_copy_size, btrfs_leaf_data(l) + - leaf_data_end(root, l), data_copy_size); + btrfs_memcpy(root, right, right->items, l->items + mid, + (nritems - mid) * sizeof(struct btrfs_item)); + btrfs_memcpy(root, right, + btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - + data_copy_size, btrfs_leaf_data(l) + + leaf_data_end(root, l), data_copy_size); rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - btrfs_item_end(l->items + mid); @@ -1129,8 +1148,8 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root right_buffer->b_blocknr, path->slots[1] + 1, 1); if (wret) ret = wret; - mark_buffer_dirty(right_buffer); - mark_buffer_dirty(l_buf); + btrfs_mark_buffer_dirty(right_buffer); + btrfs_mark_buffer_dirty(l_buf); BUG_ON(path->slots[0] != slot); if (mid <= slot) { btrfs_block_release(root, path->nodes[0]); @@ -1200,22 +1219,23 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root } /* shift the items */ - memmove(leaf->items + slot + 1, leaf->items + slot, - (nritems - slot) * sizeof(struct btrfs_item)); + btrfs_memmove(root, leaf, leaf->items + slot + 1, + leaf->items + slot, + (nritems - slot) * sizeof(struct btrfs_item)); /* shift the data */ - memmove(btrfs_leaf_data(leaf) + data_end - data_size, - btrfs_leaf_data(leaf) + - data_end, old_data - data_end); + btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + data_end - data_size, btrfs_leaf_data(leaf) + + data_end, old_data - data_end); data_end = old_data; } /* setup the item for the new data */ - memcpy(&leaf->items[slot].key, &disk_key, - sizeof(struct btrfs_disk_key)); + btrfs_memcpy(root, leaf, &leaf->items[slot].key, &disk_key, + sizeof(struct btrfs_disk_key)); btrfs_set_item_offset(leaf->items + slot, data_end - data_size); btrfs_set_item_size(leaf->items + slot, data_size); btrfs_set_header_nritems(&leaf->header, nritems + 1); - mark_buffer_dirty(leaf_buf); + btrfs_mark_buffer_dirty(leaf_buf); ret = 0; if (slot == 0) @@ -1245,8 +1265,9 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root if (!ret) { ptr = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], u8); - memcpy(ptr, data, data_size); - mark_buffer_dirty(path.nodes[0]); + btrfs_memcpy(root, path.nodes[0]->b_data, + ptr, data, data_size); + btrfs_mark_buffer_dirty(path.nodes[0]); } btrfs_release_path(root, &path); return ret; @@ -1271,8 +1292,10 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, node = btrfs_buffer_node(parent); nritems = btrfs_header_nritems(&node->header); if (slot != nritems -1) { - memmove(node->ptrs + slot, node->ptrs + slot + 1, - sizeof(struct btrfs_key_ptr) * (nritems - slot - 1)); + btrfs_memmove(root, node, node->ptrs + slot, + node->ptrs + slot + 1, + sizeof(struct btrfs_key_ptr) * + (nritems - slot - 1)); } nritems--; btrfs_set_header_nritems(&node->header, nritems); @@ -1287,7 +1310,7 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; } - mark_buffer_dirty(parent); + btrfs_mark_buffer_dirty(parent); return ret; } @@ -1317,16 +1340,18 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (slot != nritems - 1) { int i; int data_end = leaf_data_end(root, leaf); - memmove(btrfs_leaf_data(leaf) + data_end + dsize, - btrfs_leaf_data(leaf) + data_end, - doff - data_end); + btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + data_end + dsize, + btrfs_leaf_data(leaf) + data_end, + doff - data_end); for (i = slot + 1; i < nritems; i++) { u32 ioff = btrfs_item_offset(leaf->items + i); btrfs_set_item_offset(leaf->items + i, ioff + dsize); } - memmove(leaf->items + slot, leaf->items + slot + 1, - sizeof(struct btrfs_item) * - (nritems - slot - 1)); + btrfs_memmove(root, leaf, leaf->items + slot, + leaf->items + slot + 1, + sizeof(struct btrfs_item) * + (nritems - slot - 1)); } btrfs_set_header_nritems(&leaf->header, nritems - 1); nritems--; @@ -1336,6 +1361,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, btrfs_set_header_level(&leaf->header, 0); } else { clean_tree_block(trans, root, leaf_buf); + wait_on_buffer(leaf_buf); wret = del_ptr(trans, root, path, 1, path->slots[1]); if (wret) ret = wret; @@ -1373,6 +1399,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (btrfs_header_nritems(&leaf->header) == 0) { u64 blocknr = leaf_buf->b_blocknr; clean_tree_block(trans, root, leaf_buf); + wait_on_buffer(leaf_buf); wret = del_ptr(trans, root, path, 1, slot); if (wret) ret = wret; @@ -1382,11 +1409,11 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; } else { - mark_buffer_dirty(leaf_buf); + btrfs_mark_buffer_dirty(leaf_buf); btrfs_block_release(root, leaf_buf); } } else { - mark_buffer_dirty(leaf_buf); + btrfs_mark_buffer_dirty(leaf_buf); } } return ret; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index df1a025a771..9ec0d65ebe9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2,6 +2,7 @@ #define __BTRFS__ #include +#include #include "bit-radix.h" struct btrfs_trans_handle; @@ -834,6 +835,37 @@ static inline struct btrfs_root *btrfs_sb(struct super_block *sb) return sb->s_fs_info; } +static inline void btrfs_check_bounds(void *vptr, size_t len, + void *vcontainer, size_t container_len) +{ + char *ptr = vptr; + char *container = vcontainer; + WARN_ON(ptr < container); + WARN_ON(ptr + len > container + container_len); +} + +static inline void btrfs_memcpy(struct btrfs_root *root, + void *dst_block, + void *dst, const void *src, size_t nr) +{ + btrfs_check_bounds(dst, nr, dst_block, root->fs_info->sb->s_blocksize); + memcpy(dst, src, nr); +} + +static inline void btrfs_memmove(struct btrfs_root *root, + void *dst_block, + void *dst, void *src, size_t nr) +{ + btrfs_check_bounds(dst, nr, dst_block, root->fs_info->sb->s_blocksize); + memmove(dst, src, nr); +} + +static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) +{ + WARN_ON(!atomic_read(&bh->b_count)); + mark_buffer_dirty(bh); +} + /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 18d3e168ad0..f55c89472ac 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -34,10 +34,8 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_dir_flags(dir_item, 0); btrfs_set_dir_name_len(dir_item, name_len); name_ptr = (char *)(dir_item + 1); - memcpy(name_ptr, name, name_len); - if (name_ptr + name_len > path.nodes[0]->b_data + 4096) - WARN_ON(1); - mark_buffer_dirty(path.nodes[0]); + btrfs_memcpy(root, path.nodes[0]->b_data, name_ptr, name, name_len); + btrfs_mark_buffer_dirty(path.nodes[0]); out: btrfs_release_path(root, &path); return ret; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6c010463b9d..bb133104e2e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -8,6 +8,17 @@ #include "disk-io.h" #include "transaction.h" +#define PATTERN 0xDEADBEEFUL +static inline void check_pattern(struct buffer_head *buf) +{ + if (buf->b_private != (void *)PATTERN) + WARN_ON(1); +} + +static inline void set_pattern(struct buffer_head *buf) +{ + buf->b_private = (void *)PATTERN; +} static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { @@ -51,8 +62,10 @@ struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) } while (bh != head); out_unlock: unlock_page(page); - if (ret) + if (ret) { touch_buffer(ret); + check_pattern(ret); + } page_cache_release(page); return ret; } @@ -82,6 +95,7 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, bh->b_bdev = root->fs_info->sb->s_bdev; bh->b_blocknr = first_block; set_buffer_mapped(bh); + set_pattern(bh); } if (bh->b_blocknr == blocknr) { ret = bh; @@ -225,6 +239,7 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) if (!buffer_uptodate(bh)) goto fail; csum_tree_block(root, bh, 1); + set_pattern(bh); } else { unlock_buffer(bh); } @@ -240,6 +255,7 @@ fail: int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf) { + WARN_ON(atomic_read(&buf->b_count) == 0); mark_buffer_dirty(buf); return 0; } @@ -247,6 +263,7 @@ int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf) { + WARN_ON(atomic_read(&buf->b_count) == 0); clear_buffer_dirty(buf); return 0; } @@ -431,6 +448,7 @@ int close_ctree(struct btrfs_root *root) void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) { + check_pattern(buf); brelse(buf); } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b2ae8e768b6..c4194dab7a3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -39,7 +39,7 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(item); btrfs_set_extent_refs(item, refs + 1); - mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path.nodes[0]); btrfs_release_path(root->fs_info->extent_root, &path); finish_current_insert(trans, root->fs_info->extent_root); @@ -177,10 +177,10 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) header = btrfs_buffer_header(bh); if (btrfs_header_generation(header) == root->fs_info->running_transaction->transid) { - brelse(bh); + btrfs_block_release(root, bh); return 0; } - brelse(bh); + btrfs_block_release(root, bh); } err = set_radix_bit(&root->fs_info->pinned_radix, blocknr); } else { @@ -224,7 +224,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ei->refs == 0); refs = btrfs_extent_refs(ei) - 1; btrfs_set_extent_refs(ei, refs); - mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path.nodes[0]); if (refs == 0) { u64 super_blocks_used; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 2d2c23ca7cb..c3992b7b0c6 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -34,7 +34,7 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_offset(item, 0); btrfs_set_file_extent_num_blocks(item, ins.offset); btrfs_set_file_extent_generation(item, trans->transid); - mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path.nodes[0]); *result = ins.objectid; btrfs_release_path(root, &path); return 0; @@ -81,7 +81,7 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, struct btrfs_csum_item); ret = 0; ret = btrfs_csum_data(root, data, len, item->csum); - mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path.nodes[0]); fail: btrfs_release_path(root, &path); return ret; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 1b2c5e04350..fa4adb81b14 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -109,7 +109,7 @@ int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], struct btrfs_inode_map_item); btrfs_cpu_key_to_disk(&inode_item->key, location); - mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path.nodes[0]); out: btrfs_release_path(inode_root, &path); return ret; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index a821b5d1e23..f37dab87de1 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -45,6 +45,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_leaf *l; int ret; int slot; + struct btrfs_root_item *update_item; btrfs_init_path(&path); ret = btrfs_search_slot(trans, root, key, &path, 0, 1); @@ -53,9 +54,9 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ret != 0); l = btrfs_buffer_leaf(path.nodes[0]); slot = path.slots[0]; - memcpy(btrfs_item_ptr(l, slot, struct btrfs_root_item), item, - sizeof(*item)); - mark_buffer_dirty(path.nodes[0]); + update_item = btrfs_item_ptr(l, slot, struct btrfs_root_item); + btrfs_memcpy(root, l, update_item, item, sizeof(*item)); + btrfs_mark_buffer_dirty(path.nodes[0]); out: btrfs_release_path(root, &path); return ret; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ab5419eea93..894a70bf26d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -557,7 +557,7 @@ static int btrfs_update_inode(struct btrfs_trans_handle *trans, struct btrfs_inode_item); fill_inode_item(inode_item, inode); - mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path.nodes[0]); failed: btrfs_release_path(root, &path); return 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a146463c504..84c4e278ce2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -66,6 +66,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, cur_trans->num_writers--; put_transaction(cur_trans); mutex_unlock(&root->fs_info->trans_mutex); + memset(trans, 0, sizeof(*trans)); kfree(trans); return 0; } -- cgit v1.2.3-70-g09d2 From 2c90e5d658424bc71b111eb5a972240d5d06fe86 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 2 Apr 2007 10:50:19 -0400 Subject: Btrfs: still corruption hunting Signed-off-by: Chris Mason --- fs/btrfs/bit-radix.c | 7 +-- fs/btrfs/btrfs_inode.h | 14 +++++ fs/btrfs/ctree.c | 42 ++++++++++---- fs/btrfs/ctree.h | 3 + fs/btrfs/disk-io.c | 84 ++++++++++++++-------------- fs/btrfs/disk-io.h | 4 +- fs/btrfs/extent-tree.c | 18 ++++-- fs/btrfs/super.c | 146 ++++++++++++++++++++++++++++++++++++++++++++----- fs/btrfs/transaction.c | 39 +++++++++---- fs/btrfs/transaction.h | 3 + 10 files changed, 265 insertions(+), 95 deletions(-) create mode 100644 fs/btrfs/btrfs_inode.h (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index 845422368f4..a50a1c44eab 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -4,6 +4,7 @@ #define BIT_ARRAY_BYTES 256 #define BIT_RADIX_BITS_PER_ARRAY ((BIT_ARRAY_BYTES - sizeof(unsigned long)) * 8) +extern struct kmem_cache *btrfs_bit_radix_cachep; int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) { unsigned long *bits; @@ -16,7 +17,7 @@ int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) bits = radix_tree_lookup(radix, slot); if (!bits) { - bits = kmalloc(BIT_ARRAY_BYTES, GFP_NOFS); + bits = kmem_cache_alloc(btrfs_bit_radix_cachep, GFP_NOFS); if (!bits) return -ENOMEM; memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long)); @@ -68,12 +69,10 @@ int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) break; } } - if (empty) { bits = radix_tree_delete(radix, slot); - synchronize_rcu(); BUG_ON(!bits); - kfree(bits); + kmem_cache_free(btrfs_bit_radix_cachep, bits); } #endif return 0; diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h new file mode 100644 index 00000000000..e159841650a --- /dev/null +++ b/fs/btrfs/btrfs_inode.h @@ -0,0 +1,14 @@ +#ifndef __BTRFS_I__ +#define __BTRFS_I__ + +struct btrfs_inode { + u32 magic; + struct inode vfs_inode; + u32 magic2; +}; +static inline struct btrfs_inode *BTRFS_I(struct inode *inode) +{ + return container_of(inode, struct btrfs_inode, vfs_inode); +} + +#endif diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index a0dfa2d6cb9..453ce835209 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -16,6 +16,16 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot); +struct btrfs_path *btrfs_alloc_path(void) +{ + return kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS); +} + +void btrfs_free_path(struct btrfs_path *p) +{ + kmem_cache_free(btrfs_path_cachep, p); +} + inline void btrfs_init_path(struct btrfs_path *p) { memset(p, 0, sizeof(*p)); @@ -47,17 +57,18 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root } cow = btrfs_alloc_free_block(trans, root); cow_node = btrfs_buffer_node(cow); + if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) + WARN_ON(1); memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr); btrfs_set_header_generation(&cow_node->header, trans->transid); - *cow_ret = cow; - btrfs_mark_buffer_dirty(cow); btrfs_inc_ref(trans, root, buf); if (buf == root->node) { root->node = cow; get_bh(cow); - if (buf != root->commit_root) + if (buf != root->commit_root) { btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); + } btrfs_block_release(root, buf); } else { btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, @@ -66,6 +77,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); } btrfs_block_release(root, buf); + *cow_ret = cow; return 0; } @@ -477,9 +489,12 @@ again: p->slots[level + 1], &cow_buf); b = cow_buf; + c = btrfs_buffer_node(b); } BUG_ON(!cow && ins_len); - c = btrfs_buffer_node(b); + if (level != btrfs_header_level(&c->header)) + WARN_ON(1); + level = btrfs_header_level(&c->header); p->nodes[level] = b; ret = check_block(root, p, level); if (ret) @@ -1257,19 +1272,22 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root data_size) { int ret = 0; - struct btrfs_path path; + struct btrfs_path *path; u8 *ptr; - btrfs_init_path(&path); - ret = btrfs_insert_empty_item(trans, root, &path, cpu_key, data_size); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); if (!ret) { - ptr = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), - path.slots[0], u8); - btrfs_memcpy(root, path.nodes[0]->b_data, + ptr = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], u8); + btrfs_memcpy(root, path->nodes[0]->b_data, ptr, data, data_size); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); } - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9ec0d65ebe9..d8e03bd797f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -7,6 +7,7 @@ struct btrfs_trans_handle; struct btrfs_transaction; +extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_MAGIC "_BtRfS_M" @@ -888,6 +889,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); +struct btrfs_path *btrfs_alloc_path(void); +void btrfs_free_path(struct btrfs_path *p); void btrfs_init_path(struct btrfs_path *p); int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bb133104e2e..2dbd55084a4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -8,18 +8,6 @@ #include "disk-io.h" #include "transaction.h" -#define PATTERN 0xDEADBEEFUL -static inline void check_pattern(struct buffer_head *buf) -{ - if (buf->b_private != (void *)PATTERN) - WARN_ON(1); -} - -static inline void set_pattern(struct buffer_head *buf) -{ - buf->b_private = (void *)PATTERN; -} - static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { struct btrfs_node *node = btrfs_buffer_node(buf); @@ -35,6 +23,8 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) { + return sb_find_get_block(root->fs_info->sb, blocknr); +#if 0 struct address_space *mapping = root->fs_info->btree_inode->i_mapping; int blockbits = root->fs_info->sb->s_blocksize_bits; unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); @@ -43,6 +33,7 @@ struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) struct buffer_head *head; struct buffer_head *ret = NULL; + page = find_lock_page(mapping, index); if (!page) return NULL; @@ -64,15 +55,17 @@ out_unlock: unlock_page(page); if (ret) { touch_buffer(ret); - check_pattern(ret); } page_cache_release(page); return ret; +#endif } struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, u64 blocknr) { + return sb_getblk(root->fs_info->sb, blocknr); +#if 0 struct address_space *mapping = root->fs_info->btree_inode->i_mapping; int blockbits = root->fs_info->sb->s_blocksize_bits; unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); @@ -95,7 +88,6 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, bh->b_bdev = root->fs_info->sb->s_bdev; bh->b_blocknr = first_block; set_buffer_mapped(bh); - set_pattern(bh); } if (bh->b_blocknr == blocknr) { ret = bh; @@ -111,6 +103,7 @@ out_unlock: touch_buffer(ret); page_cache_release(page); return ret; +#endif } static sector_t max_block(struct block_device *bdev) @@ -225,6 +218,8 @@ static struct address_space_operations btree_aops = { struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) { + return sb_bread(root->fs_info->sb, blocknr); +#if 0 struct buffer_head *bh = NULL; bh = btrfs_find_create_tree_block(root, blocknr); @@ -239,7 +234,6 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) if (!buffer_uptodate(bh)) goto fail; csum_tree_block(root, bh, 1); - set_pattern(bh); } else { unlock_buffer(bh); } @@ -250,6 +244,7 @@ fail: brelse(bh); return NULL; +#endif } int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -268,14 +263,14 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, return 0; } -static int __setup_root(struct btrfs_super_block *super, +static int __setup_root(int blocksize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) { root->node = NULL; root->commit_root = NULL; - root->blocksize = btrfs_super_blocksize(super); + root->blocksize = blocksize; root->ref_cows = 0; root->fs_info = fs_info; memset(&root->root_key, 0, sizeof(root->root_key)); @@ -283,7 +278,7 @@ static int __setup_root(struct btrfs_super_block *super, return 0; } -static int find_and_setup_root(struct btrfs_super_block *super, +static int find_and_setup_root(int blocksize, struct btrfs_root *tree_root, struct btrfs_fs_info *fs_info, u64 objectid, @@ -291,7 +286,7 @@ static int find_and_setup_root(struct btrfs_super_block *super, { int ret; - __setup_root(super, root, fs_info, objectid); + __setup_root(blocksize, root, fs_info, objectid); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); @@ -302,9 +297,7 @@ static int find_and_setup_root(struct btrfs_super_block *super, return 0; } -struct btrfs_root *open_ctree(struct super_block *sb, - struct buffer_head *sb_buffer, - struct btrfs_super_block *disk_super) +struct btrfs_root *open_ctree(struct super_block *sb) { struct btrfs_root *root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); @@ -317,13 +310,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), GFP_NOFS); int ret; + struct btrfs_super_block *disk_super; - if (!btrfs_super_root(disk_super)) { - return NULL; - } init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); - sb_set_blocksize(sb, sb_buffer->b_size); + sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; fs_info->fs_root = root; fs_info->tree_root = tree_root; @@ -331,55 +322,59 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->inode_root = inode_root; fs_info->last_inode_alloc = 0; fs_info->last_inode_alloc_dirid = 0; - fs_info->disk_super = disk_super; fs_info->sb = sb; + fs_info->btree_inode = NULL; +#if 0 fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; + fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; insert_inode_hash(fs_info->btree_inode); - mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); +#endif fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); spin_lock_init(&fs_info->hash_lock); - if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) { printk("failed to allocate sha256 hash\n"); return NULL; } - mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); - __setup_root(disk_super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); - - fs_info->sb_buffer = read_tree_block(tree_root, sb_buffer->b_blocknr); + __setup_root(sb->s_blocksize, tree_root, + fs_info, BTRFS_ROOT_TREE_OBJECTID); + fs_info->sb_buffer = read_tree_block(tree_root, + BTRFS_SUPER_INFO_OFFSET / + sb->s_blocksize); if (!fs_info->sb_buffer) { printk("failed2\n"); return NULL; } - brelse(sb_buffer); - sb_buffer = NULL; disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; + if (!btrfs_super_root(disk_super)) { + return NULL; + } fs_info->disk_super = disk_super; - tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); - ret = find_and_setup_root(disk_super, tree_root, fs_info, + mutex_lock(&fs_info->fs_mutex); + ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); BUG_ON(ret); - ret = find_and_setup_root(disk_super, tree_root, fs_info, + ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_INODE_MAP_OBJECTID, inode_root); BUG_ON(ret); - ret = find_and_setup_root(disk_super, tree_root, fs_info, + ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_FS_TREE_OBJECTID, root); + mutex_unlock(&fs_info->fs_mutex); BUG_ON(ret); root->commit_root = root->node; get_bh(root->node); @@ -392,9 +387,11 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct buffer_head *bh = root->fs_info->sb_buffer; + btrfs_set_super_root(root->fs_info->disk_super, root->fs_info->tree_root->node->b_blocknr); lock_buffer(bh); + WARN_ON(atomic_read(&bh->b_count) < 1); clear_buffer_dirty(bh); csum_tree_block(root, bh, 0); bh->b_end_io = end_buffer_write_sync; @@ -413,6 +410,7 @@ int close_ctree(struct btrfs_root *root) int ret; struct btrfs_trans_handle *trans; + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_commit_transaction(trans, root); /* run commit again to drop the original snapshot */ @@ -421,6 +419,7 @@ int close_ctree(struct btrfs_root *root) ret = btrfs_write_and_wait_transaction(NULL, root); BUG_ON(ret); write_ctree_super(NULL, root); + mutex_unlock(&root->fs_info->fs_mutex); if (root->node) btrfs_block_release(root, root->node); @@ -436,8 +435,8 @@ int close_ctree(struct btrfs_root *root) btrfs_block_release(root, root->commit_root); btrfs_block_release(root, root->fs_info->sb_buffer); crypto_free_hash(root->fs_info->hash_tfm); - truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0); - iput(root->fs_info->btree_inode); + // truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0); + // iput(root->fs_info->btree_inode); kfree(root->fs_info->extent_root); kfree(root->fs_info->inode_root); kfree(root->fs_info->tree_root); @@ -448,7 +447,6 @@ int close_ctree(struct btrfs_root *root) void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) { - check_pattern(buf); - brelse(buf); + // brelse(buf); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index f6998e2192c..ac6764ba8aa 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -31,9 +31,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); -struct btrfs_root *open_ctree(struct super_block *sb, - struct buffer_head *sb_buffer, - struct btrfs_super_block *disk_super); +struct btrfs_root *open_ctree(struct super_block *sb); int close_ctree(struct btrfs_root *root); void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf); int write_ctree_super(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c4194dab7a3..37b87e28a2f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -173,12 +173,16 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) if (!pending) { bh = btrfs_find_tree_block(root, blocknr); - if (bh && buffer_uptodate(bh)) { - header = btrfs_buffer_header(bh); - if (btrfs_header_generation(header) == - root->fs_info->running_transaction->transid) { - btrfs_block_release(root, bh); - return 0; + if (bh) { + if (buffer_uptodate(bh)) { + u64 transid = + root->fs_info->running_transaction->transid; + header = btrfs_buffer_header(bh); + if (btrfs_header_generation(header) == + transid) { + btrfs_block_release(root, bh); + return 0; + } } btrfs_block_release(root, bh); } @@ -539,6 +543,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root */ while(*level >= 0) { cur = path->nodes[*level]; + if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) + WARN_ON(1); if (path->slots[*level] >= btrfs_header_nritems(btrfs_buffer_header(cur))) break; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 894a70bf26d..6969b672b57 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -14,6 +14,7 @@ #include "ctree.h" #include "disk-io.h" #include "transaction.h" +#include "btrfs_inode.h" #define BTRFS_SUPER_MAGIC 0x9123682E @@ -24,6 +25,14 @@ static struct inode_operations btrfs_file_inode_operations; static struct address_space_operations btrfs_aops; static struct file_operations btrfs_file_operations; +static int check_inode(struct inode *inode) +{ + struct btrfs_inode *ei = BTRFS_I(inode); + WARN_ON(ei->magic != 0xDEADBEEF); + WARN_ON(ei->magic2 != 0xDEADBEAF); + return 0; +} + static void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path path; @@ -34,6 +43,7 @@ static void btrfs_read_locked_inode(struct inode *inode) btrfs_init_path(&path); mutex_lock(&root->fs_info->fs_mutex); + check_inode(inode); ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0); if (ret) { btrfs_release_path(root, &path); @@ -41,6 +51,7 @@ static void btrfs_read_locked_inode(struct inode *inode) make_bad_inode(inode); return; } + check_inode(inode); inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], struct btrfs_inode_item); @@ -60,6 +71,7 @@ static void btrfs_read_locked_inode(struct inode *inode) inode->i_generation = btrfs_inode_generation(inode_item); btrfs_release_path(root, &path); mutex_unlock(&root->fs_info->fs_mutex); + check_inode(inode); switch (inode->i_mode & S_IFMT) { #if 0 default: @@ -80,6 +92,7 @@ static void btrfs_read_locked_inode(struct inode *inode) // inode->i_op = &page_symlink_inode_operations; break; } + check_inode(inode); return; } @@ -347,6 +360,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, namelen, 0); if (ret || !btrfs_match_dir_item_name(root, &path, name, namelen)) { *ino = 0; + ret = 0; goto out; } di = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], @@ -354,6 +368,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, *ino = btrfs_dir_objectid(di); out: btrfs_release_path(root, &path); + check_inode(dir); return ret; } @@ -367,7 +382,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_inode_by_name(dir, dentry, &ino); mutex_unlock(&root->fs_info->fs_mutex); @@ -378,7 +392,9 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, inode = iget(dir->i_sb, ino); if (!inode) return ERR_PTR(-EACCES); + check_inode(inode); } + check_inode(dir); return d_splice_alias(inode, dentry); } @@ -471,23 +487,14 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) struct inode * inode; struct dentry * root_dentry; struct btrfs_super_block *disk_super; - struct buffer_head *bh; struct btrfs_root *root; sb->s_maxbytes = MAX_LFS_FILESIZE; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = BTRFS_SUPER_MAGIC; sb->s_op = &btrfs_super_ops; sb->s_time_gran = 1; - bh = sb_bread(sb, BTRFS_SUPER_INFO_OFFSET / sb->s_blocksize); - if (!bh) { - printk("btrfs: unable to read on disk super\n"); - return -EIO; - } - disk_super = (struct btrfs_super_block *)bh->b_data; - root = open_ctree(sb, bh, disk_super); + root = open_ctree(sb); if (!root) { printk("btrfs: open_ctree failed\n"); @@ -533,6 +540,7 @@ static void fill_inode_item(struct btrfs_inode_item *item, btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); btrfs_set_inode_nblocks(item, inode->i_blocks); btrfs_set_inode_generation(item, inode->i_generation); + check_inode(inode); } static int btrfs_update_inode(struct btrfs_trans_handle *trans, @@ -560,6 +568,7 @@ static int btrfs_update_inode(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(path.nodes[0]); failed: btrfs_release_path(root, &path); + check_inode(inode); return 0; } @@ -577,6 +586,7 @@ static int btrfs_write_inode(struct inode *inode, int wait) else btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + check_inode(inode); return ret; } @@ -594,6 +604,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (!inode) return ERR_PTR(-ENOMEM); + check_inode(inode); ret = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); BUG_ON(ret); @@ -616,6 +627,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, BUG_ON(ret); insert_inode_hash(inode); + check_inode(inode); + check_inode(dir); return inode; } @@ -632,7 +645,8 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, ret = btrfs_update_inode(trans, btrfs_sb(inode->i_sb), dentry->d_parent->d_inode); } - + check_inode(inode); + check_inode(dentry->d_parent->d_inode); return ret; } @@ -644,6 +658,9 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans, d_instantiate(dentry, inode); return 0; } + if (err > 0) + err = -EEXIST; + check_inode(inode); return err; } @@ -675,6 +692,9 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, out_unlock: btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + check_inode(inode); + check_inode(dir); + if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -755,11 +775,11 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) sb->s_dirt = 0; if (!wait) { - filemap_flush(root->fs_info->btree_inode->i_mapping); + // filemap_flush(root->fs_info->btree_inode->i_mapping); + filemap_flush(root->fs_info->sb->s_bdev->bd_inode->i_mapping); return 0; } - filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); - + filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); @@ -1242,6 +1262,95 @@ static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, return retval; } +static struct kmem_cache *btrfs_inode_cachep; +struct kmem_cache *btrfs_trans_handle_cachep; +struct kmem_cache *btrfs_transaction_cachep; +struct kmem_cache *btrfs_bit_radix_cachep; +struct kmem_cache *btrfs_path_cachep; + +/* + * Called inside transaction, so use GFP_NOFS + */ +static struct inode *btrfs_alloc_inode(struct super_block *sb) +{ + struct btrfs_inode *ei; + + ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); + if (!ei) + return NULL; + ei->magic = 0xDEADBEEF; + ei->magic2 = 0xDEADBEAF; + return &ei->vfs_inode; +} + +static void btrfs_destroy_inode(struct inode *inode) +{ + struct btrfs_inode *ei = BTRFS_I(inode); + WARN_ON(ei->magic != 0xDEADBEEF); + WARN_ON(ei->magic2 != 0xDEADBEAF); + WARN_ON(!list_empty(&inode->i_dentry)); + WARN_ON(inode->i_ino == 1); + WARN_ON(inode->i_data.nrpages); + + ei->magic = 0; + ei->magic2 = 0; + kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); +} + +static void init_once(void * foo, struct kmem_cache * cachep, + unsigned long flags) +{ + struct btrfs_inode *ei = (struct btrfs_inode *) foo; + + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) { + inode_init_once(&ei->vfs_inode); + } +} + +static int init_inodecache(void) +{ + btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache", + sizeof(struct btrfs_inode), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + init_once, NULL); + btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache", + sizeof(struct btrfs_trans_handle), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + NULL, NULL); + btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache", + sizeof(struct btrfs_transaction), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + NULL, NULL); + btrfs_path_cachep = kmem_cache_create("btrfs_path_cache", + sizeof(struct btrfs_transaction), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + NULL, NULL); + btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix", + 256, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD | + SLAB_DESTROY_BY_RCU), + NULL, NULL); + if (btrfs_inode_cachep == NULL || btrfs_trans_handle_cachep == NULL || + btrfs_transaction_cachep == NULL || btrfs_bit_radix_cachep == NULL) + return -ENOMEM; + return 0; +} + +static void destroy_inodecache(void) +{ + kmem_cache_destroy(btrfs_inode_cachep); + kmem_cache_destroy(btrfs_trans_handle_cachep); + kmem_cache_destroy(btrfs_transaction_cachep); + kmem_cache_destroy(btrfs_bit_radix_cachep); + kmem_cache_destroy(btrfs_path_cachep); +} + static int btrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { @@ -1265,6 +1374,8 @@ static struct super_operations btrfs_super_ops = { .write_super = btrfs_write_super, .sync_fs = btrfs_sync_fs, .write_inode = btrfs_write_inode, + .alloc_inode = btrfs_alloc_inode, + .destroy_inode = btrfs_destroy_inode, }; static struct inode_operations btrfs_dir_inode_operations = { @@ -1305,12 +1416,17 @@ static struct file_operations btrfs_file_operations = { static int __init init_btrfs_fs(void) { + int err; printk("btrfs loaded!\n"); + err = init_inodecache(); + if (err) + return err; return register_filesystem(&btrfs_fs_type); } static void __exit exit_btrfs_fs(void) { + destroy_inodecache(); unregister_filesystem(&btrfs_fs_type); printk("btrfs unloaded\n"); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 84c4e278ce2..72b52e1e0b1 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -5,13 +5,20 @@ #include "transaction.h" static int total_trans = 0; +extern struct kmem_cache *btrfs_trans_handle_cachep; +extern struct kmem_cache *btrfs_transaction_cachep; + +#define TRANS_MAGIC 0xE1E10E static void put_transaction(struct btrfs_transaction *transaction) { + WARN_ON(transaction->use_count == 0); transaction->use_count--; + WARN_ON(transaction->magic != TRANS_MAGIC); if (transaction->use_count == 0) { WARN_ON(total_trans == 0); total_trans--; - kfree(transaction); + memset(transaction, 0, sizeof(*transaction)); + kmem_cache_free(btrfs_transaction_cachep, transaction); } } @@ -20,7 +27,8 @@ static int join_transaction(struct btrfs_root *root) struct btrfs_transaction *cur_trans; cur_trans = root->fs_info->running_transaction; if (!cur_trans) { - cur_trans = kmalloc(sizeof(*cur_trans), GFP_NOFS); + cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, + GFP_NOFS); total_trans++; BUG_ON(!cur_trans); root->fs_info->running_transaction = cur_trans; @@ -28,6 +36,7 @@ static int join_transaction(struct btrfs_root *root) cur_trans->transid = root->root_key.offset + 1; init_waitqueue_head(&cur_trans->writer_wait); init_waitqueue_head(&cur_trans->commit_wait); + cur_trans->magic = TRANS_MAGIC; cur_trans->in_commit = 0; cur_trans->use_count = 1; cur_trans->commit_done = 0; @@ -39,7 +48,8 @@ static int join_transaction(struct btrfs_root *root) struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, int num_blocks) { - struct btrfs_trans_handle *h = kmalloc(sizeof(*h), GFP_NOFS); + struct btrfs_trans_handle *h = + kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); int ret; mutex_lock(&root->fs_info->trans_mutex); @@ -51,6 +61,7 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, h->blocks_used = 0; root->fs_info->running_transaction->use_count++; mutex_unlock(&root->fs_info->trans_mutex); + h->magic = h->magic2 = TRANS_MAGIC; return h; } @@ -58,6 +69,8 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_transaction *cur_trans; + WARN_ON(trans->magic != TRANS_MAGIC); + WARN_ON(trans->magic2 != TRANS_MAGIC); mutex_lock(&root->fs_info->trans_mutex); cur_trans = root->fs_info->running_transaction; WARN_ON(cur_trans->num_writers < 1); @@ -67,7 +80,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, put_transaction(cur_trans); mutex_unlock(&root->fs_info->trans_mutex); memset(trans, 0, sizeof(*trans)); - kfree(trans); + kmem_cache_free(btrfs_trans_handle_cachep, trans); return 0; } @@ -75,7 +88,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); + filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); return 0; } @@ -137,6 +150,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->trans_mutex); if (trans->transaction->in_commit) { +printk("already in commit!, waiting\n"); cur_trans = trans->transaction; trans->transaction->use_count++; btrfs_end_transaction(trans, root); @@ -146,7 +160,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->trans_mutex); return 0; } + cur_trans = trans->transaction; + trans->transaction->in_commit = 1; while (trans->transaction->num_writers > 1) { + WARN_ON(cur_trans != trans->transaction); prepare_to_wait(&trans->transaction->writer_wait, &wait, TASK_UNINTERRUPTIBLE); if (trans->transaction->num_writers <= 1) @@ -154,15 +171,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->trans_mutex); schedule(); mutex_lock(&root->fs_info->trans_mutex); + finish_wait(&trans->transaction->writer_wait, &wait); } finish_wait(&trans->transaction->writer_wait, &wait); - + WARN_ON(cur_trans != trans->transaction); if (root->node != root->commit_root) { memcpy(&snap_key, &root->root_key, sizeof(snap_key)); root->root_key.offset++; } - if (btrfs_root_blocknr(&root->root_item) != root->node->b_blocknr) { btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); ret = btrfs_insert_root(trans, root->fs_info->tree_root, @@ -172,22 +189,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = btrfs_commit_tree_roots(trans, root); BUG_ON(ret); - cur_trans = root->fs_info->running_transaction; root->fs_info->running_transaction = NULL; mutex_unlock(&root->fs_info->trans_mutex); - ret = btrfs_write_and_wait_transaction(trans, root); BUG_ON(ret); write_ctree_super(trans, root); btrfs_finish_extent_commit(trans, root); mutex_lock(&root->fs_info->trans_mutex); + cur_trans->commit_done = 1; + wake_up(&cur_trans->commit_wait); put_transaction(cur_trans); put_transaction(cur_trans); mutex_unlock(&root->fs_info->trans_mutex); - kfree(trans); - + kmem_cache_free(btrfs_trans_handle_cachep, trans); if (root->node != root->commit_root) { trans = btrfs_start_transaction(root, 1); snap = root->commit_root; @@ -203,7 +219,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = btrfs_end_transaction(trans, root); BUG_ON(ret); } - return ret; } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 7e6c08a0acc..4f1496ae6f2 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -7,15 +7,18 @@ struct btrfs_transaction { int in_commit; int use_count; int commit_done; + int magic; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; }; struct btrfs_trans_handle { + int magic; u64 transid; unsigned long blocks_reserved; unsigned long blocks_used; struct btrfs_transaction *transaction; + int magic2; }; -- cgit v1.2.3-70-g09d2 From 5caf2a002901f0fde475371c4bf1c553b51884af Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 2 Apr 2007 11:20:42 -0400 Subject: Btrfs: dynamic allocation of path struct Signed-off-by: Chris Mason --- fs/btrfs/dir-item.c | 18 +++-- fs/btrfs/extent-tree.c | 100 +++++++++++++++---------- fs/btrfs/file-item.c | 43 ++++++----- fs/btrfs/inode-item.c | 9 ++- fs/btrfs/root-tree.c | 47 +++++++----- fs/btrfs/super.c | 198 ++++++++++++++++++++++++++++--------------------- 6 files changed, 243 insertions(+), 172 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index f55c89472ac..baceb1da609 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -9,7 +9,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root objectid, u8 type) { int ret = 0; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_dir_item *dir_item; char *name_ptr; struct btrfs_key key; @@ -20,24 +20,26 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); - btrfs_init_path(&path); + path = btrfs_alloc_path(); + btrfs_init_path(path); data_size = sizeof(*dir_item) + name_len; - ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size); + ret = btrfs_insert_empty_item(trans, root, path, &key, data_size); if (ret) goto out; - dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), - path.slots[0], + dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_dir_item); btrfs_set_dir_objectid(dir_item, objectid); btrfs_set_dir_type(dir_item, type); btrfs_set_dir_flags(dir_item, 0); btrfs_set_dir_name_len(dir_item, name_len); name_ptr = (char *)(dir_item + 1); - btrfs_memcpy(root, path.nodes[0]->b_data, name_ptr, name, name_len); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); + btrfs_mark_buffer_dirty(path->nodes[0]); out: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 37b87e28a2f..d785b721b46 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -15,7 +15,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks) { - struct btrfs_path path; + struct btrfs_path *path; int ret; struct btrfs_key key; struct btrfs_leaf *l; @@ -25,23 +25,26 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1, &ins); - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); key.objectid = blocknr; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; - ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path, + ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 1); if (ret != 0) BUG(); BUG_ON(ret != 0); - l = btrfs_buffer_leaf(path.nodes[0]); - item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); + l = btrfs_buffer_leaf(path->nodes[0]); + item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(item); btrfs_set_extent_refs(item, refs + 1); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); - btrfs_release_path(root->fs_info->extent_root, &path); + btrfs_release_path(root->fs_info->extent_root, path); + btrfs_free_path(path); finish_current_insert(trans, root->fs_info->extent_root); del_pending_extents(trans, root->fs_info->extent_root); return 0; @@ -50,24 +53,27 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, u32 *refs) { - struct btrfs_path path; + struct btrfs_path *path; int ret; struct btrfs_key key; struct btrfs_leaf *l; struct btrfs_extent_item *item; - btrfs_init_path(&path); + + path = btrfs_alloc_path(); + btrfs_init_path(path); key.objectid = blocknr; key.offset = num_blocks; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path, + ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 0); if (ret != 0) BUG(); - l = btrfs_buffer_leaf(path.nodes[0]); - item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); + l = btrfs_buffer_leaf(path->nodes[0]); + item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); *refs = btrfs_extent_refs(item); - btrfs_release_path(root->fs_info->extent_root, &path); + btrfs_release_path(root->fs_info->extent_root, path); + btrfs_free_path(path); return 0; } @@ -200,7 +206,7 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin) { - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_key key; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; @@ -215,20 +221,22 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root key.offset = num_blocks; find_free_extent(trans, root, 0, 0, (u64)-1, &ins); - btrfs_init_path(&path); - ret = btrfs_search_slot(trans, extent_root, &key, &path, -1, 1); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); if (ret) { printk("failed to find %Lu\n", key.objectid); btrfs_print_tree(extent_root, extent_root->node); printk("failed to find %Lu\n", key.objectid); BUG(); } - ei = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_extent_item); BUG_ON(ei->refs == 0); refs = btrfs_extent_refs(ei) - 1; btrfs_set_extent_refs(ei, refs); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); if (refs == 0) { u64 super_blocks_used; @@ -240,13 +248,14 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root super_blocks_used = btrfs_super_blocks_used(info->disk_super); btrfs_set_super_blocks_used(info->disk_super, super_blocks_used - num_blocks); - ret = btrfs_del_item(trans, extent_root, &path); + ret = btrfs_del_item(trans, extent_root, path); if (extent_root->fs_info->last_insert.objectid > blocknr) extent_root->fs_info->last_insert.objectid = blocknr; if (ret) BUG(); } - btrfs_release_path(extent_root, &path); + btrfs_release_path(extent_root, path); + btrfs_free_path(path); finish_current_insert(trans, extent_root); return ret; } @@ -319,7 +328,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *orig_root, u64 num_blocks, u64 search_start, u64 search_end, struct btrfs_key *ins) { - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_key key; int ret; u64 hole_size = 0; @@ -339,24 +348,25 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ins->flags = 0; btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); + path = btrfs_alloc_path(); check_failed: - btrfs_init_path(&path); + btrfs_init_path(path); ins->objectid = search_start; ins->offset = 0; start_found = 0; - ret = btrfs_search_slot(trans, root, ins, &path, 0, 0); + ret = btrfs_search_slot(trans, root, ins, path, 0, 0); if (ret < 0) goto error; - if (path.slots[0] > 0) - path.slots[0]--; + if (path->slots[0] > 0) + path->slots[0]--; while (1) { - l = btrfs_buffer_leaf(path.nodes[0]); - slot = path.slots[0]; + l = btrfs_buffer_leaf(path->nodes[0]); + slot = path->slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { - ret = btrfs_next_leaf(root, &path); + ret = btrfs_next_leaf(root, path); if (ret == 0) continue; if (ret < 0) @@ -387,14 +397,14 @@ check_failed: } start_found = 1; last_block = key.objectid + key.offset; - path.slots[0]++; + path->slots[0]++; } // FIXME -ENOSPC check_pending: /* we have to make sure we didn't find an extent that has already * been allocated by the map tree or the original allocation */ - btrfs_release_path(root, &path); + btrfs_release_path(root, path); BUG_ON(ins->objectid < search_start); for (test_block = ins->objectid; test_block < ins->objectid + total_needed; test_block++) { @@ -410,9 +420,11 @@ check_pending: root->fs_info->current_insert.flags = 0; root->fs_info->last_insert.objectid = ins->objectid; ins->offset = num_blocks; + btrfs_free_path(path); return 0; error: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -533,6 +545,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root int ret; u32 refs; + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = lookup_block_ref(trans, root, path->nodes[*level]->b_blocknr, 1, &refs); BUG_ON(ret); @@ -542,6 +556,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root * walk down to the last node level and free all the leaves */ while(*level >= 0) { + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); cur = path->nodes[*level]; if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) WARN_ON(1); @@ -564,6 +580,7 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root continue; } next = read_tree_block(root, blocknr); + WARN_ON(*level <= 0); if (path->nodes[*level-1]) btrfs_block_release(root, path->nodes[*level-1]); path->nodes[*level-1] = next; @@ -571,6 +588,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root path->slots[*level] = 0; } out: + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = btrfs_free_extent(trans, root, path->nodes[*level]->b_blocknr, 1, 1); btrfs_block_release(root, path->nodes[*level]); @@ -622,33 +641,36 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root int ret = 0; int wret; int level; - struct btrfs_path path; + struct btrfs_path *path; int i; int orig_level; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); level = btrfs_header_level(btrfs_buffer_header(snap)); orig_level = level; - path.nodes[level] = snap; - path.slots[level] = 0; + path->nodes[level] = snap; + path->slots[level] = 0; while(1) { - wret = walk_down_tree(trans, root, &path, &level); + wret = walk_down_tree(trans, root, path, &level); if (wret > 0) break; if (wret < 0) ret = wret; - wret = walk_up_tree(trans, root, &path, &level); + wret = walk_up_tree(trans, root, path, &level); if (wret > 0) break; if (wret < 0) ret = wret; } for (i = 0; i <= orig_level; i++) { - if (path.nodes[i]) { - btrfs_block_release(root, path.nodes[i]); + if (path->nodes[i]) { + btrfs_block_release(root, path->nodes[i]); } } + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index c3992b7b0c6..e7510ac5559 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -13,9 +13,11 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, int ret = 0; struct btrfs_file_extent_item *item; struct btrfs_key file_key; - struct btrfs_path path; + struct btrfs_path *path; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); ret = btrfs_alloc_extent(trans, root, num_blocks, hint_block, (u64)-1, objectid, &ins); BUG_ON(ret); @@ -24,19 +26,20 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); - ret = btrfs_insert_empty_item(trans, root, &path, &file_key, + ret = btrfs_insert_empty_item(trans, root, path, &file_key, sizeof(*item)); BUG_ON(ret); - item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); btrfs_set_file_extent_disk_blocknr(item, ins.objectid); btrfs_set_file_extent_disk_num_blocks(item, ins.offset); btrfs_set_file_extent_offset(item, 0); btrfs_set_file_extent_num_blocks(item, ins.offset); btrfs_set_file_extent_generation(item, trans->transid); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); *result = ins.objectid; - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return 0; } @@ -65,25 +68,28 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, { int ret; struct btrfs_key file_key; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_csum_item *item; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); file_key.objectid = objectid; file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); - ret = btrfs_insert_empty_item(trans, root, &path, &file_key, + ret = btrfs_insert_empty_item(trans, root, path, &file_key, BTRFS_CSUM_SIZE); if (ret != 0 && ret != -EEXIST) goto fail; - item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_csum_item); ret = 0; ret = btrfs_csum_data(root, data, len, item->csum); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); fail: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -93,19 +99,21 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, { int ret; struct btrfs_key file_key; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_csum_item *item; char result[BTRFS_CSUM_SIZE]; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); file_key.objectid = objectid; file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); - ret = btrfs_search_slot(NULL, root, &file_key, &path, 0, 0); + ret = btrfs_search_slot(NULL, root, &file_key, path, 0, 0); if (ret) goto fail; - item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_csum_item); ret = 0; ret = btrfs_csum_data(root, data, len, result); @@ -113,7 +121,8 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, if (memcmp(result, item->csum, BTRFS_CSUM_SIZE)) ret = 1; fail: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 8d8c26a6c1a..6bfa980790c 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -7,7 +7,7 @@ int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, struct btrfs_inode_item *inode_item) { - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_key key; int ret; key.objectid = objectid; @@ -15,10 +15,13 @@ int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); ret = btrfs_insert_item(trans, root, &key, inode_item, sizeof(*inode_item)); - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index f37dab87de1..ddc1c13a535 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -6,7 +6,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key) { - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_key search_key; struct btrfs_leaf *l; int ret; @@ -16,14 +16,16 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, search_key.flags = (u32)-1; search_key.offset = (u32)-1; - btrfs_init_path(&path); - ret = btrfs_search_slot(NULL, root, &search_key, &path, 0, 0); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); if (ret < 0) goto out; BUG_ON(ret == 0); - l = btrfs_buffer_leaf(path.nodes[0]); - BUG_ON(path.slots[0] == 0); - slot = path.slots[0] - 1; + l = btrfs_buffer_leaf(path->nodes[0]); + BUG_ON(path->slots[0] == 0); + slot = path->slots[0] - 1; if (btrfs_disk_key_objectid(&l->items[slot].key) != objectid) { ret = 1; goto out; @@ -31,9 +33,10 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, memcpy(item, btrfs_item_ptr(l, slot, struct btrfs_root_item), sizeof(*item)); btrfs_disk_key_to_cpu(key, &l->items[slot].key); - btrfs_release_path(root, &path); ret = 0; out: + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -41,24 +44,27 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_root_item *item) { - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_leaf *l; int ret; int slot; struct btrfs_root_item *update_item; - btrfs_init_path(&path); - ret = btrfs_search_slot(trans, root, key, &path, 0, 1); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_search_slot(trans, root, key, path, 0, 1); if (ret < 0) goto out; BUG_ON(ret != 0); - l = btrfs_buffer_leaf(path.nodes[0]); - slot = path.slots[0]; + l = btrfs_buffer_leaf(path->nodes[0]); + slot = path->slots[0]; update_item = btrfs_item_ptr(l, slot, struct btrfs_root_item); btrfs_memcpy(root, l, update_item, item, sizeof(*item)); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); out: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -75,16 +81,19 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key) { - struct btrfs_path path; + struct btrfs_path *path; int ret; - btrfs_init_path(&path); - ret = btrfs_search_slot(trans, root, key, &path, -1, 1); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_search_slot(trans, root, key, path, -1, 1); if (ret < 0) goto out; BUG_ON(ret != 0); - ret = btrfs_del_item(trans, root, &path); + ret = btrfs_del_item(trans, root, path); out: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6969b672b57..c260fcad17b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -35,25 +35,27 @@ static int check_inode(struct inode *inode) static void btrfs_read_locked_inode(struct inode *inode) { - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_inode_item *inode_item; struct btrfs_root *root = btrfs_sb(inode->i_sb); int ret; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); mutex_lock(&root->fs_info->fs_mutex); check_inode(inode); - ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0); + ret = btrfs_lookup_inode(NULL, root, path, inode->i_ino, 0); if (ret) { - btrfs_release_path(root, &path); + btrfs_release_path(root, path); mutex_unlock(&root->fs_info->fs_mutex); make_bad_inode(inode); return; } check_inode(inode); - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), - path.slots[0], + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_inode_item); inode->i_mode = btrfs_inode_mode(inode_item); @@ -69,7 +71,11 @@ static void btrfs_read_locked_inode(struct inode *inode) inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); inode->i_blocks = btrfs_inode_nblocks(inode_item); inode->i_generation = btrfs_inode_generation(inode_item); - btrfs_release_path(root, &path); + + btrfs_release_path(root, path); + btrfs_free_path(path); + inode_item = NULL; + mutex_unlock(&root->fs_info->fs_mutex); check_inode(inode); switch (inode->i_mode & S_IFMT) { @@ -101,15 +107,17 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, struct inode *dir, struct dentry *dentry) { - struct btrfs_path path; + struct btrfs_path *path; const char *name = dentry->d_name.name; int name_len = dentry->d_name.len; int ret; u64 objectid; struct btrfs_dir_item *di; - btrfs_init_path(&path); - ret = btrfs_lookup_dir_item(trans, root, &path, dir->i_ino, + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, name, name_len, -1); if (ret < 0) goto err; @@ -117,15 +125,16 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, ret = -ENOENT; goto err; } - di = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + di = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_dir_item); objectid = btrfs_dir_objectid(di); - ret = btrfs_del_item(trans, root, &path); + ret = btrfs_del_item(trans, root, path); BUG_ON(ret); dentry->d_inode->i_ctime = dir->i_ctime; err: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); if (ret == 0) inode_dec_link_count(dentry->d_inode); return ret; @@ -152,30 +161,32 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) int err; int ret; struct btrfs_root *root = btrfs_sb(dir->i_sb); - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_key key; struct btrfs_trans_handle *trans; struct btrfs_disk_key *found_key; struct btrfs_leaf *leaf; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); key.objectid = inode->i_ino; key.offset = (u64)-1; key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); - ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) { err = ret; goto out; } BUG_ON(ret == 0); - BUG_ON(path.slots[0] == 0); - path.slots[0]--; - leaf = btrfs_buffer_leaf(path.nodes[0]); - found_key = &leaf->items[path.slots[0]].key; + BUG_ON(path->slots[0] == 0); + path->slots[0]--; + leaf = btrfs_buffer_leaf(path->nodes[0]); + found_key = &leaf->items[path->slots[0]].key; if (btrfs_disk_key_objectid(found_key) != inode->i_ino) { err = -ENOENT; goto out; @@ -185,11 +196,11 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) err = -ENOTEMPTY; goto out; } - ret = btrfs_del_item(trans, root, &path); + ret = btrfs_del_item(trans, root, path); BUG_ON(ret); - btrfs_release_path(root, &path); + btrfs_release_path(root, path); key.offset = 1; - ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) { err = ret; goto out; @@ -198,12 +209,13 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) err = -ENOTEMPTY; goto out; } - ret = btrfs_del_item(trans, root, &path); + ret = btrfs_del_item(trans, root, path); if (ret) { err = ret; goto out; } - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); /* now the directory is empty */ err = btrfs_unlink_trans(trans, root, dir, dentry); @@ -223,33 +235,36 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans, struct inode *inode) { u64 objectid = inode->i_ino; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_inode_map_item *map; struct btrfs_key stat_data_key; int ret; + clear_inode(inode); - btrfs_init_path(&path); - ret = btrfs_lookup_inode_map(trans, root, &path, objectid, -1); + + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_lookup_inode_map(trans, root, path, objectid, -1); if (ret) { if (ret > 0) ret = -ENOENT; - btrfs_release_path(root, &path); goto error; } - map = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + map = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_inode_map_item); btrfs_disk_key_to_cpu(&stat_data_key, &map->key); - ret = btrfs_del_item(trans, root->fs_info->inode_root, &path); + ret = btrfs_del_item(trans, root->fs_info->inode_root, path); BUG_ON(ret); - btrfs_release_path(root, &path); - btrfs_init_path(&path); + btrfs_release_path(root, path); - ret = btrfs_lookup_inode(trans, root, &path, objectid, -1); + ret = btrfs_lookup_inode(trans, root, path, objectid, -1); BUG_ON(ret); - ret = btrfs_del_item(trans, root, &path); + ret = btrfs_del_item(trans, root, path); BUG_ON(ret); - btrfs_release_path(root, &path); error: + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -258,7 +273,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, struct inode *inode) { int ret; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_key key; struct btrfs_disk_key *found_key; struct btrfs_leaf *leaf; @@ -267,24 +282,25 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, u64 extent_num_blocks = 0; int found_extent; + path = btrfs_alloc_path(); + BUG_ON(!path); /* FIXME, add redo link to tree so we don't leak on crash */ key.objectid = inode->i_ino; key.offset = (u64)-1; key.flags = 0; btrfs_set_key_type(&key, BTRFS_CSUM_ITEM_KEY); while(1) { - btrfs_init_path(&path); - ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); + btrfs_init_path(path); + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) { - btrfs_release_path(root, &path); goto error; } if (ret > 0) { - BUG_ON(path.slots[0] == 0); - path.slots[0]--; + BUG_ON(path->slots[0] == 0); + path->slots[0]--; } - leaf = btrfs_buffer_leaf(path.nodes[0]); - found_key = &leaf->items[path.slots[0]].key; + leaf = btrfs_buffer_leaf(path->nodes[0]); + found_key = &leaf->items[path->slots[0]].key; if (btrfs_disk_key_objectid(found_key) != inode->i_ino) break; if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY && @@ -293,8 +309,8 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, if (btrfs_disk_key_offset(found_key) < inode->i_size) break; if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) { - fi = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), - path.slots[0], + fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_file_extent_item); extent_start = btrfs_file_extent_disk_blocknr(fi); extent_num_blocks = @@ -305,18 +321,19 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, } else { found_extent = 0; } - ret = btrfs_del_item(trans, root, &path); + ret = btrfs_del_item(trans, root, path); BUG_ON(ret); - btrfs_release_path(root, &path); + btrfs_release_path(root, path); if (found_extent) { ret = btrfs_free_extent(trans, root, extent_start, extent_num_blocks, 0); BUG_ON(ret); } } - btrfs_release_path(root, &path); ret = 0; error: + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -351,23 +368,26 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, const char *name = dentry->d_name.name; int namelen = dentry->d_name.len; struct btrfs_dir_item *di; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_root *root = btrfs_sb(dir->i_sb); int ret; - btrfs_init_path(&path); - ret = btrfs_lookup_dir_item(NULL, root, &path, dir->i_ino, name, + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, namelen, 0); - if (ret || !btrfs_match_dir_item_name(root, &path, name, namelen)) { + if (ret || !btrfs_match_dir_item_name(root, path, name, namelen)) { *ino = 0; ret = 0; goto out; } - di = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + di = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_dir_item); *ino = btrfs_dir_objectid(di); out: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); check_inode(dir); return ret; } @@ -405,7 +425,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct btrfs_item *item; struct btrfs_dir_item *di; struct btrfs_key key; - struct btrfs_path path; + struct btrfs_path *path; int ret; u32 nritems; struct btrfs_leaf *leaf; @@ -419,27 +439,28 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); key.offset = filp->f_pos; - btrfs_init_path(&path); - ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + path = btrfs_alloc_path(); + btrfs_init_path(path); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) { goto err; } advance = 0; while(1) { - leaf = btrfs_buffer_leaf(path.nodes[0]); + leaf = btrfs_buffer_leaf(path->nodes[0]); nritems = btrfs_header_nritems(&leaf->header); - slot = path.slots[0]; + slot = path->slots[0]; if (advance || slot >= nritems) { if (slot >= nritems -1) { - ret = btrfs_next_leaf(root, &path); + ret = btrfs_next_leaf(root, path); if (ret) break; - leaf = btrfs_buffer_leaf(path.nodes[0]); + leaf = btrfs_buffer_leaf(path->nodes[0]); nritems = btrfs_header_nritems(&leaf->header); - slot = path.slots[0]; + slot = path->slots[0]; } else { slot++; - path.slots[0]++; + path->slots[0]++; } } advance = 1; @@ -465,7 +486,8 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } ret = 0; err: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -548,26 +570,29 @@ static int btrfs_update_inode(struct btrfs_trans_handle *trans, struct inode *inode) { struct btrfs_inode_item *inode_item; - struct btrfs_path path; + struct btrfs_path *path; int ret; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); - ret = btrfs_lookup_inode(trans, root, &path, inode->i_ino, 1); + ret = btrfs_lookup_inode(trans, root, path, inode->i_ino, 1); if (ret) { if (ret > 0) ret = -ENOENT; goto failed; } - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), - path.slots[0], + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_inode_item); fill_inode_item(inode_item, inode); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); failed: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); check_inode(inode); return 0; } @@ -799,38 +824,39 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, u64 extent_start = 0; u64 extent_end = 0; u64 objectid = inode->i_ino; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_root *root = btrfs_sb(inode->i_sb); struct btrfs_trans_handle *trans = NULL; struct btrfs_file_extent_item *item; struct btrfs_leaf *leaf; struct btrfs_disk_key *found_key; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); if (create) trans = btrfs_start_transaction(root, 1); - ret = btrfs_lookup_file_extent(trans, root, &path, + ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, iblock << inode->i_blkbits, 0); if (ret < 0) { - btrfs_release_path(root, &path); err = ret; goto out; } if (ret != 0) { - if (path.slots[0] == 0) { - btrfs_release_path(root, &path); + if (path->slots[0] == 0) { + btrfs_release_path(root, path); goto allocate; } - path.slots[0]--; + path->slots[0]--; } - item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); - leaf = btrfs_buffer_leaf(path.nodes[0]); + leaf = btrfs_buffer_leaf(path->nodes[0]); blocknr = btrfs_file_extent_disk_blocknr(item); blocknr += btrfs_file_extent_offset(item); @@ -838,25 +864,23 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, if (ret == 0) { err = 0; map_bh(result, inode->i_sb, blocknr); - btrfs_release_path(root, &path); goto out; } /* are we inside the extent that was found? */ - found_key = &leaf->items[path.slots[0]].key; + found_key = &leaf->items[path->slots[0]].key; if (btrfs_disk_key_objectid(found_key) != objectid || btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) { extent_end = 0; extent_start = 0; - btrfs_release_path(root, &path); + btrfs_release_path(root, path); goto allocate; } - extent_start = btrfs_disk_key_offset(&leaf->items[path.slots[0]].key); + extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); extent_start = extent_start >> inode->i_blkbits; extent_start += btrfs_file_extent_offset(item); extent_end = extent_start + btrfs_file_extent_num_blocks(item); - btrfs_release_path(root, &path); if (iblock >= extent_start && iblock < extent_end) { err = 0; map_bh(result, inode->i_sb, blocknr + iblock - extent_start); @@ -880,6 +904,8 @@ allocate: map_bh(result, inode->i_sb, blocknr); out: + btrfs_release_path(root, path); + btrfs_free_path(path); if (trans) btrfs_end_transaction(trans, root); return err; -- cgit v1.2.3-70-g09d2 From 2da566edd8ce32ae2952b863ee634bcc2e7d63c1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 2 Apr 2007 15:43:21 -0400 Subject: Btrfs: csum_verify_file_block locking fix Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 - fs/btrfs/extent-tree.c | 2 -- fs/btrfs/file-item.c | 2 ++ fs/btrfs/super.c | 1 - 4 files changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a227e94b9e8..39b117a57a5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -160,7 +160,6 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, int ret; struct btrfs_node *node; - return 0; ret = btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE, bh->b_size - BTRFS_CSUM_SIZE, result); if (ret) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d785b721b46..cd40a08dd8a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -249,8 +249,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_super_blocks_used(info->disk_super, super_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, path); - if (extent_root->fs_info->last_insert.objectid > blocknr) - extent_root->fs_info->last_insert.objectid = blocknr; if (ret) BUG(); } diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index e7510ac5559..2c98b620c59 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -110,6 +110,7 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); + mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_search_slot(NULL, root, &file_key, path, 0, 0); if (ret) goto fail; @@ -123,6 +124,7 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, fail: btrfs_release_path(root, path); btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9fd87f5edbc..fbc26eafc58 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1316,7 +1316,6 @@ static void btrfs_destroy_inode(struct inode *inode) WARN_ON(ei->magic != 0xDEADBEEF); WARN_ON(ei->magic2 != 0xDEADBEAF); WARN_ON(!list_empty(&inode->i_dentry)); - WARN_ON(inode->i_ino == 1); WARN_ON(inode->i_data.nrpages); ei->magic = 0; -- cgit v1.2.3-70-g09d2 From b1a4d96509a78ad234d94e0b914b289c60d2969d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 4 Apr 2007 15:27:52 -0400 Subject: Btrfs: tweak the inode-map and free extent search starts on cold mount Signed-off-by: Chris Mason --- fs/btrfs/TODO | 2 -- fs/btrfs/ctree.h | 1 - fs/btrfs/disk-io.c | 1 - fs/btrfs/extent-tree.c | 21 +++++++++++++++++++-- fs/btrfs/inode-map.c | 27 ++++++++++++++++++++------- 5 files changed, 39 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index e9371385ccc..db986c46a55 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -1,9 +1,7 @@ * cleanup, add more error checking, get rid of BUG_ONs * Fix ENOSPC handling -* Do checksumming * Add block mapping tree (simple dm layer) * Make allocator smarter -* make level a field in header * add a block group to struct inode * Make directory hashing work on 32 bit * Make sure nobh stuff is working properly for cows diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d8e03bd797f..41cc013ef08 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -238,7 +238,6 @@ struct btrfs_fs_info { struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; u64 last_inode_alloc; - u64 last_inode_alloc_dirid; u64 generation; struct btrfs_transaction *running_transaction; struct btrfs_super_block *disk_super; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 055cd45e802..de9ee3aa0aa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -311,7 +311,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->extent_root = extent_root; fs_info->inode_root = inode_root; fs_info->last_inode_alloc = 0; - fs_info->last_inode_alloc_dirid = 0; fs_info->sb = sb; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cd40a08dd8a..688aa861a92 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -339,13 +339,30 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int total_needed = num_blocks; int level; + path = btrfs_alloc_path(); + ins->flags = 0; + btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); + level = btrfs_header_level(btrfs_buffer_header(root->node)); total_needed += (level + 1) * 3; + if (root->fs_info->last_insert.objectid == 0 && search_end == (u64)-1) { + struct btrfs_disk_key *last_key; + btrfs_init_path(path); + ins->objectid = (u64)-1; + ins->offset = (u64)-1; + ret = btrfs_search_slot(trans, root, ins, path, 0, 0); + if (ret < 0) + goto error; + BUG_ON(ret == 0); + if (path->slots[0] > 0) + path->slots[0]--; + l = btrfs_buffer_leaf(path->nodes[0]); + last_key = &l->items[path->slots[0]].key; + search_start = btrfs_disk_key_objectid(last_key); + } if (root->fs_info->last_insert.objectid > search_start) search_start = root->fs_info->last_insert.objectid; - ins->flags = 0; - btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); path = btrfs_alloc_path(); check_failed: diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index b3de823eb62..329edb42897 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -22,17 +22,31 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, struct btrfs_key search_key; u64 search_start = dirid; - if (fs_root->fs_info->last_inode_alloc_dirid == dirid) - search_start = fs_root->fs_info->last_inode_alloc; + path = btrfs_alloc_path(); + BUG_ON(!path); + search_key.flags = 0; + btrfs_set_key_type(&search_key, BTRFS_INODE_MAP_ITEM_KEY); + search_start = fs_root->fs_info->last_inode_alloc; + if (search_start == 0) { + struct btrfs_disk_key *last_key; + btrfs_init_path(path); + search_key.objectid = (u64)-1; + search_key.offset = (u64)-1; + ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0); + if (ret < 0) + goto error; + BUG_ON(ret == 0); + if (path->slots[0] > 0) + path->slots[0]--; + l = btrfs_buffer_leaf(path->nodes[0]); + last_key = &l->items[path->slots[0]].key; + search_start = btrfs_disk_key_objectid(last_key); + } search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); search_key.objectid = search_start; - search_key.flags = 0; - btrfs_set_key_type(&search_key, BTRFS_INODE_MAP_ITEM_KEY); search_key.offset = 0; - path = btrfs_alloc_path(); - BUG_ON(!path); btrfs_init_path(path); start_found = 0; ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0); @@ -79,7 +93,6 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, // FIXME -ENOSPC found: root->fs_info->last_inode_alloc = *objectid; - root->fs_info->last_inode_alloc_dirid = dirid; btrfs_release_path(root, path); btrfs_free_path(path); BUG_ON(*objectid < search_start); -- cgit v1.2.3-70-g09d2 From 5f26f772e5c4e833ffcb0599f54deda466d2a3e5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 5 Apr 2007 10:38:44 -0400 Subject: Btrfs: more inode indexed directory work Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 +++ fs/btrfs/dir-item.c | 18 ++++++++++ fs/btrfs/extent-tree.c | 3 +- fs/btrfs/super.c | 90 ++++++++++++++++++++++++++------------------------ 4 files changed, 70 insertions(+), 45 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 87c56222a62..61d7b4738af 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -921,6 +921,10 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, const char *name, int name_len, int mod); +int btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + u64 objectid, int mod); int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, const char *name, int name_len); /* inode-map.c */ diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 2a87e23ac9c..62d0c0916a7 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -80,6 +80,24 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } +int btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + u64 objectid, int mod) +{ + int ret; + struct btrfs_key key; + int ins_len = mod < 0 ? -1 : 0; + int cow = mod != 0; + + key.objectid = dir; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); + key.offset = objectid; + ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); + return ret; +} + int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, const char *name, int name_len) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 688aa861a92..7c21f63f1b9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -224,6 +224,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); + ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); if (ret) { printk("failed to find %Lu\n", key.objectid); @@ -363,8 +364,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (root->fs_info->last_insert.objectid > search_start) search_start = root->fs_info->last_insert.objectid; - path = btrfs_alloc_path(); - check_failed: btrfs_init_path(path); ins->objectid = search_start; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4b042460e87..d776b29a167 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -130,6 +130,13 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, struct btrfs_dir_item); objectid = btrfs_dir_objectid(di); + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + + btrfs_release_path(root, path); + ret = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, + objectid, -1); + BUG_ON(ret); ret = btrfs_del_item(trans, root, path); BUG_ON(ret); dentry->d_inode->i_ctime = dir->i_ctime; @@ -138,7 +145,7 @@ err: btrfs_free_path(path); if (ret == 0) { inode_dec_link_count(dentry->d_inode); - dir->i_size -= name_len; + dir->i_size -= name_len * 2; mark_inode_dirty(dir); } return ret; @@ -168,8 +175,10 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) struct btrfs_path *path; struct btrfs_key key; struct btrfs_trans_handle *trans; - struct btrfs_disk_key *found_key; + struct btrfs_key found_key; + int found_type; struct btrfs_leaf *leaf; + char *goodnames = ".."; path = btrfs_alloc_path(); BUG_ON(!path); @@ -178,46 +187,42 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) trans = btrfs_start_transaction(root, 1); key.objectid = inode->i_ino; key.offset = (u64)-1; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) { - err = ret; - goto out; - } + key.flags = (u32)-1; + while(1) { + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) { + err = ret; + goto out; + } + BUG_ON(ret == 0); + if (path->slots[0] == 0) { + err = -ENOENT; + goto out; + } + path->slots[0]--; + leaf = btrfs_buffer_leaf(path->nodes[0]); + btrfs_disk_key_to_cpu(&found_key, + &leaf->items[path->slots[0]].key); + found_type = btrfs_key_type(&found_key); + if (found_key.objectid != inode->i_ino) { + err = -ENOENT; + goto out; + } + if ((found_type != BTRFS_DIR_ITEM_KEY && + found_type != BTRFS_DIR_INDEX_KEY) || + (!btrfs_match_dir_item_name(root, path, goodnames, 2) && + !btrfs_match_dir_item_name(root, path, goodnames, 1))) { + err = -ENOTEMPTY; + goto out; + } + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); - BUG_ON(ret == 0); - BUG_ON(path->slots[0] == 0); - path->slots[0]--; - leaf = btrfs_buffer_leaf(path->nodes[0]); - found_key = &leaf->items[path->slots[0]].key; - if (btrfs_disk_key_objectid(found_key) != inode->i_ino) { - err = -ENOENT; - goto out; - } - if (btrfs_disk_key_type(found_key) != BTRFS_DIR_ITEM_KEY || - btrfs_disk_key_offset(found_key) != 2) { - err = -ENOTEMPTY; - goto out; - } - ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); - btrfs_release_path(root, path); - key.offset = 1; - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) { - err = ret; - goto out; - } - if (ret > 0) { - err = -ENOTEMPTY; - goto out; - } - ret = btrfs_del_item(trans, root, path); - if (ret) { - err = ret; - goto out; + if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1) + break; + btrfs_release_path(root, path); } + ret = 0; btrfs_release_path(root, path); /* now the directory is empty */ @@ -676,7 +681,7 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, dentry->d_parent->d_inode->i_ino, inode->i_ino, 0); if (ret == 0) { - dentry->d_parent->d_inode->i_size += dentry->d_name.len; + dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2; ret = btrfs_update_inode(trans, btrfs_sb(inode->i_sb), dentry->d_parent->d_inode); } @@ -754,7 +759,7 @@ static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, dir->i_ino, 1); if (ret) goto error; - inode->i_size = 3; + inode->i_size = 6; ret = btrfs_update_inode(trans, root, inode); error: return ret; @@ -915,7 +920,6 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, if (create) trans = btrfs_start_transaction(root, 1); - ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, iblock << inode->i_blkbits, 0); -- cgit v1.2.3-70-g09d2 From c5739bba5260a59cebd20a51a55080592c8d3b07 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 10 Apr 2007 09:27:04 -0400 Subject: Btrfs: snapshot progress Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 15 +++++- fs/btrfs/dir-item.c | 6 ++- fs/btrfs/disk-io.c | 4 -- fs/btrfs/extent-tree.c | 6 +++ fs/btrfs/ioctl.h | 13 +++++ fs/btrfs/root-tree.c | 16 +++++- fs/btrfs/super.c | 135 +++++++++++++++++++++++++++++++++++++++++++++++-- 7 files changed, 183 insertions(+), 12 deletions(-) create mode 100644 fs/btrfs/ioctl.h (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5460030c9e6..2cbcaaeac9e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -227,7 +227,7 @@ struct btrfs_csum_item { } __attribute__ ((__packed__)); struct btrfs_inode_map_item { - struct btrfs_disk_key key; + u32 refs; } __attribute__ ((__packed__)); struct crypto_hash; @@ -883,6 +883,17 @@ static inline void btrfs_set_file_extent_num_blocks(struct e->num_blocks = cpu_to_le64(val); } +static inline u32 btrfs_inode_map_refs(struct btrfs_inode_map_item *m) +{ + return le32_to_cpu(m->refs); +} + +static inline void btrfs_set_inode_map_refs(struct btrfs_inode_map_item *m, + u32 val) +{ + m->refs = cpu_to_le32(val); +} + static inline struct btrfs_root *btrfs_sb(struct super_block *sb) { return sb->s_fs_info; @@ -925,6 +936,8 @@ static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) btrfs_item_offset((leaf)->items + (slot)))) /* extent-item.c */ +int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 7aed9f015b5..0ba46bc0da9 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -55,12 +55,14 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_dir_flags(dir_item, 0); btrfs_set_dir_name_len(dir_item, name_len); name_ptr = (char *)(dir_item + 1); + + btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); + btrfs_mark_buffer_dirty(path->nodes[0]); + /* FIXME, use some real flag for selecting the extra index */ if (root == root->fs_info->tree_root) goto out; - btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); - btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(root, path); btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b557bdd1e26..6b097ede80b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -16,10 +16,6 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) { BUG(); } - if (root->node && btrfs_header_parentid(&node->header) != - btrfs_header_parentid(btrfs_buffer_header(root->node))) { - BUG(); - } return 0; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7c21f63f1b9..efc604eea0b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -77,6 +77,12 @@ static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } +int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + return inc_block_ref(trans, root, root->node->b_blocknr, 1); +} + int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf) { diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h new file mode 100644 index 00000000000..201fb327072 --- /dev/null +++ b/fs/btrfs/ioctl.h @@ -0,0 +1,13 @@ +#ifndef __IOCTL_ +#define __IOCTL_ +#include + +#define BTRFS_IOCTL_MAGIC 0x94 +#define BTRFS_VOL_NAME_MAX 255 +struct btrfs_ioctl_vol_args { + char name[BTRFS_VOL_NAME_MAX + 1]; +}; + +#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ + struct btrfs_ioctl_vol_args) +#endif diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index ddc1c13a535..72be9836932 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -83,6 +83,8 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, { struct btrfs_path *path; int ret; + u32 refs; + struct btrfs_root_item *ri; path = btrfs_alloc_path(); BUG_ON(!path); @@ -91,7 +93,19 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (ret < 0) goto out; BUG_ON(ret != 0); - ret = btrfs_del_item(trans, root, path); + ri = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_root_item); + + refs = btrfs_root_refs(ri); + BUG_ON(refs == 0); + if (refs == 1) { + ret = btrfs_del_item(trans, root, path); +printk("deleting root %Lu %Lu %u\n", key->objectid, key->offset, key->flags); + } else { + btrfs_set_root_refs(ri, refs - 1); +printk("ref now %u root %Lu %Lu %u\n", refs -1, key->objectid, key->offset, key->flags); + mark_buffer_dirty(path->nodes[0]); + } out: btrfs_release_path(root, path); btrfs_free_path(path); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3c9236ca889..bbe5cabcb42 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -15,6 +15,7 @@ #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" +#include "ioctl.h" void btrfs_fsinfo_release(struct kobject *obj) { @@ -27,6 +28,11 @@ struct kobj_type btrfs_fsinfo_ktype = { .release = btrfs_fsinfo_release, }; +struct btrfs_iget_args { + u64 ino; + struct btrfs_root *root; +}; + decl_subsys(btrfs, &btrfs_fsinfo_ktype, NULL); #define BTRFS_SUPER_MAGIC 0x9123682E @@ -461,6 +467,34 @@ int fixup_tree_root_location(struct btrfs_root *root, return 0; } +int btrfs_init_locked_inode(struct inode *inode, void *p) +{ + struct btrfs_iget_args *args = p; + inode->i_ino = args->ino; + BTRFS_I(inode)->root = args->root; + return 0; +} + +int btrfs_find_actor(struct inode *inode, void *opaque) +{ + struct btrfs_iget_args *args = opaque; + return (args->ino == inode->i_ino && + args->root == BTRFS_I(inode)->root); +} + +struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, + struct btrfs_root *root) +{ + struct inode *inode; + struct btrfs_iget_args args; + args.ino = objectid; + args.root = root; + + inode = iget5_locked(s, objectid, btrfs_find_actor, + btrfs_init_locked_inode, + (void *)&args); + return inode; +} static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) @@ -486,7 +520,8 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(ret); if (ret > 0) return ERR_PTR(-ENOENT); - inode = iget_locked(dir->i_sb, location.objectid); + inode = btrfs_iget_locked(dir->i_sb, location.objectid, + sub_root); if (!inode) return ERR_PTR(-EACCES); if (inode->i_state & I_NEW) { @@ -495,7 +530,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, &root->fs_info->fs_roots_radix, (unsigned long)sub_root, sub_root); -printk("adding new root for inode %lu\n", inode->i_ino); +printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_root, BTRFS_I(inode)->root); igrab(inode); sub_root->inode = inode; } @@ -630,7 +665,8 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) btrfs_super_total_blocks(disk_super), btrfs_super_root_dir(disk_super)); - inode = iget_locked(sb, btrfs_super_root_dir(disk_super)); + inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super), + tree_root); bi = BTRFS_I(inode); bi->location.objectid = inode->i_ino; bi->location.offset = 0; @@ -750,7 +786,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_mode = mode; inode->i_ino = objectid; inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; fill_inode_item(&inode_item, inode); key->objectid = objectid; @@ -1650,6 +1686,95 @@ static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, return retval; } +static int create_snapshot(struct btrfs_root *root, char *name, int namelen) +{ + struct btrfs_trans_handle *trans; + struct btrfs_key key; + struct btrfs_root_item new_root_item; + int ret; + u64 objectid; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + + ret = btrfs_update_inode(trans, root, root->inode); + BUG_ON(ret); + + ret = btrfs_find_free_objectid(trans, root, 0, &objectid); + BUG_ON(ret); + + memset(&new_root_item, 0, sizeof(new_root_item)); + memcpy(&new_root_item, &root->root_item, + sizeof(new_root_item)); + + key.objectid = objectid; + key.flags = 0; + key.offset = 0; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + ret = btrfs_insert_inode_map(trans, root, objectid, &key); + BUG_ON(ret); + + key.objectid = objectid; + key.offset = 1; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + btrfs_set_root_blocknr(&new_root_item, root->node->b_blocknr); + + ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, + &new_root_item); + BUG_ON(ret); + +printk("adding snapshot name %.*s root %Lu %Lu %u\n", namelen, name, key.objectid, key.offset, key.flags); + + /* + * insert the directory item + */ + key.offset = (u64)-1; + ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, + name, namelen, + root->fs_info->sb->s_root->d_inode->i_ino, + &key, 0); + + BUG_ON(ret); + + ret = btrfs_inc_root_ref(trans, root); + BUG_ON(ret); + + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); + return 0; +} + +static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int + cmd, unsigned long arg) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_ioctl_vol_args vol_args; + int ret; + int namelen; + + if (!root->ref_cows) + return -EINVAL; + switch (cmd) { + case BTRFS_IOC_SNAP_CREATE: + if (copy_from_user(&vol_args, + (struct btrfs_ioctl_vol_args __user *)arg, + sizeof(vol_args))) + return -EFAULT; + namelen = strlen(vol_args.name); + if (namelen > BTRFS_VOL_NAME_MAX) + return -EINVAL; + ret = create_snapshot(root, vol_args.name, namelen); + WARN_ON(ret); + break; + default: + return -ENOTTY; + } + return 0; +} + static struct kmem_cache *btrfs_inode_cachep; struct kmem_cache *btrfs_trans_handle_cachep; struct kmem_cache *btrfs_transaction_cachep; @@ -1781,6 +1906,7 @@ static struct file_operations btrfs_dir_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = btrfs_readdir, + .ioctl = btrfs_ioctl, }; static struct address_space_operations btrfs_aops = { @@ -1803,6 +1929,7 @@ static struct file_operations btrfs_file_operations = { .write = btrfs_file_write, .mmap = generic_file_mmap, .open = generic_file_open, + .ioctl = btrfs_ioctl, }; static int __init init_btrfs_fs(void) -- cgit v1.2.3-70-g09d2 From d0dbc6245cefa36e19dff49c557ccf05e3063e9c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 10 Apr 2007 12:36:36 -0400 Subject: Btrfs: drop owner and parentid Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 8 -------- fs/btrfs/ctree.h | 25 +------------------------ fs/btrfs/extent-tree.c | 8 ++------ fs/btrfs/file-item.c | 2 +- fs/btrfs/print-tree.c | 4 ++-- fs/btrfs/transaction.c | 6 ------ 6 files changed, 6 insertions(+), 47 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 39b551564fc..f31ac843bf2 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -688,8 +688,6 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_level(&c->header, level); btrfs_set_header_blocknr(&c->header, t->b_blocknr); btrfs_set_header_generation(&c->header, trans->transid); - btrfs_set_header_parentid(&c->header, - btrfs_header_parentid(btrfs_buffer_header(root->node))); lower = btrfs_buffer_node(path->nodes[level-1]); memcpy(c->header.fsid, root->fs_info->disk_super->fsid, sizeof(c->header.fsid)); @@ -784,8 +782,6 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); btrfs_set_header_blocknr(&split->header, split_buffer->b_blocknr); btrfs_set_header_generation(&split->header, trans->transid); - btrfs_set_header_parentid(&split->header, - btrfs_header_parentid(btrfs_buffer_header(root->node))); memcpy(split->header.fsid, root->fs_info->disk_super->fsid, sizeof(split->header.fsid)); mid = (c_nritems + 1) / 2; @@ -1145,8 +1141,6 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr); btrfs_set_header_generation(&right->header, trans->transid); btrfs_set_header_level(&right->header, 0); - btrfs_set_header_parentid(&right->header, - btrfs_header_parentid(btrfs_buffer_header(root->node))); memcpy(right->header.fsid, root->fs_info->disk_super->fsid, sizeof(right->header.fsid)); if (mid <= slot) { @@ -1237,8 +1231,6 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr); btrfs_set_header_generation(&right->header, trans->transid); btrfs_set_header_level(&right->header, 0); - btrfs_set_header_parentid(&right->header, - btrfs_header_parentid(btrfs_buffer_header(root->node))); memcpy(right->header.fsid, root->fs_info->disk_super->fsid, sizeof(right->header.fsid)); btrfs_cpu_key_to_disk(&disk_key, ins_key); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5103709bb2b..0ba560f0d2a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -60,7 +60,6 @@ struct btrfs_header { u8 fsid[16]; /* FS specific uuid */ __le64 blocknr; /* which block this node is supposed to live in */ __le64 generation; - __le64 parentid; /* objectid of the tree root */ __le16 nritems; __le16 flags; u8 level; @@ -147,7 +146,6 @@ struct btrfs_path { */ struct btrfs_extent_item { __le32 refs; - __le64 owner; } __attribute__ ((__packed__)); struct btrfs_inode_timespec { @@ -443,16 +441,6 @@ static inline void btrfs_set_timespec_nsec(struct btrfs_inode_timespec *ts, ts->nsec = cpu_to_le32(val); } -static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) -{ - return le64_to_cpu(ei->owner); -} - -static inline void btrfs_set_extent_owner(struct btrfs_extent_item *ei, u64 val) -{ - ei->owner = cpu_to_le64(val); -} - static inline u32 btrfs_extent_refs(struct btrfs_extent_item *ei) { return le32_to_cpu(ei->refs); @@ -652,17 +640,6 @@ static inline void btrfs_set_header_generation(struct btrfs_header *h, h->generation = cpu_to_le64(val); } -static inline u64 btrfs_header_parentid(struct btrfs_header *h) -{ - return le64_to_cpu(h->parentid); -} - -static inline void btrfs_set_header_parentid(struct btrfs_header *h, - u64 parentid) -{ - h->parentid = cpu_to_le64(parentid); -} - static inline u16 btrfs_header_nritems(struct btrfs_header *h) { return le16_to_cpu(h->nritems); @@ -919,7 +896,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_blocks, u64 search_start, u64 - search_end, u64 owner, struct btrfs_key *ins); + search_end, struct btrfs_key *ins); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index efc604eea0b..be9630df506 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -157,8 +157,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct struct btrfs_fs_info *info = extent_root->fs_info; btrfs_set_extent_refs(&extent_item, 1); - btrfs_set_extent_owner(&extent_item, - btrfs_header_parentid(btrfs_buffer_header(extent_root->node))); ins.offset = 1; ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); @@ -457,7 +455,7 @@ error: */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_blocks, u64 search_start, u64 - search_end, u64 owner, struct btrfs_key *ins) + search_end, struct btrfs_key *ins) { int ret; int pending_ret; @@ -467,7 +465,6 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_extent_item extent_item; btrfs_set_extent_refs(&extent_item, 1); - btrfs_set_extent_owner(&extent_item, owner); if (root == extent_root) { BUG_ON(extent_root->fs_info->current_insert.offset == 0); @@ -510,8 +507,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, int ret; struct buffer_head *buf; - ret = btrfs_alloc_extent(trans, root, 1, 0, (unsigned long)-1, - btrfs_header_parentid(btrfs_buffer_header(root->node)), &ins); + ret = btrfs_alloc_extent(trans, root, 1, 0, (unsigned long)-1, &ins); if (ret) { BUG(); return NULL; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 2c98b620c59..d9fd7f4d379 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -19,7 +19,7 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, BUG_ON(!path); btrfs_init_path(path); ret = btrfs_alloc_extent(trans, root, num_blocks, hint_block, - (u64)-1, objectid, &ins); + (u64)-1, &ins); BUG_ON(ret); file_key.objectid = objectid; file_key.offset = offset; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 854d47d9bdc..0732a2fbb23 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -50,8 +50,8 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) break; case BTRFS_EXTENT_ITEM_KEY: ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); - printk("\t\textent data refs %u owner %Lu\n", - btrfs_extent_refs(ei), btrfs_extent_owner(ei)); + printk("\t\textent data refs %u\n", + btrfs_extent_refs(ei)); break; case BTRFS_STRING_ITEM_KEY: printk("\t\titem data %.*s\n", btrfs_item_size(item), diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 54978d1bd95..f64c1729b0e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -163,7 +163,6 @@ int add_dirty_roots(struct btrfs_trans_handle *trans, int i; int ret; int err; -printk("add dirty\n"); while(1) { ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0, ARRAY_SIZE(gang), @@ -188,7 +187,6 @@ printk("add dirty\n"); dirty->commit_root = root->commit_root; root->commit_root = NULL; dirty->root = root; -printk("adding dirty root %Lu gen %Lu blocknr %Lu\n", root->root_key.objectid, root->root_key.offset, dirty->commit_root->b_blocknr); root->root_key.offset = root->fs_info->generation; btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); @@ -199,7 +197,6 @@ printk("adding dirty root %Lu gen %Lu blocknr %Lu\n", root->root_key.objectid, r list_add(&dirty->list, list); } } -printk("add dirty done\n"); return 0; } @@ -213,12 +210,10 @@ int drop_dirty_roots(struct btrfs_root *tree_root, struct list_head *list) dirty = list_entry(list->next, struct dirty_root, list); list_del_init(&dirty->list); trans = btrfs_start_transaction(tree_root, 1); -printk("drop snapshot root %p, commit_root blocknr %Lu generation %Lu\n", dirty->root, dirty->commit_root->b_blocknr, dirty->snap_key.offset); ret = btrfs_drop_snapshot(trans, dirty->root, dirty->commit_root); BUG_ON(ret); -printk("del root objectid %Lu, offset %Lu\n", dirty->snap_key.objectid, dirty->snap_key.offset); ret = btrfs_del_root(trans, tree_root, &dirty->snap_key); BUG_ON(ret); ret = btrfs_end_transaction(trans, tree_root); @@ -240,7 +235,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->trans_mutex); if (trans->transaction->in_commit) { -printk("already in commit!, waiting\n"); cur_trans = trans->transaction; trans->transaction->use_count++; btrfs_end_transaction(trans, root); -- cgit v1.2.3-70-g09d2 From 7eccb903a817e890c947ba4bc90c6a9af9b4219a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 11 Apr 2007 15:53:25 -0400 Subject: Btrfs: create a logical->phsyical block number mapping scheme Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 38 +++++++------- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 140 +++++++++++++++++++++++++++++++++++-------------- fs/btrfs/disk-io.h | 1 + fs/btrfs/extent-tree.c | 8 +-- fs/btrfs/super.c | 6 +-- fs/btrfs/transaction.c | 8 +-- 7 files changed, 134 insertions(+), 68 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index f31ac843bf2..e2fc6f0d024 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -66,21 +66,21 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) WARN_ON(1); memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); - btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr); + btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); btrfs_set_header_generation(&cow_node->header, trans->transid); btrfs_inc_ref(trans, root, buf); if (buf == root->node) { root->node = cow; get_bh(cow); if (buf != root->commit_root) { - btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); + btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); } btrfs_block_release(root, buf); } else { btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, - cow->b_blocknr); + bh_blocknr(cow)); btrfs_mark_buffer_dirty(parent); - btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); + btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); } btrfs_block_release(root, buf); mark_buffer_dirty(cow); @@ -312,7 +312,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root */ if (!parent_buf) { struct buffer_head *child; - u64 blocknr = mid_buf->b_blocknr; + u64 blocknr = bh_blocknr(mid_buf); if (btrfs_header_nritems(&mid->header) != 1) return 0; @@ -361,7 +361,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (wret < 0) ret = wret; if (btrfs_header_nritems(&right->header) == 0) { - u64 blocknr = right_buf->b_blocknr; + u64 blocknr = bh_blocknr(right_buf); clean_tree_block(trans, root, right_buf); wait_on_buffer(right_buf); btrfs_block_release(root, right_buf); @@ -400,7 +400,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root } if (btrfs_header_nritems(&mid->header) == 0) { /* we've managed to empty the middle node, drop it */ - u64 blocknr = mid_buf->b_blocknr; + u64 blocknr = bh_blocknr(mid_buf); clean_tree_block(trans, root, mid_buf); wait_on_buffer(mid_buf); btrfs_block_release(root, mid_buf); @@ -686,7 +686,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root memset(c, 0, root->blocksize); btrfs_set_header_nritems(&c->header, 1); btrfs_set_header_level(&c->header, level); - btrfs_set_header_blocknr(&c->header, t->b_blocknr); + btrfs_set_header_blocknr(&c->header, bh_blocknr(t)); btrfs_set_header_generation(&c->header, trans->transid); lower = btrfs_buffer_node(path->nodes[level-1]); memcpy(c->header.fsid, root->fs_info->disk_super->fsid, @@ -697,7 +697,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root lower_key = &lower->ptrs[0].key; btrfs_memcpy(root, c, &c->ptrs[0].key, lower_key, sizeof(struct btrfs_disk_key)); - btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->b_blocknr); + btrfs_set_node_blockptr(c, 0, bh_blocknr(path->nodes[level - 1])); btrfs_mark_buffer_dirty(t); @@ -780,7 +780,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root split = btrfs_buffer_node(split_buffer); btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); - btrfs_set_header_blocknr(&split->header, split_buffer->b_blocknr); + btrfs_set_header_blocknr(&split->header, bh_blocknr(split_buffer)); btrfs_set_header_generation(&split->header, trans->transid); memcpy(split->header.fsid, root->fs_info->disk_super->fsid, sizeof(split->header.fsid)); @@ -794,7 +794,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(t); btrfs_mark_buffer_dirty(split_buffer); wret = insert_ptr(trans, root, path, &split->ptrs[0].key, - split_buffer->b_blocknr, path->slots[level + 1] + 1, + bh_blocknr(split_buffer), path->slots[level + 1] + 1, level + 1); if (wret) ret = wret; @@ -1138,7 +1138,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(!right_buffer); right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); - btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr); + btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); btrfs_set_header_generation(&right->header, trans->transid); btrfs_set_header_level(&right->header, 0); memcpy(right->header.fsid, root->fs_info->disk_super->fsid, @@ -1152,7 +1152,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&right->header, 0); wret = insert_ptr(trans, root, path, &disk_key, - right_buffer->b_blocknr, + bh_blocknr(right_buffer), path->slots[1] + 1, 1); if (wret) ret = wret; @@ -1173,7 +1173,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&right->header, 0); wret = insert_ptr(trans, root, path, &disk_key, - right_buffer->b_blocknr, + bh_blocknr(right_buffer), path->slots[1] - 1, 1); if (wret) ret = wret; @@ -1207,7 +1207,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&l->header, mid); ret = 0; wret = insert_ptr(trans, root, path, &right->items[0].key, - right_buffer->b_blocknr, path->slots[1] + 1, 1); + bh_blocknr(right_buffer), path->slots[1] + 1, 1); if (wret) ret = wret; btrfs_mark_buffer_dirty(right_buffer); @@ -1228,7 +1228,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(!right_buffer); right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); - btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr); + btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); btrfs_set_header_generation(&right->header, trans->transid); btrfs_set_header_level(&right->header, 0); memcpy(right->header.fsid, root->fs_info->disk_super->fsid, @@ -1237,7 +1237,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&right->header, 0); wret = insert_ptr(trans, root, path, &disk_key, - right_buffer->b_blocknr, + bh_blocknr(right_buffer), path->slots[1], 1); if (wret) ret = wret; @@ -1456,7 +1456,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; wret = btrfs_free_extent(trans, root, - leaf_buf->b_blocknr, 1, 1); + bh_blocknr(leaf_buf), 1, 1); if (wret) ret = wret; } @@ -1487,7 +1487,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = wret; } if (btrfs_header_nritems(&leaf->header) == 0) { - u64 blocknr = leaf_buf->b_blocknr; + u64 blocknr = bh_blocknr(leaf_buf); clean_tree_block(trans, root, leaf_buf); wait_on_buffer(leaf_buf); wret = del_ptr(trans, root, path, 1, slot); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 01310de2bf4..454eb88611b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -242,6 +242,7 @@ struct btrfs_fs_info { struct radix_tree_root fs_roots_radix; struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; + struct radix_tree_root dev_radix; u64 generation; struct btrfs_transaction *running_transaction; struct btrfs_super_block *disk_super; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e09233262af..c872a7e67ab 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -10,10 +10,30 @@ #include "transaction.h" #include "btrfs_inode.h" +struct dev_lookup { + u64 block_start; + u64 num_blocks; + struct block_device *bdev; +}; + +u64 bh_blocknr(struct buffer_head *bh) +{ + int blkbits = bh->b_page->mapping->host->i_blkbits; + u64 blocknr = bh->b_page->index << (PAGE_CACHE_SHIFT - blkbits); + unsigned long offset; + + if (PageHighMem(bh->b_page)) + offset = (unsigned long)bh->b_data; + else + offset = bh->b_data - (char *)page_address(bh->b_page); + blocknr += offset >> (PAGE_CACHE_SHIFT - blkbits); + return blocknr; +} + static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { struct btrfs_node *node = btrfs_buffer_node(buf); - if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) { + if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) { BUG(); } return 0; @@ -40,7 +60,7 @@ struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) head = page_buffers(page); bh = head; do { - if (buffer_mapped(bh) && bh->b_blocknr == blocknr) { + if (buffer_mapped(bh) && bh_blocknr(bh) == blocknr) { ret = bh; get_bh(bh); goto out_unlock; @@ -56,6 +76,33 @@ out_unlock: return ret; } +static int map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, + u64 logical) +{ + struct dev_lookup *lookup[2]; + char b[BDEVNAME_SIZE]; + + int ret; + + root = root->fs_info->dev_root; + ret = radix_tree_gang_lookup(&root->fs_info->dev_radix, + (void **)lookup, + (unsigned long)logical, + ARRAY_SIZE(lookup)); + if (ret == 0 || lookup[0]->block_start > logical || + lookup[0]->block_start + lookup[0]->num_blocks <= logical) { + ret = -ENOENT; + goto out; + } + bh->b_bdev = lookup[0]->bdev; + bh->b_blocknr = logical - lookup[0]->block_start; +printk("logical mapping %Lu to %lu bdev %s\n", logical, bh->b_blocknr, bdevname(bh->b_bdev, b)); + set_buffer_mapped(bh); + ret = 0; +out: + return ret; +} + struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, u64 blocknr) { @@ -66,6 +113,7 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, struct buffer_head *bh; struct buffer_head *head; struct buffer_head *ret = NULL; + int err; u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits); page = grab_cache_page(mapping, index); @@ -78,11 +126,10 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, bh = head; do { if (!buffer_mapped(bh)) { - bh->b_bdev = root->fs_info->sb->s_bdev; - bh->b_blocknr = first_block; - set_buffer_mapped(bh); + err = map_bh_to_logical(root, bh, first_block); + BUG_ON(err); } - if (bh->b_blocknr == blocknr) { + if (bh_blocknr(bh) == blocknr) { ret = bh; get_bh(bh); goto out_unlock; @@ -98,38 +145,13 @@ out_unlock: return ret; } -static sector_t max_block(struct block_device *bdev) -{ - sector_t retval = ~((sector_t)0); - loff_t sz = i_size_read(bdev->bd_inode); - - if (sz) { - unsigned int size = block_size(bdev); - unsigned int sizebits = blksize_bits(size); - retval = (sz >> sizebits); - } - return retval; -} - static int btree_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create) { - if (iblock >= max_block(inode->i_sb->s_bdev)) { - if (create) - return -EIO; - - /* - * for reads, we're just trying to fill a partial page. - * return a hole, they will have to call get_block again - * before they can fill it, and they will get -EIO at that - * time - */ - return 0; - } - bh->b_bdev = inode->i_sb->s_bdev; - bh->b_blocknr = iblock; - set_buffer_mapped(bh); - return 0; + int err; + struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; + err = map_bh_to_logical(root, bh, iblock); + return err; } int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, @@ -164,8 +186,8 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, return ret; if (verify) { if (memcmp(bh->b_data, result, BTRFS_CSUM_SIZE)) { - printk("checksum verify failed on %lu\n", - bh->b_blocknr); + printk("checksum verify failed on %Lu\n", + bh_blocknr(bh)); return 1; } } else { @@ -386,10 +408,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) GFP_NOFS); int ret; struct btrfs_super_block *disk_super; + struct dev_lookup *dev_lookup; init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); + INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; fs_info->tree_root = tree_root; @@ -422,6 +446,13 @@ struct btrfs_root *open_ctree(struct super_block *sb) __setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); + + dev_lookup = kmalloc(sizeof(*dev_lookup), GFP_NOFS); + dev_lookup->block_start = 0; + dev_lookup->num_blocks = (u32)-2; + dev_lookup->bdev = sb->s_bdev; + ret = radix_tree_insert(&fs_info->dev_radix, (u32)-2, dev_lookup); + BUG_ON(ret); fs_info->sb_buffer = read_tree_block(tree_root, BTRFS_SUPER_INFO_OFFSET / sb->s_blocksize); @@ -432,6 +463,14 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!btrfs_super_root(disk_super)) return NULL; + radix_tree_delete(&fs_info->dev_radix, (u32)-2); + dev_lookup->block_start = btrfs_super_device_block_start(disk_super); + dev_lookup->num_blocks = btrfs_super_device_num_blocks(disk_super); + ret = radix_tree_insert(&fs_info->dev_radix, + dev_lookup->block_start + + dev_lookup->num_blocks, dev_lookup); + BUG_ON(ret); + fs_info->disk_super = disk_super; dev_root->node = read_tree_block(tree_root, btrfs_super_device_root(disk_super)); @@ -459,7 +498,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root struct buffer_head *bh = root->fs_info->sb_buffer; btrfs_set_super_root(root->fs_info->disk_super, - root->fs_info->tree_root->node->b_blocknr); + bh_blocknr(root->fs_info->tree_root->node)); lock_buffer(bh); WARN_ON(atomic_read(&bh->b_count) < 1); clear_buffer_dirty(bh); @@ -506,6 +545,29 @@ int del_fs_roots(struct btrfs_fs_info *fs_info) } return 0; } +static int free_dev_radix(struct btrfs_fs_info *fs_info) +{ + struct dev_lookup *lookup[8]; + struct block_device *super_bdev = fs_info->sb->s_bdev; + int ret; + int i; + while(1) { + ret = radix_tree_gang_lookup(&fs_info->dev_radix, + (void **)lookup, 0, + ARRAY_SIZE(lookup)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + if (lookup[i]->bdev != super_bdev) + close_bdev_excl(lookup[i]->bdev); + radix_tree_delete(&fs_info->dev_radix, + lookup[i]->block_start + + lookup[i]->num_blocks); + kfree(lookup[i]); + } + } + return 0; +} int close_ctree(struct btrfs_root *root) { @@ -537,6 +599,8 @@ int close_ctree(struct btrfs_root *root) crypto_free_hash(fs_info->hash_tfm); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); + + free_dev_radix(fs_info); del_fs_roots(fs_info); kfree(fs_info->extent_root); kfree(fs_info->tree_root); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 0ef6e6f714a..46cc5dc9d57 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -41,4 +41,5 @@ int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, char *result); struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location); +u64 bh_blocknr(struct buffer_head *bh); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index be9630df506..d560831c10a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -80,7 +80,7 @@ static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - return inc_block_ref(trans, root, root->node->b_blocknr, 1); + return inc_block_ref(trans, root, bh_blocknr(root->node), 1); } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -563,7 +563,7 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = lookup_block_ref(trans, root, path->nodes[*level]->b_blocknr, + ret = lookup_block_ref(trans, root, bh_blocknr(path->nodes[*level]), 1, &refs); BUG_ON(ret); if (refs > 1) @@ -607,7 +607,7 @@ out: WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = btrfs_free_extent(trans, root, - path->nodes[*level]->b_blocknr, 1, 1); + bh_blocknr(path->nodes[*level]), 1, 1); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -635,7 +635,7 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } else { ret = btrfs_free_extent(trans, root, - path->nodes[*level]->b_blocknr, + bh_blocknr(path->nodes[*level]), 1, 1); BUG_ON(ret); btrfs_block_release(root, path->nodes[*level]); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2972aaa9eb7..4ff0cc1efb1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1700,7 +1700,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) leaf = btrfs_buffer_leaf(subvol); btrfs_set_header_nritems(&leaf->header, 0); btrfs_set_header_level(&leaf->header, 0); - btrfs_set_header_blocknr(&leaf->header, subvol->b_blocknr); + btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol)); btrfs_set_header_generation(&leaf->header, trans->transid); memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid, sizeof(leaf->header.fsid)); @@ -1713,7 +1713,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_inode_nblocks(inode_item, 1); btrfs_set_inode_mode(inode_item, S_IFDIR | 0755); - btrfs_set_root_blocknr(&root_item, subvol->b_blocknr); + btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); btrfs_set_root_refs(&root_item, 1); mark_buffer_dirty(subvol); @@ -1803,7 +1803,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) key.offset = 1; key.flags = 0; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); - btrfs_set_root_blocknr(&new_root_item, root->node->b_blocknr); + btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node)); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e15a072407b..3a15943ea8e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -119,10 +119,10 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); - if (old_extent_block == extent_root->node->b_blocknr) + if (old_extent_block == bh_blocknr(extent_root->node)) break; btrfs_set_root_blocknr(&extent_root->root_item, - extent_root->node->b_blocknr); + bh_blocknr(extent_root->node)); ret = btrfs_update_root(trans, tree_root, &extent_root->root_key, &extent_root->root_item); @@ -176,7 +176,7 @@ int add_dirty_roots(struct btrfs_trans_handle *trans, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); if (root->commit_root == root->node) { - WARN_ON(root->node->b_blocknr != + WARN_ON(bh_blocknr(root->node) != btrfs_root_blocknr(&root->root_item)); brelse(root->commit_root); root->commit_root = NULL; @@ -191,7 +191,7 @@ int add_dirty_roots(struct btrfs_trans_handle *trans, dirty->root = root; root->root_key.offset = root->fs_info->generation; btrfs_set_root_blocknr(&root->root_item, - root->node->b_blocknr); + bh_blocknr(root->node)); err = btrfs_insert_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); -- cgit v1.2.3-70-g09d2 From b18c6685810af8e6763760711aece31ccc7a8ea8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 17 Apr 2007 13:26:50 -0400 Subject: Btrfs: progress on file_write Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 57 ++++++++++ fs/btrfs/ctree.h | 21 +++- fs/btrfs/extent-tree.c | 20 ++-- fs/btrfs/file-item.c | 61 +++++++---- fs/btrfs/super.c | 284 ++++++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 394 insertions(+), 49 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6b76fd96715..43d4779abdd 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1249,6 +1249,63 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } +int btrfs_truncate_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u32 new_size) +{ + int ret = 0; + int slot; + int slot_orig; + struct btrfs_leaf *leaf; + struct buffer_head *leaf_buf; + u32 nritems; + unsigned int data_end; + unsigned int old_data_start; + unsigned int old_size; + unsigned int size_diff; + int i; + + slot_orig = path->slots[0]; + leaf_buf = path->nodes[0]; + leaf = btrfs_buffer_leaf(leaf_buf); + + nritems = btrfs_header_nritems(&leaf->header); + data_end = leaf_data_end(root, leaf); + + slot = path->slots[0]; + old_data_start = btrfs_item_offset(leaf->items + slot); + old_size = btrfs_item_size(leaf->items + slot); + BUG_ON(old_size <= new_size); + size_diff = old_size - new_size; + + BUG_ON(slot < 0); + BUG_ON(slot >= nritems); + + /* + * item0..itemN ... dataN.offset..dataN.size .. data0.size + */ + /* first correct the data pointers */ + for (i = slot; i < nritems; i++) { + u32 ioff = btrfs_item_offset(leaf->items + i); + btrfs_set_item_offset(leaf->items + i, + ioff + size_diff); + } + /* shift the data */ +printk("truncate item, new_size %u old_size %u, diff %u, bufp %p, dst, %p, num %u, old_data_start %u, data_end %u\n", new_size, old_size, size_diff, leaf, btrfs_leaf_data(leaf) + data_end + size_diff, old_data_start-data_end, old_data_start, data_end); + btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + data_end + size_diff, btrfs_leaf_data(leaf) + + data_end, old_data_start + new_size - data_end); + btrfs_set_item_size(leaf->items + slot, new_size); + btrfs_mark_buffer_dirty(leaf_buf); + + ret = 0; + if (btrfs_leaf_free_space(root, leaf) < 0) + BUG(); + check_leaf(root, path, 0); + return ret; +} + int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 data_size) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 796f19d03ab..ca3ab160f46 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -999,7 +999,7 @@ static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) ((type *)(btrfs_leaf_data(leaf) + \ btrfs_item_offset((leaf)->items + (slot)))) -/* extent-item.c */ +/* extent-tree.c */ int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, @@ -1013,9 +1013,16 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 blocknr, u64 num_blocks); /* ctree.c */ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 data_size); +int btrfs_truncate_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u32 new_size); int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); @@ -1073,11 +1080,10 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key *location, int mod); /* file-item.c */ -int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, +int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 objectid, u64 offset, - u64 num_blocks, u64 hint_block, - u64 *result); + u64 objectid, u64 pos, u64 offset, + u64 num_blocks); int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, @@ -1090,6 +1096,11 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, int btrfs_csum_verify_file_block(struct btrfs_root *root, u64 objectid, u64 offset, char *data, size_t len); +struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 objectid, u64 offset, + int cow); /* super.c */ extern struct subsystem btrfs_subsys; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d560831c10a..2cee9df001f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -12,8 +12,9 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); -static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks) +int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 blocknr, u64 num_blocks) { struct btrfs_path *path; int ret; @@ -50,8 +51,9 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } -static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks, u32 *refs) +static int lookup_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 blocknr, + u64 num_blocks, u32 *refs) { struct btrfs_path *path; int ret; @@ -80,7 +82,7 @@ static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - return inc_block_ref(trans, root, bh_blocknr(root->node), 1); + return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1); } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -107,13 +109,13 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, continue; fi = btrfs_item_ptr(buf_leaf, i, struct btrfs_file_extent_item); - ret = inc_block_ref(trans, root, + ret = btrfs_inc_extent_ref(trans, root, btrfs_file_extent_disk_blocknr(fi), btrfs_file_extent_disk_num_blocks(fi)); BUG_ON(ret); } else { blocknr = btrfs_node_blockptr(buf_node, i); - ret = inc_block_ref(trans, root, blocknr, 1); + ret = btrfs_inc_extent_ref(trans, root, blocknr, 1); BUG_ON(ret); } } @@ -563,7 +565,7 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = lookup_block_ref(trans, root, bh_blocknr(path->nodes[*level]), + ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]), 1, &refs); BUG_ON(ret); if (refs > 1) @@ -587,7 +589,7 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root } blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), path->slots[*level]); - ret = lookup_block_ref(trans, root, blocknr, 1, &refs); + ret = lookup_extent_ref(trans, root, blocknr, 1, &refs); BUG_ON(ret); if (refs != 1) { path->slots[*level]++; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 93d42d65082..f49968ad0a0 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -6,13 +6,11 @@ #define MAX_CSUM_ITEMS(r) ((((BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item)) / \ sizeof(struct btrfs_csum_item)) - 1)) -int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, +int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 objectid, u64 offset, - u64 num_blocks, u64 hint_block, - u64 *result) + u64 objectid, u64 pos, + u64 offset, u64 num_blocks) { - struct btrfs_key ins; int ret = 0; struct btrfs_file_extent_item *item; struct btrfs_key file_key; @@ -21,11 +19,13 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); + /* ret = btrfs_alloc_extent(trans, root, num_blocks, hint_block, (u64)-1, &ins); + */ BUG_ON(ret); file_key.objectid = objectid; - file_key.offset = offset; + file_key.offset = pos; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); @@ -34,21 +34,22 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, BUG_ON(ret); item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_blocknr(item, ins.objectid); - btrfs_set_file_extent_disk_num_blocks(item, ins.offset); + btrfs_set_file_extent_disk_blocknr(item, offset); + btrfs_set_file_extent_disk_num_blocks(item, num_blocks); btrfs_set_file_extent_offset(item, 0); - btrfs_set_file_extent_num_blocks(item, ins.offset); + btrfs_set_file_extent_num_blocks(item, num_blocks); btrfs_set_file_extent_generation(item, trans->transid); btrfs_mark_buffer_dirty(path->nodes[0]); - *result = ins.objectid; btrfs_release_path(root, path); btrfs_free_path(path); return 0; } -static struct btrfs_csum_item *__lookup_csum_item(struct btrfs_root *root, - struct btrfs_path *path, - u64 objectid, u64 offset) +struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 objectid, u64 offset, + int cow) { int ret; struct btrfs_key file_key; @@ -61,19 +62,23 @@ static struct btrfs_csum_item *__lookup_csum_item(struct btrfs_root *root, file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); - ret = btrfs_search_slot(NULL, root, &file_key, path, 0, 0); +printk("__lookup for %Lu\n", offset); + ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); if (ret < 0) goto fail; leaf = btrfs_buffer_leaf(path->nodes[0]); if (ret > 0) { ret = 1; - if (path->slots[0] == 0) + if (path->slots[0] == 0) { +printk("fail1\n"); goto fail; + } path->slots[0]--; btrfs_disk_key_to_cpu(&found_key, &leaf->items[path->slots[0]].key); if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || found_key.objectid != objectid) { +printk("fail2 type %u %Lu %Lu\n", btrfs_key_type(&found_key), found_key.objectid, objectid); goto fail; } csum_offset = (offset - found_key.offset) >> @@ -81,6 +86,7 @@ static struct btrfs_csum_item *__lookup_csum_item(struct btrfs_root *root, if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / sizeof(struct btrfs_csum_item)) { +printk("fail3, csum offset %lu size %u\n", csum_offset, btrfs_item_size(leaf->items + path->slots[0]) / sizeof(struct btrfs_csum_item)); goto fail; } } @@ -89,7 +95,7 @@ static struct btrfs_csum_item *__lookup_csum_item(struct btrfs_root *root, return item; fail: if (ret > 0) - ret = -EIO; + ret = -ENOENT; return ERR_PTR(ret); } @@ -105,7 +111,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, int cow = mod != 0; struct btrfs_csum_item *csum_item; - csum_item = __lookup_csum_item(root, path, objectid, offset); + csum_item = btrfs_lookup_csum(trans, root, path, objectid, offset, 0); if (IS_ERR(csum_item)) return PTR_ERR(csum_item); file_key.objectid = objectid; @@ -113,7 +119,9 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); btrfs_release_path(root, path); +printk("lookup file extent searches for %Lu\n", file_key.offset); ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); +printk("ret is %d\n", ret); return ret; } @@ -134,17 +142,23 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); + + item = btrfs_lookup_csum(trans, root, path, objectid, offset, 0); + if (!IS_ERR(item)) + goto found; + btrfs_release_path(root, path); file_key.objectid = objectid; file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); +printk("searching for csum %Lu %Lu\n", objectid, offset); ret = btrfs_search_slot(trans, root, &file_key, path, sizeof(struct btrfs_csum_item), 1); +printk("ret %d\n", ret); if (ret < 0) goto fail; if (ret == 0) { - csum_offset = 0; - goto csum; + BUG(); } if (path->slots[0] == 0) { btrfs_release_path(root, path); @@ -153,12 +167,15 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, path->slots[0]--; leaf = btrfs_buffer_leaf(path->nodes[0]); btrfs_disk_key_to_cpu(&found_key, &leaf->items[path->slots[0]].key); +printk("found key %Lu %Lu %u\n", found_key.objectid, found_key.offset, found_key.flags); csum_offset = (offset - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; +printk("csum_offset %Lu\n", csum_offset); if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || found_key.objectid != objectid || csum_offset >= MAX_CSUM_ITEMS(root)) { btrfs_release_path(root, path); +printk("insert1\n"); goto insert; } if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / @@ -166,11 +183,13 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, ret = btrfs_extend_item(trans, root, path, sizeof(struct btrfs_csum_item)); BUG_ON(ret); +printk("item extended\n"); goto csum; } insert: csum_offset = 0; +printk("inserting item %Lu %Lu %u\n", file_key.objectid, file_key.offset, file_key.flags); ret = btrfs_insert_empty_item(trans, root, path, &file_key, sizeof(struct btrfs_csum_item)); if (ret != 0 && ret != -EEXIST) @@ -180,12 +199,14 @@ csum: struct btrfs_csum_item); ret = 0; item += csum_offset; +found: ret = btrfs_csum_data(root, data, len, item->csum); btrfs_set_csum_extent_offset(item, extent_offset); btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_release_path(root, path); btrfs_free_path(path); +printk("return ret %d\n", ret); return ret; } @@ -208,7 +229,7 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); mutex_lock(&root->fs_info->fs_mutex); - item = __lookup_csum_item(root, path, objectid, offset); + item = btrfs_lookup_csum(NULL, root, path, objectid, offset, 0); if (IS_ERR(item)) { ret = PTR_ERR(item); goto fail; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ec689992fdf..6a56416147e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1089,7 +1089,6 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); extent_start = extent_start >> inode->i_blkbits; - extent_start += btrfs_file_extent_offset(item); extent_end = extent_start + btrfs_file_extent_num_blocks(item); if (iblock >= extent_start && iblock < extent_end) { err = 0; @@ -1103,6 +1102,7 @@ allocate: err = 0; goto out; } +#if 0 ret = btrfs_alloc_file_extent(trans, root, objectid, iblock << inode->i_blkbits, 1, extent_end, &blocknr); @@ -1115,9 +1115,11 @@ allocate: map_bh(result, inode->i_sb, blocknr); btrfs_map_bh_to_logical(root, result, blocknr); +#endif out: btrfs_release_path(root, path); btrfs_free_path(path); +printk("mapping iblock %lu to %lu\n", iblock, result->b_blocknr); if (trans) btrfs_end_transaction(trans, root); return err; @@ -1273,8 +1275,244 @@ failed: return err; } -static int prepare_pages(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int drop_csums(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + u64 start, u64 end) +{ + struct btrfs_path *path; + struct btrfs_leaf *leaf; + struct btrfs_key key; + int slot; + struct btrfs_csum_item *item; + char *old_block = NULL; + u64 cur = start; + u64 found_end; + u64 num_csums; + u64 item_size; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + while(cur < end) { + item = btrfs_lookup_csum(trans, root, path, + inode->i_ino, cur, 1); + if (IS_ERR(item)) { + cur += root->blocksize; + continue; + } + leaf = btrfs_buffer_leaf(path->nodes[0]); + slot = path->slots[0]; + btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + item_size = btrfs_item_size(leaf->items + slot); + num_csums = item_size / sizeof(struct btrfs_csum_item); + found_end = key.offset + (num_csums << inode->i_blkbits); + cur = found_end; + + if (found_end > end) { + char *src; + old_block = kmalloc(root->blocksize, GFP_NOFS); + src = btrfs_item_ptr(leaf, slot, char); + memcpy(old_block, src, item_size); + } + if (key.offset < start) { + u64 new_size = (start - key.offset) >> + inode->i_blkbits; + new_size *= sizeof(struct btrfs_csum_item); + ret = btrfs_truncate_item(trans, root, path, new_size); + BUG_ON(ret); + } else { + btrfs_del_item(trans, root, path); + } + btrfs_release_path(root, path); + if (found_end > end) { + char *dst; + int i; + int new_size; + + num_csums = (found_end - end) >> inode->i_blkbits; + new_size = num_csums * sizeof(struct btrfs_csum_item); + key.offset = end; + ret = btrfs_insert_empty_item(trans, root, path, + &key, new_size); + BUG_ON(ret); + dst = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], char); + memcpy(dst, old_block + item_size - new_size, + new_size); + item = (struct btrfs_csum_item *)dst; + for (i = 0; i < num_csums; i++) { + btrfs_set_csum_extent_offset(item, end); + item++; + } + mark_buffer_dirty(path->nodes[0]); + kfree(old_block); + break; + } + } + btrfs_free_path(path); + return 0; +} + +static int drop_extents(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + u64 start, u64 end) +{ + int ret; + struct btrfs_key key; + struct btrfs_leaf *leaf; + int slot; + struct btrfs_file_extent_item *extent; + u64 extent_end; + int keep; + struct btrfs_file_extent_item old; + struct btrfs_path *path; + u64 search_start = start; + int bookend; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; +search_again: +printk("drop extent inode %lu start %Lu end %Lu\n", inode->i_ino, start, end); + ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, + search_start, -1); + if (ret != 0) { +printk("lookup failed\n"); + goto out; + } + while(1) { + keep = 0; + bookend = 0; + leaf = btrfs_buffer_leaf(path->nodes[0]); + slot = path->slots[0]; + btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + +printk("found key %Lu %Lu %u\n", key.objectid, key.offset, key.flags); + + extent = btrfs_item_ptr(leaf, slot, + struct btrfs_file_extent_item); + extent_end = key.offset + + (btrfs_file_extent_num_blocks(extent) << + inode->i_blkbits); +printk("extent end is %Lu\n", extent_end); + if (key.offset >= end || key.objectid != inode->i_ino) { + ret = 0; + goto out; + } + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) + goto next_leaf; + + if (end < extent_end && end >= key.offset) { + memcpy(&old, extent, sizeof(old)); + ret = btrfs_inc_extent_ref(trans, root, + btrfs_file_extent_disk_blocknr(&old), + btrfs_file_extent_disk_num_blocks(&old)); + BUG_ON(ret); + bookend = 1; + } + + if (start > key.offset) { + u64 new_num; + /* truncate existing extent */ + keep = 1; + WARN_ON(start & (root->blocksize - 1)); + new_num = (start - key.offset) >> inode->i_blkbits; +printk("truncating existing extent, was %Lu ", btrfs_file_extent_num_blocks(extent)); + btrfs_set_file_extent_num_blocks(extent, new_num); +printk("now %Lu\n", btrfs_file_extent_num_blocks(extent)); + + mark_buffer_dirty(path->nodes[0]); + } + if (!keep) { + u64 disk_blocknr; + u64 disk_num_blocks; +printk("del old\n"); + disk_blocknr = btrfs_file_extent_disk_blocknr(extent); + disk_num_blocks = + btrfs_file_extent_disk_num_blocks(extent); + search_start = key.offset + + (btrfs_file_extent_num_blocks(extent) << + inode->i_blkbits); + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + btrfs_release_path(root, path); + + ret = btrfs_free_extent(trans, root, disk_blocknr, + disk_num_blocks, 0); + + BUG_ON(ret); + if (!bookend && search_start >= end) { + ret = 0; + goto out; + } + if (!bookend) + goto search_again; + } + if (bookend) { + /* create bookend */ + struct btrfs_key ins; +printk("bookend! extent end %Lu\n", extent_end); + ins.objectid = inode->i_ino; + ins.offset = end; + ins.flags = 0; + btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); + + btrfs_release_path(root, path); + ret = drop_csums(trans, root, inode, start, end); + BUG_ON(ret); + ret = btrfs_insert_empty_item(trans, root, path, &ins, + sizeof(*extent)); + BUG_ON(ret); + extent = btrfs_item_ptr( + btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_disk_blocknr(extent, + btrfs_file_extent_disk_blocknr(&old)); + btrfs_set_file_extent_disk_num_blocks(extent, + btrfs_file_extent_disk_num_blocks(&old)); + + btrfs_set_file_extent_offset(extent, + btrfs_file_extent_offset(&old) + + ((end - key.offset) >> inode->i_blkbits)); + WARN_ON(btrfs_file_extent_num_blocks(&old) < + (end - key.offset) >> inode->i_blkbits); + btrfs_set_file_extent_num_blocks(extent, + btrfs_file_extent_num_blocks(&old) - + ((end - key.offset) >> inode->i_blkbits)); + + btrfs_set_file_extent_generation(extent, + btrfs_file_extent_generation(&old)); +printk("new bookend at offset %Lu, file_extent_offset %Lu, file_extent_num_blocks %Lu\n", end, btrfs_file_extent_offset(extent), btrfs_file_extent_num_blocks(extent)); + btrfs_mark_buffer_dirty(path->nodes[0]); + ret = 0; + goto out_nocsum; + } +next_leaf: + if (slot >= btrfs_header_nritems(&leaf->header) - 1) { + ret = btrfs_next_leaf(root, path); + if (ret) { + ret = 0; + goto out; + } + } else { + path->slots[0]++; + } + } + +out: + ret = drop_csums(trans, root, inode, start, end); + BUG_ON(ret); + +out_nocsum: + btrfs_free_path(path); + return ret; +} + +static int prepare_pages(struct btrfs_root *root, struct file *file, struct page **pages, size_t num_pages, @@ -1289,7 +1527,6 @@ static int prepare_pages(struct btrfs_trans_handle *trans, struct inode *inode = file->f_path.dentry->d_inode; int offset; int err = 0; - int ret; int this_write; struct buffer_head *bh; struct buffer_head *head; @@ -1305,18 +1542,21 @@ static int prepare_pages(struct btrfs_trans_handle *trans, } offset = pos & (PAGE_CACHE_SIZE -1); this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); - if (!PageUptodate(pages[i]) && - (pages[i]->index == first_index || - pages[i]->index == last_index) && pos < isize) { +#if 0 + if ((pages[i]->index == first_index || + pages[i]->index == last_index) && pos < isize && + !PageUptodate(pages[i])) { ret = mpage_readpage(pages[i], btrfs_get_block); BUG_ON(ret); lock_page(pages[i]); } +#endif create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize, (1 << BH_Uptodate)); head = page_buffers(pages[i]); bh = head; do { +printk("mapping page %lu to block %Lu\n", pages[i]->index, alloc_extent_start); err = btrfs_map_bh_to_logical(root, bh, alloc_extent_start); BUG_ON(err); @@ -1351,7 +1591,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, int ret = 0; struct inode *inode = file->f_path.dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - struct page *pages[1]; + struct page *pages[8]; unsigned long first_index; unsigned long last_index; u64 start_pos; @@ -1359,6 +1599,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, u64 alloc_extent_start; u64 orig_extent_start; struct btrfs_trans_handle *trans; + struct btrfs_key ins; if (file->f_flags & O_DIRECT) return -EINVAL; @@ -1390,16 +1631,24 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, trans = btrfs_start_transaction(root, 1); if (!trans) { err = -ENOMEM; + mutex_unlock(&root->fs_info->fs_mutex); goto out_unlock; } - ret = btrfs_alloc_file_extent(trans, root, inode->i_ino, - start_pos, num_blocks, 1, - &alloc_extent_start); - BUG_ON(ret); - + if (start_pos < inode->i_size) { + ret = drop_extents(trans, root, inode, + start_pos, + (pos + count + root->blocksize -1) & + ~(root->blocksize - 1)); + } orig_extent_start = start_pos; - ret = btrfs_end_transaction(trans, root); + ret = btrfs_alloc_extent(trans, root, num_blocks, 1, + (u64)-1, &ins); + BUG_ON(ret); + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + start_pos, ins.objectid, ins.offset); BUG_ON(ret); + alloc_extent_start = ins.objectid; + ret = btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); while(count > 0) { @@ -1407,16 +1656,21 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - ret = prepare_pages(NULL, root, file, pages, num_pages, +printk("num_pages is %lu\n", num_pages); + + memset(pages, 0, sizeof(pages)); + ret = prepare_pages(root, file, pages, num_pages, pos, first_index, last_index, write_bytes, alloc_extent_start); BUG_ON(ret); + /* FIXME blocks != pagesize */ alloc_extent_start += num_pages; ret = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, buf); BUG_ON(ret); +printk("2num_pages is %lu\n", num_pages); ret = dirty_and_release_pages(NULL, root, file, pages, num_pages, orig_extent_start, pos, write_bytes); -- cgit v1.2.3-70-g09d2 From a429e51371eee3c989160c003ee40bc3947c6a76 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 18 Apr 2007 16:15:28 -0400 Subject: Btrfs: working file_write, reorganized key flags Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 30 +++++++++++-- fs/btrfs/ctree.h | 52 +++++++++++----------- fs/btrfs/dir-item.c | 70 ----------------------------- fs/btrfs/extent-tree.c | 4 +- fs/btrfs/file-item.c | 66 +++++++++++++++++++-------- fs/btrfs/print-tree.c | 2 +- fs/btrfs/super.c | 118 ++++++++++++++++++++++++++----------------------- 7 files changed, 167 insertions(+), 175 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 5c160c23677..4efcd1bd63e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -893,7 +893,11 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root } left_nritems = btrfs_header_nritems(&left->header); - for (i = left_nritems - 1; i >= 0; i--) { + if (left_nritems == 0) { + btrfs_block_release(root, right_buf); + return 1; + } + for (i = left_nritems - 1; i >= 1; i--) { item = left->items + i; if (path->slots[0] == i) push_space += data_size + sizeof(*item); @@ -907,6 +911,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_block_release(root, right_buf); return 1; } + if (push_items == left_nritems) + WARN_ON(1); right_nritems = btrfs_header_nritems(&right->header); /* push left to right */ push_space = btrfs_item_end(left->items + left_nritems - push_items); @@ -943,6 +949,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(left_buf); btrfs_mark_buffer_dirty(right_buf); + btrfs_memcpy(root, upper_node, &upper_node->ptrs[slot + 1].key, &right->items[0].key, sizeof(struct btrfs_disk_key)); btrfs_mark_buffer_dirty(upper); @@ -1004,7 +1011,12 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } - for (i = 0; i < btrfs_header_nritems(&right->header); i++) { + if (btrfs_header_nritems(&right->header) == 0) { + btrfs_block_release(root, t); + return 1; + } + + for (i = 0; i < btrfs_header_nritems(&right->header) - 1; i++) { item = right->items + i; if (path->slots[0] == i) push_space += data_size + sizeof(*item); @@ -1018,6 +1030,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_block_release(root, t); return 1; } + if (push_items == btrfs_header_nritems(&right->header)) + WARN_ON(1); /* push data from right to left */ btrfs_memcpy(root, left, left->items + btrfs_header_nritems(&left->header), @@ -1064,7 +1078,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(t); btrfs_mark_buffer_dirty(right_buf); - wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); if (wret) ret = wret; @@ -1181,6 +1194,12 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root path->nodes[0] = right_buffer; path->slots[0] = 0; path->slots[1] -= 1; + if (path->slots[1] == 0) { + wret = fixup_low_keys(trans, root, + path, &disk_key, 1); + if (wret) + ret = wret; + } return ret; } mid = slot; @@ -1241,6 +1260,11 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root path->slots[1], 1); if (wret) ret = wret; + if (path->slots[1] == 0) { + wret = fixup_low_keys(trans, root, path, &disk_key, 1); + if (wret) + ret = wret; + } btrfs_block_release(root, path->nodes[0]); path->nodes[0] = right_buffer; path->slots[0] = 0; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d75a4d5bc01..8a329d3901a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -282,11 +282,12 @@ struct btrfs_root { /* the lower bits in the key flags defines the item type */ #define BTRFS_KEY_TYPE_MAX 256 -#define BTRFS_KEY_TYPE_MASK (BTRFS_KEY_TYPE_MAX - 1) +#define BTRFS_KEY_TYPE_SHIFT 24 +#define BTRFS_KEY_TYPE_MASK (((u32)BTRFS_KEY_TYPE_MAX - 1) << \ + BTRFS_KEY_TYPE_SHIFT) #define BTRFS_KEY_OVERFLOW_MAX 128 -#define BTRFS_KEY_OVERFLOW_SHIFT 8 -#define BTRFS_KEY_OVERFLOW_MASK (0x7FULL << BTRFS_KEY_OVERFLOW_SHIFT) +#define BTRFS_KEY_OVERFLOW_MASK ((u32)BTRFS_KEY_OVERFLOW_MAX - 1) /* * inode items have the data typically returned from stat and store other @@ -586,56 +587,55 @@ static inline void btrfs_set_disk_key_flags(struct btrfs_disk_key *disk, disk->flags = cpu_to_le32(val); } -static inline u32 btrfs_key_overflow(struct btrfs_key *key) +static inline u32 btrfs_disk_key_type(struct btrfs_disk_key *key) { - u32 over = key->flags & BTRFS_KEY_OVERFLOW_MASK; - return over >> BTRFS_KEY_OVERFLOW_SHIFT; + return le32_to_cpu(key->flags) >> BTRFS_KEY_TYPE_SHIFT; } -static inline void btrfs_set_key_overflow(struct btrfs_key *key, u32 over) +static inline void btrfs_set_disk_key_type(struct btrfs_disk_key *key, + u32 val) { - BUG_ON(over >= BTRFS_KEY_OVERFLOW_MAX); - over = over << BTRFS_KEY_OVERFLOW_SHIFT; - key->flags = (key->flags & ~((u64)BTRFS_KEY_OVERFLOW_MASK)) | over; + u32 flags = btrfs_disk_key_flags(key); + BUG_ON(val >= BTRFS_KEY_TYPE_MAX); + val = val << BTRFS_KEY_TYPE_SHIFT; + flags = (flags & ~BTRFS_KEY_TYPE_MASK) | val; + btrfs_set_disk_key_flags(key, flags); } static inline u32 btrfs_key_type(struct btrfs_key *key) { - return key->flags & BTRFS_KEY_TYPE_MASK; + return key->flags >> BTRFS_KEY_TYPE_SHIFT; } -static inline u32 btrfs_disk_key_type(struct btrfs_disk_key *key) +static inline void btrfs_set_key_type(struct btrfs_key *key, u32 val) { - return le32_to_cpu(key->flags) & BTRFS_KEY_TYPE_MASK; + BUG_ON(val >= BTRFS_KEY_TYPE_MAX); + val = val << BTRFS_KEY_TYPE_SHIFT; + key->flags = (key->flags & ~(BTRFS_KEY_TYPE_MASK)) | val; } -static inline void btrfs_set_key_type(struct btrfs_key *key, u32 type) +static inline u32 btrfs_key_overflow(struct btrfs_key *key) { - BUG_ON(type >= BTRFS_KEY_TYPE_MAX); - key->flags = (key->flags & ~((u64)BTRFS_KEY_TYPE_MASK)) | type; + return key->flags & BTRFS_KEY_OVERFLOW_MASK; } -static inline void btrfs_set_disk_key_type(struct btrfs_disk_key *key, u32 type) +static inline void btrfs_set_key_overflow(struct btrfs_key *key, u32 over) { - u32 flags = btrfs_disk_key_flags(key); - BUG_ON(type >= BTRFS_KEY_TYPE_MAX); - flags = (flags & ~((u64)BTRFS_KEY_TYPE_MASK)) | type; - btrfs_set_disk_key_flags(key, flags); + BUG_ON(over >= BTRFS_KEY_OVERFLOW_MAX); + key->flags = (key->flags & ~BTRFS_KEY_OVERFLOW_MASK) | over; } static inline u32 btrfs_disk_key_overflow(struct btrfs_disk_key *key) { - u32 over = le32_to_cpu(key->flags) & BTRFS_KEY_OVERFLOW_MASK; - return over >> BTRFS_KEY_OVERFLOW_SHIFT; + return le32_to_cpu(key->flags) & BTRFS_KEY_OVERFLOW_MASK; } -static inline void btrfs_set_disK_key_overflow(struct btrfs_disk_key *key, +static inline void btrfs_set_disk_key_overflow(struct btrfs_disk_key *key, u32 over) { u32 flags = btrfs_disk_key_flags(key); BUG_ON(over >= BTRFS_KEY_OVERFLOW_MAX); - over = over << BTRFS_KEY_OVERFLOW_SHIFT; - flags = (flags & ~((u64)BTRFS_KEY_OVERFLOW_MASK)) | over; + flags = (flags & ~BTRFS_KEY_OVERFLOW_MASK) | over; btrfs_set_disk_key_flags(key, flags); } diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index cd4137a8b87..a43deb72648 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -58,30 +58,6 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); btrfs_mark_buffer_dirty(path->nodes[0]); - - /* FIXME, use some real flag for selecting the extra index */ - if (root == root->fs_info->tree_root) - goto out; - - btrfs_release_path(root, path); - - btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); - key.offset = location->objectid; - ret = insert_with_overflow(trans, root, path, &key, data_size); - // FIXME clear the dirindex bit - if (ret) - goto out; - - dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_dir_item); - btrfs_cpu_key_to_disk(&dir_item->location, location); - btrfs_set_dir_type(dir_item, type); - btrfs_set_dir_flags(dir_item, 0); - btrfs_set_dir_name_len(dir_item, name_len); - name_ptr = (char *)(dir_item + 1); - btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); - btrfs_mark_buffer_dirty(path->nodes[0]); out: btrfs_release_path(root, path); btrfs_free_path(path); @@ -135,52 +111,6 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } -int btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, u64 dir, - u64 objectid, int mod) -{ - int ret; - struct btrfs_key key; - int ins_len = mod < 0 ? -1 : 0; - int cow = mod != 0; - struct btrfs_disk_key *found_key; - struct btrfs_leaf *leaf; - int overflow = 0; - - key.objectid = dir; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); - key.offset = objectid; - - while(1) { - btrfs_set_key_overflow(&key, overflow); - ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); - if (ret < 0) - return ret; - if (ret > 0) { - if (overflow >= BTRFS_KEY_OVERFLOW_MAX) - return 1; - overflow++; - btrfs_set_key_overflow(&key, overflow); - btrfs_release_path(root, path); - continue; - } else { - /* found */ - break; - } - } - leaf = btrfs_buffer_leaf(path->nodes[0]); - found_key = &leaf->items[path->slots[0]].key; - - if (btrfs_disk_key_objectid(found_key) != dir || - btrfs_disk_key_type(found_key) != BTRFS_DIR_INDEX_KEY) - return 1; - if (btrfs_disk_key_offset(found_key) == objectid) - return 0; - return 1; -} - int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, const char *name, int name_len) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2cee9df001f..cb04a70eb7e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -35,8 +35,10 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, key.offset = num_blocks; ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 1); - if (ret != 0) + if (ret != 0) { +printk("can't find block %Lu %Lu\n", blocknr, num_blocks); BUG(); + } BUG_ON(ret != 0); l = btrfs_buffer_leaf(path->nodes[0]); item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index ff8f3339c68..8cc3c1d1541 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -4,7 +4,7 @@ #include "transaction.h" #define MAX_CSUM_ITEMS(r) ((((BTRFS_LEAF_DATA_SIZE(r) - \ - sizeof(struct btrfs_item)) / \ + sizeof(struct btrfs_item) * 2) / \ sizeof(struct btrfs_csum_item)) - 1)) int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -19,11 +19,6 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); - /* - ret = btrfs_alloc_extent(trans, root, num_blocks, hint_block, - (u64)-1, &ins); - */ - BUG_ON(ret); file_key.objectid = objectid; file_key.offset = pos; file_key.flags = 0; @@ -40,6 +35,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_num_blocks(item, num_blocks); btrfs_set_file_extent_generation(item, trans->transid); btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(root, path); btrfs_free_path(path); return 0; @@ -57,6 +53,7 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_csum_item *item; struct btrfs_leaf *leaf; u64 csum_offset = 0; + int csums_in_item; file_key.objectid = objectid; file_key.offset = offset; @@ -79,9 +76,11 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, } csum_offset = (offset - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; - if (csum_offset >= - btrfs_item_size(leaf->items + path->slots[0]) / - sizeof(struct btrfs_csum_item)) { + csums_in_item = btrfs_item_size(leaf->items + path->slots[0]); + csums_in_item /= sizeof(struct btrfs_csum_item); + + if (csum_offset >= csums_in_item) { + ret = -EFBIG; goto fail; } } @@ -128,16 +127,36 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); - item = btrfs_lookup_csum(trans, root, path, objectid, offset, 0); - if (!IS_ERR(item)) - goto found; - btrfs_release_path(root, path); file_key.objectid = objectid; file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); + + item = btrfs_lookup_csum(trans, root, path, objectid, offset, 1); + if (!IS_ERR(item)) + goto found; + ret = PTR_ERR(item); + if (ret == -EFBIG) { + u32 item_size; + /* we found one, but it isn't big enough yet */ + leaf = btrfs_buffer_leaf(path->nodes[0]); + item_size = btrfs_item_size(leaf->items + path->slots[0]); + if ((item_size / sizeof(struct btrfs_csum_item)) >= + MAX_CSUM_ITEMS(root)) { + /* already at max size, make a new one */ + goto insert; + } + } else { + /* we didn't find a csum item, insert one */ + goto insert; + } + + /* + * at this point, we know the tree has an item, but it isn't big + * enough yet to put our csum in. Grow it + */ + btrfs_release_path(root, path); ret = btrfs_search_slot(trans, root, &file_key, path, sizeof(struct btrfs_csum_item), 1); if (ret < 0) @@ -146,7 +165,6 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, BUG(); } if (path->slots[0] == 0) { - btrfs_release_path(root, path); goto insert; } path->slots[0]--; @@ -157,29 +175,36 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || found_key.objectid != objectid || csum_offset >= MAX_CSUM_ITEMS(root)) { - btrfs_release_path(root, path); + WARN_ON(1); goto insert; } if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / sizeof(struct btrfs_csum_item)) { - ret = btrfs_extend_item(trans, root, path, - sizeof(struct btrfs_csum_item)); + u32 diff = (csum_offset + 1) * sizeof(struct btrfs_csum_item); + diff = diff - btrfs_item_size(leaf->items + path->slots[0]); + WARN_ON(diff != sizeof(struct btrfs_csum_item)); + ret = btrfs_extend_item(trans, root, path, diff); BUG_ON(ret); goto csum; } insert: + btrfs_release_path(root, path); csum_offset = 0; ret = btrfs_insert_empty_item(trans, root, path, &file_key, sizeof(struct btrfs_csum_item)); - if (ret != 0 && ret != -EEXIST) + if (ret != 0) { + printk("at insert for %Lu %u %Lu ret is %d\n", file_key.objectid, file_key.flags, file_key.offset, ret); + WARN_ON(1); goto fail; + } csum: item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_csum_item); ret = 0; item += csum_offset; found: + btrfs_check_bounds(item->csum, BTRFS_CSUM_SIZE, path->nodes[0]->b_data, root->fs_info->sb->s_blocksize); ret = btrfs_csum_data(root, data, len, item->csum); btrfs_mark_buffer_dirty(path->nodes[0]); fail: @@ -210,6 +235,9 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, item = btrfs_lookup_csum(NULL, root, path, objectid, offset, 0); if (IS_ERR(item)) { ret = PTR_ERR(item); + /* a csum that isn't present is a preallocated region. */ + if (ret == -ENOENT || ret == -EFBIG) + ret = 1; goto fail; } diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 0732a2fbb23..1e7038b070a 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -19,7 +19,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) for (i = 0 ; i < nr ; i++) { item = l->items + i; type = btrfs_disk_key_type(&item->key); - printk("\titem %d key (%Lu %u %Lu) itemoff %d itemsize %d\n", + printk("\titem %d key (%Lu %x %Lu) itemoff %d itemsize %d\n", i, btrfs_disk_key_objectid(&item->key), btrfs_disk_key_flags(&item->key), diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b2a2220d135..583cd87a62e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -150,11 +150,6 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, BUG_ON(ret); btrfs_release_path(root, path); - ret = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, - objectid, -1); - BUG_ON(ret); - ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); dentry->d_inode->i_ctime = dir->i_ctime; err: btrfs_release_path(root, path); @@ -329,8 +324,9 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, extent_start = btrfs_file_extent_disk_blocknr(fi); extent_num_blocks = btrfs_file_extent_disk_num_blocks(fi); + /* FIXME blocksize != 4096 */ inode->i_blocks -= - btrfs_file_extent_num_blocks(fi) >> 9; + btrfs_file_extent_num_blocks(fi) << 3; found_extent = 1; } else { found_extent = 0; @@ -562,7 +558,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (btrfs_disk_key_objectid(&item->key) != key.objectid) break; if (btrfs_disk_key_type(&item->key) != key_type) - continue; + break; if (btrfs_disk_key_offset(&item->key) < filp->f_pos) continue; filp->f_pos = btrfs_disk_key_offset(&item->key); @@ -1285,29 +1281,27 @@ static int drop_extents(struct btrfs_trans_handle *trans, struct btrfs_path *path; u64 search_start = start; int bookend; - path = btrfs_alloc_path(); if (!path) return -ENOMEM; -search_again: - ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, - search_start, -1); - if (ret < 0) - goto out; - if (ret > 0) { - if (path->slots[0] == 0) { - ret = -ENOENT; + while(1) { + btrfs_release_path(root, path); + ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, + search_start, -1); + if (ret < 0) goto out; + if (ret > 0) { + if (path->slots[0] == 0) { + ret = -ENOENT; + goto out; + } + path->slots[0]--; } - path->slots[0]--; - } - while(1) { keep = 0; bookend = 0; leaf = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); - extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); extent_end = key.offset + @@ -1318,7 +1312,10 @@ search_again: goto out; } if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) - goto next_leaf; + goto out; + if (search_start >= extent_end) + goto out; + search_start = extent_end; if (end < extent_end && end >= key.offset) { memcpy(&old, extent, sizeof(old)); @@ -1331,10 +1328,13 @@ search_again: if (start > key.offset) { u64 new_num; + u64 old_num; /* truncate existing extent */ keep = 1; WARN_ON(start & (root->blocksize - 1)); new_num = (start - key.offset) >> inode->i_blkbits; + old_num = btrfs_file_extent_num_blocks(extent); + inode->i_blocks -= (old_num - new_num) << 3; btrfs_set_file_extent_num_blocks(extent, new_num); mark_buffer_dirty(path->nodes[0]); } @@ -1344,13 +1344,11 @@ search_again: disk_blocknr = btrfs_file_extent_disk_blocknr(extent); disk_num_blocks = btrfs_file_extent_disk_num_blocks(extent); - search_start = key.offset + - (btrfs_file_extent_num_blocks(extent) << - inode->i_blkbits); ret = btrfs_del_item(trans, root, path); BUG_ON(ret); + inode->i_blocks -= + btrfs_file_extent_num_blocks(extent) << 3; btrfs_release_path(root, path); - ret = btrfs_free_extent(trans, root, disk_blocknr, disk_num_blocks, 0); @@ -1360,7 +1358,7 @@ search_again: goto out; } if (!bookend) - goto search_again; + continue; } if (bookend) { /* create bookend */ @@ -1395,21 +1393,12 @@ search_again: btrfs_set_file_extent_generation(extent, btrfs_file_extent_generation(&old)); btrfs_mark_buffer_dirty(path->nodes[0]); + inode->i_blocks += + btrfs_file_extent_num_blocks(extent) << 3; ret = 0; goto out; } -next_leaf: - if (slot >= btrfs_header_nritems(&leaf->header) - 1) { - ret = btrfs_next_leaf(root, path); - if (ret) { - ret = 0; - goto out; - } - } else { - path->slots[0]++; - } } - out: btrfs_free_path(path); return ret; @@ -1445,15 +1434,6 @@ static int prepare_pages(struct btrfs_root *root, } offset = pos & (PAGE_CACHE_SIZE -1); this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); -#if 0 - if ((pages[i]->index == first_index || - pages[i]->index == last_index) && pos < isize && - !PageUptodate(pages[i])) { - ret = mpage_readpage(pages[i], btrfs_get_block); - BUG_ON(ret); - lock_page(pages[i]); - } -#endif create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize, (1 << BH_Uptodate)); head = page_buffers(pages[i]); @@ -1494,6 +1474,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, struct inode *inode = file->f_path.dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct page *pages[8]; + struct page *pinned[2] = { NULL, NULL }; unsigned long first_index; unsigned long last_index; u64 start_pos; @@ -1505,14 +1486,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (file->f_flags & O_DIRECT) return -EINVAL; pos = *ppos; - - start_pos = pos & ~(root->blocksize - 1); - /* FIXME */ - if (start_pos != pos) - return -EINVAL; - num_blocks = (count + pos - start_pos + root->blocksize - 1) >> - inode->i_blkbits; - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); current->backing_dev_info = inode->i_mapping->backing_dev_info; err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); @@ -1524,10 +1497,37 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (err) goto out; file_update_time(file); + + start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); + num_blocks = (count + pos - start_pos + root->blocksize - 1) >> + inode->i_blkbits; + mutex_lock(&inode->i_mutex); first_index = pos >> PAGE_CACHE_SHIFT; last_index = (pos + count) >> PAGE_CACHE_SHIFT; + if ((first_index << PAGE_CACHE_SHIFT) < inode->i_size && + (pos & (PAGE_CACHE_SIZE - 1))) { + pinned[0] = grab_cache_page(inode->i_mapping, first_index); + if (!PageUptodate(pinned[0])) { + ret = mpage_readpage(pinned[0], btrfs_get_block); + BUG_ON(ret); + } else { + unlock_page(pinned[0]); + } + } + if (first_index != last_index && + (last_index << PAGE_CACHE_SHIFT) < inode->i_size && + (count & (PAGE_CACHE_SIZE - 1))) { + pinned[1] = grab_cache_page(inode->i_mapping, last_index); + if (!PageUptodate(pinned[1])) { + ret = mpage_readpage(pinned[1], btrfs_get_block); + BUG_ON(ret); + } else { + unlock_page(pinned[1]); + } + } + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); if (!trans) { @@ -1535,11 +1535,14 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, mutex_unlock(&root->fs_info->fs_mutex); goto out_unlock; } + /* FIXME blocksize != 4096 */ + inode->i_blocks += num_blocks << 3; if (start_pos < inode->i_size) { + /* FIXME blocksize != pagesize */ ret = drop_extents(trans, root, inode, start_pos, (pos + count + root->blocksize -1) & - ~(root->blocksize - 1)); + ~((u64)root->blocksize - 1)); } ret = btrfs_alloc_extent(trans, root, num_blocks, 1, (u64)-1, &ins); @@ -1585,8 +1588,13 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, out_unlock: mutex_unlock(&inode->i_mutex); out: + if (pinned[0]) + page_cache_release(pinned[0]); + if (pinned[1]) + page_cache_release(pinned[1]); *ppos = pos; current->backing_dev_info = NULL; + mark_inode_dirty(inode); return num_written ? num_written : err; } -- cgit v1.2.3-70-g09d2 From 236454dfffb64a95ee01c50a215153f5de61c475 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 19 Apr 2007 13:37:44 -0400 Subject: Btrfs: many file_write fixes, inline data Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 35 ++++ fs/btrfs/disk-io.c | 6 + fs/btrfs/extent-tree.c | 5 + fs/btrfs/file-item.c | 1 + fs/btrfs/super.c | 553 ++++++++++++++++++++++++++++++++++++++----------- 5 files changed, 484 insertions(+), 116 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8a329d3901a..f1800008be3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -73,6 +73,9 @@ struct btrfs_header { (sizeof(struct btrfs_disk_key) + sizeof(u64))) #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) #define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->blocksize)) +#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ + sizeof(struct btrfs_item) - \ + sizeof(struct btrfs_file_extent_item)) struct buffer_head; /* @@ -204,8 +207,12 @@ struct btrfs_root_item { __le32 refs; } __attribute__ ((__packed__)); +#define BTRFS_FILE_EXTENT_REG 0 +#define BTRFS_FILE_EXTENT_INLINE 1 + struct btrfs_file_extent_item { __le64 generation; + u8 type; /* * disk space consumed by the extent, checksum blocks are included * in these numbers @@ -862,6 +869,34 @@ static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) return (u8 *)l->items; } +static inline int btrfs_file_extent_type(struct btrfs_file_extent_item *e) +{ + return e->type; +} +static inline void btrfs_set_file_extent_type(struct btrfs_file_extent_item *e, + u8 val) +{ + e->type = val; +} + +static inline char *btrfs_file_extent_inline_start(struct + btrfs_file_extent_item *e) +{ + return (char *)(&e->disk_blocknr); +} + +static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) +{ + return (unsigned long)(&((struct + btrfs_file_extent_item *)NULL)->disk_blocknr) + datasize; +} + +static inline u32 btrfs_file_extent_inline_len(struct btrfs_item *e) +{ + struct btrfs_file_extent_item *fe = NULL; + return btrfs_item_size(e) - (unsigned long)(&fe->disk_blocknr); +} + static inline u64 btrfs_file_extent_disk_blocknr(struct btrfs_file_extent_item *e) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a2a3f529cad..11e17a2f736 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -108,6 +108,12 @@ int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, int ret; + if (logical == 0) { + bh->b_bdev = NULL; + bh->b_blocknr = 0; + set_buffer_mapped(bh); + return 0; + } root = root->fs_info->dev_root; ret = radix_tree_gang_lookup(&root->fs_info->dev_radix, (void **)lookup, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cb04a70eb7e..b2faad3e879 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -111,6 +111,9 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, continue; fi = btrfs_item_ptr(buf_leaf, i, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(fi) == + BTRFS_FILE_EXTENT_INLINE) + continue; ret = btrfs_inc_extent_ref(trans, root, btrfs_file_extent_disk_blocknr(fi), btrfs_file_extent_disk_num_blocks(fi)); @@ -539,6 +542,8 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) continue; fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE) + continue; /* * FIXME make sure to insert a trans record that * repeats the snapshot del on crash diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 8cc3c1d1541..10e4cf08e9e 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -34,6 +34,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_offset(item, 0); btrfs_set_file_extent_num_blocks(item, num_blocks); btrfs_set_file_extent_generation(item, trans->transid); + btrfs_set_file_extent_type(item, BTRFS_FILE_EXTENT_REG); btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(root, path); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 583cd87a62e..1b286bb26f1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -317,19 +317,22 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, break; if (btrfs_disk_key_offset(found_key) < inode->i_size) break; + found_extent = 0; if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) { fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); - extent_start = btrfs_file_extent_disk_blocknr(fi); - extent_num_blocks = - btrfs_file_extent_disk_num_blocks(fi); - /* FIXME blocksize != 4096 */ - inode->i_blocks -= - btrfs_file_extent_num_blocks(fi) << 3; - found_extent = 1; - } else { - found_extent = 0; + if (btrfs_file_extent_type(fi) != + BTRFS_FILE_EXTENT_INLINE) { + extent_start = + btrfs_file_extent_disk_blocknr(fi); + extent_num_blocks = + btrfs_file_extent_disk_num_blocks(fi); + /* FIXME blocksize != 4096 */ + inode->i_blocks -= + btrfs_file_extent_num_blocks(fi) << 3; + found_extent = 1; + } } ret = btrfs_del_item(trans, root, path); BUG_ON(ret); @@ -1010,9 +1013,9 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, u64 extent_start = 0; u64 extent_end = 0; u64 objectid = inode->i_ino; + u32 found_type; struct btrfs_path *path; struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans = NULL; struct btrfs_file_extent_item *item; struct btrfs_leaf *leaf; struct btrfs_disk_key *found_key; @@ -1021,13 +1024,12 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, BUG_ON(!path); btrfs_init_path(path); if (create) { - trans = btrfs_start_transaction(root, 1); WARN_ON(1); } - ret = btrfs_lookup_file_extent(trans, root, path, + ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino, - iblock << inode->i_blkbits, create); + iblock << inode->i_blkbits, 0); if (ret < 0) { err = ret; goto out; @@ -1036,7 +1038,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, if (ret != 0) { if (path->slots[0] == 0) { btrfs_release_path(root, path); - goto allocate; + goto out; } path->slots[0]--; } @@ -1047,73 +1049,51 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, blocknr = btrfs_file_extent_disk_blocknr(item); blocknr += btrfs_file_extent_offset(item); - /* exact match found, use it, FIXME, deal with extents - * other than the page size - */ - if (0 && ret == 0) { - err = 0; - if (create && - btrfs_file_extent_generation(item) != trans->transid) { - struct btrfs_key ins; - ret = btrfs_alloc_extent(trans, root, 1, - blocknr, (u64)-1, &ins); - BUG_ON(ret); - btrfs_set_file_extent_disk_blocknr(item, ins.objectid); - mark_buffer_dirty(path->nodes[0]); - ret = btrfs_free_extent(trans, root, - blocknr, 1, 0); - BUG_ON(ret); - blocknr = ins.objectid; - - } - btrfs_map_bh_to_logical(root, result, blocknr); - goto out; - } - /* are we inside the extent that was found? */ found_key = &leaf->items[path->slots[0]].key; + found_type = btrfs_disk_key_type(found_key); if (btrfs_disk_key_objectid(found_key) != objectid || - btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) { + found_type != BTRFS_EXTENT_DATA_KEY) { extent_end = 0; extent_start = 0; btrfs_release_path(root, path); - goto allocate; - } - - extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); - extent_start = extent_start >> inode->i_blkbits; - extent_end = extent_start + btrfs_file_extent_num_blocks(item); - if (iblock >= extent_start && iblock < extent_end) { - err = 0; - btrfs_map_bh_to_logical(root, result, blocknr + iblock - - extent_start); - goto out; - } -allocate: - /* ok, create a new extent */ - if (!create) { - err = 0; goto out; } -#if 0 - ret = btrfs_alloc_file_extent(trans, root, objectid, - iblock << inode->i_blkbits, - 1, extent_end, &blocknr); - if (ret) { - err = ret; - goto out; + found_type = btrfs_file_extent_type(item); + extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); + if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_start = extent_start >> inode->i_blkbits; + extent_end = extent_start + btrfs_file_extent_num_blocks(item); + if (iblock >= extent_start && iblock < extent_end) { + err = 0; + btrfs_map_bh_to_logical(root, result, blocknr + + iblock - extent_start); + goto out; + } + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + char *ptr; + char *map; + u32 size; + size = btrfs_file_extent_inline_len(leaf->items + + path->slots[0]); + extent_end = (extent_start + size) >> inode->i_blkbits; + extent_start >>= inode->i_blkbits; + if (iblock < extent_start || iblock > extent_end) { + goto out; + } + ptr = btrfs_file_extent_inline_start(item); + map = kmap(result->b_page); + memcpy(map, ptr, size); + memset(map + size, 0, PAGE_CACHE_SIZE - size); + flush_dcache_page(result->b_page); + kunmap(result->b_page); + set_buffer_uptodate(result); + SetPageChecked(result->b_page); + btrfs_map_bh_to_logical(root, result, 0); } - inode->i_blocks += inode->i_sb->s_blocksize >> 9; - set_buffer_new(result); - map_bh(result, inode->i_sb, blocknr); - - btrfs_map_bh_to_logical(root, result, blocknr); -#endif out: btrfs_release_path(root, path); btrfs_free_path(path); - if (trans) - btrfs_end_transaction(trans, root); return err; } @@ -1124,7 +1104,6 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, struct btrfs_root *root = BTRFS_I(inode)->root; mutex_lock(&root->fs_info->fs_mutex); err = btrfs_get_block_lock(inode, iblock, result, create); - // err = btrfs_get_block_inline(inode, iblock, result, create); mutex_unlock(&root->fs_info->fs_mutex); return err; } @@ -1134,11 +1113,6 @@ static int btrfs_prepare_write(struct file *file, struct page *page, { return nobh_prepare_write(page, from, to, btrfs_get_block); } -static int btrfs_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - return nobh_commit_write(file, page, from, to); -} static void btrfs_write_super(struct super_block *sb) { @@ -1150,9 +1124,227 @@ static int btrfs_readpage(struct file *file, struct page *page) return mpage_readpage(page, btrfs_get_block); } +/* + * While block_write_full_page is writing back the dirty buffers under + * the page lock, whoever dirtied the buffers may decide to clean them + * again at any time. We handle that by only looking at the buffer + * state inside lock_buffer(). + * + * If block_write_full_page() is called for regular writeback + * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a + * locked buffer. This only can happen if someone has written the buffer + * directly, with submit_bh(). At the address_space level PageWriteback + * prevents this contention from occurring. + */ +static int __btrfs_write_full_page(struct inode *inode, struct page *page, + struct writeback_control *wbc) +{ + int err; + sector_t block; + sector_t last_block; + struct buffer_head *bh, *head; + const unsigned blocksize = 1 << inode->i_blkbits; + int nr_underway = 0; + + BUG_ON(!PageLocked(page)); + + last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; + + if (!page_has_buffers(page)) { + create_empty_buffers(page, blocksize, + (1 << BH_Dirty)|(1 << BH_Uptodate)); + } + + /* + * Be very careful. We have no exclusion from __set_page_dirty_buffers + * here, and the (potentially unmapped) buffers may become dirty at + * any time. If a buffer becomes dirty here after we've inspected it + * then we just miss that fact, and the page stays dirty. + * + * Buffers outside i_size may be dirtied by __set_page_dirty_buffers; + * handle that here by just cleaning them. + */ + + block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + head = page_buffers(page); + bh = head; + + /* + * Get all the dirty buffers mapped to disk addresses and + * handle any aliases from the underlying blockdev's mapping. + */ + do { + if (block > last_block) { + /* + * mapped buffers outside i_size will occur, because + * this page can be outside i_size when there is a + * truncate in progress. + */ + /* + * The buffer was zeroed by block_write_full_page() + */ + clear_buffer_dirty(bh); + set_buffer_uptodate(bh); + } else if (!buffer_mapped(bh) && buffer_dirty(bh)) { + WARN_ON(bh->b_size != blocksize); + err = btrfs_get_block(inode, block, bh, 0); + if (err) + goto recover; + if (buffer_new(bh)) { + /* blockdev mappings never come here */ + clear_buffer_new(bh); + unmap_underlying_metadata(bh->b_bdev, + bh->b_blocknr); + } + } + bh = bh->b_this_page; + block++; + } while (bh != head); + + do { + if (!buffer_mapped(bh)) + continue; + /* + * If it's a fully non-blocking write attempt and we cannot + * lock the buffer then redirty the page. Note that this can + * potentially cause a busy-wait loop from pdflush and kswapd + * activity, but those code paths have their own higher-level + * throttling. + */ + if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { + lock_buffer(bh); + } else if (test_set_buffer_locked(bh)) { + redirty_page_for_writepage(wbc, page); + continue; + } + if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) { + mark_buffer_async_write(bh); + } else { + unlock_buffer(bh); + } + } while ((bh = bh->b_this_page) != head); + + /* + * The page and its buffers are protected by PageWriteback(), so we can + * drop the bh refcounts early. + */ + BUG_ON(PageWriteback(page)); + set_page_writeback(page); + + do { + struct buffer_head *next = bh->b_this_page; + if (buffer_async_write(bh)) { + submit_bh(WRITE, bh); + nr_underway++; + } + bh = next; + } while (bh != head); + unlock_page(page); + + err = 0; +done: + if (nr_underway == 0) { + /* + * The page was marked dirty, but the buffers were + * clean. Someone wrote them back by hand with + * ll_rw_block/submit_bh. A rare case. + */ + int uptodate = 1; + do { + if (!buffer_uptodate(bh)) { + uptodate = 0; + break; + } + bh = bh->b_this_page; + } while (bh != head); + if (uptodate) + SetPageUptodate(page); + end_page_writeback(page); + /* + * The page and buffer_heads can be released at any time from + * here on. + */ + wbc->pages_skipped++; /* We didn't write this page */ + } + return err; + +recover: + /* + * ENOSPC, or some other error. We may already have added some + * blocks to the file, so we need to write these out to avoid + * exposing stale data. + * The page is currently locked and not marked for writeback + */ + bh = head; + /* Recovery: lock and submit the mapped buffers */ + do { + if (buffer_mapped(bh) && buffer_dirty(bh)) { + lock_buffer(bh); + mark_buffer_async_write(bh); + } else { + /* + * The buffer may have been set dirty during + * attachment to a dirty page. + */ + clear_buffer_dirty(bh); + } + } while ((bh = bh->b_this_page) != head); + SetPageError(page); + BUG_ON(PageWriteback(page)); + set_page_writeback(page); + do { + struct buffer_head *next = bh->b_this_page; + if (buffer_async_write(bh)) { + clear_buffer_dirty(bh); + submit_bh(WRITE, bh); + nr_underway++; + } + bh = next; + } while (bh != head); + unlock_page(page); + goto done; +} + +/* + * The generic ->writepage function for buffer-backed address_spaces + */ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { - return nobh_writepage(page, btrfs_get_block, wbc); + struct inode * const inode = page->mapping->host; + loff_t i_size = i_size_read(inode); + const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + unsigned offset; + void *kaddr; + + /* Is the page fully inside i_size? */ + if (page->index < end_index) + return __btrfs_write_full_page(inode, page, wbc); + + /* Is the page fully outside i_size? (truncate in progress) */ + offset = i_size & (PAGE_CACHE_SIZE-1); + if (page->index >= end_index+1 || !offset) { + /* + * The page may have dirty, unmapped buffers. For example, + * they may have been added in ext3_writepage(). Make them + * freeable here, so the page does not leak. + */ + block_invalidatepage(page, 0); + unlock_page(page); + return 0; /* don't care */ + } + + /* + * The page straddles i_size. It must be zeroed out on each and every + * writepage invokation because it may be mmapped. "A file is mapped + * in multiples of the page size. For a file that is not a multiple of + * the page size, the remaining memory is zeroed when mapped, and + * writes to that region are not written out to the file." + */ + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + return __btrfs_write_full_page(inode, page, wbc); } static void btrfs_truncate(struct inode *inode) @@ -1179,6 +1371,29 @@ static void btrfs_truncate(struct inode *inode) mark_inode_dirty(inode); } +/* + * Make sure any changes to nobh_commit_write() are reflected in + * nobh_truncate_page(), since it doesn't call commit_write(). + */ +static int btrfs_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + struct inode *inode = page->mapping->host; + struct buffer_head *bh; + loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + + SetPageUptodate(page); + bh = page_buffers(page); + if (buffer_mapped(bh) && bh->b_blocknr != 0) { + set_page_dirty(page); + } + if (pos > inode->i_size) { + i_size_write(inode, pos); + mark_inode_dirty(inode); + } + return 0; +} + static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, struct page **prepared_pages, const char __user * buf) @@ -1234,6 +1449,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, int ret; int this_write; struct inode *inode = file->f_path.dentry->d_inode; + struct buffer_head *bh; + struct btrfs_file_extent_item *ei; for (i = 0; i < num_pages; i++) { offset = pos & (PAGE_CACHE_SIZE -1); @@ -1242,16 +1459,47 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); - btrfs_csum_file_block(trans, root, inode->i_ino, + + bh = page_buffers(pages[i]); + if (buffer_mapped(bh) && bh->b_blocknr == 0) { + struct btrfs_key key; + struct btrfs_path *path; + char *ptr; + u32 datasize; + + path = btrfs_alloc_path(); + BUG_ON(!path); + key.objectid = inode->i_ino; + key.offset = pages[i]->index << PAGE_CACHE_SHIFT; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + BUG_ON(write_bytes >= PAGE_CACHE_SIZE); + datasize = offset + + btrfs_file_extent_calc_inline_size(write_bytes); + ret = btrfs_insert_empty_item(trans, root, path, &key, + datasize); + BUG_ON(ret); + ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(ei, trans->transid); + btrfs_set_file_extent_type(ei, + BTRFS_FILE_EXTENT_INLINE); + ptr = btrfs_file_extent_inline_start(ei); + memcpy(ptr, bh->b_data, offset + write_bytes); + mark_buffer_dirty(path->nodes[0]); + btrfs_free_path(path); + } else { + btrfs_csum_file_block(trans, root, inode->i_ino, pages[i]->index << PAGE_CACHE_SHIFT, kmap(pages[i]), PAGE_CACHE_SIZE); - kunmap(pages[i]); + kunmap(pages[i]); + } SetPageChecked(pages[i]); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); - ret = nobh_commit_write(file, pages[i], offset, + ret = btrfs_commit_write(file, pages[i], offset, offset + this_write); pos += this_write; if (ret) { @@ -1275,12 +1523,16 @@ static int drop_extents(struct btrfs_trans_handle *trans, struct btrfs_leaf *leaf; int slot; struct btrfs_file_extent_item *extent; - u64 extent_end; + u64 extent_end = 0; int keep; struct btrfs_file_extent_item old; struct btrfs_path *path; u64 search_start = start; int bookend; + int found_type; + int found_extent; + int found_inline; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -1292,37 +1544,62 @@ static int drop_extents(struct btrfs_trans_handle *trans, goto out; if (ret > 0) { if (path->slots[0] == 0) { - ret = -ENOENT; + ret = 0; goto out; } path->slots[0]--; } keep = 0; bookend = 0; + found_extent = 0; + found_inline = 0; + extent = NULL; leaf = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); - extent = btrfs_item_ptr(leaf, slot, - struct btrfs_file_extent_item); - extent_end = key.offset + - (btrfs_file_extent_num_blocks(extent) << - inode->i_blkbits); if (key.offset >= end || key.objectid != inode->i_ino) { ret = 0; goto out; } - if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) { + ret = 0; + goto out; + } + extent = btrfs_item_ptr(leaf, slot, + struct btrfs_file_extent_item); + found_type = btrfs_file_extent_type(extent); + if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_end = key.offset + + (btrfs_file_extent_num_blocks(extent) << + inode->i_blkbits); + found_extent = 1; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + found_inline = 1; + extent_end = key.offset + + btrfs_file_extent_inline_len(leaf->items + slot); + } + + if (!found_extent && !found_inline) { + ret = 0; goto out; - if (search_start >= extent_end) + } + + if (search_start >= extent_end) { + ret = 0; goto out; + } + search_start = extent_end; if (end < extent_end && end >= key.offset) { - memcpy(&old, extent, sizeof(old)); - ret = btrfs_inc_extent_ref(trans, root, - btrfs_file_extent_disk_blocknr(&old), - btrfs_file_extent_disk_num_blocks(&old)); - BUG_ON(ret); + if (found_extent) { + memcpy(&old, extent, sizeof(old)); + ret = btrfs_inc_extent_ref(trans, root, + btrfs_file_extent_disk_blocknr(&old), + btrfs_file_extent_disk_num_blocks(&old)); + BUG_ON(ret); + } + WARN_ON(found_inline); bookend = 1; } @@ -1332,25 +1609,45 @@ static int drop_extents(struct btrfs_trans_handle *trans, /* truncate existing extent */ keep = 1; WARN_ON(start & (root->blocksize - 1)); - new_num = (start - key.offset) >> inode->i_blkbits; - old_num = btrfs_file_extent_num_blocks(extent); - inode->i_blocks -= (old_num - new_num) << 3; - btrfs_set_file_extent_num_blocks(extent, new_num); - mark_buffer_dirty(path->nodes[0]); + if (found_extent) { + new_num = (start - key.offset) >> + inode->i_blkbits; + old_num = btrfs_file_extent_num_blocks(extent); + inode->i_blocks -= (old_num - new_num) << 3; + btrfs_set_file_extent_num_blocks(extent, + new_num); + mark_buffer_dirty(path->nodes[0]); + } else { + WARN_ON(1); + /* + ret = btrfs_truncate_item(trans, root, path, + start - key.offset); + BUG_ON(ret); + */ + } } if (!keep) { - u64 disk_blocknr; - u64 disk_num_blocks; - disk_blocknr = btrfs_file_extent_disk_blocknr(extent); - disk_num_blocks = - btrfs_file_extent_disk_num_blocks(extent); + u64 disk_blocknr = 0; + u64 disk_num_blocks = 0; + u64 extent_num_blocks = 0; + if (found_extent) { + disk_blocknr = + btrfs_file_extent_disk_blocknr(extent); + disk_num_blocks = + btrfs_file_extent_disk_num_blocks(extent); + extent_num_blocks = + btrfs_file_extent_num_blocks(extent); + } ret = btrfs_del_item(trans, root, path); BUG_ON(ret); - inode->i_blocks -= - btrfs_file_extent_num_blocks(extent) << 3; btrfs_release_path(root, path); - ret = btrfs_free_extent(trans, root, disk_blocknr, - disk_num_blocks, 0); + if (found_extent) { + inode->i_blocks -= + btrfs_file_extent_num_blocks(extent) << 3; + ret = btrfs_free_extent(trans, root, + disk_blocknr, + disk_num_blocks, 0); + } BUG_ON(ret); if (!bookend && search_start >= end) { @@ -1360,7 +1657,7 @@ static int drop_extents(struct btrfs_trans_handle *trans, if (!bookend) continue; } - if (bookend) { + if (bookend && found_extent) { /* create bookend */ struct btrfs_key ins; ins.objectid = inode->i_ino; @@ -1390,6 +1687,8 @@ static int drop_extents(struct btrfs_trans_handle *trans, btrfs_file_extent_num_blocks(&old) - ((end - key.offset) >> inode->i_blkbits)); + btrfs_set_file_extent_type(extent, + BTRFS_FILE_EXTENT_REG); btrfs_set_file_extent_generation(extent, btrfs_file_extent_generation(&old)); btrfs_mark_buffer_dirty(path->nodes[0]); @@ -1445,7 +1744,8 @@ static int prepare_pages(struct btrfs_root *root, if (err) goto failed_truncate; bh = bh->b_this_page; - alloc_extent_start++; + if (alloc_extent_start) + alloc_extent_start++; } while (bh != head); pos += this_write; WARN_ON(this_write > write_bytes); @@ -1543,12 +1843,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, start_pos, (pos + count + root->blocksize -1) & ~((u64)root->blocksize - 1)); + BUG_ON(ret); } - ret = btrfs_alloc_extent(trans, root, num_blocks, 1, + if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || + pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { + ret = btrfs_alloc_extent(trans, root, num_blocks, 1, (u64)-1, &ins); - BUG_ON(ret); - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + BUG_ON(ret); + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, start_pos, ins.objectid, ins.offset); + BUG_ON(ret); + } else { + ins.offset = 0; + ins.objectid = 0; + } BUG_ON(ret); alloc_extent_start = ins.objectid; ret = btrfs_end_transaction(trans, root); @@ -1567,7 +1875,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, BUG_ON(ret); /* FIXME blocks != pagesize */ - alloc_extent_start += num_pages; + if (alloc_extent_start) + alloc_extent_start += num_pages; ret = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, buf); BUG_ON(ret); @@ -1779,10 +2088,11 @@ static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, if (!PageChecked(page)) { /* FIXME, do it per block */ struct btrfs_root *root = BTRFS_I(inode)->root; + int ret = btrfs_csum_verify_file_block(root, - page->mapping->host->i_ino, - page->index << PAGE_CACHE_SHIFT, - kmap(page), PAGE_CACHE_SIZE); + page->mapping->host->i_ino, + page->index << PAGE_CACHE_SHIFT, + kmap(page), PAGE_CACHE_SIZE); if (ret) { printk("failed to verify ino %lu page %lu\n", page->mapping->host->i_ino, @@ -2249,6 +2559,16 @@ static int btrfs_get_sb(struct file_system_type *fs_type, btrfs_fill_super, mnt); } + +static int btrfs_getattr(struct vfsmount *mnt, + struct dentry *dentry, struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + generic_fillattr(inode, stat); + stat->blksize = 256 * 1024; + return 0; +} + static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", @@ -2298,6 +2618,7 @@ static struct address_space_operations btrfs_aops = { static struct inode_operations btrfs_file_inode_operations = { .truncate = btrfs_truncate, + .getattr = btrfs_getattr, }; static struct file_operations btrfs_file_operations = { -- cgit v1.2.3-70-g09d2 From 4d775673091d43b39fa9d086071009f98dec289e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 20 Apr 2007 20:23:12 -0400 Subject: Btrfs: add owner and type fields to the extents aand block headers Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 5 +++++ fs/btrfs/ctree.h | 45 ++++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 15 ++++++++++----- fs/btrfs/super.c | 13 +++++++------ 5 files changed, 65 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1bae035f517..b4783bf8bf4 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -68,6 +68,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); btrfs_set_header_generation(&cow_node->header, trans->transid); + btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); btrfs_inc_ref(trans, root, buf); if (buf == root->node) { root->node = cow; @@ -806,6 +807,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_level(&c->header, level); btrfs_set_header_blocknr(&c->header, bh_blocknr(t)); btrfs_set_header_generation(&c->header, trans->transid); + btrfs_set_header_owner(&c->header, root->root_key.objectid); lower = btrfs_buffer_node(path->nodes[level-1]); memcpy(c->header.fsid, root->fs_info->disk_super->fsid, sizeof(c->header.fsid)); @@ -909,6 +911,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); btrfs_set_header_blocknr(&split->header, bh_blocknr(split_buffer)); btrfs_set_header_generation(&split->header, trans->transid); + btrfs_set_header_owner(&split->header, root->root_key.objectid); memcpy(split->header.fsid, root->fs_info->disk_super->fsid, sizeof(split->header.fsid)); mid = (c_nritems + 1) / 2; @@ -1280,6 +1283,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root memset(&right->header, 0, sizeof(right->header)); btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); btrfs_set_header_generation(&right->header, trans->transid); + btrfs_set_header_owner(&right->header, root->root_key.objectid); btrfs_set_header_level(&right->header, 0); memcpy(right->header.fsid, root->fs_info->disk_super->fsid, sizeof(right->header.fsid)); @@ -1376,6 +1380,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root memset(&right->header, 0, sizeof(right->header)); btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); btrfs_set_header_generation(&right->header, trans->transid); + btrfs_set_header_owner(&right->header, root->root_key.objectid); btrfs_set_header_level(&right->header, 0); memcpy(right->header.fsid, root->fs_info->disk_super->fsid, sizeof(right->header.fsid)); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 681b23933d9..78248d57729 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -62,6 +62,7 @@ struct btrfs_header { u8 fsid[16]; /* FS specific uuid */ __le64 blocknr; /* which block this node is supposed to live in */ __le64 generation; + __le64 owner; __le16 nritems; __le16 flags; u8 level; @@ -151,12 +152,17 @@ struct btrfs_path { int slots[BTRFS_MAX_LEVEL]; }; +/* values for the type field in btrfs_extent_item */ +#define BTRFS_EXTENT_TREE 1 +#define BTRFS_EXTENT_FILE 2 /* * items in the extent btree are used to record the objectid of the * owner of the block and the number of references */ struct btrfs_extent_item { __le32 refs; + __le64 owner; + u8 type; } __attribute__ ((__packed__)); struct btrfs_inode_timespec { @@ -473,11 +479,32 @@ static inline void btrfs_set_extent_refs(struct btrfs_extent_item *ei, u32 val) ei->refs = cpu_to_le32(val); } +static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) +{ + return le64_to_cpu(ei->owner); +} + +static inline void btrfs_set_extent_owner(struct btrfs_extent_item *ei, u64 val) +{ + ei->owner = cpu_to_le64(val); +} + +static inline u8 btrfs_extent_type(struct btrfs_extent_item *ei) +{ + return ei->type; +} + +static inline void btrfs_set_extent_type(struct btrfs_extent_item *ei, u8 val) +{ + ei->type = val; +} + static inline u64 btrfs_node_blockptr(struct btrfs_node *n, int nr) { return le64_to_cpu(n->ptrs[nr].blockptr); } + static inline void btrfs_set_node_blockptr(struct btrfs_node *n, int nr, u64 val) { @@ -636,6 +663,17 @@ static inline void btrfs_set_header_generation(struct btrfs_header *h, h->generation = cpu_to_le64(val); } +static inline u64 btrfs_header_owner(struct btrfs_header *h) +{ + return le64_to_cpu(h->owner); +} + +static inline void btrfs_set_header_owner(struct btrfs_header *h, + u64 val) +{ + h->owner = cpu_to_le64(val); +} + static inline u16 btrfs_header_nritems(struct btrfs_header *h) { return le16_to_cpu(h->nritems); @@ -996,9 +1034,10 @@ int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 num_blocks, u64 search_start, u64 - search_end, struct btrfs_key *ins); +int btrfs_alloc_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 owner, + u8 type, u64 num_blocks, u64 search_start, + u64 search_end, struct btrfs_key *ins); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 13046295bf7..7aff6bb55d9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -319,6 +319,7 @@ static int __setup_root(int blocksize, root->last_inode_alloc = 0; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); + root->root_key.objectid = objectid; return 0; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b2faad3e879..49f7cd6e067 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -460,9 +460,10 @@ error: * * returns 0 if everything worked, non-zero otherwise. */ -int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 num_blocks, u64 search_start, u64 - search_end, struct btrfs_key *ins) +int btrfs_alloc_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 owner, + u8 type, u64 num_blocks, u64 search_start, + u64 search_end, struct btrfs_key *ins) { int ret; int pending_ret; @@ -472,6 +473,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_extent_item extent_item; btrfs_set_extent_refs(&extent_item, 1); + btrfs_set_extent_owner(&extent_item, owner); + btrfs_set_extent_type(&extent_item, type); if (root == extent_root) { BUG_ON(extent_root->fs_info->current_insert.offset == 0); @@ -508,13 +511,15 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root * returns the tree buffer or NULL. */ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root) + struct btrfs_root *root) { struct btrfs_key ins; int ret; struct buffer_head *buf; - ret = btrfs_alloc_extent(trans, root, 1, 0, (unsigned long)-1, &ins); + ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, + BTRFS_EXTENT_TREE, + 1, 0, (unsigned long)-1, &ins); if (ret) { BUG(); return NULL; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f99c764a59d..eba239cce21 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1819,8 +1819,9 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { - ret = btrfs_alloc_extent(trans, root, num_blocks, 1, - (u64)-1, &ins); + ret = btrfs_alloc_extent(trans, root, inode->i_ino, + BTRFS_EXTENT_FILE, num_blocks, 1, + (u64)-1, &ins); BUG_ON(ret); ret = btrfs_insert_file_extent(trans, root, inode->i_ino, start_pos, ins.objectid, ins.offset); @@ -2017,8 +2018,12 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_header_level(&leaf->header, 0); btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol)); btrfs_set_header_generation(&leaf->header, trans->transid); + btrfs_set_header_owner(&leaf->header, root->root_key.objectid); memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid, sizeof(leaf->header.fsid)); + mark_buffer_dirty(subvol); + brelse(subvol); + subvol = NULL; inode_item = &root_item.inode; memset(inode_item, 0, sizeof(*inode_item)); @@ -2031,10 +2036,6 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); btrfs_set_root_refs(&root_item, 1); - mark_buffer_dirty(subvol); - brelse(subvol); - subvol = NULL; - ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, 0, &objectid); BUG_ON(ret); -- cgit v1.2.3-70-g09d2 From 5d0c3e60fea7e23fbfbe3ab2886b444b4c702441 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 23 Apr 2007 17:01:05 -0400 Subject: Btrfs: fix extent owner/type setting on extent tree blocks Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 49f7cd6e067..aac3eccffb8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -167,6 +167,8 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct ins.offset = 1; ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); + btrfs_set_extent_type(&extent_item, BTRFS_EXTENT_TREE); + btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid); for (i = 0; i < extent_root->fs_info->current_insert.flags; i++) { ins.objectid = extent_root->fs_info->current_insert.objectid + @@ -356,7 +358,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); level = btrfs_header_level(btrfs_buffer_header(root->node)); - total_needed += (level + 1) * 3; + total_needed += (level + 2) * 3; if (root->fs_info->last_insert.objectid == 0 && search_end == (u64)-1) { struct btrfs_disk_key *last_key; btrfs_init_path(path); -- cgit v1.2.3-70-g09d2 From c62a1920ced752e86f57ab1d4ad0ec65012bce4d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 24 Apr 2007 12:07:39 -0400 Subject: Btrfs: get rid of the extent_item type field Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 16 +--------------- fs/btrfs/extent-tree.c | 5 +---- fs/btrfs/super.c | 3 +-- 3 files changed, 3 insertions(+), 21 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 78248d57729..2d166ca8b10 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -152,9 +152,6 @@ struct btrfs_path { int slots[BTRFS_MAX_LEVEL]; }; -/* values for the type field in btrfs_extent_item */ -#define BTRFS_EXTENT_TREE 1 -#define BTRFS_EXTENT_FILE 2 /* * items in the extent btree are used to record the objectid of the * owner of the block and the number of references @@ -162,7 +159,6 @@ struct btrfs_path { struct btrfs_extent_item { __le32 refs; __le64 owner; - u8 type; } __attribute__ ((__packed__)); struct btrfs_inode_timespec { @@ -489,16 +485,6 @@ static inline void btrfs_set_extent_owner(struct btrfs_extent_item *ei, u64 val) ei->owner = cpu_to_le64(val); } -static inline u8 btrfs_extent_type(struct btrfs_extent_item *ei) -{ - return ei->type; -} - -static inline void btrfs_set_extent_type(struct btrfs_extent_item *ei, u8 val) -{ - ei->type = val; -} - static inline u64 btrfs_node_blockptr(struct btrfs_node *n, int nr) { return le64_to_cpu(n->ptrs[nr].blockptr); @@ -1036,7 +1022,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u8 type, u64 num_blocks, u64 search_start, + u64 num_blocks, u64 search_start, u64 search_end, struct btrfs_key *ins); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index aac3eccffb8..116519503d0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -167,7 +167,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct ins.offset = 1; ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); - btrfs_set_extent_type(&extent_item, BTRFS_EXTENT_TREE); btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid); for (i = 0; i < extent_root->fs_info->current_insert.flags; i++) { @@ -464,7 +463,7 @@ error: */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u8 type, u64 num_blocks, u64 search_start, + u64 num_blocks, u64 search_start, u64 search_end, struct btrfs_key *ins) { int ret; @@ -476,7 +475,6 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, btrfs_set_extent_refs(&extent_item, 1); btrfs_set_extent_owner(&extent_item, owner); - btrfs_set_extent_type(&extent_item, type); if (root == extent_root) { BUG_ON(extent_root->fs_info->current_insert.offset == 0); @@ -520,7 +518,6 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - BTRFS_EXTENT_TREE, 1, 0, (unsigned long)-1, &ins); if (ret) { BUG(); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 43f081593c4..c076474c8bf 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1830,8 +1830,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { ret = btrfs_alloc_extent(trans, root, inode->i_ino, - BTRFS_EXTENT_FILE, num_blocks, 1, - (u64)-1, &ins); + num_blocks, 1, (u64)-1, &ins); BUG_ON(ret); ret = btrfs_insert_file_extent(trans, root, inode->i_ino, start_pos, ins.objectid, ins.offset); -- cgit v1.2.3-70-g09d2 From f2458e1d8c90958ed3631654cb7fd5ab01478505 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Apr 2007 15:52:25 -0400 Subject: Btrfs: change around extent-tree prealloc Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 10 +++-- fs/btrfs/disk-io.c | 3 +- fs/btrfs/extent-tree.c | 101 ++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 87 insertions(+), 27 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2d166ca8b10..26d0cdd46f4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -244,12 +244,17 @@ struct btrfs_fs_info { struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct btrfs_root *dev_root; - struct btrfs_key current_insert; struct btrfs_key last_insert; struct radix_tree_root fs_roots_radix; struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; struct radix_tree_root dev_radix; + + u64 extent_tree_insert[BTRFS_MAX_LEVEL * 3]; + int extent_tree_insert_nr; + u64 extent_tree_prealloc[BTRFS_MAX_LEVEL * 3]; + int extent_tree_prealloc_nr; + u64 generation; struct btrfs_transaction *running_transaction; struct btrfs_super_block *disk_super; @@ -267,8 +272,7 @@ struct btrfs_fs_info { /* * in ram representation of the tree. extent_root is used for all allocations - * and for the extent tree extent_root root. current_insert is used - * only for the extent tree. + * and for the extent tree extent_root root. */ struct btrfs_root { struct buffer_head *node; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7aff6bb55d9..956727f015a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -542,6 +542,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; fs_info->do_barriers = 1; + fs_info->extent_tree_insert_nr = 0; + fs_info->extent_tree_prealloc_nr = 0; BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); @@ -555,7 +557,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) } mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); - memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); __setup_root(sb->s_blocksize, dev_root, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 116519503d0..e6fe3fd3881 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -169,9 +169,8 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid); - for (i = 0; i < extent_root->fs_info->current_insert.flags; i++) { - ins.objectid = extent_root->fs_info->current_insert.objectid + - i; + for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) { + ins.objectid = extent_root->fs_info->extent_tree_insert[i]; super_blocks_used = btrfs_super_blocks_used(info->disk_super); btrfs_set_super_blocks_used(info->disk_super, super_blocks_used + 1); @@ -179,7 +178,8 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct sizeof(extent_item)); BUG_ON(ret); } - extent_root->fs_info->current_insert.offset = 0; + extent_root->fs_info->extent_tree_insert_nr = 0; + extent_root->fs_info->extent_tree_prealloc_nr = 0; return 0; } @@ -349,7 +349,10 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int start_found; struct btrfs_leaf *l; struct btrfs_root * root = orig_root->fs_info->extent_root; + struct btrfs_fs_info *info = root->fs_info; int total_needed = num_blocks; + int total_found = 0; + int fill_prealloc = 0; int level; path = btrfs_alloc_path(); @@ -357,8 +360,12 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); level = btrfs_header_level(btrfs_buffer_header(root->node)); - total_needed += (level + 2) * 3; - if (root->fs_info->last_insert.objectid == 0 && search_end == (u64)-1) { + if (num_blocks == 0) { + fill_prealloc = 1; + num_blocks = 1; + total_needed = min(level + 2, BTRFS_MAX_LEVEL) * 3; + } + if (info->last_insert.objectid == 0 && search_end == (u64)-1) { struct btrfs_disk_key *last_key; btrfs_init_path(path); ins->objectid = (u64)-1; @@ -373,8 +380,8 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root last_key = &l->items[path->slots[0]].key; search_start = btrfs_disk_key_objectid(last_key); } - if (root->fs_info->last_insert.objectid > search_start) - search_start = root->fs_info->last_insert.objectid; + if (info->last_insert.objectid > search_start) + search_start = info->last_insert.objectid; check_failed: btrfs_init_path(path); @@ -392,6 +399,10 @@ check_failed: l = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { + if (fill_prealloc) { + info->extent_tree_prealloc_nr = 0; + total_found = 0; + } ret = btrfs_next_leaf(root, path); if (ret == 0) continue; @@ -399,13 +410,13 @@ check_failed: goto error; if (!start_found) { ins->objectid = search_start; - ins->offset = (u64)-1; + ins->offset = (u64)-1 - search_start; start_found = 1; goto check_pending; } ins->objectid = last_block > search_start ? last_block : search_start; - ins->offset = (u64)-1; + ins->offset = (u64)-1 - ins->objectid; goto check_pending; } btrfs_disk_key_to_cpu(&key, &l->items[slot].key); @@ -414,7 +425,7 @@ check_failed: if (last_block < search_start) last_block = search_start; hole_size = key.objectid - last_block; - if (hole_size > total_needed) { + if (hole_size > num_blocks) { ins->objectid = last_block; ins->offset = hole_size; goto check_pending; @@ -433,17 +444,51 @@ check_pending: btrfs_release_path(root, path); BUG_ON(ins->objectid < search_start); for (test_block = ins->objectid; - test_block < ins->objectid + total_needed; test_block++) { - if (test_radix_bit(&root->fs_info->pinned_radix, - test_block)) { + test_block < ins->objectid + num_blocks; test_block++) { + if (test_radix_bit(&info->pinned_radix, test_block)) { search_start = test_block + 1; goto check_failed; } } - BUG_ON(root->fs_info->current_insert.offset); - root->fs_info->current_insert.offset = total_needed - num_blocks; - root->fs_info->current_insert.objectid = ins->objectid + num_blocks; - root->fs_info->current_insert.flags = 0; + if (!fill_prealloc && info->extent_tree_insert_nr) { + u64 last = + info->extent_tree_insert[info->extent_tree_insert_nr - 1]; + if (ins->objectid + num_blocks > + info->extent_tree_insert[0] && + ins->objectid <= last) { + search_start = last + 1; + WARN_ON(1); + goto check_failed; + } + } + if (!fill_prealloc && info->extent_tree_prealloc_nr) { + u64 first = + info->extent_tree_prealloc[info->extent_tree_prealloc_nr - 1]; + if (ins->objectid + num_blocks > first && + ins->objectid <= info->extent_tree_prealloc[0]) { + search_start = info->extent_tree_prealloc[0] + 1; + WARN_ON(1); + goto check_failed; + } + } + if (fill_prealloc) { + int nr; + test_block = ins->objectid; + while(test_block < ins->objectid + ins->offset && + total_found < total_needed) { + nr = total_needed - total_found - 1; + BUG_ON(nr < 0); + root->fs_info->extent_tree_prealloc[nr] = + test_block; + total_found++; + test_block++; + } + if (total_found < total_needed) { + search_start = test_block; + goto check_failed; + } + root->fs_info->extent_tree_prealloc_nr = total_found; + } root->fs_info->last_insert.objectid = ins->objectid; ins->offset = num_blocks; btrfs_free_path(path); @@ -472,25 +517,35 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; struct btrfs_extent_item extent_item; + struct btrfs_key prealloc_key; btrfs_set_extent_refs(&extent_item, 1); btrfs_set_extent_owner(&extent_item, owner); if (root == extent_root) { - BUG_ON(extent_root->fs_info->current_insert.offset == 0); + int nr; + BUG_ON(info->extent_tree_prealloc_nr == 0); BUG_ON(num_blocks != 1); - BUG_ON(extent_root->fs_info->current_insert.flags == - extent_root->fs_info->current_insert.offset); ins->offset = 1; - ins->objectid = extent_root->fs_info->current_insert.objectid + - extent_root->fs_info->current_insert.flags++; + info->extent_tree_prealloc_nr--; + nr = info->extent_tree_prealloc_nr; + ins->objectid = info->extent_tree_prealloc[nr]; + info->extent_tree_insert[info->extent_tree_insert_nr++] = + ins->objectid; return 0; } + /* do the real allocation */ ret = find_free_extent(trans, root, num_blocks, search_start, search_end, ins); if (ret) return ret; + /* then do prealloc for the extent tree */ + ret = find_free_extent(trans, root, 0, ins->objectid + ins->offset, + search_end, &prealloc_key); + if (ret) + return ret; + super_blocks_used = btrfs_super_blocks_used(info->disk_super); btrfs_set_super_blocks_used(info->disk_super, super_blocks_used + num_blocks); -- cgit v1.2.3-70-g09d2 From 9078a3e1e4e489dddc636c7bb8780349d4298743 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 26 Apr 2007 16:46:15 -0400 Subject: Btrfs: start of block group code Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 66 ++++++++++++---- fs/btrfs/disk-io.c | 4 + fs/btrfs/extent-tree.c | 204 +++++++++++++++++++++++++++++++++++++++++++++---- fs/btrfs/print-tree.c | 7 ++ fs/btrfs/super.c | 1 - fs/btrfs/transaction.c | 2 + 6 files changed, 252 insertions(+), 32 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 26d0cdd46f4..419917279e6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -239,6 +239,19 @@ struct btrfs_device_item { __le64 device_id; } __attribute__ ((__packed__)); +/* tag for the radix tree of block groups in ram */ +#define BTRFS_BLOCK_GROUP_DIRTY 0 +#define BTRFS_BLOCK_GROUP_HINTS 8 +#define BTRFS_BLOCK_GROUP_SIZE (256 * 1024 * 1024) +struct btrfs_block_group_item { + __le64 used; +} __attribute__ ((__packed__)); + +struct btrfs_block_group_cache { + struct btrfs_key key; + struct btrfs_block_group_item item; +}; + struct crypto_hash; struct btrfs_fs_info { struct btrfs_root *extent_root; @@ -249,6 +262,7 @@ struct btrfs_fs_info { struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; struct radix_tree_root dev_radix; + struct radix_tree_root block_group_radix; u64 extent_tree_insert[BTRFS_MAX_LEVEL * 3]; int extent_tree_insert_nr; @@ -301,49 +315,67 @@ struct btrfs_root { * info about object characteristics. There is one for every file and dir in * the FS */ -#define BTRFS_INODE_ITEM_KEY 1 +#define BTRFS_INODE_ITEM_KEY 1 + +/* reserve 2-15 close to the inode for later flexibility */ /* * dir items are the name -> inode pointers in a directory. There is one * for every name in a directory. */ -#define BTRFS_DIR_ITEM_KEY 2 -#define BTRFS_DIR_INDEX_KEY 3 +#define BTRFS_DIR_ITEM_KEY 16 +#define BTRFS_DIR_INDEX_KEY 17 /* - * inline data is file data that fits in the btree. + * extent data is for file data */ -#define BTRFS_INLINE_DATA_KEY 4 -/* - * extent data is for data that can't fit in the btree. It points to - * a (hopefully) huge chunk of disk - */ -#define BTRFS_EXTENT_DATA_KEY 5 +#define BTRFS_EXTENT_DATA_KEY 18 /* * csum items have the checksums for data in the extents */ -#define BTRFS_CSUM_ITEM_KEY 6 +#define BTRFS_CSUM_ITEM_KEY 19 + +/* reserve 20-31 for other file stuff */ /* * root items point to tree roots. There are typically in the root * tree used by the super block to find all the other trees */ -#define BTRFS_ROOT_ITEM_KEY 7 +#define BTRFS_ROOT_ITEM_KEY 32 /* * extent items are in the extent map tree. These record which blocks * are used, and how many references there are to each block */ -#define BTRFS_EXTENT_ITEM_KEY 8 +#define BTRFS_EXTENT_ITEM_KEY 33 + +/* + * block groups give us hints into the extent allocation trees. Which + * blocks are free etc etc + */ +#define BTRFS_BLOCK_GROUP_ITEM_KEY 34 /* * dev items list the devices that make up the FS */ -#define BTRFS_DEV_ITEM_KEY 9 +#define BTRFS_DEV_ITEM_KEY 35 /* * string items are for debugging. They just store a short string of * data in the FS */ -#define BTRFS_STRING_ITEM_KEY 10 +#define BTRFS_STRING_ITEM_KEY 253 + + +static inline u64 btrfs_block_group_used(struct btrfs_block_group_item *bi) +{ + return le64_to_cpu(bi->used); +} + +static inline void btrfs_set_block_group_used(struct + btrfs_block_group_item *bi, + u64 val) +{ + bi->used = cpu_to_le64(val); +} static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i) { @@ -1037,6 +1069,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks); +int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +int btrfs_free_block_groups(struct btrfs_fs_info *info); +int btrfs_read_block_groups(struct btrfs_root *root); /* ctree.c */ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 data_size); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 956727f015a..1c27eb64551 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -529,6 +529,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pending_del_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); + INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; @@ -613,6 +614,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) BTRFS_EXTENT_TREE_OBJECTID, extent_root); BUG_ON(ret); + btrfs_read_block_groups(extent_root); + fs_info->generation = btrfs_super_generation(disk_super) + 1; memset(&fs_info->kobj, 0, sizeof(fs_info->kobj)); kobj_set_kset_s(fs_info, btrfs_subsys); @@ -741,6 +744,7 @@ int close_ctree(struct btrfs_root *root) iput(fs_info->btree_inode); free_dev_radix(fs_info); + btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); kfree(fs_info->extent_root); kfree(fs_info->tree_root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e6fe3fd3881..0bb4fc83cfd 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -127,6 +127,105 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, return 0; } +static int write_one_cache_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_block_group_cache *cache) +{ + int ret; + int pending_ret; + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_block_group_item *bi; + struct btrfs_key ins; + + find_free_extent(trans, extent_root, 0, 0, (u64)-1, &ins); + ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); + BUG_ON(ret); + bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], + struct btrfs_block_group_item); + memcpy(bi, &cache->item, sizeof(*bi)); + mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(extent_root, path); + + finish_current_insert(trans, extent_root); + pending_ret = del_pending_extents(trans, extent_root); + if (ret) + return ret; + if (pending_ret) + return pending_ret; + return 0; + +} + +int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_block_group_cache *cache[8]; + int ret; + int err = 0; + int werr = 0; + struct radix_tree_root *radix = &root->fs_info->block_group_radix; + int i; + struct btrfs_path *path; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + while(1) { + ret = radix_tree_gang_lookup_tag(radix, (void **)cache, + 0, ARRAY_SIZE(cache), + BTRFS_BLOCK_GROUP_DIRTY); + if (!ret) + break; + for (i = 0; i < ret; i++) { + radix_tree_tag_clear(radix, cache[i]->key.objectid + + cache[i]->key.offset - 1, + BTRFS_BLOCK_GROUP_DIRTY); + err = write_one_cache_group(trans, root, + path, cache[i]); + if (err) + werr = err; + } + } + btrfs_free_path(path); + return werr; +} + +static int update_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 blocknr, u64 num, int alloc) +{ + struct btrfs_block_group_cache *cache; + struct btrfs_fs_info *info = root->fs_info; + u64 total = num; + u64 old_val; + u64 block_in_group; + int ret; + while(total) { + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&cache, blocknr, 1); + if (!ret) + return -1; + block_in_group = blocknr - cache->key.objectid; + WARN_ON(block_in_group > cache->key.offset); + radix_tree_tag_set(&info->block_group_radix, + cache->key.objectid + cache->key.offset - 1, + BTRFS_BLOCK_GROUP_DIRTY); + + old_val = btrfs_block_group_used(&cache->item); + num = min(total, cache->key.offset - block_in_group); + total -= num; + blocknr += num; + if (alloc) + old_val += num; + else + old_val -= num; + btrfs_set_block_group_used(&cache->item, old_val); + } + return 0; +} + int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -264,6 +363,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ret = btrfs_del_item(trans, extent_root, path); if (ret) BUG(); + ret = update_block_group(trans, root, blocknr, num_blocks, 0); + BUG_ON(ret); } btrfs_release_path(extent_root, path); btrfs_free_path(path); @@ -365,21 +466,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root num_blocks = 1; total_needed = min(level + 2, BTRFS_MAX_LEVEL) * 3; } - if (info->last_insert.objectid == 0 && search_end == (u64)-1) { - struct btrfs_disk_key *last_key; - btrfs_init_path(path); - ins->objectid = (u64)-1; - ins->offset = (u64)-1; - ret = btrfs_search_slot(trans, root, ins, path, 0, 0); - if (ret < 0) - goto error; - BUG_ON(ret == 0); - if (path->slots[0] > 0) - path->slots[0]--; - l = btrfs_buffer_leaf(path->nodes[0]); - last_key = &l->items[path->slots[0]].key; - search_start = btrfs_disk_key_objectid(last_key); - } if (info->last_insert.objectid > search_start) search_start = info->last_insert.objectid; @@ -420,6 +506,8 @@ check_failed: goto check_pending; } btrfs_disk_key_to_cpu(&key, &l->items[slot].key); + if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) + goto next; if (key.objectid >= search_start) { if (start_found) { if (last_block < search_start) @@ -434,6 +522,7 @@ check_failed: } start_found = 1; last_block = key.objectid + key.offset; +next: path->slots[0]++; } // FIXME -ENOSPC @@ -498,7 +587,6 @@ error: btrfs_free_path(path); return ret; } - /* * finds a free extent and does all the dirty work required for allocation * returns the key for the extent through ins, and a tree buffer for @@ -532,6 +620,9 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, ins->objectid = info->extent_tree_prealloc[nr]; info->extent_tree_insert[info->extent_tree_insert_nr++] = ins->objectid; + ret = update_block_group(trans, root, + ins->objectid, ins->offset, 1); + BUG_ON(ret); return 0; } /* do the real allocation */ @@ -558,6 +649,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, return ret; if (pending_ret) return pending_ret; + ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); return 0; } @@ -578,6 +670,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, BUG(); return NULL; } + BUG_ON(ret); buf = btrfs_find_create_tree_block(root, ins.objectid); set_buffer_uptodate(buf); return buf; @@ -758,3 +851,82 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_free_path(path); return ret; } + +int btrfs_free_block_groups(struct btrfs_fs_info *info) +{ + int ret; + struct btrfs_block_group_cache *cache[8]; + int i; + + while(1) { + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)cache, 0, + ARRAY_SIZE(cache)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + radix_tree_delete(&info->block_group_radix, + cache[i]->key.objectid + + cache[i]->key.offset - 1); + kfree(cache[i]); + } + } + return 0; +} + +int btrfs_read_block_groups(struct btrfs_root *root) +{ + struct btrfs_path *path; + int ret; + int err = 0; + struct btrfs_block_group_item *bi; + struct btrfs_block_group_cache *cache; + struct btrfs_key key; + struct btrfs_key found_key; + struct btrfs_leaf *leaf; + u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize; + + root = root->fs_info->extent_root; + key.objectid = 0; + key.offset = group_size_blocks; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + while(1) { + ret = btrfs_search_slot(NULL, root->fs_info->extent_root, + &key, path, 0, 0); + if (ret != 0) { + err = ret; + break; + } + leaf = btrfs_buffer_leaf(path->nodes[0]); + btrfs_disk_key_to_cpu(&found_key, + &leaf->items[path->slots[0]].key); + cache = kmalloc(sizeof(*cache), GFP_NOFS); + if (!cache) { + err = -1; + break; + } + bi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_block_group_item); + memcpy(&cache->item, bi, sizeof(*bi)); + memcpy(&cache->key, &found_key, sizeof(found_key)); + key.objectid = found_key.objectid + found_key.offset; + btrfs_release_path(root, path); + ret = radix_tree_insert(&root->fs_info->block_group_radix, + found_key.objectid + + found_key.offset - 1, + (void *)cache); + BUG_ON(ret); + if (key.objectid >= + btrfs_super_total_blocks(root->fs_info->disk_super)) + break; + } + + btrfs_free_path(path); + return 0; +} diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 1e7038b070a..2f95fc67a03 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -11,6 +11,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_root_item *ri; struct btrfs_dir_item *di; struct btrfs_inode_item *ii; + struct btrfs_block_group_item *bi; u32 type; printk("leaf %Lu total ptrs %d free space %d\n", @@ -53,6 +54,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) printk("\t\textent data refs %u\n", btrfs_extent_refs(ei)); break; + case BTRFS_BLOCK_GROUP_ITEM_KEY: + bi = btrfs_item_ptr(l, i, + struct btrfs_block_group_item); + printk("\t\tblock group used %Lu\n", + btrfs_block_group_used(bi)); + break; case BTRFS_STRING_ITEM_KEY: printk("\t\titem data %.*s\n", btrfs_item_size(item), btrfs_leaf_data(l) + btrfs_item_offset(item)); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 5bbccbc7e3c..edcebf79b04 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -377,7 +377,6 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, if (btrfs_disk_key_objectid(found_key) != inode->i_ino) break; if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY && - btrfs_disk_key_type(found_key) != BTRFS_INLINE_DATA_KEY && btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) break; if (btrfs_disk_key_offset(found_key) < inode->i_size) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 078cb9cbf9d..8a2545f6a5b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -125,6 +125,7 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, btrfs_set_super_device_root(fs_info->disk_super, bh_blocknr(dev_root->node)); } + btrfs_write_dirty_block_groups(trans, extent_root); while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); if (old_extent_block == bh_blocknr(extent_root->node)) @@ -135,6 +136,7 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, &extent_root->root_key, &extent_root->root_item); BUG_ON(ret); + btrfs_write_dirty_block_groups(trans, extent_root); } return 0; } -- cgit v1.2.3-70-g09d2 From cd1bc4653dc37f6390f4d6df4f987044c64f700b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 27 Apr 2007 10:08:34 -0400 Subject: Btrfs: more block allocator work Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 95 +++++++++++++++++++++++++++++++++++++++++++------- fs/btrfs/super.c | 3 ++ 4 files changed, 90 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 419917279e6..c432222d40e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -250,6 +250,8 @@ struct btrfs_block_group_item { struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; + u64 first_free; + u64 last_alloc; }; struct crypto_hash; @@ -257,7 +259,7 @@ struct btrfs_fs_info { struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct btrfs_root *dev_root; - struct btrfs_key last_insert; + struct btrfs_block_group_cache *block_group_cache; struct radix_tree_root fs_roots_radix; struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1c27eb64551..2489ffa5fb3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -558,7 +558,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) } mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); - memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); + fs_info->block_group_cache = NULL; __setup_root(sb->s_blocksize, dev_root, fs_info, BTRFS_DEV_TREE_OBJECTID); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0bb4fc83cfd..71e3b311fc4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -12,6 +12,63 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); +static int find_search_start(struct btrfs_root *root, int data) +{ + struct btrfs_block_group_cache *cache[8]; + struct btrfs_fs_info *info = root->fs_info; + u64 used; + u64 last; + int i; + int ret; + + cache[0] = info->block_group_cache; + if (!cache[0]) + goto find_new; + used = btrfs_block_group_used(&cache[0]->item); + if (used < (cache[0]->key.offset * 3 / 2)) + return 0; +find_new: + last = 0; + while(1) { + ret = radix_tree_gang_lookup_tag(&info->block_group_radix, + (void **)cache, + last, ARRAY_SIZE(cache), + BTRFS_BLOCK_GROUP_DIRTY); + if (!ret) + break; + for (i = 0; i < ret; i++) { + used = btrfs_block_group_used(&cache[i]->item); + if (used < (cache[i]->key.offset * 3 / 2)) { + info->block_group_cache = cache[i]; + cache[i]->last_alloc = cache[i]->first_free; + return 0; + } + last = cache[i]->key.objectid + + cache[i]->key.offset - 1; + } + } + last = 0; + while(1) { + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)cache, + last, ARRAY_SIZE(cache)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + used = btrfs_block_group_used(&cache[i]->item); + if (used < (cache[i]->key.offset * 3 / 2)) { + info->block_group_cache = cache[i]; + cache[i]->last_alloc = cache[i]->first_free; + return 0; + } + last = cache[i]->key.objectid + + cache[i]->key.offset - 1; + } + } + info->block_group_cache = NULL; + return 0; +} + int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks) @@ -205,8 +262,11 @@ static int update_block_group(struct btrfs_trans_handle *trans, while(total) { ret = radix_tree_gang_lookup(&info->block_group_radix, (void **)&cache, blocknr, 1); - if (!ret) + if (!ret) { + printk(KERN_CRIT "blocknr %Lu lookup failed\n", + blocknr); return -1; + } block_in_group = blocknr - cache->key.objectid; WARN_ON(block_in_group > cache->key.offset); radix_tree_tag_set(&info->block_group_radix, @@ -217,10 +277,15 @@ static int update_block_group(struct btrfs_trans_handle *trans, num = min(total, cache->key.offset - block_in_group); total -= num; blocknr += num; - if (alloc) + if (alloc) { old_val += num; - else + if (blocknr > cache->last_alloc) + cache->last_alloc = blocknr; + } else { old_val -= num; + if (blocknr < cache->first_free) + cache->first_free = blocknr; + } btrfs_set_block_group_used(&cache->item, old_val); } return 0; @@ -246,9 +311,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct clear_radix_bit(pinned_radix, gang[i]); } } - if (root->fs_info->last_insert.objectid > first) - root->fs_info->last_insert.objectid = first; - root->fs_info->last_insert.offset = 0; + root->fs_info->block_group_cache = NULL; return 0; } @@ -466,8 +529,10 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root num_blocks = 1; total_needed = min(level + 2, BTRFS_MAX_LEVEL) * 3; } - if (info->last_insert.objectid > search_start) - search_start = info->last_insert.objectid; + find_search_start(root, 0); + if (info->block_group_cache && + info->block_group_cache->last_alloc > search_start) + search_start = info->block_group_cache->last_alloc; check_failed: btrfs_init_path(path); @@ -567,8 +632,7 @@ check_pending: total_found < total_needed) { nr = total_needed - total_found - 1; BUG_ON(nr < 0); - root->fs_info->extent_tree_prealloc[nr] = - test_block; + info->extent_tree_prealloc[nr] = test_block; total_found++; test_block++; } @@ -576,9 +640,14 @@ check_pending: search_start = test_block; goto check_failed; } - root->fs_info->extent_tree_prealloc_nr = total_found; + info->extent_tree_prealloc_nr = total_found; + } + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&info->block_group_cache, + ins->objectid, 1); + if (ret) { + info->block_group_cache->last_alloc = ins->objectid; } - root->fs_info->last_insert.objectid = ins->objectid; ins->offset = num_blocks; btrfs_free_path(path); return 0; @@ -915,6 +984,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_block_group_item); memcpy(&cache->item, bi, sizeof(*bi)); memcpy(&cache->key, &found_key, sizeof(found_key)); + cache->last_alloc = 0; + cache->first_free = 0; key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); ret = radix_tree_insert(&root->fs_info->block_group_radix, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index edcebf79b04..a10e902d310 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -223,6 +223,7 @@ err: btrfs_update_inode(trans, root, dir); drop_nlink(dentry->d_inode); btrfs_update_inode(trans, root, dentry->d_inode); + dir->i_sb->s_dirt = 1; } return ret; } @@ -411,6 +412,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, error: btrfs_release_path(root, path); btrfs_free_path(path); + inode->i_sb->s_dirt = 1; return ret; } @@ -935,6 +937,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) goto out_fail; d_instantiate(dentry, inode); drop_on_err = 0; + dir->i_sb->s_dirt = 1; out_fail: btrfs_end_transaction(trans, root); -- cgit v1.2.3-70-g09d2 From 28b8bb9e001aa1c4abdd10a8e36972658237fe43 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 27 Apr 2007 11:42:05 -0400 Subject: Btrfs: allocator tweaks Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 --- fs/btrfs/extent-tree.c | 7 +++++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2489ffa5fb3..e1b6e13a5ae 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -94,9 +94,6 @@ struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) } while (bh != head); out_unlock: unlock_page(page); - if (ret) { - touch_buffer(ret); - } page_cache_release(page); return ret; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 71e3b311fc4..aca5802a8a1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -311,7 +311,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct clear_radix_bit(pinned_radix, gang[i]); } } - root->fs_info->block_group_cache = NULL; + if (root->fs_info->block_group_cache) { + root->fs_info->block_group_cache->last_alloc = + root->fs_info->block_group_cache->first_free; + } return 0; } @@ -578,7 +581,7 @@ check_failed: if (last_block < search_start) last_block = search_start; hole_size = key.objectid - last_block; - if (hole_size > num_blocks) { + if (hole_size >= num_blocks) { ins->objectid = last_block; ins->offset = hole_size; goto check_pending; -- cgit v1.2.3-70-g09d2 From 06a2f9fa4c12a055cc396936408a78ae0acfb6b4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 28 Apr 2007 08:48:10 -0400 Subject: Btrfs: try to drop dead cow pages from ram Signed-off-by: Chris Mason --- fs/btrfs/TODO | 3 +-- fs/btrfs/extent-tree.c | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 5ff2cef9f02..6a8c8cd03ca 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -6,8 +6,7 @@ * Check compat and incompat flags on the inode * Get rid of struct ctree_path, limiting tree levels held at one time * Add generation number to key pointer in nodes -* Add parent pointer back to extent map. -* Implement real list of pending transactions +* Add generation number to inode * Release * Do real tree locking * Add extent mirroring (backup copies of blocks) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index aca5802a8a1..afc5267515e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -291,10 +291,18 @@ static int update_block_group(struct btrfs_trans_handle *trans, return 0; } +static int try_remove_page(struct address_space *mapping, unsigned long index) +{ + int ret; + ret = invalidate_mapping_pages(mapping, index, index); + return ret; +} + int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { unsigned long gang[8]; + struct inode *btree_inode = root->fs_info->btree_inode; u64 first = 0; int ret; int i; @@ -309,6 +317,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct first = gang[0]; for (i = 0; i < ret; i++) { clear_radix_bit(pinned_radix, gang[i]); + try_remove_page(btree_inode->i_mapping, + gang[i] << (PAGE_CACHE_SHIFT - + btree_inode->i_blkbits)); } } if (root->fs_info->block_group_cache) { @@ -600,6 +611,12 @@ check_pending: */ btrfs_release_path(root, path); BUG_ON(ins->objectid < search_start); + if (ins->objectid >= btrfs_super_total_blocks(info->disk_super)) { + if (search_start == 0) + return -ENOSPC; + search_start = 0; + goto check_failed; + } for (test_block = ins->objectid; test_block < ins->objectid + num_blocks; test_block++) { if (test_radix_bit(&info->pinned_radix, test_block)) { -- cgit v1.2.3-70-g09d2 From 7c4452b9a6ca7aabe37ea2e43d443110bdc08cd8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 28 Apr 2007 09:29:35 -0400 Subject: Btrfs: smarter transaction writeback Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 1 + fs/btrfs/super.c | 1 - fs/btrfs/transaction.c | 37 +++++++++++++++++++++++++++++++++++-- fs/btrfs/transaction.h | 1 + 4 files changed, 37 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index afc5267515e..652cf305a96 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -762,6 +762,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, BUG_ON(ret); buf = btrfs_find_create_tree_block(root, ins.objectid); set_buffer_uptodate(buf); + set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a10e902d310..1890e8648db 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -980,7 +980,6 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) filemap_flush(root->fs_info->btree_inode->i_mapping); return 0; } - filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8a2545f6a5b..f9b8864dcc4 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -45,6 +45,7 @@ static int join_transaction(struct btrfs_root *root) cur_trans->use_count = 1; cur_trans->commit_done = 0; list_add_tail(&cur_trans->list, &root->fs_info->trans_list); + init_bit_radix(&cur_trans->dirty_pages); } cur_trans->num_writers++; return 0; @@ -106,8 +107,40 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); - return 0; + unsigned long gang[16]; + int ret; + int i; + int err; + int werr = 0; + struct page *page; + struct radix_tree_root *dirty_pages; + struct inode *btree_inode = root->fs_info->btree_inode; + + if (!trans || !trans->transaction) { + return filemap_write_and_wait(btree_inode->i_mapping); + } + dirty_pages = &trans->transaction->dirty_pages; + while(1) { + ret = find_first_radix_bit(dirty_pages, gang, ARRAY_SIZE(gang)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + /* FIXME EIO */ + clear_radix_bit(dirty_pages, gang[i]); + page = find_lock_page(btree_inode->i_mapping, + gang[i]); + if (!page) + continue; + err = write_one_page(page, 0); + if (err) + werr = err; + page_cache_release(page); + } + } + err = filemap_fdatawait(btree_inode->i_mapping); + if (err) + werr = err; + return werr; } int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 3cc29900a07..afe42d167ce 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -9,6 +9,7 @@ struct btrfs_transaction { int commit_done; int magic; struct list_head list; + struct radix_tree_root dirty_pages; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; }; -- cgit v1.2.3-70-g09d2 From 308535a05e4c39d2be26e0aeee722682deeb6f77 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 28 Apr 2007 15:17:08 -0400 Subject: Btrfs: prealloc more blocks for the extent map Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 652cf305a96..62051a36664 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -541,7 +541,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (num_blocks == 0) { fill_prealloc = 1; num_blocks = 1; - total_needed = min(level + 2, BTRFS_MAX_LEVEL) * 3; + total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3; } find_search_start(root, 0); if (info->block_group_cache && -- cgit v1.2.3-70-g09d2 From 31f3c99b73483f7b738a886c552050cbd6128ff3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 30 Apr 2007 15:25:45 -0400 Subject: Btrfs: allocator improvements, inode block groups Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 1 + fs/btrfs/ctree.c | 10 ++-- fs/btrfs/ctree.h | 18 ++++++- fs/btrfs/extent-tree.c | 138 +++++++++++++++++++++++++++++++++++-------------- fs/btrfs/super.c | 53 +++++++++++++++---- fs/btrfs/transaction.c | 1 + fs/btrfs/transaction.h | 15 ++++++ 7 files changed, 181 insertions(+), 55 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 43a4f1ddb37..6b50076b02d 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -3,6 +3,7 @@ struct btrfs_inode { struct btrfs_root *root; + struct btrfs_block_group_cache *block_group; struct btrfs_key location; struct inode vfs_inode; }; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b4783bf8bf4..dbd3f636dd3 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -61,7 +61,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *cow_ret = buf; return 0; } - cow = btrfs_alloc_free_block(trans, root); + cow = btrfs_alloc_free_block(trans, root, buf->b_blocknr); cow_node = btrfs_buffer_node(cow); if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) WARN_ON(1); @@ -800,7 +800,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - t = btrfs_alloc_free_block(trans, root); + t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr); c = btrfs_buffer_node(t); memset(c, 0, root->blocksize); btrfs_set_header_nritems(&c->header, 1); @@ -905,7 +905,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root } c_nritems = btrfs_header_nritems(&c->header); - split_buffer = btrfs_alloc_free_block(trans, root); + split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr); split = btrfs_buffer_node(split_buffer); btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); @@ -1277,7 +1277,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root slot = path->slots[0]; nritems = btrfs_header_nritems(&l->header); mid = (nritems + 1)/ 2; - right_buffer = btrfs_alloc_free_block(trans, root); + right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); BUG_ON(!right_buffer); right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); @@ -1374,7 +1374,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (!double_split) return ret; - right_buffer = btrfs_alloc_free_block(trans, root); + right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); BUG_ON(!right_buffer); right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c432222d40e..e6bf9919536 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -174,6 +174,7 @@ struct btrfs_inode_item { __le64 generation; __le64 size; __le64 nblocks; + __le64 block_group; __le32 nlink; __le32 uid; __le32 gid; @@ -241,6 +242,7 @@ struct btrfs_device_item { /* tag for the radix tree of block groups in ram */ #define BTRFS_BLOCK_GROUP_DIRTY 0 +#define BTRFS_BLOCK_GROUP_AVAIL 1 #define BTRFS_BLOCK_GROUP_HINTS 8 #define BTRFS_BLOCK_GROUP_SIZE (256 * 1024 * 1024) struct btrfs_block_group_item { @@ -410,6 +412,17 @@ static inline void btrfs_set_inode_nblocks(struct btrfs_inode_item *i, u64 val) i->nblocks = cpu_to_le64(val); } +static inline u64 btrfs_inode_block_group(struct btrfs_inode_item *i) +{ + return le64_to_cpu(i->block_group); +} + +static inline void btrfs_set_inode_block_group(struct btrfs_inode_item *i, + u64 val) +{ + i->block_group = cpu_to_le64(val); +} + static inline u32 btrfs_inode_nlink(struct btrfs_inode_item *i) { return le32_to_cpu(i->nlink); @@ -1054,10 +1067,13 @@ static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) btrfs_item_offset((leaf)->items + (slot)))) /* extent-tree.c */ +struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, + struct btrfs_block_group_cache + *hint, int data); int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root); + struct btrfs_root *root, u64 hint); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, u64 num_blocks, u64 search_start, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 62051a36664..8b8cbe25fff 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -12,42 +12,57 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); -static int find_search_start(struct btrfs_root *root, int data) +struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, + struct btrfs_block_group_cache + *hint, int data) { struct btrfs_block_group_cache *cache[8]; + struct btrfs_block_group_cache *found_group = NULL; struct btrfs_fs_info *info = root->fs_info; u64 used; - u64 last; + u64 last = 0; + u64 hint_last; int i; int ret; - - cache[0] = info->block_group_cache; - if (!cache[0]) - goto find_new; - used = btrfs_block_group_used(&cache[0]->item); - if (used < (cache[0]->key.offset * 3 / 2)) - return 0; -find_new: - last = 0; + int full_search = 0; + if (hint) { + used = btrfs_block_group_used(&hint->item); + if (used < (hint->key.offset * 2) / 3) { + return hint; + } + radix_tree_tag_clear(&info->block_group_radix, + hint->key.objectid + hint->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); + last = hint->key.objectid + hint->key.offset; + hint_last = last; + } else { + hint_last = 0; + last = 0; + } while(1) { ret = radix_tree_gang_lookup_tag(&info->block_group_radix, (void **)cache, last, ARRAY_SIZE(cache), - BTRFS_BLOCK_GROUP_DIRTY); + BTRFS_BLOCK_GROUP_AVAIL); if (!ret) break; for (i = 0; i < ret; i++) { used = btrfs_block_group_used(&cache[i]->item); - if (used < (cache[i]->key.offset * 3 / 2)) { + if (used < (cache[i]->key.offset * 2) / 3) { info->block_group_cache = cache[i]; - cache[i]->last_alloc = cache[i]->first_free; - return 0; + found_group = cache[i]; + goto found; } + radix_tree_tag_clear(&info->block_group_radix, + cache[i]->key.objectid + + cache[i]->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); last = cache[i]->key.objectid + - cache[i]->key.offset - 1; + cache[i]->key.offset; } } - last = 0; + last = hint_last; +again: while(1) { ret = radix_tree_gang_lookup(&info->block_group_radix, (void **)cache, @@ -56,17 +71,32 @@ find_new: break; for (i = 0; i < ret; i++) { used = btrfs_block_group_used(&cache[i]->item); - if (used < (cache[i]->key.offset * 3 / 2)) { + if (used < cache[i]->key.offset) { info->block_group_cache = cache[i]; - cache[i]->last_alloc = cache[i]->first_free; - return 0; + found_group = cache[i]; + goto found; } + radix_tree_tag_clear(&info->block_group_radix, + cache[i]->key.objectid + + cache[i]->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); last = cache[i]->key.objectid + - cache[i]->key.offset - 1; + cache[i]->key.offset; } } info->block_group_cache = NULL; - return 0; + if (!full_search) { + last = 0; + full_search = 1; + goto again; + } +found: + if (!found_group) { + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&found_group, 0, 1); + BUG_ON(ret != 1); + } + return found_group; } int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, @@ -243,6 +273,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, path, cache[i]); if (err) werr = err; + cache[i]->last_alloc = cache[i]->first_free; } } btrfs_free_path(path); @@ -322,10 +353,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btree_inode->i_blkbits)); } } - if (root->fs_info->block_group_cache) { - root->fs_info->block_group_cache->last_alloc = - root->fs_info->block_group_cache->first_free; - } return 0; } @@ -532,22 +559,43 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int total_found = 0; int fill_prealloc = 0; int level; + int update_block_group = 0; + struct btrfs_block_group_cache *hint_block_group; path = btrfs_alloc_path(); ins->flags = 0; btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); level = btrfs_header_level(btrfs_buffer_header(root->node)); + /* find search start here */ + if (0 && search_start && num_blocks) { + u64 used; + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&hint_block_group, + search_start, 1); + if (ret) { + used = btrfs_block_group_used(&hint_block_group->item); + if (used > (hint_block_group->key.offset * 9) / 10) + search_start = 0; + else if (search_start < hint_block_group->last_alloc) + search_start = hint_block_group->last_alloc; + } else { + search_start = 0; + } + } if (num_blocks == 0) { fill_prealloc = 1; num_blocks = 1; total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3; } - find_search_start(root, 0); - if (info->block_group_cache && - info->block_group_cache->last_alloc > search_start) - search_start = info->block_group_cache->last_alloc; - + if (1 || !search_start) { + trans->block_group = btrfs_find_block_group(root, + trans->block_group, + 0); + if (trans->block_group->last_alloc > search_start) + search_start = trans->block_group->last_alloc; + update_block_group = 1; + } check_failed: btrfs_init_path(path); ins->objectid = search_start; @@ -662,11 +710,13 @@ check_pending: } info->extent_tree_prealloc_nr = total_found; } - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)&info->block_group_cache, - ins->objectid, 1); - if (ret) { - info->block_group_cache->last_alloc = ins->objectid; + if (update_block_group) { + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&trans->block_group, + ins->objectid, 1); + if (ret) { + trans->block_group->last_alloc = ins->objectid; + } } ins->offset = num_blocks; btrfs_free_path(path); @@ -747,14 +797,14 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, * returns the tree buffer or NULL. */ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root) + struct btrfs_root *root, u64 hint) { struct btrfs_key ins; int ret; struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, 0, (unsigned long)-1, &ins); + 1, hint, (unsigned long)-1, &ins); if (ret) { BUG(); return NULL; @@ -975,6 +1025,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_key found_key; struct btrfs_leaf *leaf; u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize; + u64 used; root = root->fs_info->extent_root; key.objectid = 0; @@ -1005,8 +1056,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_block_group_item); memcpy(&cache->item, bi, sizeof(*bi)); memcpy(&cache->key, &found_key, sizeof(found_key)); - cache->last_alloc = 0; - cache->first_free = 0; + cache->last_alloc = cache->key.objectid; + cache->first_free = cache->key.objectid; key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); ret = radix_tree_insert(&root->fs_info->block_group_radix, @@ -1014,6 +1065,13 @@ int btrfs_read_block_groups(struct btrfs_root *root) found_key.offset - 1, (void *)cache); BUG_ON(ret); + used = btrfs_block_group_used(bi); + if (used < (key.offset * 2) / 3) { + radix_tree_tag_set(&root->fs_info->block_group_radix, + found_key.objectid + + found_key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); + } if (key.objectid >= btrfs_super_total_blocks(root->fs_info->disk_super)) break; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 1890e8648db..7ecbe7c8618 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -52,6 +52,8 @@ static void btrfs_read_locked_inode(struct inode *inode) struct btrfs_inode_item *inode_item; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key location; + struct btrfs_block_group_cache *alloc_group; + u64 alloc_group_block; int ret; path = btrfs_alloc_path(); @@ -82,6 +84,12 @@ static void btrfs_read_locked_inode(struct inode *inode) inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); inode->i_blocks = btrfs_inode_nblocks(inode_item); inode->i_generation = btrfs_inode_generation(inode_item); + alloc_group_block = btrfs_inode_block_group(inode_item); + ret = radix_tree_gang_lookup(&root->fs_info->block_group_radix, + (void **)&alloc_group, + alloc_group_block, 1); + BUG_ON(!ret); + BTRFS_I(inode)->block_group = alloc_group; btrfs_free_path(path); inode_item = NULL; @@ -136,6 +144,8 @@ static void fill_inode_item(struct btrfs_inode_item *item, btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); btrfs_set_inode_nblocks(item, inode->i_blocks); btrfs_set_inode_generation(item, inode->i_generation); + btrfs_set_inode_block_group(item, + BTRFS_I(inode)->block_group->key.objectid); } @@ -237,6 +247,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) root = BTRFS_I(dir)->root; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); ret = btrfs_unlink_trans(trans, root, dir, dentry); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -262,6 +273,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) btrfs_init_path(path); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); key.objectid = inode->i_ino; key.offset = (u64)-1; key.flags = (u32)-1; @@ -429,6 +441,7 @@ static void btrfs_delete_inode(struct inode *inode) inode->i_size = 0; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); if (S_ISREG(inode->i_mode)) { ret = btrfs_truncate_in_trans(trans, root, inode); BUG_ON(ret); @@ -731,6 +744,7 @@ static int btrfs_write_inode(struct inode *inode, int wait) if (wait) { mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); ret = btrfs_commit_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); } @@ -744,6 +758,7 @@ static void btrfs_dirty_inode(struct inode *inode) mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -751,7 +766,9 @@ static void btrfs_dirty_inode(struct inode *inode) static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 objectid, int mode) + u64 objectid, + struct btrfs_block_group_cache *group, + int mode) { struct inode *inode; struct btrfs_inode_item inode_item; @@ -763,6 +780,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); BTRFS_I(inode)->root = root; + group = btrfs_find_block_group(root, group, 0); + BTRFS_I(inode)->block_group = group; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; @@ -832,6 +851,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); if (err) { @@ -839,11 +859,13 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, goto out_unlock; } - inode = btrfs_new_inode(trans, root, objectid, mode); + inode = btrfs_new_inode(trans, root, objectid, + BTRFS_I(dir)->block_group, mode); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_unlock; - // FIXME mark the inode dirty + + btrfs_set_trans_block_group(trans, inode); err = btrfs_add_nondir(trans, dentry, inode); if (err) drop_inode = 1; @@ -853,6 +875,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; } dir->i_sb->s_dirt = 1; + btrfs_update_inode_block_group(trans, inode); + btrfs_update_inode_block_group(trans, dir); out_unlock: btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -904,6 +928,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); if (IS_ERR(trans)) { err = PTR_ERR(trans); goto out_unlock; @@ -915,7 +940,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) goto out_unlock; } - inode = btrfs_new_inode(trans, root, objectid, S_IFDIR | mode); + inode = btrfs_new_inode(trans, root, objectid, + BTRFS_I(dir)->block_group, S_IFDIR | mode); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_fail; @@ -923,6 +949,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) drop_on_err = 1; inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; + btrfs_set_trans_block_group(trans, inode); err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino); if (err) @@ -938,6 +965,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) d_instantiate(dentry, inode); drop_on_err = 0; dir->i_sb->s_dirt = 1; + btrfs_update_inode_block_group(trans, inode); + btrfs_update_inode_block_group(trans, dir); out_fail: btrfs_end_transaction(trans, root); @@ -1349,6 +1378,7 @@ static void btrfs_truncate(struct inode *inode) /* FIXME, add redo link to tree so we don't leak on crash */ mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); ret = btrfs_truncate_in_trans(trans, root, inode); BUG_ON(ret); ret = btrfs_end_transaction(trans, root); @@ -1445,6 +1475,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); bh = page_buffers(pages[i]); if (buffer_mapped(bh) && bh->b_blocknr == 0) { @@ -1481,6 +1512,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, kunmap(pages[i]); } SetPageChecked(pages[i]); + btrfs_update_inode_block_group(trans, inode); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); @@ -1821,6 +1853,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, mutex_unlock(&root->fs_info->fs_mutex); goto out_unlock; } + btrfs_set_trans_block_group(trans, inode); /* FIXME blocksize != 4096 */ inode->i_blocks += num_blocks << 3; if (start_pos < inode->i_size) { @@ -1845,6 +1878,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } BUG_ON(ret); alloc_extent_start = ins.objectid; + btrfs_update_inode_block_group(trans, inode); ret = btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -2017,6 +2051,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) struct btrfs_leaf *leaf; struct btrfs_root *new_root; struct inode *inode; + struct inode *dir; int ret; u64 objectid; u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; @@ -2025,7 +2060,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - subvol = btrfs_alloc_free_block(trans, root); + subvol = btrfs_alloc_free_block(trans, root, 0); if (subvol == NULL) return -ENOSPC; leaf = btrfs_buffer_leaf(subvol); @@ -2069,10 +2104,9 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) * insert the directory item */ key.offset = (u64)-1; + dir = root->fs_info->sb->s_root->d_inode; ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, - name, namelen, - root->fs_info->sb->s_root->d_inode->i_ino, - &key, 0); + name, namelen, dir->i_ino, &key, 0); BUG_ON(ret); ret = btrfs_commit_transaction(trans, root); @@ -2084,7 +2118,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(new_root, 1); BUG_ON(!trans); - inode = btrfs_new_inode(trans, new_root, new_dirid, S_IFDIR | 0700); + inode = btrfs_new_inode(trans, new_root, new_dirid, + BTRFS_I(dir)->block_group, S_IFDIR | 0700); inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f9b8864dcc4..bdbf514c111 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -77,6 +77,7 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, h->transaction = root->fs_info->running_transaction; h->blocks_reserved = num_blocks; h->blocks_used = 0; + h->block_group = NULL; root->fs_info->running_transaction->use_count++; mutex_unlock(&root->fs_info->trans_mutex); h->magic = h->magic2 = TRANS_MAGIC; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index afe42d167ce..b5378119e76 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -1,5 +1,6 @@ #ifndef __TRANSACTION__ #define __TRANSACTION__ +#include "btrfs_inode.h" struct btrfs_transaction { u64 transid; @@ -20,10 +21,24 @@ struct btrfs_trans_handle { unsigned long blocks_reserved; unsigned long blocks_used; struct btrfs_transaction *transaction; + struct btrfs_block_group_cache *block_group; int magic2; }; +static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, + struct inode *inode) +{ + trans->block_group = BTRFS_I(inode)->block_group; +} + +static inline void btrfs_update_inode_block_group(struct + btrfs_trans_handle *trans, + struct inode *inode) +{ + BTRFS_I(inode)->block_group = trans->block_group; +} + int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, -- cgit v1.2.3-70-g09d2 From 090d18753c7fb73f7d846c3a89a50cd35136d144 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 1 May 2007 08:53:32 -0400 Subject: Btrfs: directory readahead Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 29 ++++++++++++++++++++++++++++- fs/btrfs/disk-io.h | 6 ++++++ fs/btrfs/extent-tree.c | 1 + fs/btrfs/super.c | 28 ++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e1b6e13a5ae..354524adf98 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -253,6 +254,29 @@ static struct address_space_operations btree_aops = { .sync_page = block_sync_page, }; +int readahead_tree_block(struct btrfs_root *root, u64 blocknr) +{ + struct buffer_head *bh = NULL; + + bh = btrfs_find_create_tree_block(root, blocknr); + if (!bh) + return 0; + if (buffer_uptodate(bh)) + goto done; + if (test_set_buffer_locked(bh)) + goto done; + if (!buffer_uptodate(bh)) { + get_bh(bh); + bh->b_end_io = end_buffer_read_sync; + submit_bh(READ, bh); + } else { + unlock_buffer(bh); + } +done: + brelse(bh); + return 0; +} + struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) { struct buffer_head *bh = NULL; @@ -270,11 +294,14 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) wait_on_buffer(bh); if (!buffer_uptodate(bh)) goto fail; - csum_tree_block(root, bh, 1); } else { unlock_buffer(bh); } uptodate: + if (!buffer_checked(bh)) { + csum_tree_block(root, bh, 1); + set_buffer_checked(bh); + } if (check_tree_block(root, bh)) BUG(); return bh; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 444ebb0141a..1ee7d2a55b5 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -5,6 +5,11 @@ #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) +enum btrfs_bh_state_bits { + BH_Checked = BH_PrivateStart, +}; +BUFFER_FNS(Checked, checked); + static inline struct btrfs_node *btrfs_buffer_node(struct buffer_head *bh) { return (struct btrfs_node *)bh->b_data; @@ -21,6 +26,7 @@ static inline struct btrfs_header *btrfs_buffer_header(struct buffer_head *bh) } struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr); +int readahead_tree_block(struct btrfs_root *root, u64 blocknr); struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, u64 blocknr); int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8b8cbe25fff..2837fddf7b1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -812,6 +812,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, BUG_ON(ret); buf = btrfs_find_create_tree_block(root, ins.objectid); set_buffer_uptodate(buf); + set_buffer_checked(buf); set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7ecbe7c8618..a29a781b86c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -585,6 +585,31 @@ printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_r return d_splice_alias(inode, dentry); } +static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path) +{ + struct btrfs_node *node; + int i; + int nritems; + u64 objectid; + u64 item_objectid; + u64 blocknr; + int slot; + + if (!path->nodes[1]) + return; + node = btrfs_buffer_node(path->nodes[1]); + slot = path->slots[1]; + objectid = btrfs_disk_key_objectid(&node->ptrs[slot].key); + nritems = btrfs_header_nritems(&node->header); + for (i = slot; i < nritems; i++) { + item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); + if (item_objectid != objectid) + break; + blocknr = btrfs_node_blockptr(node, i); + readahead_tree_block(root, blocknr); + } +} + static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { struct inode *inode = filp->f_path.dentry->d_inode; @@ -619,6 +644,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (ret < 0) goto err; advance = 0; + reada_leaves(root, path); while(1) { leaf = btrfs_buffer_leaf(path->nodes[0]); nritems = btrfs_header_nritems(&leaf->header); @@ -631,6 +657,8 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) leaf = btrfs_buffer_leaf(path->nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path->slots[0]; + if (path->slots[1] == 0) + reada_leaves(root, path); } else { slot++; path->slots[0]++; -- cgit v1.2.3-70-g09d2 From 35b7e476107e3d54f03384e0f2fa3dfd68933353 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 2 May 2007 15:53:43 -0400 Subject: Btrfs: fix page cache memory leak Signed-off-by: Chris Mason --- fs/btrfs/bit-radix.c | 2 -- fs/btrfs/ctree.h | 3 +++ fs/btrfs/dir-item.c | 11 +++++----- fs/btrfs/disk-io.c | 8 ++++++-- fs/btrfs/disk-io.h | 2 ++ fs/btrfs/extent-tree.c | 12 +---------- fs/btrfs/hash.c | 1 + fs/btrfs/print-tree.c | 1 + fs/btrfs/super.c | 56 ++++++++++++++++++++++++++++++-------------------- fs/btrfs/transaction.c | 8 +++++--- 10 files changed, 59 insertions(+), 45 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index 9fc42e99c7d..e9ace32d7ba 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -22,9 +22,7 @@ int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) return -ENOMEM; memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long)); bits[0] = slot; - radix_tree_preload(GFP_NOFS); ret = radix_tree_insert(radix, slot, bits); - radix_tree_preload_end(); if (ret) return ret; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e6bf9919536..b5855a5365e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -8,6 +8,9 @@ struct btrfs_trans_handle; struct btrfs_transaction; +extern struct kmem_cache *btrfs_trans_handle_cachep; +extern struct kmem_cache *btrfs_transaction_cachep; +extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_MAGIC "_BtRfS_M" diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 7a7e9846860..00a28d90fea 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -4,11 +4,12 @@ #include "hash.h" #include "transaction.h" -struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_key *cpu_key, - u32 data_size) +static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle + *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *cpu_key, + u32 data_size) { int ret; char *ptr; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 354524adf98..5828a104dfe 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -694,7 +694,7 @@ static int free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) return 0; } -int del_fs_roots(struct btrfs_fs_info *fs_info) +static int del_fs_roots(struct btrfs_fs_info *fs_info) { int ret; struct btrfs_root *gang[8]; @@ -781,3 +781,7 @@ void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) brelse(buf); } +void btrfs_btree_balance_dirty(struct btrfs_root *root) +{ + balance_dirty_pages_ratelimited(root->fs_info->btree_inode->i_mapping); +} diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 1ee7d2a55b5..822ccb8aa4a 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -55,4 +55,6 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, u64 num_blocks); int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, u64 logical); +int btrfs_releasepage(struct page *page, gfp_t flags); +void btrfs_btree_balance_dirty(struct btrfs_root *root); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2837fddf7b1..0e20d1c42fc 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -322,18 +322,10 @@ static int update_block_group(struct btrfs_trans_handle *trans, return 0; } -static int try_remove_page(struct address_space *mapping, unsigned long index) -{ - int ret; - ret = invalidate_mapping_pages(mapping, index, index); - return ret; -} - int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { unsigned long gang[8]; - struct inode *btree_inode = root->fs_info->btree_inode; u64 first = 0; int ret; int i; @@ -348,9 +340,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct first = gang[0]; for (i = 0; i < ret; i++) { clear_radix_bit(pinned_radix, gang[i]); - try_remove_page(btree_inode->i_mapping, - gang[i] << (PAGE_CACHE_SHIFT - - btree_inode->i_blkbits)); } } return 0; @@ -983,6 +972,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root break; if (wret < 0) ret = wret; + btrfs_btree_balance_dirty(root); } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index 22519b8e0cf..32de1ea1b64 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c @@ -11,6 +11,7 @@ */ #include +#include "hash.h" #define DELTA 0x9E3779B9 static void TEA_transform(__u32 buf[2], __u32 const in[]) diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 2f95fc67a03..28813411de6 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -1,6 +1,7 @@ #include #include "ctree.h" #include "disk-io.h" +#include "print-tree.h" void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a29a781b86c..130a1d3d9f5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -18,14 +18,14 @@ #include "btrfs_inode.h" #include "ioctl.h" -void btrfs_fsinfo_release(struct kobject *obj) +static void btrfs_fsinfo_release(struct kobject *obj) { struct btrfs_fs_info *fsinfo = container_of(obj, struct btrfs_fs_info, kobj); kfree(fsinfo); } -struct kobj_type btrfs_fsinfo_ktype = { +static struct kobj_type btrfs_fsinfo_ktype = { .release = btrfs_fsinfo_release, }; @@ -148,7 +148,6 @@ static void fill_inode_item(struct btrfs_inode_item *item, BTRFS_I(inode)->block_group->key.objectid); } - static int btrfs_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) @@ -251,6 +250,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) ret = btrfs_unlink_trans(trans, root, dir, dentry); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); return ret; } @@ -324,6 +324,7 @@ out: btrfs_free_path(path); mutex_unlock(&root->fs_info->fs_mutex); ret = btrfs_end_transaction(trans, root); + btrfs_btree_balance_dirty(root); if (ret && !err) err = ret; return err; @@ -449,6 +450,7 @@ static void btrfs_delete_inode(struct inode *inode) btrfs_free_inode(trans, root, inode); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); return; no_delete: clear_inode(inode); @@ -481,7 +483,7 @@ out: return ret; } -int fixup_tree_root_location(struct btrfs_root *root, +static int fixup_tree_root_location(struct btrfs_root *root, struct btrfs_key *location, struct btrfs_root **sub_root) { @@ -512,7 +514,7 @@ int fixup_tree_root_location(struct btrfs_root *root, return 0; } -int btrfs_init_locked_inode(struct inode *inode, void *p) +static int btrfs_init_locked_inode(struct inode *inode, void *p) { struct btrfs_iget_args *args = p; inode->i_ino = args->ino; @@ -520,15 +522,15 @@ int btrfs_init_locked_inode(struct inode *inode, void *p) return 0; } -int btrfs_find_actor(struct inode *inode, void *opaque) +static int btrfs_find_actor(struct inode *inode, void *opaque) { struct btrfs_iget_args *args = opaque; return (args->ino == inode->i_ino && args->root == BTRFS_I(inode)->root); } -struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, - struct btrfs_root *root) +static struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, + struct btrfs_root *root) { struct inode *inode; struct btrfs_iget_args args; @@ -790,6 +792,7 @@ static void btrfs_dirty_inode(struct inode *inode) btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); } static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, @@ -913,6 +916,7 @@ out_unlock: inode_dec_link_count(inode); iput(inode); } + btrfs_btree_balance_dirty(root); return err; } @@ -1002,6 +1006,7 @@ out_unlock: mutex_unlock(&root->fs_info->fs_mutex); if (drop_on_err) iput(inode); + btrfs_btree_balance_dirty(root); return err; } @@ -1099,7 +1104,6 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, found_type != BTRFS_EXTENT_DATA_KEY) { extent_end = 0; extent_start = 0; - btrfs_release_path(root, path); goto out; } found_type = btrfs_file_extent_type(item); @@ -1135,7 +1139,6 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, btrfs_map_bh_to_logical(root, result, 0); } out: - btrfs_release_path(root, path); btrfs_free_path(path); return err; } @@ -1231,13 +1234,13 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page, } else if (!buffer_mapped(bh) && buffer_dirty(bh)) { WARN_ON(bh->b_size != blocksize); err = btrfs_get_block(inode, block, bh, 0); - if (err) + if (err) { +printk("writepage going to recovery err %d\n", err); goto recover; + } if (buffer_new(bh)) { /* blockdev mappings never come here */ clear_buffer_new(bh); - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); } } bh = bh->b_this_page; @@ -1303,11 +1306,6 @@ done: if (uptodate) SetPageUptodate(page); end_page_writeback(page); - /* - * The page and buffer_heads can be released at any time from - * here on. - */ - wbc->pages_skipped++; /* We didn't write this page */ } return err; @@ -1409,10 +1407,11 @@ static void btrfs_truncate(struct inode *inode) btrfs_set_trans_block_group(trans, inode); ret = btrfs_truncate_in_trans(trans, root, inode); BUG_ON(ret); + btrfs_update_inode(trans, root, inode); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); - mark_inode_dirty(inode); + btrfs_btree_balance_dirty(root); } /* @@ -1777,10 +1776,15 @@ static int prepare_pages(struct btrfs_root *root, err = -ENOMEM; goto failed_release; } + cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); + wait_on_page_writeback(pages[i]); offset = pos & (PAGE_CACHE_SIZE -1); this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); - create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize, - (1 << BH_Uptodate)); + if (!page_has_buffers(pages[i])) { + create_empty_buffers(pages[i], + root->fs_info->sb->s_blocksize, + (1 << BH_Uptodate)); + } head = page_buffers(pages[i]); bh = head; do { @@ -1820,7 +1824,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, struct inode *inode = file->f_path.dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct page *pages[8]; - struct page *pinned[2] = { NULL, NULL }; + struct page *pinned[2]; unsigned long first_index; unsigned long last_index; u64 start_pos; @@ -1829,6 +1833,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, struct btrfs_trans_handle *trans; struct btrfs_key ins; + pinned[0] = NULL; + pinned[1] = NULL; if (file->f_flags & O_DIRECT) return -EINVAL; pos = *ppos; @@ -1858,6 +1864,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (!PageUptodate(pinned[0])) { ret = mpage_readpage(pinned[0], btrfs_get_block); BUG_ON(ret); + wait_on_page_locked(pinned[0]); } else { unlock_page(pinned[0]); } @@ -1869,6 +1876,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (!PageUptodate(pinned[1])) { ret = mpage_readpage(pinned[1], btrfs_get_block); BUG_ON(ret); + wait_on_page_locked(pinned[1]); } else { unlock_page(pinned[1]); } @@ -1940,6 +1948,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, num_written += write_bytes; balance_dirty_pages_ratelimited(inode->i_mapping); + btrfs_btree_balance_dirty(root); cond_resched(); } out_unlock: @@ -2165,6 +2174,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) iput(inode); mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); return 0; } @@ -2220,6 +2230,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) ret = btrfs_commit_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); return 0; } @@ -2295,6 +2306,7 @@ out: mutex_unlock(&root->fs_info->fs_mutex); out_nolock: btrfs_free_path(path); + btrfs_btree_balance_dirty(root); return ret; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index bdbf514c111..8bbe9107ff7 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -199,8 +199,9 @@ struct dirty_root { struct btrfs_root *root; }; -int add_dirty_roots(struct btrfs_trans_handle *trans, - struct radix_tree_root *radix, struct list_head *list) +static int add_dirty_roots(struct btrfs_trans_handle *trans, + struct radix_tree_root *radix, + struct list_head *list) { struct dirty_root *dirty; struct btrfs_root *gang[8]; @@ -246,7 +247,8 @@ int add_dirty_roots(struct btrfs_trans_handle *trans, return 0; } -int drop_dirty_roots(struct btrfs_root *tree_root, struct list_head *list) +static int drop_dirty_roots(struct btrfs_root *tree_root, + struct list_head *list) { struct dirty_root *dirty; struct btrfs_trans_handle *trans; -- cgit v1.2.3-70-g09d2 From be08c1b9f8e679d45e086728445ac36cf250e92e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 May 2007 09:06:49 -0400 Subject: Btrfs: early metadata/data split Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 95 +++++++++++++++++++++++++------------------------- fs/btrfs/super.c | 2 +- 3 files changed, 50 insertions(+), 49 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b5855a5365e..179a046ce5a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1080,7 +1080,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, u64 num_blocks, u64 search_start, - u64 search_end, struct btrfs_key *ins); + u64 search_end, struct btrfs_key *ins, int data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0e20d1c42fc..c5ae51893f7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6,7 +6,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *orig_root, u64 num_blocks, u64 search_start, u64 - search_end, struct btrfs_key *ins); + search_end, struct btrfs_key *ins, int data); static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct @@ -25,7 +25,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int i; int ret; int full_search = 0; - if (hint) { + if (!data && hint) { used = btrfs_block_group_used(&hint->item); if (used < (hint->key.offset * 2) / 3) { return hint; @@ -47,6 +47,14 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (!ret) break; for (i = 0; i < ret; i++) { + last = cache[i]->key.objectid + + cache[i]->key.offset; + if (!full_search && !data && + (cache[i]->key.objectid & cache[i]->key.offset)) + continue; + if (!full_search && data && + (cache[i]->key.objectid & cache[i]->key.offset) == 0) + continue; used = btrfs_block_group_used(&cache[i]->item); if (used < (cache[i]->key.offset * 2) / 3) { info->block_group_cache = cache[i]; @@ -57,8 +65,6 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, cache[i]->key.objectid + cache[i]->key.offset - 1, BTRFS_BLOCK_GROUP_AVAIL); - last = cache[i]->key.objectid + - cache[i]->key.offset; } } last = hint_last; @@ -70,6 +76,14 @@ again: if (!ret) break; for (i = 0; i < ret; i++) { + last = cache[i]->key.objectid + + cache[i]->key.offset; + if (!full_search && !data && + (cache[i]->key.objectid & cache[i]->key.offset)) + continue; + if (!full_search && data && + (cache[i]->key.objectid & cache[i]->key.offset) == 0) + continue; used = btrfs_block_group_used(&cache[i]->item); if (used < cache[i]->key.offset) { info->block_group_cache = cache[i]; @@ -80,8 +94,6 @@ again: cache[i]->key.objectid + cache[i]->key.offset - 1, BTRFS_BLOCK_GROUP_AVAIL); - last = cache[i]->key.objectid + - cache[i]->key.offset; } } info->block_group_cache = NULL; @@ -112,7 +124,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, u32 refs; find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1, - &ins); + &ins, 0); path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); @@ -225,7 +237,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, struct btrfs_block_group_item *bi; struct btrfs_key ins; - find_free_extent(trans, extent_root, 0, 0, (u64)-1, &ins); + find_free_extent(trans, extent_root, 0, 0, (u64)-1, &ins, 0); ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); BUG_ON(ret); bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], @@ -322,10 +334,18 @@ static int update_block_group(struct btrfs_trans_handle *trans, return 0; } +static int try_remove_page(struct address_space *mapping, unsigned long index) +{ + int ret; + ret = invalidate_mapping_pages(mapping, index, index); + return ret; +} + int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { unsigned long gang[8]; + struct inode *btree_inode = root->fs_info->btree_inode; u64 first = 0; int ret; int i; @@ -340,6 +360,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct first = gang[0]; for (i = 0; i < ret; i++) { clear_radix_bit(pinned_radix, gang[i]); + try_remove_page(btree_inode->i_mapping, + gang[i] << (PAGE_CACHE_SHIFT - + btree_inode->i_blkbits)); } } return 0; @@ -424,7 +447,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; - find_free_extent(trans, root, 0, 0, (u64)-1, &ins); + find_free_extent(trans, root, 0, 0, (u64)-1, &ins, 0); path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); @@ -531,7 +554,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root */ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *orig_root, u64 num_blocks, u64 search_start, u64 - search_end, struct btrfs_key *ins) + search_end, struct btrfs_key *ins, int data) { struct btrfs_path *path; struct btrfs_key key; @@ -548,43 +571,21 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int total_found = 0; int fill_prealloc = 0; int level; - int update_block_group = 0; - struct btrfs_block_group_cache *hint_block_group; + struct btrfs_block_group_cache *block_group; path = btrfs_alloc_path(); ins->flags = 0; btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); level = btrfs_header_level(btrfs_buffer_header(root->node)); - /* find search start here */ - if (0 && search_start && num_blocks) { - u64 used; - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)&hint_block_group, - search_start, 1); - if (ret) { - used = btrfs_block_group_used(&hint_block_group->item); - if (used > (hint_block_group->key.offset * 9) / 10) - search_start = 0; - else if (search_start < hint_block_group->last_alloc) - search_start = hint_block_group->last_alloc; - } else { - search_start = 0; - } - } if (num_blocks == 0) { fill_prealloc = 1; num_blocks = 1; total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3; } - if (1 || !search_start) { - trans->block_group = btrfs_find_block_group(root, - trans->block_group, - 0); - if (trans->block_group->last_alloc > search_start) - search_start = trans->block_group->last_alloc; - update_block_group = 1; - } + block_group = btrfs_find_block_group(root, trans->block_group, data); + if (block_group->last_alloc > search_start) + search_start = block_group->last_alloc; check_failed: btrfs_init_path(path); ins->objectid = search_start; @@ -699,13 +700,13 @@ check_pending: } info->extent_tree_prealloc_nr = total_found; } - if (update_block_group) { - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)&trans->block_group, - ins->objectid, 1); - if (ret) { - trans->block_group->last_alloc = ins->objectid; - } + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&block_group, + ins->objectid, 1); + if (ret) { + block_group->last_alloc = ins->objectid; + if (!data) + trans->block_group = block_group; } ins->offset = num_blocks; btrfs_free_path(path); @@ -725,7 +726,7 @@ error: int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, u64 num_blocks, u64 search_start, - u64 search_end, struct btrfs_key *ins) + u64 search_end, struct btrfs_key *ins, int data) { int ret; int pending_ret; @@ -755,13 +756,13 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, } /* do the real allocation */ ret = find_free_extent(trans, root, num_blocks, search_start, - search_end, ins); + search_end, ins, data); if (ret) return ret; /* then do prealloc for the extent tree */ ret = find_free_extent(trans, root, 0, ins->objectid + ins->offset, - search_end, &prealloc_key); + search_end, &prealloc_key, 0); if (ret) return ret; @@ -793,7 +794,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, hint, (unsigned long)-1, &ins); + 1, 0, (unsigned long)-1, &ins, 0); if (ret) { BUG(); return NULL; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 130a1d3d9f5..23ba4e979fc 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1903,7 +1903,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { ret = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, 1, (u64)-1, &ins); + num_blocks, 1, (u64)-1, &ins, 1); BUG_ON(ret); ret = btrfs_insert_file_extent(trans, root, inode->i_ino, start_pos, ins.objectid, ins.offset); -- cgit v1.2.3-70-g09d2 From be74417553f4b2ee46be2088007a674ef2f02330 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sun, 6 May 2007 10:15:01 -0400 Subject: Btrfs: more allocator enhancements Signed-off-by: Chris Mason --- fs/btrfs/bit-radix.c | 6 +- fs/btrfs/ctree.h | 7 +- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 317 +++++++++++++++++++++++++++++++++++++------------ fs/btrfs/super.c | 2 +- 5 files changed, 249 insertions(+), 85 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index e9ace32d7ba..1a0271445df 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -26,8 +26,10 @@ int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) if (ret) return ret; } - set_bit(bit_slot, bits + 1); - return 0; + ret = test_and_set_bit(bit_slot, bits + 1); + if (ret < 0) + ret = 1; + return ret; } int test_radix_bit(struct radix_tree_root *radix, unsigned long bit) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 179a046ce5a..086e7dea3c9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -257,6 +257,8 @@ struct btrfs_block_group_cache { struct btrfs_block_group_item item; u64 first_free; u64 last_alloc; + u64 pinned; + int data; }; struct crypto_hash; @@ -264,12 +266,12 @@ struct btrfs_fs_info { struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct btrfs_root *dev_root; - struct btrfs_block_group_cache *block_group_cache; struct radix_tree_root fs_roots_radix; struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; struct radix_tree_root dev_radix; struct radix_tree_root block_group_radix; + struct radix_tree_root block_group_data_radix; u64 extent_tree_insert[BTRFS_MAX_LEVEL * 3]; int extent_tree_insert_nr; @@ -1072,7 +1074,8 @@ static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) /* extent-tree.c */ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache - *hint, int data); + *hint, u64 search_start, + int data); int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5828a104dfe..7930458c227 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -554,6 +554,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); + INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; @@ -582,7 +583,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) } mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); - fs_info->block_group_cache = NULL; __setup_root(sb->s_blocksize, dev_root, fs_info, BTRFS_DEV_TREE_OBJECTID); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c5ae51893f7..2937fd9aba7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -12,36 +12,88 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); +static struct btrfs_block_group_cache *lookup_block_group(struct + btrfs_fs_info *info, + u64 blocknr) +{ + struct btrfs_block_group_cache *block_group; + int ret; + + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&block_group, + blocknr, 1); + if (ret) { + if (block_group->key.objectid <= blocknr && blocknr < + block_group->key.objectid + block_group->key.offset) + return block_group; + } + ret = radix_tree_gang_lookup(&info->block_group_data_radix, + (void **)&block_group, + blocknr, 1); + if (ret) { + if (block_group->key.objectid <= blocknr && blocknr < + block_group->key.objectid + block_group->key.offset) + return block_group; + } +printk("lookup_block_group fails for blocknr %Lu\n", blocknr); + return NULL; +} + struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache - *hint, int data) + *hint, u64 search_start, + int data) { struct btrfs_block_group_cache *cache[8]; struct btrfs_block_group_cache *found_group = NULL; struct btrfs_fs_info *info = root->fs_info; + struct radix_tree_root *radix; u64 used; u64 last = 0; u64 hint_last; int i; int ret; int full_search = 0; - if (!data && hint) { + + if (data) + radix = &info->block_group_data_radix; + else + radix = &info->block_group_radix; + + if (search_start) { + struct btrfs_block_group_cache *shint; + shint = lookup_block_group(info, search_start); + if (shint->data == data) { + used = btrfs_block_group_used(&shint->item); + if (used + shint->pinned < + (shint->key.offset * 8) / 10) { + return shint; + } + } + } + if (hint && hint->data == data) { used = btrfs_block_group_used(&hint->item); - if (used < (hint->key.offset * 2) / 3) { + if (used + hint->pinned < (hint->key.offset * 8) / 10) { return hint; } - radix_tree_tag_clear(&info->block_group_radix, - hint->key.objectid + hint->key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); - last = hint->key.objectid + hint->key.offset; + if (used >= (hint->key.offset * 8) / 10) { + radix_tree_tag_clear(radix, + hint->key.objectid + + hint->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); + } + last = hint->key.offset * 2; + if (hint->key.objectid >= last) + last = max(search_start, hint->key.objectid - last); + else + last = hint->key.objectid + hint->key.offset; hint_last = last; } else { - hint_last = 0; - last = 0; + hint_last = search_start; + last = search_start; } while(1) { - ret = radix_tree_gang_lookup_tag(&info->block_group_radix, - (void **)cache, + ret = radix_tree_gang_lookup_tag(radix, (void **)cache, last, ARRAY_SIZE(cache), BTRFS_BLOCK_GROUP_AVAIL); if (!ret) @@ -49,65 +101,54 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, for (i = 0; i < ret; i++) { last = cache[i]->key.objectid + cache[i]->key.offset; - if (!full_search && !data && - (cache[i]->key.objectid & cache[i]->key.offset)) - continue; - if (!full_search && data && - (cache[i]->key.objectid & cache[i]->key.offset) == 0) - continue; used = btrfs_block_group_used(&cache[i]->item); - if (used < (cache[i]->key.offset * 2) / 3) { - info->block_group_cache = cache[i]; + if (used + cache[i]->pinned < + (cache[i]->key.offset * 8) / 10) { found_group = cache[i]; goto found; } - radix_tree_tag_clear(&info->block_group_radix, - cache[i]->key.objectid + - cache[i]->key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); + if (used >= (cache[i]->key.offset * 8) / 10) { + radix_tree_tag_clear(radix, + cache[i]->key.objectid + + cache[i]->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); + } } } last = hint_last; again: while(1) { - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)cache, - last, ARRAY_SIZE(cache)); + ret = radix_tree_gang_lookup(radix, (void **)cache, + last, ARRAY_SIZE(cache)); if (!ret) break; for (i = 0; i < ret; i++) { last = cache[i]->key.objectid + cache[i]->key.offset; - if (!full_search && !data && - (cache[i]->key.objectid & cache[i]->key.offset)) - continue; - if (!full_search && data && - (cache[i]->key.objectid & cache[i]->key.offset) == 0) - continue; used = btrfs_block_group_used(&cache[i]->item); - if (used < cache[i]->key.offset) { - info->block_group_cache = cache[i]; + if (used + cache[i]->pinned < cache[i]->key.offset) { found_group = cache[i]; goto found; } - radix_tree_tag_clear(&info->block_group_radix, - cache[i]->key.objectid + - cache[i]->key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); + if (used >= cache[i]->key.offset) { + radix_tree_tag_clear(radix, + cache[i]->key.objectid + + cache[i]->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); + } } } - info->block_group_cache = NULL; if (!full_search) { - last = 0; + last = search_start; full_search = 1; goto again; } -found: if (!found_group) { - ret = radix_tree_gang_lookup(&info->block_group_radix, + ret = radix_tree_gang_lookup(radix, (void **)&found_group, 0, 1); BUG_ON(ret != 1); } +found: return found_group; } @@ -252,18 +293,20 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, return ret; if (pending_ret) return pending_ret; + if (cache->data) + cache->last_alloc = cache->first_free; return 0; } -int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, - struct btrfs_root *root) +static int write_dirty_block_radix(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct radix_tree_root *radix) { struct btrfs_block_group_cache *cache[8]; int ret; int err = 0; int werr = 0; - struct radix_tree_root *radix = &root->fs_info->block_group_radix; int i; struct btrfs_path *path; @@ -285,35 +328,74 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, path, cache[i]); if (err) werr = err; - cache[i]->last_alloc = cache[i]->first_free; } } btrfs_free_path(path); return werr; } +int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + int ret; + int ret2; + ret = write_dirty_block_radix(trans, root, + &root->fs_info->block_group_radix); + ret2 = write_dirty_block_radix(trans, root, + &root->fs_info->block_group_data_radix); + if (ret) + return ret; + if (ret2) + return ret2; + return 0; +} + static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num, int alloc) { struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; + struct radix_tree_root *radix; u64 total = num; u64 old_val; u64 block_in_group; int ret; + if (num != 1) + radix = &info->block_group_data_radix; + else + radix = &info->block_group_radix; while(total) { - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)&cache, blocknr, 1); + ret = radix_tree_gang_lookup(radix, (void **)&cache, + blocknr, 1); if (!ret) { printk(KERN_CRIT "blocknr %Lu lookup failed\n", blocknr); return -1; } block_in_group = blocknr - cache->key.objectid; + if (block_in_group > cache->key.offset || cache->key.objectid > + blocknr) { + if (radix == &info->block_group_data_radix) + radix = &info->block_group_radix; + else + radix = &info->block_group_data_radix; + ret = radix_tree_gang_lookup(radix, (void **)&cache, + blocknr, 1); + if (!ret) { + printk(KERN_CRIT "blocknr %Lu lookup failed\n", + blocknr); + return -1; + } + block_in_group = blocknr - cache->key.objectid; + if (block_in_group > cache->key.offset || + cache->key.objectid > blocknr) { + BUG(); + } + } WARN_ON(block_in_group > cache->key.offset); - radix_tree_tag_set(&info->block_group_radix, - cache->key.objectid + cache->key.offset - 1, + radix_tree_tag_set(radix, cache->key.objectid + + cache->key.offset - 1, BTRFS_BLOCK_GROUP_DIRTY); old_val = btrfs_block_group_used(&cache->item); @@ -346,6 +428,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct { unsigned long gang[8]; struct inode *btree_inode = root->fs_info->btree_inode; + struct btrfs_block_group_cache *block_group; u64 first = 0; int ret; int i; @@ -360,6 +443,14 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct first = gang[0]; for (i = 0; i < ret; i++) { clear_radix_bit(pinned_radix, gang[i]); + block_group = lookup_block_group(root->fs_info, + gang[i]); + if (block_group) { + WARN_ON(block_group->pinned == 0); + block_group->pinned--; + if (gang[i] < block_group->last_alloc) + block_group->last_alloc = gang[i]; + } try_remove_page(btree_inode->i_mapping, gang[i] << (PAGE_CACHE_SHIFT - btree_inode->i_blkbits)); @@ -420,10 +511,16 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) btrfs_block_release(root, bh); } err = set_radix_bit(&root->fs_info->pinned_radix, blocknr); + if (!err) { + struct btrfs_block_group_cache *cache; + cache = lookup_block_group(root->fs_info, blocknr); + if (cache) + cache->pinned++; + } } else { err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr); } - BUG_ON(err); + BUG_ON(err < 0); return 0; } @@ -502,6 +599,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct int i; struct radix_tree_root *pending_radix; struct radix_tree_root *pinned_radix; + struct btrfs_block_group_cache *cache; pending_radix = &extent_root->fs_info->pending_del_radix; pinned_radix = &extent_root->fs_info->pinned_radix; @@ -513,7 +611,17 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct break; for (i = 0; i < ret; i++) { wret = set_radix_bit(pinned_radix, gang[i]); - BUG_ON(wret); + if (wret == 0) { + cache = lookup_block_group(extent_root->fs_info, + gang[i]); + if (cache) + cache->pinned++; + } + if (wret < 0) { + printk(KERN_CRIT "set_radix_bit, err %d\n", + wret); + BUG_ON(wret < 0); + } wret = clear_radix_bit(pending_radix, gang[i]); BUG_ON(wret); wret = __free_extent(trans, extent_root, @@ -563,6 +671,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int slot = 0; u64 last_block = 0; u64 test_block; + u64 orig_search_start = search_start; int start_found; struct btrfs_leaf *l; struct btrfs_root * root = orig_root->fs_info->extent_root; @@ -572,6 +681,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int fill_prealloc = 0; int level; struct btrfs_block_group_cache *block_group; + int full_scan = 0; path = btrfs_alloc_path(); ins->flags = 0; @@ -583,10 +693,21 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root num_blocks = 1; total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3; } - block_group = btrfs_find_block_group(root, trans->block_group, data); + if (search_start) { + block_group = lookup_block_group(info, search_start); + block_group = btrfs_find_block_group(root, block_group, + search_start, data); + } else { + block_group = btrfs_find_block_group(root, + trans->block_group, 0, + data); + } + +check_failed: + if (block_group->data != data) + WARN_ON(1); if (block_group->last_alloc > search_start) search_start = block_group->last_alloc; -check_failed: btrfs_init_path(path); ins->objectid = search_start; ins->offset = 0; @@ -639,6 +760,13 @@ check_failed: } start_found = 1; last_block = key.objectid + key.offset; + if (last_block >= block_group->key.objectid + + block_group->key.offset) { + btrfs_release_path(root, path); + search_start = block_group->key.objectid + + block_group->key.offset * 2; + goto new_group; + } next: path->slots[0]++; } @@ -650,16 +778,17 @@ check_pending: btrfs_release_path(root, path); BUG_ON(ins->objectid < search_start); if (ins->objectid >= btrfs_super_total_blocks(info->disk_super)) { - if (search_start == 0) + if (full_scan) return -ENOSPC; - search_start = 0; - goto check_failed; + search_start = orig_search_start; + full_scan = 1; + goto new_group; } for (test_block = ins->objectid; test_block < ins->objectid + num_blocks; test_block++) { if (test_radix_bit(&info->pinned_radix, test_block)) { search_start = test_block + 1; - goto check_failed; + goto new_group; } } if (!fill_prealloc && info->extent_tree_insert_nr) { @@ -670,7 +799,7 @@ check_pending: ins->objectid <= last) { search_start = last + 1; WARN_ON(1); - goto check_failed; + goto new_group; } } if (!fill_prealloc && info->extent_tree_prealloc_nr) { @@ -680,7 +809,7 @@ check_pending: ins->objectid <= info->extent_tree_prealloc[0]) { search_start = info->extent_tree_prealloc[0] + 1; WARN_ON(1); - goto check_failed; + goto new_group; } } if (fill_prealloc) { @@ -696,14 +825,12 @@ check_pending: } if (total_found < total_needed) { search_start = test_block; - goto check_failed; + goto new_group; } info->extent_tree_prealloc_nr = total_found; } - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)&block_group, - ins->objectid, 1); - if (ret) { + block_group = lookup_block_group(info, ins->objectid); + if (block_group) { block_group->last_alloc = ins->objectid; if (!data) trans->block_group = block_group; @@ -711,6 +838,18 @@ check_pending: ins->offset = num_blocks; btrfs_free_path(path); return 0; + +new_group: + if (search_start >= btrfs_super_total_blocks(info->disk_super)) { + search_start = orig_search_start; + full_scan = 1; + } + block_group = lookup_block_group(info, search_start); + if (!full_scan) + block_group = btrfs_find_block_group(root, block_group, + search_start, data); + goto check_failed; + error: btrfs_release_path(root, path); btrfs_free_path(path); @@ -794,7 +933,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, 0, (unsigned long)-1, &ins, 0); + 1, hint, (unsigned long)-1, &ins, 0); if (ret) { BUG(); return NULL; @@ -984,21 +1123,19 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } -int btrfs_free_block_groups(struct btrfs_fs_info *info) +static int free_block_group_radix(struct radix_tree_root *radix) { int ret; struct btrfs_block_group_cache *cache[8]; int i; while(1) { - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)cache, 0, + ret = radix_tree_gang_lookup(radix, (void **)cache, 0, ARRAY_SIZE(cache)); if (!ret) break; for (i = 0; i < ret; i++) { - radix_tree_delete(&info->block_group_radix, - cache[i]->key.objectid + + radix_tree_delete(radix, cache[i]->key.objectid + cache[i]->key.offset - 1); kfree(cache[i]); } @@ -1006,6 +1143,20 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return 0; } +int btrfs_free_block_groups(struct btrfs_fs_info *info) +{ + int ret; + int ret2; + + ret = free_block_group_radix(&info->block_group_radix); + ret2 = free_block_group_radix(&info->block_group_data_radix); + if (ret) + return ret; + if (ret2) + return ret2; + return 0; +} + int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; @@ -1013,13 +1164,16 @@ int btrfs_read_block_groups(struct btrfs_root *root) int err = 0; struct btrfs_block_group_item *bi; struct btrfs_block_group_cache *cache; + struct btrfs_fs_info *info = root->fs_info; + struct radix_tree_root *radix; struct btrfs_key key; struct btrfs_key found_key; struct btrfs_leaf *leaf; u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize; u64 used; + u64 nr = 0; - root = root->fs_info->extent_root; + root = info->extent_root; key.objectid = 0; key.offset = group_size_blocks; key.flags = 0; @@ -1030,7 +1184,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) return -ENOMEM; while(1) { - ret = btrfs_search_slot(NULL, root->fs_info->extent_root, + ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0); if (ret != 0) { err = ret; @@ -1050,23 +1204,28 @@ int btrfs_read_block_groups(struct btrfs_root *root) memcpy(&cache->key, &found_key, sizeof(found_key)); cache->last_alloc = cache->key.objectid; cache->first_free = cache->key.objectid; + cache->pinned = 0; + cache->data = (nr & 1); key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); - ret = radix_tree_insert(&root->fs_info->block_group_radix, - found_key.objectid + + if (nr & 1) + radix = &info->block_group_data_radix; + else + radix = &info->block_group_radix; + ret = radix_tree_insert(radix, found_key.objectid + found_key.offset - 1, (void *)cache); BUG_ON(ret); used = btrfs_block_group_used(bi); - if (used < (key.offset * 2) / 3) { - radix_tree_tag_set(&root->fs_info->block_group_radix, - found_key.objectid + + if (used < (key.offset * 8) / 10) { + radix_tree_tag_set(radix, found_key.objectid + found_key.offset - 1, BTRFS_BLOCK_GROUP_AVAIL); } if (key.objectid >= - btrfs_super_total_blocks(root->fs_info->disk_super)) + btrfs_super_total_blocks(info->disk_super)) break; + nr++; } btrfs_free_path(path); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 23ba4e979fc..d5ac0d8ee3c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -811,7 +811,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); BTRFS_I(inode)->root = root; - group = btrfs_find_block_group(root, group, 0); + group = btrfs_find_block_group(root, group, 0, 0); BTRFS_I(inode)->block_group = group; inode->i_uid = current->fsuid; -- cgit v1.2.3-70-g09d2 From 3e1ad54fe2839319c1aa66b954da0753f5b1f906 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 7 May 2007 20:03:49 -0400 Subject: Btrfs: allocator and tuning Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 75 ++++++++++++++++++++++---------------------------- 2 files changed, 34 insertions(+), 42 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 086e7dea3c9..cdb7c23c41f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -255,6 +255,7 @@ struct btrfs_block_group_item { struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; + struct radix_tree_root *radix; u64 first_free; u64 last_alloc; u64 pinned; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2937fd9aba7..3edfc300289 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -23,7 +23,7 @@ static struct btrfs_block_group_cache *lookup_block_group(struct (void **)&block_group, blocknr, 1); if (ret) { - if (block_group->key.objectid <= blocknr && blocknr < + if (block_group->key.objectid <= blocknr && blocknr <= block_group->key.objectid + block_group->key.offset) return block_group; } @@ -31,11 +31,16 @@ static struct btrfs_block_group_cache *lookup_block_group(struct (void **)&block_group, blocknr, 1); if (ret) { - if (block_group->key.objectid <= blocknr && blocknr < + if (block_group->key.objectid <= blocknr && blocknr <= block_group->key.objectid + block_group->key.offset) return block_group; } -printk("lookup_block_group fails for blocknr %Lu\n", blocknr); + WARN_ON(1); + printk("lookup_block_group fails for blocknr %Lu\n", blocknr); + printk("last ret was %d\n", ret); + if (ret) { + printk("last block group was %Lu %Lu\n", block_group->key.objectid, block_group->key.offset); + } return NULL; } @@ -356,45 +361,20 @@ static int update_block_group(struct btrfs_trans_handle *trans, { struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; - struct radix_tree_root *radix; u64 total = num; u64 old_val; u64 block_in_group; - int ret; - if (num != 1) - radix = &info->block_group_data_radix; - else - radix = &info->block_group_radix; + while(total) { - ret = radix_tree_gang_lookup(radix, (void **)&cache, - blocknr, 1); - if (!ret) { + cache = lookup_block_group(info, blocknr); + if (!cache) { printk(KERN_CRIT "blocknr %Lu lookup failed\n", blocknr); return -1; } block_in_group = blocknr - cache->key.objectid; - if (block_in_group > cache->key.offset || cache->key.objectid > - blocknr) { - if (radix == &info->block_group_data_radix) - radix = &info->block_group_radix; - else - radix = &info->block_group_data_radix; - ret = radix_tree_gang_lookup(radix, (void **)&cache, - blocknr, 1); - if (!ret) { - printk(KERN_CRIT "blocknr %Lu lookup failed\n", - blocknr); - return -1; - } - block_in_group = blocknr - cache->key.objectid; - if (block_in_group > cache->key.offset || - cache->key.objectid > blocknr) { - BUG(); - } - } WARN_ON(block_in_group > cache->key.offset); - radix_tree_tag_set(radix, cache->key.objectid + + radix_tree_tag_set(cache->radix, cache->key.objectid + cache->key.offset - 1, BTRFS_BLOCK_GROUP_DIRTY); @@ -693,6 +673,8 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root num_blocks = 1; total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3; } + if (search_end == (u64)-1) + search_end = btrfs_super_total_blocks(info->disk_super); if (search_start) { block_group = lookup_block_group(info, search_start); block_group = btrfs_find_block_group(root, block_group, @@ -704,7 +686,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root } check_failed: - if (block_group->data != data) + if (!full_scan && block_group->data != data) WARN_ON(1); if (block_group->last_alloc > search_start) search_start = block_group->last_alloc; @@ -734,13 +716,13 @@ check_failed: goto error; if (!start_found) { ins->objectid = search_start; - ins->offset = (u64)-1 - search_start; + ins->offset = search_end - search_start; start_found = 1; goto check_pending; } ins->objectid = last_block > search_start ? last_block : search_start; - ins->offset = (u64)-1 - ins->objectid; + ins->offset = search_end - ins->objectid; goto check_pending; } btrfs_disk_key_to_cpu(&key, &l->items[slot].key); @@ -777,7 +759,7 @@ check_pending: */ btrfs_release_path(root, path); BUG_ON(ins->objectid < search_start); - if (ins->objectid >= btrfs_super_total_blocks(info->disk_super)) { + if (ins->objectid + num_blocks >= search_end) { if (full_scan) return -ENOSPC; search_start = orig_search_start; @@ -840,7 +822,7 @@ check_pending: return 0; new_group: - if (search_start >= btrfs_super_total_blocks(info->disk_super)) { + if (search_start + num_blocks >= search_end) { search_start = orig_search_start; full_scan = 1; } @@ -900,7 +882,12 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, return ret; /* then do prealloc for the extent tree */ - ret = find_free_extent(trans, root, 0, ins->objectid + ins->offset, + if (ins->objectid + ins->offset >= search_end) + search_end = ins->objectid - 1; + else + search_start = ins->objectid + ins->offset; + + ret = find_free_extent(trans, root, 0, search_start, search_end, &prealloc_key, 0); if (ret) return ret; @@ -1198,6 +1185,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) err = -1; break; } + + if (nr & 1) + radix = &info->block_group_data_radix; + else + radix = &info->block_group_radix; + bi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_block_group_item); memcpy(&cache->item, bi, sizeof(*bi)); @@ -1206,12 +1199,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) cache->first_free = cache->key.objectid; cache->pinned = 0; cache->data = (nr & 1); + cache->radix = radix; + key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); - if (nr & 1) - radix = &info->block_group_data_radix; - else - radix = &info->block_group_radix; ret = radix_tree_insert(radix, found_key.objectid + found_key.offset - 1, (void *)cache); -- cgit v1.2.3-70-g09d2 From e37c9e6921207cf503634b06bee37ecb7904408d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 May 2007 20:13:14 -0400 Subject: Btrfs: many allocator fixes, pretty solid Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 + fs/btrfs/bit-radix.c | 10 +- fs/btrfs/bit-radix.h | 2 +- fs/btrfs/ctree.h | 3 + fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 375 ++++++++++++++++++++++++++++++++++++++++++------- fs/btrfs/super.c | 4 +- fs/btrfs/transaction.c | 3 +- 8 files changed, 340 insertions(+), 59 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 6a8c8cd03ca..f6df246f26c 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -7,6 +7,7 @@ * Get rid of struct ctree_path, limiting tree levels held at one time * Add generation number to key pointer in nodes * Add generation number to inode +* Add ability to switch a block group from data to metadata or vice versa * Release * Do real tree locking * Add extent mirroring (backup copies of blocks) diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index 1a0271445df..8f9cd427723 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -77,7 +77,7 @@ int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) } int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, - int nr) + unsigned long start, int nr) { unsigned long *bits; unsigned long *gang[4]; @@ -85,10 +85,13 @@ int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, int ret; int i; int total_found = 0; + unsigned long slot; - ret = radix_tree_gang_lookup(radix, (void **)gang, 0, ARRAY_SIZE(gang)); + slot = start / BIT_RADIX_BITS_PER_ARRAY; + ret = radix_tree_gang_lookup(radix, (void **)gang, slot, + ARRAY_SIZE(gang)); + found = start % BIT_RADIX_BITS_PER_ARRAY; for (i = 0; i < ret && nr > 0; i++) { - found = 0; bits = gang[i]; while(nr > 0) { found = find_next_bit(bits + 1, @@ -104,6 +107,7 @@ int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, } else break; } + found = 0; } return total_found; } diff --git a/fs/btrfs/bit-radix.h b/fs/btrfs/bit-radix.h index 56aad4c7d7f..4e717e30db4 100644 --- a/fs/btrfs/bit-radix.h +++ b/fs/btrfs/bit-radix.h @@ -6,7 +6,7 @@ int set_radix_bit(struct radix_tree_root *radix, unsigned long bit); int test_radix_bit(struct radix_tree_root *radix, unsigned long bit); int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit); int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, - int nr); + unsigned long start, int nr); static inline void init_bit_radix(struct radix_tree_root *radix) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index cdb7c23c41f..92a6078de82 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -259,7 +259,9 @@ struct btrfs_block_group_cache { u64 first_free; u64 last_alloc; u64 pinned; + u64 last_prealloc; int data; + int cached; }; struct crypto_hash; @@ -273,6 +275,7 @@ struct btrfs_fs_info { struct radix_tree_root dev_radix; struct radix_tree_root block_group_radix; struct radix_tree_root block_group_data_radix; + struct radix_tree_root extent_map_radix; u64 extent_tree_insert[BTRFS_MAX_LEVEL * 3]; int extent_tree_insert_nr; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7930458c227..2dbf422a2b9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -551,6 +551,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); + init_bit_radix(&fs_info->extent_map_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3edfc300289..3ac9da45347 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -12,6 +12,97 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); +static int cache_block_group(struct btrfs_root *root, + struct btrfs_block_group_cache *block_group) +{ + struct btrfs_path *path; + int ret; + struct btrfs_key key; + struct btrfs_leaf *leaf; + struct radix_tree_root *extent_radix; + int slot; + u64 i; + u64 last = 0; + u64 hole_size; + int found = 0; + + root = root->fs_info->extent_root; + extent_radix = &root->fs_info->extent_map_radix; + + if (block_group->cached) + return 0; + if (block_group->data) + return 0; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; +printk("cache block group %Lu\n", block_group->key.objectid); + key.objectid = block_group->key.objectid; + key.flags = 0; + key.offset = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + return ret; + if (ret && path->slots[0] > 0) + path->slots[0]--; + while(1) { + leaf = btrfs_buffer_leaf(path->nodes[0]); + slot = path->slots[0]; + if (slot >= btrfs_header_nritems(&leaf->header)) { + ret = btrfs_next_leaf(root, path); + if (ret == 0) + continue; + else { + if (found) { + hole_size = block_group->key.objectid + + block_group->key.offset - last; + } else { + last = block_group->key.objectid; + hole_size = block_group->key.offset; + } + for (i = 0; i < hole_size; i++) { + set_radix_bit(extent_radix, + last + i); + } + break; + } + } + btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + if (key.objectid >= block_group->key.objectid + + block_group->key.offset) { + if (found) { + hole_size = block_group->key.objectid + + block_group->key.offset - last; + } else { + last = block_group->key.objectid; + hole_size = block_group->key.offset; + } + for (i = 0; i < hole_size; i++) { + set_radix_bit(extent_radix, last + i); + } + break; + } + if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { + if (!found) { + last = key.objectid + key.offset; + found = 1; + } else { + hole_size = key.objectid - last; + for (i = 0; i < hole_size; i++) { + set_radix_bit(extent_radix, last + i); + } + last = key.objectid + key.offset; + } + } + path->slots[0]++; + } + + block_group->cached = 1; + btrfs_free_path(path); + return 0; +} + static struct btrfs_block_group_cache *lookup_block_group(struct btrfs_fs_info *info, u64 blocknr) @@ -44,6 +135,63 @@ static struct btrfs_block_group_cache *lookup_block_group(struct return NULL; } +static u64 leaf_range(struct btrfs_root *root) +{ + u64 size = BTRFS_LEAF_DATA_SIZE(root); + size = size / (sizeof(struct btrfs_extent_item) + + sizeof(struct btrfs_item)); + return size; +} + +static u64 find_search_start(struct btrfs_root *root, + struct btrfs_block_group_cache **cache_ret, + u64 search_start, int num) +{ + unsigned long gang[8]; + int ret; + struct btrfs_block_group_cache *cache = *cache_ret; + u64 last = max(search_start, cache->key.objectid); + + if (cache->data) + goto out; + if (num > 1) { + last = max(last, cache->last_prealloc); + } +again: + cache_block_group(root, cache); + while(1) { + ret = find_first_radix_bit(&root->fs_info->extent_map_radix, + gang, last, ARRAY_SIZE(gang)); + if (!ret) + goto out; + last = gang[ret-1] + 1; + if (num > 1) { + if (ret != ARRAY_SIZE(gang)) { + goto new_group; + } + if (gang[ret-1] - gang[0] > leaf_range(root)) { + continue; + } + } + if (gang[0] >= cache->key.objectid + cache->key.offset) { + goto new_group; + } + return gang[0]; + } +out: + return max(cache->last_alloc, search_start); + +new_group: + cache = lookup_block_group(root->fs_info, last + cache->key.offset - 1); + if (!cache) { + return max((*cache_ret)->last_alloc, search_start); + } + cache = btrfs_find_block_group(root, cache, + last + cache->key.offset - 1, 0); + *cache_ret = cache; + goto again; +} + struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, @@ -89,13 +237,18 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, } last = hint->key.offset * 2; if (hint->key.objectid >= last) - last = max(search_start, hint->key.objectid - last); + last = max(search_start + hint->key.offset - 1, + hint->key.objectid - last); else last = hint->key.objectid + hint->key.offset; hint_last = last; } else { - hint_last = search_start; - last = search_start; + if (hint) + hint_last = max(hint->key.objectid, search_start); + else + hint_last = search_start; + + last = hint_last; } while(1) { ret = radix_tree_gang_lookup_tag(radix, (void **)cache, @@ -357,13 +510,14 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 blocknr, u64 num, int alloc) + u64 blocknr, u64 num, int alloc, int mark_free) { struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; u64 total = num; u64 old_val; u64 block_in_group; + u64 i; while(total) { cache = lookup_block_group(info, blocknr); @@ -380,18 +534,38 @@ static int update_block_group(struct btrfs_trans_handle *trans, old_val = btrfs_block_group_used(&cache->item); num = min(total, cache->key.offset - block_in_group); - total -= num; - blocknr += num; if (alloc) { old_val += num; if (blocknr > cache->last_alloc) cache->last_alloc = blocknr; + if (!cache->data) { + for (i = 0; i < num; i++) { + clear_radix_bit(&info->extent_map_radix, + blocknr + i); + } + } } else { old_val -= num; if (blocknr < cache->first_free) cache->first_free = blocknr; + if (!cache->data && mark_free) { + for (i = 0; i < num; i++) { + set_radix_bit(&info->extent_map_radix, + blocknr + i); + } + } + if (old_val < (cache->key.offset * 8) / 10 && + old_val + num >= (cache->key.offset * 8) / 10) { +printk("group %Lu now available\n", cache->key.objectid); + radix_tree_tag_set(cache->radix, + cache->key.objectid + + cache->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); + } } btrfs_set_block_group_used(&cache->item, old_val); + total -= num; + blocknr += num; } return 0; } @@ -413,9 +587,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct int ret; int i; struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix; + struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix; while(1) { - ret = find_first_radix_bit(pinned_radix, gang, + ret = find_first_radix_bit(pinned_radix, gang, 0, ARRAY_SIZE(gang)); if (!ret) break; @@ -430,6 +605,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct block_group->pinned--; if (gang[i] < block_group->last_alloc) block_group->last_alloc = gang[i]; + if (gang[i] < block_group->last_prealloc) + block_group->last_prealloc = gang[i]; + if (!block_group->data) + set_radix_bit(extent_radix, gang[i]); } try_remove_page(btree_inode->i_mapping, gang[i] << (PAGE_CACHE_SHIFT - @@ -508,7 +687,8 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) * remove an extent from the root, returns 0 on success */ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks, int pin) + *root, u64 blocknr, u64 num_blocks, int pin, + int mark_free) { struct btrfs_path *path; struct btrfs_key key; @@ -556,10 +736,10 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ret = btrfs_del_item(trans, extent_root, path); if (ret) BUG(); - ret = update_block_group(trans, root, blocknr, num_blocks, 0); + ret = update_block_group(trans, root, blocknr, num_blocks, 0, + mark_free); BUG_ON(ret); } - btrfs_release_path(extent_root, path); btrfs_free_path(path); finish_current_insert(trans, extent_root); return ret; @@ -585,7 +765,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct pinned_radix = &extent_root->fs_info->pinned_radix; while(1) { - ret = find_first_radix_bit(pending_radix, gang, + ret = find_first_radix_bit(pending_radix, gang, 0, ARRAY_SIZE(gang)); if (!ret) break; @@ -605,7 +785,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct wret = clear_radix_bit(pending_radix, gang[i]); BUG_ON(wret); wret = __free_extent(trans, extent_root, - gang[i], 1, 0); + gang[i], 1, 0, 0); if (wret) err = wret; } @@ -627,7 +807,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root pin_down_block(root, blocknr, 1); return 0; } - ret = __free_extent(trans, root, blocknr, num_blocks, pin); + ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0); pending_ret = del_pending_extents(trans, root->fs_info->extent_root); return ret ? ret : pending_ret; } @@ -688,18 +868,45 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root check_failed: if (!full_scan && block_group->data != data) WARN_ON(1); - if (block_group->last_alloc > search_start) - search_start = block_group->last_alloc; + + if (!data) + search_start = find_search_start(root, &block_group, + search_start, total_needed); + else + search_start = max(block_group->last_alloc, search_start); + btrfs_init_path(path); ins->objectid = search_start; ins->offset = 0; start_found = 0; + ret = btrfs_search_slot(trans, root, ins, path, 0, 0); if (ret < 0) goto error; - if (path->slots[0] > 0) + if (path->slots[0] > 0) { path->slots[0]--; + } + + l = btrfs_buffer_leaf(path->nodes[0]); + btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key); + /* + * a rare case, go back one key if we hit a block group item + * instead of an extent item + */ + if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY && + key.objectid + key.offset >= search_start) { + ins->objectid = key.objectid; + ins->offset = key.offset - 1; + btrfs_release_path(root, path); + ret = btrfs_search_slot(trans, root, ins, path, 0, 0); + if (ret < 0) + goto error; + + if (path->slots[0] > 0) { + path->slots[0]--; + } + } while (1) { l = btrfs_buffer_leaf(path->nodes[0]); @@ -725,21 +932,23 @@ check_failed: ins->offset = search_end - ins->objectid; goto check_pending; } + btrfs_disk_key_to_cpu(&key, &l->items[slot].key); - if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) - goto next; - if (key.objectid >= search_start) { - if (start_found) { - if (last_block < search_start) - last_block = search_start; - hole_size = key.objectid - last_block; - if (hole_size >= num_blocks) { - ins->objectid = last_block; - ins->offset = hole_size; - goto check_pending; - } + if (key.objectid >= search_start && key.objectid > last_block && + start_found) { + if (last_block < search_start) + last_block = search_start; + hole_size = key.objectid - last_block; + if (hole_size >= num_blocks) { + ins->objectid = last_block; + ins->offset = hole_size; + goto check_pending; } } + + if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) + goto next; + start_found = 1; last_block = key.objectid + key.offset; if (last_block >= block_group->key.objectid + @@ -759,6 +968,7 @@ check_pending: */ btrfs_release_path(root, path); BUG_ON(ins->objectid < search_start); + if (ins->objectid + num_blocks >= search_end) { if (full_scan) return -ENOSPC; @@ -780,7 +990,7 @@ check_pending: info->extent_tree_insert[0] && ins->objectid <= last) { search_start = last + 1; - WARN_ON(1); + WARN_ON(!full_scan); goto new_group; } } @@ -790,13 +1000,18 @@ check_pending: if (ins->objectid + num_blocks > first && ins->objectid <= info->extent_tree_prealloc[0]) { search_start = info->extent_tree_prealloc[0] + 1; - WARN_ON(1); + WARN_ON(!full_scan); goto new_group; } } if (fill_prealloc) { int nr; test_block = ins->objectid; + if (test_block - info->extent_tree_prealloc[total_needed - 1] >= + leaf_range(root)) { + total_found = 0; + info->extent_tree_prealloc_nr = total_found; + } while(test_block < ins->objectid + ins->offset && total_found < total_needed) { nr = total_needed - total_found - 1; @@ -811,11 +1026,15 @@ check_pending: } info->extent_tree_prealloc_nr = total_found; } - block_group = lookup_block_group(info, ins->objectid); - if (block_group) { - block_group->last_alloc = ins->objectid; - if (!data) - trans->block_group = block_group; + if (!data) { + block_group = lookup_block_group(info, ins->objectid); + if (block_group) { + if (fill_prealloc) + block_group->last_prealloc = + info->extent_tree_prealloc[total_needed-1]; + else + trans->block_group = block_group; + } } ins->offset = num_blocks; btrfs_free_path(path); @@ -824,6 +1043,7 @@ check_pending: new_group: if (search_start + num_blocks >= search_end) { search_start = orig_search_start; +printk("doing full scan!\n"); full_scan = 1; } block_group = lookup_block_group(info, search_start); @@ -871,26 +1091,57 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, info->extent_tree_insert[info->extent_tree_insert_nr++] = ins->objectid; ret = update_block_group(trans, root, - ins->objectid, ins->offset, 1); + ins->objectid, ins->offset, 1, 0); BUG_ON(ret); return 0; } + + /* + * if we're doing a data allocation, preallocate room in the + * extent tree first. This way the extent tree blocks end up + * in the correct block group. + */ + if (data) { + ret = find_free_extent(trans, root, 0, search_start, + search_end, &prealloc_key, 0); + if (ret) { + return ret; + } + if (prealloc_key.objectid + prealloc_key.offset >= search_end) { + int nr = info->extent_tree_prealloc_nr; + search_end = info->extent_tree_prealloc[nr - 1] - 1; + } else { + search_start = info->extent_tree_prealloc[0] + 1; + } + } /* do the real allocation */ ret = find_free_extent(trans, root, num_blocks, search_start, search_end, ins, data); - if (ret) + if (ret) { return ret; + } - /* then do prealloc for the extent tree */ - if (ins->objectid + ins->offset >= search_end) - search_end = ins->objectid - 1; - else - search_start = ins->objectid + ins->offset; + /* + * if we're doing a metadata allocation, preallocate space in the + * extent tree second. This way, we don't create a tiny hole + * in the allocation map between any unused preallocation blocks + * and the metadata block we're actually allocating. On disk, + * it'll go: + * [block we've allocated], [used prealloc 1], [ unused prealloc ] + * The unused prealloc will get reused the next time around. + */ + if (!data) { + if (ins->objectid + ins->offset >= search_end) + search_end = ins->objectid - 1; + else + search_start = ins->objectid + ins->offset; - ret = find_free_extent(trans, root, 0, search_start, - search_end, &prealloc_key, 0); - if (ret) - return ret; + ret = find_free_extent(trans, root, 0, search_start, + search_end, &prealloc_key, 0); + if (ret) { + return ret; + } + } super_blocks_used = btrfs_super_blocks_used(info->disk_super); btrfs_set_super_blocks_used(info->disk_super, super_blocks_used + @@ -900,11 +1151,13 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, finish_current_insert(trans, extent_root); pending_ret = del_pending_extents(trans, extent_root); - if (ret) + if (ret) { return ret; - if (pending_ret) + } + if (pending_ret) { return pending_ret; - ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); + } + ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0); return 0; } @@ -920,7 +1173,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, hint, (unsigned long)-1, &ins, 0); + 1, 0, (unsigned long)-1, &ins, 0); if (ret) { BUG(); return NULL; @@ -1134,6 +1387,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) { int ret; int ret2; + unsigned long gang[16]; + int i; ret = free_block_group_radix(&info->block_group_radix); ret2 = free_block_group_radix(&info->block_group_data_radix); @@ -1141,6 +1396,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return ret; if (ret2) return ret2; + + while(1) { + ret = find_first_radix_bit(&info->extent_map_radix, + gang, 0, ARRAY_SIZE(gang)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + clear_radix_bit(&info->extent_map_radix, gang[i]); + } + } return 0; } @@ -1186,7 +1451,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) break; } - if (nr & 1) + if (nr % 3) radix = &info->block_group_data_radix; else radix = &info->block_group_radix; @@ -1197,8 +1462,14 @@ int btrfs_read_block_groups(struct btrfs_root *root) memcpy(&cache->key, &found_key, sizeof(found_key)); cache->last_alloc = cache->key.objectid; cache->first_free = cache->key.objectid; + cache->last_prealloc = cache->key.objectid; cache->pinned = 0; - cache->data = (nr & 1); + cache->cached = 0; + + if (nr % 3) + cache->data = 1; + else + cache->data = 0; cache->radix = radix; key.objectid = found_key.objectid + found_key.offset; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d5ac0d8ee3c..4c2870e2d12 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1539,7 +1539,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, kunmap(pages[i]); } SetPageChecked(pages[i]); - btrfs_update_inode_block_group(trans, inode); + // btrfs_update_inode_block_group(trans, inode); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); @@ -1914,7 +1914,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } BUG_ON(ret); alloc_extent_start = ins.objectid; - btrfs_update_inode_block_group(trans, inode); + // btrfs_update_inode_block_group(trans, inode); ret = btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8bbe9107ff7..f0f03121b7b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -122,7 +122,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, } dirty_pages = &trans->transaction->dirty_pages; while(1) { - ret = find_first_radix_bit(dirty_pages, gang, ARRAY_SIZE(gang)); + ret = find_first_radix_bit(dirty_pages, gang, + 0, ARRAY_SIZE(gang)); if (!ret) break; for (i = 0; i < ret; i++) { -- cgit v1.2.3-70-g09d2 From 8d7be552a733e53135a91d2d28f57ab87dc0e889 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 10 May 2007 11:24:42 -0400 Subject: Btrfs: fix check_node and check_leaf to use less cpu Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 44 ++++++++++++++++++++++++++------------------ fs/btrfs/extent-tree.c | 6 +++--- 2 files changed, 29 insertions(+), 21 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index dbd3f636dd3..990d297a7a1 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -130,15 +130,17 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) static int check_node(struct btrfs_root *root, struct btrfs_path *path, int level) { - int i; struct btrfs_node *parent = NULL; struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]); int parent_slot; + int slot; + struct btrfs_key cpukey; u32 nritems = btrfs_header_nritems(&node->header); if (path->nodes[level + 1]) parent = btrfs_buffer_node(path->nodes[level + 1]); parent_slot = path->slots[level + 1]; + slot = path->slots[level]; BUG_ON(nritems == 0); if (parent) { struct btrfs_disk_key *parent_key; @@ -149,10 +151,13 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, btrfs_header_blocknr(&node->header)); } BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); - for (i = 0; nritems > 1 && i < nritems - 2; i++) { - struct btrfs_key cpukey; - btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key); - BUG_ON(comp_keys(&node->ptrs[i].key, &cpukey) >= 0); + if (slot != 0) { + btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[slot - 1].key); + BUG_ON(comp_keys(&node->ptrs[slot].key, &cpukey) <= 0); + } + if (slot < nritems - 1) { + btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[slot + 1].key); + BUG_ON(comp_keys(&node->ptrs[slot].key, &cpukey) >= 0); } return 0; } @@ -160,10 +165,12 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, int level) { - int i; struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[level]); struct btrfs_node *parent = NULL; int parent_slot; + int slot = path->slots[0]; + struct btrfs_key cpukey; + u32 nritems = btrfs_header_nritems(&leaf->header); if (path->nodes[level + 1]) @@ -182,19 +189,20 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, BUG_ON(btrfs_node_blockptr(parent, parent_slot) != btrfs_header_blocknr(&leaf->header)); } - for (i = 0; nritems > 1 && i < nritems - 2; i++) { - struct btrfs_key cpukey; - btrfs_disk_key_to_cpu(&cpukey, &leaf->items[i + 1].key); - BUG_ON(comp_keys(&leaf->items[i].key, - &cpukey) >= 0); - BUG_ON(btrfs_item_offset(leaf->items + i) != - btrfs_item_end(leaf->items + i + 1)); - if (i == 0) { - BUG_ON(btrfs_item_offset(leaf->items + i) + - btrfs_item_size(leaf->items + i) != - BTRFS_LEAF_DATA_SIZE(root)); - } + if (slot != 0) { + btrfs_disk_key_to_cpu(&cpukey, &leaf->items[slot - 1].key); + BUG_ON(comp_keys(&leaf->items[slot].key, &cpukey) <= 0); + BUG_ON(btrfs_item_offset(leaf->items + slot - 1) != + btrfs_item_end(leaf->items + slot)); + } + if (slot < nritems - 1) { + btrfs_disk_key_to_cpu(&cpukey, &leaf->items[slot + 1].key); + BUG_ON(comp_keys(&leaf->items[slot].key, &cpukey) >= 0); + BUG_ON(btrfs_item_offset(leaf->items + slot) != + btrfs_item_end(leaf->items + slot + 1)); } + BUG_ON(btrfs_item_offset(leaf->items) + + btrfs_item_size(leaf->items) != BTRFS_LEAF_DATA_SIZE(root)); return 0; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3ac9da45347..1497ff98f0d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -235,7 +235,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, hint->key.offset - 1, BTRFS_BLOCK_GROUP_AVAIL); } - last = hint->key.offset * 2; + last = hint->key.offset * 3; if (hint->key.objectid >= last) last = max(search_start + hint->key.offset - 1, hint->key.objectid - last); @@ -554,8 +554,8 @@ static int update_block_group(struct btrfs_trans_handle *trans, blocknr + i); } } - if (old_val < (cache->key.offset * 8) / 10 && - old_val + num >= (cache->key.offset * 8) / 10) { + if (old_val < (cache->key.offset * 6) / 10 && + old_val + num >= (cache->key.offset * 6) / 10) { printk("group %Lu now available\n", cache->key.objectid); radix_tree_tag_set(cache->radix, cache->key.objectid + -- cgit v1.2.3-70-g09d2 From de428b63b16f9c74a24228a517713e8c65f79475 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 May 2007 13:28:27 -0400 Subject: Btrfs: allocator optimizations, truncate readahead Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/disk-io.c | 12 ++++++-- fs/btrfs/extent-tree.c | 76 ++++++++++++++++++++++++++++++++++++++++---------- fs/btrfs/super.c | 76 +++++++++++++++++++++++++++++++++++++------------- 4 files changed, 129 insertions(+), 37 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d8caeba5d75..5830b4333d8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1081,7 +1081,7 @@ static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, - int data); + int data, int owner); int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 678456d1e2f..4c0262b5766 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -257,24 +257,30 @@ static struct address_space_operations btree_aops = { int readahead_tree_block(struct btrfs_root *root, u64 blocknr) { struct buffer_head *bh = NULL; + int ret = 0; bh = btrfs_find_create_tree_block(root, blocknr); if (!bh) return 0; - if (buffer_uptodate(bh)) + if (buffer_uptodate(bh)) { + ret = 1; goto done; - if (test_set_buffer_locked(bh)) + } + if (test_set_buffer_locked(bh)) { + ret = 1; goto done; + } if (!buffer_uptodate(bh)) { get_bh(bh); bh->b_end_io = end_buffer_read_sync; submit_bh(READ, bh); } else { unlock_buffer(bh); + ret = 1; } done: brelse(bh); - return 0; + return ret; } struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1497ff98f0d..e3c6bfea375 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -12,6 +12,33 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); +static void reada_extent_leaves(struct btrfs_root *root, + struct btrfs_path *path, u64 limit) +{ + struct btrfs_node *node; + int i; + int nritems; + u64 item_objectid; + u64 blocknr; + int slot; + int ret; + + if (!path->nodes[1]) + return; + node = btrfs_buffer_node(path->nodes[1]); + slot = path->slots[1] + 1; + nritems = btrfs_header_nritems(&node->header); + for (i = slot; i < nritems && i < slot + 8; i++) { + item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); + if (item_objectid > limit) + break; + blocknr = btrfs_node_blockptr(node, i); + ret = readahead_tree_block(root, blocknr); + if (ret) + break; + } +} + static int cache_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *block_group) { @@ -24,6 +51,7 @@ static int cache_block_group(struct btrfs_root *root, u64 i; u64 last = 0; u64 hole_size; + u64 limit; int found = 0; root = root->fs_info->extent_root; @@ -46,14 +74,17 @@ printk("cache block group %Lu\n", block_group->key.objectid); return ret; if (ret && path->slots[0] > 0) path->slots[0]--; + limit = block_group->key.objectid + block_group->key.offset; + reada_extent_leaves(root, path, limit); while(1) { leaf = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; if (slot >= btrfs_header_nritems(&leaf->header)) { + reada_extent_leaves(root, path, limit); ret = btrfs_next_leaf(root, path); - if (ret == 0) + if (ret == 0) { continue; - else { + } else { if (found) { hole_size = block_group->key.objectid + block_group->key.offset - last; @@ -187,7 +218,7 @@ new_group: return max((*cache_ret)->last_alloc, search_start); } cache = btrfs_find_block_group(root, cache, - last + cache->key.offset - 1, 0); + last + cache->key.offset - 1, 0, 0); *cache_ret = cache; goto again; } @@ -195,7 +226,7 @@ new_group: struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, - int data) + int data, int owner) { struct btrfs_block_group_cache *cache[8]; struct btrfs_block_group_cache *found_group = NULL; @@ -207,6 +238,10 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int i; int ret; int full_search = 0; + int factor = 8; + + if (!owner) + factor = 5; if (data) radix = &info->block_group_data_radix; @@ -219,14 +254,14 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (shint->data == data) { used = btrfs_block_group_used(&shint->item); if (used + shint->pinned < - (shint->key.offset * 8) / 10) { + (shint->key.offset * factor) / 10) { return shint; } } } if (hint && hint->data == data) { used = btrfs_block_group_used(&hint->item); - if (used + hint->pinned < (hint->key.offset * 8) / 10) { + if (used + hint->pinned < (hint->key.offset * factor) / 10) { return hint; } if (used >= (hint->key.offset * 8) / 10) { @@ -261,7 +296,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, cache[i]->key.offset; used = btrfs_block_group_used(&cache[i]->item); if (used + cache[i]->pinned < - (cache[i]->key.offset * 8) / 10) { + (cache[i]->key.offset * factor) / 10) { found_group = cache[i]; goto found; } @@ -272,6 +307,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, BTRFS_BLOCK_GROUP_AVAIL); } } + cond_resched(); } last = hint_last; again: @@ -295,13 +331,16 @@ again: BTRFS_BLOCK_GROUP_AVAIL); } } + cond_resched(); } if (!full_search) { +printk("find block group doing full search data %d start %Lu\n", data, search_start); last = search_start; full_search = 1; goto again; } if (!found_group) { +printk("find block group bailing to zero data %d\n", data); ret = radix_tree_gang_lookup(radix, (void **)&found_group, 0, 1); BUG_ON(ret != 1); @@ -554,8 +593,8 @@ static int update_block_group(struct btrfs_trans_handle *trans, blocknr + i); } } - if (old_val < (cache->key.offset * 6) / 10 && - old_val + num >= (cache->key.offset * 6) / 10) { + if (old_val < (cache->key.offset * 5) / 10 && + old_val + num >= (cache->key.offset * 5) / 10) { printk("group %Lu now available\n", cache->key.objectid); radix_tree_tag_set(cache->radix, cache->key.objectid + @@ -842,6 +881,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int level; struct btrfs_block_group_cache *block_group; int full_scan = 0; + u64 limit; path = btrfs_alloc_path(); ins->flags = 0; @@ -858,11 +898,11 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (search_start) { block_group = lookup_block_group(info, search_start); block_group = btrfs_find_block_group(root, block_group, - search_start, data); + search_start, data, 1); } else { block_group = btrfs_find_block_group(root, trans->block_group, 0, - data); + data, 1); } check_failed: @@ -916,6 +956,12 @@ check_failed: info->extent_tree_prealloc_nr = 0; total_found = 0; } + if (start_found) + limit = last_block + + block_group->key.offset / 2; + else + limit = search_start + + block_group->key.offset / 2; ret = btrfs_next_leaf(root, path); if (ret == 0) continue; @@ -960,6 +1006,7 @@ check_failed: } next: path->slots[0]++; + cond_resched(); } // FIXME -ENOSPC check_pending: @@ -1049,7 +1096,8 @@ printk("doing full scan!\n"); block_group = lookup_block_group(info, search_start); if (!full_scan) block_group = btrfs_find_block_group(root, block_group, - search_start, data); + search_start, data, 0); + cond_resched(); goto check_failed; error: @@ -1102,7 +1150,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, * in the correct block group. */ if (data) { - ret = find_free_extent(trans, root, 0, search_start, + ret = find_free_extent(trans, root, 0, 0, search_end, &prealloc_key, 0); if (ret) { return ret; @@ -1173,7 +1221,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, 0, (unsigned long)-1, &ins, 0); + 1, hint, (unsigned long)-1, &ins, 0); if (ret) { BUG(); return NULL; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index eaa48f09d1c..0f79490123c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -351,6 +351,35 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans, return ret; } +static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path, + u64 objectid) +{ + struct btrfs_node *node; + int i; + int nritems; + u64 item_objectid; + u64 blocknr; + int slot; + int ret; + + if (!path->nodes[1]) + return; + node = btrfs_buffer_node(path->nodes[1]); + slot = path->slots[1]; + if (slot == 0) + return; + nritems = btrfs_header_nritems(&node->header); + for (i = slot - 1; i >= 0; i--) { + item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); + if (item_objectid != objectid) + break; + blocknr = btrfs_node_blockptr(node, i); + ret = readahead_tree_block(root, blocknr); + if (ret) + break; + } +} + static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) @@ -386,6 +415,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, BUG_ON(path->slots[0] == 0); path->slots[0]--; } + reada_truncate(root, path, inode->i_ino); leaf = btrfs_buffer_leaf(path->nodes[0]); found_key = &leaf->items[path->slots[0]].key; if (btrfs_disk_key_objectid(found_key) != inode->i_ino) @@ -587,28 +617,30 @@ printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_r return d_splice_alias(inode, dentry); } -static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path) +static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path, + u64 objectid) { struct btrfs_node *node; int i; - int nritems; - u64 objectid; + u32 nritems; u64 item_objectid; u64 blocknr; int slot; + int ret; if (!path->nodes[1]) return; node = btrfs_buffer_node(path->nodes[1]); slot = path->slots[1]; - objectid = btrfs_disk_key_objectid(&node->ptrs[slot].key); nritems = btrfs_header_nritems(&node->header); - for (i = slot; i < nritems; i++) { + for (i = slot + 1; i < nritems; i++) { item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); if (item_objectid != objectid) break; blocknr = btrfs_node_blockptr(node, i); - readahead_tree_block(root, blocknr); + ret = readahead_tree_block(root, blocknr); + if (ret) + break; } } @@ -646,21 +678,20 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (ret < 0) goto err; advance = 0; - reada_leaves(root, path); + reada_leaves(root, path, inode->i_ino); while(1) { leaf = btrfs_buffer_leaf(path->nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path->slots[0]; if (advance || slot >= nritems) { if (slot >= nritems -1) { + reada_leaves(root, path, inode->i_ino); ret = btrfs_next_leaf(root, path); if (ret) break; leaf = btrfs_buffer_leaf(path->nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path->slots[0]; - if (path->slots[1] == 0) - reada_leaves(root, path); } else { slot++; path->slots[0]++; @@ -805,13 +836,18 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_inode_item inode_item; struct btrfs_key *location; int ret; + int owner; inode = new_inode(root->fs_info->sb); if (!inode) return ERR_PTR(-ENOMEM); BTRFS_I(inode)->root = root; - group = btrfs_find_block_group(root, group, 0, 0); + if (mode & S_IFDIR) + owner = 0; + else + owner = 1; + group = btrfs_find_block_group(root, group, 0, 0, owner); BTRFS_I(inode)->block_group = group; inode->i_uid = current->fsuid; @@ -1562,7 +1598,7 @@ failed: static int drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 start, u64 end) + u64 start, u64 end, u64 *hint_block) { int ret; struct btrfs_key key; @@ -1659,17 +1695,14 @@ static int drop_extents(struct btrfs_trans_handle *trans, new_num = (start - key.offset) >> inode->i_blkbits; old_num = btrfs_file_extent_num_blocks(extent); + *hint_block = + btrfs_file_extent_disk_blocknr(extent); inode->i_blocks -= (old_num - new_num) << 3; btrfs_set_file_extent_num_blocks(extent, new_num); mark_buffer_dirty(path->nodes[0]); } else { WARN_ON(1); - /* - ret = btrfs_truncate_item(trans, root, path, - start - key.offset); - BUG_ON(ret); - */ } } if (!keep) { @@ -1683,6 +1716,8 @@ static int drop_extents(struct btrfs_trans_handle *trans, btrfs_file_extent_disk_num_blocks(extent); extent_num_blocks = btrfs_file_extent_num_blocks(extent); + *hint_block = + btrfs_file_extent_disk_blocknr(extent); } ret = btrfs_del_item(trans, root, path); BUG_ON(ret); @@ -1831,6 +1866,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, u64 start_pos; u64 num_blocks; u64 alloc_extent_start; + u64 hint_block; struct btrfs_trans_handle *trans; struct btrfs_key ins; pinned[0] = NULL; @@ -1871,6 +1907,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } if (first_index != last_index && (last_index << PAGE_CACHE_SHIFT) < inode->i_size && + pos + count < inode->i_size && (count & (PAGE_CACHE_SIZE - 1))) { pinned[1] = grab_cache_page(inode->i_mapping, last_index); if (!PageUptodate(pinned[1])) { @@ -1892,18 +1929,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, btrfs_set_trans_block_group(trans, inode); /* FIXME blocksize != 4096 */ inode->i_blocks += num_blocks << 3; + hint_block = 0; if (start_pos < inode->i_size) { /* FIXME blocksize != pagesize */ ret = drop_extents(trans, root, inode, start_pos, (pos + count + root->blocksize -1) & - ~((u64)root->blocksize - 1)); + ~((u64)root->blocksize - 1), &hint_block); BUG_ON(ret); } if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { ret = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, 1, (u64)-1, &ins, 1); + num_blocks, hint_block, (u64)-1, + &ins, 1); BUG_ON(ret); ret = btrfs_insert_file_extent(trans, root, inode->i_ino, start_pos, ins.objectid, ins.offset); @@ -2455,7 +2494,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type, btrfs_fill_super, mnt); } - static int btrfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { -- cgit v1.2.3-70-g09d2 From 3a686375629da5d2e2ad019265b66ef113c87455 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 May 2007 13:35:57 -0400 Subject: Btrfs: sparse files! Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 14 ++++-- fs/btrfs/file-item.c | 11 +++-- fs/btrfs/super.c | 121 ++++++++++++++++++++++++++++++++++++++++--------- 4 files changed, 116 insertions(+), 31 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3330004af1a..e8f741167c6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1180,6 +1180,7 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, u64 offset, + u64 disk_num_blocks, u64 num_blocks); int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e3c6bfea375..a366415e03a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -443,6 +443,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, buf_leaf = btrfs_buffer_leaf(buf); for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) { if (leaf) { + u64 disk_blocknr; key = &buf_leaf->items[i].key; if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) continue; @@ -451,8 +452,10 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE) continue; - ret = btrfs_inc_extent_ref(trans, root, - btrfs_file_extent_disk_blocknr(fi), + disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + if (disk_blocknr == 0) + continue; + ret = btrfs_inc_extent_ref(trans, root, disk_blocknr, btrfs_file_extent_disk_num_blocks(fi)); BUG_ON(ret); } else { @@ -1248,6 +1251,7 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, leaf = btrfs_buffer_leaf(cur); nritems = btrfs_header_nritems(&leaf->header); for (i = 0; i < nritems; i++) { + u64 disk_blocknr; key = &leaf->items[i].key; if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) continue; @@ -1258,8 +1262,10 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, * FIXME make sure to insert a trans record that * repeats the snapshot del on crash */ - ret = btrfs_free_extent(trans, root, - btrfs_file_extent_disk_blocknr(fi), + disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + if (disk_blocknr == 0) + continue; + ret = btrfs_free_extent(trans, root, disk_blocknr, btrfs_file_extent_disk_num_blocks(fi), 0); BUG_ON(ret); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index a66709e6d03..7990b574211 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -9,7 +9,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, - u64 offset, u64 num_blocks) + u64 offset, u64 disk_num_blocks, + u64 num_blocks) { int ret = 0; struct btrfs_file_extent_item *item; @@ -30,7 +31,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); btrfs_set_file_extent_disk_blocknr(item, offset); - btrfs_set_file_extent_disk_num_blocks(item, num_blocks); + btrfs_set_file_extent_disk_num_blocks(item, disk_num_blocks); btrfs_set_file_extent_offset(item, 0); btrfs_set_file_extent_num_blocks(item, num_blocks); btrfs_set_file_extent_generation(item, trans->transid); @@ -176,14 +177,14 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || found_key.objectid != objectid || csum_offset >= MAX_CSUM_ITEMS(root)) { - WARN_ON(1); goto insert; } if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / BTRFS_CRC32_SIZE) { u32 diff = (csum_offset + 1) * BTRFS_CRC32_SIZE; diff = diff - btrfs_item_size(leaf->items + path->slots[0]); - WARN_ON(diff != BTRFS_CRC32_SIZE); + if (diff != BTRFS_CRC32_SIZE) + goto insert; ret = btrfs_extend_item(trans, root, path, diff); BUG_ON(ret); goto csum; @@ -241,7 +242,7 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, ret = PTR_ERR(item); /* a csum that isn't present is a preallocated region. */ if (ret == -ENOENT || ret == -EFBIG) - ret = 1; + ret = -ENOENT; goto fail; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c1a0adde9b1..5b87c4e9d49 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -17,6 +17,7 @@ #include "transaction.h" #include "btrfs_inode.h" #include "ioctl.h" +#include "print-tree.h" struct btrfs_iget_args { u64 ino; @@ -421,14 +422,17 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, struct btrfs_file_extent_item); if (btrfs_file_extent_type(fi) != BTRFS_FILE_EXTENT_INLINE) { + u64 num_dec; extent_start = btrfs_file_extent_disk_blocknr(fi); extent_num_blocks = btrfs_file_extent_disk_num_blocks(fi); /* FIXME blocksize != 4096 */ - inode->i_blocks -= - btrfs_file_extent_num_blocks(fi) << 3; - found_extent = 1; + num_dec = btrfs_file_extent_num_blocks(fi) << 3; + if (extent_start != 0) { + found_extent = 1; + inode->i_blocks -= num_dec; + } } } ret = btrfs_del_item(trans, root, path); @@ -448,6 +452,43 @@ error: return ret; } +static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + int err; + + err = inode_change_ok(inode, attr); + if (err) + return err; + + if (S_ISREG(inode->i_mode) && + attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) { + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 mask = root->blocksize - 1; + u64 pos = (inode->i_size + mask) & ~mask; + u64 hole_size; + + if (attr->ia_size < pos) + goto out; + hole_size = (attr->ia_size - pos + mask) & ~mask; + hole_size >>= inode->i_blkbits; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + err = btrfs_insert_file_extent(trans, root, inode->i_ino, + pos, 0, 0, hole_size); + BUG_ON(err); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + } + + err = inode_setattr(inode, attr); + +out: + return err; +} static void btrfs_delete_inode(struct inode *inode) { struct btrfs_trans_handle *trans; @@ -1169,8 +1210,10 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, if (found_type == BTRFS_FILE_EXTENT_REG) { extent_start = extent_start >> inode->i_blkbits; extent_end = extent_start + btrfs_file_extent_num_blocks(item); + err = 0; + if (blocknr == 0) + goto out; if (iblock >= extent_start && iblock < extent_end) { - err = 0; btrfs_map_bh_to_logical(root, result, blocknr + iblock - extent_start); goto out; @@ -1591,7 +1634,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, ptr, bh->b_data, offset + write_bytes); mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); - } else { + } else if (buffer_mapped(bh)) { btrfs_csum_file_block(trans, root, inode->i_ino, pages[i]->index << PAGE_CACHE_SHIFT, kmap(pages[i]), PAGE_CACHE_SIZE); @@ -1693,15 +1736,24 @@ static int drop_extents(struct btrfs_trans_handle *trans, goto out; } - search_start = extent_end; + if (found_inline) { + u64 mask = root->blocksize - 1; + search_start = (extent_end + mask) & ~mask; + } else + search_start = extent_end; if (end < extent_end && end >= key.offset) { if (found_extent) { + u64 disk_blocknr = + btrfs_file_extent_disk_blocknr(extent); + u64 disk_num_blocks = + btrfs_file_extent_disk_num_blocks(extent); memcpy(&old, extent, sizeof(old)); - ret = btrfs_inc_extent_ref(trans, root, - btrfs_file_extent_disk_blocknr(&old), - btrfs_file_extent_disk_num_blocks(&old)); - BUG_ON(ret); + if (disk_blocknr != 0) { + ret = btrfs_inc_extent_ref(trans, root, + disk_blocknr, disk_num_blocks); + BUG_ON(ret); + } } WARN_ON(found_inline); bookend = 1; @@ -1719,7 +1771,10 @@ static int drop_extents(struct btrfs_trans_handle *trans, old_num = btrfs_file_extent_num_blocks(extent); *hint_block = btrfs_file_extent_disk_blocknr(extent); - inode->i_blocks -= (old_num - new_num) << 3; + if (btrfs_file_extent_disk_blocknr(extent)) { + inode->i_blocks -= + (old_num - new_num) << 3; + } btrfs_set_file_extent_num_blocks(extent, new_num); mark_buffer_dirty(path->nodes[0]); @@ -1745,7 +1800,7 @@ static int drop_extents(struct btrfs_trans_handle *trans, BUG_ON(ret); btrfs_release_path(root, path); extent = NULL; - if (found_extent) { + if (found_extent && disk_blocknr != 0) { inode->i_blocks -= extent_num_blocks << 3; ret = btrfs_free_extent(trans, root, disk_blocknr, @@ -1785,18 +1840,19 @@ static int drop_extents(struct btrfs_trans_handle *trans, btrfs_file_extent_offset(&old) + ((end - key.offset) >> inode->i_blkbits)); WARN_ON(btrfs_file_extent_num_blocks(&old) < - (end - key.offset) >> inode->i_blkbits); + (extent_end - end) >> inode->i_blkbits); btrfs_set_file_extent_num_blocks(extent, - btrfs_file_extent_num_blocks(&old) - - ((end - key.offset) >> inode->i_blkbits)); + (extent_end - end) >> inode->i_blkbits); btrfs_set_file_extent_type(extent, BTRFS_FILE_EXTENT_REG); btrfs_set_file_extent_generation(extent, btrfs_file_extent_generation(&old)); btrfs_mark_buffer_dirty(path->nodes[0]); - inode->i_blocks += - btrfs_file_extent_num_blocks(extent) << 3; + if (btrfs_file_extent_disk_blocknr(&old) != 0) { + inode->i_blocks += + btrfs_file_extent_num_blocks(extent) << 3; + } ret = 0; goto out; } @@ -1960,6 +2016,21 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, ~((u64)root->blocksize - 1), &hint_block); BUG_ON(ret); } + if (inode->i_size < start_pos) { + u64 last_pos_in_file; + u64 hole_size; + u64 mask = root->blocksize - 1; + last_pos_in_file = (inode->i_size + mask) & ~mask; + hole_size = (start_pos - last_pos_in_file + mask) & ~mask; + hole_size >>= inode->i_blkbits; + if (last_pos_in_file < start_pos) { + ret = btrfs_insert_file_extent(trans, root, + inode->i_ino, + last_pos_in_file, + 0, 0, hole_size); + } + BUG_ON(ret); + } if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { ret = btrfs_alloc_extent(trans, root, inode->i_ino, @@ -1967,7 +2038,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, &ins, 1); BUG_ON(ret); ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - start_pos, ins.objectid, ins.offset); + start_pos, ins.objectid, ins.offset, + ins.offset); BUG_ON(ret); } else { ins.offset = 0; @@ -2044,10 +2116,13 @@ static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, page->index << PAGE_CACHE_SHIFT, kmap(page), PAGE_CACHE_SIZE); if (ret) { - printk("failed to verify ino %lu page %lu\n", - page->mapping->host->i_ino, - page->index); - memset(page_address(page), 0, PAGE_CACHE_SIZE); + if (ret != -ENOENT) { + printk("failed to verify ino %lu page %lu\n", + page->mapping->host->i_ino, + page->index); + memset(page_address(page), 0, PAGE_CACHE_SIZE); + flush_dcache_page(page); + } } SetPageChecked(page); kunmap(page); @@ -2759,6 +2834,7 @@ static struct inode_operations btrfs_dir_inode_operations = { .rmdir = btrfs_rmdir, .rename = btrfs_rename, .symlink = btrfs_symlink, + .setattr = btrfs_setattr, }; static struct inode_operations btrfs_dir_ro_inode_operations = { @@ -2788,6 +2864,7 @@ static struct address_space_operations btrfs_symlink_aops = { static struct inode_operations btrfs_file_inode_operations = { .truncate = btrfs_truncate, .getattr = btrfs_getattr, + .setattr = btrfs_setattr, }; static struct file_operations btrfs_file_operations = { -- cgit v1.2.3-70-g09d2 From 1e2677e000fbc7f1892b1ea9f952f1667f4443f1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 May 2007 16:52:18 -0400 Subject: Btrfs: block group switching Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 5 ++- fs/btrfs/extent-tree.c | 85 ++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 65 insertions(+), 25 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 756ea9d0123..1bb62867ded 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -249,10 +249,13 @@ struct btrfs_device_item { /* tag for the radix tree of block groups in ram */ #define BTRFS_BLOCK_GROUP_DIRTY 0 #define BTRFS_BLOCK_GROUP_AVAIL 1 -#define BTRFS_BLOCK_GROUP_HINTS 8 #define BTRFS_BLOCK_GROUP_SIZE (256 * 1024 * 1024) + + +#define BTRFS_BLOCK_GROUP_DATA 1 struct btrfs_block_group_item { __le64 used; + u8 flags; } __attribute__ ((__packed__)); struct btrfs_block_group_cache { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a366415e03a..b6dc020bdde 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -232,6 +232,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *found_group = NULL; struct btrfs_fs_info *info = root->fs_info; struct radix_tree_root *radix; + struct radix_tree_root *swap_radix; u64 used; u64 last = 0; u64 hint_last; @@ -239,14 +240,18 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int ret; int full_search = 0; int factor = 8; + int data_swap = 0; if (!owner) factor = 5; - if (data) + if (data) { radix = &info->block_group_data_radix; - else + swap_radix = &info->block_group_radix; + } else { radix = &info->block_group_radix; + swap_radix = &info->block_group_data_radix; + } if (search_start) { struct btrfs_block_group_cache *shint; @@ -334,15 +339,27 @@ again: cond_resched(); } if (!full_search) { -printk("find block group doing full search data %d start %Lu\n", data, search_start); last = search_start; full_search = 1; goto again; } + if (!data_swap) { + struct radix_tree_root *tmp = radix; + data_swap = 1; + radix = swap_radix; + swap_radix = tmp; + last = search_start; + goto again; + } if (!found_group) { printk("find block group bailing to zero data %d\n", data); ret = radix_tree_gang_lookup(radix, (void **)&found_group, 0, 1); + if (ret == 0) { + ret = radix_tree_gang_lookup(swap_radix, + (void **)&found_group, + 0, 1); + } BUG_ON(ret != 1); } found: @@ -552,7 +569,8 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 blocknr, u64 num, int alloc, int mark_free) + u64 blocknr, u64 num, int alloc, int mark_free, + int data) { struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; @@ -560,6 +578,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 old_val; u64 block_in_group; u64 i; + int ret; while(total) { cache = lookup_block_group(info, blocknr); @@ -577,7 +596,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, old_val = btrfs_block_group_used(&cache->item); num = min(total, cache->key.offset - block_in_group); if (alloc) { - old_val += num; if (blocknr > cache->last_alloc) cache->last_alloc = blocknr; if (!cache->data) { @@ -586,6 +604,30 @@ static int update_block_group(struct btrfs_trans_handle *trans, blocknr + i); } } + if (cache->data != data && + old_val < cache->key.offset / 2) { +printk("changing block group %Lu from %d to %d\n", cache->key.objectid, cache->data, data); + cache->data = data; + radix_tree_delete(cache->radix, + cache->key.objectid + + cache->key.offset - 1); + + if (data) { + cache->radix = + &info->block_group_data_radix; + cache->item.flags |= + BTRFS_BLOCK_GROUP_DATA; + } else { + cache->radix = &info->block_group_radix; + cache->item.flags &= + ~BTRFS_BLOCK_GROUP_DATA; + } + ret = radix_tree_insert(cache->radix, + cache->key.objectid + + cache->key.offset - 1, + (void *)cache); + } + old_val += num; } else { old_val -= num; if (blocknr < cache->first_free) @@ -596,8 +638,8 @@ static int update_block_group(struct btrfs_trans_handle *trans, blocknr + i); } } - if (old_val < (cache->key.offset * 5) / 10 && - old_val + num >= (cache->key.offset * 5) / 10) { + if (old_val < cache->key.offset / 2 && + old_val + num >= cache->key.offset / 2) { printk("group %Lu now available\n", cache->key.objectid); radix_tree_tag_set(cache->radix, cache->key.objectid + @@ -779,7 +821,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (ret) BUG(); ret = update_block_group(trans, root, blocknr, num_blocks, 0, - mark_free); + mark_free, 0); BUG_ON(ret); } btrfs_free_path(path); @@ -909,10 +951,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root } check_failed: - if (!full_scan && block_group->data != data) - WARN_ON(1); - - if (!data) + if (!block_group->data) search_start = find_search_start(root, &block_group, search_start, total_needed); else @@ -1142,7 +1181,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, info->extent_tree_insert[info->extent_tree_insert_nr++] = ins->objectid; ret = update_block_group(trans, root, - ins->objectid, ins->offset, 1, 0); + ins->objectid, ins->offset, 1, 0, 0); BUG_ON(ret); return 0; } @@ -1208,7 +1247,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, if (pending_ret) { return pending_ret; } - ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0); + ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0, + data); return 0; } @@ -1477,7 +1517,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_leaf *leaf; u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize; u64 used; - u64 nr = 0; root = info->extent_root; key.objectid = 0; @@ -1505,13 +1544,16 @@ int btrfs_read_block_groups(struct btrfs_root *root) break; } - if (nr % 3) + bi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_block_group_item); + if (bi->flags & BTRFS_BLOCK_GROUP_DATA) { radix = &info->block_group_data_radix; - else + cache->data = 1; + } else { radix = &info->block_group_radix; + cache->data = 0; + } - bi = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_block_group_item); memcpy(&cache->item, bi, sizeof(*bi)); memcpy(&cache->key, &found_key, sizeof(found_key)); cache->last_alloc = cache->key.objectid; @@ -1520,10 +1562,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) cache->pinned = 0; cache->cached = 0; - if (nr % 3) - cache->data = 1; - else - cache->data = 0; cache->radix = radix; key.objectid = found_key.objectid + found_key.offset; @@ -1541,7 +1579,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) if (key.objectid >= btrfs_super_total_blocks(info->disk_super)) break; - nr++; } btrfs_free_path(path); -- cgit v1.2.3-70-g09d2 From fbdc762b4e1833b5d75cada5aabeadccd8379792 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 30 May 2007 10:22:12 -0400 Subject: Btrfs: use a separate flag for search_start vs a hint in find_free_extent Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 69 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 27 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b6dc020bdde..85616b458e1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5,8 +5,9 @@ #include "transaction.h" static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *orig_root, u64 num_blocks, u64 search_start, u64 - search_end, struct btrfs_key *ins, int data); + *orig_root, u64 num_blocks, u64 search_start, + u64 search_end, u64 hint_block, + struct btrfs_key *ins, int data); static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct @@ -157,12 +158,6 @@ static struct btrfs_block_group_cache *lookup_block_group(struct block_group->key.objectid + block_group->key.offset) return block_group; } - WARN_ON(1); - printk("lookup_block_group fails for blocknr %Lu\n", blocknr); - printk("last ret was %d\n", ret); - if (ret) { - printk("last block group was %Lu %Lu\n", block_group->key.objectid, block_group->key.offset); - } return NULL; } @@ -378,7 +373,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_key ins; u32 refs; - find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1, + find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1, 0, &ins, 0); path = btrfs_alloc_path(); BUG_ON(!path); @@ -495,7 +490,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, struct btrfs_block_group_item *bi; struct btrfs_key ins; - find_free_extent(trans, extent_root, 0, 0, (u64)-1, &ins, 0); + find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, 0); ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); BUG_ON(ret); bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], @@ -788,7 +783,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; - find_free_extent(trans, root, 0, 0, (u64)-1, &ins, 0); + find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0); path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); @@ -906,7 +901,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root */ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *orig_root, u64 num_blocks, u64 search_start, u64 - search_end, struct btrfs_key *ins, int data) + search_end, u64 hint_block, + struct btrfs_key *ins, int data) { struct btrfs_path *path; struct btrfs_key key; @@ -926,6 +922,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int level; struct btrfs_block_group_cache *block_group; int full_scan = 0; + int wrapped = 0; u64 limit; path = btrfs_alloc_path(); @@ -940,10 +937,10 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root } if (search_end == (u64)-1) search_end = btrfs_super_total_blocks(info->disk_super); - if (search_start) { - block_group = lookup_block_group(info, search_start); + if (hint_block) { + block_group = lookup_block_group(info, hint_block); block_group = btrfs_find_block_group(root, block_group, - search_start, data, 1); + hint_block, data, 1); } else { block_group = btrfs_find_block_group(root, trans->block_group, 0, @@ -954,7 +951,7 @@ check_failed: if (!block_group->data) search_start = find_search_start(root, &block_group, search_start, total_needed); - else + else if (!full_scan) search_start = max(block_group->last_alloc, search_start); btrfs_init_path(path); @@ -1039,7 +1036,7 @@ check_failed: start_found = 1; last_block = key.objectid + key.offset; - if (last_block >= block_group->key.objectid + + if (!full_scan && last_block >= block_group->key.objectid + block_group->key.offset) { btrfs_release_path(root, path); search_start = block_group->key.objectid + @@ -1059,10 +1056,15 @@ check_pending: BUG_ON(ins->objectid < search_start); if (ins->objectid + num_blocks >= search_end) { - if (full_scan) - return -ENOSPC; + if (full_scan) { + ret = -ENOSPC; + goto error; + } search_start = orig_search_start; - full_scan = 1; + if (wrapped) + full_scan = 1; + else + wrapped = 1; goto new_group; } for (test_block = ins->objectid; @@ -1132,14 +1134,20 @@ check_pending: new_group: if (search_start + num_blocks >= search_end) { search_start = orig_search_start; -printk("doing full scan!\n"); - full_scan = 1; + if (full_scan) { + ret = -ENOSPC; + goto error; + } + if (wrapped) + full_scan = 1; + else + wrapped = 1; } block_group = lookup_block_group(info, search_start); + cond_resched(); if (!full_scan) block_group = btrfs_find_block_group(root, block_group, search_start, data, 0); - cond_resched(); goto check_failed; error: @@ -1156,12 +1164,13 @@ error: */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u64 num_blocks, u64 search_start, + u64 num_blocks, u64 hint_block, u64 search_end, struct btrfs_key *ins, int data) { int ret; int pending_ret; u64 super_blocks_used; + u64 search_start = 0; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; struct btrfs_extent_item extent_item; @@ -1193,7 +1202,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, */ if (data) { ret = find_free_extent(trans, root, 0, 0, - search_end, &prealloc_key, 0); + search_end, 0, &prealloc_key, 0); if (ret) { return ret; } @@ -1204,9 +1213,11 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, search_start = info->extent_tree_prealloc[0] + 1; } } + if (hint_block < search_start) + hint_block = search_start; /* do the real allocation */ ret = find_free_extent(trans, root, num_blocks, search_start, - search_end, ins, data); + search_end, hint_block, ins, data); if (ret) { return ret; } @@ -1226,8 +1237,12 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, else search_start = ins->objectid + ins->offset; + if (hint_block < search_start) + hint_block = search_start; + ret = find_free_extent(trans, root, 0, search_start, - search_end, &prealloc_key, 0); + search_end, hint_block, + &prealloc_key, 0); if (ret) { return ret; } -- cgit v1.2.3-70-g09d2 From fabb568183de7996257080260d3537fa75b3667e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 7 Jun 2007 22:13:21 -0400 Subject: Btrfs: d_type optimization Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 10 ++++++++ fs/btrfs/extent-tree.c | 1 + fs/btrfs/super.c | 66 +++++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 68 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1bb62867ded..a1c95c980fd 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -34,6 +34,16 @@ extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_CRC32_SIZE 4 #define BTRFS_EMPTY_DIR_SIZE 6 +#define BTRFS_FT_UNKNOWN 0 +#define BTRFS_FT_REG_FILE 1 +#define BTRFS_FT_DIR 2 +#define BTRFS_FT_CHRDEV 3 +#define BTRFS_FT_BLKDEV 4 +#define BTRFS_FT_FIFO 5 +#define BTRFS_FT_SOCK 6 +#define BTRFS_FT_SYMLINK 7 +#define BTRFS_FT_MAX 8 + /* * the key defines the order in the tree, and so it also defines (optimal) * block layout. objectid corresonds to the inode number. The flags diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 85616b458e1..645a4228bfe 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1264,6 +1264,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, } ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0, data); + BUG_ON(ret); return 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0eb64d6eaf9..b58b4cf6676 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -43,6 +43,18 @@ static int drop_extents(struct btrfs_trans_handle *trans, static int btrfs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *result, int create); + +#define S_SHIFT 12 +static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { + [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE, + [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR, + [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV, + [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV, + [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO, + [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK, + [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, +}; + static void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; @@ -785,6 +797,9 @@ static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path, break; } } +static unsigned char btrfs_filetype_table[] = { + DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK +}; static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { @@ -799,7 +814,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct btrfs_leaf *leaf; int slot; int advance; - unsigned char d_type = DT_UNKNOWN; + unsigned char d_type; int over = 0; u32 di_cur; u32 di_total; @@ -853,6 +868,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) di_cur = 0; di_total = btrfs_item_size(leaf->items + slot); while(di_cur < di_total) { + d_type = btrfs_filetype_table[btrfs_dir_type(di)]; over = filldir(dirent, (const char *)(di + 1), btrfs_dir_name_len(di), btrfs_disk_key_offset(&item->key), @@ -1012,6 +1028,11 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, return inode; } +static inline u8 btrfs_inode_type(struct inode *inode) +{ + return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT]; +} + static int btrfs_add_link(struct btrfs_trans_handle *trans, struct dentry *dentry, struct inode *inode) { @@ -1026,7 +1047,7 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, ret = btrfs_insert_dir_item(trans, root, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, - &key, 0); + &key, btrfs_inode_type(inode)); if (ret == 0) { dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2; ret = btrfs_update_inode(trans, root, @@ -1150,12 +1171,12 @@ static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid, - &key, 1); + &key, BTRFS_FT_DIR); if (ret) goto error; key.objectid = dirid; ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid, - &key, 1); + &key, BTRFS_FT_DIR); if (ret) goto error; error: @@ -1265,6 +1286,10 @@ printk("btrfs sync_fs\n"); return 0; } +#define BTRFS_GET_BLOCK_NO_CREATE 0 +#define BTRFS_GET_BLOCK_CREATE 1 +#define BTRFS_GET_BLOCK_NO_DIRECT 2 + static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, struct buffer_head *result, int create) { @@ -1286,7 +1311,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); - if (create) { + if (create & BTRFS_GET_BLOCK_CREATE) { WARN_ON(1); /* this almost but not quite works */ trans = btrfs_start_transaction(root, 1); @@ -1349,6 +1374,11 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, char *ptr; char *map; u32 size; + + if (create & BTRFS_GET_BLOCK_NO_DIRECT) { + err = -EINVAL; + goto out; + } size = btrfs_file_extent_inline_len(leaf->items + path->slots[0]); extent_end = (extent_start + size) >> inode->i_blkbits; @@ -1367,7 +1397,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, btrfs_map_bh_to_logical(root, result, 0); } not_found: - if (create) { + if (create & BTRFS_GET_BLOCK_CREATE) { struct btrfs_key ins; ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, alloc_hint, (u64)-1, @@ -1399,6 +1429,21 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, return err; } +static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + mutex_lock(&root->fs_info->fs_mutex); + btrfs_get_block_lock(inode, iblock, result, BTRFS_GET_BLOCK_NO_DIRECT); + mutex_unlock(&root->fs_info->fs_mutex); + return 0; +} + +static sector_t btrfs_bmap(struct address_space *as, sector_t block) +{ + return generic_block_bmap(as, block, btrfs_get_block_bmap); +} + static int btrfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { @@ -2428,7 +2473,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) key.offset = (u64)-1; dir = root->fs_info->sb->s_root->d_inode; ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, - name, namelen, dir->i_ino, &key, 0); + name, namelen, dir->i_ino, &key, + BTRFS_FT_DIR); BUG_ON(ret); ret = btrfs_commit_transaction(trans, root); @@ -2505,7 +2551,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, name, namelen, root->fs_info->sb->s_root->d_inode->i_ino, - &key, 0); + &key, BTRFS_FT_DIR); BUG_ON(ret); @@ -2833,7 +2879,8 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, btrfs_release_path(root, path); ret = btrfs_insert_dir_item(trans, root, "..", 2, - old_inode->i_ino, location, 0); + old_inode->i_ino, location, + BTRFS_FT_DIR); if (ret) goto out_fail; } @@ -3003,6 +3050,7 @@ static struct address_space_operations btrfs_aops = { .sync_page = block_sync_page, .prepare_write = btrfs_prepare_write, .commit_write = btrfs_commit_write, + .bmap = btrfs_bmap, }; static struct address_space_operations btrfs_symlink_aops = { -- cgit v1.2.3-70-g09d2 From 5276aedab0baacfb3c5483208b8be85a8416bd5f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 11 Jun 2007 21:33:38 -0400 Subject: Btrfs: fix oops after block group lookup Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 - fs/btrfs/ctree.h | 3 +++ fs/btrfs/extent-tree.c | 29 ++++++++++++++++------------- fs/btrfs/super.c | 8 ++------ 4 files changed, 21 insertions(+), 20 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 61d25b5de28..d9b6d38c603 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -16,6 +16,5 @@ * Use relocation to try and fix write errors * Make allocator much smarter * xattrs (directory streams for regular files) -* fsck * Scrub & defrag diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5ab25a0cb16..4e136b7b03a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -998,6 +998,9 @@ static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) btrfs_item_offset((leaf)->items + (slot)))) /* extent-tree.c */ +struct btrfs_block_group_cache *btrfs_lookup_block_group(struct + btrfs_fs_info *info, + u64 blocknr); struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 645a4228bfe..f509ffa38d0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -135,9 +135,9 @@ printk("cache block group %Lu\n", block_group->key.objectid); return 0; } -static struct btrfs_block_group_cache *lookup_block_group(struct - btrfs_fs_info *info, - u64 blocknr) +struct btrfs_block_group_cache *btrfs_lookup_block_group(struct + btrfs_fs_info *info, + u64 blocknr) { struct btrfs_block_group_cache *block_group; int ret; @@ -208,7 +208,8 @@ out: return max(cache->last_alloc, search_start); new_group: - cache = lookup_block_group(root->fs_info, last + cache->key.offset - 1); + cache = btrfs_lookup_block_group(root->fs_info, + last + cache->key.offset - 1); if (!cache) { return max((*cache_ret)->last_alloc, search_start); } @@ -250,7 +251,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (search_start) { struct btrfs_block_group_cache *shint; - shint = lookup_block_group(info, search_start); + shint = btrfs_lookup_block_group(info, search_start); if (shint->data == data) { used = btrfs_block_group_used(&shint->item); if (used + shint->pinned < @@ -576,7 +577,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, int ret; while(total) { - cache = lookup_block_group(info, blocknr); + cache = btrfs_lookup_block_group(info, blocknr); if (!cache) { printk(KERN_CRIT "blocknr %Lu lookup failed\n", blocknr); @@ -677,8 +678,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct first = gang[0]; for (i = 0; i < ret; i++) { clear_radix_bit(pinned_radix, gang[i]); - block_group = lookup_block_group(root->fs_info, - gang[i]); + block_group = btrfs_lookup_block_group(root->fs_info, + gang[i]); if (block_group) { WARN_ON(block_group->pinned == 0); block_group->pinned--; @@ -751,7 +752,8 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) err = set_radix_bit(&root->fs_info->pinned_radix, blocknr); if (!err) { struct btrfs_block_group_cache *cache; - cache = lookup_block_group(root->fs_info, blocknr); + cache = btrfs_lookup_block_group(root->fs_info, + blocknr); if (cache) cache->pinned++; } @@ -851,7 +853,8 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct for (i = 0; i < ret; i++) { wret = set_radix_bit(pinned_radix, gang[i]); if (wret == 0) { - cache = lookup_block_group(extent_root->fs_info, + cache = + btrfs_lookup_block_group(extent_root->fs_info, gang[i]); if (cache) cache->pinned++; @@ -938,7 +941,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (search_end == (u64)-1) search_end = btrfs_super_total_blocks(info->disk_super); if (hint_block) { - block_group = lookup_block_group(info, hint_block); + block_group = btrfs_lookup_block_group(info, hint_block); block_group = btrfs_find_block_group(root, block_group, hint_block, data, 1); } else { @@ -1118,7 +1121,7 @@ check_pending: info->extent_tree_prealloc_nr = total_found; } if (!data) { - block_group = lookup_block_group(info, ins->objectid); + block_group = btrfs_lookup_block_group(info, ins->objectid); if (block_group) { if (fill_prealloc) block_group->last_prealloc = @@ -1143,7 +1146,7 @@ new_group: else wrapped = 1; } - block_group = lookup_block_group(info, search_start); + block_group = btrfs_lookup_block_group(info, search_start); cond_resched(); if (!full_scan) block_group = btrfs_find_block_group(root, block_group, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2b24a5a2be6..9749090cfbb 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -62,7 +62,6 @@ static void btrfs_read_locked_inode(struct inode *inode) struct btrfs_inode_item *inode_item; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key location; - struct btrfs_block_group_cache *alloc_group; u64 alloc_group_block; int ret; @@ -95,11 +94,8 @@ static void btrfs_read_locked_inode(struct inode *inode) inode->i_blocks = btrfs_inode_nblocks(inode_item); inode->i_generation = btrfs_inode_generation(inode_item); alloc_group_block = btrfs_inode_block_group(inode_item); - ret = radix_tree_gang_lookup(&root->fs_info->block_group_radix, - (void **)&alloc_group, - alloc_group_block, 1); - BUG_ON(!ret); - BTRFS_I(inode)->block_group = alloc_group; + BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info, + alloc_group_block); btrfs_free_path(path); inode_item = NULL; -- cgit v1.2.3-70-g09d2 From 84f54cfa78c81991e087309a9b379f25f1ffdb10 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Jun 2007 07:43:08 -0400 Subject: Btrfs: 64 bit div fixes Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 43 ++++++++++++++++++++++++------------------- fs/btrfs/file-item.c | 3 ++- fs/btrfs/file.c | 7 ++++--- fs/btrfs/root-tree.c | 2 +- 4 files changed, 31 insertions(+), 24 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f509ffa38d0..6949eebc9dc 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -65,7 +65,6 @@ static int cache_block_group(struct btrfs_root *root, path = btrfs_alloc_path(); if (!path) return -ENOMEM; -printk("cache block group %Lu\n", block_group->key.objectid); key.objectid = block_group->key.objectid; key.flags = 0; key.offset = 0; @@ -164,8 +163,8 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct static u64 leaf_range(struct btrfs_root *root) { u64 size = BTRFS_LEAF_DATA_SIZE(root); - size = size / (sizeof(struct btrfs_extent_item) + - sizeof(struct btrfs_item)); + do_div(size, sizeof(struct btrfs_extent_item) + + sizeof(struct btrfs_item)); return size; } @@ -219,6 +218,13 @@ new_group: goto again; } +static u64 div_factor(u64 num, int factor) +{ + num *= factor; + do_div(num, 10); + return num; +} + struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, @@ -255,17 +261,18 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (shint->data == data) { used = btrfs_block_group_used(&shint->item); if (used + shint->pinned < - (shint->key.offset * factor) / 10) { + div_factor(shint->key.offset, factor)) { return shint; } } } if (hint && hint->data == data) { used = btrfs_block_group_used(&hint->item); - if (used + hint->pinned < (hint->key.offset * factor) / 10) { + if (used + hint->pinned < + div_factor(hint->key.offset, factor)) { return hint; } - if (used >= (hint->key.offset * 8) / 10) { + if (used >= div_factor(hint->key.offset, 8)) { radix_tree_tag_clear(radix, hint->key.objectid + hint->key.offset - 1, @@ -297,11 +304,11 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, cache[i]->key.offset; used = btrfs_block_group_used(&cache[i]->item); if (used + cache[i]->pinned < - (cache[i]->key.offset * factor) / 10) { + div_factor(cache[i]->key.offset, factor)) { found_group = cache[i]; goto found; } - if (used >= (cache[i]->key.offset * 8) / 10) { + if (used >= div_factor(cache[i]->key.offset, 8)) { radix_tree_tag_clear(radix, cache[i]->key.objectid + cache[i]->key.offset - 1, @@ -348,7 +355,6 @@ again: goto again; } if (!found_group) { -printk("find block group bailing to zero data %d\n", data); ret = radix_tree_gang_lookup(radix, (void **)&found_group, 0, 1); if (ret == 0) { @@ -386,7 +392,6 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 1); if (ret != 0) { -printk("can't find block %Lu %Lu\n", blocknr, num_blocks); BUG(); } BUG_ON(ret != 0); @@ -601,8 +606,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, } } if (cache->data != data && - old_val < cache->key.offset / 2) { -printk("changing block group %Lu from %d to %d\n", cache->key.objectid, cache->data, data); + old_val < (cache->key.offset >> 1)) { cache->data = data; radix_tree_delete(cache->radix, cache->key.objectid + @@ -634,9 +638,8 @@ printk("changing block group %Lu from %d to %d\n", cache->key.objectid, cache->d blocknr + i); } } - if (old_val < cache->key.offset / 2 && - old_val + num >= cache->key.offset / 2) { -printk("group %Lu now available\n", cache->key.objectid); + if (old_val < (cache->key.offset >> 1) && + old_val + num >= (cache->key.offset >> 1)) { radix_tree_tag_set(cache->radix, cache->key.objectid + cache->key.offset - 1, @@ -1000,10 +1003,10 @@ check_failed: } if (start_found) limit = last_block + - block_group->key.offset / 2; + (block_group->key.offset >> 1); else limit = search_start + - block_group->key.offset / 2; + (block_group->key.offset >> 1); ret = btrfs_next_leaf(root, path); if (ret == 0) continue; @@ -1534,9 +1537,11 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_key key; struct btrfs_key found_key; struct btrfs_leaf *leaf; - u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize; + u64 group_size_blocks; u64 used; + group_size_blocks = BTRFS_BLOCK_GROUP_SIZE >> + root->fs_info->sb->s_blocksize_bits; root = info->extent_root; key.objectid = 0; key.offset = group_size_blocks; @@ -1590,7 +1595,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) (void *)cache); BUG_ON(ret); used = btrfs_block_group_used(bi); - if (used < (key.offset * 8) / 10) { + if (used < div_factor(key.offset, 8)) { radix_tree_tag_set(radix, found_key.objectid + found_key.offset - 1, BTRFS_BLOCK_GROUP_AVAIL); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 0782e924dde..fb8c214160c 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -239,7 +239,8 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, if (isize <= key.offset) return 0; new_item_span = isize - key.offset; - blocks = (new_item_span + root->blocksize - 1) / root->blocksize; + blocks = (new_item_span + root->blocksize - 1) >> + root->fs_info->sb->s_blocksize_bits; new_item_size = blocks * BTRFS_CRC32_SIZE; if (new_item_size >= btrfs_item_size(leaf->items + slot)) return 0; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7f8e3035d99..0325dc03859 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -82,7 +82,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, for (i = 0; i < num_pages; i++) { offset = pos & (PAGE_CACHE_SIZE -1); - this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); + this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); /* FIXME, one block at a time */ mutex_lock(&root->fs_info->fs_mutex); @@ -395,7 +395,7 @@ static int prepare_pages(struct btrfs_root *root, cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); wait_on_page_writeback(pages[i]); offset = pos & (PAGE_CACHE_SIZE -1); - this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); + this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); if (!page_has_buffers(pages[i])) { create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize, @@ -567,7 +567,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, while(count > 0) { size_t offset = pos & (PAGE_CACHE_SIZE - 1); - size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); + size_t write_bytes = min(count, + (size_t)PAGE_CACHE_SIZE - offset); size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index a42943bd917..0564a73bb2e 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -102,7 +102,7 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = btrfs_del_item(trans, root, path); } else { btrfs_set_root_refs(ri, refs - 1); -printk("ref now %u root %Lu %Lu %u\n", refs -1, key->objectid, key->offset, key->flags); +printk("ref now %u root %llu %Lu %u\n", refs -1, key->objectid, key->offset, key->flags); mark_buffer_dirty(path->nodes[0]); } out: -- cgit v1.2.3-70-g09d2 From 5af3981c1878b0657b9babd2ef7ec98c2008cf2c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Jun 2007 07:50:13 -0400 Subject: Btrfs: printk fixes Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 10 ++++++---- fs/btrfs/extent-tree.c | 5 ----- fs/btrfs/file-item.c | 5 ----- fs/btrfs/print-tree.c | 42 ++++++++++++++++++++++-------------------- fs/btrfs/root-tree.c | 2 +- 5 files changed, 29 insertions(+), 35 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 96bf3ef3a79..87810117254 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -20,8 +20,9 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { struct btrfs_node *node = btrfs_buffer_node(buf); if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) { - printk(KERN_CRIT "bh_blocknr(buf) is %Lu, header is %Lu\n", - bh_blocknr(buf), btrfs_header_blocknr(&node->header)); + printk(KERN_CRIT "bh_blocknr(buf) is %llu, header is %llu\n", + (unsigned long long)bh_blocknr(buf), + (unsigned long long)btrfs_header_blocknr(&node->header)); return 1; } return 0; @@ -157,8 +158,9 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, return ret; if (verify) { if (memcmp(bh->b_data, result, BTRFS_CRC32_SIZE)) { - printk("checksum verify failed on %Lu\n", - bh_blocknr(bh)); + printk("btrfs: %s checksum verify failed on %llu\n", + root->fs_info->sb->s_id, + (unsigned long long)bh_blocknr(bh)); return 1; } } else { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6949eebc9dc..fe02fbfa6d5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -584,8 +584,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, while(total) { cache = btrfs_lookup_block_group(info, blocknr); if (!cache) { - printk(KERN_CRIT "blocknr %Lu lookup failed\n", - blocknr); return -1; } block_in_group = blocknr - cache->key.objectid; @@ -795,9 +793,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); if (ret) { - printk("failed to find %Lu\n", key.objectid); - btrfs_print_tree(extent_root, extent_root->node); - printk("failed to find %Lu\n", key.objectid); BUG(); } ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index fb8c214160c..d5a98827e38 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -28,10 +28,6 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, root, path, &file_key, sizeof(*item)); - if (ret) { -printk("failed to insert %Lu %Lu ret %d\n", objectid, pos, ret); -btrfs_print_leaf(root, btrfs_buffer_leaf(path->nodes[0])); - } BUG_ON(ret); item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); @@ -201,7 +197,6 @@ insert: ret = btrfs_insert_empty_item(trans, root, path, &file_key, BTRFS_CRC32_SIZE); if (ret != 0) { - printk("at insert for %Lu %u %Lu ret is %d\n", file_key.objectid, file_key.flags, file_key.offset, ret); WARN_ON(1); goto fail; } diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 28813411de6..21791f03756 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -15,31 +15,32 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_block_group_item *bi; u32 type; - printk("leaf %Lu total ptrs %d free space %d\n", - btrfs_header_blocknr(&l->header), nr, + printk("leaf %llu total ptrs %d free space %d\n", + (unsigned long long)btrfs_header_blocknr(&l->header), nr, btrfs_leaf_free_space(root, l)); for (i = 0 ; i < nr ; i++) { item = l->items + i; type = btrfs_disk_key_type(&item->key); - printk("\titem %d key (%Lu %x %Lu) itemoff %d itemsize %d\n", + printk("\titem %d key (%llu %x %llu) itemoff %d itemsize %d\n", i, - btrfs_disk_key_objectid(&item->key), + (unsigned long long)btrfs_disk_key_objectid(&item->key), btrfs_disk_key_flags(&item->key), - btrfs_disk_key_offset(&item->key), + (unsigned long long)btrfs_disk_key_offset(&item->key), btrfs_item_offset(item), btrfs_item_size(item)); switch (type) { case BTRFS_INODE_ITEM_KEY: ii = btrfs_item_ptr(l, i, struct btrfs_inode_item); - printk("\t\tinode generation %Lu size %Lu mode %o\n", - btrfs_inode_generation(ii), - btrfs_inode_size(ii), + printk("\t\tinode generation %llu size %llu mode %o\n", + (unsigned long long)btrfs_inode_generation(ii), + (unsigned long long)btrfs_inode_size(ii), btrfs_inode_mode(ii)); break; case BTRFS_DIR_ITEM_KEY: di = btrfs_item_ptr(l, i, struct btrfs_dir_item); - printk("\t\tdir oid %Lu flags %u type %u\n", - btrfs_disk_key_objectid(&di->location), + printk("\t\tdir oid %llu flags %u type %u\n", + (unsigned long long)btrfs_disk_key_objectid( + &di->location), btrfs_dir_flags(di), btrfs_dir_type(di)); printk("\t\tname %.*s\n", @@ -47,8 +48,9 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) break; case BTRFS_ROOT_ITEM_KEY: ri = btrfs_item_ptr(l, i, struct btrfs_root_item); - printk("\t\troot data blocknr %Lu refs %u\n", - btrfs_root_blocknr(ri), btrfs_root_refs(ri)); + printk("\t\troot data blocknr %llu refs %u\n", + (unsigned long long)btrfs_root_blocknr(ri), + btrfs_root_refs(ri)); break; case BTRFS_EXTENT_ITEM_KEY: ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); @@ -58,8 +60,8 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) case BTRFS_BLOCK_GROUP_ITEM_KEY: bi = btrfs_item_ptr(l, i, struct btrfs_block_group_item); - printk("\t\tblock group used %Lu\n", - btrfs_block_group_used(bi)); + printk("\t\tblock group used %llu\n", + (unsigned long long)btrfs_block_group_used(bi)); break; case BTRFS_STRING_ITEM_KEY: printk("\t\titem data %.*s\n", btrfs_item_size(item), @@ -83,17 +85,17 @@ void btrfs_print_tree(struct btrfs_root *root, struct buffer_head *t) btrfs_print_leaf(root, (struct btrfs_leaf *)c); return; } - printk("node %Lu level %d total ptrs %d free spc %u\n", - btrfs_header_blocknr(&c->header), + printk("node %llu level %d total ptrs %d free spc %u\n", + (unsigned long long)btrfs_header_blocknr(&c->header), btrfs_header_level(&c->header), nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); for (i = 0; i < nr; i++) { - printk("\tkey %d (%Lu %u %Lu) block %Lu\n", + printk("\tkey %d (%llu %u %llu) block %llu\n", i, - c->ptrs[i].key.objectid, + (unsigned long long)c->ptrs[i].key.objectid, c->ptrs[i].key.flags, - c->ptrs[i].key.offset, - btrfs_node_blockptr(c, i)); + (unsigned long long)c->ptrs[i].key.offset, + (unsigned long long)btrfs_node_blockptr(c, i)); } for (i = 0; i < nr; i++) { struct buffer_head *next_buf = read_tree_block(root, diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 0564a73bb2e..13c30284fd8 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -102,7 +102,7 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = btrfs_del_item(trans, root, path); } else { btrfs_set_root_refs(ri, refs - 1); -printk("ref now %u root %llu %Lu %u\n", refs -1, key->objectid, key->offset, key->flags); + WARN_ON(1); mark_buffer_dirty(path->nodes[0]); } out: -- cgit v1.2.3-70-g09d2 From 6cbd55707802b98843f953d1ae6d8f5bcd9a76c0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Jun 2007 09:07:21 -0400 Subject: Btrfs: add GPLv2 Signed-off-by: Chris Mason --- fs/btrfs/COPYING | 356 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/TODO | 1 - fs/btrfs/bit-radix.c | 18 +++ fs/btrfs/bit-radix.h | 18 +++ fs/btrfs/btrfs_inode.h | 18 +++ fs/btrfs/ctree.c | 18 +++ fs/btrfs/ctree.h | 18 +++ fs/btrfs/dir-item.c | 18 +++ fs/btrfs/disk-io.c | 18 +++ fs/btrfs/disk-io.h | 18 +++ fs/btrfs/extent-tree.c | 18 +++ fs/btrfs/file-item.c | 18 +++ fs/btrfs/file.c | 18 +++ fs/btrfs/hash.c | 18 +++ fs/btrfs/hash.h | 18 +++ fs/btrfs/inode-item.c | 18 +++ fs/btrfs/inode-map.c | 18 +++ fs/btrfs/inode.c | 18 +++ fs/btrfs/ioctl.h | 18 +++ fs/btrfs/print-tree.c | 18 +++ fs/btrfs/print-tree.h | 18 +++ fs/btrfs/root-tree.c | 18 +++ fs/btrfs/super.c | 18 +++ fs/btrfs/sysfs.c | 18 +++ fs/btrfs/transaction.c | 18 +++ fs/btrfs/transaction.h | 18 +++ 26 files changed, 788 insertions(+), 1 deletion(-) create mode 100644 fs/btrfs/COPYING (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/COPYING b/fs/btrfs/COPYING new file mode 100644 index 00000000000..ca442d313d8 --- /dev/null +++ b/fs/btrfs/COPYING @@ -0,0 +1,356 @@ + + NOTE! This copyright does *not* cover user programs that use kernel + services by normal system calls - this is merely considered normal use + of the kernel, and does *not* fall under the heading of "derived work". + Also note that the GPL below is copyrighted by the Free Software + Foundation, but the instance of code that it refers to (the Linux + kernel) is copyrighted by me and others who actually wrote it. + + Also note that the only valid version of the GPL as far as the kernel + is concerned is _this_ particular version of the license (ie v2, not + v2.2 or v3.x or whatever), unless explicitly otherwise stated. + + Linus Torvalds + +---------------------------------------- + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 4a48a2365d5..d9b6d38c603 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -1,4 +1,3 @@ -* fix printk warnings * cleanup, add more error checking, get rid of BUG_ONs * Fix ENOSPC handling * Make allocator smarter diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index 8f9cd427723..a4ddf4de979 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "bit-radix.h" diff --git a/fs/btrfs/bit-radix.h b/fs/btrfs/bit-radix.h index 4e717e30db4..c100f54d5c3 100644 --- a/fs/btrfs/bit-radix.h +++ b/fs/btrfs/bit-radix.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __BIT_RADIX__ #define __BIT_RADIX__ #include diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 6b50076b02d..783f06b6325 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __BTRFS_I__ #define __BTRFS_I__ diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 20e01f95ce6..4736155af41 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 24410d42e3c..4bd648d68e8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __BTRFS__ #define __BTRFS__ diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index b408a3d20c7..0d179a44ebf 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 87810117254..8c68a64c0ce 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 822ccb8aa4a..7b76ccc4875 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __DISKIO__ #define __DISKIO__ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fe02fbfa6d5..975a0eb9241 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index d5a98827e38..37dd3801a10 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0325dc03859..566e526b60f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index 32de1ea1b64..e5c76903d72 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + /* * Original copy from: * linux/fs/ext3/hash.c diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h index e56c4670093..d3be0267058 100644 --- a/fs/btrfs/hash.h +++ b/fs/btrfs/hash.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __HASH__ #define __HASH__ int btrfs_name_hash(const char *name, int len, u64 *hash_result); diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index c3b990b661c..449e9dbdf95 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 318e27a6378..58930fc48be 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1cabc1951b0..46f869d6d0f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 497fadd98c3..23bed48c51a 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __IOCTL_ #define __IOCTL_ #include diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 21791f03756..1fa70568300 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 396041a05cf..205cd03601a 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __PRINT_TREE_ #define __PRINT_TREE_ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 13c30284fd8..7a2d63c1ee8 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 94b3535a2a1..ef8ad18b275 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index ed8b7e20cfb..ead9dc8051b 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4f3c8ddcb40..30914521cf0 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include "ctree.h" diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index f25b4900db4..5fb1d322aa1 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __TRANSACTION__ #define __TRANSACTION__ #include "btrfs_inode.h" -- cgit v1.2.3-70-g09d2 From f1ace244c8c1e16eaa5c8b3b5339849651e31ede Mon Sep 17 00:00:00 2001 From: Aneesh Date: Wed, 13 Jun 2007 16:18:26 -0400 Subject: btrfs: Code cleanup Attaching below is some of the code cleanups that i came across while reading the code. a) alloc_path already calls init_path. b) Mention that btrfs_inode is the in memory copy.Ext4 have ext4_inode_info as the in memory copy ext4_inode as the disk copy Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 1 + fs/btrfs/ctree.c | 1 - fs/btrfs/dir-item.c | 1 - fs/btrfs/extent-tree.c | 4 ---- fs/btrfs/file-item.c | 2 -- fs/btrfs/inode-item.c | 1 - fs/btrfs/inode.c | 10 +--------- fs/btrfs/root-tree.c | 3 --- 8 files changed, 2 insertions(+), 21 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 783f06b6325..6bce46be49f 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -19,6 +19,7 @@ #ifndef __BTRFS_I__ #define __BTRFS_I__ +/* in memory btrfs inode */ struct btrfs_inode { struct btrfs_root *root; struct btrfs_block_group_cache *block_group; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 4736155af41..498dea0854c 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1647,7 +1647,6 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); if (!ret) { ptr = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 0d179a44ebf..ff10cf5a842 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -74,7 +74,6 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); path = btrfs_alloc_path(); - btrfs_init_path(path); data_size = sizeof(*dir_item) + name_len; dir_item = insert_with_overflow(trans, root, path, &key, data_size, name, name_len); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 975a0eb9241..5f4eaba8b85 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -402,7 +402,6 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, &ins, 0); path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); key.objectid = blocknr; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -437,7 +436,6 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_extent_item *item; path = btrfs_alloc_path(); - btrfs_init_path(path); key.objectid = blocknr; key.offset = num_blocks; key.flags = 0; @@ -807,7 +805,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0); path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); if (ret) { @@ -1463,7 +1460,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); level = btrfs_header_level(btrfs_buffer_header(snap)); orig_level = level; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 37dd3801a10..3c140cc1dd0 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -38,7 +38,6 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); file_key.objectid = objectid; file_key.offset = pos; file_key.flags = 0; @@ -274,7 +273,6 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); file_key.objectid = objectid; file_key.offset = offset; file_key.flags = 0; diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 449e9dbdf95..e8b5a9f7c0c 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -35,7 +35,6 @@ int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); ret = btrfs_insert_item(trans, root, &key, inode_item, sizeof(*inode_item)); btrfs_release_path(root, path); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d3d5a4cdb6a..e639cc9e089 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -79,7 +79,6 @@ void btrfs_read_locked_inode(struct inode *inode) path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); mutex_lock(&root->fs_info->fs_mutex); memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); @@ -177,7 +176,6 @@ static int btrfs_update_inode(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location, 1); if (ret) { @@ -214,7 +212,6 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, name, name_len, -1); if (IS_ERR(di)) { @@ -289,7 +286,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); @@ -360,7 +356,6 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location, -1); BUG_ON(ret); @@ -687,7 +682,6 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, namelen, 0); if (!di || IS_ERR(di)) { @@ -874,7 +868,6 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) btrfs_set_key_type(&key, key_type); key.offset = filp->f_pos; path = btrfs_alloc_path(); - btrfs_init_path(path); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto err; @@ -1263,7 +1256,6 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); if (create & BTRFS_GET_BLOCK_CREATE) { WARN_ON(1); /* this almost but not quite works */ @@ -1280,7 +1272,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, } ret = btrfs_lookup_file_extent(NULL, root, path, - inode->i_ino, + objectid, iblock << inode->i_blkbits, 0); if (ret < 0) { err = ret; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 7a2d63c1ee8..8e1b9046d5e 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -36,7 +36,6 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); if (ret < 0) goto out; @@ -70,7 +69,6 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); ret = btrfs_search_slot(trans, root, key, path, 0, 1); if (ret < 0) goto out; @@ -106,7 +104,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); ret = btrfs_search_slot(trans, root, key, path, -1, 1); if (ret < 0) goto out; -- cgit v1.2.3-70-g09d2 From 8c2383c3dd2cb5bb39598ce4fa97154bc591020a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 18 Jun 2007 09:57:58 -0400 Subject: Subject: Rework btrfs_file_write to only allocate while page locks are held Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 + fs/btrfs/extent-tree.c | 1 - fs/btrfs/file-item.c | 1 - fs/btrfs/file.c | 244 +++++++++++++++++++++++++++---------------------- fs/btrfs/inode.c | 11 ++- fs/btrfs/print-tree.c | 18 ++++ fs/btrfs/transaction.c | 1 + 7 files changed, 161 insertions(+), 117 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 602b63dc76c..15cc9ec9230 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -541,6 +541,8 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root else ret = submit_bh(WRITE, bh); if (ret == -EOPNOTSUPP) { + get_bh(bh); + lock_buffer(bh); set_buffer_uptodate(bh); root->fs_info->do_barriers = 0; ret = submit_bh(WRITE, bh); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5f4eaba8b85..e0738c65bc5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1477,7 +1477,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root break; if (wret < 0) ret = wret; - btrfs_btree_balance_dirty(root); } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 1068993ab1c..cf894f09f6c 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -228,7 +228,6 @@ found: path->nodes[0]->b_data, root->fs_info->sb->s_blocksize); ret = btrfs_csum_data(root, data, len, &item->csum); -// printk("file %lu offset %llu csum %X\n", objectid, (unsigned long long)offset, *(int *)(&item->csum)); btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_release_path(root, path); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index de8d47b44e1..6b455c2b3f0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -207,6 +207,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, } path->slots[0]--; } +next_slot: keep = 0; bookend = 0; found_extent = 0; @@ -214,39 +215,48 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, extent = NULL; leaf = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; + ret = 0; btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); if (key.offset >= end || key.objectid != inode->i_ino) { - ret = 0; goto out; } - if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) { - ret = 0; + if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY) { goto out; } - extent = btrfs_item_ptr(leaf, slot, - struct btrfs_file_extent_item); - found_type = btrfs_file_extent_type(extent); - if (found_type == BTRFS_FILE_EXTENT_REG) { - extent_end = key.offset + - (btrfs_file_extent_num_blocks(extent) << - inode->i_blkbits); - found_extent = 1; - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - found_inline = 1; - extent_end = key.offset + - btrfs_file_extent_inline_len(leaf->items + slot); + if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { + extent = btrfs_item_ptr(leaf, slot, + struct btrfs_file_extent_item); + found_type = btrfs_file_extent_type(extent); + if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_end = key.offset + + (btrfs_file_extent_num_blocks(extent) << + inode->i_blkbits); + found_extent = 1; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + found_inline = 1; + extent_end = key.offset + + btrfs_file_extent_inline_len(leaf->items + + slot); + } + } else { + extent_end = search_start; } /* we found nothing we can drop */ - if (!found_extent && !found_inline) { - ret = 0; - goto out; - } - - /* we found nothing inside the range */ - if (search_start >= extent_end) { - ret = 0; - goto out; + if ((!found_extent && !found_inline) || + search_start >= extent_end) { + int nextret; + u32 nritems; + nritems = btrfs_header_nritems( + btrfs_buffer_header(path->nodes[0])); + if (slot >= nritems - 1) { + nextret = btrfs_next_leaf(root, path); + if (nextret) + goto out; + } else { + path->slots[0]++; + } + goto next_slot; } /* FIXME, there's only one inline extent allowed right now */ @@ -272,7 +282,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, WARN_ON(found_inline); bookend = 1; } - /* truncate existing extent */ if (start > key.offset) { u64 new_num; @@ -337,10 +346,14 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, ins.offset = end; ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); - btrfs_release_path(root, path); ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*extent)); + + if (ret) { + btrfs_print_leaf(root, btrfs_buffer_leaf(path->nodes[0])); + printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu\n", ret , ins.objectid, ins.flags, ins.offset, start, end, key.offset, extent_end); + } BUG_ON(ret); extent = btrfs_item_ptr( btrfs_buffer_leaf(path->nodes[0]), @@ -387,8 +400,7 @@ static int prepare_pages(struct btrfs_root *root, loff_t pos, unsigned long first_index, unsigned long last_index, - size_t write_bytes, - u64 alloc_extent_start) + size_t write_bytes) { int i; unsigned long index = pos >> PAGE_CACHE_SHIFT; @@ -399,6 +411,16 @@ static int prepare_pages(struct btrfs_root *root, struct buffer_head *bh; struct buffer_head *head; loff_t isize = i_size_read(inode); + struct btrfs_trans_handle *trans; + u64 hint_block; + u64 num_blocks; + u64 alloc_extent_start; + u64 start_pos; + struct btrfs_key ins; + + start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); + num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> + inode->i_blkbits; memset(pages, 0, num_pages * sizeof(struct page *)); @@ -408,6 +430,72 @@ static int prepare_pages(struct btrfs_root *root, err = -ENOMEM; goto failed_release; } + } + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (!trans) { + err = -ENOMEM; + mutex_unlock(&root->fs_info->fs_mutex); + goto out_unlock; + } + btrfs_set_trans_block_group(trans, inode); + /* FIXME blocksize != 4096 */ + inode->i_blocks += num_blocks << 3; + hint_block = 0; + + /* FIXME...EIEIO, ENOSPC and more */ + + /* step one, delete the existing extents in this range */ + /* FIXME blocksize != pagesize */ + if (start_pos < inode->i_size) { + err = btrfs_drop_extents(trans, root, inode, + start_pos, (pos + write_bytes + root->blocksize -1) & + ~((u64)root->blocksize - 1), &hint_block); + BUG_ON(err); + } + + /* insert any holes we need to create */ + if (inode->i_size < start_pos) { + u64 last_pos_in_file; + u64 hole_size; + u64 mask = root->blocksize - 1; + last_pos_in_file = (isize + mask) & ~mask; + hole_size = (start_pos - last_pos_in_file + mask) & ~mask; + hole_size >>= inode->i_blkbits; + if (last_pos_in_file < start_pos) { + err = btrfs_insert_file_extent(trans, root, + inode->i_ino, + last_pos_in_file, + 0, 0, hole_size); + } + BUG_ON(err); + } + + /* + * either allocate an extent for the new bytes or setup the key + * to show we are doing inline data in the extent + */ + if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || + pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { + err = btrfs_alloc_extent(trans, root, inode->i_ino, + num_blocks, hint_block, (u64)-1, + &ins, 1); + BUG_ON(err); + err = btrfs_insert_file_extent(trans, root, inode->i_ino, + start_pos, ins.objectid, ins.offset, + ins.offset); + BUG_ON(err); + } else { + ins.offset = 0; + ins.objectid = 0; + } + BUG_ON(err); + alloc_extent_start = ins.objectid; + err = btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + for (i = 0; i < num_pages; i++) { cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); wait_on_page_writeback(pages[i]); offset = pos & (PAGE_CACHE_SIZE -1); @@ -444,6 +532,11 @@ failed_truncate: if (pos > isize) vmtruncate(inode, isize); return err; + +out_unlock: + mutex_unlock(&root->fs_info->fs_mutex); + goto failed_release; + } static ssize_t btrfs_file_write(struct file *file, const char __user *buf, @@ -455,16 +548,14 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, int ret = 0; struct inode *inode = file->f_path.dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - struct page *pages[8]; + struct page **pages = NULL; + int nrptrs; struct page *pinned[2]; unsigned long first_index; unsigned long last_index; - u64 start_pos; - u64 num_blocks; - u64 alloc_extent_start; - u64 hint_block; - struct btrfs_trans_handle *trans; - struct btrfs_key ins; + + nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, + PAGE_CACHE_SIZE / (sizeof(struct page *))); pinned[0] = NULL; pinned[1] = NULL; if (file->f_flags & O_DIRECT) @@ -482,9 +573,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, goto out; file_update_time(file); - start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); - num_blocks = (count + pos - start_pos + root->blocksize - 1) >> - inode->i_blkbits; + pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); mutex_lock(&inode->i_mutex); first_index = pos >> PAGE_CACHE_SHIFT; @@ -516,87 +605,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } } - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - if (!trans) { - err = -ENOMEM; - mutex_unlock(&root->fs_info->fs_mutex); - goto out_unlock; - } - btrfs_set_trans_block_group(trans, inode); - /* FIXME blocksize != 4096 */ - inode->i_blocks += num_blocks << 3; - hint_block = 0; - - /* FIXME...EIEIO, ENOSPC and more */ - - /* step one, delete the existing extents in this range */ - if (start_pos < inode->i_size) { - /* FIXME blocksize != pagesize */ - ret = btrfs_drop_extents(trans, root, inode, - start_pos, - (pos + count + root->blocksize -1) & - ~((u64)root->blocksize - 1), - &hint_block); - BUG_ON(ret); - } - - /* insert any holes we need to create */ - if (inode->i_size < start_pos) { - u64 last_pos_in_file; - u64 hole_size; - u64 mask = root->blocksize - 1; - last_pos_in_file = (inode->i_size + mask) & ~mask; - hole_size = (start_pos - last_pos_in_file + mask) & ~mask; - hole_size >>= inode->i_blkbits; - if (last_pos_in_file < start_pos) { - ret = btrfs_insert_file_extent(trans, root, - inode->i_ino, - last_pos_in_file, - 0, 0, hole_size); - } - BUG_ON(ret); - } - - /* - * either allocate an extent for the new bytes or setup the key - * to show we are doing inline data in the extent - */ - if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || - pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { - ret = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, hint_block, (u64)-1, - &ins, 1); - BUG_ON(ret); - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - start_pos, ins.objectid, ins.offset, - ins.offset); - BUG_ON(ret); - } else { - ins.offset = 0; - ins.objectid = 0; - } - BUG_ON(ret); - alloc_extent_start = ins.objectid; - ret = btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - while(count > 0) { size_t offset = pos & (PAGE_CACHE_SIZE - 1); - size_t write_bytes = min(count, - (size_t)PAGE_CACHE_SIZE - offset); + size_t write_bytes = min(count, nrptrs * PAGE_CACHE_SIZE - + offset); size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + WARN_ON(num_pages > nrptrs); memset(pages, 0, sizeof(pages)); ret = prepare_pages(root, file, pages, num_pages, pos, first_index, last_index, - write_bytes, alloc_extent_start); + write_bytes); BUG_ON(ret); - /* FIXME blocks != pagesize */ - if (alloc_extent_start) - alloc_extent_start += num_pages; ret = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, buf); BUG_ON(ret); @@ -611,13 +633,13 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, pos += write_bytes; num_written += write_bytes; - balance_dirty_pages_ratelimited(inode->i_mapping); + balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); btrfs_btree_balance_dirty(root); cond_resched(); } -out_unlock: mutex_unlock(&inode->i_mutex); out: + kfree(pages); if (pinned[0]) page_cache_release(pinned[0]); if (pinned[1]) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index def33ac90d7..94f1c28c25b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -962,7 +962,6 @@ void btrfs_dirty_inode(struct inode *inode) btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); } static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, @@ -1402,7 +1401,6 @@ int btrfs_get_block_csum(struct inode *inode, sector_t iblock, goto out; } memcpy((char *)&result->b_private, &item->csum, BTRFS_CRC32_SIZE); -printk("get_block_sum file %lu offset %llu csum %X\n", inode->i_ino, (unsigned long long)offset, *(int *)(&item->csum)); out: if (path) btrfs_free_path(path); @@ -1476,7 +1474,6 @@ static void btrfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) (unsigned long long)offset); memset(kaddr + bh_offset(bh), 1, bh->b_size); flush_dcache_page(page); -printk("bad verify file %lu offset %llu bh_private %lX csum %X\n", inode->i_ino, (unsigned long long)offset, (unsigned long)(bh->b_private), *(int *)csum); } kunmap_atomic(kaddr, KM_IRQ0); } @@ -1655,6 +1652,13 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page, last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; + /* no csumming allowed when from PF_MEMALLOC */ + if (current->flags & PF_MEMALLOC) { + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; + } + if (!page_has_buffers(page)) { create_empty_buffers(page, blocksize, (1 << BH_Dirty)|(1 << BH_Uptodate)); @@ -1885,7 +1889,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) lock_page(page); wait_on_page_writeback(page); -printk("btrfs_page_mkwrite %lu %lu\n", page->mapping->host->i_ino, page->index); size = i_size_read(inode); if ((page->mapping != inode->i_mapping) || ((page->index << PAGE_CACHE_SHIFT) > size)) { diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 1fa70568300..e66acd4e274 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -31,6 +31,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_dir_item *di; struct btrfs_inode_item *ii; struct btrfs_block_group_item *bi; + struct btrfs_file_extent_item *fi; u32 type; printk("leaf %llu total ptrs %d free space %d\n", @@ -75,6 +76,23 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) printk("\t\textent data refs %u\n", btrfs_extent_refs(ei)); break; + + case BTRFS_EXTENT_DATA_KEY: + fi = btrfs_item_ptr(l, i, + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(fi) == + BTRFS_FILE_EXTENT_INLINE) { + printk("\t\tinline extent data size %u\n", + btrfs_file_extent_inline_len(l->items + i)); + break; + } + printk("\t\textent data disk block %llu nr %llu\n", + (unsigned long long)btrfs_file_extent_disk_blocknr(fi), + (unsigned long long)btrfs_file_extent_disk_num_blocks(fi)); + printk("\t\textent data offset %llu nr %llu\n", + (unsigned long long)btrfs_file_extent_offset(fi), + (unsigned long long)btrfs_file_extent_num_blocks(fi)); + break; case BTRFS_BLOCK_GROUP_ITEM_KEY: bi = btrfs_item_ptr(l, i, struct btrfs_block_group_item); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index cd4f1d14038..b5b99a85763 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -279,6 +279,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, BUG_ON(ret); kfree(dirty); mutex_unlock(&tree_root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(tree_root); } return 0; } -- cgit v1.2.3-70-g09d2 From 85e55b13e4e318672f669747e5e010d1b707b198 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 19 Jun 2007 15:50:51 -0400 Subject: Btrfs: cache the extent tree preallocation Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e0738c65bc5..47e91184fff 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -741,7 +741,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct BUG_ON(ret); } extent_root->fs_info->extent_tree_insert_nr = 0; - extent_root->fs_info->extent_tree_prealloc_nr = 0; return 0; } @@ -951,6 +950,18 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root num_blocks = 1; total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3; } + if (fill_prealloc) { + u64 first; + int nr = info->extent_tree_prealloc_nr; + first = info->extent_tree_prealloc[nr - 1]; + if (info->extent_tree_prealloc_nr >= total_needed && + first >= search_start) { + ins->objectid = info->extent_tree_prealloc[0]; + ins->offset = 1; + return 0; + } + info->extent_tree_prealloc_nr = 0; + } if (search_end == (u64)-1) search_end = btrfs_super_total_blocks(info->disk_super); if (hint_block) { @@ -1107,7 +1118,6 @@ check_pending: if (ins->objectid + num_blocks > first && ins->objectid <= info->extent_tree_prealloc[0]) { search_start = info->extent_tree_prealloc[0] + 1; - WARN_ON(!full_scan); goto new_group; } } -- cgit v1.2.3-70-g09d2 From e011599b0f375683499bf1a9954703f4959a8d00 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 19 Jun 2007 16:23:05 -0400 Subject: Btrfs: reada while dropping snapshots Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 47e91184fff..b38c3e92f0c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -940,7 +940,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int wrapped = 0; u64 limit; - path = btrfs_alloc_path(); ins->flags = 0; btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); @@ -974,6 +973,8 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root data, 1); } + path = btrfs_alloc_path(); + check_failed: if (!block_group->data) search_start = find_search_start(root, &block_group, @@ -1355,6 +1356,28 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, return 0; } +static void reada_walk_down(struct btrfs_root *root, + struct btrfs_node *node) +{ + int i; + u32 nritems; + u64 blocknr; + int ret; + u32 refs; + + nritems = btrfs_header_nritems(&node->header); + for (i = 0; i < nritems; i++) { + blocknr = btrfs_node_blockptr(node, i); + ret = lookup_extent_ref(NULL, root, blocknr, 1, &refs); + BUG_ON(ret); + if (refs != 1) + continue; + ret = readahead_tree_block(root, blocknr); + if (ret) + break; + } +} + /* * helper function for drop_snapshot, this walks down the tree dropping ref * counts as it goes. @@ -1375,6 +1398,7 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ret); if (refs > 1) goto out; + /* * walk down to the last node level and free all the leaves */ @@ -1382,8 +1406,13 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); cur = path->nodes[*level]; + + if (*level > 0 && path->slots[*level] == 0) + reada_walk_down(root, btrfs_buffer_node(cur)); + if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) WARN_ON(1); + if (path->slots[*level] >= btrfs_header_nritems(btrfs_buffer_header(cur))) break; -- cgit v1.2.3-70-g09d2 From 54aa1f4dfdacd60a19c4471220b24e581be6f774 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 22 Jun 2007 14:16:25 -0400 Subject: Btrfs: Audit callers and return codes to make sure -ENOSPC gets up the stack Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 106 ++++++++++++++++++++++++------- fs/btrfs/ctree.h | 3 - fs/btrfs/dir-item.c | 6 +- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 141 ++++++++++++++++++++++++++++++++++-------- fs/btrfs/file-item.c | 46 ++------------ fs/btrfs/file.c | 131 ++++++++++++++++++++++++--------------- fs/btrfs/inode.c | 165 ++++++++++++++++++++++++++++++++----------------- fs/btrfs/root-tree.c | 1 - fs/btrfs/super.c | 3 +- fs/btrfs/transaction.c | 21 ++++--- 11 files changed, 410 insertions(+), 215 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 498dea0854c..606a19b5916 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -73,6 +73,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root { struct buffer_head *cow; struct btrfs_node *cow_node; + int ret; if (btrfs_header_generation(btrfs_buffer_header(buf)) == trans->transid) { @@ -80,6 +81,8 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } cow = btrfs_alloc_free_block(trans, root, buf->b_blocknr); + if (IS_ERR(cow)) + return PTR_ERR(cow); cow_node = btrfs_buffer_node(cow); if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) WARN_ON(1); @@ -87,7 +90,9 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); btrfs_set_header_generation(&cow_node->header, trans->transid); btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); - btrfs_inc_ref(trans, root, buf); + ret = btrfs_inc_ref(trans, root, buf); + if (ret) + return ret; if (buf == root->node) { root->node = cow; get_bh(cow); @@ -320,6 +325,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root int wret; int pslot; int orig_slot = path->slots[level]; + int err_on_enospc = 0; u64 orig_ptr; if (level == 0) @@ -363,29 +369,43 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root BTRFS_NODEPTRS_PER_BLOCK(root) / 4) return 0; + if (btrfs_header_nritems(&mid->header) < 2) + err_on_enospc = 1; + left_buf = read_node_slot(root, parent_buf, pslot - 1); right_buf = read_node_slot(root, parent_buf, pslot + 1); /* first, try to make some room in the middle buffer */ if (left_buf) { - btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1, - &left_buf); + wret = btrfs_cow_block(trans, root, left_buf, + parent_buf, pslot - 1, &left_buf); + if (wret) { + ret = wret; + goto enospc; + } left = btrfs_buffer_node(left_buf); orig_slot += btrfs_header_nritems(&left->header); wret = push_node_left(trans, root, left_buf, mid_buf); if (wret < 0) ret = wret; + if (btrfs_header_nritems(&mid->header) < 2) + err_on_enospc = 1; } /* * then try to empty the right most buffer into the middle */ if (right_buf) { - btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1, - &right_buf); + wret = btrfs_cow_block(trans, root, right_buf, + parent_buf, pslot + 1, &right_buf); + if (wret) { + ret = wret; + goto enospc; + } + right = btrfs_buffer_node(right_buf); wret = push_node_left(trans, root, mid_buf, right_buf); - if (wret < 0) + if (wret < 0 && wret != -ENOSPC) ret = wret; if (btrfs_header_nritems(&right->header) == 0) { u64 blocknr = bh_blocknr(right_buf); @@ -421,8 +441,10 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root */ BUG_ON(!left_buf); wret = balance_node_right(trans, root, mid_buf, left_buf); - if (wret < 0) + if (wret < 0) { ret = wret; + goto enospc; + } BUG_ON(wret == 1); } if (btrfs_header_nritems(&mid->header) == 0) { @@ -467,7 +489,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]), path->slots[level])) BUG(); - +enospc: if (right_buf) btrfs_block_release(root, right_buf); if (left_buf) @@ -519,10 +541,15 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; } else { - btrfs_cow_block(trans, root, left_buf, parent_buf, - pslot - 1, &left_buf); - left = btrfs_buffer_node(left_buf); - wret = push_node_left(trans, root, left_buf, mid_buf); + ret = btrfs_cow_block(trans, root, left_buf, parent_buf, + pslot - 1, &left_buf); + if (ret) + wret = 1; + else { + left = btrfs_buffer_node(left_buf); + wret = push_node_left(trans, root, + left_buf, mid_buf); + } } if (wret < 0) ret = wret; @@ -561,11 +588,16 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; } else { - btrfs_cow_block(trans, root, right_buf, - parent_buf, pslot + 1, &right_buf); - right = btrfs_buffer_node(right_buf); - wret = balance_node_right(trans, root, - right_buf, mid_buf); + ret = btrfs_cow_block(trans, root, right_buf, + parent_buf, pslot + 1, + &right_buf); + if (ret) + wret = 1; + else { + right = btrfs_buffer_node(right_buf); + wret = balance_node_right(trans, root, + right_buf, mid_buf); + } } if (wret < 0) ret = wret; @@ -631,6 +663,10 @@ again: p->nodes[level + 1], p->slots[level + 1], &cow_buf); + if (wret) { + btrfs_block_release(root, cow_buf); + return wret; + } b = cow_buf; c = btrfs_buffer_node(b); } @@ -737,6 +773,7 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root src_nritems = btrfs_header_nritems(&src->header); dst_nritems = btrfs_header_nritems(&dst->header); push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; + if (push_items <= 0) { return 1; } @@ -827,6 +864,8 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(path->nodes[level-1] != root->node); t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr); + if (IS_ERR(t)) + return PTR_ERR(t); c = btrfs_buffer_node(t); memset(c, 0, root->blocksize); btrfs_set_header_nritems(&c->header, 1); @@ -929,10 +968,15 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_header_nritems(&c->header) < BTRFS_NODEPTRS_PER_BLOCK(root) - 1) return 0; + if (ret < 0) + return ret; } c_nritems = btrfs_header_nritems(&c->header); split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr); + if (IS_ERR(split_buffer)) + return PTR_ERR(split_buffer); + split = btrfs_buffer_node(split_buffer); btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); @@ -1022,6 +1066,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_item *item; u32 left_nritems; u32 right_nritems; + int ret; slot = path->slots[1]; if (!path->nodes[1]) { @@ -1041,7 +1086,12 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } /* cow and double check */ - btrfs_cow_block(trans, root, right_buf, upper, slot + 1, &right_buf); + ret = btrfs_cow_block(trans, root, right_buf, upper, + slot + 1, &right_buf); + if (ret) { + btrfs_block_release(root, right_buf); + return 1; + } right = btrfs_buffer_leaf(right_buf); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { @@ -1162,7 +1212,11 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root } /* cow and double check */ - btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t); + ret = btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t); + if (ret) { + /* we hit -ENOSPC, but it isn't fatal here */ + return 1; + } left = btrfs_buffer_leaf(t); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { @@ -1309,8 +1363,11 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root slot = path->slots[0]; nritems = btrfs_header_nritems(&l->header); mid = (nritems + 1)/ 2; + right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); - BUG_ON(!right_buffer); + if (IS_ERR(right_buffer)) + return PTR_ERR(right_buffer); + right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); @@ -1407,7 +1464,9 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (!double_split) return ret; right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); - BUG_ON(!right_buffer); + if (IS_ERR(right_buffer)) + return PTR_ERR(right_buffer); + right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); @@ -1655,7 +1714,6 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root ptr, data, data_size); btrfs_mark_buffer_dirty(path->nodes[0]); } - btrfs_release_path(root, path); btrfs_free_path(path); return ret; } @@ -1775,12 +1833,12 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, slot = path->slots[1]; get_bh(leaf_buf); wret = push_leaf_left(trans, root, path, 1); - if (wret < 0) + if (wret < 0 && wret != -ENOSPC) ret = wret; if (path->nodes[0] == leaf_buf && btrfs_header_nritems(&leaf->header)) { wret = push_leaf_right(trans, root, path, 1); - if (wret < 0) + if (wret < 0 && wret != -ENOSPC) ret = wret; } if (btrfs_header_nritems(&leaf->header) == 0) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 19a1d998fca..77071f27397 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1126,9 +1126,6 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 offset, char *data, size_t len); -int btrfs_csum_verify_file_block(struct btrfs_root *root, - u64 objectid, u64 offset, - char *data, size_t len); struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index ff10cf5a842..d5db8bdea3d 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -47,6 +47,8 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle if (ret) return ERR_PTR(ret); } + if (ret < 0) + return ERR_PTR(ret); WARN_ON(ret > 0); leaf = btrfs_buffer_leaf(path->nodes[0]); item = leaf->items + path->slots[0]; @@ -225,14 +227,13 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, struct btrfs_leaf *leaf; u32 sub_item_len; u32 item_len; - int ret; + int ret = 0; leaf = btrfs_buffer_leaf(path->nodes[0]); sub_item_len = sizeof(*di) + btrfs_dir_name_len(di); item_len = btrfs_item_size(leaf->items + path->slots[0]); if (sub_item_len == btrfs_item_size(leaf->items + path->slots[0])) { ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); } else { char *ptr = (char *)di; char *start = btrfs_item_ptr(leaf, path->slots[0], char); @@ -240,7 +241,6 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, item_len - (ptr + sub_item_len - start)); ret = btrfs_truncate_item(trans, root, path, item_len - sub_item_len); - BUG_ON(ret); } return 0; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3712e946f9c..865a284aa06 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -580,7 +580,7 @@ int close_ctree(struct btrfs_root *root) btrfs_transaction_flush_work(root); mutex_lock(&fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); - btrfs_commit_transaction(trans, root); + ret = btrfs_commit_transaction(trans, root); /* run commit again to drop the original snapshot */ trans = btrfs_start_transaction(root, 1); btrfs_commit_transaction(trans, root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b38c3e92f0c..8025e9f8ef1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -100,6 +100,8 @@ static int cache_block_group(struct btrfs_root *root, if (slot >= btrfs_header_nritems(&leaf->header)) { reada_extent_leaves(root, path, limit); ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto err; if (ret == 0) { continue; } else { @@ -148,6 +150,7 @@ static int cache_block_group(struct btrfs_root *root, } block_group->cached = 1; +err: btrfs_free_path(path); return 0; } @@ -201,7 +204,9 @@ static u64 find_search_start(struct btrfs_root *root, last = max(last, cache->last_prealloc); } again: - cache_block_group(root, cache); + ret = cache_block_group(root, cache); + if (ret) + goto out; while(1) { ret = find_first_radix_bit(&root->fs_info->extent_map_radix, gang, last, ARRAY_SIZE(gang)); @@ -398,16 +403,23 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_key ins; u32 refs; - find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1, 0, - &ins, 0); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; + ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, + (u64)-1, 0, &ins, 0); + if (ret) { + btrfs_free_path(path); + return ret; + } key.objectid = blocknr; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 1); + if (ret < 0) + return ret; if (ret != 0) { BUG(); } @@ -442,12 +454,14 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 0); + if (ret < 0) + goto out; if (ret != 0) BUG(); l = btrfs_buffer_leaf(path->nodes[0]); item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); *refs = btrfs_extent_refs(item); - btrfs_release_path(root->fs_info->extent_root, path); +out: btrfs_free_path(path); return 0; } @@ -469,6 +483,8 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, int i; int leaf; int ret; + int faili; + int err; if (!root->ref_cows) return 0; @@ -491,14 +507,45 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, continue; ret = btrfs_inc_extent_ref(trans, root, disk_blocknr, btrfs_file_extent_disk_num_blocks(fi)); - BUG_ON(ret); + if (ret) { + faili = i; + goto fail; + } } else { blocknr = btrfs_node_blockptr(buf_node, i); ret = btrfs_inc_extent_ref(trans, root, blocknr, 1); - BUG_ON(ret); + if (ret) { + faili = i; + goto fail; + } } } return 0; +fail: + for (i =0; i < faili; i++) { + if (leaf) { + u64 disk_blocknr; + key = &buf_leaf->items[i].key; + if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(buf_leaf, i, + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(fi) == + BTRFS_FILE_EXTENT_INLINE) + continue; + disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + if (disk_blocknr == 0) + continue; + err = btrfs_free_extent(trans, root, disk_blocknr, + btrfs_file_extent_disk_num_blocks(fi), 0); + BUG_ON(err); + } else { + blocknr = btrfs_node_blockptr(buf_node, i); + err = btrfs_free_extent(trans, root, blocknr, 1, 0); + BUG_ON(err); + } + } + return ret; } static int write_one_cache_group(struct btrfs_trans_handle *trans, @@ -512,15 +559,20 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, struct btrfs_block_group_item *bi; struct btrfs_key ins; - find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, 0); + ret = find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, 0); + /* FIXME, set bit to recalc cache groups on next mount */ + if (ret) + return ret; ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); + if (ret < 0) + goto fail; BUG_ON(ret); bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_block_group_item); memcpy(bi, &cache->item, sizeof(*bi)); mark_buffer_dirty(path->nodes[0]); btrfs_release_path(extent_root, path); - +fail: finish_current_insert(trans, extent_root); pending_ret = del_pending_extents(trans, extent_root); if (ret) @@ -543,6 +595,7 @@ static int write_dirty_block_radix(struct btrfs_trans_handle *trans, int werr = 0; int i; struct btrfs_path *path; + unsigned long off = 0; path = btrfs_alloc_path(); if (!path) @@ -550,18 +603,28 @@ static int write_dirty_block_radix(struct btrfs_trans_handle *trans, while(1) { ret = radix_tree_gang_lookup_tag(radix, (void **)cache, - 0, ARRAY_SIZE(cache), + off, ARRAY_SIZE(cache), BTRFS_BLOCK_GROUP_DIRTY); if (!ret) break; for (i = 0; i < ret; i++) { - radix_tree_tag_clear(radix, cache[i]->key.objectid + - cache[i]->key.offset - 1, - BTRFS_BLOCK_GROUP_DIRTY); err = write_one_cache_group(trans, root, path, cache[i]); - if (err) + /* + * if we fail to write the cache group, we want + * to keep it marked dirty in hopes that a later + * write will work + */ + if (err) { werr = err; + off = cache[i]->key.objectid + + cache[i]->key.offset; + continue; + } + + radix_tree_tag_clear(radix, cache[i]->key.objectid + + cache[i]->key.offset - 1, + BTRFS_BLOCK_GROUP_DIRTY); } } btrfs_free_path(path); @@ -801,14 +864,20 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; - find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; - ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); + ret = find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0); if (ret) { - BUG(); + btrfs_free_path(path); + return ret; } + + ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); + if (ret < 0) + return ret; + BUG_ON(ret); ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_extent_item); BUG_ON(ei->refs == 0); @@ -827,8 +896,9 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_super_blocks_used(info->disk_super, super_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, path); - if (ret) - BUG(); + if (ret) { + return ret; + } ret = update_block_group(trans, root, blocknr, num_blocks, 0, mark_free, 0); BUG_ON(ret); @@ -1075,7 +1145,6 @@ next: path->slots[0]++; cond_resched(); } - // FIXME -ENOSPC check_pending: /* we have to make sure we didn't find an extent that has already * been allocated by the map tree or the original allocation @@ -1246,7 +1315,15 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, ret = find_free_extent(trans, root, num_blocks, search_start, search_end, hint_block, ins, data); if (ret) { - return ret; + if (search_start == 0) + return ret; + search_end = search_start - 1; + search_start = 0; + hint_block = search_start; + ret = find_free_extent(trans, root, num_blocks, search_start, + search_end, hint_block, ins, data); + if (ret) + return ret; } /* @@ -1271,7 +1348,16 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, search_end, hint_block, &prealloc_key, 0); if (ret) { - return ret; + if (search_start == 0) + return ret; + search_end = search_start - 1; + search_start = 0; + hint_block = search_start; + ret = find_free_extent(trans, root, 0, search_start, + search_end, hint_block, + &prealloc_key, 0); + if (ret) + return ret; } } @@ -1309,11 +1395,14 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, 1, hint, (unsigned long)-1, &ins, 0); if (ret) { - BUG(); - return NULL; + BUG_ON(ret > 0); + return ERR_PTR(ret); } - BUG_ON(ret); buf = btrfs_find_create_tree_block(root, ins.objectid); + if (!buf) { + btrfs_free_extent(trans, root, ins.objectid, 1, 0); + return ERR_PTR(-ENOMEM); + } set_buffer_uptodate(buf); set_buffer_checked(buf); set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index cf894f09f6c..68859934ae2 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -45,6 +45,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, root, path, &file_key, sizeof(*item)); + if (ret < 0) + goto out; BUG_ON(ret); item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); @@ -55,10 +57,9 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_generation(item, trans->transid); btrfs_set_file_extent_type(item, BTRFS_FILE_EXTENT_REG); btrfs_mark_buffer_dirty(path->nodes[0]); - - btrfs_release_path(root, path); +out: btrfs_free_path(path); - return 0; + return ret; } struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, @@ -213,6 +214,8 @@ insert: csum_offset = 0; ret = btrfs_insert_empty_item(trans, root, path, &file_key, BTRFS_CRC32_SIZE); + if (ret < 0) + goto fail; if (ret != 0) { WARN_ON(1); goto fail; @@ -261,40 +264,3 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, return ret; } -int btrfs_csum_verify_file_block(struct btrfs_root *root, - u64 objectid, u64 offset, - char *data, size_t len) -{ - int ret; - struct btrfs_key file_key; - struct btrfs_path *path; - struct btrfs_csum_item *item; - char result[BTRFS_CRC32_SIZE]; - - path = btrfs_alloc_path(); - BUG_ON(!path); - file_key.objectid = objectid; - file_key.offset = offset; - file_key.flags = 0; - btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); - mutex_lock(&root->fs_info->fs_mutex); - - item = btrfs_lookup_csum(NULL, root, path, objectid, offset, 0); - if (IS_ERR(item)) { - ret = PTR_ERR(item); - /* a csum that isn't present is a preallocated region. */ - if (ret == -ENOENT || ret == -EFBIG) - ret = -ENOENT; - goto fail; - } - - ret = btrfs_csum_data(root, data, len, result); - WARN_ON(ret); - if (memcmp(result, &item->csum, BTRFS_CRC32_SIZE)) - ret = 1; -fail: - btrfs_release_path(root, path); - btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); - return ret; -} diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 23f02935968..fef7ba1e707 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -81,6 +81,62 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) } } +static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, + u64 offset, ssize_t size, + struct buffer_head *bh) +{ + struct btrfs_key key; + struct btrfs_path *path; + char *ptr, *kaddr; + struct btrfs_trans_handle *trans; + struct btrfs_file_extent_item *ei; + u32 datasize; + int err = 0; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + key.objectid = inode->i_ino; + key.offset = offset; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + BUG_ON(size >= PAGE_CACHE_SIZE); + datasize = btrfs_file_extent_calc_inline_size(size); + + ret = btrfs_insert_empty_item(trans, root, path, &key, + datasize); + if (ret) { + err = ret; + goto fail; + } + ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(ei, trans->transid); + btrfs_set_file_extent_type(ei, + BTRFS_FILE_EXTENT_INLINE); + ptr = btrfs_file_extent_inline_start(ei); + + kaddr = kmap_atomic(bh->b_page, KM_USER0); + btrfs_memcpy(root, path->nodes[0]->b_data, + ptr, kaddr + bh_offset(bh), + size); + kunmap_atomic(kaddr, KM_USER0); + mark_buffer_dirty(path->nodes[0]); +fail: + btrfs_free_path(path); + ret = btrfs_end_transaction(trans, root); + if (ret && !err) + err = ret; + mutex_unlock(&root->fs_info->fs_mutex); + return err; +} + static int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct file *file, @@ -96,57 +152,22 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, int this_write; struct inode *inode = file->f_path.dentry->d_inode; struct buffer_head *bh; - struct btrfs_file_extent_item *ei; for (i = 0; i < num_pages; i++) { offset = pos & (PAGE_CACHE_SIZE -1); this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); - /* FIXME, one block at a time */ + /* FIXME, one block at a time */ bh = page_buffers(pages[i]); if (buffer_mapped(bh) && bh->b_blocknr == 0) { - struct btrfs_key key; - struct btrfs_path *path; - char *ptr, *kaddr; - u32 datasize; - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - - /* create an inline extent, and copy the data in */ - path = btrfs_alloc_path(); - BUG_ON(!path); - key.objectid = inode->i_ino; - key.offset = pages[i]->index << PAGE_CACHE_SHIFT; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); - BUG_ON(write_bytes >= PAGE_CACHE_SIZE); - datasize = offset + - btrfs_file_extent_calc_inline_size(write_bytes); - - ret = btrfs_insert_empty_item(trans, root, path, &key, - datasize); - BUG_ON(ret); - ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(ei, trans->transid); - btrfs_set_file_extent_type(ei, - BTRFS_FILE_EXTENT_INLINE); - ptr = btrfs_file_extent_inline_start(ei); - - kaddr = kmap_atomic(bh->b_page, KM_USER0); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, kaddr + bh_offset(bh), - offset + write_bytes); - kunmap_atomic(kaddr, KM_USER0); - - mark_buffer_dirty(path->nodes[0]); - btrfs_free_path(path); - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); + ret = insert_inline_extent(root, inode, + pages[i]->index << PAGE_CACHE_SHIFT, + offset + this_write, bh); + if (ret) { + err = ret; + goto failed; + } } ret = btrfs_commit_write(file, pages[i], offset, @@ -321,6 +342,7 @@ next_slot: btrfs_file_extent_disk_blocknr(extent); } ret = btrfs_del_item(trans, root, path); + /* TODO update progress marker and return */ BUG_ON(ret); btrfs_release_path(root, path); extent = NULL; @@ -452,7 +474,8 @@ static int prepare_pages(struct btrfs_root *root, err = btrfs_drop_extents(trans, root, inode, start_pos, (pos + write_bytes + root->blocksize -1) & ~((u64)root->blocksize - 1), &hint_block); - BUG_ON(err); + if (err) + goto failed_release; } /* insert any holes we need to create */ @@ -469,7 +492,8 @@ static int prepare_pages(struct btrfs_root *root, last_pos_in_file, 0, 0, hole_size); } - BUG_ON(err); + if (err) + goto failed_release; } /* @@ -481,11 +505,13 @@ static int prepare_pages(struct btrfs_root *root, err = btrfs_alloc_extent(trans, root, inode->i_ino, num_blocks, hint_block, (u64)-1, &ins, 1); - BUG_ON(err); + if (err) + goto failed_truncate; err = btrfs_insert_file_extent(trans, root, inode->i_ino, start_pos, ins.objectid, ins.offset, ins.offset); - BUG_ON(err); + if (err) + goto failed_truncate; } else { ins.offset = 0; ins.objectid = 0; @@ -618,16 +644,21 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, ret = prepare_pages(root, file, pages, num_pages, pos, first_index, last_index, write_bytes); - BUG_ON(ret); + if (ret) + goto out; ret = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, buf); - BUG_ON(ret); + if (ret) { + btrfs_drop_pages(pages, num_pages); + goto out; + } ret = dirty_and_release_pages(NULL, root, file, pages, num_pages, pos, write_bytes); - BUG_ON(ret); btrfs_drop_pages(pages, num_pages); + if (ret) + goto out; buf += write_bytes; count -= write_bytes; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 94f1c28c25b..6d031daa777 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -212,7 +212,11 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, struct btrfs_dir_item *di; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) { + ret = -ENOMEM; + goto err; + } + di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, name, name_len, -1); if (IS_ERR(di)) { @@ -225,7 +229,8 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, } objectid = btrfs_disk_key_objectid(&di->location); ret = btrfs_delete_one_dir_name(trans, root, path, di); - BUG_ON(ret); + if (ret) + goto err; btrfs_release_path(root, path); di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, @@ -239,7 +244,6 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, goto err; } ret = btrfs_delete_one_dir_name(trans, root, path, di); - BUG_ON(ret); dentry->d_inode->i_ctime = dir->i_ctime; err: @@ -248,7 +252,7 @@ err: dir->i_size -= name_len * 2; btrfs_update_inode(trans, root, dir); drop_nlink(dentry->d_inode); - btrfs_update_inode(trans, root, dentry->d_inode); + ret = btrfs_update_inode(trans, root, dentry->d_inode); dir->i_sb->s_dirt = 1; } return ret; @@ -359,9 +363,10 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans, BUG_ON(!path); ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location, -1); - BUG_ON(ret); - ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); + if (ret > 0) + ret = -ENOENT; + if (!ret) + ret = btrfs_del_item(trans, root, path); btrfs_free_path(path); return ret; } @@ -516,7 +521,8 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, } if (del_item) { ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); + if (ret) + goto error; } else { break; } @@ -577,19 +583,22 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) page->index << PAGE_CACHE_SHIFT, (page->index + 1) << PAGE_CACHE_SHIFT, &alloc_hint); - BUG_ON(ret); + if (ret) + goto out; ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, alloc_hint, (u64)-1, &ins, 1); - BUG_ON(ret); + if (ret) + goto out; ret = btrfs_insert_file_extent(trans, root, inode->i_ino, page->index << PAGE_CACHE_SHIFT, ins.objectid, 1, 1); - BUG_ON(ret); + if (ret) + goto out; SetPageChecked(page); kaddr = kmap(page); memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); flush_dcache_page(page); - btrfs_csum_file_block(trans, root, inode->i_ino, + ret = btrfs_csum_file_block(trans, root, inode->i_ino, page->index << PAGE_CACHE_SHIFT, kaddr, PAGE_CACHE_SIZE); kunmap(page); @@ -633,9 +642,10 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_set_trans_block_group(trans, inode); err = btrfs_insert_file_extent(trans, root, inode->i_ino, pos, 0, 0, hole_size); - BUG_ON(err); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + if (err) + return err; } out: err = inode_setattr(inode, attr); @@ -657,12 +667,20 @@ void btrfs_delete_inode(struct inode *inode) trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); ret = btrfs_truncate_in_trans(trans, root, inode); - BUG_ON(ret); - btrfs_free_inode(trans, root, inode); + if (ret) + goto no_delete_lock; + ret = btrfs_free_inode(trans, root, inode); + if (ret) + goto no_delete_lock; btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root); return; + +no_delete_lock: + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); no_delete: clear_inode(inode); } @@ -946,7 +964,7 @@ int btrfs_write_inode(struct inode *inode, int wait) } /* - * This is somewhat expense, updating the tree every time the + * This is somewhat expensive, updating the tree every time the * inode changes. But, it is most likely to find the inode in cache. * FIXME, needs more benchmarking...there are no reasons other than performance * to keep or drop this code. @@ -1002,8 +1020,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); ret = btrfs_insert_inode(trans, root, objectid, &inode_item); - BUG_ON(ret); - + if (ret) + return ERR_PTR(ret); insert_inode_hash(inode); return inode; } @@ -1121,7 +1139,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, drop_inode = 1; dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, dir); - btrfs_update_inode(trans, root, inode); + err = btrfs_update_inode(trans, root, inode); + if (err) + drop_inode = 1; btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -1349,17 +1369,26 @@ not_found: ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, alloc_hint, (u64)-1, &ins, 1); - BUG_ON(ret); + if (ret) { + err = ret; + goto out; + } ret = btrfs_insert_file_extent(trans, root, inode->i_ino, iblock << inode->i_blkbits, ins.objectid, ins.offset, ins.offset); - BUG_ON(ret); + if (ret) { + err = ret; + goto out; + } btrfs_map_bh_to_logical(root, result, ins.objectid); } out: - if (trans) - err = btrfs_end_transaction(trans, root); + if (trans) { + ret = btrfs_end_transaction(trans, root); + if (!err) + err = ret; + } btrfs_free_path(path); return err; } @@ -1375,8 +1404,8 @@ int btrfs_get_block(struct inode *inode, sector_t iblock, return err; } -int btrfs_get_block_csum(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create) +static int btrfs_get_block_csum(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create) { int ret; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -1397,7 +1426,7 @@ int btrfs_get_block_csum(struct inode *inode, sector_t iblock, /* a csum that isn't present is a preallocated region. */ if (ret == -ENOENT || ret == -EFBIG) ret = 0; - result->b_private = 0; + result->b_private = NULL; goto out; } memcpy((char *)&result->b_private, &item->csum, BTRFS_CRC32_SIZE); @@ -1736,11 +1765,10 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page, trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); kaddr = kmap(page); - ret = btrfs_csum_file_block(trans, root, inode->i_ino, + btrfs_csum_file_block(trans, root, inode->i_ino, off, kaddr + bh_offset(bh), bh->b_size); kunmap(page); - BUG_ON(ret); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); @@ -1930,7 +1958,6 @@ static void btrfs_truncate(struct inode *inode) /* FIXME, add redo link to tree so we don't leak on crash */ ret = btrfs_truncate_in_trans(trans, root, inode); - BUG_ON(ret); btrfs_update_inode(trans, root, inode); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); @@ -1970,6 +1997,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) struct inode *inode; struct inode *dir; int ret; + int err; u64 objectid; u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; @@ -1978,8 +2006,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) BUG_ON(!trans); subvol = btrfs_alloc_free_block(trans, root, 0); - if (subvol == NULL) - return -ENOSPC; + if (IS_ERR(subvol)) + return PTR_ERR(subvol); leaf = btrfs_buffer_leaf(subvol); btrfs_set_header_nritems(&leaf->header, 0); btrfs_set_header_level(&leaf->header, 0); @@ -2005,7 +2033,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, 0, &objectid); - BUG_ON(ret); + if (ret) + goto fail; btrfs_set_root_dirid(&root_item, new_dirid); @@ -2015,7 +2044,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &root_item); - BUG_ON(ret); + if (ret) + goto fail; /* * insert the directory item @@ -2025,10 +2055,12 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, name, namelen, dir->i_ino, &key, BTRFS_FT_DIR); - BUG_ON(ret); + if (ret) + goto fail; ret = btrfs_commit_transaction(trans, root); - BUG_ON(ret); + if (ret) + goto fail_commit; new_root = btrfs_read_fs_root(root->fs_info, &key); BUG_ON(!new_root); @@ -2038,24 +2070,29 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) inode = btrfs_new_inode(trans, new_root, new_dirid, BTRFS_I(dir)->block_group, S_IFDIR | 0700); + if (IS_ERR(inode)) + goto fail; inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; new_root->inode = inode; ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid); - BUG_ON(ret); + if (ret) + goto fail; inode->i_nlink = 1; inode->i_size = 6; ret = btrfs_update_inode(trans, new_root, inode); - BUG_ON(ret); - - ret = btrfs_commit_transaction(trans, new_root); - BUG_ON(ret); - + if (ret) + goto fail; +fail: + err = btrfs_commit_transaction(trans, root); + if (err && !ret) + ret = err; +fail_commit: mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root); - return 0; + return ret; } static int create_snapshot(struct btrfs_root *root, char *name, int namelen) @@ -2064,6 +2101,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) struct btrfs_key key; struct btrfs_root_item new_root_item; int ret; + int err; u64 objectid; if (!root->ref_cows) @@ -2074,11 +2112,13 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) BUG_ON(!trans); ret = btrfs_update_inode(trans, root, root->inode); - BUG_ON(ret); + if (ret) + goto fail; ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, 0, &objectid); - BUG_ON(ret); + if (ret) + goto fail; memcpy(&new_root_item, &root->root_item, sizeof(new_root_item)); @@ -2091,7 +2131,8 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); - BUG_ON(ret); + if (ret) + goto fail; /* * insert the directory item @@ -2102,16 +2143,20 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) root->fs_info->sb->s_root->d_inode->i_ino, &key, BTRFS_FT_DIR); - BUG_ON(ret); + if (ret) + goto fail; ret = btrfs_inc_root_ref(trans, root); - BUG_ON(ret); + if (ret) + goto fail; - ret = btrfs_commit_transaction(trans, root); - BUG_ON(ret); +fail: + err = btrfs_commit_transaction(trans, root); + if (err && !ret) + ret = err; mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root); - return 0; + return ret; } int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int @@ -2148,12 +2193,13 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int btrfs_free_path(path); if (di && !IS_ERR(di)) return -EEXIST; + if (IS_ERR(di)) + return PTR_ERR(di); if (root == root->fs_info->tree_root) ret = create_subvol(root, vol_args.name, namelen); else ret = create_snapshot(root, vol_args.name, namelen); - WARN_ON(ret); break; default: return -ENOTTY; @@ -2316,7 +2362,6 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, old_parent_oid = btrfs_disk_key_objectid(&di->location); ret = btrfs_del_item(trans, root, path); if (ret) { - ret = -EIO; goto out_fail; } btrfs_release_path(root, path); @@ -2335,7 +2380,6 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, } ret = btrfs_del_item(trans, root, path); if (ret) { - ret = -EIO; goto out_fail; } btrfs_release_path(root, path); @@ -2361,7 +2405,9 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, clear_nlink(new_inode); else drop_nlink(new_inode); - btrfs_update_inode(trans, root, new_inode); + ret = btrfs_update_inode(trans, root, new_inode); + if (ret) + goto out_fail; } ret = btrfs_add_link(trans, new_dentry, old_inode); if (ret) @@ -2433,7 +2479,10 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, datasize = btrfs_file_extent_calc_inline_size(name_len); err = btrfs_insert_empty_item(trans, root, path, &key, datasize); - BUG_ON(err); + if (err) { + drop_inode = 1; + goto out_unlock; + } ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); btrfs_set_file_extent_generation(ei, trans->transid); @@ -2447,13 +2496,13 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_symlink_inode_operations; inode->i_mapping->a_ops = &btrfs_symlink_aops; inode->i_size = name_len - 1; - btrfs_update_inode(trans, root, inode); - err = 0; + err = btrfs_update_inode(trans, root, inode); + if (err) + drop_inode = 1; out_unlock: btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - if (drop_inode) { inode_dec_link_count(inode); iput(inode); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 8e1b9046d5e..ac0fae7780f 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -90,7 +90,6 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root { int ret; ret = btrfs_insert_item(trans, root, key, item, sizeof(*item)); - BUG_ON(ret); return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ef8ad18b275..c11ecf50020 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -125,9 +125,8 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sb->s_dirt = 0; - BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); - return 0; + return ret; } static void btrfs_write_super(struct super_block *sb) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b5b99a85763..321f8852755 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -219,7 +219,8 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root; int i; int ret; - int err; + int err = 0; + while(1) { ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0, ARRAY_SIZE(gang), @@ -251,11 +252,12 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, err = btrfs_insert_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); - BUG_ON(err); + if (err) + break; list_add(&dirty->list, list); } } - return 0; + return err; } static int drop_dirty_roots(struct btrfs_root *tree_root, @@ -263,7 +265,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, { struct dirty_root *dirty; struct btrfs_trans_handle *trans; - int ret; + int ret = 0; while(!list_empty(list)) { mutex_lock(&tree_root->fs_info->fs_mutex); dirty = list_entry(list->next, struct dirty_root, list); @@ -274,14 +276,15 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, BUG_ON(ret); ret = btrfs_del_root(trans, tree_root, &dirty->snap_key); - BUG_ON(ret); + if (ret) + break; ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); kfree(dirty); mutex_unlock(&tree_root->fs_info->fs_mutex); btrfs_btree_balance_dirty(tree_root); } - return 0; + return ret; } int btrfs_commit_transaction(struct btrfs_trans_handle *trans, @@ -321,9 +324,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, } finish_wait(&trans->transaction->writer_wait, &wait); WARN_ON(cur_trans != trans->transaction); - add_dirty_roots(trans, &root->fs_info->fs_roots_radix, &dirty_fs_roots); + ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, + &dirty_fs_roots); + BUG_ON(ret); + ret = btrfs_commit_tree_roots(trans, root); BUG_ON(ret); + cur_trans = root->fs_info->running_transaction; root->fs_info->running_transaction = NULL; if (cur_trans->list.prev != &root->fs_info->trans_list) { -- cgit v1.2.3-70-g09d2 From 4b52dff6d371b9b93bc99f64c32831ea9a8ec3ac Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 26 Jun 2007 10:06:50 -0400 Subject: Btrfs: Fix super block updates during transaction commit The super block written during commit was not consistent with the state of the trees. This change adds an in-memory copy of the super so that we can make sure to write out consistent data during a commit. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 5 ++--- fs/btrfs/extent-tree.c | 16 ++++++++-------- fs/btrfs/super.c | 2 +- fs/btrfs/transaction.c | 8 ++++++-- 5 files changed, 18 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index fb6fffb71dd..1998f86df08 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -306,6 +306,7 @@ struct btrfs_fs_info { u64 generation; struct btrfs_transaction *running_transaction; struct btrfs_super_block *disk_super; + struct btrfs_super_block super_copy; struct buffer_head *sb_buffer; struct super_block *sb; struct inode *btree_inode; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d7615e1578c..7081729d5b1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -471,6 +471,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!fs_info->sb_buffer) goto fail_iput; disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; + fs_info->disk_super = disk_super; + memcpy(&fs_info->super_copy, disk_super, sizeof(fs_info->super_copy)); if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; @@ -479,7 +481,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) btrfs_super_total_blocks(disk_super) << fs_info->btree_inode->i_blkbits); - fs_info->disk_super = disk_super; if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, sizeof(disk_super->magic))) { @@ -527,8 +528,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root int ret; struct buffer_head *bh = root->fs_info->sb_buffer; - btrfs_set_super_root(root->fs_info->disk_super, - bh_blocknr(root->fs_info->tree_root->node)); lock_buffer(bh); WARN_ON(atomic_read(&bh->b_count) < 1); clear_buffer_dirty(bh); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8025e9f8ef1..7e550343aee 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -796,8 +796,8 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) { ins.objectid = extent_root->fs_info->extent_tree_insert[i]; - super_blocks_used = btrfs_super_blocks_used(info->disk_super); - btrfs_set_super_blocks_used(info->disk_super, + super_blocks_used = btrfs_super_blocks_used(&info->super_copy); + btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used + 1); ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item, sizeof(extent_item)); @@ -892,8 +892,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ret); } - super_blocks_used = btrfs_super_blocks_used(info->disk_super); - btrfs_set_super_blocks_used(info->disk_super, + super_blocks_used = btrfs_super_blocks_used(&info->super_copy); + btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, path); if (ret) { @@ -1032,7 +1032,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root info->extent_tree_prealloc_nr = 0; } if (search_end == (u64)-1) - search_end = btrfs_super_total_blocks(info->disk_super); + search_end = btrfs_super_total_blocks(&info->super_copy); if (hint_block) { block_group = btrfs_lookup_block_group(info, hint_block); block_group = btrfs_find_block_group(root, block_group, @@ -1361,8 +1361,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, } } - super_blocks_used = btrfs_super_blocks_used(info->disk_super); - btrfs_set_super_blocks_used(info->disk_super, super_blocks_used + + super_blocks_used = btrfs_super_blocks_used(&info->super_copy); + btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used + num_blocks); ret = btrfs_insert_item(trans, extent_root, ins, &extent_item, sizeof(extent_item)); @@ -1737,7 +1737,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) BTRFS_BLOCK_GROUP_AVAIL); } if (key.objectid >= - btrfs_super_total_blocks(info->disk_super)) + btrfs_super_total_blocks(&info->super_copy)) break; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c11ecf50020..2e797d5fb28 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -144,7 +144,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct btrfs_root *root = btrfs_sb(dentry->d_sb); - struct btrfs_super_block *disk_super = root->fs_info->disk_super; + struct btrfs_super_block *disk_super = &root->fs_info->super_copy; buf->f_namelen = BTRFS_NAME_LEN; buf->f_blocks = btrfs_super_total_blocks(disk_super); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a5a63d471e4..3b2face593e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -380,6 +380,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, else prev_trans->use_count++; } + btrfs_set_super_generation(&root->fs_info->super_copy, + cur_trans->transid); + btrfs_set_super_root(&root->fs_info->super_copy, + bh_blocknr(root->fs_info->tree_root->node)); + memcpy(root->fs_info->disk_super, &root->fs_info->super_copy, + sizeof(root->fs_info->super_copy)); mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->fs_mutex); ret = btrfs_write_and_wait_transaction(trans, root); @@ -389,8 +395,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, put_transaction(prev_trans); mutex_unlock(&root->fs_info->trans_mutex); } - btrfs_set_super_generation(root->fs_info->disk_super, - cur_trans->transid); BUG_ON(ret); write_ctree_super(trans, root); -- cgit v1.2.3-70-g09d2 From f2654de42a759127cb1f1e8a626ec94178732e20 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 26 Jun 2007 12:20:46 -0400 Subject: Btrfs: Allow find_free_extent callers to pass in an exclusion range Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 79 +++++++++++++++++++------------------------------- 1 file changed, 30 insertions(+), 49 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7e550343aee..01dc3057928 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -25,7 +25,8 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *orig_root, u64 num_blocks, u64 search_start, u64 search_end, u64 hint_block, - struct btrfs_key *ins, int data); + struct btrfs_key *ins, u64 exclude_start, + u64 exclude_nr, int data); static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct @@ -407,7 +408,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, - (u64)-1, 0, &ins, 0); + (u64)-1, 0, &ins, 0, 0, 0); if (ret) { btrfs_free_path(path); return ret; @@ -559,7 +560,8 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, struct btrfs_block_group_item *bi; struct btrfs_key ins; - ret = find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, 0); + ret = find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, + 0, 0, 0); /* FIXME, set bit to recalc cache groups on next mount */ if (ret) return ret; @@ -868,7 +870,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (!path) return -ENOMEM; - ret = find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0); + ret = find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); if (ret) { btrfs_free_path(path); return ret; @@ -987,7 +989,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *orig_root, u64 num_blocks, u64 search_start, u64 search_end, u64 hint_block, - struct btrfs_key *ins, int data) + struct btrfs_key *ins, u64 exclude_start, + u64 exclude_nr, int data) { struct btrfs_path *path; struct btrfs_key key; @@ -1191,6 +1194,11 @@ check_pending: goto new_group; } } + if (exclude_nr > 0 && (ins->objectid + num_blocks > exclude_start && + ins->objectid < exclude_start + exclude_nr)) { + search_start = exclude_start + exclude_nr; + goto new_group; + } if (fill_prealloc) { int nr; test_block = ins->objectid; @@ -1267,6 +1275,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, int pending_ret; u64 super_blocks_used; u64 search_start = 0; + u64 exclude_start = 0; + u64 exclude_nr = 0; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; struct btrfs_extent_item extent_item; @@ -1298,33 +1308,19 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, */ if (data) { ret = find_free_extent(trans, root, 0, 0, - search_end, 0, &prealloc_key, 0); - if (ret) { + search_end, 0, &prealloc_key, 0, 0, 0); + if (ret) return ret; - } - if (prealloc_key.objectid + prealloc_key.offset >= search_end) { - int nr = info->extent_tree_prealloc_nr; - search_end = info->extent_tree_prealloc[nr - 1] - 1; - } else { - search_start = info->extent_tree_prealloc[0] + 1; - } + exclude_nr = info->extent_tree_prealloc_nr; + exclude_start = info->extent_tree_prealloc[exclude_nr - 1]; } - if (hint_block < search_start) - hint_block = search_start; + /* do the real allocation */ ret = find_free_extent(trans, root, num_blocks, search_start, - search_end, hint_block, ins, data); - if (ret) { - if (search_start == 0) - return ret; - search_end = search_start - 1; - search_start = 0; - hint_block = search_start; - ret = find_free_extent(trans, root, num_blocks, search_start, - search_end, hint_block, ins, data); - if (ret) - return ret; - } + search_end, hint_block, ins, + exclude_start, exclude_nr, data); + if (ret) + return ret; /* * if we're doing a metadata allocation, preallocate space in the @@ -1336,29 +1332,14 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, * The unused prealloc will get reused the next time around. */ if (!data) { - if (ins->objectid + ins->offset >= search_end) - search_end = ins->objectid - 1; - else - search_start = ins->objectid + ins->offset; - - if (hint_block < search_start) - hint_block = search_start; - + exclude_start = ins->objectid; + exclude_nr = ins->offset; ret = find_free_extent(trans, root, 0, search_start, search_end, hint_block, - &prealloc_key, 0); - if (ret) { - if (search_start == 0) - return ret; - search_end = search_start - 1; - search_start = 0; - hint_block = search_start; - ret = find_free_extent(trans, root, 0, search_start, - search_end, hint_block, - &prealloc_key, 0); - if (ret) - return ret; - } + &prealloc_key, exclude_start, + exclude_nr, 0); + if (ret) + return ret; } super_blocks_used = btrfs_super_blocks_used(&info->super_copy); -- cgit v1.2.3-70-g09d2 From ccd467d60e81b48cdbecae93532b66bcdedca91d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 28 Jun 2007 15:57:36 -0400 Subject: Btrfs: crash recovery fixes Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 13 +++++++++++- fs/btrfs/ctree.h | 12 ++++------- fs/btrfs/disk-io.c | 22 ++++++++++++-------- fs/btrfs/disk-io.h | 1 + fs/btrfs/extent-tree.c | 37 ++++++++++++++++++++++++++++++---- fs/btrfs/file.c | 15 ++++++++++---- fs/btrfs/inode.c | 6 +++--- fs/btrfs/transaction.c | 54 +++++++++++++++++++++++++++++++------------------- 8 files changed, 112 insertions(+), 48 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 606a19b5916..9eb646529ed 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -75,6 +75,17 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_node *cow_node; int ret; + WARN_ON(!buffer_uptodate(buf)); + if (trans->transaction != root->fs_info->running_transaction) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->running_transaction->transid); + WARN_ON(1); + } + if (trans->transid != root->fs_info->generation) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->generation); + WARN_ON(1); + } if (btrfs_header_generation(btrfs_buffer_header(buf)) == trans->transid) { *cow_ret = buf; @@ -107,7 +118,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); } btrfs_block_release(root, buf); - mark_buffer_dirty(cow); + btrfs_mark_buffer_dirty(cow); *cow_ret = cow; return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1998f86df08..0287bd51d87 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1013,18 +1013,13 @@ static inline void btrfs_memmove(struct btrfs_root *root, memmove(dst, src, nr); } -static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) -{ - WARN_ON(!atomic_read(&bh->b_count)); - mark_buffer_dirty(bh); -} - /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ btrfs_item_offset((leaf)->items + (slot)))) /* extent-tree.c */ +int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 blocknr); @@ -1044,8 +1039,9 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin); -int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct - btrfs_root *root); +int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct radix_tree_root *unpin_radix); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7081729d5b1..d1bf5bc1bc1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -270,14 +270,6 @@ fail: return NULL; } -int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf) -{ - WARN_ON(atomic_read(&buf->b_count) == 0); - mark_buffer_dirty(buf); - return 0; -} - int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf) { @@ -621,6 +613,20 @@ int close_ctree(struct btrfs_root *root) return 0; } +void btrfs_mark_buffer_dirty(struct buffer_head *bh) +{ + struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; + u64 transid = btrfs_header_generation(btrfs_buffer_header(bh)); + WARN_ON(!atomic_read(&bh->b_count)); + if (transid != root->fs_info->generation) { + printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", + (unsigned long long)bh->b_blocknr, + transid, root->fs_info->generation); + WARN_ON(1); + } + mark_buffer_dirty(bh); +} + void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) { brelse(buf); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c4a695ac44f..9e2c261b41a 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -78,4 +78,5 @@ int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, int btrfs_releasepage(struct page *page, gfp_t flags); void btrfs_btree_balance_dirty(struct btrfs_root *root); int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); +void btrfs_mark_buffer_dirty(struct buffer_head *bh); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 01dc3057928..14b93268920 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -523,6 +523,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, } return 0; fail: + WARN_ON(1); for (i =0; i < faili; i++) { if (leaf) { u64 disk_blocknr; @@ -572,7 +573,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_block_group_item); memcpy(bi, &cache->item, sizeof(*bi)); - mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(extent_root, path); fail: finish_current_insert(trans, extent_root); @@ -739,8 +740,30 @@ static int try_remove_page(struct address_space *mapping, unsigned long index) return ret; } -int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct - btrfs_root *root) +int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy) +{ + unsigned long gang[8]; + u64 last = 0; + struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix; + int ret; + int i; + + while(1) { + ret = find_first_radix_bit(pinned_radix, gang, last, + ARRAY_SIZE(gang)); + if (!ret) + break; + for (i = 0 ; i < ret; i++) { + set_radix_bit(copy, gang[i]); + last = gang[i] + 1; + } + } + return 0; +} + +int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct radix_tree_root *unpin_radix) { unsigned long gang[8]; struct inode *btree_inode = root->fs_info->btree_inode; @@ -752,7 +775,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix; while(1) { - ret = find_first_radix_bit(pinned_radix, gang, 0, + ret = find_first_radix_bit(unpin_radix, gang, 0, ARRAY_SIZE(gang)); if (!ret) break; @@ -760,6 +783,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct first = gang[0]; for (i = 0; i < ret; i++) { clear_radix_bit(pinned_radix, gang[i]); + clear_radix_bit(unpin_radix, gang[i]); block_group = btrfs_lookup_block_group(root->fs_info, gang[i]); if (block_group) { @@ -1309,6 +1333,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, if (data) { ret = find_free_extent(trans, root, 0, 0, search_end, 0, &prealloc_key, 0, 0, 0); + BUG_ON(ret); if (ret) return ret; exclude_nr = info->extent_tree_prealloc_nr; @@ -1319,6 +1344,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, ret = find_free_extent(trans, root, num_blocks, search_start, search_end, hint_block, ins, exclude_start, exclude_nr, data); + BUG_ON(ret); if (ret) return ret; @@ -1334,10 +1360,12 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, if (!data) { exclude_start = ins->objectid; exclude_nr = ins->offset; + hint_block = exclude_start + exclude_nr; ret = find_free_extent(trans, root, 0, search_start, search_end, hint_block, &prealloc_key, exclude_start, exclude_nr, 0); + BUG_ON(ret); if (ret) return ret; } @@ -1348,6 +1376,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, ret = btrfs_insert_item(trans, extent_root, ins, &extent_item, sizeof(extent_item)); + BUG_ON(ret); finish_current_insert(trans, extent_root); pending_ret = del_pending_extents(trans, extent_root); if (ret) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index fef7ba1e707..2456cc3e1cf 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -127,7 +127,7 @@ static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, ptr, kaddr + bh_offset(bh), size); kunmap_atomic(kaddr, KM_USER0); - mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_free_path(path); ret = btrfs_end_transaction(trans, root); @@ -211,11 +211,13 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, int found_type; int found_extent; int found_inline; + int recow; path = btrfs_alloc_path(); if (!path) return -ENOMEM; while(1) { + recow = 0; btrfs_release_path(root, path); ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, search_start, -1); @@ -244,6 +246,10 @@ next_slot: if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY) { goto out; } + if (recow) { + search_start = key.offset; + continue; + } if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); @@ -274,6 +280,7 @@ next_slot: nextret = btrfs_next_leaf(root, path); if (nextret) goto out; + recow = 1; } else { path->slots[0]++; } @@ -321,7 +328,7 @@ next_slot: } btrfs_set_file_extent_num_blocks(extent, new_num); - mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); } else { WARN_ON(1); } @@ -452,6 +459,8 @@ static int prepare_pages(struct btrfs_root *root, err = -ENOMEM; goto failed_release; } + cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); + wait_on_page_writeback(pages[i]); } mutex_lock(&root->fs_info->fs_mutex); @@ -522,8 +531,6 @@ static int prepare_pages(struct btrfs_root *root, mutex_unlock(&root->fs_info->fs_mutex); for (i = 0; i < num_pages; i++) { - cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); - wait_on_page_writeback(pages[i]); offset = pos & (PAGE_CACHE_SIZE -1); this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); if (!page_has_buffers(pages[i])) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index eba06e7cf41..4fc0367d54f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -506,7 +506,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, extent_num_blocks); inode->i_blocks -= (orig_num_blocks - extent_num_blocks) << 3; - mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); } else { extent_start = btrfs_file_extent_disk_blocknr(fi); @@ -2020,7 +2020,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_header_owner(&leaf->header, root->root_key.objectid); memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid, sizeof(leaf->header.fsid)); - mark_buffer_dirty(subvol); + btrfs_mark_buffer_dirty(subvol); inode_item = &root_item.inode; memset(inode_item, 0, sizeof(*inode_item)); @@ -2497,7 +2497,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, ptr = btrfs_file_extent_inline_start(ei); btrfs_memcpy(root, path->nodes[0]->b_data, ptr, symname, name_len); - mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); inode->i_op = &btrfs_symlink_inode_operations; inode->i_mapping->a_ops = &btrfs_symlink_aops; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 3b2face593e..bec38ae8aa1 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -85,6 +85,8 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, if (root != root->fs_info->tree_root && root->last_trans < running_trans_id) { + WARN_ON(root == root->fs_info->extent_root); + WARN_ON(root->ref_cows != 1); if (root->root_item.refs != 0) { radix_tree_tag_set(&root->fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, @@ -113,10 +115,11 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->trans_mutex); cur_trans = root->fs_info->running_transaction; + WARN_ON(cur_trans != trans->transaction); WARN_ON(cur_trans->num_writers < 1); + cur_trans->num_writers--; if (waitqueue_active(&cur_trans->writer_wait)) wake_up(&cur_trans->writer_wait); - cur_trans->num_writers--; put_transaction(cur_trans); mutex_unlock(&root->fs_info->trans_mutex); memset(trans, 0, sizeof(*trans)); @@ -194,6 +197,7 @@ static int wait_for_commit(struct btrfs_root *root, struct btrfs_transaction *commit) { DEFINE_WAIT(wait); + mutex_lock(&root->fs_info->trans_mutex); while(!commit->commit_done) { prepare_to_wait(&commit->commit_wait, &wait, TASK_UNINTERRUPTIBLE); @@ -203,6 +207,7 @@ static int wait_for_commit(struct btrfs_root *root, schedule(); mutex_lock(&root->fs_info->trans_mutex); } + mutex_unlock(&root->fs_info->trans_mutex); finish_wait(&commit->commit_wait, &wait); return 0; } @@ -279,7 +284,6 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, &root->root_item); if (err) break; - refs = btrfs_root_refs(&tmp_item); btrfs_set_root_refs(&tmp_item, refs - 1); err = btrfs_update_root(trans, root->fs_info->tree_root, @@ -333,31 +337,53 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans; struct btrfs_transaction *prev_trans = NULL; struct list_head dirty_fs_roots; + struct radix_tree_root pinned_copy; DEFINE_WAIT(wait); + init_bit_radix(&pinned_copy); INIT_LIST_HEAD(&dirty_fs_roots); mutex_lock(&root->fs_info->trans_mutex); if (trans->transaction->in_commit) { cur_trans = trans->transaction; trans->transaction->use_count++; + mutex_unlock(&root->fs_info->trans_mutex); btrfs_end_transaction(trans, root); + + mutex_unlock(&root->fs_info->fs_mutex); ret = wait_for_commit(root, cur_trans); BUG_ON(ret); put_transaction(cur_trans); - mutex_unlock(&root->fs_info->trans_mutex); + mutex_lock(&root->fs_info->fs_mutex); return 0; } - cur_trans = trans->transaction; trans->transaction->in_commit = 1; + cur_trans = trans->transaction; + if (cur_trans->list.prev != &root->fs_info->trans_list) { + prev_trans = list_entry(cur_trans->list.prev, + struct btrfs_transaction, list); + if (!prev_trans->commit_done) { + prev_trans->use_count++; + mutex_unlock(&root->fs_info->fs_mutex); + mutex_unlock(&root->fs_info->trans_mutex); + + wait_for_commit(root, prev_trans); + put_transaction(prev_trans); + + mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->trans_mutex); + } + } while (trans->transaction->num_writers > 1) { WARN_ON(cur_trans != trans->transaction); prepare_to_wait(&trans->transaction->writer_wait, &wait, TASK_UNINTERRUPTIBLE); if (trans->transaction->num_writers <= 1) break; + mutex_unlock(&root->fs_info->fs_mutex); mutex_unlock(&root->fs_info->trans_mutex); schedule(); + mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); finish_wait(&trans->transaction->writer_wait, &wait); } @@ -372,34 +398,22 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, cur_trans = root->fs_info->running_transaction; root->fs_info->running_transaction = NULL; - if (cur_trans->list.prev != &root->fs_info->trans_list) { - prev_trans = list_entry(cur_trans->list.prev, - struct btrfs_transaction, list); - if (prev_trans->commit_done) - prev_trans = NULL; - else - prev_trans->use_count++; - } btrfs_set_super_generation(&root->fs_info->super_copy, cur_trans->transid); btrfs_set_super_root(&root->fs_info->super_copy, bh_blocknr(root->fs_info->tree_root->node)); memcpy(root->fs_info->disk_super, &root->fs_info->super_copy, sizeof(root->fs_info->super_copy)); + + btrfs_copy_pinned(root, &pinned_copy); + mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->fs_mutex); ret = btrfs_write_and_wait_transaction(trans, root); - if (prev_trans) { - mutex_lock(&root->fs_info->trans_mutex); - wait_for_commit(root, prev_trans); - put_transaction(prev_trans); - mutex_unlock(&root->fs_info->trans_mutex); - } BUG_ON(ret); write_ctree_super(trans, root); - mutex_lock(&root->fs_info->fs_mutex); - btrfs_finish_extent_commit(trans, root); + btrfs_finish_extent_commit(trans, root, &pinned_copy); mutex_lock(&root->fs_info->trans_mutex); cur_trans->commit_done = 1; wake_up(&cur_trans->commit_wait); -- cgit v1.2.3-70-g09d2 From ec6b910fb330f29e169c9f502c3ac209515af8d1 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Wed, 11 Jul 2007 10:00:37 -0400 Subject: Btrfs: trivial include fixups Almost none of the files including module.h need to do so, remove them. Include sched.h in extent-tree.c to silence a warning about cond_resched() being undeclared. Signed-off-by: Zach Brown Signed-off-by: Chris Mason --- fs/btrfs/bit-radix.c | 1 - fs/btrfs/ctree.c | 1 - fs/btrfs/dir-item.c | 1 - fs/btrfs/disk-io.c | 1 - fs/btrfs/extent-tree.c | 2 +- fs/btrfs/file-item.c | 1 - fs/btrfs/file.c | 1 - fs/btrfs/inode-item.c | 1 - fs/btrfs/inode-map.c | 1 - fs/btrfs/inode.c | 1 - fs/btrfs/print-tree.c | 1 - fs/btrfs/root-tree.c | 1 - fs/btrfs/sysfs.c | 1 - fs/btrfs/transaction.c | 1 - 14 files changed, 1 insertion(+), 14 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index a4ddf4de979..e8bf876db39 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include "bit-radix.h" #define BIT_ARRAY_BYTES 256 diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9eb646529ed..eace2ee7644 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index d5db8bdea3d..49db5fa7ced 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include "ctree.h" #include "disk-io.h" #include "hash.h" diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d1bf5bc1bc1..60db85bc4ba 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include #include #include diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 14b93268920..5ace2c33f1a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -16,7 +16,7 @@ * Boston, MA 021110-1307, USA. */ -#include +#include #include "ctree.h" #include "disk-io.h" #include "print-tree.h" diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 68859934ae2..ea4dd4c5fce 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 2456cc3e1cf..f327525f541 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include #include #include diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index e8b5a9f7c0c..b7834617753 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 58930fc48be..40547086625 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4fc0367d54f..d75ca5cf59b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include #include #include diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index e66acd4e274..bafa78603bd 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include "ctree.h" #include "disk-io.h" #include "print-tree.h" diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 8b0eca042ea..271dd2b5c32 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include "ctree.h" #include "transaction.h" #include "disk-io.h" diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index ead9dc8051b..db8917e5b25 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index bec38ae8aa1..0f494fe365c 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include #include #include "ctree.h" -- cgit v1.2.3-70-g09d2 From 9f3a742736cecda5a8778be70faa2f779458839f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 7 Aug 2007 15:52:19 -0400 Subject: Btrfs: Do snapshot deletion in smaller chunks. Before, snapshot deletion was a single atomic unit. This caused considerable lock contention and required an unbounded amount of space. Now, the drop_progress field in the root item is used to indicate how far along snapshot deletion is, and to resume where it left off. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 10 ++++++++ fs/btrfs/ctree.h | 4 ++-- fs/btrfs/extent-tree.c | 45 ++++++++++++++++++++++++++++++++---- fs/btrfs/transaction.c | 63 ++++++++++++++++++++++++++++++-------------------- 4 files changed, 91 insertions(+), 31 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 19a30b7c3a2..aa824e2c521 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -659,9 +659,16 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root struct buffer_head *b; struct buffer_head *cow_buf; struct btrfs_node *c; + struct btrfs_root_item *root_item = &root->root_item; int slot; int ret; int level; + u8 lowest_level = 0; + + if (btrfs_root_refs(root_item) == 0 && root->ref_cows) { + lowest_level = root_item->drop_level; + WARN_ON(ins_len || cow); + } WARN_ON(p->nodes[0] != NULL); WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); @@ -718,6 +725,9 @@ again: slot = p->slots[level]; BUG_ON(btrfs_header_nritems(&c->header) == 1); } + /* this is only true while dropping a snapshot */ + if (level == lowest_level) + break; b = read_tree_block(root, btrfs_node_blockptr(c, slot)); } else { struct btrfs_leaf *l = (struct btrfs_leaf *)c; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0287bd51d87..73c2e75a136 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -333,10 +333,10 @@ struct btrfs_root { u64 objectid; u64 last_trans; u32 blocksize; - int ref_cows; u32 type; u64 highest_inode; u64 last_inode_alloc; + int ref_cows; }; /* the lower bits in the key flags defines the item type */ @@ -1073,7 +1073,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf); int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *snap); + *root); /* root-item.c */ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5ace2c33f1a..9455974dabe 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1561,12 +1561,21 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root int i; int slot; int ret; + struct btrfs_root_item *root_item = &root->root_item; + for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; if (slot < btrfs_header_nritems( btrfs_buffer_header(path->nodes[i])) - 1) { + struct btrfs_node *node; + node = btrfs_buffer_node(path->nodes[i]); path->slots[i]++; *level = i; + WARN_ON(*level == 0); + memcpy(&root_item->drop_progress, + &node->ptrs[path->slots[i]].key, + sizeof(root_item->drop_progress)); + root_item->drop_level = i; return 0; } else { ret = btrfs_free_extent(trans, root, @@ -1587,7 +1596,7 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root * decremented. */ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *snap) + *root) { int ret = 0; int wret; @@ -1595,14 +1604,33 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_path *path; int i; int orig_level; + int num_walks = 0; + struct btrfs_root_item *root_item = &root->root_item; path = btrfs_alloc_path(); BUG_ON(!path); - level = btrfs_header_level(btrfs_buffer_header(snap)); + level = btrfs_header_level(btrfs_buffer_header(root->node)); orig_level = level; - path->nodes[level] = snap; - path->slots[level] = 0; + if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { + path->nodes[level] = root->node; + path->slots[level] = 0; + } else { + struct btrfs_key key; + struct btrfs_disk_key *found_key; + struct btrfs_node *node; + btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); + wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) { + ret = wret; + goto out; + } + level = root_item->drop_level; + node = btrfs_buffer_node(path->nodes[level]); + found_key = &node->ptrs[path->slots[level]].key; + WARN_ON(memcmp(found_key, &root_item->drop_progress, + sizeof(*found_key))); + } while(1) { wret = walk_down_tree(trans, root, path, &level); if (wret > 0) @@ -1615,12 +1643,21 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root break; if (wret < 0) ret = wret; + num_walks++; + if (num_walks > 10) { + struct btrfs_key key; + btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); + ret = -EAGAIN; + get_bh(root->node); + break; + } } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { btrfs_block_release(root, path->nodes[i]); } } +out: btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 0f494fe365c..498626470a0 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -213,10 +213,7 @@ static int wait_for_commit(struct btrfs_root *root, struct dirty_root { struct list_head list; - struct btrfs_key snap_key; - struct buffer_head *commit_root; struct btrfs_root *root; - int free_on_drop; }; int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list) @@ -226,10 +223,7 @@ int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list) dirty = kmalloc(sizeof(*dirty), GFP_NOFS); if (!dirty) return -ENOMEM; - memcpy(&dirty->snap_key, &root->root_key, sizeof(root->root_key)); - dirty->commit_root = root->node; dirty->root = root; - dirty->free_on_drop = 1; list_add(&dirty->list, dead_list); return 0; } @@ -241,7 +235,6 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, struct dirty_root *dirty; struct btrfs_root *gang[8]; struct btrfs_root *root; - struct btrfs_root_item tmp_item; int i; int ret; int err = 0; @@ -267,13 +260,16 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, } dirty = kmalloc(sizeof(*dirty), GFP_NOFS); BUG_ON(!dirty); - memcpy(&dirty->snap_key, &root->root_key, - sizeof(root->root_key)); - dirty->commit_root = root->commit_root; + dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS); + BUG_ON(!dirty->root); + + memset(&root->root_item.drop_progress, 0, + sizeof(struct btrfs_disk_key)); + root->root_item.drop_level = 0; + + memcpy(dirty->root, root, sizeof(*root)); + dirty->root->node = root->commit_root; root->commit_root = NULL; - dirty->root = root; - dirty->free_on_drop = 0; - memcpy(&tmp_item, &root->root_item, sizeof(tmp_item)); root->root_key.offset = root->fs_info->generation; btrfs_set_root_blocknr(&root->root_item, @@ -283,17 +279,21 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, &root->root_item); if (err) break; - refs = btrfs_root_refs(&tmp_item); - btrfs_set_root_refs(&tmp_item, refs - 1); + + refs = btrfs_root_refs(&dirty->root->root_item); + btrfs_set_root_refs(&dirty->root->root_item, refs - 1); err = btrfs_update_root(trans, root->fs_info->tree_root, - &dirty->snap_key, - &tmp_item); + &dirty->root->root_key, + &dirty->root->root_item); BUG_ON(err); - if (refs == 1) + if (refs == 1) { list_add(&dirty->list, list); - else + } else { + WARN_ON(1); + kfree(dirty->root); kfree(dirty); + } } } return err; @@ -305,23 +305,36 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, struct dirty_root *dirty; struct btrfs_trans_handle *trans; int ret = 0; + int err; + while(!list_empty(list)) { mutex_lock(&tree_root->fs_info->fs_mutex); dirty = list_entry(list->next, struct dirty_root, list); list_del_init(&dirty->list); - trans = btrfs_start_transaction(tree_root, 1); - ret = btrfs_drop_snapshot(trans, dirty->root, - dirty->commit_root); + while(1) { + trans = btrfs_start_transaction(tree_root, 1); + ret = btrfs_drop_snapshot(trans, dirty->root); + if (ret != -EAGAIN) { + break; + } + err = btrfs_update_root(trans, + tree_root, + &dirty->root->root_key, + &dirty->root->root_item); + if (err) + ret = err; + ret = btrfs_end_transaction(trans, tree_root); + BUG_ON(ret); + } BUG_ON(ret); - ret = btrfs_del_root(trans, tree_root, &dirty->snap_key); + ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); if (ret) break; ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); - if (dirty->free_on_drop) - kfree(dirty->root); + kfree(dirty->root); kfree(dirty); mutex_unlock(&tree_root->fs_info->fs_mutex); btrfs_btree_balance_dirty(tree_root); -- cgit v1.2.3-70-g09d2 From 3c69faecb8d83cb2ef085a98b196a3fecea67725 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 7 Aug 2007 15:52:22 -0400 Subject: Btrfs: Fold some btree readahead routines into something more generic. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 30 +------------------- fs/btrfs/inode.c | 69 ++------------------------------------------ 4 files changed, 81 insertions(+), 96 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index aa824e2c521..7a08491e208 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -639,6 +639,73 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, return 1; } +/* + * readahead one full node of leaves + */ +static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, + int slot) +{ + struct btrfs_node *node; + int i; + u32 nritems; + u64 item_objectid; + u64 blocknr; + u64 search; + u64 cluster_start; + int ret; + int nread = 0; + int direction = path->reada; + struct radix_tree_root found; + unsigned long gang[8]; + struct buffer_head *bh; + + if (!path->nodes[1]) + return; + + node = btrfs_buffer_node(path->nodes[1]); + search = btrfs_node_blockptr(node, slot); + bh = btrfs_find_tree_block(root, search); + if (bh) { + brelse(bh); + return; + } + + init_bit_radix(&found); + nritems = btrfs_header_nritems(&node->header); + for (i = slot; i < nritems; i++) { + item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); + blocknr = btrfs_node_blockptr(node, i); + set_radix_bit(&found, blocknr); + } + if (direction > 0) { + cluster_start = search - 4; + if (cluster_start > search) + cluster_start = 0; + } else + cluster_start = search + 4; + while(1) { + ret = find_first_radix_bit(&found, gang, 0, ARRAY_SIZE(gang)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + blocknr = gang[i]; + clear_radix_bit(&found, blocknr); + if (nread > 64) + continue; + if (direction > 0 && cluster_start <= blocknr && + cluster_start + 8 > blocknr) { + cluster_start = blocknr; + readahead_tree_block(root, blocknr); + nread++; + } else if (direction < 0 && cluster_start >= blocknr && + blocknr + 8 > cluster_start) { + cluster_start = blocknr; + readahead_tree_block(root, blocknr); + nread++; + } + } + } +} /* * look for key in the tree. path is filled in with nodes along the way * if key is found, we return zero and you can find the item in the leaf @@ -660,9 +727,11 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root struct buffer_head *cow_buf; struct btrfs_node *c; struct btrfs_root_item *root_item = &root->root_item; + u64 blocknr; int slot; int ret; int level; + int should_reada = p->reada; u8 lowest_level = 0; if (btrfs_root_refs(root_item) == 0 && root->ref_cows) { @@ -728,7 +797,11 @@ again: /* this is only true while dropping a snapshot */ if (level == lowest_level) break; + blocknr = btrfs_node_blockptr(c, slot); + if (level == 1 && should_reada) + reada_for_search(root, p, slot); b = read_tree_block(root, btrfs_node_blockptr(c, slot)); + } else { struct btrfs_leaf *l = (struct btrfs_leaf *)c; p->slots[level] = slot; @@ -1915,6 +1988,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) blocknr = btrfs_node_blockptr(c_node, slot); if (next) btrfs_block_release(root, next); + if (level == 1 && path->reada) + reada_for_search(root, path, slot); next = read_tree_block(root, blocknr); break; } @@ -1927,6 +2002,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) path->slots[level] = 0; if (!level) break; + if (level == 1 && path->reada) + reada_for_search(root, path, slot); next = read_tree_block(root, btrfs_node_blockptr(btrfs_buffer_node(next), 0)); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 73c2e75a136..c5a18d5d7f7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -177,6 +177,7 @@ struct btrfs_node { struct btrfs_path { struct buffer_head *nodes[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL]; + int reada; }; /* diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9455974dabe..5d4d5d8db8e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -32,33 +32,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); -static void reada_extent_leaves(struct btrfs_root *root, - struct btrfs_path *path, u64 limit) -{ - struct btrfs_node *node; - int i; - int nritems; - u64 item_objectid; - u64 blocknr; - int slot; - int ret; - - if (!path->nodes[1]) - return; - node = btrfs_buffer_node(path->nodes[1]); - slot = path->slots[1] + 1; - nritems = btrfs_header_nritems(&node->header); - for (i = slot; i < nritems && i < slot + 8; i++) { - item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); - if (item_objectid > limit) - break; - blocknr = btrfs_node_blockptr(node, i); - ret = readahead_tree_block(root, blocknr); - if (ret) - break; - } -} - static int cache_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *block_group) { @@ -84,6 +57,7 @@ static int cache_block_group(struct btrfs_root *root, path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = 1; key.objectid = block_group->key.objectid; key.flags = 0; key.offset = 0; @@ -94,12 +68,10 @@ static int cache_block_group(struct btrfs_root *root, if (ret && path->slots[0] > 0) path->slots[0]--; limit = block_group->key.objectid + block_group->key.offset; - reada_extent_leaves(root, path, limit); while(1) { leaf = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; if (slot >= btrfs_header_nritems(&leaf->header)) { - reada_extent_leaves(root, path, limit); ret = btrfs_next_leaf(root, path); if (ret < 0) goto err; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fa9c531c86b..3889032fc44 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -374,40 +374,6 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans, return ret; } -/* - * truncates go from a high offset to a low offset. So, walk - * from hi to lo in the node and issue readas. Stop when you find - * keys from a different objectid - */ -static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path, - u64 objectid) -{ - struct btrfs_node *node; - int i; - int nritems; - u64 item_objectid; - u64 blocknr; - int slot; - int ret; - - if (!path->nodes[1]) - return; - node = btrfs_buffer_node(path->nodes[1]); - slot = path->slots[1]; - if (slot == 0) - return; - nritems = btrfs_header_nritems(&node->header); - for (i = slot - 1; i >= 0; i--) { - item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); - if (item_objectid != objectid) - break; - blocknr = btrfs_node_blockptr(node, i); - ret = readahead_tree_block(root, blocknr); - if (ret) - break; - } -} - /* * this can truncate away extent items, csum items and directory items. * It starts at a high offset and removes keys until it can't find @@ -434,6 +400,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, int del_item; path = btrfs_alloc_path(); + path->reada = -1; BUG_ON(!path); /* FIXME, add redo link to tree so we don't leak on crash */ key.objectid = inode->i_ino; @@ -450,7 +417,6 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, BUG_ON(path->slots[0] == 0); path->slots[0]--; } - reada_truncate(root, path, inode->i_ino); leaf = btrfs_buffer_leaf(path->nodes[0]); found_key = &leaf->items[path->slots[0]].key; found_type = btrfs_disk_key_type(found_key); @@ -827,36 +793,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return d_splice_alias(inode, dentry); } -/* - * readahead one full node of leaves as long as their keys include - * the objectid supplied - */ -static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path, - u64 objectid) -{ - struct btrfs_node *node; - int i; - u32 nritems; - u64 item_objectid; - u64 blocknr; - int slot; - int ret; - - if (!path->nodes[1]) - return; - node = btrfs_buffer_node(path->nodes[1]); - slot = path->slots[1]; - nritems = btrfs_header_nritems(&node->header); - for (i = slot + 1; i < nritems; i++) { - item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); - if (item_objectid != objectid) - break; - blocknr = btrfs_node_blockptr(node, i); - ret = readahead_tree_block(root, blocknr); - if (ret) - break; - } -} static unsigned char btrfs_filetype_table[] = { DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK }; @@ -890,18 +826,17 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) btrfs_set_key_type(&key, key_type); key.offset = filp->f_pos; path = btrfs_alloc_path(); + path->reada = 1; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto err; advance = 0; - reada_leaves(root, path, inode->i_ino); while(1) { leaf = btrfs_buffer_leaf(path->nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path->slots[0]; if (advance || slot >= nritems) { if (slot >= nritems -1) { - reada_leaves(root, path, inode->i_ino); ret = btrfs_next_leaf(root, path); if (ret) break; -- cgit v1.2.3-70-g09d2 From 6702ed490ca0bb44e17131818a5a18b773957c5a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 7 Aug 2007 16:15:09 -0400 Subject: Btrfs: Add run time btree defrag, and an ioctl to force btree defrag This adds two types of btree defrag, a run time form that tries to defrag recently allocated blocks in the btree when they are still in ram, and an ioctl that forces defrag of all btree blocks. File data blocks are not defragged yet, but this can make a huge difference in sequential btree reads. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.c | 195 +++++++++++++++++++++++++++++++++---------- fs/btrfs/ctree.h | 19 ++++- fs/btrfs/disk-io.c | 8 ++ fs/btrfs/extent-tree.c | 53 +++++++----- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 33 +++++++- fs/btrfs/ioctl.h | 2 +- fs/btrfs/transaction.c | 105 +++++++++++++++++++---- fs/btrfs/transaction.h | 1 + fs/btrfs/tree-defrag.c | 222 +++++++++++++++++++++++++++++++++++++++++++++++++ 11 files changed, 553 insertions(+), 89 deletions(-) create mode 100644 fs/btrfs/tree-defrag.c (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index a4e2df66bdb..9321438c5ee 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -4,7 +4,7 @@ ifneq ($(KERNELRELEASE),) obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ - transaction.o bit-radix.o inode.o file.o + transaction.o bit-radix.o inode.o file.o tree-defrag.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7a08491e208..c7e47e77723 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -65,44 +65,44 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) memset(p, 0, sizeof(*p)); } -static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root +static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf, struct buffer_head *parent, int parent_slot, struct buffer_head - **cow_ret) + **cow_ret, u64 search_start, u64 empty_size) { struct buffer_head *cow; struct btrfs_node *cow_node; - int ret; + int ret = 0; + int different_trans = 0; + WARN_ON(root->ref_cows && trans->transid != root->last_trans); WARN_ON(!buffer_uptodate(buf)); - if (trans->transaction != root->fs_info->running_transaction) { - printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, - root->fs_info->running_transaction->transid); - WARN_ON(1); - } - if (trans->transid != root->fs_info->generation) { - printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, - root->fs_info->generation); - WARN_ON(1); - } - if (btrfs_header_generation(btrfs_buffer_header(buf)) == - trans->transid) { - *cow_ret = buf; - return 0; - } - cow = btrfs_alloc_free_block(trans, root, buf->b_blocknr); + cow = btrfs_alloc_free_block(trans, root, search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); + cow_node = btrfs_buffer_node(cow); if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) WARN_ON(1); + memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); btrfs_set_header_generation(&cow_node->header, trans->transid); btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); - ret = btrfs_inc_ref(trans, root, buf); - if (ret) - return ret; + + WARN_ON(btrfs_header_generation(btrfs_buffer_header(buf)) > + trans->transid); + if (btrfs_header_generation(btrfs_buffer_header(buf)) != + trans->transid) { + different_trans = 1; + ret = btrfs_inc_ref(trans, root, buf); + if (ret) + return ret; + } else { + WARN_ON(!root->ref_cows); + clean_tree_block(trans, root, buf); + } + if (buf == root->node) { root->node = cow; get_bh(cow); @@ -114,6 +114,8 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, bh_blocknr(cow)); btrfs_mark_buffer_dirty(parent); + WARN_ON(btrfs_header_generation(btrfs_buffer_header(parent)) != + trans->transid); btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); } btrfs_block_release(root, buf); @@ -122,6 +124,115 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } +int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct buffer_head *buf, struct buffer_head + *parent, int parent_slot, struct buffer_head + **cow_ret) +{ + u64 search_start; + if (trans->transaction != root->fs_info->running_transaction) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->running_transaction->transid); + WARN_ON(1); + } + if (trans->transid != root->fs_info->generation) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->generation); + WARN_ON(1); + } + if (btrfs_header_generation(btrfs_buffer_header(buf)) == + trans->transid) { + *cow_ret = buf; + return 0; + } + + search_start = bh_blocknr(buf) & ~((u64)65535); + return __btrfs_cow_block(trans, root, buf, parent, + parent_slot, cow_ret, search_start, 0); +} + +static int close_blocks(u64 blocknr, u64 other) +{ + if (blocknr < other && other - blocknr < 8) + return 1; + if (blocknr > other && blocknr - other < 8) + return 1; + return 0; +} + +int btrfs_realloc_node(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct buffer_head *parent, + int cache_only) +{ + struct btrfs_node *parent_node; + struct buffer_head *cur_bh; + struct buffer_head *tmp_bh; + u64 blocknr; + u64 search_start = 0; + u64 other; + u32 parent_nritems; + int start_slot; + int end_slot; + int i; + int err = 0; + + if (trans->transaction != root->fs_info->running_transaction) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->running_transaction->transid); + WARN_ON(1); + } + if (trans->transid != root->fs_info->generation) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->generation); + WARN_ON(1); + } + parent_node = btrfs_buffer_node(parent); + parent_nritems = btrfs_header_nritems(&parent_node->header); + + start_slot = 0; + end_slot = parent_nritems; + + if (parent_nritems == 1) + return 0; + + for (i = start_slot; i < end_slot; i++) { + int close = 1; + blocknr = btrfs_node_blockptr(parent_node, i); + if (i > 0) { + other = btrfs_node_blockptr(parent_node, i - 1); + close = close_blocks(blocknr, other); + } + if (close && i < end_slot - 1) { + other = btrfs_node_blockptr(parent_node, i + 1); + close = close_blocks(blocknr, other); + } + if (close) + continue; + + cur_bh = btrfs_find_tree_block(root, blocknr); + if (!cur_bh || !buffer_uptodate(cur_bh) || + buffer_locked(cur_bh)) { + if (cache_only) { + brelse(cur_bh); + continue; + } + brelse(cur_bh); + cur_bh = read_tree_block(root, blocknr); + } + if (search_start == 0) { + search_start = bh_blocknr(cur_bh) & ~((u64)65535); + } + err = __btrfs_cow_block(trans, root, cur_bh, parent, i, + &tmp_bh, search_start, + min(8, end_slot - i)); + if (err) + break; + search_start = bh_blocknr(tmp_bh); + brelse(tmp_bh); + } + return err; +} + /* * The leaf data grows from end-to-front in the node. * this returns the address of the start of the last item, @@ -221,6 +332,7 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, parent_slot = path->slots[level + 1]; parent_key = &parent->ptrs[parent_slot].key; + BUG_ON(memcmp(parent_key, &leaf->items[0].key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != @@ -643,7 +755,7 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, * readahead one full node of leaves */ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, - int slot) + int level, int slot) { struct btrfs_node *node; int i; @@ -659,10 +771,13 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, unsigned long gang[8]; struct buffer_head *bh; - if (!path->nodes[1]) + if (level == 0) + return; + + if (!path->nodes[level]) return; - node = btrfs_buffer_node(path->nodes[1]); + node = btrfs_buffer_node(path->nodes[level]); search = btrfs_node_blockptr(node, slot); bh = btrfs_find_tree_block(root, search); if (bh) { @@ -690,7 +805,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, for (i = 0; i < ret; i++) { blocknr = gang[i]; clear_radix_bit(&found, blocknr); - if (nread > 64) + if (nread > 32) continue; if (direction > 0 && cluster_start <= blocknr && cluster_start + 8 > blocknr) { @@ -726,7 +841,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root struct buffer_head *b; struct buffer_head *cow_buf; struct btrfs_node *c; - struct btrfs_root_item *root_item = &root->root_item; u64 blocknr; int slot; int ret; @@ -734,11 +848,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root int should_reada = p->reada; u8 lowest_level = 0; - if (btrfs_root_refs(root_item) == 0 && root->ref_cows) { - lowest_level = root_item->drop_level; - WARN_ON(ins_len || cow); - } - + lowest_level = p->lowest_level; + WARN_ON(lowest_level && ins_len); WARN_ON(p->nodes[0] != NULL); WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); again: @@ -798,8 +909,8 @@ again: if (level == lowest_level) break; blocknr = btrfs_node_blockptr(c, slot); - if (level == 1 && should_reada) - reada_for_search(root, p, slot); + if (should_reada) + reada_for_search(root, p, level, slot); b = read_tree_block(root, btrfs_node_blockptr(c, slot)); } else { @@ -960,7 +1071,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr); + t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr, 0); if (IS_ERR(t)) return PTR_ERR(t); c = btrfs_buffer_node(t); @@ -1070,7 +1181,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root } c_nritems = btrfs_header_nritems(&c->header); - split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr); + split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr, 0); if (IS_ERR(split_buffer)) return PTR_ERR(split_buffer); @@ -1461,7 +1572,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root nritems = btrfs_header_nritems(&l->header); mid = (nritems + 1)/ 2; - right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); + right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); if (IS_ERR(right_buffer)) return PTR_ERR(right_buffer); @@ -1560,7 +1671,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (!double_split) return ret; - right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); + right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); if (IS_ERR(right_buffer)) return PTR_ERR(right_buffer); @@ -1988,8 +2099,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) blocknr = btrfs_node_blockptr(c_node, slot); if (next) btrfs_block_release(root, next); - if (level == 1 && path->reada) - reada_for_search(root, path, slot); + if (path->reada) + reada_for_search(root, path, level, slot); next = read_tree_block(root, blocknr); break; } @@ -2002,8 +2113,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) path->slots[level] = 0; if (!level) break; - if (level == 1 && path->reada) - reada_for_search(root, path, slot); + if (path->reada) + reada_for_search(root, path, level, slot); next = read_tree_block(root, btrfs_node_blockptr(btrfs_buffer_node(next), 0)); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c5a18d5d7f7..42aa20301bc 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -178,6 +178,7 @@ struct btrfs_path { struct buffer_head *nodes[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL]; int reada; + int lowest_level; }; /* @@ -338,6 +339,9 @@ struct btrfs_root { u64 highest_inode; u64 last_inode_alloc; int ref_cows; + struct btrfs_key defrag_progress; + int defrag_running; + int defrag_level; }; /* the lower bits in the key flags defines the item type */ @@ -1031,10 +1035,11 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint); + struct btrfs_root *root, u64 hint, + u64 empty_size); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u64 num_blocks, u64 search_start, + u64 num_blocks, u64 empty_size, u64 search_start, u64 search_end, struct btrfs_key *ins, int data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); @@ -1051,6 +1056,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); /* ctree.c */ +int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct buffer_head *buf, struct buffer_head + *parent, int parent_slot, struct buffer_head + **cow_ret); int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 data_size); int btrfs_truncate_item(struct btrfs_trans_handle *trans, @@ -1060,6 +1069,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); +int btrfs_realloc_node(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct buffer_head *parent, + int cache_only); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); @@ -1171,4 +1183,7 @@ extern struct file_operations btrfs_file_operations; int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, u64 *hint_block); +/* tree-defrag.c */ +int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int cache_only); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 60db85bc4ba..c948416fea3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -273,7 +273,9 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf) { WARN_ON(atomic_read(&buf->b_count) == 0); + lock_buffer(buf); clear_buffer_dirty(buf); + unlock_buffer(buf); return 0; } @@ -294,6 +296,9 @@ static int __setup_root(int blocksize, root->last_inode_alloc = 0; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); + memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); + root->defrag_running = 0; + root->defrag_level = 0; root->root_key.objectid = objectid; return 0; } @@ -585,6 +590,7 @@ int close_ctree(struct btrfs_root *root) fs_info->closing = 1; btrfs_transaction_flush_work(root); mutex_lock(&fs_info->fs_mutex); + btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); /* run commit again to drop the original snapshot */ @@ -616,7 +622,9 @@ void btrfs_mark_buffer_dirty(struct buffer_head *bh) { struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; u64 transid = btrfs_header_generation(btrfs_buffer_header(bh)); + WARN_ON(!atomic_read(&bh->b_count)); + if (transid != root->fs_info->generation) { printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", (unsigned long long)bh->b_blocknr, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5d4d5d8db8e..26b8d340649 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -23,7 +23,8 @@ #include "transaction.h" static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *orig_root, u64 num_blocks, u64 search_start, + *orig_root, u64 num_blocks, u64 empty_size, + u64 search_start, u64 search_end, u64 hint_block, struct btrfs_key *ins, u64 exclude_start, u64 exclude_nr, int data); @@ -379,7 +380,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) return -ENOMEM; - ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, + ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); if (ret) { btrfs_free_path(path); @@ -533,7 +534,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, struct btrfs_block_group_item *bi; struct btrfs_key ins; - ret = find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, + ret = find_free_extent(trans, extent_root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); /* FIXME, set bit to recalc cache groups on next mount */ if (ret) @@ -708,6 +709,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, static int try_remove_page(struct address_space *mapping, unsigned long index) { int ret; + return 0; ret = invalidate_mapping_pages(mapping, index, index); return ret; } @@ -866,7 +868,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (!path) return -ENOMEM; - ret = find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); + ret = find_free_extent(trans, root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); if (ret) { btrfs_free_path(path); return ret; @@ -983,8 +985,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root * Any available blocks before search_start are skipped. */ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *orig_root, u64 num_blocks, u64 search_start, u64 - search_end, u64 hint_block, + *orig_root, u64 num_blocks, u64 empty_size, + u64 search_start, u64 search_end, u64 hint_block, struct btrfs_key *ins, u64 exclude_start, u64 exclude_nr, int data) { @@ -1042,6 +1044,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root data, 1); } + total_needed += empty_size; path = btrfs_alloc_path(); check_failed: @@ -1157,9 +1160,11 @@ check_pending: goto error; } search_start = orig_search_start; - if (wrapped) + if (wrapped) { + if (!full_scan) + total_needed -= empty_size; full_scan = 1; - else + } else wrapped = 1; goto new_group; } @@ -1238,9 +1243,11 @@ new_group: ret = -ENOSPC; goto error; } - if (wrapped) + if (wrapped) { + if (!full_scan) + total_needed -= empty_size; full_scan = 1; - else + } else wrapped = 1; } block_group = btrfs_lookup_block_group(info, search_start); @@ -1264,7 +1271,7 @@ error: */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u64 num_blocks, u64 hint_block, + u64 num_blocks, u64 empty_size, u64 hint_block, u64 search_end, struct btrfs_key *ins, int data) { int ret; @@ -1303,7 +1310,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, * in the correct block group. */ if (data) { - ret = find_free_extent(trans, root, 0, 0, + ret = find_free_extent(trans, root, 0, 0, 0, search_end, 0, &prealloc_key, 0, 0, 0); BUG_ON(ret); if (ret) @@ -1313,8 +1320,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, } /* do the real allocation */ - ret = find_free_extent(trans, root, num_blocks, search_start, - search_end, hint_block, ins, + ret = find_free_extent(trans, root, num_blocks, empty_size, + search_start, search_end, hint_block, ins, exclude_start, exclude_nr, data); BUG_ON(ret); if (ret) @@ -1333,7 +1340,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, exclude_start = ins->objectid; exclude_nr = ins->offset; hint_block = exclude_start + exclude_nr; - ret = find_free_extent(trans, root, 0, search_start, + ret = find_free_extent(trans, root, 0, 0, search_start, search_end, hint_block, &prealloc_key, exclude_start, exclude_nr, 0); @@ -1368,14 +1375,16 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, * returns the tree buffer or NULL. */ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint) + struct btrfs_root *root, u64 hint, + u64 empty_size) { struct btrfs_key ins; int ret; struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, hint, (unsigned long)-1, &ins, 0); + 1, empty_size, hint, + (unsigned long)-1, &ins, 0); if (ret) { BUG_ON(ret > 0); return ERR_PTR(ret); @@ -1385,6 +1394,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, btrfs_free_extent(trans, root, ins.objectid, 1, 0); return ERR_PTR(-ENOMEM); } + WARN_ON(buffer_dirty(buf)); set_buffer_uptodate(buf); set_buffer_checked(buf); set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); @@ -1591,13 +1601,15 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key key; struct btrfs_disk_key *found_key; struct btrfs_node *node; + btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); + level = root_item->drop_level; + path->lowest_level = level; wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) { + if (wret < 0) { ret = wret; goto out; } - level = root_item->drop_level; node = btrfs_buffer_node(path->nodes[level]); found_key = &node->ptrs[path->slots[level]].key; WARN_ON(memcmp(found_key, &root_item->drop_progress, @@ -1617,8 +1629,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; num_walks++; if (num_walks > 10) { - struct btrfs_key key; - btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); ret = -EAGAIN; get_bh(root->node); break; @@ -1627,6 +1637,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { btrfs_block_release(root, path->nodes[i]); + path->nodes[i] = 0; } } out: diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1fe38fe8415..00b118a2db6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -512,7 +512,7 @@ static int prepare_pages(struct btrfs_root *root, if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { err = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, hint_block, (u64)-1, + num_blocks, 0, hint_block, (u64)-1, &ins, 1); if (err) goto failed_truncate; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3889032fc44..12aa043b9f6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -554,7 +554,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) &alloc_hint); if (ret) goto out; - ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, + ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) goto out; @@ -1360,7 +1360,7 @@ not_found: if (create & BTRFS_GET_BLOCK_CREATE) { struct btrfs_key ins; ret = btrfs_alloc_extent(trans, root, inode->i_ino, - 1, alloc_hint, (u64)-1, + 1, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { err = ret; @@ -1998,7 +1998,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - subvol = btrfs_alloc_free_block(trans, root, 0); + subvol = btrfs_alloc_free_block(trans, root, 0, 0); if (IS_ERR(subvol)) return PTR_ERR(subvol); leaf = btrfs_buffer_leaf(subvol); @@ -2159,7 +2159,9 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_vol_args vol_args; + struct btrfs_trans_handle *trans; int ret = 0; + int err; struct btrfs_dir_item *di; int namelen; struct btrfs_path *path; @@ -2196,6 +2198,31 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int else ret = create_snapshot(root, vol_args.name, namelen); break; + + case BTRFS_IOC_DEFRAG: + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + memset(&root->defrag_progress, 0, + sizeof(root->defrag_progress)); + while (1) { + root->defrag_running = 1; + err = btrfs_defrag_leaves(trans, root, 0); + + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + btrfs_btree_balance_dirty(root); + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (err != -EAGAIN) + break; + } + root->defrag_running = 0; + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + ret = 0; + break; default: return -ENOTTY; } diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 23bed48c51a..8bc47dec286 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -28,6 +28,6 @@ struct btrfs_ioctl_vol_args { #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_ADD_DISK _IOW(BTRFS_IOCTL_MAGIC, 2, \ +#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ struct btrfs_ioctl_vol_args) #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 498626470a0..338a7199363 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -29,6 +29,7 @@ extern struct kmem_cache *btrfs_transaction_cachep; static struct workqueue_struct *trans_wq; #define BTRFS_ROOT_TRANS_TAG 0 +#define BTRFS_ROOT_DEFRAG_TAG 1 static void put_transaction(struct btrfs_transaction *transaction) { @@ -69,35 +70,41 @@ static int join_transaction(struct btrfs_root *root) return 0; } +static int record_root_in_trans(struct btrfs_root *root) +{ + u64 running_trans_id = root->fs_info->running_transaction->transid; + if (root->ref_cows && root->last_trans < running_trans_id) { + WARN_ON(root == root->fs_info->extent_root); + if (root->root_item.refs != 0) { + radix_tree_tag_set(&root->fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_TRANS_TAG); + radix_tree_tag_set(&root->fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_DEFRAG_TAG); + root->commit_root = root->node; + get_bh(root->node); + } else { + WARN_ON(1); + } + root->last_trans = running_trans_id; + } + return 0; +} + struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, int num_blocks) { struct btrfs_trans_handle *h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); int ret; - u64 running_trans_id; mutex_lock(&root->fs_info->trans_mutex); ret = join_transaction(root); BUG_ON(ret); - running_trans_id = root->fs_info->running_transaction->transid; - if (root != root->fs_info->tree_root && root->last_trans < - running_trans_id) { - WARN_ON(root == root->fs_info->extent_root); - WARN_ON(root->ref_cows != 1); - if (root->root_item.refs != 0) { - radix_tree_tag_set(&root->fs_info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_TRANS_TAG); - root->commit_root = root->node; - get_bh(root->node); - } else { - WARN_ON(1); - } - } - root->last_trans = running_trans_id; - h->transid = running_trans_id; + record_root_in_trans(root); + h->transid = root->fs_info->running_transaction->transid; h->transaction = root->fs_info->running_transaction; h->blocks_reserved = num_blocks; h->blocks_used = 0; @@ -155,6 +162,15 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, gang[i]); if (!page) continue; + if (PageWriteback(page)) { + if (PageDirty(page)) + wait_on_page_writeback(page); + else { + unlock_page(page); + page_cache_release(page); + continue; + } + } err = write_one_page(page, 0); if (err) werr = err; @@ -299,6 +315,58 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, return err; } +int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) +{ + struct btrfs_root *gang[1]; + struct btrfs_root *root; + struct btrfs_root *tree_root = info->tree_root; + struct btrfs_trans_handle *trans; + int i; + int ret; + int err = 0; + u64 last = 0; + + trans = btrfs_start_transaction(tree_root, 1); + while(1) { + ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix, + (void **)gang, last, + ARRAY_SIZE(gang), + BTRFS_ROOT_DEFRAG_TAG); + if (ret == 0) + break; + for (i = 0; i < ret; i++) { + root = gang[i]; + last = root->root_key.objectid + 1; + radix_tree_tag_clear(&info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_DEFRAG_TAG); + if (root->defrag_running) + continue; + + while (1) { + mutex_lock(&root->fs_info->trans_mutex); + record_root_in_trans(root); + mutex_unlock(&root->fs_info->trans_mutex); + + root->defrag_running = 1; + err = btrfs_defrag_leaves(trans, root, 1); + btrfs_end_transaction(trans, tree_root); + mutex_unlock(&info->fs_mutex); + + btrfs_btree_balance_dirty(root); + + mutex_lock(&info->fs_mutex); + trans = btrfs_start_transaction(tree_root, 1); + if (err != -EAGAIN) + break; + } + root->defrag_running = 0; + } + } + btrfs_end_transaction(trans, tree_root); + return err; +} + static int drop_dirty_roots(struct btrfs_root *tree_root, struct list_head *list) { @@ -475,6 +543,7 @@ void btrfs_transaction_cleaner(struct work_struct *work) goto out; } mutex_unlock(&root->fs_info->trans_mutex); + btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); out: diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index ebf44f3e111..8b2714e6556 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -70,5 +70,6 @@ void btrfs_transaction_queue_work(struct btrfs_root *root, int delay); void btrfs_init_transaction_sys(void); void btrfs_exit_transaction_sys(void); int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list); +int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); #endif diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c new file mode 100644 index 00000000000..15d0a486fb5 --- /dev/null +++ b/fs/btrfs/tree-defrag.c @@ -0,0 +1,222 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include "ctree.h" +#include "disk-io.h" +#include "print-tree.h" +#include "transaction.h" + +static void reada_defrag(struct btrfs_root *root, + struct btrfs_node *node) +{ + int i; + u32 nritems; + u64 blocknr; + int ret; + + nritems = btrfs_header_nritems(&node->header); + for (i = 0; i < nritems; i++) { + blocknr = btrfs_node_blockptr(node, i); + ret = readahead_tree_block(root, blocknr); + if (ret) + break; + } +} + +static int defrag_walk_down(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level, + int cache_only) +{ + struct buffer_head *next; + struct buffer_head *cur; + u64 blocknr; + int ret = 0; + + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + + while(*level > 0) { + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + cur = path->nodes[*level]; + + if (!cache_only && *level > 1 && path->slots[*level] == 0) + reada_defrag(root, btrfs_buffer_node(cur)); + + if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) + WARN_ON(1); + + if (path->slots[*level] >= + btrfs_header_nritems(btrfs_buffer_header(cur))) + break; + + if (*level == 1) { + ret = btrfs_realloc_node(trans, root, + path->nodes[*level], + cache_only); + break; + } + blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), + path->slots[*level]); + + if (cache_only) { + next = btrfs_find_tree_block(root, blocknr); + if (!next || !buffer_uptodate(next) || + buffer_locked(next)) { + brelse(next); + path->slots[*level]++; + continue; + } + } else { + next = read_tree_block(root, blocknr); + } + ret = btrfs_cow_block(trans, root, next, path->nodes[*level], + path->slots[*level], &next); + BUG_ON(ret); + ret = btrfs_realloc_node(trans, root, next, cache_only); + BUG_ON(ret); + WARN_ON(*level <= 0); + if (path->nodes[*level-1]) + btrfs_block_release(root, path->nodes[*level-1]); + path->nodes[*level-1] = next; + *level = btrfs_header_level(btrfs_buffer_header(next)); + path->slots[*level] = 0; + } + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + btrfs_block_release(root, path->nodes[*level]); + path->nodes[*level] = NULL; + *level += 1; + WARN_ON(ret); + return 0; +} + +static int defrag_walk_up(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level, + int cache_only) +{ + int i; + int slot; + struct btrfs_node *node; + + for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { + slot = path->slots[i]; + if (slot < btrfs_header_nritems( + btrfs_buffer_header(path->nodes[i])) - 1) { + path->slots[i]++; + *level = i; + node = btrfs_buffer_node(path->nodes[i]); + WARN_ON(i == 0); + btrfs_disk_key_to_cpu(&root->defrag_progress, + &node->ptrs[path->slots[i]].key); + root->defrag_level = i; + return 0; + } else { + btrfs_block_release(root, path->nodes[*level]); + path->nodes[*level] = NULL; + *level = i + 1; + } + } + return 1; +} + +int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int cache_only) +{ + struct btrfs_path *path = NULL; + struct buffer_head *tmp; + int ret = 0; + int wret; + int level; + int orig_level; + int i; + int num_runs = 0; + + if (root->ref_cows == 0) { + goto out; + } + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + level = btrfs_header_level(btrfs_buffer_header(root->node)); + orig_level = level; + if (level == 0) { + goto out; + } + if (root->defrag_progress.objectid == 0) { + get_bh(root->node); + ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); + BUG_ON(ret); + ret = btrfs_realloc_node(trans, root, root->node, cache_only); + BUG_ON(ret); + path->nodes[level] = root->node; + path->slots[level] = 0; + } else { + level = root->defrag_level; + path->lowest_level = level; + wret = btrfs_search_slot(trans, root, &root->defrag_progress, + path, 0, 1); + + if (wret < 0) { + ret = wret; + goto out; + } + while(level > 0 && !path->nodes[level]) + level--; + if (!path->nodes[level]) { + ret = 0; + goto out; + } + } + + while(1) { + wret = defrag_walk_down(trans, root, path, &level, cache_only); + if (wret > 0) + break; + if (wret < 0) + ret = wret; + + wret = defrag_walk_up(trans, root, path, &level, cache_only); + if (wret > 0) + break; + if (wret < 0) + ret = wret; + if (num_runs++ > 8) { + ret = -EAGAIN; + break; + } + } + for (i = 0; i <= orig_level; i++) { + if (path->nodes[i]) { + btrfs_block_release(root, path->nodes[i]); + path->nodes[i] = 0; + } + } +out: + if (path) + btrfs_free_path(path); + if (ret != -EAGAIN) { + memset(&root->defrag_progress, 0, + sizeof(root->defrag_progress)); + } + return ret; +} -- cgit v1.2.3-70-g09d2 From f4468e94c86c2031f447788c4bfe7dfd2fcdc93a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 8 Aug 2007 10:08:58 -0400 Subject: Btrfs: Let some locks go during defrag and snapshot dropping Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++-- fs/btrfs/extent-tree.c | 6 ++++-- fs/btrfs/transaction.c | 8 ++++++++ 3 files changed, 14 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 42aa20301bc..8b7d01597f4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -300,9 +300,9 @@ struct btrfs_fs_info { struct radix_tree_root block_group_data_radix; struct radix_tree_root extent_map_radix; - u64 extent_tree_insert[BTRFS_MAX_LEVEL * 3]; + u64 extent_tree_insert[BTRFS_MAX_LEVEL * 6]; int extent_tree_insert_nr; - u64 extent_tree_prealloc[BTRFS_MAX_LEVEL * 3]; + u64 extent_tree_prealloc[BTRFS_MAX_LEVEL * 6]; int extent_tree_prealloc_nr; u64 generation; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 26b8d340649..f7d76d34fc3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1018,7 +1018,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (num_blocks == 0) { fill_prealloc = 1; num_blocks = 1; - total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3; + total_needed = (min(level + 1, BTRFS_MAX_LEVEL)) * 6; } if (fill_prealloc) { u64 first; @@ -1300,6 +1300,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, ins->objectid; ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0, 0); + WARN_ON(info->extent_tree_insert_nr > + ARRAY_SIZE(info->extent_tree_insert)); BUG_ON(ret); return 0; } @@ -1628,7 +1630,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root if (wret < 0) ret = wret; num_walks++; - if (num_walks > 10) { + if (num_walks > 2) { ret = -EAGAIN; get_bh(root->node); break; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 338a7199363..88b0752550b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -354,6 +354,7 @@ int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) mutex_unlock(&info->fs_mutex); btrfs_btree_balance_dirty(root); + cond_resched(); mutex_lock(&info->fs_mutex); trans = btrfs_start_transaction(tree_root, 1); @@ -394,6 +395,12 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, ret = err; ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); + mutex_unlock(&tree_root->fs_info->fs_mutex); + + btrfs_btree_balance_dirty(tree_root); + schedule(); + + mutex_lock(&tree_root->fs_info->fs_mutex); } BUG_ON(ret); ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); @@ -406,6 +413,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, kfree(dirty); mutex_unlock(&tree_root->fs_info->fs_mutex); btrfs_btree_balance_dirty(tree_root); + schedule(); } return ret; } -- cgit v1.2.3-70-g09d2 From 26b8003f10569a9155b7539ef5a7379ee0c6b050 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 8 Aug 2007 20:17:12 -0400 Subject: Btrfs: Replace extent tree preallocation code with some bit radix magic. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 7 +- fs/btrfs/disk-io.c | 3 +- fs/btrfs/extent-tree.c | 230 +++++++++---------------------------------------- fs/btrfs/transaction.c | 2 + fs/btrfs/transaction.h | 2 + 5 files changed, 49 insertions(+), 195 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8b7d01597f4..59e09e37ab9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -299,12 +299,7 @@ struct btrfs_fs_info { struct radix_tree_root block_group_radix; struct radix_tree_root block_group_data_radix; struct radix_tree_root extent_map_radix; - - u64 extent_tree_insert[BTRFS_MAX_LEVEL * 6]; - int extent_tree_insert_nr; - u64 extent_tree_prealloc[BTRFS_MAX_LEVEL * 6]; - int extent_tree_prealloc_nr; - + struct radix_tree_root extent_ins_radix; u64 generation; struct btrfs_transaction *running_transaction; struct btrfs_super_block *disk_super; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c948416fea3..192211274ce 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -427,6 +427,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); init_bit_radix(&fs_info->extent_map_radix); + init_bit_radix(&fs_info->extent_ins_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL); @@ -443,8 +444,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; fs_info->do_barriers = 1; - fs_info->extent_tree_insert_nr = 0; - fs_info->extent_tree_prealloc_nr = 0; fs_info->closing = 0; INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f7d76d34fc3..ba50bd7b9a7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -22,12 +22,6 @@ #include "print-tree.h" #include "transaction.h" -static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *orig_root, u64 num_blocks, u64 empty_size, - u64 search_start, - u64 search_end, u64 hint_block, - struct btrfs_key *ins, u64 exclude_start, - u64 exclude_nr, int data); static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct @@ -174,9 +168,6 @@ static u64 find_search_start(struct btrfs_root *root, if (cache->data) goto out; - if (num > 1) { - last = max(last, cache->last_prealloc); - } again: ret = cache_block_group(root, cache); if (ret) @@ -374,18 +365,12 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_key key; struct btrfs_leaf *l; struct btrfs_extent_item *item; - struct btrfs_key ins; u32 refs; path = btrfs_alloc_path(); if (!path) return -ENOMEM; - ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, 0, - (u64)-1, 0, &ins, 0, 0, 0); - if (ret) { - btrfs_free_path(path); - return ret; - } + key.objectid = blocknr; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -532,13 +517,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, int pending_ret; struct btrfs_root *extent_root = root->fs_info->extent_root; struct btrfs_block_group_item *bi; - struct btrfs_key ins; - ret = find_free_extent(trans, extent_root, 0, 0, 0, (u64)-1, 0, &ins, - 0, 0, 0); - /* FIXME, set bit to recalc cache groups on next mount */ - if (ret) - return ret; ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); if (ret < 0) goto fail; @@ -706,14 +685,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, return 0; } -static int try_remove_page(struct address_space *mapping, unsigned long index) -{ - int ret; - return 0; - ret = invalidate_mapping_pages(mapping, index, index); - return ret; -} - int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy) { unsigned long gang[8]; @@ -732,6 +703,9 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy) last = gang[i] + 1; } } + ret = find_first_radix_bit(&root->fs_info->extent_ins_radix, gang, 0, + ARRAY_SIZE(gang)); + WARN_ON(ret); return 0; } @@ -740,7 +714,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct radix_tree_root *unpin_radix) { unsigned long gang[8]; - struct inode *btree_inode = root->fs_info->btree_inode; struct btrfs_block_group_cache *block_group; u64 first = 0; int ret; @@ -765,14 +738,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, block_group->pinned--; if (gang[i] < block_group->last_alloc) block_group->last_alloc = gang[i]; - if (gang[i] < block_group->last_prealloc) - block_group->last_prealloc = gang[i]; if (!block_group->data) set_radix_bit(extent_radix, gang[i]); } - try_remove_page(btree_inode->i_mapping, - gang[i] << (PAGE_CACHE_SHIFT - - btree_inode->i_blkbits)); } } return 0; @@ -785,7 +753,8 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct struct btrfs_extent_item extent_item; int i; int ret; - u64 super_blocks_used; + int err; + unsigned long gang[8]; struct btrfs_fs_info *info = extent_root->fs_info; btrfs_set_extent_refs(&extent_item, 1); @@ -794,16 +763,21 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid); - for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) { - ins.objectid = extent_root->fs_info->extent_tree_insert[i]; - super_blocks_used = btrfs_super_blocks_used(&info->super_copy); - btrfs_set_super_blocks_used(&info->super_copy, - super_blocks_used + 1); - ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item, - sizeof(extent_item)); - BUG_ON(ret); + while(1) { + ret = find_first_radix_bit(&info->extent_ins_radix, gang, 0, + ARRAY_SIZE(gang)); + if (!ret) + break; + + for (i = 0; i < ret; i++) { + ins.objectid = gang[i]; + err = btrfs_insert_item(trans, extent_root, &ins, + &extent_item, + sizeof(extent_item)); + clear_radix_bit(&info->extent_ins_radix, gang[i]); + WARN_ON(err); + } } - extent_root->fs_info->extent_tree_insert_nr = 0; return 0; } @@ -856,7 +830,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_root *extent_root = info->extent_root; int ret; struct btrfs_extent_item *ei; - struct btrfs_key ins; u32 refs; key.objectid = blocknr; @@ -868,12 +841,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (!path) return -ENOMEM; - ret = find_free_extent(trans, root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); - if (ret) { - btrfs_free_path(path); - return ret; - } - ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); if (ret < 0) return ret; @@ -1003,35 +970,17 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_root * root = orig_root->fs_info->extent_root; struct btrfs_fs_info *info = root->fs_info; int total_needed = num_blocks; - int total_found = 0; - int fill_prealloc = 0; int level; struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; u64 limit; + WARN_ON(num_blocks < 1); ins->flags = 0; btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); level = btrfs_header_level(btrfs_buffer_header(root->node)); - if (num_blocks == 0) { - fill_prealloc = 1; - num_blocks = 1; - total_needed = (min(level + 1, BTRFS_MAX_LEVEL)) * 6; - } - if (fill_prealloc) { - u64 first; - int nr = info->extent_tree_prealloc_nr; - first = info->extent_tree_prealloc[nr - 1]; - if (info->extent_tree_prealloc_nr >= total_needed && - first >= search_start) { - ins->objectid = info->extent_tree_prealloc[0]; - ins->offset = 1; - return 0; - } - info->extent_tree_prealloc_nr = 0; - } if (search_end == (u64)-1) search_end = btrfs_super_total_blocks(&info->super_copy); if (hint_block) { @@ -1091,10 +1040,6 @@ check_failed: l = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { - if (fill_prealloc) { - info->extent_tree_prealloc_nr = 0; - total_found = 0; - } if (start_found) limit = last_block + (block_group->key.offset >> 1); @@ -1170,67 +1115,21 @@ check_pending: } for (test_block = ins->objectid; test_block < ins->objectid + num_blocks; test_block++) { - if (test_radix_bit(&info->pinned_radix, test_block)) { + if (test_radix_bit(&info->pinned_radix, test_block) || + test_radix_bit(&info->extent_ins_radix, test_block)) { search_start = test_block + 1; goto new_group; } } - if (!fill_prealloc && info->extent_tree_insert_nr) { - u64 last = - info->extent_tree_insert[info->extent_tree_insert_nr - 1]; - if (ins->objectid + num_blocks > - info->extent_tree_insert[0] && - ins->objectid <= last) { - search_start = last + 1; - WARN_ON(!full_scan); - goto new_group; - } - } - if (!fill_prealloc && info->extent_tree_prealloc_nr) { - u64 first = - info->extent_tree_prealloc[info->extent_tree_prealloc_nr - 1]; - if (ins->objectid + num_blocks > first && - ins->objectid <= info->extent_tree_prealloc[0]) { - search_start = info->extent_tree_prealloc[0] + 1; - goto new_group; - } - } if (exclude_nr > 0 && (ins->objectid + num_blocks > exclude_start && ins->objectid < exclude_start + exclude_nr)) { search_start = exclude_start + exclude_nr; goto new_group; } - if (fill_prealloc) { - int nr; - test_block = ins->objectid; - if (test_block - info->extent_tree_prealloc[total_needed - 1] >= - leaf_range(root)) { - total_found = 0; - info->extent_tree_prealloc_nr = total_found; - } - while(test_block < ins->objectid + ins->offset && - total_found < total_needed) { - nr = total_needed - total_found - 1; - BUG_ON(nr < 0); - info->extent_tree_prealloc[nr] = test_block; - total_found++; - test_block++; - } - if (total_found < total_needed) { - search_start = test_block; - goto new_group; - } - info->extent_tree_prealloc_nr = total_found; - } if (!data) { block_group = btrfs_lookup_block_group(info, ins->objectid); - if (block_group) { - if (fill_prealloc) - block_group->last_prealloc = - info->extent_tree_prealloc[total_needed-1]; - else - trans->block_group = block_group; - } + if (block_group) + trans->block_group = block_group; } ins->offset = num_blocks; btrfs_free_path(path); @@ -1278,85 +1177,41 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, int pending_ret; u64 super_blocks_used; u64 search_start = 0; - u64 exclude_start = 0; - u64 exclude_nr = 0; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; struct btrfs_extent_item extent_item; - struct btrfs_key prealloc_key; btrfs_set_extent_refs(&extent_item, 1); btrfs_set_extent_owner(&extent_item, owner); - if (root == extent_root) { - int nr; - BUG_ON(info->extent_tree_prealloc_nr == 0); - BUG_ON(num_blocks != 1); - ins->offset = 1; - info->extent_tree_prealloc_nr--; - nr = info->extent_tree_prealloc_nr; - ins->objectid = info->extent_tree_prealloc[nr]; - info->extent_tree_insert[info->extent_tree_insert_nr++] = - ins->objectid; - ret = update_block_group(trans, root, - ins->objectid, ins->offset, 1, 0, 0); - WARN_ON(info->extent_tree_insert_nr > - ARRAY_SIZE(info->extent_tree_insert)); - BUG_ON(ret); - return 0; - } - - /* - * if we're doing a data allocation, preallocate room in the - * extent tree first. This way the extent tree blocks end up - * in the correct block group. - */ - if (data) { - ret = find_free_extent(trans, root, 0, 0, 0, - search_end, 0, &prealloc_key, 0, 0, 0); - BUG_ON(ret); - if (ret) - return ret; - exclude_nr = info->extent_tree_prealloc_nr; - exclude_start = info->extent_tree_prealloc[exclude_nr - 1]; - } - - /* do the real allocation */ + WARN_ON(num_blocks < 1); ret = find_free_extent(trans, root, num_blocks, empty_size, search_start, search_end, hint_block, ins, - exclude_start, exclude_nr, data); + trans->alloc_exclude_start, + trans->alloc_exclude_nr, data); BUG_ON(ret); if (ret) return ret; - /* - * if we're doing a metadata allocation, preallocate space in the - * extent tree second. This way, we don't create a tiny hole - * in the allocation map between any unused preallocation blocks - * and the metadata block we're actually allocating. On disk, - * it'll go: - * [block we've allocated], [used prealloc 1], [ unused prealloc ] - * The unused prealloc will get reused the next time around. - */ - if (!data) { - exclude_start = ins->objectid; - exclude_nr = ins->offset; - hint_block = exclude_start + exclude_nr; - ret = find_free_extent(trans, root, 0, 0, search_start, - search_end, hint_block, - &prealloc_key, exclude_start, - exclude_nr, 0); - BUG_ON(ret); - if (ret) - return ret; - } - super_blocks_used = btrfs_super_blocks_used(&info->super_copy); btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used + num_blocks); + + if (root == extent_root) { + BUG_ON(num_blocks != 1); + set_radix_bit(&root->fs_info->extent_ins_radix, ins->objectid); + goto update_block; + } + + WARN_ON(trans->alloc_exclude_nr); + trans->alloc_exclude_start = ins->objectid; + trans->alloc_exclude_nr = ins->offset; ret = btrfs_insert_item(trans, extent_root, ins, &extent_item, sizeof(extent_item)); + trans->alloc_exclude_start = 0; + trans->alloc_exclude_nr = 0; + BUG_ON(ret); finish_current_insert(trans, extent_root); pending_ret = del_pending_extents(trans, extent_root); @@ -1366,6 +1221,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, if (pending_ret) { return pending_ret; } + +update_block: ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0, data); BUG_ON(ret); @@ -1750,7 +1607,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) memcpy(&cache->key, &found_key, sizeof(found_key)); cache->last_alloc = cache->key.objectid; cache->first_free = cache->key.objectid; - cache->last_prealloc = cache->key.objectid; cache->pinned = 0; cache->cached = 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 88b0752550b..e64ecec3f52 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -109,6 +109,8 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, h->blocks_reserved = num_blocks; h->blocks_used = 0; h->block_group = NULL; + h->alloc_exclude_nr = 0; + h->alloc_exclude_start = 0; root->fs_info->running_transaction->use_count++; mutex_unlock(&root->fs_info->trans_mutex); return h; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 8b2714e6556..65a395eeca9 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -39,6 +39,8 @@ struct btrfs_trans_handle { unsigned long blocks_used; struct btrfs_transaction *transaction; struct btrfs_block_group_cache *block_group; + u64 alloc_exclude_start; + u64 alloc_exclude_nr; }; -- cgit v1.2.3-70-g09d2 From 409eb95d7f6632d5af32b795244ce68a29e49319 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 8 Aug 2007 20:17:12 -0400 Subject: Btrfs: Further reduce the concurrency penalty of defrag and drop_snapshot Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 13 ++++++------- fs/btrfs/transaction.c | 3 +++ fs/btrfs/tree-defrag.c | 7 ++----- 3 files changed, 11 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ba50bd7b9a7..120c448f58f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1312,7 +1312,10 @@ static void reada_walk_down(struct btrfs_root *root, BUG_ON(ret); if (refs != 1) continue; + mutex_unlock(&root->fs_info->fs_mutex); ret = readahead_tree_block(root, blocknr); + cond_resched(); + mutex_lock(&root->fs_info->fs_mutex); if (ret) break; } @@ -1445,7 +1448,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_path *path; int i; int orig_level; - int num_walks = 0; struct btrfs_root_item *root_item = &root->root_item; path = btrfs_alloc_path(); @@ -1486,12 +1488,9 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root break; if (wret < 0) ret = wret; - num_walks++; - if (num_walks > 2) { - ret = -EAGAIN; - get_bh(root->node); - break; - } + ret = -EAGAIN; + get_bh(root->node); + break; } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e64ecec3f52..204337c5ca0 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -364,6 +364,9 @@ int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) break; } root->defrag_running = 0; + radix_tree_tag_clear(&info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_DEFRAG_TAG); } } btrfs_end_transaction(trans, tree_root); diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 15d0a486fb5..7ea66b4aa5c 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -148,7 +148,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, int level; int orig_level; int i; - int num_runs = 0; if (root->ref_cows == 0) { goto out; @@ -200,10 +199,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, break; if (wret < 0) ret = wret; - if (num_runs++ > 8) { - ret = -EAGAIN; - break; - } + ret = -EAGAIN; + break; } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { -- cgit v1.2.3-70-g09d2 From e9d0b13b5bbb58c9b840e407a8d181442f799966 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 10 Aug 2007 14:06:19 -0400 Subject: Btrfs: Btree defrag on the extent-mapping tree as well Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 18 ++++++---- fs/btrfs/ctree.h | 4 ++- fs/btrfs/extent-tree.c | 28 +++++++++++++++- fs/btrfs/inode.c | 23 ++----------- fs/btrfs/super.c | 2 ++ fs/btrfs/transaction.c | 91 +++++++++++++++++++++++++++----------------------- fs/btrfs/transaction.h | 2 ++ fs/btrfs/tree-defrag.c | 36 ++++++++++++++++---- 8 files changed, 125 insertions(+), 79 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c7e47e77723..ee1ae00d282 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -99,7 +99,6 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root if (ret) return ret; } else { - WARN_ON(!root->ref_cows); clean_tree_block(trans, root, buf); } @@ -162,13 +161,14 @@ static int close_blocks(u64 blocknr, u64 other) int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *parent, - int cache_only) + int cache_only, u64 *last_ret) { struct btrfs_node *parent_node; struct buffer_head *cur_bh; struct buffer_head *tmp_bh; u64 blocknr; - u64 search_start = 0; + u64 search_start = *last_ret; + u64 last_block = 0; u64 other; u32 parent_nritems; int start_slot; @@ -198,6 +198,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, for (i = start_slot; i < end_slot; i++) { int close = 1; blocknr = btrfs_node_blockptr(parent_node, i); + if (last_block == 0) + last_block = blocknr; if (i > 0) { other = btrfs_node_blockptr(parent_node, i - 1); close = close_blocks(blocknr, other); @@ -206,8 +208,10 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, other = btrfs_node_blockptr(parent_node, i + 1); close = close_blocks(blocknr, other); } - if (close) + if (close) { + last_block = blocknr; continue; + } cur_bh = btrfs_find_tree_block(root, blocknr); if (!cur_bh || !buffer_uptodate(cur_bh) || @@ -219,9 +223,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, brelse(cur_bh); cur_bh = read_tree_block(root, blocknr); } - if (search_start == 0) { - search_start = bh_blocknr(cur_bh) & ~((u64)65535); - } + if (search_start == 0) + search_start = last_block & ~((u64)65535); + err = __btrfs_cow_block(trans, root, cur_bh, parent, i, &tmp_bh, search_start, min(8, end_slot - i)); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 59e09e37ab9..d3cd564b3b3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1019,6 +1019,8 @@ static inline void btrfs_memmove(struct btrfs_root *root, btrfs_item_offset((leaf)->items + (slot)))) /* extent-tree.c */ +int btrfs_extent_post_op(struct btrfs_trans_handle *trans, + struct btrfs_root *root); int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, @@ -1066,7 +1068,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root ins_len, int cow); int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *parent, - int cache_only); + int cache_only, u64 *last_ret); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 120c448f58f..3418bb62b99 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -396,6 +396,14 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, return 0; } +int btrfs_extent_post_op(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + finish_current_insert(trans, root->fs_info->extent_root); + del_pending_extents(trans, root->fs_info->extent_root); + return 0; +} + static int lookup_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, u32 *refs) @@ -1374,7 +1382,25 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ret); continue; } - next = read_tree_block(root, blocknr); + next = btrfs_find_tree_block(root, blocknr); + if (!next || !buffer_uptodate(next)) { + brelse(next); + mutex_unlock(&root->fs_info->fs_mutex); + next = read_tree_block(root, blocknr); + mutex_lock(&root->fs_info->fs_mutex); + + /* we dropped the lock, check one more time */ + ret = lookup_extent_ref(trans, root, blocknr, 1, &refs); + BUG_ON(ret); + if (refs != 1) { + path->slots[*level]++; + brelse(next); + ret = btrfs_free_extent(trans, root, + blocknr, 1, 1); + BUG_ON(ret); + continue; + } + } WARN_ON(*level <= 0); if (path->nodes[*level-1]) btrfs_block_release(root, path->nodes[*level-1]); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 12aa043b9f6..5c05ecbc572 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2159,9 +2159,7 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_vol_args vol_args; - struct btrfs_trans_handle *trans; int ret = 0; - int err; struct btrfs_dir_item *di; int namelen; struct btrfs_path *path; @@ -2201,25 +2199,8 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int case BTRFS_IOC_DEFRAG: mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - memset(&root->defrag_progress, 0, - sizeof(root->defrag_progress)); - while (1) { - root->defrag_running = 1; - err = btrfs_defrag_leaves(trans, root, 0); - - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - - btrfs_btree_balance_dirty(root); - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - if (err != -EAGAIN) - break; - } - root->defrag_running = 0; - btrfs_end_transaction(trans, root); + btrfs_defrag_root(root, 0); + btrfs_defrag_root(root->fs_info->extent_root, 0); mutex_unlock(&root->fs_info->fs_mutex); ret = 0; break; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2e797d5fb28..74f3de47423 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -121,7 +121,9 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) filemap_flush(root->fs_info->btree_inode->i_mapping); return 0; } + btrfs_clean_old_snapshots(root); mutex_lock(&root->fs_info->fs_mutex); + btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sb->s_dirt = 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 204337c5ca0..c9d52dc83e4 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -317,18 +317,47 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, return err; } +int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) +{ + struct btrfs_fs_info *info = root->fs_info; + int ret; + struct btrfs_trans_handle *trans; + + if (root->defrag_running) + return 0; + + trans = btrfs_start_transaction(root, 1); + while (1) { + root->defrag_running = 1; + ret = btrfs_defrag_leaves(trans, root, cacheonly); + btrfs_end_transaction(trans, root); + mutex_unlock(&info->fs_mutex); + + btrfs_btree_balance_dirty(root); + cond_resched(); + + mutex_lock(&info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (ret != -EAGAIN) + break; + } + root->defrag_running = 0; + radix_tree_tag_clear(&info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_DEFRAG_TAG); + btrfs_end_transaction(trans, root); + return 0; +} + int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) { struct btrfs_root *gang[1]; struct btrfs_root *root; - struct btrfs_root *tree_root = info->tree_root; - struct btrfs_trans_handle *trans; int i; int ret; int err = 0; u64 last = 0; - trans = btrfs_start_transaction(tree_root, 1); while(1) { ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix, (void **)gang, last, @@ -339,37 +368,10 @@ int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) for (i = 0; i < ret; i++) { root = gang[i]; last = root->root_key.objectid + 1; - radix_tree_tag_clear(&info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_DEFRAG_TAG); - if (root->defrag_running) - continue; - - while (1) { - mutex_lock(&root->fs_info->trans_mutex); - record_root_in_trans(root); - mutex_unlock(&root->fs_info->trans_mutex); - - root->defrag_running = 1; - err = btrfs_defrag_leaves(trans, root, 1); - btrfs_end_transaction(trans, tree_root); - mutex_unlock(&info->fs_mutex); - - btrfs_btree_balance_dirty(root); - cond_resched(); - - mutex_lock(&info->fs_mutex); - trans = btrfs_start_transaction(tree_root, 1); - if (err != -EAGAIN) - break; - } - root->defrag_running = 0; - radix_tree_tag_clear(&info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_DEFRAG_TAG); + btrfs_defrag_root(root, 1); } } - btrfs_end_transaction(trans, tree_root); + btrfs_defrag_root(info->extent_root, 1); return err; } @@ -527,6 +529,20 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, return ret; } +int btrfs_clean_old_snapshots(struct btrfs_root *root) +{ + struct list_head dirty_roots; + INIT_LIST_HEAD(&dirty_roots); + + mutex_lock(&root->fs_info->trans_mutex); + list_splice_init(&root->fs_info->dead_roots, &dirty_roots); + mutex_unlock(&root->fs_info->trans_mutex); + + if (!list_empty(&dirty_roots)) { + drop_dirty_roots(root, &dirty_roots); + } + return 0; +} void btrfs_transaction_cleaner(struct work_struct *work) { struct btrfs_fs_info *fs_info = container_of(work, @@ -536,12 +552,10 @@ void btrfs_transaction_cleaner(struct work_struct *work) struct btrfs_root *root = fs_info->tree_root; struct btrfs_transaction *cur; struct btrfs_trans_handle *trans; - struct list_head dirty_roots; unsigned long now; unsigned long delay = HZ * 30; int ret; - INIT_LIST_HEAD(&dirty_roots); mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); cur = root->fs_info->running_transaction; @@ -561,14 +575,7 @@ void btrfs_transaction_cleaner(struct work_struct *work) ret = btrfs_commit_transaction(trans, root); out: mutex_unlock(&root->fs_info->fs_mutex); - - mutex_lock(&root->fs_info->trans_mutex); - list_splice_init(&root->fs_info->dead_roots, &dirty_roots); - mutex_unlock(&root->fs_info->trans_mutex); - - if (!list_empty(&dirty_roots)) { - drop_dirty_roots(root, &dirty_roots); - } + btrfs_clean_old_snapshots(root); btrfs_transaction_queue_work(root, delay); } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 65a395eeca9..d5f491d3757 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -73,5 +73,7 @@ void btrfs_init_transaction_sys(void); void btrfs_exit_transaction_sys(void); int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list); int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); +int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); +int btrfs_clean_old_snapshots(struct btrfs_root *root); #endif diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 7ea66b4aa5c..a09064a9a41 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -42,16 +42,20 @@ static void reada_defrag(struct btrfs_root *root, static int defrag_walk_down(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level, - int cache_only) + int cache_only, u64 *last_ret) { struct buffer_head *next; struct buffer_head *cur; u64 blocknr; int ret = 0; + int is_extent = 0; WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); + if (root->fs_info->extent_root == root) + is_extent = 1; + while(*level > 0) { WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); @@ -70,7 +74,10 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, if (*level == 1) { ret = btrfs_realloc_node(trans, root, path->nodes[*level], - cache_only); + cache_only, last_ret); + if (is_extent) + btrfs_extent_post_op(trans, root); + break; } blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), @@ -90,8 +97,13 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, ret = btrfs_cow_block(trans, root, next, path->nodes[*level], path->slots[*level], &next); BUG_ON(ret); - ret = btrfs_realloc_node(trans, root, next, cache_only); + ret = btrfs_realloc_node(trans, root, next, cache_only, + last_ret); BUG_ON(ret); + + if (is_extent) + btrfs_extent_post_op(trans, root); + WARN_ON(*level <= 0); if (path->nodes[*level-1]) btrfs_block_release(root, path->nodes[*level-1]); @@ -148,10 +160,14 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, int level; int orig_level; int i; + int is_extent = 0; + u64 last_ret = 0; + + if (root->fs_info->extent_root == root) + is_extent = 1; - if (root->ref_cows == 0) { + if (root->ref_cows == 0 && !is_extent) goto out; - } path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -165,16 +181,21 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, get_bh(root->node); ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); BUG_ON(ret); - ret = btrfs_realloc_node(trans, root, root->node, cache_only); + ret = btrfs_realloc_node(trans, root, root->node, cache_only, + &last_ret); BUG_ON(ret); path->nodes[level] = root->node; path->slots[level] = 0; + if (is_extent) + btrfs_extent_post_op(trans, root); } else { level = root->defrag_level; path->lowest_level = level; wret = btrfs_search_slot(trans, root, &root->defrag_progress, path, 0, 1); + if (is_extent) + btrfs_extent_post_op(trans, root); if (wret < 0) { ret = wret; goto out; @@ -188,7 +209,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, } while(1) { - wret = defrag_walk_down(trans, root, path, &level, cache_only); + wret = defrag_walk_down(trans, root, path, &level, cache_only, + &last_ret); if (wret > 0) break; if (wret < 0) -- cgit v1.2.3-70-g09d2 From f2183bde1a918d338337955c8e8ba29bd8f5e7b1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 10 Aug 2007 14:42:37 -0400 Subject: Btrfs: Add BH_Defrag to mark buffers that are in need of defragging This allows the tree walking code to defrag only the newly allocated buffers, it seems to be a good balance between perfect defragging and the performance hit of repeatedly reallocating blocks. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 22 ++++++++++++---------- fs/btrfs/disk-io.h | 2 ++ fs/btrfs/extent-tree.c | 2 ++ fs/btrfs/tree-defrag.c | 3 ++- 4 files changed, 18 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ee1ae00d282..7cf43da5e78 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -175,6 +175,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, int end_slot; int i; int err = 0; + int parent_level; if (trans->transaction != root->fs_info->running_transaction) { printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, @@ -188,6 +189,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, } parent_node = btrfs_buffer_node(parent); parent_nritems = btrfs_header_nritems(&parent_node->header); + parent_level = btrfs_header_level(&parent_node->header); start_slot = 0; end_slot = parent_nritems; @@ -215,13 +217,16 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, cur_bh = btrfs_find_tree_block(root, blocknr); if (!cur_bh || !buffer_uptodate(cur_bh) || - buffer_locked(cur_bh)) { + buffer_locked(cur_bh) || !buffer_defrag(cur_bh)) { if (cache_only) { brelse(cur_bh); continue; } - brelse(cur_bh); - cur_bh = read_tree_block(root, blocknr); + if (!cur_bh || !buffer_uptodate(cur_bh) || + buffer_locked(cur_bh)) { + brelse(cur_bh); + cur_bh = read_tree_block(root, blocknr); + } } if (search_start == 0) search_start = last_block & ~((u64)65535); @@ -232,6 +237,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, if (err) break; search_start = bh_blocknr(tmp_bh); + *last_ret = search_start; + if (parent_level == 1) + clear_buffer_defrag(tmp_bh); brelse(tmp_bh); } return err; @@ -811,16 +819,10 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, clear_radix_bit(&found, blocknr); if (nread > 32) continue; - if (direction > 0 && cluster_start <= blocknr && - cluster_start + 8 > blocknr) { - cluster_start = blocknr; + if (close_blocks(cluster_start, blocknr)) { readahead_tree_block(root, blocknr); nread++; - } else if (direction < 0 && cluster_start >= blocknr && - blocknr + 8 > cluster_start) { cluster_start = blocknr; - readahead_tree_block(root, blocknr); - nread++; } } } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 9e2c261b41a..81fd18cbd82 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -25,8 +25,10 @@ enum btrfs_bh_state_bits { BH_Checked = BH_PrivateStart, + BH_Defrag, }; BUFFER_FNS(Checked, checked); +BUFFER_FNS(Defrag, defrag); static inline struct btrfs_node *btrfs_buffer_node(struct buffer_head *bh) { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3418bb62b99..b3641234473 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1015,6 +1015,7 @@ check_failed: ins->objectid = search_start; ins->offset = 0; start_found = 0; + path->reada = 1; ret = btrfs_search_slot(trans, root, ins, path, 0, 0); if (ret < 0) @@ -1264,6 +1265,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, WARN_ON(buffer_dirty(buf)); set_buffer_uptodate(buf); set_buffer_checked(buf); + set_buffer_defrag(buf); set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); return buf; } diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index a09064a9a41..35fd20d2464 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -86,7 +86,7 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, if (cache_only) { next = btrfs_find_tree_block(root, blocknr); if (!next || !buffer_uptodate(next) || - buffer_locked(next)) { + buffer_locked(next) || !buffer_defrag(next)) { brelse(next); path->slots[*level]++; continue; @@ -142,6 +142,7 @@ static int defrag_walk_up(struct btrfs_trans_handle *trans, root->defrag_level = i; return 0; } else { + clear_buffer_defrag(path->nodes[*level]); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; -- cgit v1.2.3-70-g09d2 From 2cc58cf24f69be8632a3b29d653c318bf3bd8c84 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 27 Aug 2007 16:49:44 -0400 Subject: Btrfs: Do more extensive readahead during tree searches Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 62 +++++++++++++++++++++++++++++++++++++++----------- fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/inode.c | 2 +- 3 files changed, 52 insertions(+), 16 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 51eea5ccb64..c0782a5b04c 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -43,8 +43,10 @@ struct btrfs_path *btrfs_alloc_path(void) { struct btrfs_path *path; path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS); - if (path) + if (path) { btrfs_init_path(path); + path->reada = 1; + } return path; } @@ -159,6 +161,34 @@ static int close_blocks(u64 blocknr, u64 other) return 0; } +static int should_defrag_leaf(struct buffer_head *bh) +{ + struct btrfs_leaf *leaf = btrfs_buffer_leaf(bh); + struct btrfs_disk_key *key; + u32 nritems; + + if (buffer_defrag(bh)) + return 1; + + nritems = btrfs_header_nritems(&leaf->header); + if (nritems == 0) + return 0; + + key = &leaf->items[0].key; + if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY) + return 1; + + key = &leaf->items[nritems-1].key; + if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY) + return 1; + if (nritems > 4) { + key = &leaf->items[nritems/2].key; + if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY) + return 1; + } + return 0; +} + int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *parent, int cache_only, u64 *last_ret) @@ -217,7 +247,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, cur_bh = btrfs_find_tree_block(root, blocknr); if (!cur_bh || !buffer_uptodate(cur_bh) || - buffer_locked(cur_bh) || !buffer_defrag(cur_bh)) { + buffer_locked(cur_bh) || + (parent_level != 1 && !buffer_defrag(cur_bh)) || + (parent_level == 1 && !should_defrag_leaf(cur_bh))) { if (cache_only) { brelse(cur_bh); continue; @@ -297,6 +329,7 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, parent = btrfs_buffer_node(path->nodes[level + 1]); slot = path->slots[level]; + BUG_ON(!buffer_uptodate(path->nodes[level])); BUG_ON(nritems == 0); if (parent) { struct btrfs_disk_key *parent_key; @@ -511,9 +544,6 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root err_on_enospc = 1; left_buf = read_node_slot(root, parent_buf, pslot - 1); - right_buf = read_node_slot(root, parent_buf, pslot + 1); - - /* first, try to make some room in the middle buffer */ if (left_buf) { wret = btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1, &left_buf); @@ -521,6 +551,19 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; goto enospc; } + } + right_buf = read_node_slot(root, parent_buf, pslot + 1); + if (right_buf) { + wret = btrfs_cow_block(trans, root, right_buf, + parent_buf, pslot + 1, &right_buf); + if (wret) { + ret = wret; + goto enospc; + } + } + + /* first, try to make some room in the middle buffer */ + if (left_buf) { left = btrfs_buffer_node(left_buf); orig_slot += btrfs_header_nritems(&left->header); wret = push_node_left(trans, root, left_buf, mid_buf); @@ -534,13 +577,6 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root * then try to empty the right most buffer into the middle */ if (right_buf) { - wret = btrfs_cow_block(trans, root, right_buf, - parent_buf, pslot + 1, &right_buf); - if (wret) { - ret = wret; - goto enospc; - } - right = btrfs_buffer_node(right_buf); wret = push_node_left(trans, root, mid_buf, right_buf); if (wret < 0 && wret != -ENOSPC) @@ -817,7 +853,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, for (i = 0; i < ret; i++) { blocknr = gang[i]; clear_radix_bit(&found, blocknr); - if (nread > 32) + if (path->reada == 1 && nread > 16) continue; if (close_blocks(cluster_start, blocknr)) { readahead_tree_block(root, blocknr); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b3641234473..4049aadbeda 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -52,7 +52,7 @@ static int cache_block_group(struct btrfs_root *root, path = btrfs_alloc_path(); if (!path) return -ENOMEM; - path->reada = 1; + path->reada = 2; key.objectid = block_group->key.objectid; key.flags = 0; key.offset = 0; @@ -1015,7 +1015,7 @@ check_failed: ins->objectid = search_start; ins->offset = 0; start_found = 0; - path->reada = 1; + path->reada = 2; ret = btrfs_search_slot(trans, root, ins, path, 0, 0); if (ret < 0) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 398484179d8..7e4cf62ada5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -827,7 +827,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) btrfs_set_key_type(&key, key_type); key.offset = filp->f_pos; path = btrfs_alloc_path(); - path->reada = 1; + path->reada = 2; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto err; -- cgit v1.2.3-70-g09d2 From b888db2bd7b67f190b32934e6a86181f262ac3ec Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 27 Aug 2007 16:49:44 -0400 Subject: Btrfs: Add delayed allocation to the extent based page tree code Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 9 --- fs/btrfs/extent_map.c | 147 +++++++++++++++++++++++++++++++++++++------------ fs/btrfs/extent_map.h | 3 + fs/btrfs/file.c | 50 +++++++---------- fs/btrfs/inode.c | 141 ++++++++++++++++++++++++++--------------------- fs/btrfs/super.c | 1 + 6 files changed, 216 insertions(+), 135 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4049aadbeda..c31e84d4265 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -39,7 +39,6 @@ static int cache_block_group(struct btrfs_root *root, u64 i; u64 last = 0; u64 hole_size; - u64 limit; int found = 0; root = root->fs_info->extent_root; @@ -62,7 +61,6 @@ static int cache_block_group(struct btrfs_root *root, return ret; if (ret && path->slots[0] > 0) path->slots[0]--; - limit = block_group->key.objectid + block_group->key.offset; while(1) { leaf = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; @@ -982,7 +980,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; - u64 limit; WARN_ON(num_blocks < 1); ins->flags = 0; @@ -1049,12 +1046,6 @@ check_failed: l = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { - if (start_found) - limit = last_block + - (block_group->key.offset >> 1); - else - limit = search_start + - (block_group->key.offset >> 1); ret = btrfs_next_leaf(root, path); if (ret == 0) continue; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index d378edf0964..a9c7419615b 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -30,9 +30,6 @@ struct tree_entry { #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) -static LIST_HEAD(all_states); -spinlock_t state_lock = SPIN_LOCK_UNLOCKED; - void __init extent_map_init(void) { extent_map_cache = kmem_cache_create("extent_map", @@ -49,15 +46,6 @@ void __init extent_map_init(void) void __exit extent_map_exit(void) { - while(!list_empty(&all_states)) { - struct extent_state *state; - struct list_head *cur = all_states.next; - state = list_entry(cur, struct extent_state, list); - printk("found leaked state %Lu %Lu state %d in_tree %d\n", - state->start, state->end, state->state, state->in_tree); - list_del(&state->list); - kfree(state); - } if (extent_map_cache) kmem_cache_destroy(extent_map_cache); if (extent_state_cache) @@ -69,6 +57,7 @@ void extent_map_tree_init(struct extent_map_tree *tree, { tree->map.rb_node = NULL; tree->state.rb_node = NULL; + tree->fill_delalloc = NULL; rwlock_init(&tree->lock); tree->mapping = mapping; } @@ -106,9 +95,6 @@ struct extent_state *alloc_extent_state(gfp_t mask) state->in_tree = 0; atomic_set(&state->refs, 1); init_waitqueue_head(&state->wq); - spin_lock_irq(&state_lock); - list_add(&state->list, &all_states); - spin_unlock_irq(&state_lock); return state; } EXPORT_SYMBOL(alloc_extent_state); @@ -117,9 +103,6 @@ void free_extent_state(struct extent_state *state) { if (atomic_dec_and_test(&state->refs)) { WARN_ON(state->in_tree); - spin_lock_irq(&state_lock); - list_del_init(&state->list); - spin_unlock_irq(&state_lock); kmem_cache_free(extent_state_cache, state); } } @@ -369,7 +352,7 @@ static int insert_state(struct extent_map_tree *tree, if (node) { struct extent_state *found; found = rb_entry(node, struct extent_state, rb_node); -printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); + printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); free_extent_state(state); return -EEXIST; } @@ -408,7 +391,7 @@ static int split_state(struct extent_map_tree *tree, struct extent_state *orig, if (node) { struct extent_state *found; found = rb_entry(node, struct extent_state, rb_node); -printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); + printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); free_extent_state(prealloc); return -EEXIST; } @@ -792,10 +775,20 @@ int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, } EXPORT_SYMBOL(set_extent_dirty); +int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, + EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_delalloc); + int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask) { - return clear_extent_bit(tree, start, end, EXTENT_DIRTY, 0, 0, mask); + return clear_extent_bit(tree, start, end, + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); } EXPORT_SYMBOL(clear_extent_dirty); @@ -922,6 +915,62 @@ int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end) } EXPORT_SYMBOL(set_range_writeback); +u64 find_lock_delalloc_range(struct extent_map_tree *tree, + u64 start, u64 lock_start, u64 *end, u64 max_bytes) +{ + struct rb_node *node; + struct extent_state *state; + u64 cur_start = start; + u64 found = 0; + u64 total_bytes = 0; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ +search_again: + node = tree_search(&tree->state, cur_start); + if (!node || IS_ERR(node)) { + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->start != cur_start) { + goto out; + } + if (!(state->state & EXTENT_DELALLOC)) { + goto out; + } + if (state->start >= lock_start) { + if (state->state & EXTENT_LOCKED) { + DEFINE_WAIT(wait); + atomic_inc(&state->refs); + write_unlock_irq(&tree->lock); + schedule(); + write_lock_irq(&tree->lock); + finish_wait(&state->wq, &wait); + free_extent_state(state); + goto search_again; + } + state->state |= EXTENT_LOCKED; + } + found++; + *end = state->end; + cur_start = state->end + 1; + node = rb_next(node); + if (!node) + break; + total_bytes = state->end - state->start + 1; + if (total_bytes >= max_bytes) + break; + } +out: + write_unlock_irq(&tree->lock); + return found; +} + /* * helper function to lock both pages and extents in the tree. * pages must be locked first. @@ -1285,6 +1334,7 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, if (!PagePrivate(page)) { SetPagePrivate(page); set_page_private(page, 1); + WARN_ON(!page->mapping->a_ops->invalidatepage); page_cache_get(page); } @@ -1384,7 +1434,10 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, size_t blocksize; loff_t i_size = i_size_read(inode); unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; + u64 nr_delalloc; + u64 delalloc_end; + WARN_ON(!PageLocked(page)); if (page->index > end_index) { clear_extent_dirty(tree, start, page_end, GFP_NOFS); unlock_page(page); @@ -1400,11 +1453,34 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, if (!PagePrivate(page)) { SetPagePrivate(page); set_page_private(page, 1); + WARN_ON(!page->mapping->a_ops->invalidatepage); page_cache_get(page); } - end = page_end; lock_extent(tree, start, page_end, GFP_NOFS); + nr_delalloc = find_lock_delalloc_range(tree, start, page_end + 1, + &delalloc_end, + 128 * 1024 * 1024); + if (nr_delalloc) { + tree->fill_delalloc(inode, start, delalloc_end); + if (delalloc_end >= page_end + 1) { + clear_extent_bit(tree, page_end + 1, delalloc_end, + EXTENT_LOCKED | EXTENT_DELALLOC, + 1, 0, GFP_NOFS); + } + clear_extent_bit(tree, start, page_end, EXTENT_DELALLOC, + 0, 0, GFP_NOFS); + if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { + printk("found delalloc bits after clear extent_bit\n"); + } + } else if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { + printk("found delalloc bits after find_delalloc_range returns 0\n"); + } + + end = page_end; + if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { + printk("found delalloc bits after lock_extent\n"); + } if (last_byte <= start) { clear_extent_dirty(tree, start, page_end, GFP_NOFS); @@ -1419,7 +1495,7 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, clear_extent_dirty(tree, cur, page_end, GFP_NOFS); break; } - em = get_extent(inode, page, page_offset, cur, end, 1); + em = get_extent(inode, page, page_offset, cur, end, 0); if (IS_ERR(em) || !em) { SetPageError(page); break; @@ -1507,6 +1583,7 @@ int extent_commit_write(struct extent_map_tree *tree, if (!PagePrivate(page)) { SetPagePrivate(page); set_page_private(page, 1); + WARN_ON(!page->mapping->a_ops->invalidatepage); page_cache_get(page); } @@ -1543,6 +1620,7 @@ int extent_prepare_write(struct extent_map_tree *tree, if (!PagePrivate(page)) { SetPagePrivate(page); set_page_private(page, 1); + WARN_ON(!page->mapping->a_ops->invalidatepage); page_cache_get(page); } block_start = (page_start + from) & ~((u64)blocksize - 1); @@ -1628,29 +1706,28 @@ int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page) u64 start = page->index << PAGE_CACHE_SHIFT; u64 end = start + PAGE_CACHE_SIZE - 1; u64 orig_start = start; + int ret = 1; while (start <= end) { em = lookup_extent_mapping(tree, start, end); if (!em || IS_ERR(em)) break; - if (test_range_bit(tree, em->start, em->end, - EXTENT_LOCKED, 0)) { + if (!test_range_bit(tree, em->start, em->end, + EXTENT_LOCKED, 0)) { + remove_extent_mapping(tree, em); + /* once for the rb tree */ free_extent_map(em); - start = em->end + 1; -printk("range still locked %Lu %Lu\n", em->start, em->end); - break; } - remove_extent_mapping(tree, em); start = em->end + 1; - /* once for the rb tree */ - free_extent_map(em); /* once for us */ free_extent_map(em); } - WARN_ON(test_range_bit(tree, orig_start, end, EXTENT_WRITEBACK, 0)); - clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, - 1, 1, GFP_NOFS); - return 1; + if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0)) + ret = 0; + else + clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, + 1, 1, GFP_NOFS); + return ret; } EXPORT_SYMBOL(try_release_extent_mapping); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 108944aab4b..e91a2e9619e 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -11,6 +11,7 @@ struct extent_map_tree { struct rb_root state; struct address_space *mapping; rwlock_t lock; + int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); }; /* note, this must start with the same fields as fs/extent_map.c:tree_entry */ @@ -74,6 +75,8 @@ int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); +int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask); int extent_invalidatepage(struct extent_map_tree *tree, struct page *page, unsigned long offset); int extent_write_full_page(struct extent_map_tree *tree, struct page *page, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 71a481894ab..d3d39e4a279 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -143,7 +143,6 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct inode *inode = file->f_path.dentry->d_inode; struct extent_map *em; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - struct btrfs_key ins; u64 hint_block; u64 num_blocks; u64 start_pos; @@ -162,6 +161,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, inode->i_blkbits; end_of_last_block = start_pos + (num_blocks << inode->i_blkbits) - 1; + lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); if (!trans) { @@ -179,16 +179,6 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, /* FIXME...EIEIO, ENOSPC and more */ - /* step one, delete the existing extents in this range */ - /* FIXME blocksize != pagesize */ - if (start_pos < inode->i_size) { - err = btrfs_drop_extents(trans, root, inode, - start_pos, (pos + write_bytes + root->blocksize -1) & - ~((u64)root->blocksize - 1), &hint_block); - if (err) - goto failed; - } - /* insert any holes we need to create */ if (inode->i_size < start_pos) { u64 last_pos_in_file; @@ -213,29 +203,28 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, */ if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { - err = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, 0, hint_block, (u64)-1, - &ins, 1); - BUG_ON(err); - err = btrfs_insert_file_extent(trans, root, inode->i_ino, - start_pos, ins.objectid, ins.offset, - ins.offset); - BUG_ON(err); - em->start = start_pos; - em->end = end_of_last_block; - em->block_start = ins.objectid << inode->i_blkbits; - em->block_end = em->block_start + - (ins.offset << inode->i_blkbits) - 1; - set_extent_dirty(em_tree, start_pos, end_of_last_block, - GFP_NOFS); - err = add_extent_mapping(em_tree, em); + u64 last_end; for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; SetPageUptodate(p); - __set_page_dirty_nobuffers(p); + set_page_dirty(p); } + last_end = pages[num_pages -1]->index << PAGE_CACHE_SHIFT; + last_end += PAGE_CACHE_SIZE - 1; + set_extent_delalloc(em_tree, start_pos, end_of_last_block, + GFP_NOFS); } else { struct page *p = pages[0]; + /* step one, delete the existing extents in this range */ + /* FIXME blocksize != pagesize */ + if (start_pos < inode->i_size) { + err = btrfs_drop_extents(trans, root, inode, start_pos, + (pos + write_bytes + root->blocksize -1) & + ~((u64)root->blocksize - 1), &hint_block); + if (err) + goto failed; + } + err = insert_inline_extent(trans, root, inode, start_pos, end_pos - start_pos, p, 0); BUG_ON(err); @@ -253,6 +242,7 @@ failed: err = btrfs_end_transaction(trans, root); out_unlock: mutex_unlock(&root->fs_info->fs_mutex); + unlock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); free_extent_map(em); return err; } @@ -531,7 +521,7 @@ static int prepare_pages(struct btrfs_root *root, u64 num_blocks; u64 start_pos; - start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); + start_pos = pos & ~((u64)root->blocksize - 1); num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> inode->i_blkbits; @@ -548,8 +538,10 @@ static int prepare_pages(struct btrfs_root *root, if (!PagePrivate(pages[i])) { SetPagePrivate(pages[i]); set_page_private(pages[i], 1); + WARN_ON(!pages[i]->mapping->a_ops->invalidatepage); page_cache_get(pages[i]); } + WARN_ON(!PageLocked(pages[i])); } return 0; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fd4cb65fcf1..dc181089aa7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -70,6 +70,40 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, }; +static int run_delalloc_range(struct inode *inode, u64 start, u64 end) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + struct btrfs_key ins; + u64 alloc_hint = 0; + u64 num_blocks; + int ret; + u64 blocksize = 1 << inode->i_blkbits; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + BUG_ON(!trans); + num_blocks = (end - start + blocksize) & ~(blocksize - 1); + ret = btrfs_drop_extents(trans, root, inode, + start, start + num_blocks, &alloc_hint); + num_blocks = num_blocks >> inode->i_blkbits; + ret = btrfs_alloc_extent(trans, root, inode->i_ino, num_blocks, 0, + alloc_hint, (u64)-1, &ins, 1); + if (ret) { + WARN_ON(1); + goto out; + } + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + start, ins.objectid, ins.offset, + ins.offset); +out: + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + + void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; @@ -121,8 +155,7 @@ void btrfs_read_locked_inode(struct inode *inode) switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_mapping->a_ops = &btrfs_aops; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, - inode->i_mapping, GFP_NOFS); + BTRFS_I(inode)->extent_tree.fill_delalloc = run_delalloc_range; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; break; @@ -450,11 +483,17 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, BUG_ON(ret); } if (item_end < inode->i_size) { - if (found_type) { - btrfs_set_key_type(&key, found_type - 1); - continue; + if (found_type == BTRFS_DIR_ITEM_KEY) { + found_type = BTRFS_INODE_ITEM_KEY; + } else if (found_type == BTRFS_EXTENT_ITEM_KEY) { + found_type = BTRFS_CSUM_ITEM_KEY; + } else if (found_type) { + found_type--; + } else { + break; } - break; + btrfs_set_key_type(&key, found_type - 1); + continue; } if (btrfs_disk_key_offset(found_key) >= inode->i_size) del_item = 1; @@ -514,47 +553,34 @@ error: return ret; } -static int btrfs_cow_one_page(struct btrfs_trans_handle *trans, - struct inode *inode, struct page *page, +static int btrfs_cow_one_page(struct inode *inode, struct page *page, size_t zero_start) { char *kaddr; int ret = 0; - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 alloc_hint = 0; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; u64 page_start = page->index << PAGE_CACHE_SHIFT; - struct btrfs_key ins; + u64 page_end = page_start + PAGE_CACHE_SIZE - 1; if (!PagePrivate(page)) { SetPagePrivate(page); set_page_private(page, 1); + WARN_ON(!page->mapping->a_ops->invalidatepage); page_cache_get(page); } - btrfs_set_trans_block_group(trans, inode); - - ret = btrfs_drop_extents(trans, root, inode, - page_start, page_start + PAGE_CACHE_SIZE, - &alloc_hint); - if (ret) - goto out; - ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, 0, - alloc_hint, (u64)-1, &ins, 1); - if (ret) - goto out; - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - page_start, ins.objectid, 1, 1); - if (ret) - goto out; - SetPageChecked(page); - kaddr = kmap(page); + lock_extent(em_tree, page_start, page_end, GFP_NOFS); + set_extent_delalloc(&BTRFS_I(inode)->extent_tree, page_start, + page_end, GFP_NOFS); if (zero_start != PAGE_CACHE_SIZE) { + kaddr = kmap(page); memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start); flush_dcache_page(page); + kunmap(page); } - kunmap(page); + set_page_dirty(page); + unlock_extent(em_tree, page_start, page_end, GFP_NOFS); -out: return ret; } @@ -565,8 +591,6 @@ out: static int btrfs_truncate_page(struct address_space *mapping, loff_t from) { struct inode *inode = mapping->host; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; unsigned blocksize = 1 << inode->i_blkbits; pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); @@ -591,21 +615,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) } page_start = page->index << PAGE_CACHE_SHIFT; - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - ret = btrfs_cow_one_page(trans, inode, page, offset); - if (!ret) { - char *kaddr = kmap(page); - ret = btrfs_csum_file_block(trans, root, inode->i_ino, - page_start, kaddr, PAGE_CACHE_SIZE); - kunmap(page); - } - set_extent_dirty(&BTRFS_I(inode)->extent_tree, - page_start, page_start + PAGE_CACHE_SIZE - 1, - GFP_NOFS); - set_page_dirty(page); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); + ret = btrfs_cow_one_page(inode, page, offset); unlock_page(page); page_cache_release(page); @@ -757,6 +767,8 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) struct btrfs_iget_args *args = p; inode->i_ino = args->ino; BTRFS_I(inode)->root = args->root; + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + inode->i_mapping, GFP_NOFS); return 0; } @@ -968,7 +980,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (!inode) return ERR_PTR(-ENOMEM); + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + inode->i_mapping, GFP_NOFS); BTRFS_I(inode)->root = root; + if (mode & S_IFDIR) owner = 0; else @@ -1128,6 +1143,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, inode->i_mapping, GFP_NOFS); + BTRFS_I(inode)->extent_tree.fill_delalloc = run_delalloc_range; } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -1344,9 +1360,11 @@ again: extent_end = extent_start + (btrfs_file_extent_num_blocks(item) << inode->i_blkbits); err = 0; - if (start < extent_start || start > extent_end) { + if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { + if (end < extent_start) + goto not_found; em->end = extent_end - 1; } else { em->end = end; @@ -1375,9 +1393,11 @@ again: size = btrfs_file_extent_inline_len(leaf->items + path->slots[0]); extent_end = extent_start + size; - if (start < extent_start || start > extent_end) { + if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { + if (end < extent_start) + goto not_found; em->end = extent_end - 1; } else { em->end = end; @@ -1412,8 +1432,7 @@ not_found_em: insert: btrfs_release_path(root, path); if (em->start > start || em->end < start) { - printk("bad extent! %Lu %Lu start %Lu end %Lu\n", em->start, em->end, start, end); - WARN_ON(1); + printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->end, start, end); err = -EIO; goto out; } @@ -1648,6 +1667,13 @@ int btrfs_readpage(struct file *file, struct page *page) static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { struct extent_map_tree *tree; + + + if (current->flags & PF_MEMALLOC) { + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; + } tree = &BTRFS_I(page->mapping->host)->extent_tree; return extent_write_full_page(tree, page, btrfs_get_extent, wbc); } @@ -1698,8 +1724,6 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) { struct inode *inode = vma->vm_file->f_path.dentry->d_inode; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; unsigned long end; loff_t size; int ret = -EINVAL; @@ -1722,15 +1746,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) else end = PAGE_CACHE_SIZE; - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - ret = btrfs_cow_one_page(trans, inode, page, end); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - set_extent_dirty(&BTRFS_I(inode)->extent_tree, - page_start, page_start + PAGE_CACHE_SIZE - 1, - GFP_NOFS); - set_page_dirty(page); + ret = btrfs_cow_one_page(inode, page, end); out_unlock: unlock_page(page); @@ -2264,6 +2280,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, inode->i_mapping, GFP_NOFS); + BTRFS_I(inode)->extent_tree.fill_delalloc = run_delalloc_range; } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7b7c32c73a2..7da95538745 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -83,6 +83,7 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) bi->location.offset = 0; bi->location.flags = 0; bi->root = tree_root; + btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); if (!inode) { -- cgit v1.2.3-70-g09d2 From 58176a9604c5db1784d2c979aea472b3be40b6f0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 29 Aug 2007 15:47:34 -0400 Subject: Btrfs: Add per-root block accounting and sysfs entries Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.h | 54 +++++++++++ fs/btrfs/disk-io.c | 31 ++++++- fs/btrfs/disk-io.h | 3 +- fs/btrfs/extent-tree.c | 17 +++- fs/btrfs/inode.c | 13 ++- fs/btrfs/super.c | 14 +++ fs/btrfs/sysfs.c | 236 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/transaction.c | 34 ++++++- 9 files changed, 393 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index ea7a2287035..ee505c87ecf 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -5,7 +5,7 @@ obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ - extent_map.o + extent_map.o sysfs.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 947f061ed11..ffeee546a0d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -22,6 +22,7 @@ #include #include #include +#include #include "bit-radix.h" struct btrfs_trans_handle; @@ -313,6 +314,8 @@ struct btrfs_fs_info { struct list_head trans_list; struct list_head dead_roots; struct delayed_work trans_work; + struct kobject super_kobj; + struct completion kobj_unregister; int do_barriers; int closing; }; @@ -328,6 +331,8 @@ struct btrfs_root { struct btrfs_key root_key; struct btrfs_fs_info *fs_info; struct inode *inode; + struct kobject root_kobj; + struct completion kobj_unregister; u64 objectid; u64 last_trans; u32 blocksize; @@ -338,6 +343,7 @@ struct btrfs_root { struct btrfs_key defrag_progress; int defrag_running; int defrag_level; + char *name; }; /* the lower bits in the key flags defines the item type */ @@ -814,6 +820,28 @@ static inline void btrfs_set_root_flags(struct btrfs_root_item *item, u32 val) item->flags = cpu_to_le32(val); } +static inline void btrfs_set_root_blocks_used(struct btrfs_root_item *item, + u64 val) +{ + item->blocks_used = cpu_to_le64(val); +} + +static inline u64 btrfs_root_blocks_used(struct btrfs_root_item *item) +{ + return le64_to_cpu(item->blocks_used); +} + +static inline void btrfs_set_root_block_limit(struct btrfs_root_item *item, + u64 val) +{ + item->block_limit = cpu_to_le64(val); +} + +static inline u64 btrfs_root_block_limit(struct btrfs_root_item *item) +{ + return le64_to_cpu(item->block_limit); +} + static inline u64 btrfs_super_blocknr(struct btrfs_super_block *s) { return le64_to_cpu(s->blocknr); @@ -1014,6 +1042,23 @@ static inline void btrfs_memmove(struct btrfs_root *root, memmove(dst, src, nr); } +static inline int btrfs_set_root_name(struct btrfs_root *root, + const char *name, int len) +{ + /* if we already have a name just free it */ + if (root->name) + kfree(root->name); + + root->name = kmalloc(len+1, GFP_KERNEL); + if (!root->name) + return -ENOMEM; + + memcpy(root->name, name, len); + root->name[len] ='\0'; + + return 0; +} + /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ @@ -1191,4 +1236,13 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only); + +/* sysfs.c */ +int btrfs_init_sysfs(void); +void btrfs_exit_sysfs(void); +int btrfs_sysfs_add_super(struct btrfs_fs_info *fs); +int btrfs_sysfs_add_root(struct btrfs_root *root); +void btrfs_sysfs_del_root(struct btrfs_root *root); +void btrfs_sysfs_del_super(struct btrfs_fs_info *root); + #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b2f79878d51..c25ef0a68f1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -294,9 +294,12 @@ static int __setup_root(int blocksize, root->last_trans = 0; root->highest_inode = 0; root->last_inode_alloc = 0; + root->name = NULL; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); + memset(&root->root_kobj, 0, sizeof(root->root_kobj)); + init_completion(&root->kobj_unregister); root->defrag_running = 0; root->defrag_level = 0; root->root_key.objectid = objectid; @@ -384,7 +387,8 @@ insert: } struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, - struct btrfs_key *location) + struct btrfs_key *location, + const char *name, int namelen) { struct btrfs_root *root; int ret; @@ -405,6 +409,22 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } + + ret = btrfs_set_root_name(root, name, namelen); + if (ret) { + brelse(root->node); + kfree(root); + return ERR_PTR(ret); + } + + ret = btrfs_sysfs_add_root(root); + if (ret) { + brelse(root->node); + kfree(root->name); + kfree(root); + return ERR_PTR(ret); + } + return root; } @@ -433,6 +453,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); + memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj)); + init_completion(&fs_info->kobj_unregister); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; fs_info->last_trans_committed = 0; @@ -500,8 +522,10 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->generation = btrfs_super_generation(disk_super) + 1; ret = btrfs_find_dead_roots(tree_root); - if (ret) + if (ret) { + mutex_unlock(&fs_info->fs_mutex); goto fail_tree_root; + } mutex_unlock(&fs_info->fs_mutex); return tree_root; @@ -553,12 +577,15 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) { radix_tree_delete(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid); + btrfs_sysfs_del_root(root); if (root->inode) iput(root->inode); if (root->node) brelse(root->node); if (root->commit_root) brelse(root->commit_root); + if (root->name) + kfree(root->name); kfree(root); return 0; } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 5261733b873..da6bb72750f 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -66,7 +66,8 @@ struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr); int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, char *result); struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, - struct btrfs_key *location); + struct btrfs_key *location, + const char *name, int namelen); struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_key *location); u64 bh_blocknr(struct buffer_head *bh); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c31e84d4265..ff3f7c2be60 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -858,16 +858,23 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_extent_refs(ei, refs); btrfs_mark_buffer_dirty(path->nodes[0]); if (refs == 0) { - u64 super_blocks_used; + u64 super_blocks_used, root_blocks_used; if (pin) { ret = pin_down_block(root, blocknr, 0); BUG_ON(ret); } + /* block accounting for super block */ super_blocks_used = btrfs_super_blocks_used(&info->super_copy); btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used - num_blocks); + + /* block accounting for root item */ + root_blocks_used = btrfs_root_blocks_used(&root->root_item); + btrfs_set_root_blocks_used(&root->root_item, + root_blocks_used - num_blocks); + ret = btrfs_del_item(trans, extent_root, path); if (ret) { return ret; @@ -1175,7 +1182,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, { int ret; int pending_ret; - u64 super_blocks_used; + u64 super_blocks_used, root_blocks_used; u64 search_start = 0; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; @@ -1193,10 +1200,16 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, if (ret) return ret; + /* block accounting for super block */ super_blocks_used = btrfs_super_blocks_used(&info->super_copy); btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used + num_blocks); + /* block accounting for root item */ + root_blocks_used = btrfs_root_blocks_used(&root->root_item); + btrfs_set_root_blocks_used(&root->root_item, root_blocks_used + + num_blocks); + if (root == extent_root) { BUG_ON(num_blocks != 1); set_radix_bit(&root->fs_info->extent_ins_radix, ins->objectid); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dc181089aa7..2e3918e6049 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -733,7 +733,8 @@ out: */ static int fixup_tree_root_location(struct btrfs_root *root, struct btrfs_key *location, - struct btrfs_root **sub_root) + struct btrfs_root **sub_root, + struct dentry *dentry) { struct btrfs_path *path; struct btrfs_root_item *ri; @@ -747,7 +748,9 @@ static int fixup_tree_root_location(struct btrfs_root *root, BUG_ON(!path); mutex_lock(&root->fs_info->fs_mutex); - *sub_root = btrfs_read_fs_root(root->fs_info, location); + *sub_root = btrfs_read_fs_root(root->fs_info, location, + dentry->d_name.name, + dentry->d_name.len); if (IS_ERR(*sub_root)) return PTR_ERR(*sub_root); @@ -812,7 +815,8 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(ret); inode = NULL; if (location.objectid) { - ret = fixup_tree_root_location(root, &location, &sub_root); + ret = fixup_tree_root_location(root, &location, &sub_root, + dentry); if (ret < 0) return ERR_PTR(ret); if (ret > 0) @@ -1829,6 +1833,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); btrfs_set_root_refs(&root_item, 1); + btrfs_set_root_blocks_used(&root_item, 0); memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); root_item.drop_level = 0; brelse(subvol); @@ -1865,7 +1870,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) if (ret) goto fail_commit; - new_root = btrfs_read_fs_root(root->fs_info, &key); + new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen); BUG_ON(!new_root); trans = btrfs_start_transaction(new_root, 1); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7da95538745..a68101ae1a3 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -45,12 +45,14 @@ static struct super_operations btrfs_super_ops; static void btrfs_put_super (struct super_block * sb) { struct btrfs_root *root = btrfs_sb(sb); + struct btrfs_fs_info *fs = root->fs_info; int ret; ret = close_ctree(root); if (ret) { printk("close ctree returns %d\n", ret); } + btrfs_sysfs_del_super(fs); sb->s_fs_info = NULL; } @@ -101,6 +103,12 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) err = -ENOMEM; goto fail_close; } + + /* this does the super kobj at the same time */ + err = btrfs_sysfs_add_super(tree_root->fs_info); + if (err) + goto fail_close; + sb->s_root = root_dentry; btrfs_transaction_queue_work(tree_root, HZ * 30); return 0; @@ -182,6 +190,11 @@ static struct super_operations btrfs_super_ops = { static int __init init_btrfs_fs(void) { int err; + + err = btrfs_init_sysfs(); + if (err) + return err; + btrfs_init_transaction_sys(); err = btrfs_init_cachep(); if (err) @@ -196,6 +209,7 @@ static void __exit exit_btrfs_fs(void) btrfs_destroy_cachep(); extent_map_exit(); unregister_filesystem(&btrfs_fs_type); + btrfs_exit_sysfs(); } module_init(init_btrfs_fs) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index db8917e5b25..2058783373e 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -16,6 +16,242 @@ * Boston, MA 021110-1307, USA. */ +#include +#include +#include +#include +#include +#include +#include + #include "ctree.h" #include "disk-io.h" #include "transaction.h" + +static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)btrfs_root_blocks_used(&root->root_item)); +} + +static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)btrfs_root_block_limit(&root->root_item)); +} + +static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)btrfs_super_blocks_used(fs->disk_super)); +} + +static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)btrfs_super_total_blocks(fs->disk_super)); +} + +static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)btrfs_super_blocksize(fs->disk_super)); +} + +/* this is for root attrs (subvols/snapshots) */ +struct btrfs_root_attr { + struct attribute attr; + ssize_t (*show)(struct btrfs_root *, char *); + ssize_t (*store)(struct btrfs_root *, const char *, size_t); +}; + +#define ROOT_ATTR(name, mode, show, store) \ +static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, show, store) + +ROOT_ATTR(blocks_used, 0444, root_blocks_used_show, NULL); +ROOT_ATTR(block_limit, 0644, root_block_limit_show, NULL); + +static struct attribute *btrfs_root_attrs[] = { + &btrfs_root_attr_blocks_used.attr, + &btrfs_root_attr_block_limit.attr, + NULL, +}; + +/* this is for super attrs (actual full fs) */ +struct btrfs_super_attr { + struct attribute attr; + ssize_t (*show)(struct btrfs_fs_info *, char *); + ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t); +}; + +#define SUPER_ATTR(name, mode, show, store) \ +static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, show, store) + +SUPER_ATTR(blocks_used, 0444, super_blocks_used_show, NULL); +SUPER_ATTR(total_blocks, 0444, super_total_blocks_show, NULL); +SUPER_ATTR(blocksize, 0444, super_blocksize_show, NULL); + +static struct attribute *btrfs_super_attrs[] = { + &btrfs_super_attr_blocks_used.attr, + &btrfs_super_attr_total_blocks.attr, + &btrfs_super_attr_blocksize.attr, + NULL, +}; + +static ssize_t btrfs_super_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, + super_kobj); + struct btrfs_super_attr *a = container_of(attr, + struct btrfs_super_attr, + attr); + + return a->show ? a->show(fs, buf) : 0; +} + +static ssize_t btrfs_super_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t len) +{ + struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, + super_kobj); + struct btrfs_super_attr *a = container_of(attr, + struct btrfs_super_attr, + attr); + + return a->store ? a->store(fs, buf, len) : 0; +} + +static ssize_t btrfs_root_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct btrfs_root *root = container_of(kobj, struct btrfs_root, + root_kobj); + struct btrfs_root_attr *a = container_of(attr, + struct btrfs_root_attr, + attr); + + return a->show ? a->show(root, buf) : 0; +} + +static ssize_t btrfs_root_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t len) +{ + struct btrfs_root *root = container_of(kobj, struct btrfs_root, + root_kobj); + struct btrfs_root_attr *a = container_of(attr, + struct btrfs_root_attr, + attr); + return a->store ? a->store(root, buf, len) : 0; +} + +static void btrfs_super_release(struct kobject *kobj) +{ + struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, + super_kobj); + complete(&fs->kobj_unregister); +} + +static void btrfs_root_release(struct kobject *kobj) +{ + struct btrfs_root *root = container_of(kobj, struct btrfs_root, + root_kobj); + complete(&root->kobj_unregister); +} + +static struct sysfs_ops btrfs_super_attr_ops = { + .show = btrfs_super_attr_show, + .store = btrfs_super_attr_store, +}; + +static struct sysfs_ops btrfs_root_attr_ops = { + .show = btrfs_root_attr_show, + .store = btrfs_root_attr_store, +}; + +static struct kobj_type btrfs_root_ktype = { + .default_attrs = btrfs_root_attrs, + .sysfs_ops = &btrfs_root_attr_ops, + .release = btrfs_root_release, +}; + +static struct kobj_type btrfs_super_ktype = { + .default_attrs = btrfs_super_attrs, + .sysfs_ops = &btrfs_super_attr_ops, + .release = btrfs_super_release, +}; + +static struct kset btrfs_kset = { + .kobj = {.name = "btrfs"}, +}; + +int btrfs_sysfs_add_super(struct btrfs_fs_info *fs) +{ + int error; + + fs->super_kobj.kset = &btrfs_kset; + fs->super_kobj.ktype = &btrfs_super_ktype; + + error = kobject_set_name(&fs->super_kobj, "%s", + fs->sb->s_id); + if (error) + goto fail; + + error = kobject_register(&fs->super_kobj); + if (error) + goto fail; + + return 0; + +fail: + printk(KERN_ERR "btrfs: sysfs creation for super failed\n"); + return error; +} + +int btrfs_sysfs_add_root(struct btrfs_root *root) +{ + int error; + + root->root_kobj.ktype = &btrfs_root_ktype; + root->root_kobj.parent = &root->fs_info->super_kobj; + + error = kobject_set_name(&root->root_kobj, "%s", root->name); + if (error) { + goto fail; + } + + error = kobject_register(&root->root_kobj); + if (error) + goto fail; + + return 0; + +fail: + printk(KERN_ERR "btrfs: sysfs creation for root failed\n"); + return error; +} + +void btrfs_sysfs_del_root(struct btrfs_root *root) +{ + kobject_unregister(&root->root_kobj); + wait_for_completion(&root->kobj_unregister); +} + +void btrfs_sysfs_del_super(struct btrfs_fs_info *fs) +{ + kobject_unregister(&fs->super_kobj); + wait_for_completion(&fs->kobj_unregister); +} + +int btrfs_init_sysfs() +{ + kobj_set_kset_s(&btrfs_kset, fs_subsys); + return kset_register(&btrfs_kset); +} + +void btrfs_exit_sysfs() +{ + kset_unregister(&btrfs_kset); +} diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 18abea80279..2b15daa3a9f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -236,6 +236,7 @@ static int wait_for_commit(struct btrfs_root *root, struct dirty_root { struct list_head list; struct btrfs_root *root; + struct btrfs_root *latest_root; }; int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list) @@ -278,6 +279,15 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, btrfs_root_blocknr(&root->root_item)); brelse(root->commit_root); root->commit_root = NULL; + + /* make sure to update the root on disk + * so we get any updates to the block used + * counts + */ + err = btrfs_update_root(trans, + root->fs_info->tree_root, + &root->root_key, + &root->root_item); continue; } dirty = kmalloc(sizeof(*dirty), GFP_NOFS); @@ -291,6 +301,7 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, memcpy(dirty->root, root, sizeof(*root)); dirty->root->node = root->commit_root; + dirty->latest_root = root; root->commit_root = NULL; root->root_key.offset = root->fs_info->generation; @@ -384,20 +395,29 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, { struct dirty_root *dirty; struct btrfs_trans_handle *trans; + u64 num_blocks; + u64 blocks_used; int ret = 0; int err; while(!list_empty(list)) { + struct btrfs_root *root; + mutex_lock(&tree_root->fs_info->fs_mutex); dirty = list_entry(list->next, struct dirty_root, list); list_del_init(&dirty->list); + num_blocks = btrfs_root_blocks_used(&dirty->root->root_item); + root = dirty->latest_root; + while(1) { trans = btrfs_start_transaction(tree_root, 1); + ret = btrfs_drop_snapshot(trans, dirty->root); if (ret != -EAGAIN) { break; } + err = btrfs_update_root(trans, tree_root, &dirty->root->root_key, @@ -414,9 +434,19 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, mutex_lock(&tree_root->fs_info->fs_mutex); } BUG_ON(ret); + + num_blocks -= btrfs_root_blocks_used(&dirty->root->root_item); + blocks_used = btrfs_root_blocks_used(&root->root_item); + if (num_blocks) { + record_root_in_trans(root); + btrfs_set_root_blocks_used(&root->root_item, + blocks_used - num_blocks); + } ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); - if (ret) + if (ret) { + BUG(); break; + } ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); @@ -534,10 +564,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, wake_up(&cur_trans->commit_wait); put_transaction(cur_trans); put_transaction(cur_trans); + if (root->fs_info->closing) list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots); else list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots); + mutex_unlock(&root->fs_info->trans_mutex); kmem_cache_free(btrfs_trans_handle_cachep, trans); -- cgit v1.2.3-70-g09d2 From e9fe395e47bef9b948657b788c65cbfbd2433cc4 Mon Sep 17 00:00:00 2001 From: Yan Date: Wed, 29 Aug 2007 09:11:44 -0400 Subject: Btrfs: Fix oopsen in extent_tree.c during enospc Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ff3f7c2be60..b074ad1416d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -244,7 +244,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (search_start) { struct btrfs_block_group_cache *shint; shint = btrfs_lookup_block_group(info, search_start); - if (shint->data == data) { + if (shint && shint->data == data) { used = btrfs_block_group_used(&shint->item); if (used + shint->pinned < div_factor(shint->key.offset, factor)) { @@ -1255,8 +1255,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, empty_size, hint, - (unsigned long)-1, &ins, 0); + 1, empty_size, hint, (u64)-1, &ins, 0); if (ret) { BUG_ON(ret > 0); return ERR_PTR(ret); -- cgit v1.2.3-70-g09d2 From 7d7d6068bee9d8004ea7cc8a2ebda2c99a57caec Mon Sep 17 00:00:00 2001 From: Yan Date: Fri, 14 Sep 2007 16:15:28 -0400 Subject: Btrfs: Fix cache_block_group to catch holes at the start of the group Cache block group was overly complex and missed free blocks at the very start of the group. This patch simplifies things significantly. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 61 +++++++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 28 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b074ad1416d..9151850266b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -39,6 +39,7 @@ static int cache_block_group(struct btrfs_root *root, u64 i; u64 last = 0; u64 hole_size; + u64 first_free; int found = 0; root = root->fs_info->extent_root; @@ -51,16 +52,22 @@ static int cache_block_group(struct btrfs_root *root, path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = 2; + first_free = block_group->key.objectid; key.objectid = block_group->key.objectid; key.flags = 0; key.offset = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) return ret; + if (ret && path->slots[0] > 0) path->slots[0]--; + while(1) { leaf = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; @@ -71,50 +78,48 @@ static int cache_block_group(struct btrfs_root *root, if (ret == 0) { continue; } else { - if (found) { - hole_size = block_group->key.objectid + - block_group->key.offset - last; - } else { - last = block_group->key.objectid; - hole_size = block_group->key.offset; - } - for (i = 0; i < hole_size; i++) { - set_radix_bit(extent_radix, - last + i); - } break; } } + btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + if (key.objectid < block_group->key.objectid) { + if (key.objectid + key.offset > first_free) + first_free = key.objectid + key.offset; + goto next; + } + if (key.objectid >= block_group->key.objectid + block_group->key.offset) { - if (found) { - hole_size = block_group->key.objectid + - block_group->key.offset - last; - } else { - last = block_group->key.objectid; - hole_size = block_group->key.offset; - } - for (i = 0; i < hole_size; i++) { - set_radix_bit(extent_radix, last + i); - } break; } + if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { if (!found) { - last = key.objectid + key.offset; + last = first_free; found = 1; - } else { - hole_size = key.objectid - last; - for (i = 0; i < hole_size; i++) { - set_radix_bit(extent_radix, last + i); - } - last = key.objectid + key.offset; } + hole_size = key.objectid - last; + for (i = 0; i < hole_size; i++) { + set_radix_bit(extent_radix, last + i); + } + last = key.objectid + key.offset; } +next: path->slots[0]++; } + if (!found) + last = first_free; + if (block_group->key.objectid + + block_group->key.offset > last) { + hole_size = block_group->key.objectid + + block_group->key.offset - last; + for (i = 0; i < hole_size; i++) { + set_radix_bit(extent_radix, + last + i); + } + } block_group->cached = 1; err: btrfs_free_path(path); -- cgit v1.2.3-70-g09d2 From d3c2fdcf7b79079f60ac64e61d886964d4647910 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 17 Sep 2007 10:58:06 -0400 Subject: Btrfs: Use balance_dirty_pages_nr on btree blocks btrfs_btree_balance_dirty is changed to pass the number of pages dirtied for more accurate dirty throttling. This lets the VM make better decisions about when to force some writeback. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 +++-- fs/btrfs/disk-io.h | 2 +- fs/btrfs/extent-tree.c | 1 + fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 47 +++++++++++++++++++++++++++++++++++------------ fs/btrfs/transaction.c | 14 ++++++++++---- 6 files changed, 51 insertions(+), 20 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4296839eea1..ef6d76f2ec2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -666,7 +666,8 @@ void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) brelse(buf); } -void btrfs_btree_balance_dirty(struct btrfs_root *root) +void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) { - balance_dirty_pages_ratelimited(root->fs_info->btree_inode->i_mapping); + balance_dirty_pages_ratelimited_nr( + root->fs_info->btree_inode->i_mapping, nr); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 714fa877438..fc6b520c6e0 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -80,7 +80,7 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, u64 num_blocks); int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, u64 logical); -void btrfs_btree_balance_dirty(struct btrfs_root *root); +void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); void btrfs_mark_buffer_dirty(struct buffer_head *bh); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9151850266b..fe772f9b06c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1275,6 +1275,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, set_buffer_checked(buf); set_buffer_defrag(buf); set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); + trans->blocks_used++; return buf; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 26826a54335..698eaea612f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -655,7 +655,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, num_written += write_bytes; balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, 1); cond_resched(); } mutex_unlock(&inode->i_mutex); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e24b875327d..bd00df33fb3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -387,15 +387,17 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) struct btrfs_root *root; struct btrfs_trans_handle *trans; int ret; + unsigned long nr; root = BTRFS_I(dir)->root; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); ret = btrfs_unlink_trans(trans, root, dir, dentry); + nr = trans->blocks_used; btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); return ret; } @@ -412,6 +414,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) int found_type; struct btrfs_leaf *leaf; char *goodnames = ".."; + unsigned long nr; path = btrfs_alloc_path(); BUG_ON(!path); @@ -467,8 +470,9 @@ out: btrfs_release_path(root, path); btrfs_free_path(path); mutex_unlock(&root->fs_info->fs_mutex); + nr = trans->blocks_used; ret = btrfs_end_transaction(trans, root); - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); if (ret && !err) err = ret; return err; @@ -764,6 +768,7 @@ void btrfs_delete_inode(struct inode *inode) { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; + unsigned long nr; int ret; truncate_inode_pages(&inode->i_data, 0); @@ -780,15 +785,17 @@ void btrfs_delete_inode(struct inode *inode) ret = btrfs_free_inode(trans, root, inode); if (ret) goto no_delete_lock; + nr = trans->blocks_used; btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); return; no_delete_lock: + nr = trans->blocks_used; btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); no_delete: clear_inode(inode); } @@ -1165,6 +1172,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, int err; int drop_inode = 0; u64 objectid; + unsigned long nr; if (!new_valid_dev(rdev)) return -EINVAL; @@ -1198,6 +1206,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, btrfs_update_inode_block_group(trans, inode); btrfs_update_inode_block_group(trans, dir); out_unlock: + nr = trans->blocks_used; btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -1205,7 +1214,7 @@ out_unlock: inode_dec_link_count(inode); iput(inode); } - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); return err; } @@ -1217,6 +1226,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, struct inode *inode; int err; int drop_inode = 0; + unsigned long nr; u64 objectid; mutex_lock(&root->fs_info->fs_mutex); @@ -1251,6 +1261,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, btrfs_update_inode_block_group(trans, inode); btrfs_update_inode_block_group(trans, dir); out_unlock: + nr = trans->blocks_used; btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -1258,7 +1269,7 @@ out_unlock: inode_dec_link_count(inode); iput(inode); } - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); return err; } @@ -1268,6 +1279,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(dir)->root; struct inode *inode = old_dentry->d_inode; + unsigned long nr; int err; int drop_inode = 0; @@ -1288,6 +1300,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (err) drop_inode = 1; + nr = trans->blocks_used; btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -1295,7 +1308,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, inode_dec_link_count(inode); iput(inode); } - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); return err; } @@ -1336,6 +1349,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) int err = 0; int drop_on_err = 0; u64 objectid; + unsigned long nr = 1; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); @@ -1380,12 +1394,13 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) btrfs_update_inode_block_group(trans, dir); out_fail: + nr = trans->blocks_used; btrfs_end_transaction(trans, root); out_unlock: mutex_unlock(&root->fs_info->fs_mutex); if (drop_on_err) iput(inode); - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); return err; } @@ -1682,6 +1697,7 @@ static void btrfs_truncate(struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; int ret; struct btrfs_trans_handle *trans; + unsigned long nr; if (!S_ISREG(inode->i_mode)) return; @@ -1697,10 +1713,11 @@ static void btrfs_truncate(struct inode *inode) /* FIXME, add redo link to tree so we don't leak on crash */ ret = btrfs_truncate_in_trans(trans, root, inode); btrfs_update_inode(trans, root, inode); + nr = trans->blocks_used; ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); } int btrfs_commit_write(struct file *file, struct page *page, @@ -1725,6 +1742,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) int err; u64 objectid; u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; + unsigned long nr = 1; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); @@ -1814,12 +1832,13 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) if (ret) goto fail; fail: + nr = trans->blocks_used; err = btrfs_commit_transaction(trans, root); if (err && !ret) ret = err; fail_commit: mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); return ret; } @@ -1832,6 +1851,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) int ret; int err; u64 objectid; + unsigned long nr; if (!root->ref_cows) return -EINVAL; @@ -1884,12 +1904,13 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) if (ret) goto fail; fail: + nr = trans->blocks_used; err = btrfs_commit_transaction(trans, root); if (err && !ret) ret = err; mutex_unlock(&root->fs_info->fs_mutex); up_write(&root->snap_sem); - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); return ret; } @@ -2240,6 +2261,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, int datasize; char *ptr; struct btrfs_file_extent_item *ei; + unsigned long nr; name_len = strlen(symname) + 1; if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) @@ -2309,13 +2331,14 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, drop_inode = 1; out_unlock: + nr = trans->blocks_used; btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); if (drop_inode) { inode_dec_link_count(inode); iput(inode); } - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); return err; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 29755593de6..087074db0bd 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -18,6 +18,7 @@ #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -340,6 +341,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) struct btrfs_fs_info *info = root->fs_info; int ret; struct btrfs_trans_handle *trans; + unsigned long nr; if (root->defrag_running) return 0; @@ -348,10 +350,11 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) while (1) { root->defrag_running = 1; ret = btrfs_defrag_leaves(trans, root, cacheonly); + nr = trans->blocks_used; btrfs_end_transaction(trans, root); mutex_unlock(&info->fs_mutex); - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(info->tree_root, nr); cond_resched(); mutex_lock(&info->fs_mutex); @@ -398,6 +401,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, { struct dirty_root *dirty; struct btrfs_trans_handle *trans; + unsigned long nr; u64 num_blocks; u64 blocks_used; int ret = 0; @@ -426,11 +430,11 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, &dirty->root->root_item); if (err) ret = err; + nr = trans->blocks_used; ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); mutex_unlock(&tree_root->fs_info->fs_mutex); - - btrfs_btree_balance_dirty(tree_root); + btrfs_btree_balance_dirty(tree_root, nr); schedule(); mutex_lock(&tree_root->fs_info->fs_mutex); @@ -449,13 +453,15 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, BUG(); break; } + nr = trans->blocks_used; ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); kfree(dirty->root); kfree(dirty); mutex_unlock(&tree_root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(tree_root); + + btrfs_btree_balance_dirty(tree_root, nr); schedule(); } return ret; -- cgit v1.2.3-70-g09d2 From cf67582bb2ec364d71b3e4f8e49de98d8c3c832e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 17 Sep 2007 11:00:51 -0400 Subject: Btrfs: Fix duplicate ENOSPC checks in find_free_extent find_free_extent would fail to wrap around to the start of the drive because it was doing the enospc case checking twice in some cases, causing it to return -ENOSPC early. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fe772f9b06c..f261a8326cd 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1111,20 +1111,9 @@ check_pending: btrfs_release_path(root, path); BUG_ON(ins->objectid < search_start); - if (ins->objectid + num_blocks >= search_end) { - if (full_scan) { - ret = -ENOSPC; - goto error; - } - search_start = orig_search_start; - if (wrapped) { - if (!full_scan) - total_needed -= empty_size; - full_scan = 1; - } else - wrapped = 1; - goto new_group; - } + if (ins->objectid + num_blocks >= search_end) + goto enospc; + for (test_block = ins->objectid; test_block < ins->objectid + num_blocks; test_block++) { if (test_radix_bit(&info->pinned_radix, test_block) || @@ -1149,6 +1138,7 @@ check_pending: new_group: if (search_start + num_blocks >= search_end) { +enospc: search_start = orig_search_start; if (full_scan) { ret = -ENOSPC; -- cgit v1.2.3-70-g09d2 From 5f39d397dfbe140a14edecd4e73c34ce23c4f9ee Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:14:19 -0400 Subject: Btrfs: Create extent_buffer interface for large blocksizes Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 + fs/btrfs/ctree.c | 1484 ++++++++++++++++++++++++++---------------------- fs/btrfs/ctree.h | 849 ++++++++++----------------- fs/btrfs/dir-item.c | 102 ++-- fs/btrfs/disk-io.c | 391 ++++++------- fs/btrfs/disk-io.h | 55 +- fs/btrfs/extent-tree.c | 294 +++++----- fs/btrfs/extent_map.c | 672 +++++++++++++++++++++- fs/btrfs/extent_map.h | 75 ++- fs/btrfs/file-item.c | 63 +- fs/btrfs/file.c | 129 +++-- fs/btrfs/inode-item.c | 22 +- fs/btrfs/inode-map.c | 17 +- fs/btrfs/inode.c | 435 ++++++++------ fs/btrfs/print-tree.c | 96 ++-- fs/btrfs/print-tree.h | 4 +- fs/btrfs/root-tree.c | 51 +- fs/btrfs/super.c | 7 +- fs/btrfs/sysfs.c | 10 +- fs/btrfs/transaction.c | 61 +- fs/btrfs/transaction.h | 5 +- fs/btrfs/tree-defrag.c | 58 +- 22 files changed, 2794 insertions(+), 2088 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 6341cf7a9f9..a3b51085d7f 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -23,4 +23,6 @@ modules_install: clean: $(MAKE) -C $(KERNELDIR) M=`pwd` clean +tester: + $(MAKE) -C $(KERNELDIR) M=`pwd` tree-defrag.o transaction.o sysfs.o super.o root-tree.o inode-map.o inode-item.o inode.o file-item.o file.o extent_map.o disk-io.o ctree.o dir-item.o extent-tree.o endif diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b41f48ade41..f60920e8a0e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -16,21 +16,24 @@ * Boston, MA 021110-1307, USA. */ +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" +#include "print-tree.h" static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level); static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *ins_key, struct btrfs_path *path, int data_size); -static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *dst, struct buffer_head - *src); -static int balance_node_right(struct btrfs_trans_handle *trans, struct - btrfs_root *root, struct buffer_head *dst_buf, - struct buffer_head *src_buf); +static int push_node_left(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *dst, + struct extent_buffer *src); +static int balance_node_right(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *dst_buf, + struct extent_buffer *src_buf); static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot); @@ -62,40 +65,38 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) for (i = 0; i < BTRFS_MAX_LEVEL; i++) { if (!p->nodes[i]) break; - btrfs_block_release(root, p->nodes[i]); + free_extent_buffer(p->nodes[i]); } memset(p, 0, sizeof(*p)); } -static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *buf, struct buffer_head - *parent, int parent_slot, struct buffer_head - **cow_ret, u64 search_start, u64 empty_size) +static int __btrfs_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + struct extent_buffer *parent, int parent_slot, + struct extent_buffer **cow_ret, + u64 search_start, u64 empty_size) { - struct buffer_head *cow; - struct btrfs_node *cow_node; + struct extent_buffer *cow; int ret = 0; int different_trans = 0; WARN_ON(root->ref_cows && trans->transid != root->last_trans); - WARN_ON(!buffer_uptodate(buf)); + cow = btrfs_alloc_free_block(trans, root, search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); - cow_node = btrfs_buffer_node(cow); - if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) + if (buf->len != root->sectorsize || cow->len != root->sectorsize) WARN_ON(1); - memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); - btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); - btrfs_set_header_generation(&cow_node->header, trans->transid); - btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); + copy_extent_buffer(cow, buf, 0, 0, cow->len); + btrfs_set_header_blocknr(cow, extent_buffer_blocknr(cow)); + btrfs_set_header_generation(cow, trans->transid); + btrfs_set_header_owner(cow, root->root_key.objectid); - WARN_ON(btrfs_header_generation(btrfs_buffer_header(buf)) > - trans->transid); - if (btrfs_header_generation(btrfs_buffer_header(buf)) != - trans->transid) { + WARN_ON(btrfs_header_generation(buf) > trans->transid); + if (btrfs_header_generation(buf) != trans->transid) { different_trans = 1; ret = btrfs_inc_ref(trans, root, buf); if (ret) @@ -106,29 +107,29 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root if (buf == root->node) { root->node = cow; - get_bh(cow); + extent_buffer_get(cow); if (buf != root->commit_root) { - btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); + btrfs_free_extent(trans, root, + extent_buffer_blocknr(buf), 1, 1); } - btrfs_block_release(root, buf); + free_extent_buffer(buf); } else { - btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, - bh_blocknr(cow)); + btrfs_set_node_blockptr(parent, parent_slot, + extent_buffer_blocknr(cow)); btrfs_mark_buffer_dirty(parent); - WARN_ON(btrfs_header_generation(btrfs_buffer_header(parent)) != - trans->transid); - btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); + WARN_ON(btrfs_header_generation(parent) != trans->transid); + btrfs_free_extent(trans, root, extent_buffer_blocknr(buf),1,1); } - btrfs_block_release(root, buf); + free_extent_buffer(buf); btrfs_mark_buffer_dirty(cow); *cow_ret = cow; return 0; } -int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *buf, struct buffer_head - *parent, int parent_slot, struct buffer_head - **cow_ret) +int btrfs_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *buf, + struct extent_buffer *parent, int parent_slot, + struct extent_buffer **cow_ret) { u64 search_start; if (trans->transaction != root->fs_info->running_transaction) { @@ -141,13 +142,12 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root root->fs_info->generation); WARN_ON(1); } - if (btrfs_header_generation(btrfs_buffer_header(buf)) == - trans->transid) { + if (btrfs_header_generation(buf) == trans->transid) { *cow_ret = buf; return 0; } - search_start = bh_blocknr(buf) & ~((u64)65535); + search_start = extent_buffer_blocknr(buf) & ~((u64)65535); return __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); } @@ -161,9 +161,11 @@ static int close_blocks(u64 blocknr, u64 other) return 0; } -static int should_defrag_leaf(struct buffer_head *bh) +#if 0 +static int should_defrag_leaf(struct extent_buffer *eb) { - struct btrfs_leaf *leaf = btrfs_buffer_leaf(bh); + return 0; + struct btrfs_leaf *leaf = btrfs_buffer_leaf(eb); struct btrfs_disk_key *key; u32 nritems; @@ -188,14 +190,17 @@ static int should_defrag_leaf(struct buffer_head *bh) } return 0; } +#endif int btrfs_realloc_node(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct buffer_head *parent, + struct btrfs_root *root, struct extent_buffer *parent, int cache_only, u64 *last_ret) { + return 0; +#if 0 struct btrfs_node *parent_node; - struct buffer_head *cur_bh; - struct buffer_head *tmp_bh; + struct extent_buffer *cur_eb; + struct extent_buffer *tmp_eb; u64 blocknr; u64 search_start = *last_ret; u64 last_block = 0; @@ -281,6 +286,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, brelse(tmp_bh); } return err; +#endif } /* @@ -289,12 +295,12 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, * which is the stop of the leaf data stack */ static inline unsigned int leaf_data_end(struct btrfs_root *root, - struct btrfs_leaf *leaf) + struct extent_buffer *leaf) { - u32 nr = btrfs_header_nritems(&leaf->header); + u32 nr = btrfs_header_nritems(leaf); if (nr == 0) return BTRFS_LEAF_DATA_SIZE(root); - return btrfs_item_offset(leaf->items + nr - 1); + return btrfs_item_offset_nr(leaf, nr - 1); } /* @@ -310,9 +316,9 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) return 1; if (k1.objectid < k2->objectid) return -1; - if (k1.flags > k2->flags) + if (k1.type > k2->type) return 1; - if (k1.flags < k2->flags) + if (k1.type < k2->type) return -1; if (k1.offset > k2->offset) return 1; @@ -324,37 +330,39 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) static int check_node(struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_node *parent = NULL; - struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]); + struct extent_buffer *parent = NULL; + struct extent_buffer *node = path->nodes[level]; + struct btrfs_disk_key parent_key; + struct btrfs_disk_key node_key; int parent_slot; int slot; struct btrfs_key cpukey; - u32 nritems = btrfs_header_nritems(&node->header); + u32 nritems = btrfs_header_nritems(node); if (path->nodes[level + 1]) - parent = btrfs_buffer_node(path->nodes[level + 1]); + parent = path->nodes[level + 1]; slot = path->slots[level]; - BUG_ON(!buffer_uptodate(path->nodes[level])); BUG_ON(nritems == 0); if (parent) { - struct btrfs_disk_key *parent_key; - parent_slot = path->slots[level + 1]; - parent_key = &parent->ptrs[parent_slot].key; - BUG_ON(memcmp(parent_key, &node->ptrs[0].key, + btrfs_node_key(parent, &parent_key, parent_slot); + btrfs_node_key(node, &node_key, 0); + BUG_ON(memcmp(&parent_key, &node_key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_blocknr(&node->header)); + btrfs_header_blocknr(node)); } BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); if (slot != 0) { - btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[slot - 1].key); - BUG_ON(comp_keys(&node->ptrs[slot].key, &cpukey) <= 0); + btrfs_node_key_to_cpu(node, &cpukey, slot - 1); + btrfs_node_key(node, &node_key, slot); + BUG_ON(comp_keys(&node_key, &cpukey) <= 0); } if (slot < nritems - 1) { - btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[slot + 1].key); - BUG_ON(comp_keys(&node->ptrs[slot].key, &cpukey) >= 0); + btrfs_node_key_to_cpu(node, &cpukey, slot + 1); + btrfs_node_key(node, &node_key, slot); + BUG_ON(comp_keys(&node_key, &cpukey) >= 0); } return 0; } @@ -362,83 +370,172 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[level]); - struct btrfs_node *parent = NULL; + struct extent_buffer *leaf = path->nodes[level]; + struct extent_buffer *parent = NULL; int parent_slot; - int slot = path->slots[0]; struct btrfs_key cpukey; + struct btrfs_disk_key parent_key; + struct btrfs_disk_key leaf_key; + int slot = path->slots[0]; - u32 nritems = btrfs_header_nritems(&leaf->header); + u32 nritems = btrfs_header_nritems(leaf); if (path->nodes[level + 1]) - parent = btrfs_buffer_node(path->nodes[level + 1]); - - BUG_ON(btrfs_leaf_free_space(root, leaf) < 0); + parent = path->nodes[level + 1]; if (nritems == 0) return 0; if (parent) { - struct btrfs_disk_key *parent_key; - parent_slot = path->slots[level + 1]; - parent_key = &parent->ptrs[parent_slot].key; + btrfs_node_key(parent, &parent_key, parent_slot); + btrfs_item_key(leaf, &leaf_key, 0); - BUG_ON(memcmp(parent_key, &leaf->items[0].key, + BUG_ON(memcmp(&parent_key, &leaf_key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_blocknr(&leaf->header)); + btrfs_header_blocknr(leaf)); + } +#if 0 + for (i = 0; nritems > 1 && i < nritems - 2; i++) { + btrfs_item_key_to_cpu(leaf, &cpukey, i + 1); + btrfs_item_key(leaf, &leaf_key, i); + if (comp_keys(&leaf_key, &cpukey) >= 0) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad key\n", i); + BUG_ON(1); + } + if (btrfs_item_offset_nr(leaf, i) != + btrfs_item_end_nr(leaf, i + 1)) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad\n", i); + BUG_ON(1); + } + if (i == 0) { + if (btrfs_item_offset_nr(leaf, i) + + btrfs_item_size_nr(leaf, i) != + BTRFS_LEAF_DATA_SIZE(root)) { + btrfs_print_leaf(root, leaf); + printk("slot %d first offset bad\n", i); + BUG_ON(1); + } + } } - if (slot != 0) { - btrfs_disk_key_to_cpu(&cpukey, &leaf->items[slot - 1].key); - BUG_ON(comp_keys(&leaf->items[slot].key, &cpukey) <= 0); - BUG_ON(btrfs_item_offset(leaf->items + slot - 1) != - btrfs_item_end(leaf->items + slot)); + if (nritems > 0) { + if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) { + btrfs_print_leaf(root, leaf); + printk("slot %d bad size \n", nritems - 1); + BUG_ON(1); + } + } +#endif + if (slot != 0 && slot < nritems - 1) { + btrfs_item_key(leaf, &leaf_key, slot); + btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1); + if (comp_keys(&leaf_key, &cpukey) <= 0) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad key\n", slot); + BUG_ON(1); + } + if (btrfs_item_offset_nr(leaf, slot - 1) != + btrfs_item_end_nr(leaf, slot)) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad\n", slot); + BUG_ON(1); + } } if (slot < nritems - 1) { - btrfs_disk_key_to_cpu(&cpukey, &leaf->items[slot + 1].key); - BUG_ON(comp_keys(&leaf->items[slot].key, &cpukey) >= 0); - BUG_ON(btrfs_item_offset(leaf->items + slot) != - btrfs_item_end(leaf->items + slot + 1)); + btrfs_item_key(leaf, &leaf_key, slot); + btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1); + BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0); + if (btrfs_item_offset_nr(leaf, slot) != + btrfs_item_end_nr(leaf, slot + 1)) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad\n", slot); + BUG_ON(1); + } } - BUG_ON(btrfs_item_offset(leaf->items) + - btrfs_item_size(leaf->items) != BTRFS_LEAF_DATA_SIZE(root)); + BUG_ON(btrfs_item_offset_nr(leaf, 0) + + btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root)); return 0; } static int check_block(struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]); - if (memcmp(node->header.fsid, root->fs_info->disk_super->fsid, - sizeof(node->header.fsid))) - BUG(); + struct extent_buffer *buf = path->nodes[level]; + char fsid[BTRFS_FSID_SIZE]; + + read_extent_buffer(buf, fsid, (unsigned long)btrfs_header_fsid(buf), + BTRFS_FSID_SIZE); + + if (memcmp(fsid, root->fs_info->fsid, BTRFS_FSID_SIZE)) { + int i = 0; + printk("warning bad block %Lu\n", buf->start); + if (!btrfs_buffer_uptodate(buf)) { + WARN_ON(1); + } + for (i = 0; i < BTRFS_FSID_SIZE; i++) { + printk("%x:%x ", root->fs_info->fsid[i], fsid[i]); + } + printk("\n"); + // BUG(); + } if (level == 0) return check_leaf(root, path, level); return check_node(root, path, level); } /* - * search for key in the array p. items p are item_size apart - * and there are 'max' items in p + * search for key in the extent_buffer. The items start at offset p, + * and they are item_size apart. There are 'max' items in p. + * * the slot in the array is returned via slot, and it points to * the place where you would insert key if it is not found in * the array. * * slot may point to max if the key is bigger than all of the keys */ -static int generic_bin_search(char *p, int item_size, struct btrfs_key *key, - int max, int *slot) +static int generic_bin_search(struct extent_buffer *eb, unsigned long p, + int item_size, struct btrfs_key *key, + int max, int *slot) { int low = 0; int high = max; int mid; int ret; struct btrfs_disk_key *tmp; + struct btrfs_disk_key unaligned; + unsigned long offset; + char *map_token = NULL; + char *kaddr = NULL; + unsigned long map_start = 0; + unsigned long map_len = 0; while(low < high) { mid = (low + high) / 2; - tmp = (struct btrfs_disk_key *)(p + mid * item_size); + offset = p + mid * item_size; + + if (!map_token || offset < map_start || + (offset + sizeof(struct btrfs_disk_key)) > + map_start + map_len) { + if (map_token) + unmap_extent_buffer(eb, map_token, KM_USER0); + map_extent_buffer(eb, offset, &map_token, &kaddr, + &map_start, &map_len, KM_USER0); + + } + if (offset + sizeof(struct btrfs_disk_key) > + map_start + map_len) { + unmap_extent_buffer(eb, map_token, KM_USER0); + read_extent_buffer(eb, &unaligned, + offset, sizeof(unaligned)); + map_token = NULL; + tmp = &unaligned; + } else { + tmp = (struct btrfs_disk_key *)(kaddr + offset - + map_start); + } ret = comp_keys(tmp, key); if (ret < 0) @@ -447,10 +544,13 @@ static int generic_bin_search(char *p, int item_size, struct btrfs_key *key, high = mid; else { *slot = mid; + unmap_extent_buffer(eb, map_token, KM_USER0); return 0; } } *slot = low; + if (map_token) + unmap_extent_buffer(eb, map_token, KM_USER0); return 1; } @@ -458,46 +558,42 @@ static int generic_bin_search(char *p, int item_size, struct btrfs_key *key, * simple bin_search frontend that does the right thing for * leaves vs nodes */ -static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot) +static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, + int level, int *slot) { - if (btrfs_is_leaf(c)) { - struct btrfs_leaf *l = (struct btrfs_leaf *)c; - return generic_bin_search((void *)l->items, + if (level == 0) { + return generic_bin_search(eb, + offsetof(struct btrfs_leaf, items), sizeof(struct btrfs_item), - key, btrfs_header_nritems(&c->header), + key, btrfs_header_nritems(eb), slot); } else { - return generic_bin_search((void *)c->ptrs, + return generic_bin_search(eb, + offsetof(struct btrfs_node, ptrs), sizeof(struct btrfs_key_ptr), - key, btrfs_header_nritems(&c->header), + key, btrfs_header_nritems(eb), slot); } return -1; } -static struct buffer_head *read_node_slot(struct btrfs_root *root, - struct buffer_head *parent_buf, - int slot) +static struct extent_buffer *read_node_slot(struct btrfs_root *root, + struct extent_buffer *parent, int slot) { - struct btrfs_node *node = btrfs_buffer_node(parent_buf); if (slot < 0) return NULL; - if (slot >= btrfs_header_nritems(&node->header)) + if (slot >= btrfs_header_nritems(parent)) return NULL; - return read_tree_block(root, btrfs_node_blockptr(node, slot)); + return read_tree_block(root, btrfs_node_blockptr(parent, slot)); } static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct buffer_head *right_buf; - struct buffer_head *mid_buf; - struct buffer_head *left_buf; - struct buffer_head *parent_buf = NULL; - struct btrfs_node *right = NULL; - struct btrfs_node *mid; - struct btrfs_node *left = NULL; - struct btrfs_node *parent = NULL; + struct extent_buffer *right = NULL; + struct extent_buffer *mid; + struct extent_buffer *left = NULL; + struct extent_buffer *parent = NULL; int ret = 0; int wret; int pslot; @@ -508,60 +604,57 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (level == 0) return 0; - mid_buf = path->nodes[level]; - mid = btrfs_buffer_node(mid_buf); + mid = path->nodes[level]; orig_ptr = btrfs_node_blockptr(mid, orig_slot); if (level < BTRFS_MAX_LEVEL - 1) - parent_buf = path->nodes[level + 1]; + parent = path->nodes[level + 1]; pslot = path->slots[level + 1]; /* * deal with the case where there is only one pointer in the root * by promoting the node below to a root */ - if (!parent_buf) { - struct buffer_head *child; - u64 blocknr = bh_blocknr(mid_buf); + if (!parent) { + struct extent_buffer *child; + u64 blocknr = extent_buffer_blocknr(mid); - if (btrfs_header_nritems(&mid->header) != 1) + if (btrfs_header_nritems(mid) != 1) return 0; /* promote the child to a root */ - child = read_node_slot(root, mid_buf, 0); + child = read_node_slot(root, mid, 0); BUG_ON(!child); root->node = child; path->nodes[level] = NULL; - clean_tree_block(trans, root, mid_buf); - wait_on_buffer(mid_buf); + clean_tree_block(trans, root, mid); + wait_on_tree_block_writeback(root, mid); /* once for the path */ - btrfs_block_release(root, mid_buf); + free_extent_buffer(mid); /* once for the root ptr */ - btrfs_block_release(root, mid_buf); + free_extent_buffer(mid); return btrfs_free_extent(trans, root, blocknr, 1, 1); } - parent = btrfs_buffer_node(parent_buf); - - if (btrfs_header_nritems(&mid->header) > + if (btrfs_header_nritems(mid) > BTRFS_NODEPTRS_PER_BLOCK(root) / 4) return 0; - if (btrfs_header_nritems(&mid->header) < 2) + if (btrfs_header_nritems(mid) < 2) err_on_enospc = 1; - left_buf = read_node_slot(root, parent_buf, pslot - 1); - if (left_buf) { - wret = btrfs_cow_block(trans, root, left_buf, - parent_buf, pslot - 1, &left_buf); + left = read_node_slot(root, parent, pslot - 1); + if (left) { + wret = btrfs_cow_block(trans, root, left, + parent, pslot - 1, &left); if (wret) { ret = wret; goto enospc; } } - right_buf = read_node_slot(root, parent_buf, pslot + 1); - if (right_buf) { - wret = btrfs_cow_block(trans, root, right_buf, - parent_buf, pslot + 1, &right_buf); + right = read_node_slot(root, parent, pslot + 1); + if (right) { + wret = btrfs_cow_block(trans, root, right, + parent, pslot + 1, &right); if (wret) { ret = wret; goto enospc; @@ -569,30 +662,27 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root } /* first, try to make some room in the middle buffer */ - if (left_buf) { - left = btrfs_buffer_node(left_buf); - orig_slot += btrfs_header_nritems(&left->header); - wret = push_node_left(trans, root, left_buf, mid_buf); + if (left) { + orig_slot += btrfs_header_nritems(left); + wret = push_node_left(trans, root, left, mid); if (wret < 0) ret = wret; - if (btrfs_header_nritems(&mid->header) < 2) + if (btrfs_header_nritems(mid) < 2) err_on_enospc = 1; } /* * then try to empty the right most buffer into the middle */ - if (right_buf) { - right = btrfs_buffer_node(right_buf); - wret = push_node_left(trans, root, mid_buf, right_buf); + if (right) { + wret = push_node_left(trans, root, mid, right); if (wret < 0 && wret != -ENOSPC) ret = wret; - if (btrfs_header_nritems(&right->header) == 0) { - u64 blocknr = bh_blocknr(right_buf); - clean_tree_block(trans, root, right_buf); - wait_on_buffer(right_buf); - btrfs_block_release(root, right_buf); - right_buf = NULL; + if (btrfs_header_nritems(right) == 0) { + u64 blocknr = extent_buffer_blocknr(right); + clean_tree_block(trans, root, right); + wait_on_tree_block_writeback(root, right); + free_extent_buffer(right); right = NULL; wret = del_ptr(trans, root, path, level + 1, pslot + 1); @@ -602,14 +692,13 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (wret) ret = wret; } else { - btrfs_memcpy(root, parent, - &parent->ptrs[pslot + 1].key, - &right->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - btrfs_mark_buffer_dirty(parent_buf); + struct btrfs_disk_key right_key; + btrfs_node_key(right, &right_key, 0); + btrfs_set_node_key(parent, &right_key, pslot + 1); + btrfs_mark_buffer_dirty(parent); } } - if (btrfs_header_nritems(&mid->header) == 1) { + if (btrfs_header_nritems(mid) == 1) { /* * we're not allowed to leave a node with one item in the * tree during a delete. A deletion from lower in the tree @@ -619,21 +708,20 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root * otherwise we would have pulled some pointers from the * right */ - BUG_ON(!left_buf); - wret = balance_node_right(trans, root, mid_buf, left_buf); + BUG_ON(!left); + wret = balance_node_right(trans, root, mid, left); if (wret < 0) { ret = wret; goto enospc; } BUG_ON(wret == 1); } - if (btrfs_header_nritems(&mid->header) == 0) { + if (btrfs_header_nritems(mid) == 0) { /* we've managed to empty the middle node, drop it */ - u64 blocknr = bh_blocknr(mid_buf); - clean_tree_block(trans, root, mid_buf); - wait_on_buffer(mid_buf); - btrfs_block_release(root, mid_buf); - mid_buf = NULL; + u64 blocknr = extent_buffer_blocknr(mid); + clean_tree_block(trans, root, mid); + wait_on_tree_block_writeback(root, mid); + free_extent_buffer(mid); mid = NULL; wret = del_ptr(trans, root, path, level + 1, pslot); if (wret) @@ -643,37 +731,36 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; } else { /* update the parent key to reflect our changes */ - btrfs_memcpy(root, parent, - &parent->ptrs[pslot].key, &mid->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - btrfs_mark_buffer_dirty(parent_buf); + struct btrfs_disk_key mid_key; + btrfs_node_key(mid, &mid_key, 0); + btrfs_set_node_key(parent, &mid_key, pslot); + btrfs_mark_buffer_dirty(parent); } /* update the path */ - if (left_buf) { - if (btrfs_header_nritems(&left->header) > orig_slot) { - get_bh(left_buf); - path->nodes[level] = left_buf; + if (left) { + if (btrfs_header_nritems(left) > orig_slot) { + extent_buffer_get(left); + path->nodes[level] = left; path->slots[level + 1] -= 1; path->slots[level] = orig_slot; - if (mid_buf) - btrfs_block_release(root, mid_buf); + if (mid) + free_extent_buffer(mid); } else { - orig_slot -= btrfs_header_nritems(&left->header); + orig_slot -= btrfs_header_nritems(left); path->slots[level] = orig_slot; } } /* double check we haven't messed things up */ check_block(root, path, level); if (orig_ptr != - btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]), - path->slots[level])) + btrfs_node_blockptr(path->nodes[level], path->slots[level])) BUG(); enospc: - if (right_buf) - btrfs_block_release(root, right_buf); - if (left_buf) - btrfs_block_release(root, left_buf); + if (right) + free_extent_buffer(right); + if (left) + free_extent_buffer(left); return ret; } @@ -682,14 +769,10 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct buffer_head *right_buf; - struct buffer_head *mid_buf; - struct buffer_head *left_buf; - struct buffer_head *parent_buf = NULL; - struct btrfs_node *right = NULL; - struct btrfs_node *mid; - struct btrfs_node *left = NULL; - struct btrfs_node *parent = NULL; + struct extent_buffer *right = NULL; + struct extent_buffer *mid; + struct extent_buffer *left = NULL; + struct extent_buffer *parent = NULL; int ret = 0; int wret; int pslot; @@ -699,107 +782,101 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, if (level == 0) return 1; - mid_buf = path->nodes[level]; - mid = btrfs_buffer_node(mid_buf); + mid = path->nodes[level]; orig_ptr = btrfs_node_blockptr(mid, orig_slot); if (level < BTRFS_MAX_LEVEL - 1) - parent_buf = path->nodes[level + 1]; + parent = path->nodes[level + 1]; pslot = path->slots[level + 1]; - if (!parent_buf) + if (!parent) return 1; - parent = btrfs_buffer_node(parent_buf); - left_buf = read_node_slot(root, parent_buf, pslot - 1); + left = read_node_slot(root, parent, pslot - 1); /* first, try to make some room in the middle buffer */ - if (left_buf) { + if (left) { u32 left_nr; - left = btrfs_buffer_node(left_buf); - left_nr = btrfs_header_nritems(&left->header); + left_nr = btrfs_header_nritems(left); if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; } else { - ret = btrfs_cow_block(trans, root, left_buf, parent_buf, - pslot - 1, &left_buf); + ret = btrfs_cow_block(trans, root, left, parent, + pslot - 1, &left); if (ret) wret = 1; else { - left = btrfs_buffer_node(left_buf); wret = push_node_left(trans, root, - left_buf, mid_buf); + left, mid); } } if (wret < 0) ret = wret; if (wret == 0) { + struct btrfs_disk_key disk_key; orig_slot += left_nr; - btrfs_memcpy(root, parent, - &parent->ptrs[pslot].key, - &mid->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - btrfs_mark_buffer_dirty(parent_buf); - if (btrfs_header_nritems(&left->header) > orig_slot) { - path->nodes[level] = left_buf; + btrfs_node_key(mid, &disk_key, 0); + btrfs_set_node_key(parent, &disk_key, pslot); + btrfs_mark_buffer_dirty(parent); + if (btrfs_header_nritems(left) > orig_slot) { + path->nodes[level] = left; path->slots[level + 1] -= 1; path->slots[level] = orig_slot; - btrfs_block_release(root, mid_buf); + free_extent_buffer(mid); } else { orig_slot -= - btrfs_header_nritems(&left->header); + btrfs_header_nritems(left); path->slots[level] = orig_slot; - btrfs_block_release(root, left_buf); + free_extent_buffer(left); } check_node(root, path, level); return 0; } - btrfs_block_release(root, left_buf); + free_extent_buffer(left); } - right_buf = read_node_slot(root, parent_buf, pslot + 1); + right= read_node_slot(root, parent, pslot + 1); /* * then try to empty the right most buffer into the middle */ - if (right_buf) { + if (right) { u32 right_nr; - right = btrfs_buffer_node(right_buf); - right_nr = btrfs_header_nritems(&right->header); + right_nr = btrfs_header_nritems(right); if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; } else { - ret = btrfs_cow_block(trans, root, right_buf, - parent_buf, pslot + 1, - &right_buf); + ret = btrfs_cow_block(trans, root, right, + parent, pslot + 1, + &right); if (ret) wret = 1; else { - right = btrfs_buffer_node(right_buf); wret = balance_node_right(trans, root, - right_buf, mid_buf); + right, mid); } } if (wret < 0) ret = wret; if (wret == 0) { - btrfs_memcpy(root, parent, - &parent->ptrs[pslot + 1].key, - &right->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - btrfs_mark_buffer_dirty(parent_buf); - if (btrfs_header_nritems(&mid->header) <= orig_slot) { - path->nodes[level] = right_buf; + struct btrfs_disk_key disk_key; + + btrfs_node_key(right, &disk_key, 0); + btrfs_set_node_key(parent, &disk_key, pslot + 1); + btrfs_mark_buffer_dirty(parent); + + if (btrfs_header_nritems(mid) <= orig_slot) { + path->nodes[level] = right; path->slots[level + 1] += 1; path->slots[level] = orig_slot - - btrfs_header_nritems(&mid->header); - btrfs_block_release(root, mid_buf); + btrfs_header_nritems(mid); + free_extent_buffer(mid); } else { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); } check_node(root, path, level); return 0; } - btrfs_block_release(root, right_buf); + free_extent_buffer(right); } check_node(root, path, level); return 1; @@ -811,10 +888,9 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { - struct btrfs_node *node; + struct extent_buffer *node; int i; u32 nritems; - u64 item_objectid; u64 blocknr; u64 search; u64 cluster_start; @@ -823,7 +899,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, int direction = path->reada; struct radix_tree_root found; unsigned long gang[8]; - struct buffer_head *bh; + struct extent_buffer *eb; if (level == 0) return; @@ -831,18 +907,17 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, if (!path->nodes[level]) return; - node = btrfs_buffer_node(path->nodes[level]); + node = path->nodes[level]; search = btrfs_node_blockptr(node, slot); - bh = btrfs_find_tree_block(root, search); - if (bh) { - brelse(bh); + eb = btrfs_find_tree_block(root, search); + if (eb) { + free_extent_buffer(eb); return; } init_bit_radix(&found); - nritems = btrfs_header_nritems(&node->header); + nritems = btrfs_header_nritems(node); for (i = slot; i < nritems; i++) { - item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); blocknr = btrfs_node_blockptr(node, i); set_radix_bit(&found, blocknr); } @@ -886,8 +961,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow) { - struct buffer_head *b; - struct btrfs_node *c; + struct extent_buffer *b; u64 blocknr; int slot; int ret; @@ -901,10 +975,9 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); again: b = root->node; - get_bh(b); + extent_buffer_get(b); while (b) { - c = btrfs_buffer_node(b); - level = btrfs_header_level(&c->header); + level = btrfs_header_level(b); if (cow) { int wret; wret = btrfs_cow_block(trans, root, b, @@ -912,32 +985,30 @@ again: p->slots[level + 1], &b); if (wret) { - btrfs_block_release(root, b); + free_extent_buffer(b); return wret; } - c = btrfs_buffer_node(b); } BUG_ON(!cow && ins_len); - if (level != btrfs_header_level(&c->header)) + if (level != btrfs_header_level(b)) WARN_ON(1); - level = btrfs_header_level(&c->header); + level = btrfs_header_level(b); p->nodes[level] = b; ret = check_block(root, p, level); if (ret) return -1; - ret = bin_search(c, key, &slot); - if (!btrfs_is_leaf(c)) { + ret = bin_search(b, key, level, &slot); + if (level != 0) { if (ret && slot > 0) slot -= 1; p->slots[level] = slot; - if (ins_len > 0 && btrfs_header_nritems(&c->header) >= + if (ins_len > 0 && btrfs_header_nritems(b) >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { int sret = split_node(trans, root, p, level); BUG_ON(sret > 0); if (sret) return sret; b = p->nodes[level]; - c = btrfs_buffer_node(b); slot = p->slots[level]; } else if (ins_len < 0) { int sret = balance_level(trans, root, p, @@ -947,22 +1018,19 @@ again: b = p->nodes[level]; if (!b) goto again; - c = btrfs_buffer_node(b); slot = p->slots[level]; - BUG_ON(btrfs_header_nritems(&c->header) == 1); + BUG_ON(btrfs_header_nritems(b) == 1); } /* this is only true while dropping a snapshot */ if (level == lowest_level) break; - blocknr = btrfs_node_blockptr(c, slot); + blocknr = btrfs_node_blockptr(b, slot); if (should_reada) reada_for_search(root, p, level, slot); - b = read_tree_block(root, btrfs_node_blockptr(c, slot)); - + b = read_tree_block(root, btrfs_node_blockptr(b, slot)); } else { - struct btrfs_leaf *l = (struct btrfs_leaf *)c; p->slots[level] = slot; - if (ins_len > 0 && btrfs_leaf_free_space(root, l) < + if (ins_len > 0 && btrfs_leaf_free_space(root, b) < sizeof(struct btrfs_item) + ins_len) { int sret = split_leaf(trans, root, key, p, ins_len); @@ -986,19 +1054,20 @@ again: * If this fails to write a tree block, it returns -1, but continues * fixing up the blocks in ram so the tree is consistent. */ -static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, struct btrfs_disk_key - *key, int level) +static int fixup_low_keys(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_disk_key *key, int level) { int i; int ret = 0; + struct extent_buffer *t; + for (i = level; i < BTRFS_MAX_LEVEL; i++) { - struct btrfs_node *t; int tslot = path->slots[i]; if (!path->nodes[i]) break; - t = btrfs_buffer_node(path->nodes[i]); - btrfs_memcpy(root, t, &t->ptrs[tslot].key, key, sizeof(*key)); + t = path->nodes[i]; + btrfs_set_node_key(t, key, tslot); btrfs_mark_buffer_dirty(path->nodes[i]); if (tslot != 0) break; @@ -1014,18 +1083,16 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root * error, and > 0 if there was no room in the left hand block. */ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *dst_buf, struct - buffer_head *src_buf) + *root, struct extent_buffer *dst, + struct extent_buffer *src) { - struct btrfs_node *src = btrfs_buffer_node(src_buf); - struct btrfs_node *dst = btrfs_buffer_node(dst_buf); int push_items = 0; int src_nritems; int dst_nritems; int ret = 0; - src_nritems = btrfs_header_nritems(&src->header); - dst_nritems = btrfs_header_nritems(&dst->header); + src_nritems = btrfs_header_nritems(src); + dst_nritems = btrfs_header_nritems(dst); push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; if (push_items <= 0) { @@ -1035,17 +1102,21 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root if (src_nritems < push_items) push_items = src_nritems; - btrfs_memcpy(root, dst, dst->ptrs + dst_nritems, src->ptrs, - push_items * sizeof(struct btrfs_key_ptr)); + copy_extent_buffer(dst, src, + btrfs_node_key_ptr_offset(dst_nritems), + btrfs_node_key_ptr_offset(0), + push_items * sizeof(struct btrfs_key_ptr)); + if (push_items < src_nritems) { - btrfs_memmove(root, src, src->ptrs, src->ptrs + push_items, - (src_nritems - push_items) * - sizeof(struct btrfs_key_ptr)); - } - btrfs_set_header_nritems(&src->header, src_nritems - push_items); - btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - btrfs_mark_buffer_dirty(src_buf); - btrfs_mark_buffer_dirty(dst_buf); + memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), + btrfs_node_key_ptr_offset(push_items), + (src_nritems - push_items) * + sizeof(struct btrfs_key_ptr)); + } + btrfs_set_header_nritems(src, src_nritems - push_items); + btrfs_set_header_nritems(dst, dst_nritems + push_items); + btrfs_mark_buffer_dirty(src); + btrfs_mark_buffer_dirty(dst); return ret; } @@ -1058,24 +1129,22 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root * * this will only push up to 1/2 the contents of the left node over */ -static int balance_node_right(struct btrfs_trans_handle *trans, struct - btrfs_root *root, struct buffer_head *dst_buf, - struct buffer_head *src_buf) +static int balance_node_right(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *dst, + struct extent_buffer *src) { - struct btrfs_node *src = btrfs_buffer_node(src_buf); - struct btrfs_node *dst = btrfs_buffer_node(dst_buf); int push_items = 0; int max_push; int src_nritems; int dst_nritems; int ret = 0; - src_nritems = btrfs_header_nritems(&src->header); - dst_nritems = btrfs_header_nritems(&dst->header); + src_nritems = btrfs_header_nritems(src); + dst_nritems = btrfs_header_nritems(dst); push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; - if (push_items <= 0) { + if (push_items <= 0) return 1; - } max_push = src_nritems / 2 + 1; /* don't try to empty the node */ @@ -1085,18 +1154,21 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct if (max_push < push_items) push_items = max_push; - btrfs_memmove(root, dst, dst->ptrs + push_items, dst->ptrs, - dst_nritems * sizeof(struct btrfs_key_ptr)); + memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), + btrfs_node_key_ptr_offset(0), + (dst_nritems) * + sizeof(struct btrfs_key_ptr)); - btrfs_memcpy(root, dst, dst->ptrs, - src->ptrs + src_nritems - push_items, - push_items * sizeof(struct btrfs_key_ptr)); + copy_extent_buffer(dst, src, + btrfs_node_key_ptr_offset(0), + btrfs_node_key_ptr_offset(src_nritems - push_items), + push_items * sizeof(struct btrfs_key_ptr)); - btrfs_set_header_nritems(&src->header, src_nritems - push_items); - btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); + btrfs_set_header_nritems(src, src_nritems - push_items); + btrfs_set_header_nritems(dst, dst_nritems + push_items); - btrfs_mark_buffer_dirty(src_buf); - btrfs_mark_buffer_dirty(dst_buf); + btrfs_mark_buffer_dirty(src); + btrfs_mark_buffer_dirty(dst); return ret; } @@ -1107,45 +1179,46 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct * * returns zero on success or < 0 on failure. */ -static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int level) +static int insert_new_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int level) { - struct buffer_head *t; - struct btrfs_node *lower; - struct btrfs_node *c; - struct btrfs_disk_key *lower_key; + struct extent_buffer *lower; + struct extent_buffer *c; + struct btrfs_disk_key lower_key; BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr, 0); - if (IS_ERR(t)) - return PTR_ERR(t); - c = btrfs_buffer_node(t); - memset(c, 0, root->blocksize); - btrfs_set_header_nritems(&c->header, 1); - btrfs_set_header_level(&c->header, level); - btrfs_set_header_blocknr(&c->header, bh_blocknr(t)); - btrfs_set_header_generation(&c->header, trans->transid); - btrfs_set_header_owner(&c->header, root->root_key.objectid); - lower = btrfs_buffer_node(path->nodes[level-1]); - memcpy(c->header.fsid, root->fs_info->disk_super->fsid, - sizeof(c->header.fsid)); - if (btrfs_is_leaf(lower)) - lower_key = &((struct btrfs_leaf *)lower)->items[0].key; + c = btrfs_alloc_free_block(trans, root, + extent_buffer_blocknr(root->node), 0); + if (IS_ERR(c)) + return PTR_ERR(c); + memset_extent_buffer(c, 0, 0, root->nodesize); + btrfs_set_header_nritems(c, 1); + btrfs_set_header_level(c, level); + btrfs_set_header_blocknr(c, extent_buffer_blocknr(c)); + btrfs_set_header_generation(c, trans->transid); + btrfs_set_header_owner(c, root->root_key.objectid); + lower = path->nodes[level-1]; + + write_extent_buffer(c, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(c), + BTRFS_FSID_SIZE); + if (level == 1) + btrfs_item_key(lower, &lower_key, 0); else - lower_key = &lower->ptrs[0].key; - btrfs_memcpy(root, c, &c->ptrs[0].key, lower_key, - sizeof(struct btrfs_disk_key)); - btrfs_set_node_blockptr(c, 0, bh_blocknr(path->nodes[level - 1])); + btrfs_node_key(lower, &lower_key, 0); + btrfs_set_node_key(c, &lower_key, 0); + btrfs_set_node_blockptr(c, 0, extent_buffer_blocknr(lower)); - btrfs_mark_buffer_dirty(t); + btrfs_mark_buffer_dirty(c); /* the super has an extra ref to root->node */ - btrfs_block_release(root, root->node); - root->node = t; - get_bh(t); - path->nodes[level] = t; + free_extent_buffer(root->node); + root->node = c; + extent_buffer_get(c); + path->nodes[level] = c; path->slots[level] = 0; return 0; } @@ -1163,26 +1236,26 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_disk_key *key, u64 blocknr, int slot, int level) { - struct btrfs_node *lower; + struct extent_buffer *lower; int nritems; BUG_ON(!path->nodes[level]); - lower = btrfs_buffer_node(path->nodes[level]); - nritems = btrfs_header_nritems(&lower->header); + lower = path->nodes[level]; + nritems = btrfs_header_nritems(lower); if (slot > nritems) BUG(); if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) BUG(); if (slot != nritems) { - btrfs_memmove(root, lower, lower->ptrs + slot + 1, - lower->ptrs + slot, + memmove_extent_buffer(lower, + btrfs_node_key_ptr_offset(slot + 1), + btrfs_node_key_ptr_offset(slot), (nritems - slot) * sizeof(struct btrfs_key_ptr)); } - btrfs_memcpy(root, lower, &lower->ptrs[slot].key, - key, sizeof(struct btrfs_disk_key)); + btrfs_set_node_key(lower, key, slot); btrfs_set_node_blockptr(lower, slot, blocknr); - btrfs_set_header_nritems(&lower->header, nritems + 1); - btrfs_mark_buffer_dirty(path->nodes[level]); + btrfs_set_header_nritems(lower, nritems + 1); + btrfs_mark_buffer_dirty(lower); check_node(root, path, level); return 0; } @@ -1199,69 +1272,73 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct buffer_head *t; - struct btrfs_node *c; - struct buffer_head *split_buffer; - struct btrfs_node *split; + struct extent_buffer *c; + struct extent_buffer *split; + struct btrfs_disk_key disk_key; int mid; int ret; int wret; u32 c_nritems; - t = path->nodes[level]; - c = btrfs_buffer_node(t); - if (t == root->node) { + c = path->nodes[level]; + if (c == root->node) { /* trying to split the root, lets make a new one */ ret = insert_new_root(trans, root, path, level + 1); if (ret) return ret; } else { ret = push_nodes_for_insert(trans, root, path, level); - t = path->nodes[level]; - c = btrfs_buffer_node(t); - if (!ret && - btrfs_header_nritems(&c->header) < + c = path->nodes[level]; + if (!ret && btrfs_header_nritems(c) < BTRFS_NODEPTRS_PER_BLOCK(root) - 1) return 0; if (ret < 0) return ret; } - c_nritems = btrfs_header_nritems(&c->header); - split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr, 0); - if (IS_ERR(split_buffer)) - return PTR_ERR(split_buffer); + c_nritems = btrfs_header_nritems(c); + split = btrfs_alloc_free_block(trans, root, + extent_buffer_blocknr(c), 0); + if (IS_ERR(split)) + return PTR_ERR(split); + + btrfs_set_header_flags(split, btrfs_header_flags(c)); + btrfs_set_header_level(split, btrfs_header_level(c)); + btrfs_set_header_blocknr(split, extent_buffer_blocknr(split)); + btrfs_set_header_generation(split, trans->transid); + btrfs_set_header_owner(split, root->root_key.objectid); + write_extent_buffer(split, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(split), + BTRFS_FSID_SIZE); - split = btrfs_buffer_node(split_buffer); - btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); - btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); - btrfs_set_header_blocknr(&split->header, bh_blocknr(split_buffer)); - btrfs_set_header_generation(&split->header, trans->transid); - btrfs_set_header_owner(&split->header, root->root_key.objectid); - memcpy(split->header.fsid, root->fs_info->disk_super->fsid, - sizeof(split->header.fsid)); mid = (c_nritems + 1) / 2; - btrfs_memcpy(root, split, split->ptrs, c->ptrs + mid, - (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); - btrfs_set_header_nritems(&split->header, c_nritems - mid); - btrfs_set_header_nritems(&c->header, mid); + + copy_extent_buffer(split, c, + btrfs_node_key_ptr_offset(0), + btrfs_node_key_ptr_offset(mid), + (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); + btrfs_set_header_nritems(split, c_nritems - mid); + btrfs_set_header_nritems(c, mid); ret = 0; - btrfs_mark_buffer_dirty(t); - btrfs_mark_buffer_dirty(split_buffer); - wret = insert_ptr(trans, root, path, &split->ptrs[0].key, - bh_blocknr(split_buffer), path->slots[level + 1] + 1, + btrfs_mark_buffer_dirty(c); + btrfs_mark_buffer_dirty(split); + + btrfs_node_key(split, &disk_key, 0); + wret = insert_ptr(trans, root, path, &disk_key, + extent_buffer_blocknr(split), + path->slots[level + 1] + 1, level + 1); if (wret) ret = wret; if (path->slots[level] >= mid) { path->slots[level] -= mid; - btrfs_block_release(root, t); - path->nodes[level] = split_buffer; + free_extent_buffer(c); + path->nodes[level] = split; path->slots[level + 1] += 1; } else { - btrfs_block_release(root, split_buffer); + free_extent_buffer(split); } return ret; } @@ -1271,16 +1348,16 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root * and nr indicate which items in the leaf to check. This totals up the * space used both by the item structs and the item data */ -static int leaf_space_used(struct btrfs_leaf *l, int start, int nr) +static int leaf_space_used(struct extent_buffer *l, int start, int nr) { int data_len; - int nritems = btrfs_header_nritems(&l->header); + int nritems = btrfs_header_nritems(l); int end = min(nritems, start + nr) - 1; if (!nr) return 0; - data_len = btrfs_item_end(l->items + start); - data_len = data_len - btrfs_item_offset(l->items + end); + data_len = btrfs_item_end_nr(l, start); + data_len = data_len - btrfs_item_offset_nr(l, end); data_len += sizeof(struct btrfs_item) * nr; WARN_ON(data_len < 0); return data_len; @@ -1291,10 +1368,17 @@ static int leaf_space_used(struct btrfs_leaf *l, int start, int nr) * the start of the leaf data. IOW, how much room * the leaf has left for both items and data */ -int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf) +int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf) { - int nritems = btrfs_header_nritems(&leaf->header); - return BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); + int nritems = btrfs_header_nritems(leaf); + int ret; + ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); + if (ret < 0) { + printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n", + ret, BTRFS_LEAF_DATA_SIZE(root), + leaf_space_used(leaf, 0, nritems), nritems); + } + return ret; } /* @@ -1307,12 +1391,10 @@ int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf) static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct buffer_head *left_buf = path->nodes[0]; - struct btrfs_leaf *left = btrfs_buffer_leaf(left_buf); - struct btrfs_leaf *right; - struct buffer_head *right_buf; - struct buffer_head *upper; - struct btrfs_node *upper_node; + struct extent_buffer *left = path->nodes[0]; + struct extent_buffer *right; + struct extent_buffer *upper; + struct btrfs_disk_key disk_key; int slot; int i; int free_space; @@ -1321,6 +1403,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_item *item; u32 left_nritems; u32 right_nritems; + u32 data_end; int ret; slot = path->slots[1]; @@ -1328,102 +1411,109 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } upper = path->nodes[1]; - upper_node = btrfs_buffer_node(upper); - if (slot >= btrfs_header_nritems(&upper_node->header) - 1) { + if (slot >= btrfs_header_nritems(upper) - 1) return 1; - } - right_buf = read_tree_block(root, - btrfs_node_blockptr(btrfs_buffer_node(upper), slot + 1)); - right = btrfs_buffer_leaf(right_buf); + + right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1)); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); return 1; } + /* cow and double check */ - ret = btrfs_cow_block(trans, root, right_buf, upper, - slot + 1, &right_buf); + ret = btrfs_cow_block(trans, root, right, upper, + slot + 1, &right); if (ret) { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); return 1; } - right = btrfs_buffer_leaf(right_buf); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); return 1; } - left_nritems = btrfs_header_nritems(&left->header); + left_nritems = btrfs_header_nritems(left); if (left_nritems == 0) { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); return 1; } + for (i = left_nritems - 1; i >= 1; i--) { - item = left->items + i; + item = btrfs_item_nr(left, i); if (path->slots[0] == i) push_space += data_size + sizeof(*item); - if (btrfs_item_size(item) + sizeof(*item) + push_space > + if (btrfs_item_size(left, item) + sizeof(*item) + push_space > free_space) break; push_items++; - push_space += btrfs_item_size(item) + sizeof(*item); + push_space += btrfs_item_size(left, item) + sizeof(*item); } + if (push_items == 0) { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); return 1; } + if (push_items == left_nritems) WARN_ON(1); - right_nritems = btrfs_header_nritems(&right->header); + /* push left to right */ - push_space = btrfs_item_end(left->items + left_nritems - push_items); + right_nritems = btrfs_header_nritems(right); + push_space = btrfs_item_end_nr(left, left_nritems - push_items); push_space -= leaf_data_end(root, left); + /* make room in the right data area */ - btrfs_memmove(root, right, btrfs_leaf_data(right) + - leaf_data_end(root, right) - push_space, - btrfs_leaf_data(right) + - leaf_data_end(root, right), BTRFS_LEAF_DATA_SIZE(root) - - leaf_data_end(root, right)); + data_end = leaf_data_end(root, right); + memmove_extent_buffer(right, + btrfs_leaf_data(right) + data_end - push_space, + btrfs_leaf_data(right) + data_end, + BTRFS_LEAF_DATA_SIZE(root) - data_end); + /* copy from the left data area */ - btrfs_memcpy(root, right, btrfs_leaf_data(right) + + copy_extent_buffer(right, left, btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - push_space, btrfs_leaf_data(left) + leaf_data_end(root, left), push_space); - btrfs_memmove(root, right, right->items + push_items, right->items, - right_nritems * sizeof(struct btrfs_item)); + + memmove_extent_buffer(right, btrfs_item_nr_offset(push_items), + btrfs_item_nr_offset(0), + right_nritems * sizeof(struct btrfs_item)); + /* copy the items from left to right */ - btrfs_memcpy(root, right, right->items, left->items + - left_nritems - push_items, - push_items * sizeof(struct btrfs_item)); + copy_extent_buffer(right, left, btrfs_item_nr_offset(0), + btrfs_item_nr_offset(left_nritems - push_items), + push_items * sizeof(struct btrfs_item)); /* update the item pointers */ right_nritems += push_items; - btrfs_set_header_nritems(&right->header, right_nritems); + btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(root); for (i = 0; i < right_nritems; i++) { - btrfs_set_item_offset(right->items + i, push_space - - btrfs_item_size(right->items + i)); - push_space = btrfs_item_offset(right->items + i); + item = btrfs_item_nr(right, i); + btrfs_set_item_offset(right, item, push_space - + btrfs_item_size(right, item)); + push_space = btrfs_item_offset(right, item); } left_nritems -= push_items; - btrfs_set_header_nritems(&left->header, left_nritems); + btrfs_set_header_nritems(left, left_nritems); - btrfs_mark_buffer_dirty(left_buf); - btrfs_mark_buffer_dirty(right_buf); + btrfs_mark_buffer_dirty(left); + btrfs_mark_buffer_dirty(right); - btrfs_memcpy(root, upper_node, &upper_node->ptrs[slot + 1].key, - &right->items[0].key, sizeof(struct btrfs_disk_key)); + btrfs_item_key(right, &disk_key, 0); + btrfs_set_node_key(upper, &disk_key, slot + 1); btrfs_mark_buffer_dirty(upper); /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left_nritems) { path->slots[0] -= left_nritems; - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buf; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[1] += 1; } else { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); } if (path->nodes[1]) check_node(root, path, 1); @@ -1436,10 +1526,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct buffer_head *right_buf = path->nodes[0]; - struct btrfs_leaf *right = btrfs_buffer_leaf(right_buf); - struct buffer_head *t; - struct btrfs_leaf *left; + struct btrfs_disk_key disk_key; + struct extent_buffer *right = path->nodes[0]; + struct extent_buffer *left; int slot; int i; int free_space; @@ -1447,119 +1536,128 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root int push_items = 0; struct btrfs_item *item; u32 old_left_nritems; + u32 right_nritems; int ret = 0; int wret; slot = path->slots[1]; - if (slot == 0) { + if (slot == 0) return 1; - } - if (!path->nodes[1]) { + if (!path->nodes[1]) return 1; - } - t = read_tree_block(root, - btrfs_node_blockptr(btrfs_buffer_node(path->nodes[1]), slot - 1)); - left = btrfs_buffer_leaf(t); + + left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1], + slot - 1)); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { - btrfs_block_release(root, t); + free_extent_buffer(left); return 1; } /* cow and double check */ - ret = btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t); + ret = btrfs_cow_block(trans, root, left, + path->nodes[1], slot - 1, &left); if (ret) { /* we hit -ENOSPC, but it isn't fatal here */ - btrfs_block_release(root, t); + free_extent_buffer(left); return 1; } - left = btrfs_buffer_leaf(t); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { - btrfs_block_release(root, t); + free_extent_buffer(left); return 1; } - if (btrfs_header_nritems(&right->header) == 0) { - btrfs_block_release(root, t); + right_nritems = btrfs_header_nritems(right); + if (right_nritems == 0) { + free_extent_buffer(left); return 1; } - for (i = 0; i < btrfs_header_nritems(&right->header) - 1; i++) { - item = right->items + i; + for (i = 0; i < right_nritems - 1; i++) { + item = btrfs_item_nr(right, i); if (path->slots[0] == i) push_space += data_size + sizeof(*item); - if (btrfs_item_size(item) + sizeof(*item) + push_space > + if (btrfs_item_size(right, item) + sizeof(*item) + push_space > free_space) break; push_items++; - push_space += btrfs_item_size(item) + sizeof(*item); + push_space += btrfs_item_size(right, item) + sizeof(*item); } if (push_items == 0) { - btrfs_block_release(root, t); + free_extent_buffer(left); return 1; } - if (push_items == btrfs_header_nritems(&right->header)) + if (push_items == btrfs_header_nritems(right)) WARN_ON(1); + /* push data from right to left */ - btrfs_memcpy(root, left, left->items + - btrfs_header_nritems(&left->header), - right->items, push_items * sizeof(struct btrfs_item)); + copy_extent_buffer(left, right, + btrfs_item_nr_offset(btrfs_header_nritems(left)), + btrfs_item_nr_offset(0), + push_items * sizeof(struct btrfs_item)); + push_space = BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_offset(right->items + push_items -1); - btrfs_memcpy(root, left, btrfs_leaf_data(left) + + btrfs_item_offset_nr(right, push_items -1); + + copy_extent_buffer(left, right, btrfs_leaf_data(left) + leaf_data_end(root, left) - push_space, btrfs_leaf_data(right) + - btrfs_item_offset(right->items + push_items - 1), + btrfs_item_offset_nr(right, push_items - 1), push_space); - old_left_nritems = btrfs_header_nritems(&left->header); + old_left_nritems = btrfs_header_nritems(left); BUG_ON(old_left_nritems < 0); for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { - u32 ioff = btrfs_item_offset(left->items + i); - btrfs_set_item_offset(left->items + i, ioff - - (BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_offset(left->items + - old_left_nritems - 1))); + u32 ioff; + item = btrfs_item_nr(left, i); + ioff = btrfs_item_offset(left, item); + btrfs_set_item_offset(left, item, + ioff - (BTRFS_LEAF_DATA_SIZE(root) - + btrfs_item_offset_nr(left, old_left_nritems - 1))); } - btrfs_set_header_nritems(&left->header, old_left_nritems + push_items); + btrfs_set_header_nritems(left, old_left_nritems + push_items); /* fixup right node */ - push_space = btrfs_item_offset(right->items + push_items - 1) - - leaf_data_end(root, right); - btrfs_memmove(root, right, btrfs_leaf_data(right) + - BTRFS_LEAF_DATA_SIZE(root) - push_space, - btrfs_leaf_data(right) + - leaf_data_end(root, right), push_space); - btrfs_memmove(root, right, right->items, right->items + push_items, - (btrfs_header_nritems(&right->header) - push_items) * - sizeof(struct btrfs_item)); - btrfs_set_header_nritems(&right->header, - btrfs_header_nritems(&right->header) - - push_items); + push_space = btrfs_item_offset_nr(right, push_items - 1) - + leaf_data_end(root, right); + memmove_extent_buffer(right, btrfs_leaf_data(right) + + BTRFS_LEAF_DATA_SIZE(root) - push_space, + btrfs_leaf_data(right) + + leaf_data_end(root, right), push_space); + + memmove_extent_buffer(right, btrfs_item_nr_offset(0), + btrfs_item_nr_offset(push_items), + (btrfs_header_nritems(right) - push_items) * + sizeof(struct btrfs_item)); + + right_nritems = btrfs_header_nritems(right) - push_items; + btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(root); - for (i = 0; i < btrfs_header_nritems(&right->header); i++) { - btrfs_set_item_offset(right->items + i, push_space - - btrfs_item_size(right->items + i)); - push_space = btrfs_item_offset(right->items + i); + for (i = 0; i < right_nritems; i++) { + item = btrfs_item_nr(right, i); + btrfs_set_item_offset(right, item, push_space - + btrfs_item_size(right, item)); + push_space = btrfs_item_offset(right, item); } - btrfs_mark_buffer_dirty(t); - btrfs_mark_buffer_dirty(right_buf); + btrfs_mark_buffer_dirty(left); + btrfs_mark_buffer_dirty(right); - wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); + btrfs_item_key(right, &disk_key, 0); + wret = fixup_low_keys(trans, root, path, &disk_key, 1); if (wret) ret = wret; /* then fixup the leaf pointer in the path */ if (path->slots[0] < push_items) { path->slots[0] += old_left_nritems; - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = t; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = left; path->slots[1] -= 1; } else { - btrfs_block_release(root, t); + free_extent_buffer(left); path->slots[0] -= push_items; } BUG_ON(path->slots[0] < 0); @@ -1578,13 +1676,11 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *ins_key, struct btrfs_path *path, int data_size) { - struct buffer_head *l_buf; - struct btrfs_leaf *l; + struct extent_buffer *l; u32 nritems; int mid; int slot; - struct btrfs_leaf *right; - struct buffer_head *right_buffer; + struct extent_buffer *right; int space_needed = data_size + sizeof(struct btrfs_item); int data_copy_size; int rt_data_off; @@ -1603,8 +1699,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (wret < 0) return wret; } - l_buf = path->nodes[0]; - l = btrfs_buffer_leaf(l_buf); + l = path->nodes[0]; /* did the pushes work? */ if (btrfs_leaf_free_space(root, l) >= @@ -1617,36 +1712,38 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } slot = path->slots[0]; - nritems = btrfs_header_nritems(&l->header); + nritems = btrfs_header_nritems(l); mid = (nritems + 1)/ 2; - right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); - if (IS_ERR(right_buffer)) - return PTR_ERR(right_buffer); - - right = btrfs_buffer_leaf(right_buffer); - memset(&right->header, 0, sizeof(right->header)); - btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); - btrfs_set_header_generation(&right->header, trans->transid); - btrfs_set_header_owner(&right->header, root->root_key.objectid); - btrfs_set_header_level(&right->header, 0); - memcpy(right->header.fsid, root->fs_info->disk_super->fsid, - sizeof(right->header.fsid)); + right = btrfs_alloc_free_block(trans, root, + extent_buffer_blocknr(l), 0); + if (IS_ERR(right)) + return PTR_ERR(right); + + memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); + btrfs_set_header_blocknr(right, extent_buffer_blocknr(right)); + btrfs_set_header_generation(right, trans->transid); + btrfs_set_header_owner(right, root->root_key.objectid); + btrfs_set_header_level(right, 0); + write_extent_buffer(right, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(right), + BTRFS_FSID_SIZE); + if (mid <= slot) { if (nritems == 1 || leaf_space_used(l, mid, nritems - mid) + space_needed > BTRFS_LEAF_DATA_SIZE(root)) { if (slot >= nritems) { btrfs_cpu_key_to_disk(&disk_key, ins_key); - btrfs_set_header_nritems(&right->header, 0); + btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, &disk_key, - bh_blocknr(right_buffer), + extent_buffer_blocknr(right), path->slots[1] + 1, 1); if (wret) ret = wret; - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buffer; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[0] = 0; path->slots[1] += 1; return ret; @@ -1659,15 +1756,15 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root BTRFS_LEAF_DATA_SIZE(root)) { if (slot == 0) { btrfs_cpu_key_to_disk(&disk_key, ins_key); - btrfs_set_header_nritems(&right->header, 0); + btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, &disk_key, - bh_blocknr(right_buffer), + extent_buffer_blocknr(right), path->slots[1], 1); if (wret) ret = wret; - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buffer; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[0] = 0; if (path->slots[1] == 0) { wret = fixup_low_keys(trans, root, @@ -1681,61 +1778,74 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root double_split = 1; } } - btrfs_set_header_nritems(&right->header, nritems - mid); - data_copy_size = btrfs_item_end(l->items + mid) - - leaf_data_end(root, l); - btrfs_memcpy(root, right, right->items, l->items + mid, - (nritems - mid) * sizeof(struct btrfs_item)); - btrfs_memcpy(root, right, + nritems = nritems - mid; + btrfs_set_header_nritems(right, nritems); + data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l); + + copy_extent_buffer(right, l, btrfs_item_nr_offset(0), + btrfs_item_nr_offset(mid), + nritems * sizeof(struct btrfs_item)); + + copy_extent_buffer(right, l, btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - data_copy_size, btrfs_leaf_data(l) + leaf_data_end(root, l), data_copy_size); + rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_end(l->items + mid); + btrfs_item_end_nr(l, mid); - for (i = 0; i < btrfs_header_nritems(&right->header); i++) { - u32 ioff = btrfs_item_offset(right->items + i); - btrfs_set_item_offset(right->items + i, ioff + rt_data_off); + for (i = 0; i < nritems; i++) { + struct btrfs_item *item = btrfs_item_nr(right, i); + u32 ioff = btrfs_item_offset(right, item); + btrfs_set_item_offset(right, item, ioff + rt_data_off); } - btrfs_set_header_nritems(&l->header, mid); + btrfs_set_header_nritems(l, mid); ret = 0; - wret = insert_ptr(trans, root, path, &right->items[0].key, - bh_blocknr(right_buffer), path->slots[1] + 1, 1); + btrfs_item_key(right, &disk_key, 0); + wret = insert_ptr(trans, root, path, &disk_key, + extent_buffer_blocknr(right), path->slots[1] + 1, 1); if (wret) ret = wret; - btrfs_mark_buffer_dirty(right_buffer); - btrfs_mark_buffer_dirty(l_buf); + + btrfs_mark_buffer_dirty(right); + btrfs_mark_buffer_dirty(l); BUG_ON(path->slots[0] != slot); + if (mid <= slot) { - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buffer; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[0] -= mid; path->slots[1] += 1; } else - btrfs_block_release(root, right_buffer); + free_extent_buffer(right); + BUG_ON(path->slots[0] < 0); check_node(root, path, 1); + check_leaf(root, path, 0); if (!double_split) return ret; - right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); - if (IS_ERR(right_buffer)) - return PTR_ERR(right_buffer); - - right = btrfs_buffer_leaf(right_buffer); - memset(&right->header, 0, sizeof(right->header)); - btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); - btrfs_set_header_generation(&right->header, trans->transid); - btrfs_set_header_owner(&right->header, root->root_key.objectid); - btrfs_set_header_level(&right->header, 0); - memcpy(right->header.fsid, root->fs_info->disk_super->fsid, - sizeof(right->header.fsid)); + + right = btrfs_alloc_free_block(trans, root, + extent_buffer_blocknr(l), 0); + if (IS_ERR(right)) + return PTR_ERR(right); + + memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); + btrfs_set_header_blocknr(right, extent_buffer_blocknr(right)); + btrfs_set_header_generation(right, trans->transid); + btrfs_set_header_owner(right, root->root_key.objectid); + btrfs_set_header_level(right, 0); + write_extent_buffer(right, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(right), + BTRFS_FSID_SIZE); + btrfs_cpu_key_to_disk(&disk_key, ins_key); - btrfs_set_header_nritems(&right->header, 0); + btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, &disk_key, - bh_blocknr(right_buffer), + extent_buffer_blocknr(right), path->slots[1], 1); if (wret) ret = wret; @@ -1744,8 +1854,8 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (wret) ret = wret; } - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buffer; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[0] = 0; check_node(root, path, 1); check_leaf(root, path, 0); @@ -1760,8 +1870,8 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, int ret = 0; int slot; int slot_orig; - struct btrfs_leaf *leaf; - struct buffer_head *leaf_buf; + struct extent_buffer *leaf; + struct btrfs_item *item; u32 nritems; unsigned int data_end; unsigned int old_data_start; @@ -1770,15 +1880,14 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, int i; slot_orig = path->slots[0]; - leaf_buf = path->nodes[0]; - leaf = btrfs_buffer_leaf(leaf_buf); + leaf = path->nodes[0]; - nritems = btrfs_header_nritems(&leaf->header); + nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(root, leaf); slot = path->slots[0]; - old_data_start = btrfs_item_offset(leaf->items + slot); - old_size = btrfs_item_size(leaf->items + slot); + old_data_start = btrfs_item_offset_nr(leaf, slot); + old_size = btrfs_item_size_nr(leaf, slot); BUG_ON(old_size <= new_size); size_diff = old_size - new_size; @@ -1790,32 +1899,38 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, */ /* first correct the data pointers */ for (i = slot; i < nritems; i++) { - u32 ioff = btrfs_item_offset(leaf->items + i); - btrfs_set_item_offset(leaf->items + i, - ioff + size_diff); + u32 ioff; + item = btrfs_item_nr(leaf, i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff + size_diff); } /* shift the data */ - btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end + size_diff, btrfs_leaf_data(leaf) + data_end, old_data_start + new_size - data_end); - btrfs_set_item_size(leaf->items + slot, new_size); - btrfs_mark_buffer_dirty(leaf_buf); + + item = btrfs_item_nr(leaf, slot); + btrfs_set_item_size(leaf, item, new_size); + btrfs_mark_buffer_dirty(leaf); ret = 0; - if (btrfs_leaf_free_space(root, leaf) < 0) + if (btrfs_leaf_free_space(root, leaf) < 0) { + btrfs_print_leaf(root, leaf); BUG(); + } check_leaf(root, path, 0); return ret; } -int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, u32 data_size) +int btrfs_extend_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + u32 data_size) { int ret = 0; int slot; int slot_orig; - struct btrfs_leaf *leaf; - struct buffer_head *leaf_buf; + struct extent_buffer *leaf; + struct btrfs_item *item; u32 nritems; unsigned int data_end; unsigned int old_data; @@ -1823,16 +1938,17 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root int i; slot_orig = path->slots[0]; - leaf_buf = path->nodes[0]; - leaf = btrfs_buffer_leaf(leaf_buf); + leaf = path->nodes[0]; - nritems = btrfs_header_nritems(&leaf->header); + nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(root, leaf); - if (btrfs_leaf_free_space(root, leaf) < data_size) + if (btrfs_leaf_free_space(root, leaf) < data_size) { + btrfs_print_leaf(root, leaf); BUG(); + } slot = path->slots[0]; - old_data = btrfs_item_end(leaf->items + slot); + old_data = btrfs_item_end_nr(leaf, slot); BUG_ON(slot < 0); BUG_ON(slot >= nritems); @@ -1842,22 +1958,28 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root */ /* first correct the data pointers */ for (i = slot; i < nritems; i++) { - u32 ioff = btrfs_item_offset(leaf->items + i); - btrfs_set_item_offset(leaf->items + i, - ioff - data_size); + u32 ioff; + item = btrfs_item_nr(leaf, i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff - data_size); } + /* shift the data */ - btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end - data_size, btrfs_leaf_data(leaf) + data_end, old_data - data_end); + data_end = old_data; - old_size = btrfs_item_size(leaf->items + slot); - btrfs_set_item_size(leaf->items + slot, old_size + data_size); - btrfs_mark_buffer_dirty(leaf_buf); + old_size = btrfs_item_size_nr(leaf, slot); + item = btrfs_item_nr(leaf, slot); + btrfs_set_item_size(leaf, item, old_size + data_size); + btrfs_mark_buffer_dirty(leaf); ret = 0; - if (btrfs_leaf_free_space(root, leaf) < 0) + if (btrfs_leaf_free_space(root, leaf) < 0) { + btrfs_print_leaf(root, leaf); BUG(); + } check_leaf(root, path, 0); return ret; } @@ -1866,15 +1988,16 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root * Given a key and some data, insert an item into the tree. * This does all the path init required, making room in the tree if needed. */ -int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, struct btrfs_key - *cpu_key, u32 data_size) +int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *cpu_key, u32 data_size) { + struct extent_buffer *leaf; + struct btrfs_item *item; int ret = 0; int slot; int slot_orig; - struct btrfs_leaf *leaf; - struct buffer_head *leaf_buf; u32 nritems; unsigned int data_end; struct btrfs_disk_key disk_key; @@ -1884,6 +2007,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root /* create a root if there isn't one */ if (!root->node) BUG(); + ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1); if (ret == 0) { return -EEXIST; @@ -1892,57 +2016,68 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root goto out; slot_orig = path->slots[0]; - leaf_buf = path->nodes[0]; - leaf = btrfs_buffer_leaf(leaf_buf); + leaf = path->nodes[0]; - nritems = btrfs_header_nritems(&leaf->header); + nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(root, leaf); if (btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item) + data_size) { BUG(); } + slot = path->slots[0]; BUG_ON(slot < 0); + if (slot != nritems) { int i; - unsigned int old_data = btrfs_item_end(leaf->items + slot); + unsigned int old_data = btrfs_item_end_nr(leaf, slot); + if (old_data < data_end) { + btrfs_print_leaf(root, leaf); + printk("slot %d old_data %d data_end %d\n", + slot, old_data, data_end); + BUG_ON(1); + } /* * item0..itemN ... dataN.offset..dataN.size .. data0.size */ /* first correct the data pointers */ for (i = slot; i < nritems; i++) { - u32 ioff = btrfs_item_offset(leaf->items + i); - btrfs_set_item_offset(leaf->items + i, - ioff - data_size); + u32 ioff; + item = btrfs_item_nr(leaf, i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff - data_size); } /* shift the items */ - btrfs_memmove(root, leaf, leaf->items + slot + 1, - leaf->items + slot, + memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), + btrfs_item_nr_offset(slot), (nritems - slot) * sizeof(struct btrfs_item)); /* shift the data */ - btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end - data_size, btrfs_leaf_data(leaf) + data_end, old_data - data_end); data_end = old_data; } + /* setup the item for the new data */ - btrfs_memcpy(root, leaf, &leaf->items[slot].key, &disk_key, - sizeof(struct btrfs_disk_key)); - btrfs_set_item_offset(leaf->items + slot, data_end - data_size); - btrfs_set_item_size(leaf->items + slot, data_size); - btrfs_set_header_nritems(&leaf->header, nritems + 1); - btrfs_mark_buffer_dirty(leaf_buf); + btrfs_set_item_key(leaf, &disk_key, slot); + item = btrfs_item_nr(leaf, slot); + btrfs_set_item_offset(leaf, item, data_end - data_size); + btrfs_set_item_size(leaf, item, data_size); + btrfs_set_header_nritems(leaf, nritems + 1); + btrfs_mark_buffer_dirty(leaf); ret = 0; if (slot == 0) ret = fixup_low_keys(trans, root, path, &disk_key, 1); - if (btrfs_leaf_free_space(root, leaf) < 0) + if (btrfs_leaf_free_space(root, leaf) < 0) { + btrfs_print_leaf(root, leaf); BUG(); + } check_leaf(root, path, 0); out: return ret; @@ -1958,17 +2093,17 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root { int ret = 0; struct btrfs_path *path; - u8 *ptr; + struct extent_buffer *leaf; + unsigned long ptr; path = btrfs_alloc_path(); BUG_ON(!path); ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); if (!ret) { - ptr = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], u8); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, data, data_size); - btrfs_mark_buffer_dirty(path->nodes[0]); + leaf = path->nodes[0]; + ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + write_extent_buffer(leaf, data, ptr, data_size); + btrfs_mark_buffer_dirty(leaf); } btrfs_free_path(path); return ret; @@ -1984,30 +2119,30 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { - struct btrfs_node *node; - struct buffer_head *parent = path->nodes[level]; + struct extent_buffer *parent = path->nodes[level]; u32 nritems; int ret = 0; int wret; - node = btrfs_buffer_node(parent); - nritems = btrfs_header_nritems(&node->header); + nritems = btrfs_header_nritems(parent); if (slot != nritems -1) { - btrfs_memmove(root, node, node->ptrs + slot, - node->ptrs + slot + 1, + memmove_extent_buffer(parent, + btrfs_node_key_ptr_offset(slot), + btrfs_node_key_ptr_offset(slot + 1), sizeof(struct btrfs_key_ptr) * (nritems - slot - 1)); } nritems--; - btrfs_set_header_nritems(&node->header, nritems); + btrfs_set_header_nritems(parent, nritems); if (nritems == 0 && parent == root->node) { - struct btrfs_header *header = btrfs_buffer_header(root->node); - BUG_ON(btrfs_header_level(header) != 1); + BUG_ON(btrfs_header_level(root->node) != 1); /* just turn the root into a leaf and break */ - btrfs_set_header_level(header, 0); + btrfs_set_header_level(root->node, 0); } else if (slot == 0) { - wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key, - level + 1); + struct btrfs_disk_key disk_key; + + btrfs_node_key(parent, &disk_key, 0); + wret = fixup_low_keys(trans, root, path, &disk_key, level + 1); if (wret) ret = wret; } @@ -2023,59 +2158,67 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path) { int slot; - struct btrfs_leaf *leaf; - struct buffer_head *leaf_buf; + struct extent_buffer *leaf; + struct btrfs_item *item; int doff; int dsize; int ret = 0; int wret; u32 nritems; - leaf_buf = path->nodes[0]; - leaf = btrfs_buffer_leaf(leaf_buf); + leaf = path->nodes[0]; slot = path->slots[0]; - doff = btrfs_item_offset(leaf->items + slot); - dsize = btrfs_item_size(leaf->items + slot); - nritems = btrfs_header_nritems(&leaf->header); + doff = btrfs_item_offset_nr(leaf, slot); + dsize = btrfs_item_size_nr(leaf, slot); + nritems = btrfs_header_nritems(leaf); if (slot != nritems - 1) { int i; int data_end = leaf_data_end(root, leaf); - btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end + dsize, btrfs_leaf_data(leaf) + data_end, doff - data_end); + for (i = slot + 1; i < nritems; i++) { - u32 ioff = btrfs_item_offset(leaf->items + i); - btrfs_set_item_offset(leaf->items + i, ioff + dsize); + u32 ioff; + item = btrfs_item_nr(leaf, i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff + dsize); } - btrfs_memmove(root, leaf, leaf->items + slot, - leaf->items + slot + 1, + memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), + btrfs_item_nr_offset(slot + 1), sizeof(struct btrfs_item) * (nritems - slot - 1)); } - btrfs_set_header_nritems(&leaf->header, nritems - 1); + btrfs_set_header_nritems(leaf, nritems - 1); nritems--; + /* delete the leaf if we've emptied it */ if (nritems == 0) { - if (leaf_buf == root->node) { - btrfs_set_header_level(&leaf->header, 0); + if (leaf == root->node) { + btrfs_set_header_level(leaf, 0); } else { - clean_tree_block(trans, root, leaf_buf); - wait_on_buffer(leaf_buf); + clean_tree_block(trans, root, leaf); + wait_on_tree_block_writeback(root, leaf); wret = del_ptr(trans, root, path, 1, path->slots[1]); if (wret) ret = wret; wret = btrfs_free_extent(trans, root, - bh_blocknr(leaf_buf), 1, 1); + extent_buffer_blocknr(leaf), + 1, 1); if (wret) ret = wret; } } else { int used = leaf_space_used(leaf, 0, nritems); if (slot == 0) { + struct btrfs_disk_key disk_key; + + btrfs_item_key(leaf, &disk_key, 0); wret = fixup_low_keys(trans, root, path, - &leaf->items[0].key, 1); + &disk_key, 1); if (wret) ret = wret; } @@ -2087,34 +2230,40 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, * for possible call to del_ptr below */ slot = path->slots[1]; - get_bh(leaf_buf); + extent_buffer_get(leaf); + wret = push_leaf_left(trans, root, path, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; - if (path->nodes[0] == leaf_buf && - btrfs_header_nritems(&leaf->header)) { + + if (path->nodes[0] == leaf && + btrfs_header_nritems(leaf)) { wret = push_leaf_right(trans, root, path, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; } - if (btrfs_header_nritems(&leaf->header) == 0) { - u64 blocknr = bh_blocknr(leaf_buf); - clean_tree_block(trans, root, leaf_buf); - wait_on_buffer(leaf_buf); + + if (btrfs_header_nritems(leaf) == 0) { + u64 blocknr = extent_buffer_blocknr(leaf); + + clean_tree_block(trans, root, leaf); + wait_on_tree_block_writeback(root, leaf); + wret = del_ptr(trans, root, path, 1, slot); if (wret) ret = wret; - btrfs_block_release(root, leaf_buf); + + free_extent_buffer(leaf); wret = btrfs_free_extent(trans, root, blocknr, 1, 1); if (wret) ret = wret; } else { - btrfs_mark_buffer_dirty(leaf_buf); - btrfs_block_release(root, leaf_buf); + btrfs_mark_buffer_dirty(leaf); + free_extent_buffer(leaf); } } else { - btrfs_mark_buffer_dirty(leaf_buf); + btrfs_mark_buffer_dirty(leaf); } } return ret; @@ -2130,25 +2279,27 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) int slot; int level = 1; u64 blocknr; - struct buffer_head *c; - struct btrfs_node *c_node; - struct buffer_head *next = NULL; + struct extent_buffer *c; + struct extent_buffer *next = NULL; while(level < BTRFS_MAX_LEVEL) { if (!path->nodes[level]) return 1; + slot = path->slots[level] + 1; c = path->nodes[level]; - c_node = btrfs_buffer_node(c); - if (slot >= btrfs_header_nritems(&c_node->header)) { + if (slot >= btrfs_header_nritems(c)) { level++; continue; } - blocknr = btrfs_node_blockptr(c_node, slot); + + blocknr = btrfs_node_blockptr(c, slot); if (next) - btrfs_block_release(root, next); + free_extent_buffer(next); + if (path->reada) reada_for_search(root, path, level, slot); + next = read_tree_block(root, blocknr); break; } @@ -2156,15 +2307,14 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) while(1) { level--; c = path->nodes[level]; - btrfs_block_release(root, c); + free_extent_buffer(c); path->nodes[level] = next; path->slots[level] = 0; if (!level) break; if (path->reada) reada_for_search(root, path, level, 0); - next = read_tree_block(root, - btrfs_node_blockptr(btrfs_buffer_node(next), 0)); + next = read_tree_block(root, btrfs_node_blockptr(next, 0)); } return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 800a3499cc3..c4b82980685 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -20,10 +20,10 @@ #define __BTRFS__ #include -#include #include #include #include "bit-radix.h" +#include "extent_map.h" struct btrfs_trans_handle; struct btrfs_transaction; @@ -32,7 +32,7 @@ extern struct kmem_cache *btrfs_transaction_cachep; extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; -#define BTRFS_MAGIC "_BtRfS_M" +#define BTRFS_MAGIC "_B2RfS_M" #define BTRFS_ROOT_TREE_OBJECTID 1ULL #define BTRFS_EXTENT_TREE_OBJECTID 2ULL @@ -78,41 +78,41 @@ extern struct kmem_cache *btrfs_path_cachep; */ struct btrfs_disk_key { __le64 objectid; - __le32 flags; + u8 type; __le64 offset; } __attribute__ ((__packed__)); struct btrfs_key { u64 objectid; - u32 flags; + u8 type; u64 offset; } __attribute__ ((__packed__)); +#define BTRFS_FSID_SIZE 16 /* * every tree block (leaf or node) starts with this header. */ struct btrfs_header { u8 csum[BTRFS_CSUM_SIZE]; - u8 fsid[16]; /* FS specific uuid */ + u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ __le64 blocknr; /* which block this node is supposed to live in */ __le64 generation; __le64 owner; - __le16 nritems; + __le32 nritems; __le16 flags; u8 level; } __attribute__ ((__packed__)); #define BTRFS_MAX_LEVEL 8 -#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->blocksize - \ +#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \ sizeof(struct btrfs_header)) / \ (sizeof(struct btrfs_disk_key) + sizeof(u64))) #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) -#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->blocksize)) +#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->leafsize)) #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) - \ sizeof(struct btrfs_file_extent_item)) -struct buffer_head; /* * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc @@ -128,7 +128,9 @@ struct btrfs_super_block { __le64 total_blocks; __le64 blocks_used; __le64 root_dir_objectid; - __le32 blocksize; + __le32 sectorsize; + __le32 nodesize; + __le32 leafsize; } __attribute__ ((__packed__)); /* @@ -138,7 +140,7 @@ struct btrfs_super_block { struct btrfs_item { struct btrfs_disk_key key; __le32 offset; - __le16 size; + __le32 size; } __attribute__ ((__packed__)); /* @@ -176,7 +178,7 @@ struct btrfs_node { * used while walking the tree. */ struct btrfs_path { - struct buffer_head *nodes[BTRFS_MAX_LEVEL]; + struct extent_buffer *nodes[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL]; int reada; int lowest_level; @@ -292,6 +294,7 @@ struct btrfs_block_group_cache { }; struct btrfs_fs_info { + u8 fsid[BTRFS_FSID_SIZE]; struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct radix_tree_root fs_roots_radix; @@ -304,9 +307,8 @@ struct btrfs_fs_info { u64 generation; u64 last_trans_committed; struct btrfs_transaction *running_transaction; - struct btrfs_super_block *disk_super; struct btrfs_super_block super_copy; - struct buffer_head *sb_buffer; + struct extent_buffer *sb_buffer; struct super_block *sb; struct inode *btree_inode; struct mutex trans_mutex; @@ -325,8 +327,8 @@ struct btrfs_fs_info { * and for the extent tree extent_root root. */ struct btrfs_root { - struct buffer_head *node; - struct buffer_head *commit_root; + struct extent_buffer *node; + struct extent_buffer *commit_root; struct btrfs_root_item root_item; struct btrfs_key root_key; struct btrfs_fs_info *fs_info; @@ -336,7 +338,16 @@ struct btrfs_root { struct rw_semaphore snap_sem; u64 objectid; u64 last_trans; - u32 blocksize; + + /* data allocations are done in sectorsize units */ + u32 sectorsize; + + /* node allocations are done in nodesize units */ + u32 nodesize; + + /* leaf allocations are done in leafsize units */ + u32 leafsize; + u32 type; u64 highest_inode; u64 last_inode_alloc; @@ -347,12 +358,6 @@ struct btrfs_root { char *name; }; -/* the lower bits in the key flags defines the item type */ -#define BTRFS_KEY_TYPE_MAX 256 -#define BTRFS_KEY_TYPE_SHIFT 24 -#define BTRFS_KEY_TYPE_MASK (((u32)BTRFS_KEY_TYPE_MAX - 1) << \ - BTRFS_KEY_TYPE_SHIFT) - /* * inode items have the data typically returned from stat and store other * info about object characteristics. There is one for every file and dir in @@ -402,246 +407,253 @@ struct btrfs_root { */ #define BTRFS_STRING_ITEM_KEY 253 +/* some macros to generate set/get funcs for the struct fields. This + * assumes there is a lefoo_to_cpu for every type, so lets make a simple + * one for u8: + */ +#define le8_to_cpu(v) (v) +#define cpu_to_le8(v) (v) +#define __le8 u8 + +#define read_eb_member(eb, ptr, type, member, result) ( \ + read_extent_buffer(eb, (char *)(result), \ + ((unsigned long)(ptr)) + \ + offsetof(type, member), \ + sizeof(((type *)0)->member))) + +#define write_eb_member(eb, ptr, type, member, result) ( \ + write_extent_buffer(eb, (char *)(result), \ + ((unsigned long)(ptr)) + \ + offsetof(type, member), \ + sizeof(((type *)0)->member))) + +#define BTRFS_SETGET_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(struct extent_buffer *eb, \ + type *s) \ +{ \ + __le##bits res; \ + read_eb_member(eb, s, type, member, &res); \ + return le##bits##_to_cpu(res); \ +} \ +static inline void btrfs_set_##name(struct extent_buffer *eb, \ + type *s, u##bits val) \ +{ \ + val = cpu_to_le##bits(val); \ + write_eb_member(eb, s, type, member, &val); \ +} + +#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(struct extent_buffer *eb) \ +{ \ + __le##bits res; \ + read_eb_member(eb, NULL, type, member, &res); \ + return le##bits##_to_cpu(res); \ +} \ +static inline void btrfs_set_##name(struct extent_buffer *eb, \ + u##bits val) \ +{ \ + val = cpu_to_le##bits(val); \ + write_eb_member(eb, NULL, type, member, &val); \ +} -static inline u64 btrfs_block_group_used(struct btrfs_block_group_item *bi) -{ - return le64_to_cpu(bi->used); -} - -static inline void btrfs_set_block_group_used(struct - btrfs_block_group_item *bi, - u64 val) -{ - bi->used = cpu_to_le64(val); -} - -static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i) -{ - return le64_to_cpu(i->generation); -} - -static inline void btrfs_set_inode_generation(struct btrfs_inode_item *i, - u64 val) -{ - i->generation = cpu_to_le64(val); -} - -static inline u64 btrfs_inode_size(struct btrfs_inode_item *i) -{ - return le64_to_cpu(i->size); -} - -static inline void btrfs_set_inode_size(struct btrfs_inode_item *i, u64 val) -{ - i->size = cpu_to_le64(val); -} - -static inline u64 btrfs_inode_nblocks(struct btrfs_inode_item *i) -{ - return le64_to_cpu(i->nblocks); -} - -static inline void btrfs_set_inode_nblocks(struct btrfs_inode_item *i, u64 val) -{ - i->nblocks = cpu_to_le64(val); -} - -static inline u64 btrfs_inode_block_group(struct btrfs_inode_item *i) -{ - return le64_to_cpu(i->block_group); -} - -static inline void btrfs_set_inode_block_group(struct btrfs_inode_item *i, - u64 val) -{ - i->block_group = cpu_to_le64(val); -} - -static inline u32 btrfs_inode_nlink(struct btrfs_inode_item *i) -{ - return le32_to_cpu(i->nlink); -} - -static inline void btrfs_set_inode_nlink(struct btrfs_inode_item *i, u32 val) -{ - i->nlink = cpu_to_le32(val); -} - -static inline u32 btrfs_inode_uid(struct btrfs_inode_item *i) -{ - return le32_to_cpu(i->uid); -} - -static inline void btrfs_set_inode_uid(struct btrfs_inode_item *i, u32 val) -{ - i->uid = cpu_to_le32(val); -} - -static inline u32 btrfs_inode_gid(struct btrfs_inode_item *i) -{ - return le32_to_cpu(i->gid); -} - -static inline void btrfs_set_inode_gid(struct btrfs_inode_item *i, u32 val) -{ - i->gid = cpu_to_le32(val); -} - -static inline u32 btrfs_inode_mode(struct btrfs_inode_item *i) -{ - return le32_to_cpu(i->mode); -} - -static inline void btrfs_set_inode_mode(struct btrfs_inode_item *i, u32 val) -{ - i->mode = cpu_to_le32(val); +#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(type *s) \ +{ \ + return le##bits##_to_cpu(s->member); \ +} \ +static inline void btrfs_set_##name(type *s, u##bits val) \ +{ \ + s->member = cpu_to_le##bits(val); \ } -static inline u32 btrfs_inode_rdev(struct btrfs_inode_item *i) -{ - return le32_to_cpu(i->rdev); -} +/* struct btrfs_block_group_item */ +BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, + used, 64); +BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item, + used, 64); -static inline void btrfs_set_inode_rdev(struct btrfs_inode_item *i, u32 val) -{ - i->rdev = cpu_to_le32(val); -} +/* struct btrfs_inode_item */ +BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); +BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64); +BTRFS_SETGET_FUNCS(inode_nblocks, struct btrfs_inode_item, nblocks, 64); +BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64); +BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32); +BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32); +BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32); +BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); +BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 32); +BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 16); +BTRFS_SETGET_FUNCS(inode_compat_flags, struct btrfs_inode_item, + compat_flags, 16); -static inline u16 btrfs_inode_flags(struct btrfs_inode_item *i) +static inline struct btrfs_inode_timespec * +btrfs_inode_atime(struct btrfs_inode_item *inode_item) { - return le16_to_cpu(i->flags); + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, atime); + return (struct btrfs_inode_timespec *)ptr; } -static inline void btrfs_set_inode_flags(struct btrfs_inode_item *i, u16 val) +static inline struct btrfs_inode_timespec * +btrfs_inode_mtime(struct btrfs_inode_item *inode_item) { - i->flags = cpu_to_le16(val); + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, mtime); + return (struct btrfs_inode_timespec *)ptr; } -static inline u16 btrfs_inode_compat_flags(struct btrfs_inode_item *i) +static inline struct btrfs_inode_timespec * +btrfs_inode_ctime(struct btrfs_inode_item *inode_item) { - return le16_to_cpu(i->compat_flags); + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, ctime); + return (struct btrfs_inode_timespec *)ptr; } -static inline void btrfs_set_inode_compat_flags(struct btrfs_inode_item *i, - u16 val) +static inline struct btrfs_inode_timespec * +btrfs_inode_otime(struct btrfs_inode_item *inode_item) { - i->compat_flags = cpu_to_le16(val); + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, otime); + return (struct btrfs_inode_timespec *)ptr; } -static inline u64 btrfs_timespec_sec(struct btrfs_inode_timespec *ts) -{ - return le64_to_cpu(ts->sec); -} +BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_inode_timespec, sec, 64); +BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_inode_timespec, nsec, 32); -static inline void btrfs_set_timespec_sec(struct btrfs_inode_timespec *ts, - u64 val) -{ - ts->sec = cpu_to_le64(val); -} +/* struct btrfs_extent_item */ +BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); +BTRFS_SETGET_FUNCS(extent_owner, struct btrfs_extent_item, owner, 32); -static inline u32 btrfs_timespec_nsec(struct btrfs_inode_timespec *ts) -{ - return le32_to_cpu(ts->nsec); -} +BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, + refs, 32); +BTRFS_SETGET_STACK_FUNCS(stack_extent_owner, struct btrfs_extent_item, + owner, 32); -static inline void btrfs_set_timespec_nsec(struct btrfs_inode_timespec *ts, - u32 val) -{ - ts->nsec = cpu_to_le32(val); -} +/* struct btrfs_node */ +BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); -static inline u32 btrfs_extent_refs(struct btrfs_extent_item *ei) +static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr) { - return le32_to_cpu(ei->refs); + unsigned long ptr; + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + return btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)ptr); } -static inline void btrfs_set_extent_refs(struct btrfs_extent_item *ei, u32 val) +static inline void btrfs_set_node_blockptr(struct extent_buffer *eb, + int nr, u64 val) { - ei->refs = cpu_to_le32(val); + unsigned long ptr; + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val); } -static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) +static unsigned long btrfs_node_key_ptr_offset(int nr) { - return le64_to_cpu(ei->owner); + return offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; } -static inline void btrfs_set_extent_owner(struct btrfs_extent_item *ei, u64 val) +static void btrfs_node_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) { - ei->owner = cpu_to_le64(val); + unsigned long ptr; + ptr = btrfs_node_key_ptr_offset(nr); + read_eb_member(eb, (struct btrfs_key_ptr *)ptr, + struct btrfs_key_ptr, key, disk_key); } - -static inline u64 btrfs_node_blockptr(struct btrfs_node *n, int nr) +static inline void btrfs_set_node_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) { - return le64_to_cpu(n->ptrs[nr].blockptr); + unsigned long ptr; + ptr = btrfs_node_key_ptr_offset(nr); + write_eb_member(eb, (struct btrfs_key_ptr *)ptr, + struct btrfs_key_ptr, key, disk_key); } +/* struct btrfs_item */ +BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32); +BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32); -static inline void btrfs_set_node_blockptr(struct btrfs_node *n, int nr, - u64 val) +static inline unsigned long btrfs_item_nr_offset(int nr) { - n->ptrs[nr].blockptr = cpu_to_le64(val); + return offsetof(struct btrfs_leaf, items) + + sizeof(struct btrfs_item) * nr; } -static inline u32 btrfs_item_offset(struct btrfs_item *item) +static inline struct btrfs_item *btrfs_item_nr(struct extent_buffer *eb, + int nr) { - return le32_to_cpu(item->offset); + return (struct btrfs_item *)btrfs_item_nr_offset(nr); } -static inline void btrfs_set_item_offset(struct btrfs_item *item, u32 val) +static inline u32 btrfs_item_end(struct extent_buffer *eb, + struct btrfs_item *item) { - item->offset = cpu_to_le32(val); + return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item); } -static inline u32 btrfs_item_end(struct btrfs_item *item) +static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr) { - return le32_to_cpu(item->offset) + le16_to_cpu(item->size); + return btrfs_item_end(eb, btrfs_item_nr(eb, nr)); } -static inline u16 btrfs_item_size(struct btrfs_item *item) +static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr) { - return le16_to_cpu(item->size); + return btrfs_item_offset(eb, btrfs_item_nr(eb, nr)); } -static inline void btrfs_set_item_size(struct btrfs_item *item, u16 val) +static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr) { - item->size = cpu_to_le16(val); + return btrfs_item_size(eb, btrfs_item_nr(eb, nr)); } -static inline u16 btrfs_dir_flags(struct btrfs_dir_item *d) +static inline void btrfs_item_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) { - return le16_to_cpu(d->flags); + struct btrfs_item *item = btrfs_item_nr(eb, nr); + read_eb_member(eb, item, struct btrfs_item, key, disk_key); } -static inline void btrfs_set_dir_flags(struct btrfs_dir_item *d, u16 val) +static inline void btrfs_set_item_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) { - d->flags = cpu_to_le16(val); + struct btrfs_item *item = btrfs_item_nr(eb, nr); + write_eb_member(eb, item, struct btrfs_item, key, disk_key); } -static inline u8 btrfs_dir_type(struct btrfs_dir_item *d) -{ - return d->type; -} +/* struct btrfs_dir_item */ +BTRFS_SETGET_FUNCS(dir_flags, struct btrfs_dir_item, flags, 16); +BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8); +BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16); -static inline void btrfs_set_dir_type(struct btrfs_dir_item *d, u8 val) +static inline void btrfs_dir_item_key(struct extent_buffer *eb, + struct btrfs_dir_item *item, + struct btrfs_disk_key *key) { - d->type = val; + read_eb_member(eb, item, struct btrfs_dir_item, location, key); } -static inline u16 btrfs_dir_name_len(struct btrfs_dir_item *d) +static inline void btrfs_set_dir_item_key(struct extent_buffer *eb, + struct btrfs_dir_item *item, + struct btrfs_disk_key *key) { - return le16_to_cpu(d->name_len); + write_eb_member(eb, item, struct btrfs_dir_item, location, key); } -static inline void btrfs_set_dir_name_len(struct btrfs_dir_item *d, u16 val) -{ - d->name_len = cpu_to_le16(val); -} +/* struct btrfs_disk_key */ +BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, + objectid, 64); +BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64); +BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8); static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu, struct btrfs_disk_key *disk) { cpu->offset = le64_to_cpu(disk->offset); - cpu->flags = le32_to_cpu(disk->flags); + cpu->type = disk->type; cpu->objectid = le64_to_cpu(disk->objectid); } @@ -649,400 +661,167 @@ static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk, struct btrfs_key *cpu) { disk->offset = cpu_to_le64(cpu->offset); - disk->flags = cpu_to_le32(cpu->flags); + disk->type = cpu->type; disk->objectid = cpu_to_le64(cpu->objectid); } -static inline u64 btrfs_disk_key_objectid(struct btrfs_disk_key *disk) -{ - return le64_to_cpu(disk->objectid); -} - -static inline void btrfs_set_disk_key_objectid(struct btrfs_disk_key *disk, - u64 val) -{ - disk->objectid = cpu_to_le64(val); -} - -static inline u64 btrfs_disk_key_offset(struct btrfs_disk_key *disk) -{ - return le64_to_cpu(disk->offset); -} - -static inline void btrfs_set_disk_key_offset(struct btrfs_disk_key *disk, - u64 val) -{ - disk->offset = cpu_to_le64(val); -} - -static inline u32 btrfs_disk_key_flags(struct btrfs_disk_key *disk) -{ - return le32_to_cpu(disk->flags); -} - -static inline void btrfs_set_disk_key_flags(struct btrfs_disk_key *disk, - u32 val) -{ - disk->flags = cpu_to_le32(val); -} - -static inline u32 btrfs_disk_key_type(struct btrfs_disk_key *key) -{ - return le32_to_cpu(key->flags) >> BTRFS_KEY_TYPE_SHIFT; -} - -static inline void btrfs_set_disk_key_type(struct btrfs_disk_key *key, - u32 val) -{ - u32 flags = btrfs_disk_key_flags(key); - BUG_ON(val >= BTRFS_KEY_TYPE_MAX); - val = val << BTRFS_KEY_TYPE_SHIFT; - flags = (flags & ~BTRFS_KEY_TYPE_MASK) | val; - btrfs_set_disk_key_flags(key, flags); -} - -static inline u32 btrfs_key_type(struct btrfs_key *key) -{ - return key->flags >> BTRFS_KEY_TYPE_SHIFT; -} - -static inline void btrfs_set_key_type(struct btrfs_key *key, u32 val) -{ - BUG_ON(val >= BTRFS_KEY_TYPE_MAX); - val = val << BTRFS_KEY_TYPE_SHIFT; - key->flags = (key->flags & ~(BTRFS_KEY_TYPE_MASK)) | val; -} - -static inline u64 btrfs_header_blocknr(struct btrfs_header *h) -{ - return le64_to_cpu(h->blocknr); -} - -static inline void btrfs_set_header_blocknr(struct btrfs_header *h, u64 blocknr) -{ - h->blocknr = cpu_to_le64(blocknr); -} - -static inline u64 btrfs_header_generation(struct btrfs_header *h) +static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb, + struct btrfs_key *key, int nr) { - return le64_to_cpu(h->generation); + struct btrfs_disk_key disk_key; + btrfs_node_key(eb, &disk_key, nr); + btrfs_disk_key_to_cpu(key, &disk_key); } -static inline void btrfs_set_header_generation(struct btrfs_header *h, - u64 val) +static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb, + struct btrfs_key *key, int nr) { - h->generation = cpu_to_le64(val); + struct btrfs_disk_key disk_key; + btrfs_item_key(eb, &disk_key, nr); + btrfs_disk_key_to_cpu(key, &disk_key); } -static inline u64 btrfs_header_owner(struct btrfs_header *h) +static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb, + struct btrfs_dir_item *item, + struct btrfs_key *key) { - return le64_to_cpu(h->owner); + struct btrfs_disk_key disk_key; + btrfs_dir_item_key(eb, item, &disk_key); + btrfs_disk_key_to_cpu(key, &disk_key); } -static inline void btrfs_set_header_owner(struct btrfs_header *h, - u64 val) -{ - h->owner = cpu_to_le64(val); -} - -static inline u16 btrfs_header_nritems(struct btrfs_header *h) -{ - return le16_to_cpu(h->nritems); -} - -static inline void btrfs_set_header_nritems(struct btrfs_header *h, u16 val) -{ - h->nritems = cpu_to_le16(val); -} - -static inline u16 btrfs_header_flags(struct btrfs_header *h) -{ - return le16_to_cpu(h->flags); -} - -static inline void btrfs_set_header_flags(struct btrfs_header *h, u16 val) -{ - h->flags = cpu_to_le16(val); -} - -static inline int btrfs_header_level(struct btrfs_header *h) -{ - return h->level; -} - -static inline void btrfs_set_header_level(struct btrfs_header *h, int level) -{ - BUG_ON(level > BTRFS_MAX_LEVEL); - h->level = level; -} - -static inline int btrfs_is_leaf(struct btrfs_node *n) -{ - return (btrfs_header_level(&n->header) == 0); -} - -static inline u64 btrfs_root_blocknr(struct btrfs_root_item *item) -{ - return le64_to_cpu(item->blocknr); -} - -static inline void btrfs_set_root_blocknr(struct btrfs_root_item *item, u64 val) -{ - item->blocknr = cpu_to_le64(val); -} - -static inline u64 btrfs_root_dirid(struct btrfs_root_item *item) -{ - return le64_to_cpu(item->root_dirid); -} - -static inline void btrfs_set_root_dirid(struct btrfs_root_item *item, u64 val) -{ - item->root_dirid = cpu_to_le64(val); -} - -static inline u32 btrfs_root_refs(struct btrfs_root_item *item) -{ - return le32_to_cpu(item->refs); -} - -static inline void btrfs_set_root_refs(struct btrfs_root_item *item, u32 val) -{ - item->refs = cpu_to_le32(val); -} - -static inline u32 btrfs_root_flags(struct btrfs_root_item *item) -{ - return le32_to_cpu(item->flags); -} - -static inline void btrfs_set_root_flags(struct btrfs_root_item *item, u32 val) -{ - item->flags = cpu_to_le32(val); -} - -static inline void btrfs_set_root_blocks_used(struct btrfs_root_item *item, - u64 val) -{ - item->blocks_used = cpu_to_le64(val); -} - -static inline u64 btrfs_root_blocks_used(struct btrfs_root_item *item) -{ - return le64_to_cpu(item->blocks_used); -} - -static inline void btrfs_set_root_block_limit(struct btrfs_root_item *item, - u64 val) -{ - item->block_limit = cpu_to_le64(val); -} - -static inline u64 btrfs_root_block_limit(struct btrfs_root_item *item) -{ - return le64_to_cpu(item->block_limit); -} -static inline u64 btrfs_super_blocknr(struct btrfs_super_block *s) +static inline u8 btrfs_key_type(struct btrfs_key *key) { - return le64_to_cpu(s->blocknr); + return key->type; } -static inline void btrfs_set_super_blocknr(struct btrfs_super_block *s, u64 val) +static inline void btrfs_set_key_type(struct btrfs_key *key, u8 val) { - s->blocknr = cpu_to_le64(val); + key->type = val; } -static inline u64 btrfs_super_generation(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->generation); -} +/* struct btrfs_header */ +BTRFS_SETGET_HEADER_FUNCS(header_blocknr, struct btrfs_header, blocknr, 64); +BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header, + generation, 64); +BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64); +BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32); +BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 16); +BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8); -static inline void btrfs_set_super_generation(struct btrfs_super_block *s, - u64 val) +static inline u8 *btrfs_header_fsid(struct extent_buffer *eb) { - s->generation = cpu_to_le64(val); + unsigned long ptr = offsetof(struct btrfs_header, fsid); + return (u8 *)ptr; } -static inline u64 btrfs_super_root(struct btrfs_super_block *s) +static inline u8 *btrfs_super_fsid(struct extent_buffer *eb) { - return le64_to_cpu(s->root); + unsigned long ptr = offsetof(struct btrfs_super_block, fsid); + return (u8 *)ptr; } -static inline void btrfs_set_super_root(struct btrfs_super_block *s, u64 val) +static inline u8 *btrfs_header_csum(struct extent_buffer *eb) { - s->root = cpu_to_le64(val); + unsigned long ptr = offsetof(struct btrfs_header, csum); + return (u8 *)ptr; } -static inline u64 btrfs_super_total_blocks(struct btrfs_super_block *s) +static inline struct btrfs_node *btrfs_buffer_node(struct extent_buffer *eb) { - return le64_to_cpu(s->total_blocks); + return NULL; } -static inline void btrfs_set_super_total_blocks(struct btrfs_super_block *s, - u64 val) +static inline struct btrfs_leaf *btrfs_buffer_leaf(struct extent_buffer *eb) { - s->total_blocks = cpu_to_le64(val); + return NULL; } -static inline u64 btrfs_super_blocks_used(struct btrfs_super_block *s) +static inline struct btrfs_header *btrfs_buffer_header(struct extent_buffer *eb) { - return le64_to_cpu(s->blocks_used); + return NULL; } -static inline void btrfs_set_super_blocks_used(struct btrfs_super_block *s, - u64 val) +static inline int btrfs_is_leaf(struct extent_buffer *eb) { - s->blocks_used = cpu_to_le64(val); + return (btrfs_header_level(eb) == 0); } -static inline u32 btrfs_super_blocksize(struct btrfs_super_block *s) -{ - return le32_to_cpu(s->blocksize); -} +/* struct btrfs_root_item */ +BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32); +BTRFS_SETGET_FUNCS(disk_root_blocknr, struct btrfs_root_item, blocknr, 64); -static inline void btrfs_set_super_blocksize(struct btrfs_super_block *s, - u32 val) -{ - s->blocksize = cpu_to_le32(val); -} +BTRFS_SETGET_STACK_FUNCS(root_blocknr, struct btrfs_root_item, blocknr, 64); +BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64); +BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32); +BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 32); +BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, blocks_used, 64); +BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, block_limit, 64); -static inline u64 btrfs_super_root_dir(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->root_dir_objectid); -} +/* struct btrfs_super_block */ +BTRFS_SETGET_STACK_FUNCS(super_blocknr, struct btrfs_super_block, blocknr, 64); +BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, + generation, 64); +BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); +BTRFS_SETGET_STACK_FUNCS(super_total_blocks, struct btrfs_super_block, + total_blocks, 64); +BTRFS_SETGET_STACK_FUNCS(super_blocks_used, struct btrfs_super_block, + blocks_used, 64); +BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block, + sectorsize, 32); +BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, + nodesize, 32); +BTRFS_SETGET_STACK_FUNCS(super_leafsize, struct btrfs_super_block, + leafsize, 32); +BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block, + root_dir_objectid, 64); -static inline void btrfs_set_super_root_dir(struct btrfs_super_block *s, u64 - val) +static inline unsigned long btrfs_leaf_data(struct extent_buffer *l) { - s->root_dir_objectid = cpu_to_le64(val); + return offsetof(struct btrfs_leaf, items); } -static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) -{ - return (u8 *)l->items; -} +/* struct btrfs_file_extent_item */ +BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8); -static inline int btrfs_file_extent_type(struct btrfs_file_extent_item *e) -{ - return e->type; -} -static inline void btrfs_set_file_extent_type(struct btrfs_file_extent_item *e, - u8 val) -{ - e->type = val; -} - -static inline char *btrfs_file_extent_inline_start(struct +static inline unsigned long btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) { - return (char *)(&e->disk_blocknr); + unsigned long offset = (unsigned long)e; + offset += offsetof(struct btrfs_file_extent_item, disk_blocknr); + return offset; } static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) { - return (unsigned long)(&((struct - btrfs_file_extent_item *)NULL)->disk_blocknr) + datasize; -} - -static inline u32 btrfs_file_extent_inline_len(struct btrfs_item *e) -{ - struct btrfs_file_extent_item *fe = NULL; - return btrfs_item_size(e) - (unsigned long)(&fe->disk_blocknr); -} - -static inline u64 btrfs_file_extent_disk_blocknr(struct btrfs_file_extent_item - *e) -{ - return le64_to_cpu(e->disk_blocknr); + return offsetof(struct btrfs_file_extent_item, disk_blocknr) + datasize; } -static inline void btrfs_set_file_extent_disk_blocknr(struct - btrfs_file_extent_item - *e, u64 val) +static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb, + struct btrfs_item *e) { - e->disk_blocknr = cpu_to_le64(val); + unsigned long offset; + offset = offsetof(struct btrfs_file_extent_item, disk_blocknr); + return btrfs_item_size(eb, e) - offset; } -static inline u64 btrfs_file_extent_generation(struct btrfs_file_extent_item *e) -{ - return le64_to_cpu(e->generation); -} - -static inline void btrfs_set_file_extent_generation(struct - btrfs_file_extent_item *e, - u64 val) -{ - e->generation = cpu_to_le64(val); -} - -static inline u64 btrfs_file_extent_disk_num_blocks(struct - btrfs_file_extent_item *e) -{ - return le64_to_cpu(e->disk_num_blocks); -} - -static inline void btrfs_set_file_extent_disk_num_blocks(struct - btrfs_file_extent_item - *e, u64 val) -{ - e->disk_num_blocks = cpu_to_le64(val); -} - -static inline u64 btrfs_file_extent_offset(struct btrfs_file_extent_item *e) -{ - return le64_to_cpu(e->offset); -} - -static inline void btrfs_set_file_extent_offset(struct btrfs_file_extent_item - *e, u64 val) -{ - e->offset = cpu_to_le64(val); -} - -static inline u64 btrfs_file_extent_num_blocks(struct btrfs_file_extent_item - *e) -{ - return le64_to_cpu(e->num_blocks); -} - -static inline void btrfs_set_file_extent_num_blocks(struct - btrfs_file_extent_item *e, - u64 val) -{ - e->num_blocks = cpu_to_le64(val); -} +BTRFS_SETGET_FUNCS(file_extent_disk_blocknr, struct btrfs_file_extent_item, + disk_blocknr, 64); +BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, + generation, 64); +BTRFS_SETGET_FUNCS(file_extent_disk_num_blocks, struct btrfs_file_extent_item, + disk_num_blocks, 64); +BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item, + offset, 64); +BTRFS_SETGET_FUNCS(file_extent_num_blocks, struct btrfs_file_extent_item, + num_blocks, 64); static inline struct btrfs_root *btrfs_sb(struct super_block *sb) { return sb->s_fs_info; } -static inline void btrfs_check_bounds(void *vptr, size_t len, - void *vcontainer, size_t container_len) -{ - char *ptr = vptr; - char *container = vcontainer; - WARN_ON(ptr < container); - WARN_ON(ptr + len > container + container_len); -} - -static inline void btrfs_memcpy(struct btrfs_root *root, - void *dst_block, - void *dst, const void *src, size_t nr) -{ - btrfs_check_bounds(dst, nr, dst_block, root->fs_info->sb->s_blocksize); - memcpy(dst, src, nr); -} - -static inline void btrfs_memmove(struct btrfs_root *root, - void *dst_block, - void *dst, void *src, size_t nr) -{ - btrfs_check_bounds(dst, nr, dst_block, root->fs_info->sb->s_blocksize); - memmove(dst, src, nr); -} - static inline int btrfs_set_root_name(struct btrfs_root *root, const char *name, int len) { @@ -1063,7 +842,11 @@ static inline int btrfs_set_root_name(struct btrfs_root *root, /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ - btrfs_item_offset((leaf)->items + (slot)))) + btrfs_item_offset_nr(leaf, slot))) + +#define btrfs_item_ptr_offset(leaf, slot) \ + ((unsigned long)(btrfs_leaf_data(leaf) + \ + btrfs_item_offset_nr(leaf, slot))) /* mount option defines and helpers */ #define BTRFS_MOUNT_SUBVOL 0x000001 @@ -1084,7 +867,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int data, int owner); int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); -struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, +struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 hint, u64 empty_size); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, @@ -1092,7 +875,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, u64 num_blocks, u64 empty_size, u64 search_start, u64 search_end, struct btrfs_key *ins, int data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf); + struct extent_buffer *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, @@ -1106,10 +889,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); /* ctree.c */ -int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *buf, struct buffer_head - *parent, int parent_slot, struct buffer_head - **cow_ret); +int btrfs_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *buf, + struct extent_buffer *parent, int parent_slot, + struct extent_buffer **cow_ret); int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 data_size); int btrfs_truncate_item(struct btrfs_trans_handle *trans, @@ -1120,7 +903,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); int btrfs_realloc_node(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct buffer_head *parent, + struct btrfs_root *root, struct extent_buffer *parent, int cache_only, u64 *last_ret); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); @@ -1134,7 +917,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *cpu_key, u32 data_size); int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); -int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf); +int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *root); /* root-item.c */ @@ -1179,9 +962,9 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid); /* inode-item.c */ -int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 objectid, struct btrfs_inode_item - *inode_item); +int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 objectid); int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *location, int mod); @@ -1224,8 +1007,6 @@ struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, struct btrfs_root *root); int btrfs_commit_write(struct file *file, struct page *page, unsigned from, unsigned to); -int btrfs_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create); struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, size_t page_offset, u64 start, u64 end, int create); diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 49db5fa7ced..6f19de41b87 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -33,7 +33,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle int ret; char *ptr; struct btrfs_item *item; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); if (ret == -EEXIST) { @@ -49,11 +49,11 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle if (ret < 0) return ERR_PTR(ret); WARN_ON(ret > 0); - leaf = btrfs_buffer_leaf(path->nodes[0]); - item = leaf->items + path->slots[0]; + leaf = path->nodes[0]; + item = btrfs_item_nr(leaf, path->slots[0]); ptr = btrfs_item_ptr(leaf, path->slots[0], char); - BUG_ON(data_size > btrfs_item_size(item)); - ptr += btrfs_item_size(item) - data_size; + BUG_ON(data_size > btrfs_item_size(leaf, item)); + ptr += btrfs_item_size(leaf, item) - data_size; return (struct btrfs_dir_item *)ptr; } @@ -65,12 +65,13 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root int ret2 = 0; struct btrfs_path *path; struct btrfs_dir_item *dir_item; - char *name_ptr; + struct extent_buffer *leaf; + unsigned long name_ptr; struct btrfs_key key; + struct btrfs_disk_key disk_key; u32 data_size; key.objectid = dir; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); @@ -85,14 +86,16 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root goto out; } - btrfs_cpu_key_to_disk(&dir_item->location, location); - btrfs_set_dir_type(dir_item, type); - btrfs_set_dir_flags(dir_item, 0); - btrfs_set_dir_name_len(dir_item, name_len); - name_ptr = (char *)(dir_item + 1); + leaf = path->nodes[0]; + btrfs_cpu_key_to_disk(&disk_key, location); + btrfs_set_dir_item_key(leaf, dir_item, &disk_key); + btrfs_set_dir_type(leaf, dir_item, type); + btrfs_set_dir_flags(leaf, dir_item, 0); + btrfs_set_dir_name_len(leaf, dir_item, name_len); + name_ptr = (unsigned long)(dir_item + 1); - btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); - btrfs_mark_buffer_dirty(path->nodes[0]); + write_extent_buffer(leaf, name, name_ptr, name_len); + btrfs_mark_buffer_dirty(leaf); second_insert: /* FIXME, use some real flag for selecting the extra index */ @@ -110,13 +113,15 @@ second_insert: ret2 = PTR_ERR(dir_item); goto out; } - btrfs_cpu_key_to_disk(&dir_item->location, location); - btrfs_set_dir_type(dir_item, type); - btrfs_set_dir_flags(dir_item, 0); - btrfs_set_dir_name_len(dir_item, name_len); - name_ptr = (char *)(dir_item + 1); - btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); - btrfs_mark_buffer_dirty(path->nodes[0]); + leaf = path->nodes[0]; + btrfs_cpu_key_to_disk(&disk_key, location); + btrfs_set_dir_item_key(leaf, dir_item, &disk_key); + btrfs_set_dir_type(leaf, dir_item, type); + btrfs_set_dir_flags(leaf, dir_item, 0); + btrfs_set_dir_name_len(leaf, dir_item, name_len); + name_ptr = (unsigned long)(dir_item + 1); + write_extent_buffer(leaf, name, name_ptr, name_len); + btrfs_mark_buffer_dirty(leaf); out: btrfs_free_path(path); if (ret) @@ -136,14 +141,15 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_key key; int ins_len = mod < 0 ? -1 : 0; int cow = mod != 0; - struct btrfs_disk_key *found_key; - struct btrfs_leaf *leaf; + struct btrfs_key found_key; + struct extent_buffer *leaf; key.objectid = dir; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); + ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); + ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); if (ret < 0) return ERR_PTR(ret); @@ -152,12 +158,13 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, return NULL; path->slots[0]--; } - leaf = btrfs_buffer_leaf(path->nodes[0]); - found_key = &leaf->items[path->slots[0]].key; - if (btrfs_disk_key_objectid(found_key) != dir || - btrfs_disk_key_type(found_key) != BTRFS_DIR_ITEM_KEY || - btrfs_disk_key_offset(found_key) != key.offset) + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + if (found_key.objectid != dir || + btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY || + found_key.offset != key.offset) return NULL; return btrfs_match_dir_item_name(root, path, name, name_len); @@ -176,7 +183,6 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, int cow = mod != 0; key.objectid = dir; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); key.offset = objectid; @@ -193,21 +199,22 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, const char *name, int name_len) { struct btrfs_dir_item *dir_item; - char *name_ptr; + unsigned long name_ptr; u32 total_len; u32 cur = 0; u32 this_len; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; - leaf = btrfs_buffer_leaf(path->nodes[0]); + leaf = path->nodes[0]; dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); - total_len = btrfs_item_size(leaf->items + path->slots[0]); + total_len = btrfs_item_size_nr(leaf, path->slots[0]); while(cur < total_len) { - this_len = sizeof(*dir_item) + btrfs_dir_name_len(dir_item); - name_ptr = (char *)(dir_item + 1); + this_len = sizeof(*dir_item) + + btrfs_dir_name_len(leaf, dir_item); + name_ptr = (unsigned long)(dir_item + 1); - if (btrfs_dir_name_len(dir_item) == name_len && - memcmp(name_ptr, name, name_len) == 0) + if (btrfs_dir_name_len(leaf, dir_item) == name_len && + memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) return dir_item; cur += this_len; @@ -223,20 +230,23 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, struct btrfs_dir_item *di) { - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; u32 sub_item_len; u32 item_len; int ret = 0; - leaf = btrfs_buffer_leaf(path->nodes[0]); - sub_item_len = sizeof(*di) + btrfs_dir_name_len(di); - item_len = btrfs_item_size(leaf->items + path->slots[0]); - if (sub_item_len == btrfs_item_size(leaf->items + path->slots[0])) { + leaf = path->nodes[0]; + sub_item_len = sizeof(*di) + btrfs_dir_name_len(leaf, di); + item_len = btrfs_item_size_nr(leaf, path->slots[0]); + if (sub_item_len == item_len) { ret = btrfs_del_item(trans, root, path); } else { - char *ptr = (char *)di; - char *start = btrfs_item_ptr(leaf, path->slots[0], char); - btrfs_memmove(root, leaf, ptr, ptr + sub_item_len, + /* MARKER */ + unsigned long ptr = (unsigned long)di; + unsigned long start; + + start = btrfs_item_ptr_offset(leaf, path->slots[0]); + memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, item_len - (ptr + sub_item_len - start)); ret = btrfs_truncate_item(trans, root, path, item_len - sub_item_len); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ef6d76f2ec2..0c1f90cbedb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -23,138 +23,132 @@ #include #include #include +#include // for block_sync_page #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -u64 bh_blocknr(struct buffer_head *bh) +#if 0 +static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) { - return bh->b_blocknr; -} - -static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) -{ - struct btrfs_node *node = btrfs_buffer_node(buf); - if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) { - printk(KERN_CRIT "bh_blocknr(buf) is %llu, header is %llu\n", - (unsigned long long)bh_blocknr(buf), - (unsigned long long)btrfs_header_blocknr(&node->header)); + if (extent_buffer_blocknr(buf) != btrfs_header_blocknr(buf)) { + printk(KERN_CRIT "buf blocknr(buf) is %llu, header is %llu\n", + (unsigned long long)extent_buffer_blocknr(buf), + (unsigned long long)btrfs_header_blocknr(buf)); return 1; } return 0; } +#endif -struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) +struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, + u64 blocknr) { - struct address_space *mapping = root->fs_info->btree_inode->i_mapping; - int blockbits = root->fs_info->sb->s_blocksize_bits; - unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); - struct page *page; - struct buffer_head *bh; - struct buffer_head *head; - struct buffer_head *ret = NULL; - - - page = find_lock_page(mapping, index); - if (!page) - return NULL; - - if (!page_has_buffers(page)) - goto out_unlock; + struct inode *btree_inode = root->fs_info->btree_inode; + return find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + blocknr * root->sectorsize, + root->sectorsize, GFP_NOFS); +} - head = page_buffers(page); - bh = head; - do { - if (buffer_mapped(bh) && bh_blocknr(bh) == blocknr) { - ret = bh; - get_bh(bh); - goto out_unlock; - } - bh = bh->b_this_page; - } while (bh != head); -out_unlock: - unlock_page(page); - page_cache_release(page); - return ret; +struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, + u64 blocknr) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + return alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + blocknr * root->sectorsize, + root->sectorsize, GFP_NOFS); } -int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, - u64 logical) +struct extent_map *btree_get_extent(struct inode *inode, struct page *page, + size_t page_offset, u64 start, u64 end, + int create) { - if (logical == 0) { - bh->b_bdev = NULL; - bh->b_blocknr = 0; - set_buffer_mapped(bh); - } else { - map_bh(bh, root->fs_info->sb, logical); + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map *em; + int ret; + +again: + em = lookup_extent_mapping(em_tree, start, end); + if (em) { + goto out; } - return 0; + em = alloc_extent_map(GFP_NOFS); + if (!em) { + em = ERR_PTR(-ENOMEM); + goto out; + } + em->start = 0; + em->end = (i_size_read(inode) & ~((u64)PAGE_CACHE_SIZE -1)) - 1; + em->block_start = 0; + em->block_end = em->end; + em->bdev = inode->i_sb->s_bdev; + ret = add_extent_mapping(em_tree, em); + if (ret == -EEXIST) { + free_extent_map(em); + em = NULL; + goto again; + } else if (ret) { + em = ERR_PTR(ret); + } +out: + return em; } -struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 blocknr) +static int btree_writepage(struct page *page, struct writeback_control *wbc) { - struct address_space *mapping = root->fs_info->btree_inode->i_mapping; - int blockbits = root->fs_info->sb->s_blocksize_bits; - unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); - struct page *page; - struct buffer_head *bh; - struct buffer_head *head; - struct buffer_head *ret = NULL; - int err; - u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits); + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + return extent_write_full_page(tree, page, btree_get_extent, wbc); +} +int btree_readpage(struct file *file, struct page *page) +{ + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + return extent_read_full_page(tree, page, btree_get_extent); +} - page = find_or_create_page(mapping, index, GFP_NOFS); - if (!page) - return NULL; +static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) +{ + struct extent_map_tree *tree; + int ret; - if (!page_has_buffers(page)) - create_empty_buffers(page, root->fs_info->sb->s_blocksize, 0); - head = page_buffers(page); - bh = head; - do { - if (!buffer_mapped(bh)) { - err = btrfs_map_bh_to_logical(root, bh, first_block); - BUG_ON(err); - } - if (bh_blocknr(bh) == blocknr) { - ret = bh; - get_bh(bh); - goto out_unlock; - } - bh = bh->b_this_page; - first_block++; - } while (bh != head); -out_unlock: - unlock_page(page); - if (ret) - touch_buffer(ret); - page_cache_release(page); + BUG_ON(page->private != 1); + tree = &BTRFS_I(page->mapping->host)->extent_tree; + ret = try_release_extent_mapping(tree, page); + if (ret == 1) { + ClearPagePrivate(page); + set_page_private(page, 0); + page_cache_release(page); + } return ret; } -static int btree_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh, int create) +static void btree_invalidatepage(struct page *page, unsigned long offset) { - int err; - struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; - err = btrfs_map_bh_to_logical(root, bh, iblock); - return err; + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + extent_invalidatepage(tree, page, offset); + btree_releasepage(page, GFP_NOFS); } int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, char *result) { + return 0; +#if 0 u32 crc; crc = crc32c(0, data, len); memcpy(result, &crc, BTRFS_CRC32_SIZE); return 0; +#endif } -static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, +#if 0 +static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int verify) { + return 0; char result[BTRFS_CRC32_SIZE]; int ret; struct btrfs_node *node; @@ -176,7 +170,9 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, } return 0; } +#endif +#if 0 static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct buffer_head *bh; @@ -195,87 +191,65 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc) } while (bh != head); return block_write_full_page(page, btree_get_block, wbc); } - -static int btree_readpage(struct file * file, struct page * page) -{ - return block_read_full_page(page, btree_get_block); -} +#endif static struct address_space_operations btree_aops = { .readpage = btree_readpage, .writepage = btree_writepage, + .releasepage = btree_releasepage, + .invalidatepage = btree_invalidatepage, .sync_page = block_sync_page, }; int readahead_tree_block(struct btrfs_root *root, u64 blocknr) { - struct buffer_head *bh = NULL; + struct extent_buffer *buf = NULL; + struct inode *btree_inode = root->fs_info->btree_inode; int ret = 0; - bh = btrfs_find_create_tree_block(root, blocknr); - if (!bh) + buf = btrfs_find_create_tree_block(root, blocknr); + if (!buf) return 0; - if (buffer_uptodate(bh)) { - ret = 1; - goto done; - } - if (test_set_buffer_locked(bh)) { - ret = 1; - goto done; - } - if (!buffer_uptodate(bh)) { - get_bh(bh); - bh->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh); - } else { - unlock_buffer(bh); - ret = 1; - } -done: - brelse(bh); + read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + buf, 0); + free_extent_buffer(buf); return ret; } -struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) +struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) { - struct buffer_head *bh = NULL; - - bh = btrfs_find_create_tree_block(root, blocknr); - if (!bh) - return bh; - if (buffer_uptodate(bh)) - goto uptodate; - lock_buffer(bh); - if (!buffer_uptodate(bh)) { - get_bh(bh); - bh->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) - goto fail; - } else { - unlock_buffer(bh); - } -uptodate: - if (!buffer_checked(bh)) { - csum_tree_block(root, bh, 1); - set_buffer_checked(bh); - } - if (check_tree_block(root, bh)) - goto fail; - return bh; -fail: - brelse(bh); - return NULL; + struct extent_buffer *buf = NULL; + struct inode *btree_inode = root->fs_info->btree_inode; + + buf = btrfs_find_create_tree_block(root, blocknr); + if (!buf) + return NULL; + read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + buf, 1); + return buf; } int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf) + struct extent_buffer *buf) { - WARN_ON(atomic_read(&buf->b_count) == 0); - lock_buffer(buf); - clear_buffer_dirty(buf); - unlock_buffer(buf); + struct inode *btree_inode = root->fs_info->btree_inode; + clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); + return 0; +} + +int wait_on_tree_block_writeback(struct btrfs_root *root, + struct extent_buffer *buf) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->extent_tree, + buf); + return 0; +} + +int set_tree_block_dirty(struct btrfs_root *root, struct extent_buffer *buf) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); return 0; } @@ -287,7 +261,9 @@ static int __setup_root(int blocksize, root->node = NULL; root->inode = NULL; root->commit_root = NULL; - root->blocksize = blocksize; + root->sectorsize = blocksize; + root->nodesize = blocksize; + root->leafsize = blocksize; root->ref_cows = 0; root->fs_info = fs_info; root->objectid = objectid; @@ -332,7 +308,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_root *root; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_path *path; - struct btrfs_leaf *l; + struct extent_buffer *l; u64 highest_inode; int ret = 0; @@ -361,11 +337,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, ret = -ENOENT; goto out; } - l = btrfs_buffer_leaf(path->nodes[0]); - memcpy(&root->root_item, - btrfs_item_ptr(l, path->slots[0], struct btrfs_root_item), + l = path->nodes[0]; + read_extent_buffer(l, &root->root_item, + btrfs_item_ptr_offset(l, path->slots[0]), sizeof(root->root_item)); - memcpy(&root->root_key, location, sizeof(*location)); ret = 0; out: btrfs_release_path(root, path); @@ -406,21 +381,21 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, (unsigned long)root->root_key.objectid, root); if (ret) { - brelse(root->node); + free_extent_buffer(root->node); kfree(root); return ERR_PTR(ret); } ret = btrfs_set_root_name(root, name, namelen); if (ret) { - brelse(root->node); + free_extent_buffer(root->node); kfree(root); return ERR_PTR(ret); } ret = btrfs_sysfs_add_root(root); if (ret) { - brelse(root->node); + free_extent_buffer(root->node); kfree(root->name); kfree(root); return ERR_PTR(ret); @@ -471,6 +446,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; + extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, + fs_info->btree_inode->i_mapping, + GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; @@ -493,10 +471,14 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!fs_info->sb_buffer) goto fail_iput; - disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; - fs_info->disk_super = disk_super; - memcpy(&fs_info->super_copy, disk_super, sizeof(fs_info->super_copy)); + read_extent_buffer(fs_info->sb_buffer, &fs_info->super_copy, 0, + sizeof(fs_info->super_copy)); + + read_extent_buffer(fs_info->sb_buffer, fs_info->fsid, + (unsigned long)btrfs_super_fsid(fs_info->sb_buffer), + BTRFS_FSID_SIZE); + disk_super = &fs_info->super_copy; if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; @@ -530,9 +512,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) return tree_root; fail_tree_root: - btrfs_block_release(tree_root, tree_root->node); + free_extent_buffer(tree_root->node); fail_sb_buffer: - btrfs_block_release(tree_root, fs_info->sb_buffer); + free_extent_buffer(fs_info->sb_buffer); fail_iput: iput(fs_info->btree_inode); fail: @@ -546,31 +528,13 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root) { int ret; - struct buffer_head *bh = root->fs_info->sb_buffer; - - lock_buffer(bh); - WARN_ON(atomic_read(&bh->b_count) < 1); - clear_buffer_dirty(bh); - csum_tree_block(root, bh, 0); - bh->b_end_io = end_buffer_write_sync; - get_bh(bh); - if (root->fs_info->do_barriers) - ret = submit_bh(WRITE_BARRIER, bh); - else - ret = submit_bh(WRITE, bh); - if (ret == -EOPNOTSUPP) { - get_bh(bh); - lock_buffer(bh); - set_buffer_uptodate(bh); - root->fs_info->do_barriers = 0; - ret = submit_bh(WRITE, bh); - } - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - WARN_ON(1); - return -EIO; - } - return 0; + struct extent_buffer *super = root->fs_info->sb_buffer; + struct inode *btree_inode = root->fs_info->btree_inode; + + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, super); + ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping, + super->start, super->len); + return ret; } int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) @@ -581,9 +545,9 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) if (root->inode) iput(root->inode); if (root->node) - brelse(root->node); + free_extent_buffer(root->node); if (root->commit_root) - brelse(root->commit_root); + free_extent_buffer(root->commit_root); if (root->name) kfree(root->name); kfree(root); @@ -629,12 +593,10 @@ int close_ctree(struct btrfs_root *root) mutex_unlock(&fs_info->fs_mutex); if (fs_info->extent_root->node) - btrfs_block_release(fs_info->extent_root, - fs_info->extent_root->node); + free_extent_buffer(fs_info->extent_root->node); if (fs_info->tree_root->node) - btrfs_block_release(fs_info->tree_root, - fs_info->tree_root->node); - btrfs_block_release(root, fs_info->sb_buffer); + free_extent_buffer(fs_info->tree_root->node); + free_extent_buffer(fs_info->sb_buffer); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); @@ -645,25 +607,32 @@ int close_ctree(struct btrfs_root *root) return 0; } -void btrfs_mark_buffer_dirty(struct buffer_head *bh) +int btrfs_buffer_uptodate(struct extent_buffer *buf) +{ + struct inode *btree_inode = buf->pages[0]->mapping->host; + return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); +} + +int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; - u64 transid = btrfs_header_generation(btrfs_buffer_header(bh)); + struct inode *btree_inode = buf->pages[0]->mapping->host; + return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, + buf); +} - WARN_ON(!atomic_read(&bh->b_count)); +void btrfs_mark_buffer_dirty(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + u64 transid = btrfs_header_generation(buf); + struct inode *btree_inode = root->fs_info->btree_inode; if (transid != root->fs_info->generation) { printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", - (unsigned long long)bh->b_blocknr, + (unsigned long long)extent_buffer_blocknr(buf), transid, root->fs_info->generation); WARN_ON(1); } - mark_buffer_dirty(bh); -} - -void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) -{ - brelse(buf); + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); } void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index fc6b520c6e0..70d9413c599 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -19,68 +19,35 @@ #ifndef __DISKIO__ #define __DISKIO__ -#include - #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) -enum btrfs_bh_state_bits { - BH_Checked = BH_PrivateStart, - BH_Defrag, - BH_DefragDone, -}; -BUFFER_FNS(Checked, checked); -BUFFER_FNS(Defrag, defrag); -BUFFER_FNS(DefragDone, defrag_done); - -static inline struct btrfs_node *btrfs_buffer_node(struct buffer_head *bh) -{ - return (struct btrfs_node *)bh->b_data; -} - -static inline struct btrfs_leaf *btrfs_buffer_leaf(struct buffer_head *bh) -{ - return (struct btrfs_leaf *)bh->b_data; -} - -static inline struct btrfs_header *btrfs_buffer_header(struct buffer_head *bh) -{ - return &((struct btrfs_node *)bh->b_data)->header; -} - -struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr); +struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr); int readahead_tree_block(struct btrfs_root *root, u64 blocknr); -struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 blocknr); -int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf); -int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf); +struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, + u64 blocknr); int clean_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct buffer_head *buf); -int btrfs_commit_transaction(struct btrfs_trans_handle *trans, - struct btrfs_root *root); + struct btrfs_root *root, struct extent_buffer *buf); struct btrfs_root *open_ctree(struct super_block *sb); int close_ctree(struct btrfs_root *root); -void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); -struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr); -int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, - char *result); +struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, + u64 blocknr); struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location, const char *name, int namelen); struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_key *location); -u64 bh_blocknr(struct buffer_head *bh); int btrfs_insert_dev_radix(struct btrfs_root *root, struct block_device *bdev, u64 device_id, u64 block_start, u64 num_blocks); -int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, - u64 logical); void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); -void btrfs_mark_buffer_dirty(struct buffer_head *bh); +void btrfs_mark_buffer_dirty(struct extent_buffer *buf); +int btrfs_buffer_uptodate(struct extent_buffer *buf); +int btrfs_set_buffer_uptodate(struct extent_buffer *buf); +int wait_on_tree_block_writeback(struct btrfs_root *root, + struct extent_buffer *buf); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f261a8326cd..089c41cbca7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -33,7 +33,7 @@ static int cache_block_group(struct btrfs_root *root, struct btrfs_path *path; int ret; struct btrfs_key key; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; struct radix_tree_root *extent_radix; int slot; u64 i; @@ -56,7 +56,6 @@ static int cache_block_group(struct btrfs_root *root, path->reada = 2; first_free = block_group->key.objectid; key.objectid = block_group->key.objectid; - key.flags = 0; key.offset = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -69,9 +68,9 @@ static int cache_block_group(struct btrfs_root *root, path->slots[0]--; while(1) { - leaf = btrfs_buffer_leaf(path->nodes[0]); + leaf = path->nodes[0]; slot = path->slots[0]; - if (slot >= btrfs_header_nritems(&leaf->header)) { + if (slot >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(root, path); if (ret < 0) goto err; @@ -82,7 +81,7 @@ static int cache_block_group(struct btrfs_root *root, } } - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + btrfs_item_key_to_cpu(leaf, &key, slot); if (key.objectid < block_group->key.objectid) { if (key.objectid + key.offset > first_free) first_free = key.objectid + key.offset; @@ -116,8 +115,7 @@ next: hole_size = block_group->key.objectid + block_group->key.offset - last; for (i = 0; i < hole_size; i++) { - set_radix_bit(extent_radix, - last + i); + set_radix_bit(extent_radix, last + i); } } block_group->cached = 1; @@ -366,7 +364,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_path *path; int ret; struct btrfs_key key; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_extent_item *item; u32 refs; @@ -375,7 +373,6 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, return -ENOMEM; key.objectid = blocknr; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, @@ -386,10 +383,10 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, BUG(); } BUG_ON(ret != 0); - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); - refs = btrfs_extent_refs(item); - btrfs_set_extent_refs(item, refs + 1); + refs = btrfs_extent_refs(l, item); + btrfs_set_extent_refs(l, item, refs + 1); btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(root->fs_info->extent_root, path); @@ -414,23 +411,25 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_path *path; int ret; struct btrfs_key key; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_extent_item *item; path = btrfs_alloc_path(); key.objectid = blocknr; key.offset = num_blocks; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 0); if (ret < 0) goto out; - if (ret != 0) + if (ret != 0) { + btrfs_print_leaf(root, path->nodes[0]); + printk("failed to find block number %Lu\n", blocknr); BUG(); - l = btrfs_buffer_leaf(path->nodes[0]); + } + l = path->nodes[0]; item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); - *refs = btrfs_extent_refs(item); + *refs = btrfs_extent_refs(l, item); out: btrfs_free_path(path); return 0; @@ -439,16 +438,16 @@ out: int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1); + return btrfs_inc_extent_ref(trans, root, + extent_buffer_blocknr(root->node), 1); } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf) + struct extent_buffer *buf) { u64 blocknr; - struct btrfs_node *buf_node; - struct btrfs_leaf *buf_leaf; - struct btrfs_disk_key *key; + u32 nritems; + struct btrfs_key key; struct btrfs_file_extent_item *fi; int i; int leaf; @@ -458,31 +457,31 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (!root->ref_cows) return 0; - buf_node = btrfs_buffer_node(buf); - leaf = btrfs_is_leaf(buf_node); - buf_leaf = btrfs_buffer_leaf(buf); - for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) { + + leaf = btrfs_is_leaf(buf); + nritems = btrfs_header_nritems(buf); + for (i = 0; i < nritems; i++) { if (leaf) { u64 disk_blocknr; - key = &buf_leaf->items[i].key; - if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + btrfs_item_key_to_cpu(buf, &key, i); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; - fi = btrfs_item_ptr(buf_leaf, i, + fi = btrfs_item_ptr(buf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) == + if (btrfs_file_extent_type(buf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + disk_blocknr = btrfs_file_extent_disk_blocknr(buf, fi); if (disk_blocknr == 0) continue; ret = btrfs_inc_extent_ref(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(fi)); + btrfs_file_extent_disk_num_blocks(buf, fi)); if (ret) { faili = i; goto fail; } } else { - blocknr = btrfs_node_blockptr(buf_node, i); + blocknr = btrfs_node_blockptr(buf, i); ret = btrfs_inc_extent_ref(trans, root, blocknr, 1); if (ret) { faili = i; @@ -496,22 +495,23 @@ fail: for (i =0; i < faili; i++) { if (leaf) { u64 disk_blocknr; - key = &buf_leaf->items[i].key; - if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + btrfs_item_key_to_cpu(buf, &key, i); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; - fi = btrfs_item_ptr(buf_leaf, i, + fi = btrfs_item_ptr(buf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) == + if (btrfs_file_extent_type(buf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + disk_blocknr = btrfs_file_extent_disk_blocknr(buf, fi); if (disk_blocknr == 0) continue; err = btrfs_free_extent(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(fi), 0); + btrfs_file_extent_disk_num_blocks(buf, + fi), 0); BUG_ON(err); } else { - blocknr = btrfs_node_blockptr(buf_node, i); + blocknr = btrfs_node_blockptr(buf, i); err = btrfs_free_extent(trans, root, blocknr, 1, 0); BUG_ON(err); } @@ -527,16 +527,18 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, int ret; int pending_ret; struct btrfs_root *extent_root = root->fs_info->extent_root; - struct btrfs_block_group_item *bi; + unsigned long bi; + struct extent_buffer *leaf; ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); if (ret < 0) goto fail; BUG_ON(ret); - bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], - struct btrfs_block_group_item); - memcpy(bi, &cache->item, sizeof(*bi)); - btrfs_mark_buffer_dirty(path->nodes[0]); + + leaf = path->nodes[0]; + bi = btrfs_item_ptr_offset(leaf, path->slots[0]); + write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); + btrfs_mark_buffer_dirty(leaf); btrfs_release_path(extent_root, path); fail: finish_current_insert(trans, extent_root); @@ -768,11 +770,11 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct unsigned long gang[8]; struct btrfs_fs_info *info = extent_root->fs_info; - btrfs_set_extent_refs(&extent_item, 1); + btrfs_set_stack_extent_refs(&extent_item, 1); ins.offset = 1; - ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); - btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid); + btrfs_set_stack_extent_owner(&extent_item, + extent_root->root_key.objectid); while(1) { ret = find_first_radix_bit(&info->extent_ins_radix, gang, 0, @@ -795,23 +797,20 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) { int err; - struct btrfs_header *header; - struct buffer_head *bh; + struct extent_buffer *buf; if (!pending) { - bh = btrfs_find_tree_block(root, blocknr); - if (bh) { - if (buffer_uptodate(bh)) { + buf = btrfs_find_tree_block(root, blocknr); + if (buf) { + if (btrfs_buffer_uptodate(buf)) { u64 transid = root->fs_info->running_transaction->transid; - header = btrfs_buffer_header(bh); - if (btrfs_header_generation(header) == - transid) { - btrfs_block_release(root, bh); + if (btrfs_header_generation(buf) == transid) { + free_extent_buffer(buf); return 0; } } - btrfs_block_release(root, bh); + free_extent_buffer(buf); } err = set_radix_bit(&root->fs_info->pinned_radix, blocknr); if (!err) { @@ -839,12 +838,12 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key key; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; + struct extent_buffer *leaf; int ret; struct btrfs_extent_item *ei; u32 refs; key.objectid = blocknr; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; @@ -856,12 +855,16 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (ret < 0) return ret; BUG_ON(ret); - ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], + + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); - BUG_ON(ei->refs == 0); - refs = btrfs_extent_refs(ei) - 1; - btrfs_set_extent_refs(ei, refs); - btrfs_mark_buffer_dirty(path->nodes[0]); + refs = btrfs_extent_refs(leaf, ei); + BUG_ON(refs == 0); + refs -= 1; + btrfs_set_extent_refs(leaf, ei, refs); + btrfs_mark_buffer_dirty(leaf); + if (refs == 0) { u64 super_blocks_used, root_blocks_used; @@ -876,8 +879,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root super_blocks_used - num_blocks); /* block accounting for root item */ - root_blocks_used = btrfs_root_blocks_used(&root->root_item); - btrfs_set_root_blocks_used(&root->root_item, + root_blocks_used = btrfs_root_used(&root->root_item); + btrfs_set_root_used(&root->root_item, root_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, path); @@ -984,7 +987,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root u64 test_block; u64 orig_search_start = search_start; int start_found; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_root * root = orig_root->fs_info->extent_root; struct btrfs_fs_info *info = root->fs_info; int total_needed = num_blocks; @@ -994,10 +997,10 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int wrapped = 0; WARN_ON(num_blocks < 1); - ins->flags = 0; btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); - level = btrfs_header_level(btrfs_buffer_header(root->node)); + level = btrfs_header_level(root->node); + if (search_end == (u64)-1) search_end = btrfs_super_total_blocks(&info->super_copy); if (hint_block) { @@ -1034,8 +1037,9 @@ check_failed: path->slots[0]--; } - l = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key); + l = path->nodes[0]; + btrfs_item_key_to_cpu(l, &key, path->slots[0]); + /* * a rare case, go back one key if we hit a block group item * instead of an extent item @@ -1055,9 +1059,9 @@ check_failed: } while (1) { - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; slot = path->slots[0]; - if (slot >= btrfs_header_nritems(&l->header)) { + if (slot >= btrfs_header_nritems(l)) { ret = btrfs_next_leaf(root, path); if (ret == 0) continue; @@ -1075,7 +1079,7 @@ check_failed: goto check_pending; } - btrfs_disk_key_to_cpu(&key, &l->items[slot].key); + btrfs_item_key_to_cpu(l, &key, slot); if (key.objectid >= search_start && key.objectid > last_block && start_found) { if (last_block < search_start) @@ -1183,8 +1187,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root = info->extent_root; struct btrfs_extent_item extent_item; - btrfs_set_extent_refs(&extent_item, 1); - btrfs_set_extent_owner(&extent_item, owner); + btrfs_set_stack_extent_refs(&extent_item, 1); + btrfs_set_stack_extent_owner(&extent_item, owner); WARN_ON(num_blocks < 1); ret = find_free_extent(trans, root, num_blocks, empty_size, @@ -1201,8 +1205,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, num_blocks); /* block accounting for root item */ - root_blocks_used = btrfs_root_blocks_used(&root->root_item); - btrfs_set_root_blocks_used(&root->root_item, root_blocks_used + + root_blocks_used = btrfs_root_used(&root->root_item); + btrfs_set_root_used(&root->root_item, root_blocks_used + num_blocks); if (root == extent_root) { @@ -1241,13 +1245,13 @@ update_block: * helper function to allocate a block for a given tree * returns the tree buffer or NULL. */ -struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint, - u64 empty_size) +struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 hint, + u64 empty_size) { struct btrfs_key ins; int ret; - struct buffer_head *buf; + struct extent_buffer *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, 1, empty_size, hint, (u64)-1, &ins, 0); @@ -1260,53 +1264,57 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, btrfs_free_extent(trans, root, ins.objectid, 1, 0); return ERR_PTR(-ENOMEM); } - WARN_ON(buffer_dirty(buf)); - set_buffer_uptodate(buf); + btrfs_set_buffer_uptodate(buf); + set_extent_dirty(&trans->transaction->dirty_pages, buf->start, + buf->start + buf->len - 1, GFP_NOFS); + /* set_buffer_checked(buf); set_buffer_defrag(buf); - set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); + */ + /* FIXME!!!!!!!!!!!!!!!! + set_radix_bit(&trans->transaction->dirty_pages, buf->pages[0]->index); + */ trans->blocks_used++; return buf; } static int drop_leaf_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct buffer_head *cur) + struct btrfs_root *root, struct extent_buffer *leaf) { - struct btrfs_disk_key *key; - struct btrfs_leaf *leaf; + struct btrfs_key key; struct btrfs_file_extent_item *fi; int i; int nritems; int ret; - BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur))); - leaf = btrfs_buffer_leaf(cur); - nritems = btrfs_header_nritems(&leaf->header); + BUG_ON(!btrfs_is_leaf(leaf)); + nritems = btrfs_header_nritems(leaf); for (i = 0; i < nritems; i++) { u64 disk_blocknr; - key = &leaf->items[i].key; - if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + + btrfs_item_key_to_cpu(leaf, &key, i); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE) + if (btrfs_file_extent_type(leaf, fi) == + BTRFS_FILE_EXTENT_INLINE) continue; /* * FIXME make sure to insert a trans record that * repeats the snapshot del on crash */ - disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + disk_blocknr = btrfs_file_extent_disk_blocknr(leaf, fi); if (disk_blocknr == 0) continue; ret = btrfs_free_extent(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(fi), - 0); + btrfs_file_extent_disk_num_blocks(leaf, fi), 0); BUG_ON(ret); } return 0; } static void reada_walk_down(struct btrfs_root *root, - struct btrfs_node *node) + struct extent_buffer *node) { int i; u32 nritems; @@ -1314,7 +1322,7 @@ static void reada_walk_down(struct btrfs_root *root, int ret; u32 refs; - nritems = btrfs_header_nritems(&node->header); + nritems = btrfs_header_nritems(node); for (i = 0; i < nritems; i++) { blocknr = btrfs_node_blockptr(node, i); ret = lookup_extent_ref(NULL, root, blocknr, 1, &refs); @@ -1337,16 +1345,17 @@ static void reada_walk_down(struct btrfs_root *root, static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level) { - struct buffer_head *next; - struct buffer_head *cur; + struct extent_buffer *next; + struct extent_buffer *cur; u64 blocknr; int ret; u32 refs; WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]), - 1, &refs); + ret = lookup_extent_ref(trans, root, + extent_buffer_blocknr(path->nodes[*level]), + 1, &refs); BUG_ON(ret); if (refs > 1) goto out; @@ -1360,21 +1369,20 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root cur = path->nodes[*level]; if (*level > 0 && path->slots[*level] == 0) - reada_walk_down(root, btrfs_buffer_node(cur)); + reada_walk_down(root, cur); - if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) + if (btrfs_header_level(cur) != *level) WARN_ON(1); if (path->slots[*level] >= - btrfs_header_nritems(btrfs_buffer_header(cur))) + btrfs_header_nritems(cur)) break; if (*level == 0) { ret = drop_leaf_ref(trans, root, cur); BUG_ON(ret); break; } - blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), - path->slots[*level]); + blocknr = btrfs_node_blockptr(cur, path->slots[*level]); ret = lookup_extent_ref(trans, root, blocknr, 1, &refs); BUG_ON(ret); if (refs != 1) { @@ -1384,8 +1392,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root continue; } next = btrfs_find_tree_block(root, blocknr); - if (!next || !buffer_uptodate(next)) { - brelse(next); + if (!next || !btrfs_buffer_uptodate(next)) { + free_extent_buffer(next); mutex_unlock(&root->fs_info->fs_mutex); next = read_tree_block(root, blocknr); mutex_lock(&root->fs_info->fs_mutex); @@ -1395,7 +1403,7 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ret); if (refs != 1) { path->slots[*level]++; - brelse(next); + free_extent_buffer(next); ret = btrfs_free_extent(trans, root, blocknr, 1, 1); BUG_ON(ret); @@ -1404,17 +1412,17 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root } WARN_ON(*level <= 0); if (path->nodes[*level-1]) - btrfs_block_release(root, path->nodes[*level-1]); + free_extent_buffer(path->nodes[*level-1]); path->nodes[*level-1] = next; - *level = btrfs_header_level(btrfs_buffer_header(next)); + *level = btrfs_header_level(next); path->slots[*level] = 0; } out: WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = btrfs_free_extent(trans, root, - bh_blocknr(path->nodes[*level]), 1, 1); - btrfs_block_release(root, path->nodes[*level]); + extent_buffer_blocknr(path->nodes[*level]), 1, 1); + free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; BUG_ON(ret); @@ -1436,24 +1444,24 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; - if (slot < btrfs_header_nritems( - btrfs_buffer_header(path->nodes[i])) - 1) { - struct btrfs_node *node; - node = btrfs_buffer_node(path->nodes[i]); + if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { + struct extent_buffer *node; + struct btrfs_disk_key disk_key; + node = path->nodes[i]; path->slots[i]++; *level = i; WARN_ON(*level == 0); + btrfs_node_key(node, &disk_key, path->slots[i]); memcpy(&root_item->drop_progress, - &node->ptrs[path->slots[i]].key, - sizeof(root_item->drop_progress)); + &disk_key, sizeof(disk_key)); root_item->drop_level = i; return 0; } else { ret = btrfs_free_extent(trans, root, - bh_blocknr(path->nodes[*level]), - 1, 1); + extent_buffer_blocknr(path->nodes[*level]), + 1, 1); BUG_ON(ret); - btrfs_block_release(root, path->nodes[*level]); + free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; } @@ -1480,15 +1488,15 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); BUG_ON(!path); - level = btrfs_header_level(btrfs_buffer_header(root->node)); + level = btrfs_header_level(root->node); orig_level = level; if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { path->nodes[level] = root->node; path->slots[level] = 0; } else { struct btrfs_key key; - struct btrfs_disk_key *found_key; - struct btrfs_node *node; + struct btrfs_disk_key found_key; + struct extent_buffer *node; btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); level = root_item->drop_level; @@ -1498,10 +1506,10 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; goto out; } - node = btrfs_buffer_node(path->nodes[level]); - found_key = &node->ptrs[path->slots[level]].key; - WARN_ON(memcmp(found_key, &root_item->drop_progress, - sizeof(*found_key))); + node = path->nodes[level]; + btrfs_node_key(node, &found_key, path->slots[level]); + WARN_ON(memcmp(&found_key, &root_item->drop_progress, + sizeof(found_key))); } while(1) { wret = walk_down_tree(trans, root, path, &level); @@ -1516,12 +1524,12 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root if (wret < 0) ret = wret; ret = -EAGAIN; - get_bh(root->node); + extent_buffer_get(root->node); break; } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { - btrfs_block_release(root, path->nodes[i]); + free_extent_buffer(path->nodes[i]); path->nodes[i] = 0; } } @@ -1581,13 +1589,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_path *path; int ret; int err = 0; - struct btrfs_block_group_item *bi; struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; struct radix_tree_root *radix; struct btrfs_key key; struct btrfs_key found_key; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; u64 group_size_blocks; u64 used; @@ -1596,7 +1603,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) root = info->extent_root; key.objectid = 0; key.offset = group_size_blocks; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); path = btrfs_alloc_path(); @@ -1610,18 +1616,18 @@ int btrfs_read_block_groups(struct btrfs_root *root) err = ret; break; } - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, - &leaf->items[path->slots[0]].key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); cache = kmalloc(sizeof(*cache), GFP_NOFS); if (!cache) { err = -1; break; } - bi = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_block_group_item); - if (bi->flags & BTRFS_BLOCK_GROUP_DATA) { + read_extent_buffer(leaf, &cache->item, + btrfs_item_ptr_offset(leaf, path->slots[0]), + sizeof(cache->item)); + if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { radix = &info->block_group_data_radix; cache->data = 1; } else { @@ -1629,7 +1635,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) cache->data = 0; } - memcpy(&cache->item, bi, sizeof(*bi)); memcpy(&cache->key, &found_key, sizeof(found_key)); cache->last_alloc = cache->key.objectid; cache->first_free = cache->key.objectid; @@ -1640,11 +1645,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); + ret = radix_tree_insert(radix, found_key.objectid + found_key.offset - 1, (void *)cache); BUG_ON(ret); - used = btrfs_block_group_used(bi); + used = btrfs_block_group_used(&cache->item); if (used < div_factor(key.offset, 8)) { radix_tree_tag_set(radix, found_key.objectid + found_key.offset - 1, diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 0ab368e091f..55fefdfab84 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -26,16 +26,6 @@ struct tree_entry { struct rb_node rb_node; }; -/* bits for the extent state */ -#define EXTENT_DIRTY 1 -#define EXTENT_WRITEBACK (1 << 1) -#define EXTENT_UPTODATE (1 << 2) -#define EXTENT_LOCKED (1 << 3) -#define EXTENT_NEW (1 << 4) -#define EXTENT_DELALLOC (1 << 5) - -#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) - void __init extent_map_init(void) { extent_map_cache = btrfs_cache_create("extent_map", @@ -223,7 +213,8 @@ int add_extent_mapping(struct extent_map_tree *tree, if (rb) prev = rb_entry(rb, struct extent_map, rb_node); if (prev && prev->end + 1 == em->start && - ((em->block_start == 0 && prev->block_start == 0) || + ((em->block_start == EXTENT_MAP_HOLE && + prev->block_start == EXTENT_MAP_HOLE) || (em->block_start == prev->block_end + 1))) { em->start = prev->start; em->block_start = prev->block_start; @@ -926,6 +917,40 @@ int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end) } EXPORT_SYMBOL(set_range_writeback); +int find_first_extent_bit(struct extent_map_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 1; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->state & bits) { + *start_ret = state->start; + *end_ret = state->end; + ret = 0; + } + node = rb_next(node); + if (!node) + break; + } +out: + write_unlock_irq(&tree->lock); + return ret; +} +EXPORT_SYMBOL(find_first_extent_bit); + u64 find_lock_delalloc_range(struct extent_map_tree *tree, u64 start, u64 lock_start, u64 *end, u64 max_bytes) { @@ -1450,7 +1475,7 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, em = NULL; /* we've found a hole, just zero and go on */ - if (block_start == 0) { + if (block_start == EXTENT_MAP_HOLE) { zero_user_page(page, page_offset, iosize, KM_USER0); set_extent_uptodate(tree, cur, cur + iosize - 1, GFP_NOFS); @@ -1593,7 +1618,8 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, free_extent_map(em); em = NULL; - if (block_start == 0 || block_start == EXTENT_MAP_INLINE) { + if (block_start == EXTENT_MAP_HOLE || + block_start == EXTENT_MAP_INLINE) { clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); cur = cur + iosize; @@ -1630,7 +1656,6 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, nr++; } done: - WARN_ON(test_range_bit(tree, start, page_end, EXTENT_DIRTY, 0)); unlock_extent(tree, start, page_end, GFP_NOFS); unlock_page(page); return 0; @@ -1827,8 +1852,623 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, // XXX(hch): block 0 is valid in some cases, e.g. XFS RT device if (em->block_start == EXTENT_MAP_INLINE || - em->block_start == 0) - return 0; + em->block_start == EXTENT_MAP_HOLE) + return 0; return (em->block_start + start - em->start) >> inode->i_blkbits; } + +struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, + u64 start, unsigned long len, + gfp_t mask) +{ + unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) - + (start >> PAGE_CACHE_SHIFT) + 1; + unsigned long i; + unsigned long index = start >> PAGE_CACHE_SHIFT; + struct extent_buffer *eb; + struct page *p; + struct address_space *mapping = tree->mapping; + int uptodate = 0; + + eb = kzalloc(EXTENT_BUFFER_SIZE(num_pages), mask); + if (!eb || IS_ERR(eb)) + return NULL; + + eb->start = start; + eb->len = len; + atomic_set(&eb->refs, 1); + + for (i = 0; i < num_pages; i++, index++) { + p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); + if (!p) + goto fail; + eb->pages[i] = p; + if (!PageUptodate(p)) + uptodate = 0; + unlock_page(p); + } + if (uptodate) + eb->flags |= EXTENT_UPTODATE; + return eb; +fail: + free_extent_buffer(eb); + return NULL; +} +EXPORT_SYMBOL(alloc_extent_buffer); + +struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, + u64 start, unsigned long len, + gfp_t mask) +{ + unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) - + (start >> PAGE_CACHE_SHIFT) + 1; + unsigned long i; + unsigned long index = start >> PAGE_CACHE_SHIFT; + struct extent_buffer *eb; + struct page *p; + struct address_space *mapping = tree->mapping; + + eb = kzalloc(EXTENT_BUFFER_SIZE(num_pages), mask); + if (!eb || IS_ERR(eb)) + return NULL; + + eb->start = start; + eb->len = len; + atomic_set(&eb->refs, 1); + + for (i = 0; i < num_pages; i++, index++) { + p = find_get_page(mapping, index); + if (!p) + goto fail; + eb->pages[i] = p; + } + return eb; +fail: + free_extent_buffer(eb); + return NULL; +} +EXPORT_SYMBOL(find_extent_buffer); + +void free_extent_buffer(struct extent_buffer *eb) +{ + unsigned long i; + unsigned long num_pages; + + if (!eb) + return; + + if (!atomic_dec_and_test(&eb->refs)) + return; + + num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT) + 1; + + for (i = 0; i < num_pages; i++) { + if (eb->pages[i]) + page_cache_release(eb->pages[i]); + } + kfree(eb); +} +EXPORT_SYMBOL(free_extent_buffer); + +int clear_extent_buffer_dirty(struct extent_map_tree *tree, + struct extent_buffer *eb) +{ + int set; + unsigned long i; + unsigned long num_pages; + struct page *page; + + u64 start = eb->start; + u64 end = start + eb->len - 1; + + set = clear_extent_dirty(tree, start, end, GFP_NOFS); + num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT) + 1; + + for (i = 0; i < num_pages; i++) { + page = eb->pages[i]; + lock_page(page); + /* + * if we're on the last page or the first page and the + * block isn't aligned on a page boundary, do extra checks + * to make sure we don't clean page that is partially dirty + */ + if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || + ((i == num_pages - 1) && + ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) { + start = page->index << PAGE_CACHE_SHIFT; + end = start + PAGE_CACHE_SIZE - 1; + if (test_range_bit(tree, start, end, + EXTENT_DIRTY, 0)) { + unlock_page(page); + continue; + } + } + clear_page_dirty_for_io(page); + unlock_page(page); + } + return 0; +} +EXPORT_SYMBOL(clear_extent_buffer_dirty); + +int wait_on_extent_buffer_writeback(struct extent_map_tree *tree, + struct extent_buffer *eb) +{ + return wait_on_extent_writeback(tree, eb->start, + eb->start + eb->len - 1); +} +EXPORT_SYMBOL(wait_on_extent_buffer_writeback); + +int set_extent_buffer_dirty(struct extent_map_tree *tree, + struct extent_buffer *eb) +{ + return set_range_dirty(tree, eb->start, eb->start + eb->len - 1); +} +EXPORT_SYMBOL(set_extent_buffer_dirty); + +int set_extent_buffer_uptodate(struct extent_map_tree *tree, + struct extent_buffer *eb) +{ + unsigned long i; + struct page *page; + unsigned long num_pages; + + num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT) + 1; + + set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, + GFP_NOFS); + for (i = 0; i < num_pages; i++) { + page = eb->pages[i]; + if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || + ((i == num_pages - 1) && + ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) { + check_page_uptodate(tree, page); + continue; + } + SetPageUptodate(page); + } + return 0; +} +EXPORT_SYMBOL(set_extent_buffer_uptodate); + +int extent_buffer_uptodate(struct extent_map_tree *tree, + struct extent_buffer *eb) +{ + if (eb->flags & EXTENT_UPTODATE) + return 1; + return test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1); +} +EXPORT_SYMBOL(extent_buffer_uptodate); + +int read_extent_buffer_pages(struct extent_map_tree *tree, + struct extent_buffer *eb, int wait) +{ + unsigned long i; + struct page *page; + int err; + int ret = 0; + unsigned long num_pages; + + if (eb->flags & EXTENT_UPTODATE) + return 0; + + if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1)) { + return 0; + } + + num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT) + 1; + for (i = 0; i < num_pages; i++) { + page = eb->pages[i]; + if (PageUptodate(page)) { + continue; + } + if (!wait) { + if (TestSetPageLocked(page)) { + continue; + } + } else { + lock_page(page); + } + if (!PageUptodate(page)) { + err = page->mapping->a_ops->readpage(NULL, page); + if (err) { + ret = err; + } + } else { + unlock_page(page); + } + } + + if (ret || !wait) { + return ret; + } + + for (i = 0; i < num_pages; i++) { + page = eb->pages[i]; + wait_on_page_locked(page); + if (!PageUptodate(page)) { + ret = -EIO; + } + } + eb->flags |= EXTENT_UPTODATE; + return ret; +} +EXPORT_SYMBOL(read_extent_buffer_pages); + +void read_extent_buffer(struct extent_buffer *eb, void *dstv, + unsigned long start, + unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *dst = (char *)dstv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + page = eb->pages[i]; + offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); + if (i == 0) + offset += start_offset; + + while(len > 0) { + WARN_ON(!PageUptodate(page)); + + cur = min(len, (PAGE_CACHE_SIZE - offset)); + // kaddr = kmap_atomic(page, KM_USER0); + kaddr = page_address(page); + memcpy(dst, kaddr + offset, cur); + // kunmap_atomic(kaddr, KM_USER0); + + dst += cur; + len -= cur; + offset = 0; + i++; + page = eb->pages[i]; + } +} +EXPORT_SYMBOL(read_extent_buffer); + +int map_extent_buffer(struct extent_buffer *eb, unsigned long start, + char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km) +{ + size_t offset; + char *kaddr; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + + if (i == 0) { + offset = start_offset; + *map_start = 0; + } else { + offset = 0; + *map_start = (i << PAGE_CACHE_SHIFT) - offset; + } + + // kaddr = kmap_atomic(eb->pages[i], km); + kaddr = page_address(eb->pages[i]); + *token = kaddr; + *map = kaddr + offset; + *map_len = PAGE_CACHE_SIZE - offset; + return 0; +} +EXPORT_SYMBOL(map_extent_buffer); + +void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) +{ + // kunmap_atomic(token, km); +} +EXPORT_SYMBOL(unmap_extent_buffer); + +int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, + unsigned long start, + unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *ptr = (char *)ptrv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + int ret = 0; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + page = eb->pages[i]; + offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); + if (i == 0) + offset += start_offset; + + while(len > 0) { + WARN_ON(!PageUptodate(page)); + + cur = min(len, (PAGE_CACHE_SIZE - offset)); + + // kaddr = kmap_atomic(page, KM_USER0); + kaddr = page_address(page); + ret = memcmp(ptr, kaddr + offset, cur); + // kunmap_atomic(kaddr, KM_USER0); + if (ret) + break; + + ptr += cur; + len -= cur; + offset = 0; + i++; + page = eb->pages[i]; + } + return ret; +} +EXPORT_SYMBOL(memcmp_extent_buffer); + +void write_extent_buffer(struct extent_buffer *eb, const void *srcv, + unsigned long start, unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *src = (char *)srcv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + page = eb->pages[i]; + offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); + if (i == 0) + offset += start_offset; + + while(len > 0) { + WARN_ON(!PageUptodate(page)); + + cur = min(len, PAGE_CACHE_SIZE - offset); + // kaddr = kmap_atomic(page, KM_USER0); + kaddr = page_address(page); + memcpy(kaddr + offset, src, cur); + // kunmap_atomic(kaddr, KM_USER0); + + src += cur; + len -= cur; + offset = 0; + i++; + page = eb->pages[i]; + } +} +EXPORT_SYMBOL(write_extent_buffer); + +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + page = eb->pages[i]; + offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); + if (i == 0) + offset += start_offset; + + while(len > 0) { + WARN_ON(!PageUptodate(page)); + + cur = min(len, PAGE_CACHE_SIZE - offset); + // kaddr = kmap_atomic(page, KM_USER0); + kaddr = page_address(page); + memset(kaddr + offset, c, cur); + // kunmap_atomic(kaddr, KM_USER0); + + len -= cur; + offset = 0; + i++; + page = eb->pages[i]; + } +} +EXPORT_SYMBOL(memset_extent_buffer); + +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len) +{ + u64 dst_len = dst->len; + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; + + WARN_ON(src->len != dst_len); + + offset = dst_offset & ((unsigned long)PAGE_CACHE_SIZE - 1); + if (i == 0) + offset += start_offset; + + while(len > 0) { + page = dst->pages[i]; + WARN_ON(!PageUptodate(page)); + + cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); + + // kaddr = kmap_atomic(page, KM_USER1); + kaddr = page_address(page); + read_extent_buffer(src, kaddr + offset, src_offset, cur); + // kunmap_atomic(kaddr, KM_USER1); + + src_offset += cur; + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(copy_extent_buffer); + +static void move_pages(struct page *dst_page, struct page *src_page, + unsigned long dst_off, unsigned long src_off, + unsigned long len) +{ + // char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); + char *dst_kaddr = page_address(dst_page); + if (dst_page == src_page) { + memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); + } else { + // char *src_kaddr = kmap_atomic(src_page, KM_USER1); + char *src_kaddr = page_address(src_page); + char *p = dst_kaddr + dst_off + len; + char *s = src_kaddr + src_off + len; + + while (len--) + *--p = *--s; + + // kunmap_atomic(src_kaddr, KM_USER1); + } + // kunmap_atomic(dst_kaddr, KM_USER0); +} + +static void copy_pages(struct page *dst_page, struct page *src_page, + unsigned long dst_off, unsigned long src_off, + unsigned long len) +{ + //kmap_atomic(dst_page, KM_USER0); + char *dst_kaddr = page_address(dst_page); + char *src_kaddr; + + if (dst_page != src_page) + src_kaddr = page_address(src_page); // kmap_atomic(src_page, KM_USER1); + else + src_kaddr = dst_kaddr; + + memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); + /* + kunmap_atomic(dst_kaddr, KM_USER0); + if (dst_page != src_page) + kunmap_atomic(src_kaddr, KM_USER1); + */ +} + +void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len) +{ + size_t cur; + size_t dst_off_in_page; + size_t src_off_in_page; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long dst_i; + unsigned long src_i; + + if (src_offset + len > dst->len) { + printk("memmove bogus src_offset %lu move len %lu len %lu\n", + src_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset + len > dst->len) { + printk("memmove bogus dst_offset %lu move len %lu len %lu\n", + dst_offset, len, dst->len); + BUG_ON(1); + } + + while(len > 0) { + dst_off_in_page = dst_offset & + ((unsigned long)PAGE_CACHE_SIZE - 1); + src_off_in_page = src_offset & + ((unsigned long)PAGE_CACHE_SIZE - 1); + + dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; + src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; + + if (src_i == 0) + src_off_in_page += start_offset; + if (dst_i == 0) + dst_off_in_page += start_offset; + + cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - + src_off_in_page)); + cur = min(cur, (unsigned long)(PAGE_CACHE_SIZE - + dst_off_in_page)); + + copy_pages(dst->pages[dst_i], dst->pages[src_i], + dst_off_in_page, src_off_in_page, cur); + + src_offset += cur; + dst_offset += cur; + len -= cur; + } +} +EXPORT_SYMBOL(memcpy_extent_buffer); + +void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len) +{ + size_t cur; + size_t dst_off_in_page; + size_t src_off_in_page; + unsigned long dst_end = dst_offset + len - 1; + unsigned long src_end = src_offset + len - 1; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long dst_i; + unsigned long src_i; + + if (src_offset + len > dst->len) { + printk("memmove bogus src_offset %lu move len %lu len %lu\n", + src_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset + len > dst->len) { + printk("memmove bogus dst_offset %lu move len %lu len %lu\n", + dst_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset < src_offset) { + memcpy_extent_buffer(dst, dst_offset, src_offset, len); + return; + } + while(len > 0) { + dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; + src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; + + dst_off_in_page = dst_end & + ((unsigned long)PAGE_CACHE_SIZE - 1); + src_off_in_page = src_end & + ((unsigned long)PAGE_CACHE_SIZE - 1); + + if (src_i == 0) + src_off_in_page += start_offset; + if (dst_i == 0) + dst_off_in_page += start_offset; + + cur = min(len, src_off_in_page + 1); + cur = min(cur, dst_off_in_page + 1); +// printk("move pages orig dst %lu src %lu len %lu, this %lu %lu %lu\n", dst_offset, src_offset, len, dst_off_in_page - cur + 1, src_off_in_page - cur + 1, cur); + move_pages(dst->pages[dst_i], dst->pages[src_i], + dst_off_in_page - cur + 1, + src_off_in_page - cur + 1, cur); + + dst_end -= cur - 1; + src_end -= cur - 1; + len -= cur; + } +} +EXPORT_SYMBOL(memmove_extent_buffer); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index d913ce01248..430b997a70f 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -3,9 +3,20 @@ #include +#define EXTENT_MAP_HOLE (u64)-3 #define EXTENT_MAP_INLINE (u64)-2 #define EXTENT_MAP_DELALLOC (u64)-1 +/* bits for the extent state */ +#define EXTENT_DIRTY 1 +#define EXTENT_WRITEBACK (1 << 1) +#define EXTENT_UPTODATE (1 << 2) +#define EXTENT_LOCKED (1 << 3) +#define EXTENT_NEW (1 << 4) +#define EXTENT_DELALLOC (1 << 5) +#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) + + struct extent_map_ops { int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); int (*writepage_io_hook)(struct page *page, u64 start, u64 end); @@ -51,10 +62,13 @@ struct extent_state { struct list_head list; }; +#define EXTENT_BUFFER_SIZE(nr) (sizeof(struct extent_buffer) + \ + (nr - 1) * sizeof(struct page *)) struct extent_buffer { u64 start; - u64 end; /* inclusive */ - char *addr; + unsigned long len; + atomic_t refs; + int flags; struct page *pages[]; }; @@ -87,8 +101,12 @@ int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); +int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask); int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); +int find_first_extent_bit(struct extent_map_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits); int extent_invalidatepage(struct extent_map_tree *tree, struct page *page, unsigned long offset); int extent_write_full_page(struct extent_map_tree *tree, struct page *page, @@ -106,4 +124,57 @@ int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end); int set_state_private(struct extent_map_tree *tree, u64 start, u64 private); int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private); void set_page_extent_mapped(struct page *page); + +struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, + u64 start, unsigned long len, + gfp_t mask); +struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, + u64 start, unsigned long len, + gfp_t mask); +void free_extent_buffer(struct extent_buffer *eb); +int read_extent_buffer_pages(struct extent_map_tree *tree, + struct extent_buffer *eb, int wait); + +static inline void extent_buffer_get(struct extent_buffer *eb) +{ + atomic_inc(&eb->refs); +} + +static inline u64 extent_buffer_blocknr(struct extent_buffer *eb) +{ + return eb->start / 4096; +} + +int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, + unsigned long start, + unsigned long len); +void read_extent_buffer(struct extent_buffer *eb, void *dst, + unsigned long start, + unsigned long len); +void write_extent_buffer(struct extent_buffer *eb, const void *src, + unsigned long start, unsigned long len); +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len); +void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len); +void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len); +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len); +int wait_on_extent_buffer_writeback(struct extent_map_tree *tree, + struct extent_buffer *eb); +int clear_extent_buffer_dirty(struct extent_map_tree *tree, + struct extent_buffer *eb); +int set_extent_buffer_dirty(struct extent_map_tree *tree, + struct extent_buffer *eb); +int set_extent_buffer_uptodate(struct extent_map_tree *tree, + struct extent_buffer *eb); +int extent_buffer_uptodate(struct extent_map_tree *tree, + struct extent_buffer *eb); +int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, + char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km); +void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); #endif diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index ea4dd4c5fce..226f6d028c3 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -34,12 +34,12 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_file_extent_item *item; struct btrfs_key file_key; struct btrfs_path *path; + struct extent_buffer *leaf; path = btrfs_alloc_path(); BUG_ON(!path); file_key.objectid = objectid; file_key.offset = pos; - file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); ret = btrfs_insert_empty_item(trans, root, path, &file_key, @@ -47,15 +47,16 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, if (ret < 0) goto out; BUG_ON(ret); - item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_blocknr(item, offset); - btrfs_set_file_extent_disk_num_blocks(item, disk_num_blocks); - btrfs_set_file_extent_offset(item, 0); - btrfs_set_file_extent_num_blocks(item, num_blocks); - btrfs_set_file_extent_generation(item, trans->transid); - btrfs_set_file_extent_type(item, BTRFS_FILE_EXTENT_REG); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_set_file_extent_disk_blocknr(leaf, item, offset); + btrfs_set_file_extent_disk_num_blocks(leaf, item, disk_num_blocks); + btrfs_set_file_extent_offset(leaf, item, 0); + btrfs_set_file_extent_num_blocks(leaf, item, num_blocks); + btrfs_set_file_extent_generation(leaf, item, trans->transid); + btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); + btrfs_mark_buffer_dirty(leaf); out: btrfs_free_path(path); return ret; @@ -71,32 +72,30 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_key file_key; struct btrfs_key found_key; struct btrfs_csum_item *item; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; u64 csum_offset = 0; int csums_in_item; file_key.objectid = objectid; file_key.offset = offset; - file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); if (ret < 0) goto fail; - leaf = btrfs_buffer_leaf(path->nodes[0]); + leaf = path->nodes[0]; if (ret > 0) { ret = 1; if (path->slots[0] == 0) goto fail; path->slots[0]--; - btrfs_disk_key_to_cpu(&found_key, - &leaf->items[path->slots[0]].key); + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || found_key.objectid != objectid) { goto fail; } csum_offset = (offset - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; - csums_in_item = btrfs_item_size(leaf->items + path->slots[0]); + csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); csums_in_item /= BTRFS_CRC32_SIZE; if (csum_offset >= csums_in_item) { @@ -127,7 +126,6 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, file_key.objectid = objectid; file_key.offset = offset; - file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); return ret; @@ -138,12 +136,14 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, u64 objectid, u64 offset, char *data, size_t len) { + return 0; +#if 0 int ret; struct btrfs_key file_key; struct btrfs_key found_key; struct btrfs_path *path; struct btrfs_csum_item *item; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; u64 csum_offset; path = btrfs_alloc_path(); @@ -161,8 +161,8 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, if (ret == -EFBIG) { u32 item_size; /* we found one, but it isn't big enough yet */ - leaf = btrfs_buffer_leaf(path->nodes[0]); - item_size = btrfs_item_size(leaf->items + path->slots[0]); + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); if ((item_size / BTRFS_CRC32_SIZE) >= MAX_CSUM_ITEMS(root)) { /* already at max size, make a new one */ goto insert; @@ -188,8 +188,8 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, goto insert; } path->slots[0]--; - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, &leaf->items[path->slots[0]].key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); csum_offset = (offset - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || @@ -197,10 +197,10 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, csum_offset >= MAX_CSUM_ITEMS(root)) { goto insert; } - if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / + if (csum_offset >= btrfs_item_size_nr(leaf, path->slots[0]) / BTRFS_CRC32_SIZE) { u32 diff = (csum_offset + 1) * BTRFS_CRC32_SIZE; - diff = diff - btrfs_item_size(leaf->items + path->slots[0]); + diff = diff - btrfs_item_size_nr(leaf, path->slots[0]); if (diff != BTRFS_CRC32_SIZE) goto insert; ret = btrfs_extend_item(trans, root, path, diff); @@ -220,21 +220,20 @@ insert: goto fail; } csum: - item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], - struct btrfs_csum_item); + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); ret = 0; item = (struct btrfs_csum_item *)((unsigned char *)item + csum_offset * BTRFS_CRC32_SIZE); found: - btrfs_check_bounds(&item->csum, BTRFS_CRC32_SIZE, - path->nodes[0]->b_data, - root->fs_info->sb->s_blocksize); + /* FIXME!!!!!!!!!!!! */ ret = btrfs_csum_data(root, data, len, &item->csum); btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_release_path(root, path); btrfs_free_path(path); return ret; +#endif } int btrfs_csum_truncate(struct btrfs_trans_handle *trans, @@ -242,21 +241,21 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, u64 isize) { struct btrfs_key key; - struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[0]); + struct extent_buffer *leaf = path->nodes[0]; int slot = path->slots[0]; int ret; u32 new_item_size; u64 new_item_span; u64 blocks; - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + btrfs_item_key_to_cpu(leaf, &key, slot); if (isize <= key.offset) return 0; new_item_span = isize - key.offset; - blocks = (new_item_span + root->blocksize - 1) >> + blocks = (new_item_span + root->sectorsize - 1) >> root->fs_info->sb->s_blocksize_bits; new_item_size = blocks * BTRFS_CRC32_SIZE; - if (new_item_size >= btrfs_item_size(leaf->items + slot)) + if (new_item_size >= btrfs_item_size_nr(leaf, slot)) return 0; ret = btrfs_truncate_item(trans, root, path, new_item_size); BUG_ON(ret); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4cc459c943e..1734ca69555 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include #include #include @@ -88,7 +87,9 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, { struct btrfs_key key; struct btrfs_path *path; - char *ptr, *kaddr; + struct extent_buffer *leaf; + char *kaddr; + unsigned long ptr; struct btrfs_file_extent_item *ei; u32 datasize; int err = 0; @@ -102,7 +103,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, key.objectid = inode->i_ino; key.offset = offset; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); BUG_ON(size >= PAGE_CACHE_SIZE); datasize = btrfs_file_extent_calc_inline_size(size); @@ -113,18 +113,17 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, err = ret; goto fail; } - ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(ei, trans->transid); - btrfs_set_file_extent_type(ei, - BTRFS_FILE_EXTENT_INLINE); + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, ei, trans->transid); + btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); kaddr = kmap_atomic(page, KM_USER0); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, kaddr + page_offset, size); + write_extent_buffer(leaf, kaddr + page_offset, ptr, size); kunmap_atomic(kaddr, KM_USER0); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(leaf); fail: btrfs_free_path(path); return err; @@ -156,8 +155,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, em->bdev = inode->i_sb->s_bdev; - start_pos = pos & ~((u64)root->blocksize - 1); - num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> + start_pos = pos & ~((u64)root->sectorsize - 1); + num_blocks = (write_bytes + pos - start_pos + root->sectorsize - 1) >> inode->i_blkbits; down_read(&BTRFS_I(inode)->root->snap_sem); @@ -184,7 +183,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, if (inode->i_size < start_pos) { u64 last_pos_in_file; u64 hole_size; - u64 mask = root->blocksize - 1; + u64 mask = root->sectorsize - 1; last_pos_in_file = (isize + mask) & ~mask; hole_size = (start_pos - last_pos_in_file + mask) & ~mask; @@ -227,8 +226,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, /* step one, delete the existing extents in this range */ /* FIXME blocksize != pagesize */ err = btrfs_drop_extents(trans, root, inode, start_pos, - (pos + write_bytes + root->blocksize -1) & - ~((u64)root->blocksize - 1), &hint_block); + (pos + write_bytes + root->sectorsize -1) & + ~((u64)root->sectorsize - 1), &hint_block); if (err) goto failed; @@ -288,7 +287,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, { int ret; struct btrfs_key key; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; int slot; struct btrfs_file_extent_item *extent; u64 extent_end = 0; @@ -327,10 +326,10 @@ next_slot: found_extent = 0; found_inline = 0; extent = NULL; - leaf = btrfs_buffer_leaf(path->nodes[0]); + leaf = path->nodes[0]; slot = path->slots[0]; ret = 0; - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + btrfs_item_key_to_cpu(leaf, &key, slot); if (key.offset >= end || key.objectid != inode->i_ino) { goto out; } @@ -344,17 +343,18 @@ next_slot: if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); - found_type = btrfs_file_extent_type(extent); + found_type = btrfs_file_extent_type(leaf, extent); if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = key.offset + - (btrfs_file_extent_num_blocks(extent) << + (btrfs_file_extent_num_blocks(leaf, extent) << inode->i_blkbits); found_extent = 1; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + struct btrfs_item *item; + item = btrfs_item_nr(leaf, slot); found_inline = 1; extent_end = key.offset + - btrfs_file_extent_inline_len(leaf->items + - slot); + btrfs_file_extent_inline_len(leaf, item); } } else { extent_end = search_start; @@ -365,8 +365,7 @@ next_slot: search_start >= extent_end) { int nextret; u32 nritems; - nritems = btrfs_header_nritems( - btrfs_buffer_header(path->nodes[0])); + nritems = btrfs_header_nritems(leaf); if (slot >= nritems - 1) { nextret = btrfs_next_leaf(root, path); if (nextret) @@ -380,7 +379,7 @@ next_slot: /* FIXME, there's only one inline extent allowed right now */ if (found_inline) { - u64 mask = root->blocksize - 1; + u64 mask = root->sectorsize - 1; search_start = (extent_end + mask) & ~mask; } else search_start = extent_end; @@ -388,10 +387,13 @@ next_slot: if (end < extent_end && end >= key.offset) { if (found_extent) { u64 disk_blocknr = - btrfs_file_extent_disk_blocknr(extent); + btrfs_file_extent_disk_blocknr(leaf,extent); u64 disk_num_blocks = - btrfs_file_extent_disk_num_blocks(extent); - memcpy(&old, extent, sizeof(old)); + btrfs_file_extent_disk_num_blocks(leaf, + extent); + read_extent_buffer(leaf, &old, + (unsigned long)extent, + sizeof(old)); if (disk_blocknr != 0) { ret = btrfs_inc_extent_ref(trans, root, disk_blocknr, disk_num_blocks); @@ -406,20 +408,24 @@ next_slot: u64 new_num; u64 old_num; keep = 1; - WARN_ON(start & (root->blocksize - 1)); + WARN_ON(start & (root->sectorsize - 1)); if (found_extent) { new_num = (start - key.offset) >> inode->i_blkbits; - old_num = btrfs_file_extent_num_blocks(extent); + old_num = btrfs_file_extent_num_blocks(leaf, + extent); *hint_block = - btrfs_file_extent_disk_blocknr(extent); - if (btrfs_file_extent_disk_blocknr(extent)) { + btrfs_file_extent_disk_blocknr(leaf, + extent); + if (btrfs_file_extent_disk_blocknr(leaf, + extent)) { inode->i_blocks -= (old_num - new_num) << 3; } - btrfs_set_file_extent_num_blocks(extent, + btrfs_set_file_extent_num_blocks(leaf, + extent, new_num); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(leaf); } else { WARN_ON(1); } @@ -431,13 +437,17 @@ next_slot: u64 extent_num_blocks = 0; if (found_extent) { disk_blocknr = - btrfs_file_extent_disk_blocknr(extent); + btrfs_file_extent_disk_blocknr(leaf, + extent); disk_num_blocks = - btrfs_file_extent_disk_num_blocks(extent); + btrfs_file_extent_disk_num_blocks(leaf, + extent); extent_num_blocks = - btrfs_file_extent_num_blocks(extent); + btrfs_file_extent_num_blocks(leaf, + extent); *hint_block = - btrfs_file_extent_disk_blocknr(extent); + btrfs_file_extent_disk_blocknr(leaf, + extent); } ret = btrfs_del_item(trans, root, path); /* TODO update progress marker and return */ @@ -464,42 +474,37 @@ next_slot: struct btrfs_key ins; ins.objectid = inode->i_ino; ins.offset = end; - ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); btrfs_release_path(root, path); ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*extent)); + leaf = path->nodes[0]; if (ret) { - btrfs_print_leaf(root, btrfs_buffer_leaf(path->nodes[0])); - printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu keep was %d\n", ret , ins.objectid, ins.flags, ins.offset, start, end, key.offset, extent_end, keep); + btrfs_print_leaf(root, leaf); + printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu keep was %d\n", ret , ins.objectid, ins.type, ins.offset, start, end, key.offset, extent_end, keep); } BUG_ON(ret); - extent = btrfs_item_ptr( - btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_blocknr(extent, - btrfs_file_extent_disk_blocknr(&old)); - btrfs_set_file_extent_disk_num_blocks(extent, - btrfs_file_extent_disk_num_blocks(&old)); - - btrfs_set_file_extent_offset(extent, - btrfs_file_extent_offset(&old) + + extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + write_extent_buffer(leaf, &old, + (unsigned long)extent, sizeof(old)); + + btrfs_set_file_extent_offset(leaf, extent, + le64_to_cpu(old.offset) + ((end - key.offset) >> inode->i_blkbits)); - WARN_ON(btrfs_file_extent_num_blocks(&old) < + WARN_ON(le64_to_cpu(old.num_blocks) < (extent_end - end) >> inode->i_blkbits); - btrfs_set_file_extent_num_blocks(extent, + btrfs_set_file_extent_num_blocks(leaf, extent, (extent_end - end) >> inode->i_blkbits); - btrfs_set_file_extent_type(extent, + btrfs_set_file_extent_type(leaf, extent, BTRFS_FILE_EXTENT_REG); - btrfs_set_file_extent_generation(extent, - btrfs_file_extent_generation(&old)); btrfs_mark_buffer_dirty(path->nodes[0]); - if (btrfs_file_extent_disk_blocknr(&old) != 0) { + if (le64_to_cpu(old.disk_blocknr) != 0) { inode->i_blocks += - btrfs_file_extent_num_blocks(extent) << 3; + btrfs_file_extent_num_blocks(leaf, + extent) << 3; } ret = 0; goto out; @@ -529,8 +534,8 @@ static int prepare_pages(struct btrfs_root *root, u64 num_blocks; u64 start_pos; - start_pos = pos & ~((u64)root->blocksize - 1); - num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> + start_pos = pos & ~((u64)root->sectorsize - 1); + num_blocks = (write_bytes + pos - start_pos + root->sectorsize - 1) >> inode->i_blkbits; memset(pages, 0, num_pages * sizeof(struct page *)); diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index b7834617753..35d2608f891 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -20,24 +20,18 @@ #include "disk-io.h" #include "transaction.h" -int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 objectid, struct btrfs_inode_item - *inode_item) +int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 objectid) { - struct btrfs_path *path; struct btrfs_key key; int ret; key.objectid = objectid; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; - path = btrfs_alloc_path(); - BUG_ON(!path); - ret = btrfs_insert_item(trans, root, &key, inode_item, - sizeof(*inode_item)); - btrfs_release_path(root, path); - btrfs_free_path(path); + ret = btrfs_insert_empty_item(trans, root, path, &key, + sizeof(struct btrfs_inode_item)); if (ret == 0 && objectid > root->highest_inode) root->highest_inode = objectid; return ret; @@ -51,15 +45,15 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root int cow = mod != 0; int ret; int slot; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; struct btrfs_key found_key; ret = btrfs_search_slot(trans, root, location, path, ins_len, cow); if (ret > 0 && btrfs_key_type(location) == BTRFS_ROOT_ITEM_KEY && location->offset == (u64)-1 && path->slots[0] != 0) { slot = path->slots[0] - 1; - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, &leaf->items[slot].key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, slot); if (found_key.objectid == location->objectid && btrfs_key_type(&found_key) == btrfs_key_type(location)) { path->slots[0]--; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 40547086625..ab74977adf5 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -24,8 +24,9 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) { struct btrfs_path *path; int ret; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_key search_key; + struct btrfs_key found_key; int slot; path = btrfs_alloc_path(); @@ -39,8 +40,9 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) BUG_ON(ret == 0); if (path->slots[0] > 0) { slot = path->slots[0] - 1; - l = btrfs_buffer_leaf(path->nodes[0]); - *objectid = btrfs_disk_key_objectid(&l->items[slot].key); + l = path->nodes[0]; + btrfs_item_key_to_cpu(l, &found_key, slot); + *objectid = found_key.objectid; } else { *objectid = BTRFS_FIRST_FREE_OBJECTID; } @@ -64,13 +66,12 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, int slot = 0; u64 last_ino = 0; int start_found; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_key search_key; u64 search_start = dirid; path = btrfs_alloc_path(); BUG_ON(!path); - search_key.flags = 0; search_start = root->last_inode_alloc; search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); search_key.objectid = search_start; @@ -86,9 +87,9 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, path->slots[0]--; while (1) { - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; slot = path->slots[0]; - if (slot >= btrfs_header_nritems(&l->header)) { + if (slot >= btrfs_header_nritems(l)) { ret = btrfs_next_leaf(root, path); if (ret == 0) continue; @@ -103,7 +104,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, last_ino : search_start; goto found; } - btrfs_disk_key_to_cpu(&key, &l->items[slot].key); + btrfs_item_key_to_cpu(l, &key, slot); if (key.objectid >= search_start) { if (start_found) { if (last_ino < search_start) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b03d40a907c..fbe2836364e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -159,10 +159,8 @@ out: int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) { - char csum[BTRFS_CRC32_SIZE]; size_t offset = start - (page->index << PAGE_CACHE_SHIFT); struct inode *inode = page->mapping->host; - struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; char *kaddr; u64 private; @@ -173,11 +171,15 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) if (ret) { goto zeroit; } + /* + struct btrfs_root *root = BTRFS_I(inode)->root; + char csum[BTRFS_CRC32_SIZE]; ret = btrfs_csum_data(root, kaddr + offset, end - start + 1, csum); BUG_ON(ret); if (memcmp(csum, &private, BTRFS_CRC32_SIZE)) { goto zeroit; } + */ kunmap_atomic(kaddr, KM_IRQ0); return 0; @@ -192,7 +194,9 @@ zeroit: void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; + struct extent_buffer *leaf; struct btrfs_inode_item *inode_item; + struct btrfs_inode_timespec *tspec; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key location; u64 alloc_group_block; @@ -205,29 +209,37 @@ void btrfs_read_locked_inode(struct inode *inode) memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); ret = btrfs_lookup_inode(NULL, root, path, &location, 0); - if (ret) { + if (ret) goto make_bad; - } - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_inode_item); - inode->i_mode = btrfs_inode_mode(inode_item); - inode->i_nlink = btrfs_inode_nlink(inode_item); - inode->i_uid = btrfs_inode_uid(inode_item); - inode->i_gid = btrfs_inode_gid(inode_item); - inode->i_size = btrfs_inode_size(inode_item); - inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime); - inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime); - inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime); - inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime); - inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime); - inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); - inode->i_blocks = btrfs_inode_nblocks(inode_item); - inode->i_generation = btrfs_inode_generation(inode_item); + leaf = path->nodes[0]; + inode_item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_inode_item); + + inode->i_mode = btrfs_inode_mode(leaf, inode_item); + inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); + inode->i_uid = btrfs_inode_uid(leaf, inode_item); + inode->i_gid = btrfs_inode_gid(leaf, inode_item); + inode->i_size = btrfs_inode_size(leaf, inode_item); + + tspec = btrfs_inode_atime(inode_item); + inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec); + inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); + + tspec = btrfs_inode_mtime(inode_item); + inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec); + inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); + + tspec = btrfs_inode_ctime(inode_item); + inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec); + inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); + + inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item); + inode->i_generation = btrfs_inode_generation(leaf, inode_item); inode->i_rdev = 0; - rdev = btrfs_inode_rdev(inode_item); - alloc_group_block = btrfs_inode_block_group(inode_item); + rdev = btrfs_inode_rdev(leaf, inode_item); + + alloc_group_block = btrfs_inode_block_group(leaf, inode_item); BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info, alloc_group_block); @@ -267,24 +279,35 @@ make_bad: make_bad_inode(inode); } -static void fill_inode_item(struct btrfs_inode_item *item, +static void fill_inode_item(struct extent_buffer *leaf, + struct btrfs_inode_item *item, struct inode *inode) { - btrfs_set_inode_uid(item, inode->i_uid); - btrfs_set_inode_gid(item, inode->i_gid); - btrfs_set_inode_size(item, inode->i_size); - btrfs_set_inode_mode(item, inode->i_mode); - btrfs_set_inode_nlink(item, inode->i_nlink); - btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec); - btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec); - btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec); - btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec); - btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec); - btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); - btrfs_set_inode_nblocks(item, inode->i_blocks); - btrfs_set_inode_generation(item, inode->i_generation); - btrfs_set_inode_rdev(item, inode->i_rdev); - btrfs_set_inode_block_group(item, + btrfs_set_inode_uid(leaf, item, inode->i_uid); + btrfs_set_inode_gid(leaf, item, inode->i_gid); + btrfs_set_inode_size(leaf, item, inode->i_size); + btrfs_set_inode_mode(leaf, item, inode->i_mode); + btrfs_set_inode_nlink(leaf, item, inode->i_nlink); + + btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item), + inode->i_atime.tv_sec); + btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item), + inode->i_atime.tv_nsec); + + btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item), + inode->i_mtime.tv_sec); + btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item), + inode->i_mtime.tv_nsec); + + btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item), + inode->i_ctime.tv_sec); + btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item), + inode->i_ctime.tv_nsec); + + btrfs_set_inode_nblocks(leaf, item, inode->i_blocks); + btrfs_set_inode_generation(leaf, item, inode->i_generation); + btrfs_set_inode_rdev(leaf, item, inode->i_rdev); + btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group->key.objectid); } @@ -294,6 +317,7 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, { struct btrfs_inode_item *inode_item; struct btrfs_path *path; + struct extent_buffer *leaf; int ret; path = btrfs_alloc_path(); @@ -306,12 +330,12 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, goto failed; } - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], + leaf = path->nodes[0]; + inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); - fill_inode_item(inode_item, inode); - btrfs_mark_buffer_dirty(path->nodes[0]); + fill_inode_item(leaf, inode_item, inode); + btrfs_mark_buffer_dirty(leaf); btrfs_set_inode_last_trans(trans, inode); ret = 0; failed: @@ -330,8 +354,9 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, const char *name = dentry->d_name.name; int name_len = dentry->d_name.len; int ret = 0; - u64 objectid; + struct extent_buffer *leaf; struct btrfs_dir_item *di; + struct btrfs_key key; path = btrfs_alloc_path(); if (!path) { @@ -349,14 +374,15 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, ret = -ENOENT; goto err; } - objectid = btrfs_disk_key_objectid(&di->location); + leaf = path->nodes[0]; + btrfs_dir_item_key_to_cpu(leaf, di, &key); ret = btrfs_delete_one_dir_name(trans, root, path, di); if (ret) goto err; btrfs_release_path(root, path); di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, - objectid, name, name_len, -1); + key.objectid, name, name_len, -1); if (IS_ERR(di)) { ret = PTR_ERR(di); goto err; @@ -391,12 +417,15 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) root = BTRFS_I(dir)->root; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); ret = btrfs_unlink_trans(trans, root, dir, dentry); nr = trans->blocks_used; + btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); + return ret; } @@ -411,7 +440,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) struct btrfs_trans_handle *trans; struct btrfs_key found_key; int found_type; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; char *goodnames = ".."; unsigned long nr; @@ -419,10 +448,11 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) BUG_ON(!path); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); key.objectid = inode->i_ino; key.offset = (u64)-1; - key.flags = (u32)-1; + key.type = (u8)-1; while(1) { ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) { @@ -435,9 +465,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) goto out; } path->slots[0]--; - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, - &leaf->items[path->slots[0]].key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); found_type = btrfs_key_type(&found_key); if (found_key.objectid != inode->i_ino) { err = -ENOENT; @@ -513,9 +542,9 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, int ret; struct btrfs_path *path; struct btrfs_key key; - struct btrfs_disk_key *found_key; + struct btrfs_key found_key; u32 found_type; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; u64 extent_start = 0; u64 extent_num_blocks = 0; @@ -527,10 +556,12 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); path->reada = -1; BUG_ON(!path); + /* FIXME, add redo link to tree so we don't leak on crash */ key.objectid = inode->i_ino; key.offset = (u64)-1; - key.flags = (u32)-1; + key.type = (u8)-1; + while(1) { btrfs_init_path(path); fi = NULL; @@ -542,27 +573,28 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, BUG_ON(path->slots[0] == 0); path->slots[0]--; } - leaf = btrfs_buffer_leaf(path->nodes[0]); - found_key = &leaf->items[path->slots[0]].key; - found_type = btrfs_disk_key_type(found_key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + found_type = btrfs_key_type(&found_key); - if (btrfs_disk_key_objectid(found_key) != inode->i_ino) + if (found_key.objectid != inode->i_ino) break; + if (found_type != BTRFS_CSUM_ITEM_KEY && found_type != BTRFS_DIR_ITEM_KEY && found_type != BTRFS_DIR_INDEX_KEY && found_type != BTRFS_EXTENT_DATA_KEY) break; - item_end = btrfs_disk_key_offset(found_key); + item_end = found_key.offset; if (found_type == BTRFS_EXTENT_DATA_KEY) { - fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], + fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) != + if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_INLINE) { - item_end += btrfs_file_extent_num_blocks(fi) << - inode->i_blkbits; + item_end += + btrfs_file_extent_num_blocks(leaf, fi) << + inode->i_blkbits; } } if (found_type == BTRFS_CSUM_ITEM_KEY) { @@ -583,7 +615,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, btrfs_set_key_type(&key, found_type); continue; } - if (btrfs_disk_key_offset(found_key) >= inode->i_size) + if (found_key.offset >= inode->i_size) del_item = 1; else del_item = 0; @@ -591,30 +623,31 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, /* FIXME, shrink the extent if the ref count is only 1 */ if (found_type == BTRFS_EXTENT_DATA_KEY && - btrfs_file_extent_type(fi) != + btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_INLINE) { u64 num_dec; - extent_start = btrfs_file_extent_disk_blocknr(fi); + extent_start = btrfs_file_extent_disk_blocknr(leaf, fi); if (!del_item) { u64 orig_num_blocks = - btrfs_file_extent_num_blocks(fi); + btrfs_file_extent_num_blocks(leaf, fi); extent_num_blocks = inode->i_size - - btrfs_disk_key_offset(found_key) + - root->blocksize - 1; + found_key.offset + root->sectorsize - 1; extent_num_blocks >>= inode->i_blkbits; - btrfs_set_file_extent_num_blocks(fi, + btrfs_set_file_extent_num_blocks(leaf, fi, extent_num_blocks); num_dec = (orig_num_blocks - extent_num_blocks) << 3; if (extent_start != 0) { inode->i_blocks -= num_dec; } - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(leaf); } else { extent_num_blocks = - btrfs_file_extent_disk_num_blocks(fi); + btrfs_file_extent_disk_num_blocks(leaf, + fi); /* FIXME blocksize != 4096 */ - num_dec = btrfs_file_extent_num_blocks(fi) << 3; + num_dec = btrfs_file_extent_num_blocks(leaf, + fi) << 3; if (extent_start != 0) { found_extent = 1; inode->i_blocks -= num_dec; @@ -725,7 +758,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - u64 mask = root->blocksize - 1; + u64 mask = root->sectorsize - 1; u64 pos = (inode->i_size + mask) & ~mask; u64 block_end = attr->ia_size | mask; u64 hole_size; @@ -771,9 +804,11 @@ void btrfs_delete_inode(struct inode *inode) if (is_bad_inode(inode)) { goto no_delete; } + inode->i_size = 0; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); ret = btrfs_truncate_in_trans(trans, root, inode); if (ret) @@ -782,6 +817,7 @@ void btrfs_delete_inode(struct inode *inode) if (ret) goto no_delete_lock; nr = trans->blocks_used; + btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); @@ -819,7 +855,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, ret = 0; goto out; } - btrfs_disk_key_to_cpu(location, &di->location); + btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); out: btrfs_release_path(root, path); btrfs_free_path(path); @@ -856,7 +892,6 @@ static int fixup_tree_root_location(struct btrfs_root *root, ri = &(*sub_root)->root_item; location->objectid = btrfs_root_dirid(ri); - location->flags = 0; btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); location->offset = 0; @@ -908,11 +943,14 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); + mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_inode_by_name(dir, dentry, &location); mutex_unlock(&root->fs_info->fs_mutex); + if (ret < 0) return ERR_PTR(ret); + inode = NULL; if (location.objectid) { ret = fixup_tree_root_location(root, &location, &sub_root, @@ -952,10 +990,11 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct btrfs_item *item; struct btrfs_dir_item *di; struct btrfs_key key; + struct btrfs_key found_key; struct btrfs_path *path; int ret; u32 nritems; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; int slot; int advance; unsigned char d_type; @@ -964,15 +1003,19 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) u32 di_total; u32 di_len; int key_type = BTRFS_DIR_INDEX_KEY; + char tmp_name[32]; + char *name_ptr; + int name_len; /* FIXME, use a real flag for deciding about the key type */ if (root->fs_info->tree_root == root) key_type = BTRFS_DIR_ITEM_KEY; + mutex_lock(&root->fs_info->fs_mutex); key.objectid = inode->i_ino; - key.flags = 0; btrfs_set_key_type(&key, key_type); key.offset = filp->f_pos; + path = btrfs_alloc_path(); path->reada = 2; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); @@ -980,16 +1023,16 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) goto err; advance = 0; while(1) { - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; if (advance || slot >= nritems) { if (slot >= nritems -1) { ret = btrfs_next_leaf(root, path); if (ret) break; - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; } else { slot++; @@ -997,28 +1040,48 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } } advance = 1; - item = leaf->items + slot; - if (btrfs_disk_key_objectid(&item->key) != key.objectid) + item = btrfs_item_nr(leaf, slot); + btrfs_item_key_to_cpu(leaf, &found_key, slot); + + if (found_key.objectid != key.objectid) break; - if (btrfs_disk_key_type(&item->key) != key_type) + if (btrfs_key_type(&found_key) != key_type) break; - if (btrfs_disk_key_offset(&item->key) < filp->f_pos) + if (found_key.offset < filp->f_pos) continue; - filp->f_pos = btrfs_disk_key_offset(&item->key); + + filp->f_pos = found_key.offset; advance = 1; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); di_cur = 0; - di_total = btrfs_item_size(leaf->items + slot); + di_total = btrfs_item_size(leaf, item); while(di_cur < di_total) { - d_type = btrfs_filetype_table[btrfs_dir_type(di)]; - over = filldir(dirent, (const char *)(di + 1), - btrfs_dir_name_len(di), - btrfs_disk_key_offset(&item->key), - btrfs_disk_key_objectid(&di->location), + struct btrfs_key location; + + name_len = btrfs_dir_name_len(leaf, di); + if (name_len < 32) { + name_ptr = tmp_name; + } else { + name_ptr = kmalloc(name_len, GFP_NOFS); + BUG_ON(!name_ptr); + } + read_extent_buffer(leaf, name_ptr, + (unsigned long)(di + 1), name_len); + + d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; + btrfs_dir_item_key_to_cpu(leaf, di, &location); + + over = filldir(dirent, name_ptr, name_len, + found_key.offset, + location.objectid, d_type); + + if (name_ptr != tmp_name) + kfree(name_ptr); + if (over) goto nopos; - di_len = btrfs_dir_name_len(di) + sizeof(*di); + di_len = btrfs_dir_name_len(leaf, di) + sizeof(*di); di_cur += di_len; di = (struct btrfs_dir_item *)((char *)di + di_len); } @@ -1075,11 +1138,15 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, int mode) { struct inode *inode; - struct btrfs_inode_item inode_item; + struct btrfs_inode_item *inode_item; struct btrfs_key *location; + struct btrfs_path *path; int ret; int owner; + path = btrfs_alloc_path(); + BUG_ON(!path); + inode = new_inode(root->fs_info->sb); if (!inode) return ERR_PTR(-ENOMEM); @@ -1095,24 +1162,32 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, group = btrfs_find_block_group(root, group, 0, 0, owner); BTRFS_I(inode)->block_group = group; + ret = btrfs_insert_empty_inode(trans, root, path, objectid); + if (ret) + goto fail; + inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; inode->i_mode = mode; inode->i_ino = objectid; inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - fill_inode_item(&inode_item, inode); + inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + fill_inode_item(path->nodes[0], inode_item, inode); + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_free_path(path); + location = &BTRFS_I(inode)->location; location->objectid = objectid; - location->flags = 0; location->offset = 0; btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); - ret = btrfs_insert_inode(trans, root, objectid, &inode_item); - if (ret) - return ERR_PTR(ret); insert_inode_hash(inode); return inode; +fail: + btrfs_free_path(path); + return ERR_PTR(ret); } static inline u8 btrfs_inode_type(struct inode *inode) @@ -1127,8 +1202,8 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, struct btrfs_key key; struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root; struct inode *parent_inode; + key.objectid = inode->i_ino; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; @@ -1285,14 +1360,18 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, inc_nlink(inode); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); atomic_inc(&inode->i_count); err = btrfs_add_nondir(trans, dentry, inode); + if (err) drop_inode = 1; + dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, dir); err = btrfs_update_inode(trans, root, inode); + if (err) drop_inode = 1; @@ -1321,13 +1400,13 @@ static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, key.objectid = objectid; key.offset = 0; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid, &key, BTRFS_FT_DIR); if (ret) goto error; + key.objectid = dirid; ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid, &key, BTRFS_FT_DIR); @@ -1350,6 +1429,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); + if (IS_ERR(trans)) { err = PTR_ERR(trans); goto out_unlock; @@ -1367,6 +1447,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) err = PTR_ERR(inode); goto out_fail; } + drop_on_err = 1; inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; @@ -1380,9 +1461,11 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) err = btrfs_update_inode(trans, root, inode); if (err) goto out_fail; + err = btrfs_add_link(trans, dentry, inode); if (err) goto out_fail; + d_instantiate(dentry, inode); drop_on_err = 0; dir->i_sb->s_dirt = 1; @@ -1392,6 +1475,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) out_fail: nr = trans->blocks_used; btrfs_end_transaction(trans, root); + out_unlock: mutex_unlock(&root->fs_info->fs_mutex); if (drop_on_err) @@ -1415,8 +1499,8 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, struct btrfs_path *path; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_file_extent_item *item; - struct btrfs_leaf *leaf; - struct btrfs_disk_key *found_key; + struct extent_buffer *leaf; + struct btrfs_key found_key; struct extent_map *em = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct btrfs_trans_handle *trans = NULL; @@ -1436,8 +1520,8 @@ again: err = -ENOMEM; goto out; } - em->start = 0; - em->end = 0; + em->start = EXTENT_MAP_HOLE; + em->end = EXTENT_MAP_HOLE; } em->bdev = inode->i_sb->s_bdev; ret = btrfs_lookup_file_extent(NULL, root, path, @@ -1453,25 +1537,27 @@ again: path->slots[0]--; } - item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - leaf = btrfs_buffer_leaf(path->nodes[0]); - blocknr = btrfs_file_extent_disk_blocknr(item); - blocknr += btrfs_file_extent_offset(item); + + blocknr = btrfs_file_extent_disk_blocknr(leaf, item); + blocknr += btrfs_file_extent_offset(leaf, item); /* are we inside the extent that was found? */ - found_key = &leaf->items[path->slots[0]].key; - found_type = btrfs_disk_key_type(found_key); - if (btrfs_disk_key_objectid(found_key) != objectid || + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + found_type = btrfs_key_type(&found_key); + if (found_key.objectid != objectid || found_type != BTRFS_EXTENT_DATA_KEY) { goto not_found; } - found_type = btrfs_file_extent_type(item); - extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); + found_type = btrfs_file_extent_type(leaf, item); + extent_start = found_key.offset; if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = extent_start + - (btrfs_file_extent_num_blocks(item) << inode->i_blkbits); + (btrfs_file_extent_num_blocks(leaf, item) << + inode->i_blkbits); err = 0; if (start < extent_start || start >= extent_end) { em->start = start; @@ -1484,28 +1570,29 @@ again: } goto not_found_em; } - if (btrfs_file_extent_disk_blocknr(item) == 0) { + if (btrfs_file_extent_disk_blocknr(leaf, item) == 0) { em->start = extent_start; em->end = extent_end - 1; - em->block_start = 0; - em->block_end = 0; + em->block_start = EXTENT_MAP_HOLE; + em->block_end = EXTENT_MAP_HOLE; goto insert; } em->block_start = blocknr << inode->i_blkbits; em->block_end = em->block_start + - (btrfs_file_extent_num_blocks(item) << + (btrfs_file_extent_num_blocks(leaf, item) << inode->i_blkbits) - 1; em->start = extent_start; em->end = extent_end - 1; goto insert; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - char *ptr; + unsigned long ptr; char *map; u32 size; - size = btrfs_file_extent_inline_len(leaf->items + - path->slots[0]); - extent_end = extent_start | ((u64)root->blocksize - 1); + size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, + path->slots[0])); + + extent_end = extent_start | ((u64)root->sectorsize - 1); if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { @@ -1517,18 +1604,21 @@ again: } goto not_found_em; } + em->block_start = EXTENT_MAP_INLINE; em->block_end = EXTENT_MAP_INLINE; em->start = extent_start; em->end = extent_end; + if (!page) { goto insert; } + ptr = btrfs_file_extent_inline_start(item); map = kmap(page); - memcpy(map + page_offset, ptr, size); + read_extent_buffer(leaf, map + page_offset, ptr, size); memset(map + page_offset + size, 0, - root->blocksize - (page_offset + size)); + root->sectorsize - (page_offset + size)); flush_dcache_page(page); kunmap(page); set_extent_uptodate(em_tree, extent_start, @@ -1542,8 +1632,8 @@ not_found: em->start = start; em->end = end; not_found_em: - em->block_start = 0; - em->block_end = 0; + em->block_start = EXTENT_MAP_HOLE; + em->block_end = EXTENT_MAP_HOLE; insert: btrfs_release_path(root, path); if (em->start > start || em->end < start) { @@ -1712,6 +1802,7 @@ static void btrfs_truncate(struct inode *inode) ret = btrfs_truncate_in_trans(trans, root, inode); btrfs_update_inode(trans, root, inode); nr = trans->blocks_used; + ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); @@ -1731,8 +1822,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) struct btrfs_key key; struct btrfs_root_item root_item; struct btrfs_inode_item *inode_item; - struct buffer_head *subvol; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; struct btrfs_root *new_root; struct inode *inode; struct inode *dir; @@ -1746,34 +1836,37 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - subvol = btrfs_alloc_free_block(trans, root, 0, 0); - if (IS_ERR(subvol)) - return PTR_ERR(subvol); - leaf = btrfs_buffer_leaf(subvol); - btrfs_set_header_nritems(&leaf->header, 0); - btrfs_set_header_level(&leaf->header, 0); - btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol)); - btrfs_set_header_generation(&leaf->header, trans->transid); - btrfs_set_header_owner(&leaf->header, root->root_key.objectid); - memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid, - sizeof(leaf->header.fsid)); - btrfs_mark_buffer_dirty(subvol); + leaf = btrfs_alloc_free_block(trans, root, 0, 0); + if (IS_ERR(leaf)) + return PTR_ERR(leaf); + + btrfs_set_header_nritems(leaf, 0); + btrfs_set_header_level(leaf, 0); + btrfs_set_header_blocknr(leaf, extent_buffer_blocknr(leaf)); + btrfs_set_header_generation(leaf, trans->transid); + btrfs_set_header_owner(leaf, root->root_key.objectid); + write_extent_buffer(leaf, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(leaf), + BTRFS_FSID_SIZE); + btrfs_mark_buffer_dirty(leaf); inode_item = &root_item.inode; memset(inode_item, 0, sizeof(*inode_item)); - btrfs_set_inode_generation(inode_item, 1); - btrfs_set_inode_size(inode_item, 3); - btrfs_set_inode_nlink(inode_item, 1); - btrfs_set_inode_nblocks(inode_item, 1); - btrfs_set_inode_mode(inode_item, S_IFDIR | 0755); + inode_item->generation = cpu_to_le64(1); + inode_item->size = cpu_to_le64(3); + inode_item->nlink = cpu_to_le32(1); + inode_item->nblocks = cpu_to_le64(1); + inode_item->mode = cpu_to_le32(S_IFDIR | 0755); - btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); + btrfs_set_root_blocknr(&root_item, extent_buffer_blocknr(leaf)); btrfs_set_root_refs(&root_item, 1); - btrfs_set_root_blocks_used(&root_item, 0); + btrfs_set_root_used(&root_item, 0); + memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); root_item.drop_level = 0; - brelse(subvol); - subvol = NULL; + + free_extent_buffer(leaf); + leaf = NULL; ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, 0, &objectid); @@ -1784,7 +1877,6 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) key.objectid = objectid; key.offset = 1; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &root_item); @@ -1845,7 +1937,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) struct btrfs_trans_handle *trans; struct btrfs_key key; struct btrfs_root_item new_root_item; - struct buffer_head *tmp; + struct extent_buffer *tmp; int ret; int err; u64 objectid; @@ -1876,10 +1968,11 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) key.objectid = objectid; key.offset = 1; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); - btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node)); + btrfs_set_root_blocknr(&new_root_item, + extent_buffer_blocknr(root->node)); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); @@ -1904,8 +1997,10 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) fail: nr = trans->blocks_used; err = btrfs_commit_transaction(trans, root); + if (err && !ret) ret = err; + mutex_unlock(&root->fs_info->fs_mutex); up_write(&root->snap_sem); btrfs_btree_balance_dirty(root, nr); @@ -1986,7 +2081,7 @@ static int btrfs_ioctl_snap_create(struct btrfs_root *root, void __user *arg) if (copy_from_user(&vol_args, arg, sizeof(vol_args))) return -EFAULT; - + namelen = strlen(vol_args.name); if (namelen > BTRFS_VOL_NAME_MAX) return -EINVAL; @@ -2164,8 +2259,10 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) { return -ENOTEMPTY; } + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, new_dir); path = btrfs_alloc_path(); if (!path) { @@ -2177,9 +2274,10 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, old_dir->i_ctime = old_dir->i_mtime = ctime; new_dir->i_ctime = new_dir->i_mtime = ctime; old_inode->i_ctime = ctime; + if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) { struct btrfs_key *location = &BTRFS_I(new_dir)->location; - u64 old_parent_oid; + struct btrfs_key old_parent_key; di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino, "..", 2, -1); if (IS_ERR(di)) { @@ -2190,7 +2288,7 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, ret = -ENOENT; goto out_fail; } - old_parent_oid = btrfs_disk_key_objectid(&di->location); + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &old_parent_key); ret = btrfs_del_item(trans, root, path); if (ret) { goto out_fail; @@ -2199,7 +2297,7 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, di = btrfs_lookup_dir_index_item(trans, root, path, old_inode->i_ino, - old_parent_oid, + old_parent_key.objectid, "..", 2, -1); if (IS_ERR(di)) { ret = PTR_ERR(di); @@ -2257,8 +2355,9 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, u64 objectid; int name_len; int datasize; - char *ptr; + unsigned long ptr; struct btrfs_file_extent_item *ei; + struct extent_buffer *leaf; unsigned long nr; name_len = strlen(symname) + 1; @@ -2302,7 +2401,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, BUG_ON(!path); key.objectid = inode->i_ino; key.offset = 0; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); datasize = btrfs_file_extent_calc_inline_size(name_len); err = btrfs_insert_empty_item(trans, root, path, &key, @@ -2311,16 +2409,17 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, drop_inode = 1; goto out_unlock; } - ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(ei, trans->transid); - btrfs_set_file_extent_type(ei, + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, ei, trans->transid); + btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, symname, name_len); - btrfs_mark_buffer_dirty(path->nodes[0]); + write_extent_buffer(leaf, symname, ptr, name_len); + btrfs_mark_buffer_dirty(leaf); btrfs_free_path(path); + inode->i_op = &btrfs_symlink_inode_operations; inode->i_mapping->a_ops = &btrfs_symlink_aops; inode->i_size = name_len - 1; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index bafa78603bd..a825ce078a5 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -20,10 +20,10 @@ #include "disk-io.h" #include "print-tree.h" -void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) +void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) { int i; - u32 nr = btrfs_header_nritems(&l->header); + u32 nr = btrfs_header_nritems(l); struct btrfs_item *item; struct btrfs_extent_item *ei; struct btrfs_root_item *ri; @@ -31,119 +31,113 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_inode_item *ii; struct btrfs_block_group_item *bi; struct btrfs_file_extent_item *fi; + struct btrfs_key key; + struct btrfs_key found_key; u32 type; printk("leaf %llu total ptrs %d free space %d\n", - (unsigned long long)btrfs_header_blocknr(&l->header), nr, + (unsigned long long)btrfs_header_blocknr(l), nr, btrfs_leaf_free_space(root, l)); for (i = 0 ; i < nr ; i++) { - item = l->items + i; - type = btrfs_disk_key_type(&item->key); + item = btrfs_item_nr(l, i); + btrfs_item_key_to_cpu(l, &key, i); + type = btrfs_key_type(&key); printk("\titem %d key (%llu %x %llu) itemoff %d itemsize %d\n", i, - (unsigned long long)btrfs_disk_key_objectid(&item->key), - btrfs_disk_key_flags(&item->key), - (unsigned long long)btrfs_disk_key_offset(&item->key), - btrfs_item_offset(item), - btrfs_item_size(item)); + (unsigned long long)key.objectid, type, + (unsigned long long)key.offset, + btrfs_item_offset(l, item), btrfs_item_size(l, item)); switch (type) { case BTRFS_INODE_ITEM_KEY: ii = btrfs_item_ptr(l, i, struct btrfs_inode_item); printk("\t\tinode generation %llu size %llu mode %o\n", - (unsigned long long)btrfs_inode_generation(ii), - (unsigned long long)btrfs_inode_size(ii), - btrfs_inode_mode(ii)); + (unsigned long long)btrfs_inode_generation(l, ii), + (unsigned long long)btrfs_inode_size(l, ii), + btrfs_inode_mode(l, ii)); break; case BTRFS_DIR_ITEM_KEY: di = btrfs_item_ptr(l, i, struct btrfs_dir_item); + btrfs_dir_item_key_to_cpu(l, di, &found_key); printk("\t\tdir oid %llu flags %u type %u\n", - (unsigned long long)btrfs_disk_key_objectid( - &di->location), - btrfs_dir_flags(di), - btrfs_dir_type(di)); - printk("\t\tname %.*s\n", - btrfs_dir_name_len(di),(char *)(di + 1)); + (unsigned long long)found_key.objectid, + btrfs_dir_flags(l, di), + btrfs_dir_type(l, di)); break; case BTRFS_ROOT_ITEM_KEY: ri = btrfs_item_ptr(l, i, struct btrfs_root_item); printk("\t\troot data blocknr %llu refs %u\n", - (unsigned long long)btrfs_root_blocknr(ri), - btrfs_root_refs(ri)); + (unsigned long long)btrfs_disk_root_blocknr(l, ri), + btrfs_disk_root_refs(l, ri)); break; case BTRFS_EXTENT_ITEM_KEY: ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); printk("\t\textent data refs %u\n", - btrfs_extent_refs(ei)); + btrfs_extent_refs(l, ei)); break; case BTRFS_EXTENT_DATA_KEY: fi = btrfs_item_ptr(l, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) == + if (btrfs_file_extent_type(l, fi) == BTRFS_FILE_EXTENT_INLINE) { printk("\t\tinline extent data size %u\n", - btrfs_file_extent_inline_len(l->items + i)); + btrfs_file_extent_inline_len(l, item)); break; } printk("\t\textent data disk block %llu nr %llu\n", - (unsigned long long)btrfs_file_extent_disk_blocknr(fi), - (unsigned long long)btrfs_file_extent_disk_num_blocks(fi)); + (unsigned long long)btrfs_file_extent_disk_blocknr(l, fi), + (unsigned long long)btrfs_file_extent_disk_num_blocks(l, fi)); printk("\t\textent data offset %llu nr %llu\n", - (unsigned long long)btrfs_file_extent_offset(fi), - (unsigned long long)btrfs_file_extent_num_blocks(fi)); + (unsigned long long)btrfs_file_extent_offset(l, fi), + (unsigned long long)btrfs_file_extent_num_blocks(l, fi)); break; case BTRFS_BLOCK_GROUP_ITEM_KEY: bi = btrfs_item_ptr(l, i, struct btrfs_block_group_item); printk("\t\tblock group used %llu\n", - (unsigned long long)btrfs_block_group_used(bi)); - break; - case BTRFS_STRING_ITEM_KEY: - printk("\t\titem data %.*s\n", btrfs_item_size(item), - btrfs_leaf_data(l) + btrfs_item_offset(item)); + (unsigned long long)btrfs_disk_block_group_used(l, bi)); break; }; } } -void btrfs_print_tree(struct btrfs_root *root, struct buffer_head *t) +void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) { int i; u32 nr; - struct btrfs_node *c; + struct btrfs_key key; - if (!t) + if (!c) return; - c = btrfs_buffer_node(t); - nr = btrfs_header_nritems(&c->header); + nr = btrfs_header_nritems(c); if (btrfs_is_leaf(c)) { - btrfs_print_leaf(root, (struct btrfs_leaf *)c); + btrfs_print_leaf(root, c); return; } printk("node %llu level %d total ptrs %d free spc %u\n", - (unsigned long long)btrfs_header_blocknr(&c->header), - btrfs_header_level(&c->header), nr, + (unsigned long long)btrfs_header_blocknr(c), + btrfs_header_level(c), nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); for (i = 0; i < nr; i++) { + btrfs_node_key_to_cpu(c, &key, i); printk("\tkey %d (%llu %u %llu) block %llu\n", i, - (unsigned long long)c->ptrs[i].key.objectid, - c->ptrs[i].key.flags, - (unsigned long long)c->ptrs[i].key.offset, + (unsigned long long)key.objectid, + key.type, + (unsigned long long)key.offset, (unsigned long long)btrfs_node_blockptr(c, i)); } for (i = 0; i < nr; i++) { - struct buffer_head *next_buf = read_tree_block(root, + struct extent_buffer *next = read_tree_block(root, btrfs_node_blockptr(c, i)); - struct btrfs_node *next = btrfs_buffer_node(next_buf); if (btrfs_is_leaf(next) && - btrfs_header_level(&c->header) != 1) + btrfs_header_level(c) != 1) BUG(); - if (btrfs_header_level(&next->header) != - btrfs_header_level(&c->header) - 1) + if (btrfs_header_level(next) != + btrfs_header_level(c) - 1) BUG(); - btrfs_print_tree(root, next_buf); - btrfs_block_release(root, next_buf); + btrfs_print_tree(root, next); + free_extent_buffer(next); } } diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 205cd03601a..da75efe534d 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -18,6 +18,6 @@ #ifndef __PRINT_TREE_ #define __PRINT_TREE_ -void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l); -void btrfs_print_tree(struct btrfs_root *root, struct buffer_head *t); +void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l); +void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *t); #endif diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 3b5926dfbeb..88bcdd33f56 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -26,12 +26,13 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, { struct btrfs_path *path; struct btrfs_key search_key; - struct btrfs_leaf *l; + struct btrfs_key found_key; + struct extent_buffer *l; int ret; int slot; search_key.objectid = objectid; - search_key.flags = (u32)-1; + search_key.type = (u8)-1; search_key.offset = (u64)-1; path = btrfs_alloc_path(); @@ -39,17 +40,19 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); if (ret < 0) goto out; + BUG_ON(ret == 0); - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; BUG_ON(path->slots[0] == 0); slot = path->slots[0] - 1; - if (btrfs_disk_key_objectid(&l->items[slot].key) != objectid) { + btrfs_item_key_to_cpu(l, &found_key, slot); + if (found_key.objectid != objectid) { ret = 1; goto out; } - memcpy(item, btrfs_item_ptr(l, slot, struct btrfs_root_item), - sizeof(*item)); - btrfs_disk_key_to_cpu(key, &l->items[slot].key); + read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), + sizeof(*item)); + memcpy(key, &found_key, sizeof(found_key)); ret = 0; out: btrfs_release_path(root, path); @@ -62,10 +65,10 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *item) { struct btrfs_path *path; - struct btrfs_leaf *l; + struct extent_buffer *l; int ret; int slot; - struct btrfs_root_item *update_item; + unsigned long ptr; path = btrfs_alloc_path(); BUG_ON(!path); @@ -73,10 +76,10 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root if (ret < 0) goto out; BUG_ON(ret != 0); - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; slot = path->slots[0]; - update_item = btrfs_item_ptr(l, slot, struct btrfs_root_item); - btrfs_memcpy(root, l, update_item, item, sizeof(*item)); + ptr = btrfs_item_ptr_offset(l, slot); + write_extent_buffer(l, item, ptr, sizeof(*item)); btrfs_mark_buffer_dirty(path->nodes[0]); out: btrfs_release_path(root, path); @@ -103,11 +106,10 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, struct btrfs_path *path; int ret; u32 nritems; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; int slot; key.objectid = objectid; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); key.offset = 0; path = btrfs_alloc_path(); @@ -117,19 +119,19 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, if (ret < 0) goto err; while(1) { - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; if (slot >= nritems) { ret = btrfs_next_leaf(root, path); if (ret) break; - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; } - item = leaf->items + slot; - btrfs_disk_key_to_cpu(&key, &item->key); + item = btrfs_item_nr(leaf, slot); + btrfs_item_key_to_cpu(leaf, &key, slot); if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY) goto next; @@ -140,7 +142,7 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, break; ri = btrfs_item_ptr(leaf, slot, struct btrfs_root_item); - if (btrfs_root_refs(ri) != 0) + if (btrfs_disk_root_refs(leaf, ri) != 0) goto next; dead_root = btrfs_read_fs_root_no_radix(root->fs_info, &key); @@ -170,6 +172,7 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, int ret; u32 refs; struct btrfs_root_item *ri; + struct extent_buffer *leaf; path = btrfs_alloc_path(); BUG_ON(!path); @@ -177,10 +180,10 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (ret < 0) goto out; BUG_ON(ret != 0); - ri = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_root_item); + leaf = path->nodes[0]; + ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item); - refs = btrfs_root_refs(ri); + refs = btrfs_disk_root_refs(leaf, ri); BUG_ON(refs != 0); ret = btrfs_del_item(trans, root, path); out: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 66a01cbbbea..39a1435c68f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -41,7 +41,7 @@ #include "ioctl.h" #include "print-tree.h" -#define BTRFS_SUPER_MAGIC 0x9123682E +#define BTRFS_SUPER_MAGIC 0x9123683E static struct super_operations btrfs_super_ops; @@ -115,13 +115,12 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) return -EIO; } sb->s_fs_info = tree_root; - disk_super = tree_root->fs_info->disk_super; + disk_super = &tree_root->fs_info->super_copy; inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super), tree_root); bi = BTRFS_I(inode); bi->location.objectid = inode->i_ino; bi->location.offset = 0; - bi->location.flags = 0; bi->root = tree_root; btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); @@ -281,6 +280,7 @@ error_s: error_bdev: close_bdev_excl(bdev); error: +printk("get_sb failed\n"); return error; } /* end copy & paste */ @@ -295,6 +295,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, btrfs_fill_super, mnt, subvol_name ? subvol_name : "default"); +printk("btrfs_get_sb returns %d\n", ret); return ret; } diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 2058783373e..9654e90eec8 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -31,31 +31,31 @@ static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_root_blocks_used(&root->root_item)); + (unsigned long long)btrfs_root_used(&root->root_item)); } static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_root_block_limit(&root->root_item)); + (unsigned long long)btrfs_root_limit(&root->root_item)); } static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_blocks_used(fs->disk_super)); + (unsigned long long)btrfs_super_blocks_used(&fs->super_copy)); } static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_total_blocks(fs->disk_super)); + (unsigned long long)btrfs_super_total_blocks(&fs->super_copy)); } static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_blocksize(fs->disk_super)); + (unsigned long long)btrfs_super_sectorsize(&fs->super_copy)); } /* this is for root attrs (subvols/snapshots) */ diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 087074db0bd..750f35a37aa 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -66,7 +67,9 @@ static int join_transaction(struct btrfs_root *root) cur_trans->commit_done = 0; cur_trans->start_time = get_seconds(); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); - init_bit_radix(&cur_trans->dirty_pages); + extent_map_tree_init(&cur_trans->dirty_pages, + root->fs_info->btree_inode->i_mapping, + GFP_NOFS); } else { cur_trans->num_writers++; cur_trans->num_joined++; @@ -88,7 +91,7 @@ static int record_root_in_trans(struct btrfs_root *root) (unsigned long)root->root_key.objectid, BTRFS_ROOT_DEFRAG_TAG); root->commit_root = root->node; - get_bh(root->node); + extent_buffer_get(root->node); } else { WARN_ON(1); } @@ -144,29 +147,30 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - unsigned long gang[16]; int ret; - int i; int err; int werr = 0; + struct extent_map_tree *dirty_pages; struct page *page; - struct radix_tree_root *dirty_pages; struct inode *btree_inode = root->fs_info->btree_inode; + u64 start; + u64 end; + unsigned long index; if (!trans || !trans->transaction) { return filemap_write_and_wait(btree_inode->i_mapping); } dirty_pages = &trans->transaction->dirty_pages; while(1) { - ret = find_first_radix_bit(dirty_pages, gang, - 0, ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(dirty_pages, 0, &start, &end, + EXTENT_DIRTY); + if (ret) break; - for (i = 0; i < ret; i++) { - /* FIXME EIO */ - clear_radix_bit(dirty_pages, gang[i]); - page = find_lock_page(btree_inode->i_mapping, - gang[i]); + clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); + while(start <= end) { + index = start >> PAGE_CACHE_SHIFT; + start = (index + 1) << PAGE_CACHE_SHIFT; + page = find_lock_page(btree_inode->i_mapping, index); if (!page) continue; if (PageWriteback(page)) { @@ -202,10 +206,11 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, btrfs_write_dirty_block_groups(trans, extent_root); while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); - if (old_extent_block == bh_blocknr(extent_root->node)) + if (old_extent_block == + extent_buffer_blocknr(extent_root->node)) break; btrfs_set_root_blocknr(&extent_root->root_item, - bh_blocknr(extent_root->node)); + extent_buffer_blocknr(extent_root->node)); ret = btrfs_update_root(trans, tree_root, &extent_root->root_key, &extent_root->root_item); @@ -279,9 +284,9 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); if (root->commit_root == root->node) { - WARN_ON(bh_blocknr(root->node) != + WARN_ON(extent_buffer_blocknr(root->node) != btrfs_root_blocknr(&root->root_item)); - brelse(root->commit_root); + free_extent_buffer(root->commit_root); root->commit_root = NULL; /* make sure to update the root on disk @@ -310,7 +315,7 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, root->root_key.offset = root->fs_info->generation; btrfs_set_root_blocknr(&root->root_item, - bh_blocknr(root->node)); + extent_buffer_blocknr(root->node)); err = btrfs_insert_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); @@ -389,10 +394,10 @@ int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) for (i = 0; i < ret; i++) { root = gang[i]; last = root->root_key.objectid + 1; - btrfs_defrag_root(root, 1); + // btrfs_defrag_root(root, 1); } } - btrfs_defrag_root(info->extent_root, 1); + // btrfs_defrag_root(info->extent_root, 1); return err; } @@ -414,7 +419,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, dirty = list_entry(list->next, struct dirty_root, list); list_del_init(&dirty->list); - num_blocks = btrfs_root_blocks_used(&dirty->root->root_item); + num_blocks = btrfs_root_used(&dirty->root->root_item); root = dirty->latest_root; while(1) { @@ -441,11 +446,11 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, } BUG_ON(ret); - num_blocks -= btrfs_root_blocks_used(&dirty->root->root_item); - blocks_used = btrfs_root_blocks_used(&root->root_item); + num_blocks -= btrfs_root_used(&dirty->root->root_item); + blocks_used = btrfs_root_used(&root->root_item); if (num_blocks) { record_root_in_trans(root); - btrfs_set_root_blocks_used(&root->root_item, + btrfs_set_root_used(&root->root_item, blocks_used - num_blocks); } ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); @@ -553,9 +558,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_set_super_generation(&root->fs_info->super_copy, cur_trans->transid); btrfs_set_super_root(&root->fs_info->super_copy, - bh_blocknr(root->fs_info->tree_root->node)); - memcpy(root->fs_info->disk_super, &root->fs_info->super_copy, - sizeof(root->fs_info->super_copy)); + extent_buffer_blocknr(root->fs_info->tree_root->node)); + + write_extent_buffer(root->fs_info->sb_buffer, + &root->fs_info->super_copy, 0, + sizeof(root->fs_info->super_copy)); btrfs_copy_pinned(root, &pinned_copy); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 4bc328cbb24..ae39fcfc169 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -28,7 +28,7 @@ struct btrfs_transaction { int use_count; int commit_done; struct list_head list; - struct radix_tree_root dirty_pages; + struct extent_map_tree dirty_pages; unsigned long start_time; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; @@ -83,5 +83,6 @@ int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); int btrfs_clean_old_snapshots(struct btrfs_root *root); - +int btrfs_commit_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root); #endif diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 420597127ed..daf019afa0a 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -23,14 +23,14 @@ #include "transaction.h" static void reada_defrag(struct btrfs_root *root, - struct btrfs_node *node) + struct extent_buffer *node) { int i; u32 nritems; u64 blocknr; int ret; - nritems = btrfs_header_nritems(&node->header); + nritems = btrfs_header_nritems(node); for (i = 0; i < nritems; i++) { blocknr = btrfs_node_blockptr(node, i); ret = readahead_tree_block(root, blocknr); @@ -44,8 +44,8 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, struct btrfs_path *path, int *level, int cache_only, u64 *last_ret) { - struct buffer_head *next; - struct buffer_head *cur; + struct extent_buffer *next; + struct extent_buffer *cur; u64 blocknr; int ret = 0; int is_extent = 0; @@ -62,13 +62,13 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, cur = path->nodes[*level]; if (!cache_only && *level > 1 && path->slots[*level] == 0) - reada_defrag(root, btrfs_buffer_node(cur)); + reada_defrag(root, cur); - if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) + if (btrfs_header_level(cur) != *level) WARN_ON(1); if (path->slots[*level] >= - btrfs_header_nritems(btrfs_buffer_header(cur))) + btrfs_header_nritems(cur)) break; if (*level == 1) { @@ -80,14 +80,13 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, break; } - blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), - path->slots[*level]); + blocknr = btrfs_node_blockptr(cur, path->slots[*level]); if (cache_only) { next = btrfs_find_tree_block(root, blocknr); - if (!next || !buffer_uptodate(next) || - buffer_locked(next) || !buffer_defrag(next)) { - brelse(next); + /* FIXME, test for defrag */ + if (!next || !btrfs_buffer_uptodate(next)) { + free_extent_buffer(next); path->slots[*level]++; continue; } @@ -106,16 +105,18 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, WARN_ON(*level <= 0); if (path->nodes[*level-1]) - btrfs_block_release(root, path->nodes[*level-1]); + free_extent_buffer(path->nodes[*level-1]); path->nodes[*level-1] = next; - *level = btrfs_header_level(btrfs_buffer_header(next)); + *level = btrfs_header_level(next); path->slots[*level] = 0; } WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); +#if 0 clear_buffer_defrag(path->nodes[*level]); clear_buffer_defrag_done(path->nodes[*level]); - btrfs_block_release(root, path->nodes[*level]); +#endif + free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; WARN_ON(ret); @@ -129,24 +130,25 @@ static int defrag_walk_up(struct btrfs_trans_handle *trans, { int i; int slot; - struct btrfs_node *node; + struct extent_buffer *node; for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; - if (slot < btrfs_header_nritems( - btrfs_buffer_header(path->nodes[i])) - 1) { + if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { path->slots[i]++; *level = i; - node = btrfs_buffer_node(path->nodes[i]); + node = path->nodes[i]; WARN_ON(i == 0); - btrfs_disk_key_to_cpu(&root->defrag_progress, - &node->ptrs[path->slots[i]].key); + btrfs_node_key_to_cpu(node, &root->defrag_progress, + path->slots[i]); root->defrag_level = i; return 0; } else { + /* clear_buffer_defrag(path->nodes[*level]); clear_buffer_defrag_done(path->nodes[*level]); - btrfs_block_release(root, path->nodes[*level]); + */ + free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; } @@ -158,7 +160,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only) { struct btrfs_path *path = NULL; - struct buffer_head *tmp; + struct extent_buffer *tmp; int ret = 0; int wret; int level; @@ -172,17 +174,18 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, if (root->ref_cows == 0 && !is_extent) goto out; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; - level = btrfs_header_level(btrfs_buffer_header(root->node)); + level = btrfs_header_level(root->node); orig_level = level; if (level == 0) { goto out; } if (root->defrag_progress.objectid == 0) { - get_bh(root->node); + extent_buffer_get(root->node); ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); BUG_ON(ret); ret = btrfs_realloc_node(trans, root, root->node, cache_only, @@ -200,12 +203,15 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, if (is_extent) btrfs_extent_post_op(trans, root); + if (wret < 0) { ret = wret; goto out; } + while(level > 0 && !path->nodes[level]) level--; + if (!path->nodes[level]) { ret = 0; goto out; @@ -230,7 +236,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { - btrfs_block_release(root, path->nodes[i]); + free_extent_buffer(path->nodes[i]); path->nodes[i] = 0; } } -- cgit v1.2.3-70-g09d2 From f510cfecfc98759d75283823cfccf0cc0d59a4c6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:14:48 -0400 Subject: Btrfs: Fix extent_buffer and extent_state leaks Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 10 +++- fs/btrfs/ctree.h | 2 +- fs/btrfs/disk-io.c | 17 +++++-- fs/btrfs/extent-tree.c | 126 +++++++++++++++++++++++-------------------------- fs/btrfs/extent_map.c | 64 ++++++++++++++++++++----- fs/btrfs/extent_map.h | 2 + fs/btrfs/transaction.c | 5 +- 7 files changed, 139 insertions(+), 87 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9427b79c5d7..50ef351ef6b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -87,6 +87,7 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (IS_ERR(cow)) return PTR_ERR(cow); + cow->alloc_addr = (unsigned long)__builtin_return_address(0); if (buf->len != root->sectorsize || cow->len != root->sectorsize) WARN_ON(1); @@ -132,6 +133,7 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, struct extent_buffer **cow_ret) { u64 search_start; + int ret; if (trans->transaction != root->fs_info->running_transaction) { printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, root->fs_info->running_transaction->transid); @@ -148,8 +150,10 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, } search_start = extent_buffer_blocknr(buf) & ~((u64)65535); - return __btrfs_cow_block(trans, root, buf, parent, + ret = __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); + (*cow_ret)->alloc_addr = (unsigned long)__builtin_return_address(0); + return ret; } static int close_blocks(u64 blocknr, u64 other) @@ -1013,8 +1017,10 @@ again: if (sret) return sret; b = p->nodes[level]; - if (!b) + if (!b) { + btrfs_release_path(NULL, p); goto again; + } slot = p->slots[level]; BUG_ON(btrfs_header_nritems(b) == 1); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index aed08618aca..5262b28f468 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -303,8 +303,8 @@ struct btrfs_fs_info { struct radix_tree_root pinned_radix; struct radix_tree_root block_group_radix; struct radix_tree_root block_group_data_radix; - struct radix_tree_root extent_map_radix; struct radix_tree_root extent_ins_radix; + struct extent_map_tree free_space_cache; u64 generation; u64 last_trans_committed; struct btrfs_transaction *running_transaction; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8242933a1d8..09f4e694624 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -46,18 +46,25 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) { struct inode *btree_inode = root->fs_info->btree_inode; - return find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + struct extent_buffer *eb; + eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, blocknr * root->sectorsize, root->sectorsize, GFP_NOFS); + if (eb) + eb->alloc_addr = (unsigned long)__builtin_return_address(0); + return eb; } struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 blocknr) { struct inode *btree_inode = root->fs_info->btree_inode; - return alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + struct extent_buffer *eb; + eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, blocknr * root->sectorsize, root->sectorsize, GFP_NOFS); + eb->alloc_addr = (unsigned long)__builtin_return_address(0); + return eb; } struct extent_map *btree_get_extent(struct inode *inode, struct page *page, @@ -226,6 +233,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) return NULL; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, buf, 1); + buf->alloc_addr = (unsigned long)__builtin_return_address(0); return buf; } @@ -426,7 +434,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) } init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); - init_bit_radix(&fs_info->extent_map_radix); init_bit_radix(&fs_info->extent_ins_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); @@ -449,6 +456,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_map_tree_init(&fs_info->free_space_cache, + fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; @@ -594,8 +603,10 @@ int close_ctree(struct btrfs_root *root) if (fs_info->extent_root->node) free_extent_buffer(fs_info->extent_root->node); + if (fs_info->tree_root->node) free_extent_buffer(fs_info->tree_root->node); + free_extent_buffer(fs_info->sb_buffer); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 089c41cbca7..74cfbee2ff3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -34,21 +34,19 @@ static int cache_block_group(struct btrfs_root *root, int ret; struct btrfs_key key; struct extent_buffer *leaf; - struct radix_tree_root *extent_radix; + struct extent_map_tree *free_space_cache; int slot; - u64 i; u64 last = 0; u64 hole_size; u64 first_free; int found = 0; root = root->fs_info->extent_root; - extent_radix = &root->fs_info->extent_map_radix; + free_space_cache = &root->fs_info->free_space_cache; if (block_group->cached) return 0; - if (block_group->data) - return 0; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -98,9 +96,11 @@ static int cache_block_group(struct btrfs_root *root, last = first_free; found = 1; } - hole_size = key.objectid - last; - for (i = 0; i < hole_size; i++) { - set_radix_bit(extent_radix, last + i); + if (key.objectid > last) { + hole_size = key.objectid - last; + set_extent_dirty(free_space_cache, last, + last + hole_size - 1, + GFP_NOFS); } last = key.objectid + key.offset; } @@ -114,9 +114,8 @@ next: block_group->key.offset > last) { hole_size = block_group->key.objectid + block_group->key.offset - last; - for (i = 0; i < hole_size; i++) { - set_radix_bit(extent_radix, last + i); - } + set_extent_dirty(free_space_cache, last, + last + hole_size - 1, GFP_NOFS); } block_group->cached = 1; err: @@ -150,47 +149,33 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct return NULL; } -static u64 leaf_range(struct btrfs_root *root) -{ - u64 size = BTRFS_LEAF_DATA_SIZE(root); - do_div(size, sizeof(struct btrfs_extent_item) + - sizeof(struct btrfs_item)); - return size; -} - static u64 find_search_start(struct btrfs_root *root, struct btrfs_block_group_cache **cache_ret, - u64 search_start, int num) + u64 search_start, int num, int data) { - unsigned long gang[8]; int ret; struct btrfs_block_group_cache *cache = *cache_ret; u64 last = max(search_start, cache->key.objectid); + u64 start = 0; + u64 end = 0; - if (cache->data) - goto out; again: ret = cache_block_group(root, cache); if (ret) goto out; while(1) { - ret = find_first_radix_bit(&root->fs_info->extent_map_radix, - gang, last, ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(&root->fs_info->free_space_cache, + last, &start, &end, EXTENT_DIRTY); + if (ret) goto out; - last = gang[ret-1] + 1; - if (num > 1) { - if (ret != ARRAY_SIZE(gang)) { - goto new_group; - } - if (gang[ret-1] - gang[0] > leaf_range(root)) { - continue; - } - } - if (gang[0] >= cache->key.objectid + cache->key.offset) { + + start = max(last, start); + last = end + 1; + if (end + 1 - start < num) + continue; + if (start + num > cache->key.objectid + cache->key.offset) goto new_group; - } - return gang[0]; + return start; } out: return max(cache->last_alloc, search_start); @@ -202,7 +187,7 @@ new_group: return max((*cache_ret)->last_alloc, search_start); } cache = btrfs_find_block_group(root, cache, - last + cache->key.offset - 1, 0, 0); + last + cache->key.offset - 1, data, 0); *cache_ret = cache; goto again; } @@ -625,7 +610,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 total = num; u64 old_val; u64 block_in_group; - u64 i; int ret; while(total) { @@ -644,12 +628,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, if (alloc) { if (blocknr > cache->last_alloc) cache->last_alloc = blocknr; - if (!cache->data) { - for (i = 0; i < num; i++) { - clear_radix_bit(&info->extent_map_radix, - blocknr + i); - } - } if (cache->data != data && old_val < (cache->key.offset >> 1)) { cache->data = data; @@ -677,11 +655,10 @@ static int update_block_group(struct btrfs_trans_handle *trans, old_val -= num; if (blocknr < cache->first_free) cache->first_free = blocknr; - if (!cache->data && mark_free) { - for (i = 0; i < num; i++) { - set_radix_bit(&info->extent_map_radix, - blocknr + i); - } + if (mark_free) { + set_extent_dirty(&info->free_space_cache, + blocknr, blocknr + num - 1, + GFP_NOFS); } if (old_val < (cache->key.offset >> 1) && old_val + num >= (cache->key.offset >> 1)) { @@ -732,7 +709,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, int ret; int i; struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix; - struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix; + struct extent_map_tree *free_space_cache; + + free_space_cache = &root->fs_info->free_space_cache; while(1) { ret = find_first_radix_bit(unpin_radix, gang, 0, @@ -751,8 +730,11 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, block_group->pinned--; if (gang[i] < block_group->last_alloc) block_group->last_alloc = gang[i]; - if (!block_group->data) - set_radix_bit(extent_radix, gang[i]); + if (!block_group->data) { + set_extent_dirty(free_space_cache, + gang[i], gang[i], + GFP_NOFS); + } } } } @@ -995,6 +977,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; + u64 cached_search_start = 0; WARN_ON(num_blocks < 1); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); @@ -1017,11 +1000,9 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); check_failed: - if (!block_group->data) - search_start = find_search_start(root, &block_group, - search_start, total_needed); - else if (!full_scan) - search_start = max(block_group->last_alloc, search_start); + search_start = find_search_start(root, &block_group, + search_start, total_needed, data); + cached_search_start = search_start; btrfs_init_path(path); ins->objectid = search_start; @@ -1097,6 +1078,7 @@ check_failed: start_found = 1; last_block = key.objectid + key.offset; + if (!full_scan && last_block >= block_group->key.objectid + block_group->key.offset) { btrfs_release_path(root, path); @@ -1138,6 +1120,9 @@ check_pending: } ins->offset = num_blocks; btrfs_free_path(path); + if (0 && ins->objectid != cached_search_start) { +printk("\tcached was %Lu found %Lu\n", cached_search_start, ins->objectid); + } return 0; new_group: @@ -1209,6 +1194,10 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, btrfs_set_root_used(&root->root_item, root_blocks_used + num_blocks); + clear_extent_dirty(&root->fs_info->free_space_cache, + ins->objectid, ins->objectid + ins->offset - 1, + GFP_NOFS); + if (root == extent_root) { BUG_ON(num_blocks != 1); set_radix_bit(&root->fs_info->extent_ins_radix, ins->objectid); @@ -1227,6 +1216,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, BUG_ON(ret); finish_current_insert(trans, extent_root); pending_ret = del_pending_extents(trans, extent_root); + if (ret) { return ret; } @@ -1265,6 +1255,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); } btrfs_set_buffer_uptodate(buf); + buf->alloc_addr = (unsigned long)__builtin_return_address(0); set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); /* @@ -1492,6 +1483,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root orig_level = level; if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { path->nodes[level] = root->node; + extent_buffer_get(root->node); path->slots[level] = 0; } else { struct btrfs_key key; @@ -1524,7 +1516,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root if (wret < 0) ret = wret; ret = -EAGAIN; - extent_buffer_get(root->node); break; } for (i = 0; i <= orig_level; i++) { @@ -1562,8 +1553,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) { int ret; int ret2; - unsigned long gang[16]; - int i; + u64 start; + u64 end; ret = free_block_group_radix(&info->block_group_radix); ret2 = free_block_group_radix(&info->block_group_data_radix); @@ -1573,13 +1564,12 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return ret2; while(1) { - ret = find_first_radix_bit(&info->extent_map_radix, - gang, 0, ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(&info->free_space_cache, 0, + &start, &end, EXTENT_DIRTY); + if (ret) break; - for (i = 0; i < ret; i++) { - clear_radix_bit(&info->extent_map_radix, gang[i]); - } + clear_extent_dirty(&info->free_space_cache, start, + end, GFP_NOFS); } return 0; } diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index f150188f621..5b7dbcaacd1 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -19,8 +19,13 @@ struct kmem_cache *btrfs_cache_create(const char *name, size_t size, static struct kmem_cache *extent_map_cache; static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; + static LIST_HEAD(extent_buffers); +static LIST_HEAD(buffers); +static LIST_HEAD(states); + static spinlock_t extent_buffers_lock; +static spinlock_t state_lock = SPIN_LOCK_UNLOCKED; static int nr_extent_buffers; #define MAX_EXTENT_BUFFER_CACHE 128 @@ -48,6 +53,7 @@ void __init extent_map_init(void) void __exit extent_map_exit(void) { struct extent_buffer *eb; + struct extent_state *state; while (!list_empty(&extent_buffers)) { eb = list_entry(extent_buffers.next, @@ -55,6 +61,22 @@ void __exit extent_map_exit(void) list_del(&eb->list); kmem_cache_free(extent_buffer_cache, eb); } + while (!list_empty(&states)) { + state = list_entry(states.next, struct extent_state, list); + printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs)); + list_del(&state->list); + kmem_cache_free(extent_state_cache, state); + + } + while (!list_empty(&buffers)) { + eb = list_entry(buffers.next, + struct extent_buffer, leak_list); + printk("buffer leak start %Lu len %lu return %lX\n", eb->start, eb->len, eb->alloc_addr); + list_del(&eb->leak_list); + kmem_cache_free(extent_buffer_cache, eb); + } + + if (extent_map_cache) kmem_cache_destroy(extent_map_cache); if (extent_state_cache) @@ -101,12 +123,19 @@ EXPORT_SYMBOL(free_extent_map); struct extent_state *alloc_extent_state(gfp_t mask) { struct extent_state *state; + unsigned long flags; + state = kmem_cache_alloc(extent_state_cache, mask); if (!state || IS_ERR(state)) return state; state->state = 0; state->in_tree = 0; state->private = 0; + + spin_lock_irqsave(&state_lock, flags); + list_add(&state->list, &states); + spin_unlock_irqrestore(&state_lock, flags); + atomic_set(&state->refs, 1); init_waitqueue_head(&state->wq); return state; @@ -115,10 +144,14 @@ EXPORT_SYMBOL(alloc_extent_state); void free_extent_state(struct extent_state *state) { + unsigned long flags; if (!state) return; if (atomic_dec_and_test(&state->refs)) { WARN_ON(state->in_tree); + spin_lock_irqsave(&state_lock, flags); + list_del(&state->list); + spin_unlock_irqrestore(&state_lock, flags); kmem_cache_free(extent_state_cache, state); } } @@ -361,10 +394,6 @@ static int insert_state(struct extent_map_tree *tree, state->state |= bits; state->start = start; state->end = end; - if ((end & 4095) == 0) { - printk("insert state %Lu %Lu strange end\n", start, end); - WARN_ON(1); - } node = tree_insert(&tree->state, end, &state->rb_node); if (node) { struct extent_state *found; @@ -399,11 +428,7 @@ static int split_state(struct extent_map_tree *tree, struct extent_state *orig, prealloc->end = split - 1; prealloc->state = orig->state; orig->start = split; - if ((prealloc->end & 4095) == 0) { - printk("insert state %Lu %Lu strange end\n", prealloc->start, - prealloc->end); - WARN_ON(1); - } + node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); if (node) { struct extent_state *found; @@ -957,6 +982,7 @@ int find_first_extent_bit(struct extent_map_tree *tree, u64 start, *start_ret = state->start; *end_ret = state->end; ret = 0; + break; } node = rb_next(node); if (!node) @@ -1877,6 +1903,7 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, static struct extent_buffer *__alloc_extent_buffer(gfp_t mask) { struct extent_buffer *eb = NULL; + spin_lock(&extent_buffers_lock); if (!list_empty(&extent_buffers)) { eb = list_entry(extent_buffers.next, struct extent_buffer, @@ -1886,15 +1913,26 @@ static struct extent_buffer *__alloc_extent_buffer(gfp_t mask) nr_extent_buffers--; } spin_unlock(&extent_buffers_lock); + if (eb) { memset(eb, 0, sizeof(*eb)); - return eb; + } else { + eb = kmem_cache_zalloc(extent_buffer_cache, mask); } - return kmem_cache_zalloc(extent_buffer_cache, mask); + spin_lock(&extent_buffers_lock); + list_add(&eb->leak_list, &buffers); + spin_unlock(&extent_buffers_lock); + + return eb; } static void __free_extent_buffer(struct extent_buffer *eb) { + + spin_lock(&extent_buffers_lock); + list_del_init(&eb->leak_list); + spin_unlock(&extent_buffers_lock); + if (nr_extent_buffers >= MAX_EXTENT_BUFFER_CACHE) { kmem_cache_free(extent_buffer_cache, eb); } else { @@ -1933,6 +1971,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, if (!eb || IS_ERR(eb)) return NULL; + eb->alloc_addr = __builtin_return_address(0); eb->start = start; eb->len = len; atomic_set(&eb->refs, 1); @@ -1947,6 +1986,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, eb->start &= ~((u64)PAGE_CACHE_SIZE - 1); goto fail; } + set_page_extent_mapped(p); if (i == 0) eb->first_page = p; if (!PageUptodate(p)) @@ -1978,6 +2018,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, if (!eb || IS_ERR(eb)) return NULL; + eb->alloc_addr = __builtin_return_address(0); eb->start = start; eb->len = len; atomic_set(&eb->refs, 1); @@ -1992,6 +2033,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, eb->start &= ~((u64)PAGE_CACHE_SIZE - 1); goto fail; } + set_page_extent_mapped(p); if (i == 0) eb->first_page = p; } diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 3b3abf34c40..d100f7c1476 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -68,7 +68,9 @@ struct extent_buffer { atomic_t refs; int flags; struct list_head list; + struct list_head leak_list; struct page *first_page; + unsigned long alloc_addr; }; typedef struct extent_map *(get_extent_t)(struct inode *inode, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 750f35a37aa..372b61f5733 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -352,7 +352,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) return 0; trans = btrfs_start_transaction(root, 1); - while (1) { + while (0) { root->defrag_running = 1; ret = btrfs_defrag_leaves(trans, root, cacheonly); nr = trans->blocks_used; @@ -394,7 +394,7 @@ int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) for (i = 0; i < ret; i++) { root = gang[i]; last = root->root_key.objectid + 1; - // btrfs_defrag_root(root, 1); + btrfs_defrag_root(root, 1); } } // btrfs_defrag_root(info->extent_root, 1); @@ -462,6 +462,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); + free_extent_buffer(dirty->root->node); kfree(dirty->root); kfree(dirty); mutex_unlock(&tree_root->fs_info->fs_mutex); -- cgit v1.2.3-70-g09d2 From 96b5179d0d9b6368c203856f2ad6e8e12a8b2a2c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:15:19 -0400 Subject: Btrfs: Stop using radix trees for the block group cache Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 6 +- fs/btrfs/disk-io.c | 4 +- fs/btrfs/extent-tree.c | 362 ++++++++++++++++++++----------------------------- fs/btrfs/extent_map.c | 18 ++- fs/btrfs/extent_map.h | 4 + 5 files changed, 171 insertions(+), 223 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5262b28f468..c6174b27fd1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -271,8 +271,6 @@ struct btrfs_csum_item { } __attribute__ ((__packed__)); /* tag for the radix tree of block groups in ram */ -#define BTRFS_BLOCK_GROUP_DIRTY 0 -#define BTRFS_BLOCK_GROUP_AVAIL 1 #define BTRFS_BLOCK_GROUP_SIZE (256 * 1024 * 1024) @@ -285,7 +283,6 @@ struct btrfs_block_group_item { struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; - struct radix_tree_root *radix; u64 first_free; u64 last_alloc; u64 pinned; @@ -301,10 +298,9 @@ struct btrfs_fs_info { struct radix_tree_root fs_roots_radix; struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; - struct radix_tree_root block_group_radix; - struct radix_tree_root block_group_data_radix; struct radix_tree_root extent_ins_radix; struct extent_map_tree free_space_cache; + struct extent_map_tree block_group_cache; u64 generation; u64 last_trans_committed; struct btrfs_transaction *running_transaction; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 09f4e694624..aac7c82b0dc 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -436,8 +436,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pending_del_radix); init_bit_radix(&fs_info->extent_ins_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); - INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); - INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj)); @@ -458,6 +456,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) GFP_NOFS); extent_map_tree_init(&fs_info->free_space_cache, fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_map_tree_init(&fs_info->block_group_cache, + fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 74cfbee2ff3..4bc639565d1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -22,6 +22,10 @@ #include "print-tree.h" #include "transaction.h" +#define BLOCK_GROUP_DATA EXTENT_WRITEBACK +#define BLOCK_GROUP_METADATA EXTENT_UPTODATE +#define BLOCK_GROUP_DIRTY EXTENT_DIRTY + static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct @@ -127,25 +131,31 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 blocknr) { - struct btrfs_block_group_cache *block_group; + struct extent_map_tree *block_group_cache; + struct btrfs_block_group_cache *block_group = NULL; + u64 ptr; + u64 start; + u64 end; int ret; - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)&block_group, - blocknr, 1); + block_group_cache = &info->block_group_cache; + ret = find_first_extent_bit(block_group_cache, + blocknr, &start, &end, + BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA); if (ret) { - if (block_group->key.objectid <= blocknr && blocknr <= - block_group->key.objectid + block_group->key.offset) - return block_group; - } - ret = radix_tree_gang_lookup(&info->block_group_data_radix, - (void **)&block_group, - blocknr, 1); - if (ret) { - if (block_group->key.objectid <= blocknr && blocknr <= - block_group->key.objectid + block_group->key.offset) - return block_group; + return NULL; } + ret = get_state_private(block_group_cache, start, &ptr); + if (ret) + return NULL; + + block_group = (struct btrfs_block_group_cache *)ptr; + + + if (block_group->key.objectid <= blocknr && blocknr <= + block_group->key.objectid + block_group->key.offset) + return block_group; + return NULL; } @@ -173,7 +183,7 @@ again: last = end + 1; if (end + 1 - start < num) continue; - if (start + num > cache->key.objectid + cache->key.offset) + if (start + num >= cache->key.objectid + cache->key.offset) goto new_group; return start; } @@ -189,6 +199,7 @@ new_group: cache = btrfs_find_block_group(root, cache, last + cache->key.offset - 1, data, 0); *cache_ret = cache; + last = min(cache->key.objectid, last); goto again; } @@ -204,30 +215,32 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, *hint, u64 search_start, int data, int owner) { - struct btrfs_block_group_cache *cache[8]; + struct btrfs_block_group_cache *cache; + struct extent_map_tree *block_group_cache; struct btrfs_block_group_cache *found_group = NULL; struct btrfs_fs_info *info = root->fs_info; - struct radix_tree_root *radix; - struct radix_tree_root *swap_radix; u64 used; u64 last = 0; u64 hint_last; - int i; + u64 start; + u64 end; + u64 free_check; + u64 ptr; + int bit; int ret; int full_search = 0; int factor = 8; int data_swap = 0; + block_group_cache = &info->block_group_cache; + if (!owner) factor = 5; - if (data) { - radix = &info->block_group_data_radix; - swap_radix = &info->block_group_radix; - } else { - radix = &info->block_group_radix; - swap_radix = &info->block_group_data_radix; - } + if (data) + bit = BLOCK_GROUP_DATA; + else + bit = BLOCK_GROUP_METADATA; if (search_start) { struct btrfs_block_group_cache *shint; @@ -246,12 +259,6 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, div_factor(hint->key.offset, factor)) { return hint; } - if (used >= div_factor(hint->key.offset, 8)) { - radix_tree_tag_clear(radix, - hint->key.objectid + - hint->key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); - } last = hint->key.offset * 3; if (hint->key.objectid >= last) last = max(search_start + hint->key.offset - 1, @@ -267,51 +274,29 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, last = hint_last; } - while(1) { - ret = radix_tree_gang_lookup_tag(radix, (void **)cache, - last, ARRAY_SIZE(cache), - BTRFS_BLOCK_GROUP_AVAIL); - if (!ret) - break; - for (i = 0; i < ret; i++) { - last = cache[i]->key.objectid + - cache[i]->key.offset; - used = btrfs_block_group_used(&cache[i]->item); - if (used + cache[i]->pinned < - div_factor(cache[i]->key.offset, factor)) { - found_group = cache[i]; - goto found; - } - if (used >= div_factor(cache[i]->key.offset, 8)) { - radix_tree_tag_clear(radix, - cache[i]->key.objectid + - cache[i]->key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); - } - } - cond_resched(); - } - last = hint_last; again: while(1) { - ret = radix_tree_gang_lookup(radix, (void **)cache, - last, ARRAY_SIZE(cache)); - if (!ret) + ret = find_first_extent_bit(block_group_cache, last, + &start, &end, bit); + if (ret) break; - for (i = 0; i < ret; i++) { - last = cache[i]->key.objectid + - cache[i]->key.offset; - used = btrfs_block_group_used(&cache[i]->item); - if (used + cache[i]->pinned < cache[i]->key.offset) { - found_group = cache[i]; - goto found; - } - if (used >= cache[i]->key.offset) { - radix_tree_tag_clear(radix, - cache[i]->key.objectid + - cache[i]->key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); - } + + ret = get_state_private(block_group_cache, start, &ptr); + if (ret) + break; + + cache = (struct btrfs_block_group_cache *)ptr; + last = cache->key.objectid + cache->key.offset; + used = btrfs_block_group_used(&cache->item); + + if (full_search) + free_check = cache->key.offset; + else + free_check = div_factor(cache->key.offset, factor); + + if (used + cache->pinned < free_check) { + found_group = cache; + goto found; } cond_resched(); } @@ -321,23 +306,11 @@ again: goto again; } if (!data_swap) { - struct radix_tree_root *tmp = radix; data_swap = 1; - radix = swap_radix; - swap_radix = tmp; + bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; last = search_start; goto again; } - if (!found_group) { - ret = radix_tree_gang_lookup(radix, - (void **)&found_group, 0, 1); - if (ret == 0) { - ret = radix_tree_gang_lookup(swap_radix, - (void **)&found_group, - 0, 1); - } - BUG_ON(ret != 1); - } found: return found_group; } @@ -538,68 +511,55 @@ fail: } -static int write_dirty_block_radix(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct radix_tree_root *radix) +int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root) { - struct btrfs_block_group_cache *cache[8]; + struct extent_map_tree *block_group_cache; + struct btrfs_block_group_cache *cache; int ret; int err = 0; int werr = 0; - int i; struct btrfs_path *path; - unsigned long off = 0; + u64 last = 0; + u64 start; + u64 end; + u64 ptr; + block_group_cache = &root->fs_info->block_group_cache; path = btrfs_alloc_path(); if (!path) return -ENOMEM; while(1) { - ret = radix_tree_gang_lookup_tag(radix, (void **)cache, - off, ARRAY_SIZE(cache), - BTRFS_BLOCK_GROUP_DIRTY); - if (!ret) + ret = find_first_extent_bit(block_group_cache, last, + &start, &end, BLOCK_GROUP_DIRTY); + if (ret) break; - for (i = 0; i < ret; i++) { - err = write_one_cache_group(trans, root, - path, cache[i]); - /* - * if we fail to write the cache group, we want - * to keep it marked dirty in hopes that a later - * write will work - */ - if (err) { - werr = err; - off = cache[i]->key.objectid + - cache[i]->key.offset; - continue; - } - radix_tree_tag_clear(radix, cache[i]->key.objectid + - cache[i]->key.offset - 1, - BTRFS_BLOCK_GROUP_DIRTY); + last = end + 1; + ret = get_state_private(block_group_cache, start, &ptr); + if (ret) + break; + + cache = (struct btrfs_block_group_cache *)ptr; + err = write_one_cache_group(trans, root, + path, cache); + /* + * if we fail to write the cache group, we want + * to keep it marked dirty in hopes that a later + * write will work + */ + if (err) { + werr = err; + continue; } + clear_extent_bits(block_group_cache, start, end, + BLOCK_GROUP_DIRTY, GFP_NOFS); } btrfs_free_path(path); return werr; } -int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - int ret; - int ret2; - ret = write_dirty_block_radix(trans, root, - &root->fs_info->block_group_radix); - ret2 = write_dirty_block_radix(trans, root, - &root->fs_info->block_group_data_radix); - if (ret) - return ret; - if (ret2) - return ret2; - return 0; -} - static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num, int alloc, int mark_free, @@ -610,7 +570,8 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 total = num; u64 old_val; u64 block_in_group; - int ret; + u64 start; + u64 end; while(total) { cache = btrfs_lookup_block_group(info, blocknr); @@ -619,9 +580,10 @@ static int update_block_group(struct btrfs_trans_handle *trans, } block_in_group = blocknr - cache->key.objectid; WARN_ON(block_in_group > cache->key.offset); - radix_tree_tag_set(cache->radix, cache->key.objectid + - cache->key.offset - 1, - BTRFS_BLOCK_GROUP_DIRTY); + start = cache->key.objectid; + end = start + cache->key.offset - 1; + set_extent_bits(&info->block_group_cache, start, end, + BLOCK_GROUP_DIRTY, GFP_NOFS); old_val = btrfs_block_group_used(&cache->item); num = min(total, cache->key.offset - block_in_group); @@ -630,25 +592,27 @@ static int update_block_group(struct btrfs_trans_handle *trans, cache->last_alloc = blocknr; if (cache->data != data && old_val < (cache->key.offset >> 1)) { - cache->data = data; - radix_tree_delete(cache->radix, - cache->key.objectid + - cache->key.offset - 1); + int bit_to_clear; + int bit_to_set; + cache->data = data; if (data) { - cache->radix = - &info->block_group_data_radix; + bit_to_clear = BLOCK_GROUP_DATA; + bit_to_set = BLOCK_GROUP_METADATA; cache->item.flags |= BTRFS_BLOCK_GROUP_DATA; } else { - cache->radix = &info->block_group_radix; + bit_to_clear = BLOCK_GROUP_METADATA; + bit_to_set = BLOCK_GROUP_DATA; cache->item.flags &= ~BTRFS_BLOCK_GROUP_DATA; } - ret = radix_tree_insert(cache->radix, - cache->key.objectid + - cache->key.offset - 1, - (void *)cache); + clear_extent_bits(&info->block_group_cache, + start, end, bit_to_clear, + GFP_NOFS); + set_extent_bits(&info->block_group_cache, + start, end, bit_to_set, + GFP_NOFS); } old_val += num; } else { @@ -660,13 +624,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, blocknr, blocknr + num - 1, GFP_NOFS); } - if (old_val < (cache->key.offset >> 1) && - old_val + num >= (cache->key.offset >> 1)) { - radix_tree_tag_set(cache->radix, - cache->key.objectid + - cache->key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); - } } btrfs_set_block_group_used(&cache->item, old_val); total -= num; @@ -730,11 +687,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, block_group->pinned--; if (gang[i] < block_group->last_alloc) block_group->last_alloc = gang[i]; - if (!block_group->data) { - set_extent_dirty(free_space_cache, - gang[i], gang[i], - GFP_NOFS); - } + set_extent_dirty(free_space_cache, + gang[i], gang[i], GFP_NOFS); } } } @@ -1059,8 +1013,8 @@ check_failed: ins->offset = search_end - ins->objectid; goto check_pending; } - btrfs_item_key_to_cpu(l, &key, slot); + if (key.objectid >= search_start && key.objectid > last_block && start_found) { if (last_block < search_start) @@ -1072,9 +1026,14 @@ check_failed: goto check_pending; } } - - if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) + if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) { + if (!start_found) { + last_block = key.objectid; + start_found = 1; + } goto next; + } + start_found = 1; last_block = key.objectid + key.offset; @@ -1120,9 +1079,6 @@ check_pending: } ins->offset = num_blocks; btrfs_free_path(path); - if (0 && ins->objectid != cached_search_start) { -printk("\tcached was %Lu found %Lu\n", cached_search_start, ins->objectid); - } return 0; new_group: @@ -1529,40 +1485,20 @@ out: return ret; } -static int free_block_group_radix(struct radix_tree_root *radix) +int btrfs_free_block_groups(struct btrfs_fs_info *info) { + u64 start; + u64 end; int ret; - struct btrfs_block_group_cache *cache[8]; - int i; while(1) { - ret = radix_tree_gang_lookup(radix, (void **)cache, 0, - ARRAY_SIZE(cache)); - if (!ret) + ret = find_first_extent_bit(&info->block_group_cache, 0, + &start, &end, (unsigned int)-1); + if (ret) break; - for (i = 0; i < ret; i++) { - radix_tree_delete(radix, cache[i]->key.objectid + - cache[i]->key.offset - 1); - kfree(cache[i]); - } + clear_extent_bits(&info->block_group_cache, start, + end, (unsigned int)-1, GFP_NOFS); } - return 0; -} - -int btrfs_free_block_groups(struct btrfs_fs_info *info) -{ - int ret; - int ret2; - u64 start; - u64 end; - - ret = free_block_group_radix(&info->block_group_radix); - ret2 = free_block_group_radix(&info->block_group_data_radix); - if (ret) - return ret; - if (ret2) - return ret2; - while(1) { ret = find_first_extent_bit(&info->free_space_cache, 0, &start, &end, EXTENT_DIRTY); @@ -1579,17 +1515,20 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_path *path; int ret; int err = 0; + int bit; struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; - struct radix_tree_root *radix; + struct extent_map_tree *block_group_cache; struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; u64 group_size_blocks; - u64 used; + + block_group_cache = &info->block_group_cache; group_size_blocks = BTRFS_BLOCK_GROUP_SIZE >> - root->fs_info->sb->s_blocksize_bits; + info->sb->s_blocksize_bits; + root = info->extent_root; key.objectid = 0; key.offset = group_size_blocks; @@ -1617,35 +1556,30 @@ int btrfs_read_block_groups(struct btrfs_root *root) read_extent_buffer(leaf, &cache->item, btrfs_item_ptr_offset(leaf, path->slots[0]), sizeof(cache->item)); - if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { - radix = &info->block_group_data_radix; - cache->data = 1; - } else { - radix = &info->block_group_radix; - cache->data = 0; - } - memcpy(&cache->key, &found_key, sizeof(found_key)); cache->last_alloc = cache->key.objectid; cache->first_free = cache->key.objectid; cache->pinned = 0; cache->cached = 0; - cache->radix = radix; - key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); - ret = radix_tree_insert(radix, found_key.objectid + - found_key.offset - 1, - (void *)cache); - BUG_ON(ret); - used = btrfs_block_group_used(&cache->item); - if (used < div_factor(key.offset, 8)) { - radix_tree_tag_set(radix, found_key.objectid + - found_key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); + if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { + bit = BLOCK_GROUP_DATA; + cache->data = 1; + } else { + bit = BLOCK_GROUP_METADATA; + cache->data = 0; } + + /* use EXTENT_LOCKED to prevent merging */ + set_extent_bits(block_group_cache, found_key.objectid, + found_key.objectid + found_key.offset - 1, + bit | EXTENT_LOCKED, GFP_NOFS); + set_state_private(block_group_cache, found_key.objectid, + (u64)cache); + if (key.objectid >= btrfs_super_total_blocks(&info->super_copy)) break; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 5b7dbcaacd1..1b2f9e059de 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -574,7 +574,7 @@ out: return set; search_again: - if (start >= end) + if (start > end) goto out; write_unlock_irqrestore(&tree->lock, flags); if (mask & __GFP_WAIT) @@ -819,6 +819,21 @@ int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, } EXPORT_SYMBOL(set_extent_dirty); +int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, + int bits, gfp_t mask) +{ + return set_extent_bit(tree, start, end, bits, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_bits); + +int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, + int bits, gfp_t mask) +{ + return clear_extent_bit(tree, start, end, bits, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_bits); + int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask) { @@ -1138,7 +1153,6 @@ int set_state_private(struct extent_map_tree *tree, u64 start, u64 private) out: write_unlock_irq(&tree->lock); return ret; - } int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private) diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index d100f7c1476..5a63b417034 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -96,6 +96,10 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, void __init extent_map_init(void); void __exit extent_map_exit(void); int extent_clean_all_trees(struct extent_map_tree *tree); +int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, + int bits, gfp_t mask); +int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, + int bits, gfp_t mask); int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, -- cgit v1.2.3-70-g09d2 From 1a5bc167f6707542b79a55452075525620ed43f5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:15:26 -0400 Subject: Btrfs: Change the remaining radix trees used by extent-tree.c to extent_map trees Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 16 ++-- fs/btrfs/disk-io.c | 9 ++- fs/btrfs/extent-tree.c | 193 ++++++++++++++++++------------------------------- fs/btrfs/extent_map.c | 5 +- fs/btrfs/extent_map.h | 4 +- fs/btrfs/transaction.c | 5 +- 6 files changed, 94 insertions(+), 138 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c6174b27fd1..256689551eb 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -283,10 +283,6 @@ struct btrfs_block_group_item { struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; - u64 first_free; - u64 last_alloc; - u64 pinned; - u64 last_prealloc; int data; int cached; }; @@ -296,11 +292,13 @@ struct btrfs_fs_info { struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct radix_tree_root fs_roots_radix; - struct radix_tree_root pending_del_radix; - struct radix_tree_root pinned_radix; - struct radix_tree_root extent_ins_radix; + struct extent_map_tree free_space_cache; struct extent_map_tree block_group_cache; + struct extent_map_tree pinned_extents; + struct extent_map_tree pending_del; + struct extent_map_tree extent_ins; + u64 generation; u64 last_trans_committed; struct btrfs_transaction *running_transaction; @@ -926,7 +924,7 @@ static inline int btrfs_set_root_name(struct btrfs_root *root, /* extent-tree.c */ int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy); +int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 blocknr); @@ -949,7 +947,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct radix_tree_root *unpin_radix); + struct extent_map_tree *unpin); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index aac7c82b0dc..2b86a1d779b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -432,9 +432,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) err = -ENOMEM; goto fail; } - init_bit_radix(&fs_info->pinned_radix); - init_bit_radix(&fs_info->pending_del_radix); - init_bit_radix(&fs_info->extent_ins_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); @@ -458,6 +455,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_mapping, GFP_NOFS); extent_map_tree_init(&fs_info->block_group_cache, fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_map_tree_init(&fs_info->pinned_extents, + fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_map_tree_init(&fs_info->pending_del, + fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_map_tree_init(&fs_info->extent_ins, + fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4bc639565d1..477466d167a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -188,13 +188,13 @@ again: return start; } out: - return max(cache->last_alloc, search_start); + return search_start; new_group: cache = btrfs_lookup_block_group(root->fs_info, last + cache->key.offset - 1); if (!cache) { - return max((*cache_ret)->last_alloc, search_start); + return search_start; } cache = btrfs_find_block_group(root, cache, last + cache->key.offset - 1, data, 0); @@ -247,16 +247,14 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, shint = btrfs_lookup_block_group(info, search_start); if (shint && shint->data == data) { used = btrfs_block_group_used(&shint->item); - if (used + shint->pinned < - div_factor(shint->key.offset, factor)) { + if (used < div_factor(shint->key.offset, factor)) { return shint; } } } if (hint && hint->data == data) { used = btrfs_block_group_used(&hint->item); - if (used + hint->pinned < - div_factor(hint->key.offset, factor)) { + if (used < div_factor(hint->key.offset, factor)) { return hint; } last = hint->key.offset * 3; @@ -294,7 +292,7 @@ again: else free_check = div_factor(cache->key.offset, factor); - if (used + cache->pinned < free_check) { + if (used < free_check) { found_group = cache; goto found; } @@ -505,8 +503,6 @@ fail: return ret; if (pending_ret) return pending_ret; - if (cache->data) - cache->last_alloc = cache->first_free; return 0; } @@ -588,8 +584,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, old_val = btrfs_block_group_used(&cache->item); num = min(total, cache->key.offset - block_in_group); if (alloc) { - if (blocknr > cache->last_alloc) - cache->last_alloc = blocknr; if (cache->data != data && old_val < (cache->key.offset >> 1)) { int bit_to_clear; @@ -617,8 +611,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, old_val += num; } else { old_val -= num; - if (blocknr < cache->first_free) - cache->first_free = blocknr; if (mark_free) { set_extent_dirty(&info->free_space_cache, blocknr, blocknr + num - 1, @@ -632,65 +624,47 @@ static int update_block_group(struct btrfs_trans_handle *trans, return 0; } -int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy) +int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy) { - unsigned long gang[8]; u64 last = 0; - struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix; + u64 start; + u64 end; + struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents; int ret; - int i; while(1) { - ret = find_first_radix_bit(pinned_radix, gang, last, - ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(pinned_extents, last, + &start, &end, EXTENT_DIRTY); + if (ret) break; - for (i = 0 ; i < ret; i++) { - set_radix_bit(copy, gang[i]); - last = gang[i] + 1; - } + set_extent_dirty(copy, start, end, GFP_NOFS); + last = end + 1; } - ret = find_first_radix_bit(&root->fs_info->extent_ins_radix, gang, 0, - ARRAY_SIZE(gang)); - WARN_ON(ret); return 0; } int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct radix_tree_root *unpin_radix) + struct extent_map_tree *unpin) { - unsigned long gang[8]; - struct btrfs_block_group_cache *block_group; - u64 first = 0; + u64 start; + u64 end; int ret; - int i; - struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix; + struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents; struct extent_map_tree *free_space_cache; free_space_cache = &root->fs_info->free_space_cache; while(1) { - ret = find_first_radix_bit(unpin_radix, gang, 0, - ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(unpin, 0, &start, &end, + EXTENT_DIRTY); + if (ret) break; - if (!first) - first = gang[0]; - for (i = 0; i < ret; i++) { - clear_radix_bit(pinned_radix, gang[i]); - clear_radix_bit(unpin_radix, gang[i]); - block_group = btrfs_lookup_block_group(root->fs_info, - gang[i]); - if (block_group) { - WARN_ON(block_group->pinned == 0); - block_group->pinned--; - if (gang[i] < block_group->last_alloc) - block_group->last_alloc = gang[i]; - set_extent_dirty(free_space_cache, - gang[i], gang[i], GFP_NOFS); - } - } + + clear_extent_dirty(pinned_extents, start, end, + GFP_NOFS); + clear_extent_dirty(unpin, start, end, GFP_NOFS); + set_extent_dirty(free_space_cache, start, end, GFP_NOFS); } return 0; } @@ -700,39 +674,36 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct { struct btrfs_key ins; struct btrfs_extent_item extent_item; - int i; int ret; - int err; - unsigned long gang[8]; + int err = 0; + u64 start; + u64 end; struct btrfs_fs_info *info = extent_root->fs_info; btrfs_set_stack_extent_refs(&extent_item, 1); - ins.offset = 1; btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); btrfs_set_stack_extent_owner(&extent_item, extent_root->root_key.objectid); while(1) { - ret = find_first_radix_bit(&info->extent_ins_radix, gang, 0, - ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(&info->extent_ins, 0, &start, + &end, EXTENT_LOCKED); + if (ret) break; - for (i = 0; i < ret; i++) { - ins.objectid = gang[i]; - err = btrfs_insert_item(trans, extent_root, &ins, - &extent_item, - sizeof(extent_item)); - clear_radix_bit(&info->extent_ins_radix, gang[i]); - WARN_ON(err); - } + ins.objectid = start; + ins.offset = end + 1 - start; + err = btrfs_insert_item(trans, extent_root, &ins, + &extent_item, sizeof(extent_item)); + clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, + GFP_NOFS); } return 0; } static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) { - int err; + int err = 0; struct extent_buffer *buf; if (!pending) { @@ -748,16 +719,11 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) } free_extent_buffer(buf); } - err = set_radix_bit(&root->fs_info->pinned_radix, blocknr); - if (!err) { - struct btrfs_block_group_cache *cache; - cache = btrfs_lookup_block_group(root->fs_info, - blocknr); - if (cache) - cache->pinned++; - } + set_extent_dirty(&root->fs_info->pinned_extents, + blocknr, blocknr, GFP_NOFS); } else { - err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr); + set_extent_bits(&root->fs_info->pending_del, + blocknr, blocknr, EXTENT_LOCKED, GFP_NOFS); } BUG_ON(err < 0); return 0; @@ -840,43 +806,28 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root) { int ret; - int wret; int err = 0; - unsigned long gang[4]; - int i; - struct radix_tree_root *pending_radix; - struct radix_tree_root *pinned_radix; - struct btrfs_block_group_cache *cache; + u64 start; + u64 end; + struct extent_map_tree *pending_del; + struct extent_map_tree *pinned_extents; - pending_radix = &extent_root->fs_info->pending_del_radix; - pinned_radix = &extent_root->fs_info->pinned_radix; + pending_del = &extent_root->fs_info->pending_del; + pinned_extents = &extent_root->fs_info->pinned_extents; while(1) { - ret = find_first_radix_bit(pending_radix, gang, 0, - ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(pending_del, 0, &start, &end, + EXTENT_LOCKED); + if (ret) break; - for (i = 0; i < ret; i++) { - wret = set_radix_bit(pinned_radix, gang[i]); - if (wret == 0) { - cache = - btrfs_lookup_block_group(extent_root->fs_info, - gang[i]); - if (cache) - cache->pinned++; - } - if (wret < 0) { - printk(KERN_CRIT "set_radix_bit, err %d\n", - wret); - BUG_ON(wret < 0); - } - wret = clear_radix_bit(pending_radix, gang[i]); - BUG_ON(wret); - wret = __free_extent(trans, extent_root, - gang[i], 1, 0, 0); - if (wret) - err = wret; - } + + set_extent_dirty(pinned_extents, start, end, GFP_NOFS); + clear_extent_bits(pending_del, start, end, EXTENT_LOCKED, + GFP_NOFS); + ret = __free_extent(trans, extent_root, + start, end + 1 - start, 0, 0); + if (ret) + err = ret; } return err; } @@ -920,7 +871,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root u64 hole_size = 0; int slot = 0; u64 last_block = 0; - u64 test_block; u64 orig_search_start = search_start; int start_found; struct extent_buffer *l; @@ -1059,13 +1009,15 @@ check_pending: if (ins->objectid + num_blocks >= search_end) goto enospc; - for (test_block = ins->objectid; - test_block < ins->objectid + num_blocks; test_block++) { - if (test_radix_bit(&info->pinned_radix, test_block) || - test_radix_bit(&info->extent_ins_radix, test_block)) { - search_start = test_block + 1; - goto new_group; - } + if (test_range_bit(&info->extent_ins, ins->objectid, + ins->objectid + num_blocks -1, EXTENT_LOCKED, 0)) { + search_start = ins->objectid + num_blocks; + goto new_group; + } + if (test_range_bit(&info->pinned_extents, ins->objectid, + ins->objectid + num_blocks -1, EXTENT_DIRTY, 0)) { + search_start = ins->objectid + num_blocks; + goto new_group; } if (exclude_nr > 0 && (ins->objectid + num_blocks > exclude_start && ins->objectid < exclude_start + exclude_nr)) { @@ -1156,7 +1108,9 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, if (root == extent_root) { BUG_ON(num_blocks != 1); - set_radix_bit(&root->fs_info->extent_ins_radix, ins->objectid); + set_extent_bits(&root->fs_info->extent_ins, ins->objectid, + ins->objectid + ins->offset - 1, + EXTENT_LOCKED, GFP_NOFS); goto update_block; } @@ -1557,9 +1511,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) btrfs_item_ptr_offset(leaf, path->slots[0]), sizeof(cache->item)); memcpy(&cache->key, &found_key, sizeof(found_key)); - cache->last_alloc = cache->key.objectid; - cache->first_free = cache->key.objectid; - cache->pinned = 0; cache->cached = 0; key.objectid = found_key.objectid + found_key.offset; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 1b2f9e059de..e081558d52f 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1188,8 +1188,8 @@ out: * has the bits set. Otherwise, 1 is returned if any bit in the * range is found set. */ -static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, - int bits, int filled) +int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, + int bits, int filled) { struct extent_state *state = NULL; struct rb_node *node; @@ -1222,6 +1222,7 @@ static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, read_unlock_irq(&tree->lock); return bitset; } +EXPORT_SYMBOL(test_range_bit); /* * helper function to set a given page up to date if all the diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 5a63b417034..75dc600dc0f 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -95,7 +95,9 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, get_extent_t *get_extent); void __init extent_map_init(void); void __exit extent_map_exit(void); -int extent_clean_all_trees(struct extent_map_tree *tree); + +int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, + int bits, int filled); int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, int bits, gfp_t mask); int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 372b61f5733..55289b71056 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -481,11 +481,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans; struct btrfs_transaction *prev_trans = NULL; struct list_head dirty_fs_roots; - struct radix_tree_root pinned_copy; + struct extent_map_tree pinned_copy; DEFINE_WAIT(wait); int ret; - init_bit_radix(&pinned_copy); + extent_map_tree_init(&pinned_copy, + root->fs_info->btree_inode->i_mapping, GFP_NOFS); INIT_LIST_HEAD(&dirty_fs_roots); mutex_lock(&root->fs_info->trans_mutex); -- cgit v1.2.3-70-g09d2 From db94535db75e67fab12ccbb7f5ee548e33fed891 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:15:53 -0400 Subject: Btrfs: Allow tree blocks larger than the page size Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 319 +++++++++++++++++++++++++++++++++++-------------- fs/btrfs/ctree.h | 112 ++++++++++------- fs/btrfs/disk-io.c | 92 ++++++++------ fs/btrfs/disk-io.h | 9 +- fs/btrfs/extent-tree.c | 285 ++++++++++++++++++++++--------------------- fs/btrfs/extent_map.c | 91 ++++++++++---- fs/btrfs/extent_map.h | 10 +- fs/btrfs/file-item.c | 10 +- fs/btrfs/file.c | 126 ++++++++++--------- fs/btrfs/inode.c | 90 +++++++------- fs/btrfs/print-tree.c | 23 ++-- fs/btrfs/super.c | 6 +- fs/btrfs/sysfs.c | 5 +- fs/btrfs/transaction.c | 39 +++--- fs/btrfs/tree-defrag.c | 18 +-- 15 files changed, 738 insertions(+), 497 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 50ef351ef6b..34de83630ae 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -83,16 +83,15 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(root->ref_cows && trans->transid != root->last_trans); - cow = btrfs_alloc_free_block(trans, root, search_start, empty_size); + cow = btrfs_alloc_free_block(trans, root, buf->len, + search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); cow->alloc_addr = (unsigned long)__builtin_return_address(0); - if (buf->len != root->sectorsize || cow->len != root->sectorsize) - WARN_ON(1); copy_extent_buffer(cow, buf, 0, 0, cow->len); - btrfs_set_header_blocknr(cow, extent_buffer_blocknr(cow)); + btrfs_set_header_bytenr(cow, cow->start); btrfs_set_header_generation(cow, trans->transid); btrfs_set_header_owner(cow, root->root_key.objectid); @@ -110,16 +109,16 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, root->node = cow; extent_buffer_get(cow); if (buf != root->commit_root) { - btrfs_free_extent(trans, root, - extent_buffer_blocknr(buf), 1, 1); + btrfs_free_extent(trans, root, buf->start, + buf->len, 1); } free_extent_buffer(buf); } else { btrfs_set_node_blockptr(parent, parent_slot, - extent_buffer_blocknr(cow)); + cow->start); btrfs_mark_buffer_dirty(parent); WARN_ON(btrfs_header_generation(parent) != trans->transid); - btrfs_free_extent(trans, root, extent_buffer_blocknr(buf),1,1); + btrfs_free_extent(trans, root, buf->start, buf->len, 1); } free_extent_buffer(buf); btrfs_mark_buffer_dirty(cow); @@ -149,13 +148,14 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, return 0; } - search_start = extent_buffer_blocknr(buf) & ~((u64)65535); + search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); ret = __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); (*cow_ret)->alloc_addr = (unsigned long)__builtin_return_address(0); return ret; } +#if 0 static int close_blocks(u64 blocknr, u64 other) { if (blocknr < other && other - blocknr < 8) @@ -165,7 +165,6 @@ static int close_blocks(u64 blocknr, u64 other) return 0; } -#if 0 static int should_defrag_leaf(struct extent_buffer *eb) { return 0; @@ -355,7 +354,7 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, BUG_ON(memcmp(&parent_key, &node_key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_blocknr(node)); + btrfs_header_bytenr(node)); } BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); if (slot != 0) { @@ -398,7 +397,7 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, BUG_ON(memcmp(&parent_key, &leaf_key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_blocknr(leaf)); + btrfs_header_bytenr(leaf)); } #if 0 for (i = 0; nritems > 1 && i < nritems - 2; i++) { @@ -467,14 +466,16 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, static int check_block(struct btrfs_root *root, struct btrfs_path *path, int level) { +#if 0 struct extent_buffer *buf = path->nodes[level]; if (memcmp_extent_buffer(buf, root->fs_info->fsid, (unsigned long)btrfs_header_fsid(buf), BTRFS_FSID_SIZE)) { printk("warning bad block %Lu\n", buf->start); - BUG(); + return 1; } +#endif if (level == 0) return check_leaf(root, path, level); return check_node(root, path, level); @@ -585,7 +586,8 @@ static struct extent_buffer *read_node_slot(struct btrfs_root *root, return NULL; if (slot >= btrfs_header_nritems(parent)) return NULL; - return read_tree_block(root, btrfs_node_blockptr(parent, slot)); + return read_tree_block(root, btrfs_node_blockptr(parent, slot), + btrfs_level_size(root, btrfs_header_level(parent) - 1)); } static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root @@ -618,7 +620,6 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root */ if (!parent) { struct extent_buffer *child; - u64 blocknr = extent_buffer_blocknr(mid); if (btrfs_header_nritems(mid) != 1) return 0; @@ -632,9 +633,10 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root wait_on_tree_block_writeback(root, mid); /* once for the path */ free_extent_buffer(mid); + ret = btrfs_free_extent(trans, root, mid->start, mid->len, 1); /* once for the root ptr */ free_extent_buffer(mid); - return btrfs_free_extent(trans, root, blocknr, 1, 1); + return ret; } if (btrfs_header_nritems(mid) > BTRFS_NODEPTRS_PER_BLOCK(root) / 4) @@ -680,7 +682,9 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (wret < 0 && wret != -ENOSPC) ret = wret; if (btrfs_header_nritems(right) == 0) { - u64 blocknr = extent_buffer_blocknr(right); + u64 bytenr = right->start; + u32 blocksize = right->len; + clean_tree_block(trans, root, right); wait_on_tree_block_writeback(root, right); free_extent_buffer(right); @@ -689,7 +693,8 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root 1); if (wret) ret = wret; - wret = btrfs_free_extent(trans, root, blocknr, 1, 1); + wret = btrfs_free_extent(trans, root, bytenr, + blocksize, 1); if (wret) ret = wret; } else { @@ -719,7 +724,8 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root } if (btrfs_header_nritems(mid) == 0) { /* we've managed to empty the middle node, drop it */ - u64 blocknr = extent_buffer_blocknr(mid); + u64 bytenr = mid->start; + u32 blocksize = mid->len; clean_tree_block(trans, root, mid); wait_on_tree_block_writeback(root, mid); free_extent_buffer(mid); @@ -727,7 +733,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root wret = del_ptr(trans, root, path, level + 1, pslot); if (wret) ret = wret; - wret = btrfs_free_extent(trans, root, blocknr, 1, 1); + wret = btrfs_free_extent(trans, root, bytenr, blocksize, 1); if (wret) ret = wret; } else { @@ -830,7 +836,6 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, path->slots[level] = orig_slot; free_extent_buffer(left); } - check_node(root, path, level); return 0; } free_extent_buffer(left); @@ -874,12 +879,10 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, } else { free_extent_buffer(right); } - check_node(root, path, level); return 0; } free_extent_buffer(right); } - check_node(root, path, level); return 1; } @@ -889,19 +892,23 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { + return; +#if 0 struct extent_buffer *node; int i; u32 nritems; - u64 blocknr; + u64 bytenr; u64 search; u64 cluster_start; int ret; int nread = 0; int direction = path->reada; + int level; struct radix_tree_root found; unsigned long gang[8]; struct extent_buffer *eb; + if (level == 0) return; @@ -918,8 +925,9 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, init_bit_radix(&found); nritems = btrfs_header_nritems(node); + level = btrfs_header_level(node) - 1; for (i = slot; i < nritems; i++) { - blocknr = btrfs_node_blockptr(node, i); + bytenr = btrfs_node_blockptr(node, i); set_radix_bit(&found, blocknr); } if (direction > 0) { @@ -944,6 +952,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, } } } +#endif } /* * look for key in the tree. path is filled in with nodes along the way @@ -963,7 +972,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root ins_len, int cow) { struct extent_buffer *b; - u64 blocknr; + u64 bytenr; int slot; int ret; int level; @@ -1027,10 +1036,11 @@ again: /* this is only true while dropping a snapshot */ if (level == lowest_level) break; - blocknr = btrfs_node_blockptr(b, slot); + bytenr = btrfs_node_blockptr(b, slot); if (should_reada) reada_for_search(root, p, level, slot); - b = read_tree_block(root, btrfs_node_blockptr(b, slot)); + b = read_tree_block(root, bytenr, + btrfs_level_size(root, level - 1)); } else { p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, b) < @@ -1193,14 +1203,14 @@ static int insert_new_root(struct btrfs_trans_handle *trans, BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - c = btrfs_alloc_free_block(trans, root, - extent_buffer_blocknr(root->node), 0); + c = btrfs_alloc_free_block(trans, root, root->nodesize, + root->node->start, 0); if (IS_ERR(c)) return PTR_ERR(c); memset_extent_buffer(c, 0, 0, root->nodesize); btrfs_set_header_nritems(c, 1); btrfs_set_header_level(c, level); - btrfs_set_header_blocknr(c, extent_buffer_blocknr(c)); + btrfs_set_header_bytenr(c, c->start); btrfs_set_header_generation(c, trans->transid); btrfs_set_header_owner(c, root->root_key.objectid); lower = path->nodes[level-1]; @@ -1213,7 +1223,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, else btrfs_node_key(lower, &lower_key, 0); btrfs_set_node_key(c, &lower_key, 0); - btrfs_set_node_blockptr(c, 0, extent_buffer_blocknr(lower)); + btrfs_set_node_blockptr(c, 0, lower->start); btrfs_mark_buffer_dirty(c); @@ -1237,7 +1247,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, */ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_disk_key - *key, u64 blocknr, int slot, int level) + *key, u64 bytenr, int slot, int level) { struct extent_buffer *lower; int nritems; @@ -1256,10 +1266,9 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root (nritems - slot) * sizeof(struct btrfs_key_ptr)); } btrfs_set_node_key(lower, key, slot); - btrfs_set_node_blockptr(lower, slot, blocknr); + btrfs_set_node_blockptr(lower, slot, bytenr); btrfs_set_header_nritems(lower, nritems + 1); btrfs_mark_buffer_dirty(lower); - check_node(root, path, level); return 0; } @@ -1300,14 +1309,14 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root } c_nritems = btrfs_header_nritems(c); - split = btrfs_alloc_free_block(trans, root, - extent_buffer_blocknr(c), 0); + split = btrfs_alloc_free_block(trans, root, root->nodesize, + c->start, 0); if (IS_ERR(split)) return PTR_ERR(split); btrfs_set_header_flags(split, btrfs_header_flags(c)); btrfs_set_header_level(split, btrfs_header_level(c)); - btrfs_set_header_blocknr(split, extent_buffer_blocknr(split)); + btrfs_set_header_bytenr(split, split->start); btrfs_set_header_generation(split, trans->transid); btrfs_set_header_owner(split, root->root_key.objectid); write_extent_buffer(split, root->fs_info->fsid, @@ -1328,8 +1337,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(split); btrfs_node_key(split, &disk_key, 0); - wret = insert_ptr(trans, root, path, &disk_key, - extent_buffer_blocknr(split), + wret = insert_ptr(trans, root, path, &disk_key, split->start, path->slots[level + 1] + 1, level + 1); if (wret) @@ -1407,6 +1415,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root u32 left_nritems; u32 right_nritems; u32 data_end; + u32 this_item_size; int ret; slot = path->slots[1]; @@ -1417,7 +1426,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root if (slot >= btrfs_header_nritems(upper) - 1) return 1; - right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1)); + right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1), + root->leafsize); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { free_extent_buffer(right); @@ -1445,13 +1455,27 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root for (i = left_nritems - 1; i >= 1; i--) { item = btrfs_item_nr(left, i); + if (path->slots[0] == i) push_space += data_size + sizeof(*item); - if (btrfs_item_size(left, item) + sizeof(*item) + push_space > - free_space) + + if (!left->map_token) { + map_extent_buffer(left, (unsigned long)item, + sizeof(struct btrfs_item), + &left->map_token, &left->kaddr, + &left->map_start, &left->map_len, + KM_USER1); + } + + this_item_size = btrfs_item_size(left, item); + if (this_item_size + sizeof(*item) + push_space > free_space) break; push_items++; - push_space += btrfs_item_size(left, item) + sizeof(*item); + push_space += this_item_size + sizeof(*item); + } + if (left->map_token) { + unmap_extent_buffer(left, left->map_token, KM_USER1); + left->map_token = NULL; } if (push_items == 0) { @@ -1493,11 +1517,23 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root right_nritems += push_items; btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(root); + for (i = 0; i < right_nritems; i++) { item = btrfs_item_nr(right, i); - btrfs_set_item_offset(right, item, push_space - - btrfs_item_size(right, item)); - push_space = btrfs_item_offset(right, item); + if (!right->map_token) { + map_extent_buffer(right, (unsigned long)item, + sizeof(struct btrfs_item), + &right->map_token, &right->kaddr, + &right->map_start, &right->map_len, + KM_USER1); + } + push_space -= btrfs_item_size(right, item); + btrfs_set_item_offset(right, item, push_space); + } + + if (right->map_token) { + unmap_extent_buffer(right, right->map_token, KM_USER1); + right->map_token = NULL; } left_nritems -= push_items; btrfs_set_header_nritems(left, left_nritems); @@ -1518,8 +1554,6 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root } else { free_extent_buffer(right); } - if (path->nodes[1]) - check_node(root, path, 1); return 0; } /* @@ -1542,6 +1576,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root u32 right_nritems; int ret = 0; int wret; + u32 this_item_size; + u32 old_left_item_size; slot = path->slots[1]; if (slot == 0) @@ -1550,7 +1586,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root return 1; left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1], - slot - 1)); + slot - 1), root->leafsize); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { free_extent_buffer(left); @@ -1579,14 +1615,30 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root for (i = 0; i < right_nritems - 1; i++) { item = btrfs_item_nr(right, i); + if (!right->map_token) { + map_extent_buffer(right, (unsigned long)item, + sizeof(struct btrfs_item), + &right->map_token, &right->kaddr, + &right->map_start, &right->map_len, + KM_USER1); + } + if (path->slots[0] == i) push_space += data_size + sizeof(*item); - if (btrfs_item_size(right, item) + sizeof(*item) + push_space > - free_space) + + this_item_size = btrfs_item_size(right, item); + if (this_item_size + sizeof(*item) + push_space > free_space) break; + push_items++; - push_space += btrfs_item_size(right, item) + sizeof(*item); + push_space += this_item_size + sizeof(*item); + } + + if (right->map_token) { + unmap_extent_buffer(right, right->map_token, KM_USER1); + right->map_token = NULL; } + if (push_items == 0) { free_extent_buffer(left); return 1; @@ -1611,15 +1663,28 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root old_left_nritems = btrfs_header_nritems(left); BUG_ON(old_left_nritems < 0); + old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1); for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { u32 ioff; + item = btrfs_item_nr(left, i); + if (!left->map_token) { + map_extent_buffer(left, (unsigned long)item, + sizeof(struct btrfs_item), + &left->map_token, &left->kaddr, + &left->map_start, &left->map_len, + KM_USER1); + } + ioff = btrfs_item_offset(left, item); btrfs_set_item_offset(left, item, - ioff - (BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_offset_nr(left, old_left_nritems - 1))); + ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size)); } btrfs_set_header_nritems(left, old_left_nritems + push_items); + if (left->map_token) { + unmap_extent_buffer(left, left->map_token, KM_USER1); + left->map_token = NULL; + } /* fixup right node */ push_space = btrfs_item_offset_nr(right, push_items - 1) - @@ -1640,9 +1705,21 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root for (i = 0; i < right_nritems; i++) { item = btrfs_item_nr(right, i); - btrfs_set_item_offset(right, item, push_space - - btrfs_item_size(right, item)); - push_space = btrfs_item_offset(right, item); + + if (!right->map_token) { + map_extent_buffer(right, (unsigned long)item, + sizeof(struct btrfs_item), + &right->map_token, &right->kaddr, + &right->map_start, &right->map_len, + KM_USER1); + } + + push_space = push_space - btrfs_item_size(right, item); + btrfs_set_item_offset(right, item, push_space); + } + if (right->map_token) { + unmap_extent_buffer(right, right->map_token, KM_USER1); + right->map_token = NULL; } btrfs_mark_buffer_dirty(left); @@ -1664,8 +1741,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root path->slots[0] -= push_items; } BUG_ON(path->slots[0] < 0); - if (path->nodes[1]) - check_node(root, path, 1); return ret; } @@ -1718,13 +1793,13 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root nritems = btrfs_header_nritems(l); mid = (nritems + 1)/ 2; - right = btrfs_alloc_free_block(trans, root, - extent_buffer_blocknr(l), 0); + right = btrfs_alloc_free_block(trans, root, root->leafsize, + l->start, 0); if (IS_ERR(right)) return PTR_ERR(right); memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); - btrfs_set_header_blocknr(right, extent_buffer_blocknr(right)); + btrfs_set_header_bytenr(right, right->start); btrfs_set_header_generation(right, trans->transid); btrfs_set_header_owner(right, root->root_key.objectid); btrfs_set_header_level(right, 0); @@ -1740,8 +1815,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_cpu_key_to_disk(&disk_key, ins_key); btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, - &disk_key, - extent_buffer_blocknr(right), + &disk_key, right->start, path->slots[1] + 1, 1); if (wret) ret = wret; @@ -1762,7 +1836,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, &disk_key, - extent_buffer_blocknr(right), + right->start, path->slots[1], 1); if (wret) ret = wret; @@ -1799,15 +1873,30 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root for (i = 0; i < nritems; i++) { struct btrfs_item *item = btrfs_item_nr(right, i); - u32 ioff = btrfs_item_offset(right, item); + u32 ioff; + + if (!right->map_token) { + map_extent_buffer(right, (unsigned long)item, + sizeof(struct btrfs_item), + &right->map_token, &right->kaddr, + &right->map_start, &right->map_len, + KM_USER1); + } + + ioff = btrfs_item_offset(right, item); btrfs_set_item_offset(right, item, ioff + rt_data_off); } + if (right->map_token) { + unmap_extent_buffer(right, right->map_token, KM_USER1); + right->map_token = NULL; + } + btrfs_set_header_nritems(l, mid); ret = 0; btrfs_item_key(right, &disk_key, 0); - wret = insert_ptr(trans, root, path, &disk_key, - extent_buffer_blocknr(right), path->slots[1] + 1, 1); + wret = insert_ptr(trans, root, path, &disk_key, right->start, + path->slots[1] + 1, 1); if (wret) ret = wret; @@ -1824,19 +1913,17 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root free_extent_buffer(right); BUG_ON(path->slots[0] < 0); - check_node(root, path, 1); - check_leaf(root, path, 0); if (!double_split) return ret; - right = btrfs_alloc_free_block(trans, root, - extent_buffer_blocknr(l), 0); + right = btrfs_alloc_free_block(trans, root, root->leafsize, + l->start, 0); if (IS_ERR(right)) return PTR_ERR(right); memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); - btrfs_set_header_blocknr(right, extent_buffer_blocknr(right)); + btrfs_set_header_bytenr(right, right->start); btrfs_set_header_generation(right, trans->transid); btrfs_set_header_owner(right, root->root_key.objectid); btrfs_set_header_level(right, 0); @@ -1847,8 +1934,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_cpu_key_to_disk(&disk_key, ins_key); btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, - &disk_key, - extent_buffer_blocknr(right), + &disk_key, right->start, path->slots[1], 1); if (wret) ret = wret; @@ -1860,8 +1946,6 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root free_extent_buffer(path->nodes[0]); path->nodes[0] = right; path->slots[0] = 0; - check_node(root, path, 1); - check_leaf(root, path, 0); return ret; } @@ -1904,9 +1988,24 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, for (i = slot; i < nritems; i++) { u32 ioff; item = btrfs_item_nr(leaf, i); + + if (!leaf->map_token) { + map_extent_buffer(leaf, (unsigned long)item, + sizeof(struct btrfs_item), + &leaf->map_token, &leaf->kaddr, + &leaf->map_start, &leaf->map_len, + KM_USER1); + } + ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff + size_diff); } + + if (leaf->map_token) { + unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); + leaf->map_token = NULL; + } + /* shift the data */ memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end + size_diff, btrfs_leaf_data(leaf) + @@ -1921,7 +2020,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, btrfs_print_leaf(root, leaf); BUG(); } - check_leaf(root, path, 0); return ret; } @@ -1963,10 +2061,23 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, for (i = slot; i < nritems; i++) { u32 ioff; item = btrfs_item_nr(leaf, i); + + if (!leaf->map_token) { + map_extent_buffer(leaf, (unsigned long)item, + sizeof(struct btrfs_item), + &leaf->map_token, &leaf->kaddr, + &leaf->map_start, &leaf->map_len, + KM_USER1); + } ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff - data_size); } + if (leaf->map_token) { + unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); + leaf->map_token = NULL; + } + /* shift the data */ memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end - data_size, btrfs_leaf_data(leaf) + @@ -1983,7 +2094,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, btrfs_print_leaf(root, leaf); BUG(); } - check_leaf(root, path, 0); return ret; } @@ -2046,12 +2156,26 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, * item0..itemN ... dataN.offset..dataN.size .. data0.size */ /* first correct the data pointers */ + WARN_ON(leaf->map_token); for (i = slot; i < nritems; i++) { u32 ioff; + item = btrfs_item_nr(leaf, i); + if (!leaf->map_token) { + map_extent_buffer(leaf, (unsigned long)item, + sizeof(struct btrfs_item), + &leaf->map_token, &leaf->kaddr, + &leaf->map_start, &leaf->map_len, + KM_USER1); + } + ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff - data_size); } + if (leaf->map_token) { + unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); + leaf->map_token = NULL; + } /* shift the items */ memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), @@ -2081,7 +2205,6 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, btrfs_print_leaf(root, leaf); BUG(); } - check_leaf(root, path, 0); out: return ret; } @@ -2186,10 +2309,24 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, for (i = slot + 1; i < nritems; i++) { u32 ioff; + item = btrfs_item_nr(leaf, i); + if (!leaf->map_token) { + map_extent_buffer(leaf, (unsigned long)item, + sizeof(struct btrfs_item), + &leaf->map_token, &leaf->kaddr, + &leaf->map_start, &leaf->map_len, + KM_USER1); + } ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff + dsize); } + + if (leaf->map_token) { + unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); + leaf->map_token = NULL; + } + memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), btrfs_item_nr_offset(slot + 1), sizeof(struct btrfs_item) * @@ -2209,8 +2346,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; wret = btrfs_free_extent(trans, root, - extent_buffer_blocknr(leaf), - 1, 1); + leaf->start, leaf->len, 1); if (wret) ret = wret; } @@ -2247,7 +2383,8 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, } if (btrfs_header_nritems(leaf) == 0) { - u64 blocknr = extent_buffer_blocknr(leaf); + u64 bytenr = leaf->start; + u32 blocksize = leaf->len; clean_tree_block(trans, root, leaf); wait_on_tree_block_writeback(root, leaf); @@ -2257,8 +2394,8 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = wret; free_extent_buffer(leaf); - wret = btrfs_free_extent(trans, root, blocknr, - 1, 1); + wret = btrfs_free_extent(trans, root, bytenr, + blocksize, 1); if (wret) ret = wret; } else { @@ -2281,7 +2418,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) { int slot; int level = 1; - u64 blocknr; + u64 bytenr; struct extent_buffer *c; struct extent_buffer *next = NULL; @@ -2296,14 +2433,15 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) continue; } - blocknr = btrfs_node_blockptr(c, slot); + bytenr = btrfs_node_blockptr(c, slot); if (next) free_extent_buffer(next); if (path->reada) reada_for_search(root, path, level, slot); - next = read_tree_block(root, blocknr); + next = read_tree_block(root, bytenr, + btrfs_level_size(root, level -1)); break; } path->slots[level] = slot; @@ -2317,7 +2455,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) break; if (path->reada) reada_for_search(root, path, level, 0); - next = read_tree_block(root, btrfs_node_blockptr(next, 0)); + next = read_tree_block(root, btrfs_node_blockptr(next, 0), + btrfs_level_size(root, level - 1)); } return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 256689551eb..4d05456ec32 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -96,7 +96,7 @@ struct btrfs_key { struct btrfs_header { u8 csum[BTRFS_CSUM_SIZE]; u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ - __le64 blocknr; /* which block this node is supposed to live in */ + __le64 bytenr; /* which block this node is supposed to live in */ __le64 generation; __le64 owner; __le32 nritems; @@ -122,16 +122,17 @@ struct btrfs_super_block { u8 csum[BTRFS_CSUM_SIZE]; /* the first 3 fields must match struct btrfs_header */ u8 fsid[16]; /* FS specific uuid */ - __le64 blocknr; /* this block number */ + __le64 bytenr; /* this block number */ __le64 magic; __le64 generation; __le64 root; - __le64 total_blocks; - __le64 blocks_used; + __le64 total_bytes; + __le64 bytes_used; __le64 root_dir_objectid; __le32 sectorsize; __le32 nodesize; __le32 leafsize; + u8 root_level; } __attribute__ ((__packed__)); /* @@ -231,13 +232,14 @@ struct btrfs_dir_item { struct btrfs_root_item { struct btrfs_inode_item inode; __le64 root_dirid; - __le64 blocknr; - __le64 block_limit; - __le64 blocks_used; + __le64 bytenr; + __le64 byte_limit; + __le64 bytes_used; __le32 flags; __le32 refs; struct btrfs_disk_key drop_progress; u8 drop_level; + u8 level; } __attribute__ ((__packed__)); #define BTRFS_FILE_EXTENT_REG 0 @@ -250,8 +252,8 @@ struct btrfs_file_extent_item { * disk space consumed by the extent, checksum blocks are included * in these numbers */ - __le64 disk_blocknr; - __le64 disk_num_blocks; + __le64 disk_bytenr; + __le64 disk_num_bytes; /* * the logical offset in file blocks (no csums) * this extent record is for. This allows a file extent to point @@ -263,7 +265,7 @@ struct btrfs_file_extent_item { /* * the logical number of file blocks (no csums included) */ - __le64 num_blocks; + __le64 num_bytes; } __attribute__ ((__packed__)); struct btrfs_csum_item { @@ -429,6 +431,7 @@ static inline u##bits btrfs_##name(struct extent_buffer *eb, \ int err; \ char *map_token; \ char *kaddr; \ + int unmap_on_exit = (eb->map_token == NULL); \ unsigned long map_start; \ unsigned long map_len; \ unsigned long offset = (unsigned long)s + \ @@ -436,12 +439,13 @@ static inline u##bits btrfs_##name(struct extent_buffer *eb, \ err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ - &map_start, &map_len, KM_USER0); \ + &map_start, &map_len, KM_USER1); \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ u##bits res = le##bits##_to_cpu(*tmp); \ - unmap_extent_buffer(eb, map_token, KM_USER0); \ + if (unmap_on_exit) \ + unmap_extent_buffer(eb, map_token, KM_USER1); \ return res; \ } else { \ __le##bits res; \ @@ -457,17 +461,19 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, \ char *kaddr; \ unsigned long map_start; \ unsigned long map_len; \ + int unmap_on_exit = (eb->map_token == NULL); \ unsigned long offset = (unsigned long)s + \ offsetof(type, member); \ err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ - &map_start, &map_len, KM_USER0); \ + &map_start, &map_len, KM_USER1); \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ *tmp = cpu_to_le##bits(val); \ - unmap_extent_buffer(eb, map_token, KM_USER0); \ + if (unmap_on_exit) \ + unmap_extent_buffer(eb, map_token, KM_USER1); \ } else { \ val = cpu_to_le##bits(val); \ write_eb_member(eb, s, type, member, &val); \ @@ -483,15 +489,17 @@ static inline u##bits btrfs_##name(struct extent_buffer *eb) \ unsigned long map_start; \ unsigned long map_len; \ unsigned long offset = offsetof(type, member); \ + int unmap_on_exit = (eb->map_token == NULL); \ err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ - &map_start, &map_len, KM_USER0); \ + &map_start, &map_len, KM_USER1); \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ u##bits res = le##bits##_to_cpu(*tmp); \ - unmap_extent_buffer(eb, map_token, KM_USER0); \ + if (unmap_on_exit) \ + unmap_extent_buffer(eb, map_token, KM_USER1); \ return res; \ } else { \ __le##bits res; \ @@ -508,15 +516,17 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, \ unsigned long map_start; \ unsigned long map_len; \ unsigned long offset = offsetof(type, member); \ + int unmap_on_exit = (eb->map_token == NULL); \ err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ - &map_start, &map_len, KM_USER0); \ + &map_start, &map_len, KM_USER1); \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ *tmp = cpu_to_le##bits(val); \ - unmap_extent_buffer(eb, map_token, KM_USER0); \ + if (unmap_on_exit) \ + unmap_extent_buffer(eb, map_token, KM_USER1); \ } else { \ val = cpu_to_le##bits(val); \ write_eb_member(eb, NULL, type, member, &val); \ @@ -769,7 +779,7 @@ static inline void btrfs_set_key_type(struct btrfs_key *key, u8 val) } /* struct btrfs_header */ -BTRFS_SETGET_HEADER_FUNCS(header_blocknr, struct btrfs_header, blocknr, 64); +BTRFS_SETGET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64); BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header, generation, 64); BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64); @@ -817,24 +827,28 @@ static inline int btrfs_is_leaf(struct extent_buffer *eb) /* struct btrfs_root_item */ BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32); -BTRFS_SETGET_FUNCS(disk_root_blocknr, struct btrfs_root_item, blocknr, 64); +BTRFS_SETGET_FUNCS(disk_root_bytenr, struct btrfs_root_item, bytenr, 64); +BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8); -BTRFS_SETGET_STACK_FUNCS(root_blocknr, struct btrfs_root_item, blocknr, 64); +BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64); +BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8); BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64); BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32); BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 32); -BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, blocks_used, 64); -BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, block_limit, 64); +BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64); +BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); /* struct btrfs_super_block */ -BTRFS_SETGET_STACK_FUNCS(super_blocknr, struct btrfs_super_block, blocknr, 64); +BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, generation, 64); BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); -BTRFS_SETGET_STACK_FUNCS(super_total_blocks, struct btrfs_super_block, - total_blocks, 64); -BTRFS_SETGET_STACK_FUNCS(super_blocks_used, struct btrfs_super_block, - blocks_used, 64); +BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block, + root_level, 8); +BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block, + total_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block, + bytes_used, 64); BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block, sectorsize, 32); BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, @@ -856,33 +870,33 @@ static inline unsigned long btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) { unsigned long offset = (unsigned long)e; - offset += offsetof(struct btrfs_file_extent_item, disk_blocknr); + offset += offsetof(struct btrfs_file_extent_item, disk_bytenr); return offset; } static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) { - return offsetof(struct btrfs_file_extent_item, disk_blocknr) + datasize; + return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize; } static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb, struct btrfs_item *e) { unsigned long offset; - offset = offsetof(struct btrfs_file_extent_item, disk_blocknr); + offset = offsetof(struct btrfs_file_extent_item, disk_bytenr); return btrfs_item_size(eb, e) - offset; } -BTRFS_SETGET_FUNCS(file_extent_disk_blocknr, struct btrfs_file_extent_item, - disk_blocknr, 64); +BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item, + disk_bytenr, 64); BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, generation, 64); -BTRFS_SETGET_FUNCS(file_extent_disk_num_blocks, struct btrfs_file_extent_item, - disk_num_blocks, 64); +BTRFS_SETGET_FUNCS(file_extent_disk_num_bytes, struct btrfs_file_extent_item, + disk_num_bytes, 64); BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item, offset, 64); -BTRFS_SETGET_FUNCS(file_extent_num_blocks, struct btrfs_file_extent_item, - num_blocks, 64); +BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item, + num_bytes, 64); static inline struct btrfs_root *btrfs_sb(struct super_block *sb) { @@ -906,6 +920,12 @@ static inline int btrfs_set_root_name(struct btrfs_root *root, return 0; } +static inline u32 btrfs_level_size(struct btrfs_root *root, int level) { + if (level == 0) + return root->leafsize; + return root->nodesize; +} + /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ @@ -927,7 +947,7 @@ int btrfs_extent_post_op(struct btrfs_trans_handle *trans, int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, - u64 blocknr); + u64 bytenr); struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, @@ -935,22 +955,22 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint, - u64 empty_size); + struct btrfs_root *root, u32 size, + u64 hint, u64 empty_size); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u64 num_blocks, u64 empty_size, u64 search_start, + u64 num_bytes, u64 empty_size, u64 search_start, u64 search_end, struct btrfs_key *ins, int data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks, int pin); + *root, u64 bytenr, u64 num_bytes, int pin); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_map_tree *unpin); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 blocknr, u64 num_blocks); + u64 bytenr, u64 num_bytes); int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_free_block_groups(struct btrfs_fs_info *info); @@ -1040,12 +1060,12 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, u64 offset, - u64 disk_num_blocks, - u64 num_blocks); + u64 disk_num_bytes, + u64 num_bytes); int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, - u64 blocknr, int mod); + u64 bytenr, int mod); int btrfs_csum_file_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 offset, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2b86a1d779b..fad9298c696 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -28,6 +28,7 @@ #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" +#include "print-tree.h" #if 0 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) @@ -43,26 +44,25 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) #endif struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, - u64 blocknr) + u64 bytenr, u32 blocksize) { struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, - blocknr * root->sectorsize, - root->sectorsize, GFP_NOFS); + bytenr, blocksize, GFP_NOFS); if (eb) eb->alloc_addr = (unsigned long)__builtin_return_address(0); return eb; } struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 blocknr) + u64 bytenr, u32 blocksize) { struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; + eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, - blocknr * root->sectorsize, - root->sectorsize, GFP_NOFS); + bytenr, blocksize, GFP_NOFS); eb->alloc_addr = (unsigned long)__builtin_return_address(0); return eb; } @@ -208,13 +208,13 @@ static struct address_space_operations btree_aops = { .sync_page = block_sync_page, }; -int readahead_tree_block(struct btrfs_root *root, u64 blocknr) +int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; int ret = 0; - buf = btrfs_find_create_tree_block(root, blocknr); + buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return 0; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, @@ -223,12 +223,13 @@ int readahead_tree_block(struct btrfs_root *root, u64 blocknr) return ret; } -struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) +struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, + u32 blocksize) { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; - buf = btrfs_find_create_tree_block(root, blocknr); + buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return NULL; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, @@ -261,7 +262,7 @@ int set_tree_block_dirty(struct btrfs_root *root, struct extent_buffer *buf) return 0; } -static int __setup_root(int blocksize, +static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) @@ -269,9 +270,9 @@ static int __setup_root(int blocksize, root->node = NULL; root->inode = NULL; root->commit_root = NULL; - root->sectorsize = blocksize; - root->nodesize = blocksize; - root->leafsize = blocksize; + root->sectorsize = sectorsize; + root->nodesize = nodesize; + root->leafsize = leafsize; root->ref_cows = 0; root->fs_info = fs_info; root->objectid = objectid; @@ -291,21 +292,23 @@ static int __setup_root(int blocksize, return 0; } -static int find_and_setup_root(int blocksize, - struct btrfs_root *tree_root, +static int find_and_setup_root(struct btrfs_root *tree_root, struct btrfs_fs_info *fs_info, u64 objectid, struct btrfs_root *root) { int ret; + u32 blocksize; - __setup_root(blocksize, root, fs_info, objectid); + __setup_root(tree_root->nodesize, tree_root->leafsize, + tree_root->sectorsize, root, fs_info, objectid); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); - root->node = read_tree_block(root, - btrfs_root_blocknr(&root->root_item)); + blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); + root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), + blocksize); BUG_ON(!root->node); return 0; } @@ -318,14 +321,14 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_path *path; struct extent_buffer *l; u64 highest_inode; + u32 blocksize; int ret = 0; root = kzalloc(sizeof(*root), GFP_NOFS); if (!root) return ERR_PTR(-ENOMEM); if (location->offset == (u64)-1) { - ret = find_and_setup_root(fs_info->sb->s_blocksize, - fs_info->tree_root, fs_info, + ret = find_and_setup_root(tree_root, fs_info, location->objectid, root); if (ret) { kfree(root); @@ -334,7 +337,8 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, goto insert; } - __setup_root(fs_info->sb->s_blocksize, root, fs_info, + __setup_root(tree_root->nodesize, tree_root->leafsize, + tree_root->sectorsize, root, fs_info, location->objectid); path = btrfs_alloc_path(); @@ -357,8 +361,9 @@ out: kfree(root); return ERR_PTR(ret); } - root->node = read_tree_block(root, - btrfs_root_blocknr(&root->root_item)); + blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); + root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), + blocksize); BUG_ON(!root->node); insert: root->ref_cows = 1; @@ -418,6 +423,10 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *open_ctree(struct super_block *sb) { + u32 sectorsize; + u32 nodesize; + u32 leafsize; + u32 blocksize; struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), @@ -474,12 +483,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); - __setup_root(sb->s_blocksize, tree_root, + __setup_root(512, 512, 512, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); fs_info->sb_buffer = read_tree_block(tree_root, - BTRFS_SUPER_INFO_OFFSET / - sb->s_blocksize); + BTRFS_SUPER_INFO_OFFSET, + 512); if (!fs_info->sb_buffer) goto fail_iput; @@ -494,9 +503,15 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; + nodesize = btrfs_super_nodesize(disk_super); + leafsize = btrfs_super_leafsize(disk_super); + sectorsize = btrfs_super_sectorsize(disk_super); + tree_root->nodesize = nodesize; + tree_root->leafsize = leafsize; + tree_root->sectorsize = sectorsize; + i_size_write(fs_info->btree_inode, - btrfs_super_total_blocks(disk_super) << - fs_info->btree_inode->i_blkbits); + btrfs_super_total_bytes(disk_super)); if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, @@ -504,13 +519,22 @@ struct btrfs_root *open_ctree(struct super_block *sb) printk("btrfs: valid FS not found on %s\n", sb->s_id); goto fail_sb_buffer; } + blocksize = btrfs_level_size(tree_root, + btrfs_super_root_level(disk_super)); tree_root->node = read_tree_block(tree_root, - btrfs_super_root(disk_super)); + btrfs_super_root(disk_super), + blocksize); if (!tree_root->node) goto fail_sb_buffer; +#if 0 + btrfs_print_leaf(tree_root, tree_root->node); + err = -EIO; + goto fail_tree_root; +#endif mutex_lock(&fs_info->fs_mutex); - ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, + + ret = find_and_setup_root(tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); if (ret) { mutex_unlock(&fs_info->fs_mutex); @@ -611,11 +635,11 @@ int close_ctree(struct btrfs_root *root) free_extent_buffer(fs_info->tree_root->node); free_extent_buffer(fs_info->sb_buffer); - truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); - iput(fs_info->btree_inode); btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); + truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); + iput(fs_info->btree_inode); kfree(fs_info->extent_root); kfree(fs_info->tree_root); return 0; @@ -642,7 +666,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) if (transid != root->fs_info->generation) { printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", - (unsigned long long)extent_buffer_blocknr(buf), + (unsigned long long)buf->start, transid, root->fs_info->generation); WARN_ON(1); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 70d9413c599..fd4db5f810c 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -21,10 +21,11 @@ #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) -struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr); -int readahead_tree_block(struct btrfs_root *root, u64 blocknr); +struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, + u32 blocksize); +int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 blocknr); + u64 bytenr, u32 blocksize); int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); struct btrfs_root *open_ctree(struct super_block *sb); @@ -32,7 +33,7 @@ int close_ctree(struct btrfs_root *root); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, - u64 blocknr); + u64 bytenr, u32 blocksize); struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location, const char *name, int namelen); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 477466d167a..1be8f9f04a1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -129,7 +129,7 @@ err: struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, - u64 blocknr) + u64 bytenr) { struct extent_map_tree *block_group_cache; struct btrfs_block_group_cache *block_group = NULL; @@ -140,7 +140,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct block_group_cache = &info->block_group_cache; ret = find_first_extent_bit(block_group_cache, - blocknr, &start, &end, + bytenr, &start, &end, BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA); if (ret) { return NULL; @@ -152,7 +152,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct block_group = (struct btrfs_block_group_cache *)ptr; - if (block_group->key.objectid <= blocknr && blocknr <= + if (block_group->key.objectid <= bytenr && bytenr <= block_group->key.objectid + block_group->key.offset) return block_group; @@ -315,7 +315,7 @@ found: int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 blocknr, u64 num_blocks) + u64 bytenr, u64 num_bytes) { struct btrfs_path *path; int ret; @@ -324,13 +324,14 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_extent_item *item; u32 refs; + WARN_ON(num_bytes < root->sectorsize); path = btrfs_alloc_path(); if (!path) return -ENOMEM; - key.objectid = blocknr; + key.objectid = bytenr; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - key.offset = num_blocks; + key.offset = num_bytes; ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 1); if (ret < 0) @@ -361,8 +362,8 @@ int btrfs_extent_post_op(struct btrfs_trans_handle *trans, } static int lookup_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 blocknr, - u64 num_blocks, u32 *refs) + struct btrfs_root *root, u64 bytenr, + u64 num_bytes, u32 *refs) { struct btrfs_path *path; int ret; @@ -370,9 +371,10 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, struct extent_buffer *l; struct btrfs_extent_item *item; + WARN_ON(num_bytes < root->sectorsize); path = btrfs_alloc_path(); - key.objectid = blocknr; - key.offset = num_blocks; + key.objectid = bytenr; + key.offset = num_bytes; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 0); @@ -380,7 +382,7 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, goto out; if (ret != 0) { btrfs_print_leaf(root, path->nodes[0]); - printk("failed to find block number %Lu\n", blocknr); + printk("failed to find block number %Lu\n", bytenr); BUG(); } l = path->nodes[0]; @@ -394,19 +396,19 @@ out: int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - return btrfs_inc_extent_ref(trans, root, - extent_buffer_blocknr(root->node), 1); + return btrfs_inc_extent_ref(trans, root, root->node->start, + root->node->len); } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf) { - u64 blocknr; + u64 bytenr; u32 nritems; struct btrfs_key key; struct btrfs_file_extent_item *fi; int i; - int leaf; + int level; int ret; int faili; int err; @@ -414,11 +416,11 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (!root->ref_cows) return 0; - leaf = btrfs_is_leaf(buf); + level = btrfs_header_level(buf); nritems = btrfs_header_nritems(buf); for (i = 0; i < nritems; i++) { - if (leaf) { - u64 disk_blocknr; + if (level == 0) { + u64 disk_bytenr; btrfs_item_key_to_cpu(buf, &key, i); if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; @@ -427,18 +429,19 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (btrfs_file_extent_type(buf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - disk_blocknr = btrfs_file_extent_disk_blocknr(buf, fi); - if (disk_blocknr == 0) + disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); + if (disk_bytenr == 0) continue; - ret = btrfs_inc_extent_ref(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(buf, fi)); + ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, + btrfs_file_extent_disk_num_bytes(buf, fi)); if (ret) { faili = i; goto fail; } } else { - blocknr = btrfs_node_blockptr(buf, i); - ret = btrfs_inc_extent_ref(trans, root, blocknr, 1); + bytenr = btrfs_node_blockptr(buf, i); + ret = btrfs_inc_extent_ref(trans, root, bytenr, + btrfs_level_size(root, level - 1)); if (ret) { faili = i; goto fail; @@ -449,8 +452,8 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, fail: WARN_ON(1); for (i =0; i < faili; i++) { - if (leaf) { - u64 disk_blocknr; + if (level == 0) { + u64 disk_bytenr; btrfs_item_key_to_cpu(buf, &key, i); if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; @@ -459,16 +462,17 @@ fail: if (btrfs_file_extent_type(buf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - disk_blocknr = btrfs_file_extent_disk_blocknr(buf, fi); - if (disk_blocknr == 0) + disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); + if (disk_bytenr == 0) continue; - err = btrfs_free_extent(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(buf, + err = btrfs_free_extent(trans, root, disk_bytenr, + btrfs_file_extent_disk_num_bytes(buf, fi), 0); BUG_ON(err); } else { - blocknr = btrfs_node_blockptr(buf, i); - err = btrfs_free_extent(trans, root, blocknr, 1, 0); + bytenr = btrfs_node_blockptr(buf, i); + err = btrfs_free_extent(trans, root, bytenr, + btrfs_level_size(root, level - 1), 0); BUG_ON(err); } } @@ -558,31 +562,31 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 blocknr, u64 num, int alloc, int mark_free, - int data) + u64 bytenr, u64 num_bytes, int alloc, + int mark_free, int data) { struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; - u64 total = num; + u64 total = num_bytes; u64 old_val; - u64 block_in_group; + u64 byte_in_group; u64 start; u64 end; while(total) { - cache = btrfs_lookup_block_group(info, blocknr); + cache = btrfs_lookup_block_group(info, bytenr); if (!cache) { return -1; } - block_in_group = blocknr - cache->key.objectid; - WARN_ON(block_in_group > cache->key.offset); + byte_in_group = bytenr - cache->key.objectid; + WARN_ON(byte_in_group > cache->key.offset); start = cache->key.objectid; end = start + cache->key.offset - 1; set_extent_bits(&info->block_group_cache, start, end, BLOCK_GROUP_DIRTY, GFP_NOFS); old_val = btrfs_block_group_used(&cache->item); - num = min(total, cache->key.offset - block_in_group); + num_bytes = min(total, cache->key.offset - byte_in_group); if (alloc) { if (cache->data != data && old_val < (cache->key.offset >> 1)) { @@ -608,18 +612,18 @@ static int update_block_group(struct btrfs_trans_handle *trans, start, end, bit_to_set, GFP_NOFS); } - old_val += num; + old_val += num_bytes; } else { - old_val -= num; + old_val -= num_bytes; if (mark_free) { set_extent_dirty(&info->free_space_cache, - blocknr, blocknr + num - 1, + bytenr, bytenr + num_bytes - 1, GFP_NOFS); } } btrfs_set_block_group_used(&cache->item, old_val); - total -= num; - blocknr += num; + total -= num_bytes; + bytenr += num_bytes; } return 0; } @@ -701,13 +705,14 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct return 0; } -static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) +static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, + int pending) { int err = 0; struct extent_buffer *buf; if (!pending) { - buf = btrfs_find_tree_block(root, blocknr); + buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (buf) { if (btrfs_buffer_uptodate(buf)) { u64 transid = @@ -720,10 +725,11 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) free_extent_buffer(buf); } set_extent_dirty(&root->fs_info->pinned_extents, - blocknr, blocknr, GFP_NOFS); + bytenr, bytenr + num_bytes - 1, GFP_NOFS); } else { set_extent_bits(&root->fs_info->pending_del, - blocknr, blocknr, EXTENT_LOCKED, GFP_NOFS); + bytenr, bytenr + num_bytes - 1, + EXTENT_LOCKED, GFP_NOFS); } BUG_ON(err < 0); return 0; @@ -733,7 +739,7 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) * remove an extent from the root, returns 0 on success */ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks, int pin, + *root, u64 bytenr, u64 num_bytes, int pin, int mark_free) { struct btrfs_path *path; @@ -745,9 +751,9 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_extent_item *ei; u32 refs; - key.objectid = blocknr; + key.objectid = bytenr; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - key.offset = num_blocks; + key.offset = num_bytes; path = btrfs_alloc_path(); if (!path) @@ -768,28 +774,29 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(leaf); if (refs == 0) { - u64 super_blocks_used, root_blocks_used; + u64 super_used; + u64 root_used; if (pin) { - ret = pin_down_block(root, blocknr, 0); + ret = pin_down_bytes(root, bytenr, num_bytes, 0); BUG_ON(ret); } /* block accounting for super block */ - super_blocks_used = btrfs_super_blocks_used(&info->super_copy); - btrfs_set_super_blocks_used(&info->super_copy, - super_blocks_used - num_blocks); + super_used = btrfs_super_bytes_used(&info->super_copy); + btrfs_set_super_bytes_used(&info->super_copy, + super_used - num_bytes); /* block accounting for root item */ - root_blocks_used = btrfs_root_used(&root->root_item); + root_used = btrfs_root_used(&root->root_item); btrfs_set_root_used(&root->root_item, - root_blocks_used - num_blocks); + root_used - num_bytes); ret = btrfs_del_item(trans, extent_root, path); if (ret) { return ret; } - ret = update_block_group(trans, root, blocknr, num_blocks, 0, + ret = update_block_group(trans, root, bytenr, num_bytes, 0, mark_free, 0); BUG_ON(ret); } @@ -836,17 +843,18 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct * remove an extent from the root, returns 0 on success */ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks, int pin) + *root, u64 bytenr, u64 num_bytes, int pin) { struct btrfs_root *extent_root = root->fs_info->extent_root; int pending_ret; int ret; + WARN_ON(num_bytes < root->sectorsize); if (root == extent_root) { - pin_down_block(root, blocknr, 1); + pin_down_bytes(root, bytenr, num_bytes, 1); return 0; } - ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0); + ret = __free_extent(trans, root, bytenr, num_bytes, pin, pin == 0); pending_ret = del_pending_extents(trans, root->fs_info->extent_root); return ret ? ret : pending_ret; } @@ -860,8 +868,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root * Any available blocks before search_start are skipped. */ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *orig_root, u64 num_blocks, u64 empty_size, - u64 search_start, u64 search_end, u64 hint_block, + *orig_root, u64 num_bytes, u64 empty_size, + u64 search_start, u64 search_end, u64 hint_byte, struct btrfs_key *ins, u64 exclude_start, u64 exclude_nr, int data) { @@ -870,30 +878,29 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int ret; u64 hole_size = 0; int slot = 0; - u64 last_block = 0; + u64 last_byte = 0; u64 orig_search_start = search_start; int start_found; struct extent_buffer *l; struct btrfs_root * root = orig_root->fs_info->extent_root; struct btrfs_fs_info *info = root->fs_info; - int total_needed = num_blocks; + u64 total_needed = num_bytes; int level; struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; - u64 cached_search_start = 0; - WARN_ON(num_blocks < 1); + WARN_ON(num_bytes < root->sectorsize); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); level = btrfs_header_level(root->node); if (search_end == (u64)-1) - search_end = btrfs_super_total_blocks(&info->super_copy); - if (hint_block) { - block_group = btrfs_lookup_block_group(info, hint_block); + search_end = btrfs_super_total_bytes(&info->super_copy); + if (hint_byte) { + block_group = btrfs_lookup_block_group(info, hint_byte); block_group = btrfs_find_block_group(root, block_group, - hint_block, data, 1); + hint_byte, data, 1); } else { block_group = btrfs_find_block_group(root, trans->block_group, 0, @@ -906,7 +913,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root check_failed: search_start = find_search_start(root, &block_group, search_start, total_needed, data); - cached_search_start = search_start; btrfs_init_path(path); ins->objectid = search_start; @@ -958,27 +964,27 @@ check_failed: start_found = 1; goto check_pending; } - ins->objectid = last_block > search_start ? - last_block : search_start; + ins->objectid = last_byte > search_start ? + last_byte : search_start; ins->offset = search_end - ins->objectid; goto check_pending; } btrfs_item_key_to_cpu(l, &key, slot); - if (key.objectid >= search_start && key.objectid > last_block && + if (key.objectid >= search_start && key.objectid > last_byte && start_found) { - if (last_block < search_start) - last_block = search_start; - hole_size = key.objectid - last_block; - if (hole_size >= num_blocks) { - ins->objectid = last_block; + if (last_byte < search_start) + last_byte = search_start; + hole_size = key.objectid - last_byte; + if (hole_size >= num_bytes) { + ins->objectid = last_byte; ins->offset = hole_size; goto check_pending; } } if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) { if (!start_found) { - last_block = key.objectid; + last_byte = key.objectid; start_found = 1; } goto next; @@ -986,9 +992,9 @@ check_failed: start_found = 1; - last_block = key.objectid + key.offset; + last_byte = key.objectid + key.offset; - if (!full_scan && last_block >= block_group->key.objectid + + if (!full_scan && last_byte >= block_group->key.objectid + block_group->key.offset) { btrfs_release_path(root, path); search_start = block_group->key.objectid + @@ -1006,20 +1012,20 @@ check_pending: btrfs_release_path(root, path); BUG_ON(ins->objectid < search_start); - if (ins->objectid + num_blocks >= search_end) + if (ins->objectid + num_bytes >= search_end) goto enospc; if (test_range_bit(&info->extent_ins, ins->objectid, - ins->objectid + num_blocks -1, EXTENT_LOCKED, 0)) { - search_start = ins->objectid + num_blocks; + ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) { + search_start = ins->objectid + num_bytes; goto new_group; } if (test_range_bit(&info->pinned_extents, ins->objectid, - ins->objectid + num_blocks -1, EXTENT_DIRTY, 0)) { - search_start = ins->objectid + num_blocks; + ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) { + search_start = ins->objectid + num_bytes; goto new_group; } - if (exclude_nr > 0 && (ins->objectid + num_blocks > exclude_start && + if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start && ins->objectid < exclude_start + exclude_nr)) { search_start = exclude_start + exclude_nr; goto new_group; @@ -1029,12 +1035,12 @@ check_pending: if (block_group) trans->block_group = block_group; } - ins->offset = num_blocks; + ins->offset = num_bytes; btrfs_free_path(path); return 0; new_group: - if (search_start + num_blocks >= search_end) { + if (search_start + num_bytes >= search_end) { enospc: search_start = orig_search_start; if (full_scan) { @@ -1069,12 +1075,12 @@ error: */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u64 num_blocks, u64 empty_size, u64 hint_block, + u64 num_bytes, u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, int data) { int ret; int pending_ret; - u64 super_blocks_used, root_blocks_used; + u64 super_used, root_used; u64 search_start = 0; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; @@ -1083,9 +1089,9 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, btrfs_set_stack_extent_refs(&extent_item, 1); btrfs_set_stack_extent_owner(&extent_item, owner); - WARN_ON(num_blocks < 1); - ret = find_free_extent(trans, root, num_blocks, empty_size, - search_start, search_end, hint_block, ins, + WARN_ON(num_bytes < root->sectorsize); + ret = find_free_extent(trans, root, num_bytes, empty_size, + search_start, search_end, hint_byte, ins, trans->alloc_exclude_start, trans->alloc_exclude_nr, data); BUG_ON(ret); @@ -1093,21 +1099,18 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, return ret; /* block accounting for super block */ - super_blocks_used = btrfs_super_blocks_used(&info->super_copy); - btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used + - num_blocks); + super_used = btrfs_super_bytes_used(&info->super_copy); + btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes); /* block accounting for root item */ - root_blocks_used = btrfs_root_used(&root->root_item); - btrfs_set_root_used(&root->root_item, root_blocks_used + - num_blocks); + root_used = btrfs_root_used(&root->root_item); + btrfs_set_root_used(&root->root_item, root_used + num_bytes); clear_extent_dirty(&root->fs_info->free_space_cache, ins->objectid, ins->objectid + ins->offset - 1, GFP_NOFS); if (root == extent_root) { - BUG_ON(num_blocks != 1); set_extent_bits(&root->fs_info->extent_ins, ins->objectid, ins->objectid + ins->offset - 1, EXTENT_LOCKED, GFP_NOFS); @@ -1146,7 +1149,8 @@ update_block: * returns the tree buffer or NULL. */ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint, + struct btrfs_root *root, + u32 blocksize, u64 hint, u64 empty_size) { struct btrfs_key ins; @@ -1154,14 +1158,15 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct extent_buffer *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, empty_size, hint, (u64)-1, &ins, 0); + blocksize, empty_size, hint, + (u64)-1, &ins, 0); if (ret) { BUG_ON(ret > 0); return ERR_PTR(ret); } - buf = btrfs_find_create_tree_block(root, ins.objectid); + buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize); if (!buf) { - btrfs_free_extent(trans, root, ins.objectid, 1, 0); + btrfs_free_extent(trans, root, ins.objectid, blocksize, 0); return ERR_PTR(-ENOMEM); } btrfs_set_buffer_uptodate(buf); @@ -1191,7 +1196,7 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, BUG_ON(!btrfs_is_leaf(leaf)); nritems = btrfs_header_nritems(leaf); for (i = 0; i < nritems; i++) { - u64 disk_blocknr; + u64 disk_bytenr; btrfs_item_key_to_cpu(leaf, &key, i); if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) @@ -1204,11 +1209,11 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, * FIXME make sure to insert a trans record that * repeats the snapshot del on crash */ - disk_blocknr = btrfs_file_extent_disk_blocknr(leaf, fi); - if (disk_blocknr == 0) + disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + if (disk_bytenr == 0) continue; - ret = btrfs_free_extent(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(leaf, fi), 0); + ret = btrfs_free_extent(trans, root, disk_bytenr, + btrfs_file_extent_disk_num_bytes(leaf, fi), 0); BUG_ON(ret); } return 0; @@ -1219,19 +1224,23 @@ static void reada_walk_down(struct btrfs_root *root, { int i; u32 nritems; - u64 blocknr; + u64 bytenr; int ret; u32 refs; + int level; + u32 blocksize; nritems = btrfs_header_nritems(node); + level = btrfs_header_level(node); for (i = 0; i < nritems; i++) { - blocknr = btrfs_node_blockptr(node, i); - ret = lookup_extent_ref(NULL, root, blocknr, 1, &refs); + bytenr = btrfs_node_blockptr(node, i); + blocksize = btrfs_level_size(root, level - 1); + ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs); BUG_ON(ret); if (refs != 1) continue; mutex_unlock(&root->fs_info->fs_mutex); - ret = readahead_tree_block(root, blocknr); + ret = readahead_tree_block(root, bytenr, blocksize); cond_resched(); mutex_lock(&root->fs_info->fs_mutex); if (ret) @@ -1248,15 +1257,16 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root { struct extent_buffer *next; struct extent_buffer *cur; - u64 blocknr; + u64 bytenr; + u32 blocksize; int ret; u32 refs; WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = lookup_extent_ref(trans, root, - extent_buffer_blocknr(path->nodes[*level]), - 1, &refs); + path->nodes[*level]->start, + path->nodes[*level]->len, &refs); BUG_ON(ret); if (refs > 1) goto out; @@ -1283,30 +1293,33 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ret); break; } - blocknr = btrfs_node_blockptr(cur, path->slots[*level]); - ret = lookup_extent_ref(trans, root, blocknr, 1, &refs); + bytenr = btrfs_node_blockptr(cur, path->slots[*level]); + blocksize = btrfs_level_size(root, *level - 1); + ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs); BUG_ON(ret); if (refs != 1) { path->slots[*level]++; - ret = btrfs_free_extent(trans, root, blocknr, 1, 1); + ret = btrfs_free_extent(trans, root, bytenr, + blocksize, 1); BUG_ON(ret); continue; } - next = btrfs_find_tree_block(root, blocknr); + next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next || !btrfs_buffer_uptodate(next)) { free_extent_buffer(next); mutex_unlock(&root->fs_info->fs_mutex); - next = read_tree_block(root, blocknr); + next = read_tree_block(root, bytenr, blocksize); mutex_lock(&root->fs_info->fs_mutex); /* we dropped the lock, check one more time */ - ret = lookup_extent_ref(trans, root, blocknr, 1, &refs); + ret = lookup_extent_ref(trans, root, bytenr, + blocksize, &refs); BUG_ON(ret); if (refs != 1) { path->slots[*level]++; free_extent_buffer(next); ret = btrfs_free_extent(trans, root, - blocknr, 1, 1); + bytenr, blocksize, 1); BUG_ON(ret); continue; } @@ -1321,8 +1334,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root out: WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = btrfs_free_extent(trans, root, - extent_buffer_blocknr(path->nodes[*level]), 1, 1); + ret = btrfs_free_extent(trans, root, path->nodes[*level]->start, + path->nodes[*level]->len, 1); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -1359,8 +1372,8 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } else { ret = btrfs_free_extent(trans, root, - extent_buffer_blocknr(path->nodes[*level]), - 1, 1); + path->nodes[*level]->start, + path->nodes[*level]->len, 1); BUG_ON(ret); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; @@ -1476,16 +1489,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; - u64 group_size_blocks; block_group_cache = &info->block_group_cache; - group_size_blocks = BTRFS_BLOCK_GROUP_SIZE >> - info->sb->s_blocksize_bits; - root = info->extent_root; key.objectid = 0; - key.offset = group_size_blocks; + key.offset = BTRFS_BLOCK_GROUP_SIZE; btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); path = btrfs_alloc_path(); @@ -1532,7 +1541,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) (u64)cache); if (key.objectid >= - btrfs_super_total_blocks(&info->super_copy)) + btrfs_super_total_bytes(&info->super_copy)) break; } diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index e081558d52f..f658703c42e 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1963,18 +1963,27 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb, int i) struct page *p; if (i == 0) return eb->first_page; + i += eb->start >> PAGE_CACHE_SHIFT; + if (eb->last_page && eb->last_page->index == i) + return eb->last_page; + p = find_get_page(eb->first_page->mapping, i); page_cache_release(p); + eb->last_page = p; return p; } +static inline unsigned long num_extent_pages(u64 start, u64 len) +{ + return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - + (start >> PAGE_CACHE_SHIFT); +} struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, u64 start, unsigned long len, gfp_t mask) { - unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) - - (start >> PAGE_CACHE_SHIFT) + 1; + unsigned long num_pages = num_extent_pages(start, len); unsigned long i; unsigned long index = start >> PAGE_CACHE_SHIFT; struct extent_buffer *eb; @@ -1986,7 +1995,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, if (!eb || IS_ERR(eb)) return NULL; - eb->alloc_addr = __builtin_return_address(0); + eb->alloc_addr = (unsigned long)__builtin_return_address(0); eb->start = start; eb->len = len; atomic_set(&eb->refs, 1); @@ -1994,6 +2003,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, for (i = 0; i < num_pages; i++, index++) { p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); if (!p) { + WARN_ON(1); /* make sure the free only frees the pages we've * grabbed a reference on */ @@ -2021,8 +2031,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, u64 start, unsigned long len, gfp_t mask) { - unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) - - (start >> PAGE_CACHE_SHIFT) + 1; + unsigned long num_pages = num_extent_pages(start, len); unsigned long i; unsigned long index = start >> PAGE_CACHE_SHIFT; struct extent_buffer *eb; @@ -2033,7 +2042,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, if (!eb || IS_ERR(eb)) return NULL; - eb->alloc_addr = __builtin_return_address(0); + eb->alloc_addr = (unsigned long)__builtin_return_address(0); eb->start = start; eb->len = len; atomic_set(&eb->refs, 1); @@ -2070,8 +2079,7 @@ void free_extent_buffer(struct extent_buffer *eb) if (!atomic_dec_and_test(&eb->refs)) return; - num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - - (eb->start >> PAGE_CACHE_SHIFT) + 1; + num_pages = num_extent_pages(eb->start, eb->len); if (eb->first_page) page_cache_release(eb->first_page); @@ -2094,8 +2102,7 @@ int clear_extent_buffer_dirty(struct extent_map_tree *tree, u64 end = start + eb->len - 1; set = clear_extent_dirty(tree, start, end, GFP_NOFS); - num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - - (eb->start >> PAGE_CACHE_SHIFT) + 1; + num_pages = num_extent_pages(eb->start, eb->len); for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); @@ -2145,8 +2152,7 @@ int set_extent_buffer_uptodate(struct extent_map_tree *tree, struct page *page; unsigned long num_pages; - num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - - (eb->start >> PAGE_CACHE_SHIFT) + 1; + num_pages = num_extent_pages(eb->start, eb->len); set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); @@ -2191,8 +2197,7 @@ int read_extent_buffer_pages(struct extent_map_tree *tree, return 0; } - num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - - (eb->start >> PAGE_CACHE_SHIFT) + 1; + num_pages = num_extent_pages(eb->start, eb->len); for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); if (PageUptodate(page)) { @@ -2267,14 +2272,14 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, } EXPORT_SYMBOL(read_extent_buffer); -int map_extent_buffer(struct extent_buffer *eb, unsigned long start, - unsigned long min_len, - char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km) +static int __map_extent_buffer(struct extent_buffer *eb, unsigned long start, + unsigned long min_len, char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km) { size_t offset = start & (PAGE_CACHE_SIZE - 1); char *kaddr; + struct page *p; size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; unsigned long end_i = (start_offset + start + min_len) >> @@ -2283,21 +2288,59 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long start, if (i != end_i) return -EINVAL; - WARN_ON(start > eb->len); + if (start >= eb->len) { + printk("bad start in map eb start %Lu len %lu caller start %lu min %lu\n", eb->start, eb->len, start, min_len); + WARN_ON(1); + } if (i == 0) { offset = start_offset; *map_start = 0; } else { + offset = 0; *map_start = (i << PAGE_CACHE_SHIFT) - start_offset; } - kaddr = kmap_atomic(extent_buffer_page(eb, i), km); + p = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(p)); + kaddr = kmap_atomic(p, km); *token = kaddr; *map = kaddr + offset; *map_len = PAGE_CACHE_SIZE - offset; return 0; } + +int map_extent_buffer(struct extent_buffer *eb, unsigned long start, + unsigned long min_len, + char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km) +{ + int err; + int save = 0; + if (eb->map_token) { + if (start >= eb->map_start && + start + min_len <= eb->map_start + eb->map_len) { + *token = eb->map_token; + *map = eb->kaddr; + *map_start = eb->map_start; + *map_len = eb->map_len; + return 0; + } + unmap_extent_buffer(eb, eb->map_token, km); + eb->map_token = NULL; + save = 1; + } + err = __map_extent_buffer(eb, start, min_len, token, map, + map_start, map_len, km); + if (!err && save) { + eb->map_token = *token; + eb->kaddr = *map; + eb->map_start = *map_start; + eb->map_len = *map_len; + } + return err; +} EXPORT_SYMBOL(map_extent_buffer); void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) @@ -2574,7 +2617,6 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, ((unsigned long)PAGE_CACHE_SIZE - 1); src_off_in_page = src_end & ((unsigned long)PAGE_CACHE_SIZE - 1); - if (src_i == 0) src_off_in_page += start_offset; if (dst_i == 0) @@ -2582,14 +2624,13 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, cur = min(len, src_off_in_page + 1); cur = min(cur, dst_off_in_page + 1); - move_pages(extent_buffer_page(dst, dst_i), extent_buffer_page(dst, src_i), dst_off_in_page - cur + 1, src_off_in_page - cur + 1, cur); - dst_end -= cur - 1; - src_end -= cur - 1; + dst_end -= cur; + src_end -= cur; len -= cur; } } diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 75dc600dc0f..4ef8bdd6834 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -70,7 +70,12 @@ struct extent_buffer { struct list_head list; struct list_head leak_list; struct page *first_page; + struct page *last_page; unsigned long alloc_addr; + char *map_token; + char *kaddr; + unsigned long map_start; + unsigned long map_len; }; typedef struct extent_map *(get_extent_t)(struct inode *inode, @@ -147,11 +152,6 @@ static inline void extent_buffer_get(struct extent_buffer *eb) atomic_inc(&eb->refs); } -static inline u64 extent_buffer_blocknr(struct extent_buffer *eb) -{ - return eb->start / 4096; -} - int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, unsigned long start, unsigned long len); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 226f6d028c3..9a65e97a4e2 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -27,8 +27,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, - u64 offset, u64 disk_num_blocks, - u64 num_blocks) + u64 offset, u64 disk_num_bytes, + u64 num_bytes) { int ret = 0; struct btrfs_file_extent_item *item; @@ -50,10 +50,10 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_blocknr(leaf, item, offset); - btrfs_set_file_extent_disk_num_blocks(leaf, item, disk_num_blocks); + btrfs_set_file_extent_disk_bytenr(leaf, item, offset); + btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes); btrfs_set_file_extent_offset(leaf, item, 0); - btrfs_set_file_extent_num_blocks(leaf, item, num_blocks); + btrfs_set_file_extent_num_bytes(leaf, item, num_bytes); btrfs_set_file_extent_generation(leaf, item, trans->transid); btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); btrfs_mark_buffer_dirty(leaf); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1734ca69555..844d8807e44 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -120,9 +120,9 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page, KM_USER1); write_extent_buffer(leaf, kaddr + page_offset, ptr, size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr, KM_USER1); btrfs_mark_buffer_dirty(leaf); fail: btrfs_free_path(path); @@ -142,11 +142,12 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct inode *inode = file->f_path.dentry->d_inode; struct extent_map *em; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - u64 hint_block; - u64 num_blocks; + u64 hint_byte; + u64 num_bytes; u64 start_pos; u64 end_of_last_block; u64 end_pos = pos + write_bytes; + u32 inline_size; loff_t isize = i_size_read(inode); em = alloc_extent_map(GFP_NOFS); @@ -156,11 +157,12 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, em->bdev = inode->i_sb->s_bdev; start_pos = pos & ~((u64)root->sectorsize - 1); - num_blocks = (write_bytes + pos - start_pos + root->sectorsize - 1) >> - inode->i_blkbits; + num_bytes = (write_bytes + pos - start_pos + + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); down_read(&BTRFS_I(inode)->root->snap_sem); - end_of_last_block = start_pos + (num_blocks << inode->i_blkbits) - 1; + end_of_last_block = start_pos + num_bytes - 1; + lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); @@ -169,8 +171,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, goto out_unlock; } btrfs_set_trans_block_group(trans, inode); - inode->i_blocks += num_blocks << 3; - hint_block = 0; + inode->i_blocks += num_bytes >> 9; + hint_byte = 0; if ((end_of_last_block & 4095) == 0) { printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block); @@ -191,11 +193,10 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, err = btrfs_drop_extents(trans, root, inode, last_pos_in_file, last_pos_in_file + hole_size, - &hint_block); + &hint_byte); if (err) goto failed; - hole_size >>= inode->i_blkbits; err = btrfs_insert_file_extent(trans, root, inode->i_ino, last_pos_in_file, @@ -209,8 +210,10 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, * either allocate an extent for the new bytes or setup the key * to show we are doing inline data in the extent */ + inline_size = end_pos - start_pos; if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || - pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { + inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root) || + inline_size >= PAGE_CACHE_SIZE) { u64 last_end; for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; @@ -224,10 +227,9 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, } else { struct page *p = pages[0]; /* step one, delete the existing extents in this range */ - /* FIXME blocksize != pagesize */ err = btrfs_drop_extents(trans, root, inode, start_pos, (pos + write_bytes + root->sectorsize -1) & - ~((u64)root->sectorsize - 1), &hint_block); + ~((u64)root->sectorsize - 1), &hint_byte); if (err) goto failed; @@ -283,7 +285,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) */ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 start, u64 end, u64 *hint_block) + u64 start, u64 end, u64 *hint_byte) { int ret; struct btrfs_key key; @@ -346,8 +348,7 @@ next_slot: found_type = btrfs_file_extent_type(leaf, extent); if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = key.offset + - (btrfs_file_extent_num_blocks(leaf, extent) << - inode->i_blkbits); + btrfs_file_extent_num_bytes(leaf, extent); found_extent = 1; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { struct btrfs_item *item; @@ -386,17 +387,17 @@ next_slot: if (end < extent_end && end >= key.offset) { if (found_extent) { - u64 disk_blocknr = - btrfs_file_extent_disk_blocknr(leaf,extent); - u64 disk_num_blocks = - btrfs_file_extent_disk_num_blocks(leaf, + u64 disk_bytenr = + btrfs_file_extent_disk_bytenr(leaf, extent); + u64 disk_num_bytes = + btrfs_file_extent_disk_num_bytes(leaf, extent); read_extent_buffer(leaf, &old, (unsigned long)extent, sizeof(old)); - if (disk_blocknr != 0) { + if (disk_bytenr != 0) { ret = btrfs_inc_extent_ref(trans, root, - disk_blocknr, disk_num_blocks); + disk_bytenr, disk_num_bytes); BUG_ON(ret); } } @@ -410,21 +411,19 @@ next_slot: keep = 1; WARN_ON(start & (root->sectorsize - 1)); if (found_extent) { - new_num = (start - key.offset) >> - inode->i_blkbits; - old_num = btrfs_file_extent_num_blocks(leaf, - extent); - *hint_block = - btrfs_file_extent_disk_blocknr(leaf, - extent); - if (btrfs_file_extent_disk_blocknr(leaf, - extent)) { + new_num = start - key.offset; + old_num = btrfs_file_extent_num_bytes(leaf, + extent); + *hint_byte = + btrfs_file_extent_disk_bytenr(leaf, + extent); + if (btrfs_file_extent_disk_bytenr(leaf, + extent)) { inode->i_blocks -= - (old_num - new_num) << 3; + (old_num - new_num) >> 9; } - btrfs_set_file_extent_num_blocks(leaf, - extent, - new_num); + btrfs_set_file_extent_num_bytes(leaf, extent, + new_num); btrfs_mark_buffer_dirty(leaf); } else { WARN_ON(1); @@ -432,33 +431,32 @@ next_slot: } /* delete the entire extent */ if (!keep) { - u64 disk_blocknr = 0; - u64 disk_num_blocks = 0; - u64 extent_num_blocks = 0; + u64 disk_bytenr = 0; + u64 disk_num_bytes = 0; + u64 extent_num_bytes = 0; if (found_extent) { - disk_blocknr = - btrfs_file_extent_disk_blocknr(leaf, + disk_bytenr = + btrfs_file_extent_disk_bytenr(leaf, extent); - disk_num_blocks = - btrfs_file_extent_disk_num_blocks(leaf, - extent); - extent_num_blocks = - btrfs_file_extent_num_blocks(leaf, - extent); - *hint_block = - btrfs_file_extent_disk_blocknr(leaf, + disk_num_bytes = + btrfs_file_extent_disk_num_bytes(leaf, extent); + extent_num_bytes = + btrfs_file_extent_num_bytes(leaf, extent); + *hint_byte = + btrfs_file_extent_disk_bytenr(leaf, + extent); } ret = btrfs_del_item(trans, root, path); /* TODO update progress marker and return */ BUG_ON(ret); btrfs_release_path(root, path); extent = NULL; - if (found_extent && disk_blocknr != 0) { - inode->i_blocks -= extent_num_blocks << 3; + if (found_extent && disk_bytenr != 0) { + inode->i_blocks -= extent_num_bytes >> 9; ret = btrfs_free_extent(trans, root, - disk_blocknr, - disk_num_blocks, 0); + disk_bytenr, + disk_num_bytes, 0); } BUG_ON(ret); @@ -491,20 +489,19 @@ next_slot: (unsigned long)extent, sizeof(old)); btrfs_set_file_extent_offset(leaf, extent, - le64_to_cpu(old.offset) + - ((end - key.offset) >> inode->i_blkbits)); - WARN_ON(le64_to_cpu(old.num_blocks) < - (extent_end - end) >> inode->i_blkbits); - btrfs_set_file_extent_num_blocks(leaf, extent, - (extent_end - end) >> inode->i_blkbits); - + le64_to_cpu(old.offset) + end - key.offset); + WARN_ON(le64_to_cpu(old.num_bytes) < + (extent_end - end)); + btrfs_set_file_extent_num_bytes(leaf, extent, + extent_end - end); btrfs_set_file_extent_type(leaf, extent, BTRFS_FILE_EXTENT_REG); + btrfs_mark_buffer_dirty(path->nodes[0]); - if (le64_to_cpu(old.disk_blocknr) != 0) { + if (le64_to_cpu(old.disk_bytenr) != 0) { inode->i_blocks += - btrfs_file_extent_num_blocks(leaf, - extent) << 3; + btrfs_file_extent_num_bytes(leaf, + extent) >> 9; } ret = 0; goto out; @@ -531,12 +528,9 @@ static int prepare_pages(struct btrfs_root *root, unsigned long index = pos >> PAGE_CACHE_SHIFT; struct inode *inode = file->f_path.dentry->d_inode; int err = 0; - u64 num_blocks; u64 start_pos; start_pos = pos & ~((u64)root->sectorsize - 1); - num_blocks = (write_bytes + pos - start_pos + root->sectorsize - 1) >> - inode->i_blkbits; memset(pages, 0, num_pages * sizeof(struct page *)); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fbe2836364e..d6b3a55ed8e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -77,19 +77,19 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) struct btrfs_trans_handle *trans; struct btrfs_key ins; u64 alloc_hint = 0; - u64 num_blocks; + u64 num_bytes; int ret; - u64 blocksize = 1 << inode->i_blkbits; + u64 blocksize = root->sectorsize; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); BUG_ON(!trans); - num_blocks = (end - start + blocksize) & ~(blocksize - 1); + num_bytes = (end - start + blocksize) & ~(blocksize - 1); ret = btrfs_drop_extents(trans, root, inode, - start, start + num_blocks, &alloc_hint); - num_blocks = num_blocks >> inode->i_blkbits; - ret = btrfs_alloc_extent(trans, root, inode->i_ino, num_blocks, 0, + start, start + num_bytes, &alloc_hint); + + ret = btrfs_alloc_extent(trans, root, inode->i_ino, num_bytes, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { WARN_ON(1); @@ -186,7 +186,8 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) zeroit: printk("btrfs csum failed ino %lu off %llu\n", page->mapping->host->i_ino, (unsigned long long)start); - memset(kaddr + offset, 1, end - start + 1); flush_dcache_page(page); + memset(kaddr + offset, 1, end - start + 1); + flush_dcache_page(page); kunmap_atomic(kaddr, KM_IRQ0); return 0; } @@ -547,7 +548,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; u64 extent_start = 0; - u64 extent_num_blocks = 0; + u64 extent_num_bytes = 0; u64 item_end = 0; int found_extent; int del_item; @@ -593,8 +594,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_INLINE) { item_end += - btrfs_file_extent_num_blocks(leaf, fi) << - inode->i_blkbits; + btrfs_file_extent_num_bytes(leaf, fi); } } if (found_type == BTRFS_CSUM_ITEM_KEY) { @@ -626,28 +626,27 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_INLINE) { u64 num_dec; - extent_start = btrfs_file_extent_disk_blocknr(leaf, fi); + extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); if (!del_item) { - u64 orig_num_blocks = - btrfs_file_extent_num_blocks(leaf, fi); - extent_num_blocks = inode->i_size - + u64 orig_num_bytes = + btrfs_file_extent_num_bytes(leaf, fi); + extent_num_bytes = inode->i_size - found_key.offset + root->sectorsize - 1; - extent_num_blocks >>= inode->i_blkbits; - btrfs_set_file_extent_num_blocks(leaf, fi, - extent_num_blocks); - num_dec = (orig_num_blocks - - extent_num_blocks) << 3; + btrfs_set_file_extent_num_bytes(leaf, fi, + extent_num_bytes); + num_dec = (orig_num_bytes - + extent_num_bytes) >> 9; if (extent_start != 0) { inode->i_blocks -= num_dec; } btrfs_mark_buffer_dirty(leaf); } else { - extent_num_blocks = - btrfs_file_extent_disk_num_blocks(leaf, - fi); + extent_num_bytes = + btrfs_file_extent_disk_num_bytes(leaf, + fi); /* FIXME blocksize != 4096 */ - num_dec = btrfs_file_extent_num_blocks(leaf, - fi) << 3; + num_dec = btrfs_file_extent_num_bytes(leaf, + fi) >> 9; if (extent_start != 0) { found_extent = 1; inode->i_blocks -= num_dec; @@ -664,7 +663,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, btrfs_release_path(root, path); if (found_extent) { ret = btrfs_free_extent(trans, root, extent_start, - extent_num_blocks, 0); + extent_num_bytes, 0); BUG_ON(ret); } } @@ -709,7 +708,8 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, static int btrfs_truncate_page(struct address_space *mapping, loff_t from) { struct inode *inode = mapping->host; - unsigned blocksize = 1 << inode->i_blkbits; + struct btrfs_root *root = BTRFS_I(inode)->root; + u32 blocksize = root->sectorsize; pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); struct page *page; @@ -719,7 +719,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) if ((offset & (blocksize - 1)) == 0) goto out; - down_read(&BTRFS_I(inode)->root->snap_sem); + down_read(&root->snap_sem); ret = -ENOMEM; page = grab_cache_page(mapping, index); if (!page) @@ -778,8 +778,6 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) err = btrfs_drop_extents(trans, root, inode, pos, pos + hole_size, &alloc_hint); - hole_size >>= inode->i_blkbits; - err = btrfs_insert_file_extent(trans, root, inode->i_ino, pos, 0, 0, hole_size); btrfs_end_transaction(trans, root); @@ -1490,7 +1488,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, { int ret; int err = 0; - u64 blocknr; + u64 bytenr; u64 extent_start = 0; u64 extent_end = 0; u64 objectid = inode->i_ino; @@ -1540,10 +1538,6 @@ again: leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - - blocknr = btrfs_file_extent_disk_blocknr(leaf, item); - blocknr += btrfs_file_extent_offset(leaf, item); - /* are we inside the extent that was found? */ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); found_type = btrfs_key_type(&found_key); @@ -1556,8 +1550,7 @@ again: extent_start = found_key.offset; if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = extent_start + - (btrfs_file_extent_num_blocks(leaf, item) << - inode->i_blkbits); + btrfs_file_extent_num_bytes(leaf, item); err = 0; if (start < extent_start || start >= extent_end) { em->start = start; @@ -1570,17 +1563,18 @@ again: } goto not_found_em; } - if (btrfs_file_extent_disk_blocknr(leaf, item) == 0) { + bytenr = btrfs_file_extent_disk_bytenr(leaf, item); + if (bytenr == 0) { em->start = extent_start; em->end = extent_end - 1; em->block_start = EXTENT_MAP_HOLE; em->block_end = EXTENT_MAP_HOLE; goto insert; } - em->block_start = blocknr << inode->i_blkbits; + bytenr += btrfs_file_extent_offset(leaf, item); + em->block_start = bytenr; em->block_end = em->block_start + - (btrfs_file_extent_num_blocks(leaf, item) << - inode->i_blkbits) - 1; + btrfs_file_extent_num_bytes(leaf, item) - 1; em->start = extent_start; em->end = extent_end - 1; goto insert; @@ -1592,7 +1586,8 @@ again: size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, path->slots[0])); - extent_end = extent_start | ((u64)root->sectorsize - 1); + extent_end = (extent_start + size) | + ((u64)root->sectorsize - 1); if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { @@ -1617,8 +1612,10 @@ again: ptr = btrfs_file_extent_inline_start(item); map = kmap(page); read_extent_buffer(leaf, map + page_offset, ptr, size); + /* memset(map + page_offset + size, 0, root->sectorsize - (page_offset + size)); + */ flush_dcache_page(page); kunmap(page); set_extent_uptodate(em_tree, extent_start, @@ -1836,13 +1833,13 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - leaf = btrfs_alloc_free_block(trans, root, 0, 0); + leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, 0); if (IS_ERR(leaf)) return PTR_ERR(leaf); btrfs_set_header_nritems(leaf, 0); btrfs_set_header_level(leaf, 0); - btrfs_set_header_blocknr(leaf, extent_buffer_blocknr(leaf)); + btrfs_set_header_bytenr(leaf, leaf->start); btrfs_set_header_generation(leaf, trans->transid); btrfs_set_header_owner(leaf, root->root_key.objectid); write_extent_buffer(leaf, root->fs_info->fsid, @@ -1858,7 +1855,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) inode_item->nblocks = cpu_to_le64(1); inode_item->mode = cpu_to_le32(S_IFDIR | 0755); - btrfs_set_root_blocknr(&root_item, extent_buffer_blocknr(leaf)); + btrfs_set_root_bytenr(&root_item, leaf->start); + btrfs_set_root_level(&root_item, 0); btrfs_set_root_refs(&root_item, 1); btrfs_set_root_used(&root_item, 0); @@ -1971,8 +1969,8 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); - btrfs_set_root_blocknr(&new_root_item, - extent_buffer_blocknr(root->node)); + btrfs_set_root_bytenr(&new_root_item, root->node->start); + btrfs_set_root_level(&new_root_item, btrfs_header_level(root->node)); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index a825ce078a5..9f8696c8a8e 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -36,7 +36,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) u32 type; printk("leaf %llu total ptrs %d free space %d\n", - (unsigned long long)btrfs_header_blocknr(l), nr, + (unsigned long long)btrfs_header_bytenr(l), nr, btrfs_leaf_free_space(root, l)); for (i = 0 ; i < nr ; i++) { item = btrfs_item_nr(l, i); @@ -65,8 +65,8 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) break; case BTRFS_ROOT_ITEM_KEY: ri = btrfs_item_ptr(l, i, struct btrfs_root_item); - printk("\t\troot data blocknr %llu refs %u\n", - (unsigned long long)btrfs_disk_root_blocknr(l, ri), + printk("\t\troot data bytenr %llu refs %u\n", + (unsigned long long)btrfs_disk_root_bytenr(l, ri), btrfs_disk_root_refs(l, ri)); break; case BTRFS_EXTENT_ITEM_KEY: @@ -84,12 +84,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) btrfs_file_extent_inline_len(l, item)); break; } - printk("\t\textent data disk block %llu nr %llu\n", - (unsigned long long)btrfs_file_extent_disk_blocknr(l, fi), - (unsigned long long)btrfs_file_extent_disk_num_blocks(l, fi)); + printk("\t\textent data disk bytenr %llu nr %llu\n", + (unsigned long long)btrfs_file_extent_disk_bytenr(l, fi), + (unsigned long long)btrfs_file_extent_disk_num_bytes(l, fi)); printk("\t\textent data offset %llu nr %llu\n", (unsigned long long)btrfs_file_extent_offset(l, fi), - (unsigned long long)btrfs_file_extent_num_blocks(l, fi)); + (unsigned long long)btrfs_file_extent_num_bytes(l, fi)); break; case BTRFS_BLOCK_GROUP_ITEM_KEY: bi = btrfs_item_ptr(l, i, @@ -106,16 +106,18 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) int i; u32 nr; struct btrfs_key key; + int level; if (!c) return; nr = btrfs_header_nritems(c); - if (btrfs_is_leaf(c)) { + level = btrfs_header_level(c); + if (level == 0) { btrfs_print_leaf(root, c); return; } printk("node %llu level %d total ptrs %d free spc %u\n", - (unsigned long long)btrfs_header_blocknr(c), + (unsigned long long)btrfs_header_bytenr(c), btrfs_header_level(c), nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); for (i = 0; i < nr; i++) { @@ -129,7 +131,8 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) } for (i = 0; i < nr; i++) { struct extent_buffer *next = read_tree_block(root, - btrfs_node_blockptr(c, i)); + btrfs_node_blockptr(c, i), + btrfs_level_size(root, level - 1)); if (btrfs_is_leaf(next) && btrfs_header_level(c) != 1) BUG(); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 39a1435c68f..5c4370f3a5b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -303,10 +303,12 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct btrfs_root *root = btrfs_sb(dentry->d_sb); struct btrfs_super_block *disk_super = &root->fs_info->super_copy; + int bits = dentry->d_sb->s_blocksize_bits; buf->f_namelen = BTRFS_NAME_LEN; - buf->f_blocks = btrfs_super_total_blocks(disk_super); - buf->f_bfree = buf->f_blocks - btrfs_super_blocks_used(disk_super); + buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; + buf->f_bfree = buf->f_blocks - + (btrfs_super_bytes_used(disk_super) >> bits); buf->f_bavail = buf->f_bfree; buf->f_bsize = dentry->d_sb->s_blocksize; buf->f_type = BTRFS_SUPER_MAGIC; diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 9654e90eec8..0bd1fd3d29d 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -42,14 +42,15 @@ static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf) static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf) { + return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_blocks_used(&fs->super_copy)); + (unsigned long long)btrfs_super_bytes_used(&fs->super_copy)); } static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_total_blocks(&fs->super_copy)); + (unsigned long long)btrfs_super_total_bytes(&fs->super_copy)); } static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 55289b71056..60f61345a8d 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -205,12 +205,13 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, btrfs_write_dirty_block_groups(trans, extent_root); while(1) { - old_extent_block = btrfs_root_blocknr(&extent_root->root_item); - if (old_extent_block == - extent_buffer_blocknr(extent_root->node)) + old_extent_block = btrfs_root_bytenr(&extent_root->root_item); + if (old_extent_block == extent_root->node->start) break; - btrfs_set_root_blocknr(&extent_root->root_item, - extent_buffer_blocknr(extent_root->node)); + btrfs_set_root_bytenr(&extent_root->root_item, + extent_root->node->start); + btrfs_set_root_level(&extent_root->root_item, + btrfs_header_level(extent_root->node)); ret = btrfs_update_root(trans, tree_root, &extent_root->root_key, &extent_root->root_item); @@ -284,8 +285,8 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); if (root->commit_root == root->node) { - WARN_ON(extent_buffer_blocknr(root->node) != - btrfs_root_blocknr(&root->root_item)); + WARN_ON(root->node->start != + btrfs_root_bytenr(&root->root_item)); free_extent_buffer(root->commit_root); root->commit_root = NULL; @@ -314,8 +315,10 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, root->commit_root = NULL; root->root_key.offset = root->fs_info->generation; - btrfs_set_root_blocknr(&root->root_item, - extent_buffer_blocknr(root->node)); + btrfs_set_root_bytenr(&root->root_item, + root->node->start); + btrfs_set_root_level(&root->root_item, + btrfs_header_level(root->node)); err = btrfs_insert_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); @@ -407,8 +410,8 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, struct dirty_root *dirty; struct btrfs_trans_handle *trans; unsigned long nr; - u64 num_blocks; - u64 blocks_used; + u64 num_bytes; + u64 bytes_used; int ret = 0; int err; @@ -419,7 +422,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, dirty = list_entry(list->next, struct dirty_root, list); list_del_init(&dirty->list); - num_blocks = btrfs_root_used(&dirty->root->root_item); + num_bytes = btrfs_root_used(&dirty->root->root_item); root = dirty->latest_root; while(1) { @@ -446,12 +449,12 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, } BUG_ON(ret); - num_blocks -= btrfs_root_used(&dirty->root->root_item); - blocks_used = btrfs_root_used(&root->root_item); - if (num_blocks) { + num_bytes -= btrfs_root_used(&dirty->root->root_item); + bytes_used = btrfs_root_used(&root->root_item); + if (num_bytes) { record_root_in_trans(root); btrfs_set_root_used(&root->root_item, - blocks_used - num_blocks); + bytes_used - num_bytes); } ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); if (ret) { @@ -560,7 +563,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_set_super_generation(&root->fs_info->super_copy, cur_trans->transid); btrfs_set_super_root(&root->fs_info->super_copy, - extent_buffer_blocknr(root->fs_info->tree_root->node)); + root->fs_info->tree_root->node->start); + btrfs_set_super_root_level(&root->fs_info->super_copy, + btrfs_header_level(root->fs_info->tree_root->node)); write_extent_buffer(root->fs_info->sb_buffer, &root->fs_info->super_copy, 0, diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index daf019afa0a..3feac2f2862 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -27,13 +27,15 @@ static void reada_defrag(struct btrfs_root *root, { int i; u32 nritems; - u64 blocknr; + u64 bytenr; + u32 blocksize; int ret; + blocksize = btrfs_level_size(root, btrfs_header_level(node) - 1); nritems = btrfs_header_nritems(node); for (i = 0; i < nritems; i++) { - blocknr = btrfs_node_blockptr(node, i); - ret = readahead_tree_block(root, blocknr); + bytenr = btrfs_node_blockptr(node, i); + ret = readahead_tree_block(root, bytenr, blocksize); if (ret) break; } @@ -46,7 +48,7 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, { struct extent_buffer *next; struct extent_buffer *cur; - u64 blocknr; + u64 bytenr; int ret = 0; int is_extent = 0; @@ -80,10 +82,11 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, break; } - blocknr = btrfs_node_blockptr(cur, path->slots[*level]); + bytenr = btrfs_node_blockptr(cur, path->slots[*level]); if (cache_only) { - next = btrfs_find_tree_block(root, blocknr); + next = btrfs_find_tree_block(root, bytenr, + btrfs_level_size(root, *level - 1)); /* FIXME, test for defrag */ if (!next || !btrfs_buffer_uptodate(next)) { free_extent_buffer(next); @@ -91,7 +94,8 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, continue; } } else { - next = read_tree_block(root, blocknr); + next = read_tree_block(root, bytenr, + btrfs_level_size(root, *level - 1)); } ret = btrfs_cow_block(trans, root, next, path->nodes[*level], path->slots[*level], &next); -- cgit v1.2.3-70-g09d2 From 6b80053d02be41886344b5007d04e345311ec0b5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:17:34 -0400 Subject: Btrfs: Add back the online defragging code Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 177 ++++++++++++++++++++++++------------------------- fs/btrfs/disk-io.c | 60 +++++++++++++++++ fs/btrfs/disk-io.h | 7 ++ fs/btrfs/extent-tree.c | 8 +-- fs/btrfs/extent_map.h | 2 + fs/btrfs/transaction.c | 4 +- fs/btrfs/tree-defrag.c | 14 ++-- 7 files changed, 166 insertions(+), 106 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 34de83630ae..08ddf1873ab 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -155,55 +155,49 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, return ret; } -#if 0 -static int close_blocks(u64 blocknr, u64 other) +static int close_blocks(u64 blocknr, u64 other, u32 blocksize) { - if (blocknr < other && other - blocknr < 8) + if (blocknr < other && other - (blocknr + blocksize) < 32768) return 1; - if (blocknr > other && blocknr - other < 8) + if (blocknr > other && blocknr - (other + blocksize) < 32768) return 1; return 0; } -static int should_defrag_leaf(struct extent_buffer *eb) +static int should_defrag_leaf(struct extent_buffer *leaf) { - return 0; - struct btrfs_leaf *leaf = btrfs_buffer_leaf(eb); - struct btrfs_disk_key *key; + struct btrfs_key key; u32 nritems; - if (buffer_defrag(bh)) + if (btrfs_buffer_defrag(leaf)) return 1; - nritems = btrfs_header_nritems(&leaf->header); + nritems = btrfs_header_nritems(leaf); if (nritems == 0) return 0; - key = &leaf->items[0].key; - if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY) + btrfs_item_key_to_cpu(leaf, &key, 0); + if (key.type == BTRFS_DIR_ITEM_KEY) return 1; - key = &leaf->items[nritems-1].key; - if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY) + + btrfs_item_key_to_cpu(leaf, &key, nritems - 1); + if (key.type == BTRFS_DIR_ITEM_KEY) return 1; if (nritems > 4) { - key = &leaf->items[nritems/2].key; - if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY) + btrfs_item_key_to_cpu(leaf, &key, nritems / 2); + if (key.type == BTRFS_DIR_ITEM_KEY) return 1; } return 0; } -#endif int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *parent, int cache_only, u64 *last_ret) { - return 0; -#if 0 - struct btrfs_node *parent_node; - struct extent_buffer *cur_eb; - struct extent_buffer *tmp_eb; + struct extent_buffer *cur; + struct extent_buffer *tmp; u64 blocknr; u64 search_start = *last_ret; u64 last_block = 0; @@ -214,6 +208,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, int i; int err = 0; int parent_level; + int uptodate; + u32 blocksize; if (trans->transaction != root->fs_info->running_transaction) { printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, @@ -225,12 +221,12 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, root->fs_info->generation); WARN_ON(1); } - if (buffer_defrag_done(parent)) + if (btrfs_buffer_defrag_done(parent)) return 0; - parent_node = btrfs_buffer_node(parent); - parent_nritems = btrfs_header_nritems(&parent_node->header); - parent_level = btrfs_header_level(&parent_node->header); + parent_nritems = btrfs_header_nritems(parent); + parent_level = btrfs_header_level(parent); + blocksize = btrfs_level_size(root, parent_level - 1); start_slot = 0; end_slot = parent_nritems; @@ -240,56 +236,60 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, for (i = start_slot; i < end_slot; i++) { int close = 1; - blocknr = btrfs_node_blockptr(parent_node, i); + blocknr = btrfs_node_blockptr(parent, i); if (last_block == 0) last_block = blocknr; if (i > 0) { - other = btrfs_node_blockptr(parent_node, i - 1); - close = close_blocks(blocknr, other); + other = btrfs_node_blockptr(parent, i - 1); + close = close_blocks(blocknr, other, blocksize); } if (close && i < end_slot - 1) { - other = btrfs_node_blockptr(parent_node, i + 1); - close = close_blocks(blocknr, other); + other = btrfs_node_blockptr(parent, i + 1); + close = close_blocks(blocknr, other, blocksize); } if (close) { last_block = blocknr; continue; } - cur_bh = btrfs_find_tree_block(root, blocknr); - if (!cur_bh || !buffer_uptodate(cur_bh) || - buffer_locked(cur_bh) || - (parent_level != 1 && !buffer_defrag(cur_bh)) || - (parent_level == 1 && !should_defrag_leaf(cur_bh))) { + cur = btrfs_find_tree_block(root, blocknr, blocksize); + if (cur) + uptodate = btrfs_buffer_uptodate(cur); + else + uptodate = 0; + if (!cur || !uptodate || + (parent_level != 1 && !btrfs_buffer_defrag(cur)) || + (parent_level == 1 && !should_defrag_leaf(cur))) { if (cache_only) { - brelse(cur_bh); + free_extent_buffer(cur); continue; } - if (!cur_bh || !buffer_uptodate(cur_bh) || - buffer_locked(cur_bh)) { - brelse(cur_bh); - cur_bh = read_tree_block(root, blocknr); + if (!cur) { + cur = read_tree_block(root, blocknr, + blocksize); + } else if (!uptodate) { + btrfs_read_buffer(cur); } } if (search_start == 0) - search_start = last_block & ~((u64)65535); + search_start = last_block; - err = __btrfs_cow_block(trans, root, cur_bh, parent, i, - &tmp_bh, search_start, - min(8, end_slot - i)); + err = __btrfs_cow_block(trans, root, cur, parent, i, + &tmp, search_start, + min(16 * blocksize, + (end_slot - i) * blocksize)); if (err) { - brelse(cur_bh); + free_extent_buffer(cur); break; } - search_start = bh_blocknr(tmp_bh); + search_start = tmp->start; *last_ret = search_start; if (parent_level == 1) - clear_buffer_defrag(tmp_bh); - set_buffer_defrag_done(tmp_bh); - brelse(tmp_bh); + btrfs_clear_buffer_defrag(tmp); + btrfs_set_buffer_defrag_done(tmp); + free_extent_buffer(tmp); } return err; -#endif } /* @@ -892,22 +892,17 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { - return; -#if 0 struct extent_buffer *node; - int i; u32 nritems; - u64 bytenr; u64 search; - u64 cluster_start; - int ret; - int nread = 0; + u64 lowest_read; + u64 highest_read; + u64 nread = 0; int direction = path->reada; - int level; - struct radix_tree_root found; - unsigned long gang[8]; struct extent_buffer *eb; - + u32 nr; + u32 blocksize; + u32 nscan = 0; if (level == 0) return; @@ -917,42 +912,46 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, node = path->nodes[level]; search = btrfs_node_blockptr(node, slot); - eb = btrfs_find_tree_block(root, search); + blocksize = btrfs_level_size(root, level - 1); + eb = btrfs_find_tree_block(root, search, blocksize); if (eb) { free_extent_buffer(eb); return; } - init_bit_radix(&found); + highest_read = search; + lowest_read = search; + nritems = btrfs_header_nritems(node); - level = btrfs_header_level(node) - 1; - for (i = slot; i < nritems; i++) { - bytenr = btrfs_node_blockptr(node, i); - set_radix_bit(&found, blocknr); - } - if (direction > 0) { - cluster_start = search - 4; - if (cluster_start > search) - cluster_start = 0; - } else - cluster_start = search + 4; + nr = slot; while(1) { - ret = find_first_radix_bit(&found, gang, 0, ARRAY_SIZE(gang)); - if (!ret) - break; - for (i = 0; i < ret; i++) { - blocknr = gang[i]; - clear_radix_bit(&found, blocknr); - if (path->reada == 1 && nread > 16) - continue; - if (close_blocks(cluster_start, blocknr)) { - readahead_tree_block(root, blocknr); - nread++; - cluster_start = blocknr; - } + if (direction < 0) { + if (nr == 0) + break; + nr--; + } else if (direction > 0) { + nr++; + if (nr >= nritems) + break; } + search = btrfs_node_blockptr(node, nr); + if ((search >= lowest_read && search <= highest_read) || + (search < lowest_read && lowest_read - search <= 32768) || + (search > highest_read && search - highest_read <= 32768)) { + readahead_tree_block(root, search, blocksize); + nread += blocksize; + } + nscan++; + if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32)) + break; + if(nread > (1024 * 1024) || nscan > 128) + break; + + if (search < lowest_read) + lowest_read = search; + if (search > highest_read) + highest_read = search; } -#endif } /* * look for key in the tree. path is filled in with nodes along the way diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0636f79672e..8e606e6658a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -678,3 +678,63 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) balance_dirty_pages_ratelimited_nr( root->fs_info->btree_inode->i_mapping, nr); } + +void btrfs_set_buffer_defrag(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct inode *btree_inode = root->fs_info->btree_inode; + set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, + buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); +} + +void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct inode *btree_inode = root->fs_info->btree_inode; + set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, + buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, + GFP_NOFS); +} + +int btrfs_buffer_defrag(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct inode *btree_inode = root->fs_info->btree_inode; + return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, + buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0); +} + +int btrfs_buffer_defrag_done(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct inode *btree_inode = root->fs_info->btree_inode; + return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, + buf->start, buf->start + buf->len - 1, + EXTENT_DEFRAG_DONE, 0); +} + +int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct inode *btree_inode = root->fs_info->btree_inode; + return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, + buf->start, buf->start + buf->len - 1, + EXTENT_DEFRAG_DONE, GFP_NOFS); +} + +int btrfs_clear_buffer_defrag(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct inode *btree_inode = root->fs_info->btree_inode; + return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, + buf->start, buf->start + buf->len - 1, + EXTENT_DEFRAG, GFP_NOFS); +} + +int btrfs_read_buffer(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct inode *btree_inode = root->fs_info->btree_inode; + return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + buf, 1); +} diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index fd4db5f810c..190b07bc642 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -51,4 +51,11 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf); int btrfs_set_buffer_uptodate(struct extent_buffer *buf); int wait_on_tree_block_writeback(struct btrfs_root *root, struct extent_buffer *buf); +void btrfs_set_buffer_defrag(struct extent_buffer *buf); +void btrfs_set_buffer_defrag_done(struct extent_buffer *buf); +int btrfs_buffer_defrag(struct extent_buffer *buf); +int btrfs_buffer_defrag_done(struct extent_buffer *buf); +int btrfs_clear_buffer_defrag(struct extent_buffer *buf); +int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf); +int btrfs_read_buffer(struct extent_buffer *buf); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1be8f9f04a1..0b0c947194a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1173,13 +1173,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, buf->alloc_addr = (unsigned long)__builtin_return_address(0); set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); - /* - set_buffer_checked(buf); - set_buffer_defrag(buf); - */ - /* FIXME!!!!!!!!!!!!!!!! - set_radix_bit(&trans->transaction->dirty_pages, buf->pages[0]->index); - */ + btrfs_set_buffer_defrag(buf); trans->blocks_used++; return buf; } diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index d74a2b3e3b5..8409b5cbeda 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -14,6 +14,8 @@ #define EXTENT_LOCKED (1 << 3) #define EXTENT_NEW (1 << 4) #define EXTENT_DELALLOC (1 << 5) +#define EXTENT_DEFRAG (1 << 6) +#define EXTENT_DEFRAG_DONE (1 << 7) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 60f61345a8d..87456ab7427 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -355,7 +355,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) return 0; trans = btrfs_start_transaction(root, 1); - while (0) { + while (1) { root->defrag_running = 1; ret = btrfs_defrag_leaves(trans, root, cacheonly); nr = trans->blocks_used; @@ -400,7 +400,7 @@ int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) btrfs_defrag_root(root, 1); } } - // btrfs_defrag_root(info->extent_root, 1); + btrfs_defrag_root(info->extent_root, 1); return err; } diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 3feac2f2862..d23216aa4ab 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -116,10 +116,10 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, } WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); -#if 0 - clear_buffer_defrag(path->nodes[*level]); - clear_buffer_defrag_done(path->nodes[*level]); -#endif + + btrfs_clear_buffer_defrag(path->nodes[*level]); + btrfs_clear_buffer_defrag_done(path->nodes[*level]); + free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -148,10 +148,8 @@ static int defrag_walk_up(struct btrfs_trans_handle *trans, root->defrag_level = i; return 0; } else { - /* - clear_buffer_defrag(path->nodes[*level]); - clear_buffer_defrag_done(path->nodes[*level]); - */ + btrfs_clear_buffer_defrag(path->nodes[*level]); + btrfs_clear_buffer_defrag_done(path->nodes[*level]); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; -- cgit v1.2.3-70-g09d2 From e19caa5f0e34b571ed0c2617554af5c43cb124d1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:17:44 -0400 Subject: Btrfs: Fix allocation routines to avoid intermixing data and metadata allocations Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 38 ++++++++++++++++++++++---------------- fs/btrfs/extent_map.c | 6 +++--- 2 files changed, 25 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0b0c947194a..72e6b274a45 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -165,7 +165,7 @@ static u64 find_search_start(struct btrfs_root *root, { int ret; struct btrfs_block_group_cache *cache = *cache_ret; - u64 last = max(search_start, cache->key.objectid); + u64 last; u64 start = 0; u64 end = 0; @@ -173,11 +173,14 @@ again: ret = cache_block_group(root, cache); if (ret) goto out; + last = max(search_start, cache->key.objectid); + while(1) { ret = find_first_extent_bit(&root->fs_info->free_space_cache, last, &start, &end, EXTENT_DIRTY); - if (ret) - goto out; + if (ret) { + goto new_group; + } start = max(last, start); last = end + 1; @@ -191,15 +194,13 @@ out: return search_start; new_group: - cache = btrfs_lookup_block_group(root->fs_info, - last + cache->key.offset - 1); + last = cache->key.objectid + cache->key.offset; + cache = btrfs_lookup_block_group(root->fs_info, last); if (!cache) { return search_start; } - cache = btrfs_find_block_group(root, cache, - last + cache->key.offset - 1, data, 0); + cache = btrfs_find_block_group(root, cache, last, data, 0); *cache_ret = cache; - last = min(cache->key.objectid, last); goto again; } @@ -257,12 +258,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (used < div_factor(hint->key.offset, factor)) { return hint; } - last = hint->key.offset * 3; - if (hint->key.objectid >= last) - last = max(search_start + hint->key.offset - 1, - hint->key.objectid - last); - else - last = hint->key.objectid + hint->key.offset; + last = hint->key.objectid + hint->key.offset; hint_last = last; } else { if (hint) @@ -913,7 +909,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root check_failed: search_start = find_search_start(root, &block_group, search_start, total_needed, data); - btrfs_init_path(path); ins->objectid = search_start; ins->offset = 0; @@ -958,6 +953,9 @@ check_failed: continue; if (ret < 0) goto error; + + search_start = max(search_start, + block_group->key.objectid); if (!start_found) { ins->objectid = search_start; ins->offset = search_end - search_start; @@ -967,6 +965,7 @@ check_failed: ins->objectid = last_byte > search_start ? last_byte : search_start; ins->offset = search_end - ins->objectid; + BUG_ON(ins->objectid >= search_end); goto check_pending; } btrfs_item_key_to_cpu(l, &key, slot); @@ -998,7 +997,7 @@ check_failed: block_group->key.offset) { btrfs_release_path(root, path); search_start = block_group->key.objectid + - block_group->key.offset * 2; + block_group->key.offset; goto new_group; } next: @@ -1015,6 +1014,12 @@ check_pending: if (ins->objectid + num_bytes >= search_end) goto enospc; + if (!full_scan && ins->objectid + num_bytes >= block_group-> + key.objectid + block_group->key.offset) { + search_start = block_group->key.objectid + + block_group->key.offset; + goto new_group; + } if (test_range_bit(&info->extent_ins, ins->objectid, ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) { search_start = ins->objectid + num_bytes; @@ -1114,6 +1119,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, set_extent_bits(&root->fs_info->extent_ins, ins->objectid, ins->objectid + ins->offset - 1, EXTENT_LOCKED, GFP_NOFS); + WARN_ON(data == 1); goto update_block; } diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 7ef3397a266..e241699024d 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -981,7 +981,7 @@ int find_first_extent_bit(struct extent_map_tree *tree, u64 start, struct extent_state *state; int ret = 1; - write_lock_irq(&tree->lock); + read_lock_irq(&tree->lock); /* * this search will find all the extents that end after * our range starts. @@ -993,7 +993,7 @@ int find_first_extent_bit(struct extent_map_tree *tree, u64 start, while(1) { state = rb_entry(node, struct extent_state, rb_node); - if (state->state & bits) { + if (state->end >= start && (state->state & bits)) { *start_ret = state->start; *end_ret = state->end; ret = 0; @@ -1004,7 +1004,7 @@ int find_first_extent_bit(struct extent_map_tree *tree, u64 start, break; } out: - write_unlock_irq(&tree->lock); + read_unlock_irq(&tree->lock); return ret; } EXPORT_SYMBOL(find_first_extent_bit); -- cgit v1.2.3-70-g09d2 From 4dc119046d0d8501afa4346472917fb05586ad9c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:18:14 -0400 Subject: Btrfs: Add an extent buffer LRU to reduce radix tree hits Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 3 - fs/btrfs/disk-io.c | 25 +++---- fs/btrfs/extent-tree.c | 1 - fs/btrfs/extent_map.c | 183 ++++++++++++++++++++++++++++--------------------- fs/btrfs/extent_map.h | 16 +++-- fs/btrfs/transaction.c | 5 +- 6 files changed, 128 insertions(+), 105 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 08ddf1873ab..bef61ee883e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -88,8 +88,6 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (IS_ERR(cow)) return PTR_ERR(cow); - cow->alloc_addr = (unsigned long)__builtin_return_address(0); - copy_extent_buffer(cow, buf, 0, 0, cow->len); btrfs_set_header_bytenr(cow, cow->start); btrfs_set_header_generation(cow, trans->transid); @@ -151,7 +149,6 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); ret = __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); - (*cow_ret)->alloc_addr = (unsigned long)__builtin_return_address(0); return ret; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8e606e6658a..fd7e6c182b9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -50,8 +50,6 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, struct extent_buffer *eb; eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, bytenr, blocksize, GFP_NOFS); - if (eb) - eb->alloc_addr = (unsigned long)__builtin_return_address(0); return eb; } @@ -63,7 +61,6 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, bytenr, blocksize, GFP_NOFS); - eb->alloc_addr = (unsigned long)__builtin_return_address(0); return eb; } @@ -234,7 +231,6 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, return NULL; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, buf, 1); - buf->alloc_addr = (unsigned long)__builtin_return_address(0); return buf; } @@ -638,6 +634,7 @@ int close_ctree(struct btrfs_root *root) btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); + extent_map_tree_cleanup(&BTRFS_I(fs_info->btree_inode)->extent_tree); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); kfree(fs_info->extent_root); @@ -647,20 +644,20 @@ int close_ctree(struct btrfs_root *root) int btrfs_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->pages[0]->mapping->host; + struct inode *btree_inode = buf->last_page->mapping->host; return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->pages[0]->mapping->host; + struct inode *btree_inode = buf->last_page->mapping->host; return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } void btrfs_mark_buffer_dirty(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; u64 transid = btrfs_header_generation(buf); struct inode *btree_inode = root->fs_info->btree_inode; @@ -681,7 +678,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) void btrfs_set_buffer_defrag(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); @@ -689,7 +686,7 @@ void btrfs_set_buffer_defrag(struct extent_buffer *buf) void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, @@ -698,7 +695,7 @@ void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) int btrfs_buffer_defrag(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0); @@ -706,7 +703,7 @@ int btrfs_buffer_defrag(struct extent_buffer *buf) int btrfs_buffer_defrag_done(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, @@ -715,7 +712,7 @@ int btrfs_buffer_defrag_done(struct extent_buffer *buf) int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, @@ -724,7 +721,7 @@ int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) int btrfs_clear_buffer_defrag(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, @@ -733,7 +730,7 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf) int btrfs_read_buffer(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, buf, 1); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 72e6b274a45..525fa845d61 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1176,7 +1176,6 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); } btrfs_set_buffer_uptodate(buf); - buf->alloc_addr = (unsigned long)__builtin_return_address(0); set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); btrfs_set_buffer_defrag(buf); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index e241699024d..85b28a6a4e0 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "extent_map.h" /* temporary define until extent_map moves out of btrfs */ @@ -20,14 +21,11 @@ static struct kmem_cache *extent_map_cache; static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; -static LIST_HEAD(extent_buffers); static LIST_HEAD(buffers); static LIST_HEAD(states); -static spinlock_t extent_buffers_lock; static spinlock_t state_lock = SPIN_LOCK_UNLOCKED; -static int nr_extent_buffers; -#define MAX_EXTENT_BUFFER_CACHE 128 +#define BUFFER_LRU_MAX 64 struct tree_entry { u64 start; @@ -47,20 +45,12 @@ void __init extent_map_init(void) extent_buffer_cache = btrfs_cache_create("extent_buffers", sizeof(struct extent_buffer), 0, NULL); - spin_lock_init(&extent_buffers_lock); } void __exit extent_map_exit(void) { - struct extent_buffer *eb; struct extent_state *state; - while (!list_empty(&extent_buffers)) { - eb = list_entry(extent_buffers.next, - struct extent_buffer, list); - list_del(&eb->list); - kmem_cache_free(extent_buffer_cache, eb); - } while (!list_empty(&states)) { state = list_entry(states.next, struct extent_state, list); printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs)); @@ -68,14 +58,6 @@ void __exit extent_map_exit(void) kmem_cache_free(extent_state_cache, state); } - while (!list_empty(&buffers)) { - eb = list_entry(buffers.next, - struct extent_buffer, leak_list); - printk("buffer leak start %Lu len %lu return %lX\n", eb->start, eb->len, eb->alloc_addr); - list_del(&eb->leak_list); - kmem_cache_free(extent_buffer_cache, eb); - } - if (extent_map_cache) kmem_cache_destroy(extent_map_cache); @@ -92,10 +74,25 @@ void extent_map_tree_init(struct extent_map_tree *tree, tree->state.rb_node = NULL; tree->ops = NULL; rwlock_init(&tree->lock); + spin_lock_init(&tree->lru_lock); tree->mapping = mapping; + INIT_LIST_HEAD(&tree->buffer_lru); + tree->lru_size = 0; } EXPORT_SYMBOL(extent_map_tree_init); +void extent_map_tree_cleanup(struct extent_map_tree *tree) +{ + struct extent_buffer *eb; + while(!list_empty(&tree->buffer_lru)) { + eb = list_entry(tree->buffer_lru.next, struct extent_buffer, + lru); + list_del(&eb->lru); + free_extent_buffer(eb); + } +} +EXPORT_SYMBOL(extent_map_tree_cleanup); + struct extent_map *alloc_extent_map(gfp_t mask) { struct extent_map *em; @@ -1915,66 +1912,99 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, return (em->block_start + start - em->start) >> inode->i_blkbits; } -static struct extent_buffer *__alloc_extent_buffer(gfp_t mask) +static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb) { - struct extent_buffer *eb = NULL; - - spin_lock(&extent_buffers_lock); - if (!list_empty(&extent_buffers)) { - eb = list_entry(extent_buffers.next, struct extent_buffer, - list); - list_del(&eb->list); - WARN_ON(nr_extent_buffers == 0); - nr_extent_buffers--; - } - spin_unlock(&extent_buffers_lock); + if (list_empty(&eb->lru)) { + extent_buffer_get(eb); + list_add(&eb->lru, &tree->buffer_lru); + tree->lru_size++; + if (tree->lru_size >= BUFFER_LRU_MAX) { + struct extent_buffer *rm; + rm = list_entry(tree->buffer_lru.prev, + struct extent_buffer, lru); + tree->lru_size--; + list_del(&rm->lru); + free_extent_buffer(rm); + } + } else + list_move(&eb->lru, &tree->buffer_lru); + return 0; +} +static struct extent_buffer *find_lru(struct extent_map_tree *tree, + u64 start, unsigned long len) +{ + struct list_head *lru = &tree->buffer_lru; + struct list_head *cur = lru->next; + struct extent_buffer *eb; - if (eb) { - memset(eb, 0, sizeof(*eb)); - } else { - eb = kmem_cache_zalloc(extent_buffer_cache, mask); - } - spin_lock(&extent_buffers_lock); - list_add(&eb->leak_list, &buffers); - spin_unlock(&extent_buffers_lock); + if (list_empty(lru)) + return NULL; - return eb; + do { + eb = list_entry(cur, struct extent_buffer, lru); + if (eb->start == start && eb->len == len) { + extent_buffer_get(eb); + return eb; + } + cur = cur->next; + } while (cur != lru); + return NULL; } -static void __free_extent_buffer(struct extent_buffer *eb) +static inline unsigned long num_extent_pages(u64 start, u64 len) { - - spin_lock(&extent_buffers_lock); - list_del_init(&eb->leak_list); - spin_unlock(&extent_buffers_lock); - - if (nr_extent_buffers >= MAX_EXTENT_BUFFER_CACHE) { - kmem_cache_free(extent_buffer_cache, eb); - } else { - spin_lock(&extent_buffers_lock); - list_add(&eb->list, &extent_buffers); - nr_extent_buffers++; - spin_unlock(&extent_buffers_lock); - } + return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - + (start >> PAGE_CACHE_SHIFT); } -static inline struct page *extent_buffer_page(struct extent_buffer *eb, int i) +static inline struct page *extent_buffer_page(struct extent_buffer *eb, + unsigned long i) { struct page *p; - if (i < EXTENT_INLINE_PAGES) - return eb->pages[i]; + if (i == 0) + return eb->last_page; i += eb->start >> PAGE_CACHE_SHIFT; - p = find_get_page(eb->pages[0]->mapping, i); + p = find_get_page(eb->last_page->mapping, i); page_cache_release(p); return p; } -static inline unsigned long num_extent_pages(u64 start, u64 len) +static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree, + u64 start, + unsigned long len, + gfp_t mask) { - return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - - (start >> PAGE_CACHE_SHIFT); + struct extent_buffer *eb = NULL; + + spin_lock(&tree->lru_lock); + eb = find_lru(tree, start, len); + if (eb) + goto lru_add; + spin_unlock(&tree->lru_lock); + + if (eb) { + memset(eb, 0, sizeof(*eb)); + } else { + eb = kmem_cache_zalloc(extent_buffer_cache, mask); + } + INIT_LIST_HEAD(&eb->lru); + eb->start = start; + eb->len = len; + atomic_set(&eb->refs, 1); + + spin_lock(&tree->lru_lock); +lru_add: + add_lru(tree, eb); + spin_unlock(&tree->lru_lock); + return eb; +} + +static void __free_extent_buffer(struct extent_buffer *eb) +{ + kmem_cache_free(extent_buffer_cache, eb); } + struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, u64 start, unsigned long len, gfp_t mask) @@ -1987,14 +2017,12 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, struct address_space *mapping = tree->mapping; int uptodate = 0; - eb = __alloc_extent_buffer(mask); + eb = __alloc_extent_buffer(tree, start, len, mask); if (!eb || IS_ERR(eb)) return NULL; - eb->alloc_addr = (unsigned long)__builtin_return_address(0); - eb->start = start; - eb->len = len; - atomic_set(&eb->refs, 1); + if (eb->flags & EXTENT_BUFFER_FILLED) + return eb; for (i = 0; i < num_pages; i++, index++) { p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); @@ -2008,14 +2036,15 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, goto fail; } set_page_extent_mapped(p); - if (i < EXTENT_INLINE_PAGES) - eb->pages[i] = p; + if (i == 0) + eb->last_page = p; if (!PageUptodate(p)) uptodate = 0; unlock_page(p); } if (uptodate) eb->flags |= EXTENT_UPTODATE; + eb->flags |= EXTENT_BUFFER_FILLED; return eb; fail: free_extent_buffer(eb); @@ -2035,14 +2064,12 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, struct address_space *mapping = tree->mapping; int uptodate = 1; - eb = __alloc_extent_buffer(mask); + eb = __alloc_extent_buffer(tree, start, len, mask); if (!eb || IS_ERR(eb)) return NULL; - eb->alloc_addr = (unsigned long)__builtin_return_address(0); - eb->start = start; - eb->len = len; - atomic_set(&eb->refs, 1); + if (eb->flags & EXTENT_BUFFER_FILLED) + return eb; for (i = 0; i < num_pages; i++, index++) { p = find_lock_page(mapping, index); @@ -2055,14 +2082,15 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, goto fail; } set_page_extent_mapped(p); - if (i < EXTENT_INLINE_PAGES) - eb->pages[i] = p; + if (i == 0) + eb->last_page = p; if (!PageUptodate(p)) uptodate = 0; unlock_page(p); } if (uptodate) eb->flags |= EXTENT_UPTODATE; + eb->flags |= EXTENT_BUFFER_FILLED; return eb; fail: free_extent_buffer(eb); @@ -2231,7 +2259,8 @@ int read_extent_buffer_pages(struct extent_map_tree *tree, ret = -EIO; } } - eb->flags |= EXTENT_UPTODATE; + if (!ret) + eb->flags |= EXTENT_UPTODATE; return ret; } EXPORT_SYMBOL(read_extent_buffer_pages); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 8409b5cbeda..52a8b9394fc 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -16,6 +16,7 @@ #define EXTENT_DELALLOC (1 << 5) #define EXTENT_DEFRAG (1 << 6) #define EXTENT_DEFRAG_DONE (1 << 7) +#define EXTENT_BUFFER_FILLED (1 << 8) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) @@ -33,6 +34,9 @@ struct extent_map_tree { struct address_space *mapping; rwlock_t lock; struct extent_map_ops *ops; + spinlock_t lru_lock; + struct list_head buffer_lru; + int lru_size; }; /* note, this must start with the same fields as fs/extent_map.c:tree_entry */ @@ -64,20 +68,17 @@ struct extent_state { struct list_head list; }; -#define EXTENT_INLINE_PAGES 32 struct extent_buffer { u64 start; unsigned long len; - atomic_t refs; - int flags; - struct list_head list; - struct list_head leak_list; - unsigned long alloc_addr; char *map_token; char *kaddr; unsigned long map_start; unsigned long map_len; - struct page *pages[EXTENT_INLINE_PAGES]; + struct page *last_page; + struct list_head lru; + atomic_t refs; + int flags; }; typedef struct extent_map *(get_extent_t)(struct inode *inode, @@ -88,6 +89,7 @@ typedef struct extent_map *(get_extent_t)(struct inode *inode, void extent_map_tree_init(struct extent_map_tree *tree, struct address_space *mapping, gfp_t mask); +void extent_map_tree_cleanup(struct extent_map_tree *tree); struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, u64 start, u64 end); int add_extent_mapping(struct extent_map_tree *tree, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 87456ab7427..67e4aca36a6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -443,8 +443,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, BUG_ON(ret); mutex_unlock(&tree_root->fs_info->fs_mutex); btrfs_btree_balance_dirty(tree_root, nr); - schedule(); - + cond_resched(); mutex_lock(&tree_root->fs_info->fs_mutex); } BUG_ON(ret); @@ -471,7 +470,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, mutex_unlock(&tree_root->fs_info->fs_mutex); btrfs_btree_balance_dirty(tree_root, nr); - schedule(); + cond_resched(); } return ret; } -- cgit v1.2.3-70-g09d2 From 0f82731fc56448c2733f58e1f5db6c2cbfc90652 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:18:56 -0400 Subject: Breakout BTRFS_SETGET_FUNCS into a separate C file, the inlines were too big. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.c | 1 - fs/btrfs/ctree.h | 74 ++----------------------------------- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/struct-funcs.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/tree-defrag.c | 2 +- 6 files changed, 104 insertions(+), 74 deletions(-) create mode 100644 fs/btrfs/struct-funcs.c (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index a3b51085d7f..551743be5f0 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -5,7 +5,7 @@ obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ - extent_map.o sysfs.o + extent_map.o sysfs.o struct-funcs.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 54a5d006c56..0c6ed17ac1b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a942a242722..d1c6f023a30 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -426,77 +426,11 @@ struct btrfs_root { offsetof(type, member), \ sizeof(((type *)0)->member))) +#ifndef BTRFS_SETGET_FUNCS #define BTRFS_SETGET_FUNCS(name, type, member, bits) \ -static inline u##bits btrfs_##name(struct extent_buffer *eb, \ - type *s) \ -{ \ - int err; \ - char *map_token; \ - char *kaddr; \ - int unmap_on_exit = (eb->map_token == NULL); \ - unsigned long map_start; \ - unsigned long map_len; \ - unsigned long offset = (unsigned long)s + \ - offsetof(type, member); \ - if (eb->map_token && offset >= eb->map_start && \ - offset + sizeof(((type *)0)->member) <= eb->map_start + \ - eb->map_len) { \ - kaddr = eb->kaddr; \ - map_start = eb->map_start; \ - err = 0; \ - } else { \ - err = map_extent_buffer(eb, offset, \ - sizeof(((type *)0)->member), \ - &map_token, &kaddr, \ - &map_start, &map_len, KM_USER1); \ - } \ - if (!err) { \ - __le##bits *tmp = (__le##bits *)(kaddr + offset - \ - map_start); \ - u##bits res = le##bits##_to_cpu(*tmp); \ - if (unmap_on_exit) \ - unmap_extent_buffer(eb, map_token, KM_USER1); \ - return res; \ - } else { \ - __le##bits res; \ - read_eb_member(eb, s, type, member, &res); \ - return le##bits##_to_cpu(res); \ - } \ -} \ -static inline void btrfs_set_##name(struct extent_buffer *eb, \ - type *s, u##bits val) \ -{ \ - int err; \ - char *map_token; \ - char *kaddr; \ - unsigned long map_start; \ - unsigned long map_len; \ - int unmap_on_exit = (eb->map_token == NULL); \ - unsigned long offset = (unsigned long)s + \ - offsetof(type, member); \ - if (eb->map_token && offset >= eb->map_start && \ - offset + sizeof(((type *)0)->member) <= eb->map_start + \ - eb->map_len) { \ - kaddr = eb->kaddr; \ - map_start = eb->map_start; \ - err = 0; \ - } else { \ - err = map_extent_buffer(eb, offset, \ - sizeof(((type *)0)->member), \ - &map_token, &kaddr, \ - &map_start, &map_len, KM_USER1); \ - } \ - if (!err) { \ - __le##bits *tmp = (__le##bits *)(kaddr + offset - \ - map_start); \ - *tmp = cpu_to_le##bits(val); \ - if (unmap_on_exit) \ - unmap_extent_buffer(eb, map_token, KM_USER1); \ - } else { \ - val = cpu_to_le##bits(val); \ - write_eb_member(eb, s, type, member, &val); \ - } \ -} +u##bits btrfs_##name(struct extent_buffer *eb, type *s); \ +void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); +#endif #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ static inline u##bits btrfs_##name(struct extent_buffer *eb) \ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 525fa845d61..e4e68ea9648 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1443,7 +1443,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { free_extent_buffer(path->nodes[i]); - path->nodes[i] = 0; + path->nodes[i] = NULL; } } out: diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c new file mode 100644 index 00000000000..cf68fcf9b55 --- /dev/null +++ b/fs/btrfs/struct-funcs.c @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#define BTRFS_SETGET_FUNCS(name, type, member, bits) \ +u##bits btrfs_##name(struct extent_buffer *eb, \ + type *s) \ +{ \ + unsigned long offset = (unsigned long)s + \ + offsetof(type, member); \ + __le##bits *tmp; \ + /* ugly, but we want the fast path here */ \ + if (eb->map_token && offset >= eb->map_start && \ + offset + sizeof(((type *)0)->member) <= eb->map_start + \ + eb->map_len) { \ + tmp = (__le##bits *)(eb->kaddr + offset - \ + eb->map_start); \ + return le##bits##_to_cpu(*tmp); \ + } \ + { \ + int err; \ + char *map_token; \ + char *kaddr; \ + int unmap_on_exit = (eb->map_token == NULL); \ + unsigned long map_start; \ + unsigned long map_len; \ + __le##bits res; \ + err = map_extent_buffer(eb, offset, \ + sizeof(((type *)0)->member), \ + &map_token, &kaddr, \ + &map_start, &map_len, KM_USER1); \ + if (err) { \ + read_eb_member(eb, s, type, member, &res); \ + return le##bits##_to_cpu(res); \ + } \ + tmp = (__le##bits *)(kaddr + offset - map_start); \ + res = le##bits##_to_cpu(*tmp); \ + if (unmap_on_exit) \ + unmap_extent_buffer(eb, map_token, KM_USER1); \ + return res; \ + } \ +} \ +void btrfs_set_##name(struct extent_buffer *eb, \ + type *s, u##bits val) \ +{ \ + unsigned long offset = (unsigned long)s + \ + offsetof(type, member); \ + __le##bits *tmp; \ + /* ugly, but we want the fast path here */ \ + if (eb->map_token && offset >= eb->map_start && \ + offset + sizeof(((type *)0)->member) <= eb->map_start + \ + eb->map_len) { \ + tmp = (__le##bits *)(eb->kaddr + offset - \ + eb->map_start); \ + *tmp = cpu_to_le##bits(val); \ + return; \ + } \ + { \ + int err; \ + char *map_token; \ + char *kaddr; \ + int unmap_on_exit = (eb->map_token == NULL); \ + unsigned long map_start; \ + unsigned long map_len; \ + err = map_extent_buffer(eb, offset, \ + sizeof(((type *)0)->member), \ + &map_token, &kaddr, \ + &map_start, &map_len, KM_USER1); \ + if (err) { \ + val = cpu_to_le##bits(val); \ + write_eb_member(eb, s, type, member, &val); \ + return; \ + } \ + tmp = (__le##bits *)(kaddr + offset - map_start); \ + *tmp = cpu_to_le##bits(val); \ + if (unmap_on_exit) \ + unmap_extent_buffer(eb, map_token, KM_USER1); \ + } \ +} + +#include "ctree.h" + diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index d23216aa4ab..0d1a1be0995 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -239,7 +239,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { free_extent_buffer(path->nodes[i]); - path->nodes[i] = 0; + path->nodes[i] = NULL; } } out: -- cgit v1.2.3-70-g09d2 From 19c00ddcc31ad4bdfb86b57085e06d6135b9b1d7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:19:22 -0400 Subject: Btrfs: Add back metadata checksumming Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 + fs/btrfs/disk-io.c | 215 +++++++++++++++++++++++++++++++++++-------------- fs/btrfs/extent-tree.c | 4 + fs/btrfs/extent_map.c | 83 +++++++++++++++---- fs/btrfs/extent_map.h | 18 ++++- fs/btrfs/inode.c | 4 - 6 files changed, 245 insertions(+), 81 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d1c6f023a30..1af0a966f85 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -310,9 +310,11 @@ struct btrfs_fs_info { struct extent_buffer *sb_buffer; struct super_block *sb; struct inode *btree_inode; + spinlock_t hash_lock; struct mutex trans_mutex; struct mutex fs_mutex; struct list_head trans_list; + struct list_head hashers; struct list_head dead_roots; struct delayed_work trans_work; struct kobject super_kobj; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 16f0260fca6..1176e5420c5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -60,7 +60,7 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, struct extent_buffer *eb; eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, - bytenr, blocksize, GFP_NOFS); + bytenr, blocksize, NULL, GFP_NOFS); return eb; } @@ -99,10 +99,102 @@ out: return em; } +u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) +{ + return crc32c(seed, data, len); +} + +void btrfs_csum_final(u32 crc, char *result) +{ + *(__le32 *)result = ~cpu_to_le32(crc); +} + +static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, + int verify) +{ + char result[BTRFS_CRC32_SIZE]; + unsigned long len; + unsigned long cur_len; + unsigned long offset = BTRFS_CSUM_SIZE; + char *map_token = NULL; + char *kaddr; + unsigned long map_start; + unsigned long map_len; + int err; + u32 crc = ~(u32)0; + + len = buf->len - offset; + while(len > 0) { + err = map_private_extent_buffer(buf, offset, 32, + &map_token, &kaddr, + &map_start, &map_len, KM_USER0); + if (err) { + printk("failed to map extent buffer! %lu\n", + offset); + return 1; + } + cur_len = min(len, map_len - (offset - map_start)); + crc = btrfs_csum_data(root, kaddr + offset - map_start, + crc, cur_len); + len -= cur_len; + offset += cur_len; + unmap_extent_buffer(buf, map_token, KM_USER0); + } + btrfs_csum_final(crc, result); + + if (verify) { + if (memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) { + printk("btrfs: %s checksum verify failed on %llu\n", + root->fs_info->sb->s_id, + buf->start); + return 1; + } + } else { + write_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE); + } + return 0; +} + + +int csum_dirty_buffer(struct btrfs_root *root, struct page *page) +{ + struct extent_map_tree *tree; + u64 start = page->index << PAGE_CACHE_SHIFT; + u64 found_start; + int found_level; + unsigned long len; + struct extent_buffer *eb; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + + if (page->private == EXTENT_PAGE_PRIVATE) + goto out; + if (!page->private) + goto out; + len = page->private >> 2; + if (len == 0) { + WARN_ON(1); + } + eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); + read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1); + found_start = btrfs_header_bytenr(eb); + if (found_start != start) { + printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", + start, found_start, len); + } + found_level = btrfs_header_level(eb); + csum_tree_block(root, eb, 0); + free_extent_buffer(eb); +out: + return 0; +} + static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct extent_map_tree *tree; + struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; tree = &BTRFS_I(page->mapping->host)->extent_tree; + + csum_dirty_buffer(root, page); return extent_write_full_page(tree, page, btree_get_extent, wbc); } int btree_readpage(struct file *file, struct page *page) @@ -117,7 +209,6 @@ static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) struct extent_map_tree *tree; int ret; - BUG_ON(page->private != 1); tree = &BTRFS_I(page->mapping->host)->extent_tree; ret = try_release_extent_mapping(tree, page); if (ret == 1) { @@ -136,46 +227,6 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) btree_releasepage(page, GFP_NOFS); } -int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, - char *result) -{ - return 0; -#if 0 - u32 crc; - crc = crc32c(0, data, len); - memcpy(result, &crc, BTRFS_CRC32_SIZE); - return 0; -#endif -} - -#if 0 -static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, - int verify) -{ - return 0; - char result[BTRFS_CRC32_SIZE]; - int ret; - struct btrfs_node *node; - - ret = btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE, - bh->b_size - BTRFS_CSUM_SIZE, result); - if (ret) - return ret; - if (verify) { - if (memcmp(bh->b_data, result, BTRFS_CRC32_SIZE)) { - printk("btrfs: %s checksum verify failed on %llu\n", - root->fs_info->sb->s_id, - (unsigned long long)bh_blocknr(bh)); - return 1; - } - } else { - node = btrfs_buffer_node(bh); - memcpy(node->header.csum, result, BTRFS_CRC32_SIZE); - } - return 0; -} -#endif - #if 0 static int btree_writepage(struct page *page, struct writeback_control *wbc) { @@ -215,7 +266,7 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) if (!buf) return 0; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, - buf, 0); + buf, 0, 0); free_extent_buffer(buf); return ret; } @@ -225,12 +276,29 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; + struct extent_map_tree *extent_tree; + int ret; + + extent_tree = &BTRFS_I(btree_inode)->extent_tree; buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return NULL; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, - buf, 1); + buf, 0, 1); + if (buf->flags & EXTENT_CSUM) { + return buf; + } + if (test_range_bit(extent_tree, buf->start, buf->start + buf->len - 1, + EXTENT_CSUM, 1)) { + buf->flags |= EXTENT_CSUM; + return buf; + } + ret = csum_tree_block(root, buf, 1); + set_extent_bits(extent_tree, buf->start, + buf->start + buf->len - 1, + EXTENT_CSUM, GFP_NOFS); + buf->flags |= EXTENT_CSUM; return buf; } @@ -251,13 +319,6 @@ int wait_on_tree_block_writeback(struct btrfs_root *root, return 0; } -int set_tree_block_dirty(struct btrfs_root *root, struct extent_buffer *buf) -{ - struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); - return 0; -} - static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, @@ -416,7 +477,24 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, return root; } - +#if 0 +static int add_hasher(struct btrfs_fs_info *info, char *type) { + struct btrfs_hasher *hasher; + + hasher = kmalloc(sizeof(*hasher), GFP_NOFS); + if (!hasher) + return -ENOMEM; + hasher->hash_tfm = crypto_alloc_hash(type, 0, CRYPTO_ALG_ASYNC); + if (!hasher->hash_tfm) { + kfree(hasher); + return -EINVAL; + } + spin_lock(&info->hash_lock); + list_add(&hasher->list, &info->hashers); + spin_unlock(&info->hash_lock); + return 0; +} +#endif struct btrfs_root *open_ctree(struct super_block *sb) { u32 sectorsize; @@ -440,6 +518,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); + INIT_LIST_HEAD(&fs_info->hashers); + spin_lock_init(&fs_info->hash_lock); + memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj)); init_completion(&fs_info->kobj_unregister); sb_set_blocksize(sb, 4096); @@ -479,6 +560,14 @@ struct btrfs_root *open_ctree(struct super_block *sb) mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); +#if 0 + ret = add_hasher(fs_info, "crc32c"); + if (ret) { + printk("btrfs: failed hash setup, modprobe cryptomgr?\n"); + err = -ENOMEM; + goto fail_iput; + } +#endif __setup_root(512, 512, 512, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); @@ -509,25 +598,21 @@ struct btrfs_root *open_ctree(struct super_block *sb) i_size_write(fs_info->btree_inode, btrfs_super_total_bytes(disk_super)); - if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, sizeof(disk_super->magic))) { printk("btrfs: valid FS not found on %s\n", sb->s_id); goto fail_sb_buffer; } + blocksize = btrfs_level_size(tree_root, btrfs_super_root_level(disk_super)); + tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super), blocksize); if (!tree_root->node) goto fail_sb_buffer; -#if 0 - btrfs_print_leaf(tree_root, tree_root->node); - err = -EIO; - goto fail_tree_root; -#endif mutex_lock(&fs_info->fs_mutex); ret = find_and_setup_root(tree_root, fs_info, @@ -634,9 +719,19 @@ int close_ctree(struct btrfs_root *root) btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); - extent_map_tree_cleanup(&BTRFS_I(fs_info->btree_inode)->extent_tree); + extent_map_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->extent_tree); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); +#if 0 + while(!list_empty(&fs_info->hashers)) { + struct btrfs_hasher *hasher; + hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher, + hashers); + list_del(&hasher->hashers); + crypto_free_hash(&fs_info->hash_tfm); + kfree(hasher); + } +#endif kfree(fs_info->extent_root); kfree(fs_info->tree_root); return 0; @@ -733,5 +828,5 @@ int btrfs_read_buffer(struct extent_buffer *buf) struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, - buf, 1); + buf, 0, 1); } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e4e68ea9648..6b2f2b414fa 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1178,6 +1178,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, btrfs_set_buffer_uptodate(buf); set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); + set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree, + buf->start, buf->start + buf->len - 1, + EXTENT_CSUM, GFP_NOFS); + buf->flags |= EXTENT_CSUM; btrfs_set_buffer_defrag(buf); trans->blocks_used++; return buf; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index f8aaba8a30a..2a8bc4bd43a 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -81,7 +81,7 @@ void extent_map_tree_init(struct extent_map_tree *tree, } EXPORT_SYMBOL(extent_map_tree_init); -void extent_map_tree_cleanup(struct extent_map_tree *tree) +void extent_map_tree_empty_lru(struct extent_map_tree *tree) { struct extent_buffer *eb; while(!list_empty(&tree->buffer_lru)) { @@ -91,7 +91,7 @@ void extent_map_tree_cleanup(struct extent_map_tree *tree) free_extent_buffer(eb); } } -EXPORT_SYMBOL(extent_map_tree_cleanup); +EXPORT_SYMBOL(extent_map_tree_empty_lru); struct extent_map *alloc_extent_map(gfp_t mask) { @@ -1464,7 +1464,7 @@ void set_page_extent_mapped(struct page *page) if (!PagePrivate(page)) { SetPagePrivate(page); WARN_ON(!page->mapping->a_ops->invalidatepage); - set_page_private(page, 1); + set_page_private(page, EXTENT_PAGE_PRIVATE); page_cache_get(page); } } @@ -1979,8 +1979,9 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree, spin_lock(&tree->lru_lock); eb = find_lru(tree, start, len); - if (eb) + if (eb) { goto lru_add; + } spin_unlock(&tree->lru_lock); if (eb) { @@ -2007,6 +2008,7 @@ static void __free_extent_buffer(struct extent_buffer *eb) struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, u64 start, unsigned long len, + struct page *page0, gfp_t mask) { unsigned long num_pages = num_extent_pages(start, len); @@ -2024,7 +2026,18 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, if (eb->flags & EXTENT_BUFFER_FILLED) return eb; - for (i = 0; i < num_pages; i++, index++) { + if (page0) { + eb->first_page = page0; + i = 1; + index++; + page_cache_get(page0); + set_page_extent_mapped(page0); + set_page_private(page0, EXTENT_PAGE_PRIVATE_FIRST_PAGE | + len << 2); + } else { + i = 0; + } + for (; i < num_pages; i++, index++) { p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); if (!p) { WARN_ON(1); @@ -2036,8 +2049,13 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, goto fail; } set_page_extent_mapped(p); - if (i == 0) + if (i == 0) { eb->first_page = p; + set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE | + len << 2); + } else { + set_page_private(p, EXTENT_PAGE_PRIVATE); + } if (!PageUptodate(p)) uptodate = 0; unlock_page(p); @@ -2057,8 +2075,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, gfp_t mask) { unsigned long num_pages = num_extent_pages(start, len); - unsigned long i; - unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long i; unsigned long index = start >> PAGE_CACHE_SHIFT; struct extent_buffer *eb; struct page *p; struct address_space *mapping = tree->mapping; @@ -2082,8 +2099,15 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, goto fail; } set_page_extent_mapped(p); - if (i == 0) + + if (i == 0) { eb->first_page = p; + set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE | + len << 2); + } else { + set_page_private(p, EXTENT_PAGE_PRIVATE); + } + if (!PageUptodate(p)) uptodate = 0; unlock_page(p); @@ -2174,7 +2198,21 @@ int set_extent_buffer_dirty(struct extent_map_tree *tree, num_pages = num_extent_pages(eb->start, eb->len); for (i = 0; i < num_pages; i++) { + struct page *page = extent_buffer_page(eb, i); + /* writepage may need to do something special for the + * first page, we have to make sure page->private is + * properly set. releasepage may drop page->private + * on us if the page isn't already dirty. + */ + if (i == 0) { + lock_page(page); + set_page_private(page, + EXTENT_PAGE_PRIVATE_FIRST_PAGE | + eb->len << 2); + } __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); + if (i == 0) + unlock_page(page); } return set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); @@ -2217,9 +2255,12 @@ int extent_buffer_uptodate(struct extent_map_tree *tree, EXPORT_SYMBOL(extent_buffer_uptodate); int read_extent_buffer_pages(struct extent_map_tree *tree, - struct extent_buffer *eb, int wait) + struct extent_buffer *eb, + u64 start, + int wait) { unsigned long i; + unsigned long start_i; struct page *page; int err; int ret = 0; @@ -2232,9 +2273,16 @@ int read_extent_buffer_pages(struct extent_map_tree *tree, EXTENT_UPTODATE, 1)) { return 0; } + if (start) { + WARN_ON(start < eb->start); + start_i = (start >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT); + } else { + start_i = 0; + } num_pages = num_extent_pages(eb->start, eb->len); - for (i = 0; i < num_pages; i++) { + for (i = start_i; i < num_pages; i++) { page = extent_buffer_page(eb, i); if (PageUptodate(page)) { continue; @@ -2260,7 +2308,7 @@ int read_extent_buffer_pages(struct extent_map_tree *tree, return ret; } - for (i = 0; i < num_pages; i++) { + for (i = start_i; i < num_pages; i++) { page = extent_buffer_page(eb, i); wait_on_page_locked(page); if (!PageUptodate(page)) { @@ -2314,7 +2362,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, } EXPORT_SYMBOL(read_extent_buffer); -static int __map_extent_buffer(struct extent_buffer *eb, unsigned long start, +int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, unsigned long min_len, char **token, char **map, unsigned long *map_start, unsigned long *map_len, int km) @@ -2337,6 +2385,10 @@ static int __map_extent_buffer(struct extent_buffer *eb, unsigned long start, offset = 0; *map_start = (i << PAGE_CACHE_SHIFT) - start_offset; } + if (start + min_len >= eb->len) { +printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); + WARN_ON(1); + } p = extent_buffer_page(eb, i); WARN_ON(!PageUptodate(p)); @@ -2346,6 +2398,7 @@ static int __map_extent_buffer(struct extent_buffer *eb, unsigned long start, *map_len = PAGE_CACHE_SIZE - offset; return 0; } +EXPORT_SYMBOL(map_private_extent_buffer); int map_extent_buffer(struct extent_buffer *eb, unsigned long start, unsigned long min_len, @@ -2360,8 +2413,8 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long start, eb->map_token = NULL; save = 1; } - err = __map_extent_buffer(eb, start, min_len, token, map, - map_start, map_len, km); + err = map_private_extent_buffer(eb, start, min_len, token, map, + map_start, map_len, km); if (!err && save) { eb->map_token = *token; eb->kaddr = *map; diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index f1dc28d260e..39d78d31b6a 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -17,8 +17,17 @@ #define EXTENT_DEFRAG (1 << 6) #define EXTENT_DEFRAG_DONE (1 << 7) #define EXTENT_BUFFER_FILLED (1 << 8) +#define EXTENT_CSUM (1 << 9) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) +/* + * page->private values. Every page that is controlled by the extent + * map has page->private set to one. + */ + +#define EXTENT_PAGE_PRIVATE 1 +#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 + struct extent_map_ops { int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); @@ -89,7 +98,7 @@ typedef struct extent_map *(get_extent_t)(struct inode *inode, void extent_map_tree_init(struct extent_map_tree *tree, struct address_space *mapping, gfp_t mask); -void extent_map_tree_cleanup(struct extent_map_tree *tree); +void extent_map_tree_empty_lru(struct extent_map_tree *tree); struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, u64 start, u64 end); int add_extent_mapping(struct extent_map_tree *tree, @@ -143,13 +152,14 @@ void set_page_extent_mapped(struct page *page); struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, u64 start, unsigned long len, + struct page *page0, gfp_t mask); struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, u64 start, unsigned long len, gfp_t mask); void free_extent_buffer(struct extent_buffer *eb); int read_extent_buffer_pages(struct extent_map_tree *tree, - struct extent_buffer *eb, int wait); + struct extent_buffer *eb, u64 start, int wait); static inline void extent_buffer_get(struct extent_buffer *eb) { @@ -187,5 +197,9 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, unsigned long min_len, char **token, char **map, unsigned long *map_start, unsigned long *map_len, int km); +int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, + unsigned long min_len, char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km); void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); #endif diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 84f496c838c..80f84b48a70 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1713,10 +1713,6 @@ static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) struct extent_map_tree *tree; int ret; - if (page->private != 1) { - WARN_ON(1); - return try_to_free_buffers(page); - } tree = &BTRFS_I(page->mapping->host)->extent_tree; ret = try_release_extent_mapping(tree, page); if (ret == 1) { -- cgit v1.2.3-70-g09d2 From ae2f5411c4ce7180cca8418853db50c8e52d40db Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 19 Oct 2007 09:22:59 -0400 Subject: btrfs: 32-bit type problems An assorted set of casts to get rid of the warnings on 32-bit archs. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 2 +- fs/btrfs/extent-tree.c | 8 ++++---- fs/btrfs/extent_map.c | 6 +++--- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 54967099a8e..17a322663f3 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1389,7 +1389,7 @@ int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf) ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); if (ret < 0) { printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n", - ret, BTRFS_LEAF_DATA_SIZE(root), + ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root), leaf_space_used(leaf, 0, nritems), nritems); } return ret; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6b2f2b414fa..7de7707d939 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -149,7 +149,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct if (ret) return NULL; - block_group = (struct btrfs_block_group_cache *)ptr; + block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr; if (block_group->key.objectid <= bytenr && bytenr <= @@ -279,7 +279,7 @@ again: if (ret) break; - cache = (struct btrfs_block_group_cache *)ptr; + cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; last = cache->key.objectid + cache->key.offset; used = btrfs_block_group_used(&cache->item); @@ -537,7 +537,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, if (ret) break; - cache = (struct btrfs_block_group_cache *)ptr; + cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; err = write_one_cache_group(trans, root, path, cache); /* @@ -1541,7 +1541,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) found_key.objectid + found_key.offset - 1, bit | EXTENT_LOCKED, GFP_NOFS); set_state_private(block_group_cache, found_key.objectid, - (u64)cache); + (unsigned long)cache); if (key.objectid >= btrfs_super_total_bytes(&info->super_copy)) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index e87e476dca9..89d0ebd0ea8 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -2654,8 +2654,8 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - src_off_in_page)); - cur = min(cur, (unsigned long)(PAGE_CACHE_SIZE - - dst_off_in_page)); + cur = min_t(unsigned long, cur, + (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); copy_pages(extent_buffer_page(dst, dst_i), extent_buffer_page(dst, src_i), @@ -2707,7 +2707,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, if (dst_i == 0) dst_off_in_page += start_offset; - cur = min(len, src_off_in_page + 1); + cur = min_t(unsigned long, len, src_off_in_page + 1); cur = min(cur, dst_off_in_page + 1); move_pages(extent_buffer_page(dst, dst_i), extent_buffer_page(dst, src_i), diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4aacf99bd97..96df1b10cb6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -188,7 +188,7 @@ insert: while (size > 0) { page = pages[i]; kaddr = kmap_atomic(page, KM_USER0); - cur_size = min(PAGE_CACHE_SIZE - page_offset, size); + cur_size = min_t(size_t, PAGE_CACHE_SIZE - page_offset, size); write_extent_buffer(leaf, kaddr + page_offset, ptr, cur_size); kunmap_atomic(kaddr, KM_USER0); page_offset = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ac69f6926af..0e1ca423e22 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1606,7 +1606,7 @@ again: extent_start; ptr = btrfs_file_extent_inline_start(item) + extent_offset; map = kmap(page); - copy_size = min(PAGE_CACHE_SIZE - page_offset, + copy_size = min_t(u64, PAGE_CACHE_SIZE - page_offset, size - extent_offset); em->block_start = EXTENT_MAP_INLINE; -- cgit v1.2.3-70-g09d2 From b97f9203b4d672c06eca50a1b1b99e4f69e1daf8 Mon Sep 17 00:00:00 2001 From: Yan Date: Thu, 1 Nov 2007 11:28:41 -0400 Subject: Btrfs: Fix typo and memory leak in extent-tree.c This patch fixes a typo in update_block_group and memory leak in btrfs_free_block_groups. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7de7707d939..e7192ea8f96 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -588,16 +588,15 @@ static int update_block_group(struct btrfs_trans_handle *trans, old_val < (cache->key.offset >> 1)) { int bit_to_clear; int bit_to_set; - cache->data = data; if (data) { - bit_to_clear = BLOCK_GROUP_DATA; - bit_to_set = BLOCK_GROUP_METADATA; + bit_to_clear = BLOCK_GROUP_METADATA; + bit_to_set = BLOCK_GROUP_DATA; cache->item.flags |= BTRFS_BLOCK_GROUP_DATA; } else { - bit_to_clear = BLOCK_GROUP_METADATA; - bit_to_set = BLOCK_GROUP_DATA; + bit_to_clear = BLOCK_GROUP_DATA; + bit_to_set = BLOCK_GROUP_METADATA; cache->item.flags &= ~BTRFS_BLOCK_GROUP_DATA; } @@ -1459,13 +1458,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) { u64 start; u64 end; + u64 ptr; int ret; - while(1) { ret = find_first_extent_bit(&info->block_group_cache, 0, &start, &end, (unsigned int)-1); if (ret) break; + ret = get_state_private(&info->block_group_cache, start, &ptr); + if (!ret) + kfree((void *)(unsigned long)ptr); clear_extent_bits(&info->block_group_cache, start, end, (unsigned int)-1, GFP_NOFS); } -- cgit v1.2.3-70-g09d2 From c549228ff63498025bf0b065c5a5909a06f21647 Mon Sep 17 00:00:00 2001 From: Yan Date: Tue, 6 Nov 2007 10:25:25 -0500 Subject: Btrfs: Properly update free space cache in __free_extent When pin_down_bytes decides not to pin a block because it was from the current transaction, make sure the in memory cache of free extents is updated Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e7192ea8f96..9eb2ee02f64 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -714,7 +714,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, root->fs_info->running_transaction->transid; if (btrfs_header_generation(buf) == transid) { free_extent_buffer(buf); - return 0; + return 1; } } free_extent_buffer(buf); @@ -774,7 +774,9 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (pin) { ret = pin_down_bytes(root, bytenr, num_bytes, 0); - BUG_ON(ret); + if (ret > 0) + mark_free = 1; + BUG_ON(ret < 0); } /* block accounting for super block */ -- cgit v1.2.3-70-g09d2 From f84a8b362d9785ca1fa0598d8a90f35184bd8750 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 6 Nov 2007 10:26:29 -0500 Subject: Btrfs: Optimize allocations as we need to mix data and metadata into one group Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/extent-tree.c | 42 ++++++++++++++++++++++++++++++++++++------ fs/btrfs/tree-defrag.c | 6 +++++- 3 files changed, 43 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d82afb618bf..f7907b02fa7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -279,6 +279,8 @@ struct btrfs_csum_item { #define BTRFS_BLOCK_GROUP_DATA 1 +#define BTRFS_BLOCK_GROUP_MIXED 2 + struct btrfs_block_group_item { __le64 used; u8 flags; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9eb2ee02f64..d54ab8ef06d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -168,11 +168,13 @@ static u64 find_search_start(struct btrfs_root *root, u64 last; u64 start = 0; u64 end = 0; + int wrapped = 0; again: ret = cache_block_group(root, cache); if (ret) goto out; + last = max(search_start, cache->key.objectid); while(1) { @@ -195,8 +197,15 @@ out: new_group: last = cache->key.objectid + cache->key.offset; +wrapped: cache = btrfs_lookup_block_group(root->fs_info, last); if (!cache) { + if (!wrapped) { + wrapped = 1; + last = search_start; + data = BTRFS_BLOCK_GROUP_MIXED; + goto wrapped; + } return search_start; } cache = btrfs_find_block_group(root, cache, last, data, 0); @@ -236,9 +245,11 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, block_group_cache = &info->block_group_cache; if (!owner) - factor = 5; + factor = 8; - if (data) + if (data == BTRFS_BLOCK_GROUP_MIXED) + bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; + else if (data) bit = BLOCK_GROUP_DATA; else bit = BLOCK_GROUP_METADATA; @@ -246,14 +257,16 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (search_start) { struct btrfs_block_group_cache *shint; shint = btrfs_lookup_block_group(info, search_start); - if (shint && shint->data == data) { + if (shint && (shint->data == data || + shint->data == BTRFS_BLOCK_GROUP_MIXED)) { used = btrfs_block_group_used(&shint->item); if (used < div_factor(shint->key.offset, factor)) { return shint; } } } - if (hint && hint->data == data) { + if (hint && (hint->data == data || + hint->data == BTRFS_BLOCK_GROUP_MIXED)) { used = btrfs_block_group_used(&hint->item); if (used < div_factor(hint->key.offset, factor)) { return hint; @@ -592,11 +605,15 @@ static int update_block_group(struct btrfs_trans_handle *trans, if (data) { bit_to_clear = BLOCK_GROUP_METADATA; bit_to_set = BLOCK_GROUP_DATA; + cache->item.flags &= + ~BTRFS_BLOCK_GROUP_MIXED; cache->item.flags |= BTRFS_BLOCK_GROUP_DATA; } else { bit_to_clear = BLOCK_GROUP_DATA; bit_to_set = BLOCK_GROUP_METADATA; + cache->item.flags &= + ~BTRFS_BLOCK_GROUP_MIXED; cache->item.flags &= ~BTRFS_BLOCK_GROUP_DATA; } @@ -606,6 +623,14 @@ static int update_block_group(struct btrfs_trans_handle *trans, set_extent_bits(&info->block_group_cache, start, end, bit_to_set, GFP_NOFS); + } else if (cache->data != data && + cache->data != BTRFS_BLOCK_GROUP_MIXED) { + cache->data = BTRFS_BLOCK_GROUP_MIXED; + set_extent_bits(&info->block_group_cache, + start, end, + BLOCK_GROUP_DATA | + BLOCK_GROUP_METADATA, + GFP_NOFS); } old_val += num_bytes; } else { @@ -886,6 +911,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; + u64 cached_start; WARN_ON(num_bytes < root->sectorsize); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); @@ -910,6 +936,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root check_failed: search_start = find_search_start(root, &block_group, search_start, total_needed, data); + cached_start = search_start; btrfs_init_path(path); ins->objectid = search_start; ins->offset = 0; @@ -1532,9 +1559,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); - if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { + if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) { + bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; + cache->data = BTRFS_BLOCK_GROUP_MIXED; + } else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { bit = BLOCK_GROUP_DATA; - cache->data = 1; + cache->data = BTRFS_BLOCK_GROUP_DATA; } else { bit = BLOCK_GROUP_METADATA; cache->data = 0; diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 6ef1ba5f9c2..3994795edfe 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -58,6 +58,10 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, if (root->fs_info->extent_root == root) is_extent = 1; + if (*level == 1 && cache_only && path->nodes[1] && + !btrfs_buffer_defrag(path->nodes[1])) { + goto out; + } while(*level > 0) { WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); @@ -116,7 +120,7 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, WARN_ON(*level >= BTRFS_MAX_LEVEL); btrfs_clear_buffer_defrag(path->nodes[*level]); - +out: free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; -- cgit v1.2.3-70-g09d2 From 257d0ce36f215937602eb5c1b328b69afaf3bcd8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 7 Nov 2007 21:08:16 -0500 Subject: Btrfs: Allow large data extents in a single file to span into metadata block groups Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 36 ++++++++++++++++++++++++++++++------ fs/btrfs/file.c | 6 ++++++ 2 files changed, 36 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d54ab8ef06d..7405bd5301a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -168,6 +168,7 @@ static u64 find_search_start(struct btrfs_root *root, u64 last; u64 start = 0; u64 end = 0; + u64 cache_miss = 0; int wrapped = 0; again: @@ -181,14 +182,20 @@ again: ret = find_first_extent_bit(&root->fs_info->free_space_cache, last, &start, &end, EXTENT_DIRTY); if (ret) { + if (!cache_miss) + cache_miss = last; goto new_group; } start = max(last, start); last = end + 1; - if (end + 1 - start < num) + if (last - start < num) { + if (last == cache->key.objectid + cache->key.offset) + cache_miss = start; continue; - if (start + num >= cache->key.objectid + cache->key.offset) + } + if (data != BTRFS_BLOCK_GROUP_MIXED && + start + num >= cache->key.objectid + cache->key.offset) goto new_group; return start; } @@ -208,13 +215,22 @@ wrapped: } return search_start; } + if (cache_miss && !cache->cached) { + cache_block_group(root, cache); + last = cache_miss; + + cache = btrfs_lookup_block_group(root->fs_info, last); + } cache = btrfs_find_block_group(root, cache, last, data, 0); *cache_ret = cache; + cache_miss = 0; goto again; } static u64 div_factor(u64 num, int factor) { + if (factor == 10) + return num; num *= factor; do_div(num, 10); return num; @@ -247,9 +263,10 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (!owner) factor = 8; - if (data == BTRFS_BLOCK_GROUP_MIXED) + if (data == BTRFS_BLOCK_GROUP_MIXED) { bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; - else if (data) + factor = 10; + } else if (data) bit = BLOCK_GROUP_DATA; else bit = BLOCK_GROUP_METADATA; @@ -918,6 +935,10 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root level = btrfs_header_level(root->node); + if (num_bytes >= 96 * 1024 * 1024 && hint_byte) { + data = BTRFS_BLOCK_GROUP_MIXED; + } + if (search_end == (u64)-1) search_end = btrfs_super_total_bytes(&info->super_copy); if (hint_byte) { @@ -937,6 +958,7 @@ check_failed: search_start = find_search_start(root, &block_group, search_start, total_needed, data); cached_start = search_start; + btrfs_init_path(path); ins->objectid = search_start; ins->offset = 0; @@ -1021,7 +1043,8 @@ check_failed: start_found = 1; last_byte = key.objectid + key.offset; - if (!full_scan && last_byte >= block_group->key.objectid + + if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED && + last_byte >= block_group->key.objectid + block_group->key.offset) { btrfs_release_path(root, path); search_start = block_group->key.objectid + @@ -1042,7 +1065,8 @@ check_pending: if (ins->objectid + num_bytes >= search_end) goto enospc; - if (!full_scan && ins->objectid + num_bytes >= block_group-> + if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED && + ins->objectid + num_bytes >= block_group-> key.objectid + block_group->key.offset) { search_start = block_group->key.objectid + block_group->key.offset; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5ceaed25dce..9dd2c5c23b5 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -439,6 +439,12 @@ next_slot: struct btrfs_file_extent_item); found_type = btrfs_file_extent_type(leaf, extent); if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_end = + btrfs_file_extent_disk_bytenr(leaf, + extent); + if (extent_end) + *hint_byte = extent_end; + extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, extent); found_extent = 1; -- cgit v1.2.3-70-g09d2 From 324ae4df00fdc1a6a179bf584d8addf027bb75fb Mon Sep 17 00:00:00 2001 From: Yan Date: Fri, 16 Nov 2007 14:57:08 -0500 Subject: Btrfs: Add block group pinned accounting back This patch adds a helper function 'update_pinned_extents' to extent-tree.c. The usage of the helper function is similar to 'update_block_group', the last parameter of the function indicates pin vs unpin. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 5 +++-- fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 54 +++++++++++++++++++++++++++++++++++++------------- 3 files changed, 44 insertions(+), 16 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 27cadae1af6..56b977ffe91 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -292,8 +292,8 @@ struct btrfs_block_group_cache { struct btrfs_block_group_item item; int data; int cached; + u64 pinned; }; - struct btrfs_fs_info { u8 fsid[BTRFS_FSID_SIZE]; struct btrfs_root *extent_root; @@ -324,8 +324,9 @@ struct btrfs_fs_info { struct completion kobj_unregister; int do_barriers; int closing; -}; + u64 total_pinned; +}; /* * in ram representation of the tree. extent_root is used for all allocations * and for the extent tree extent_root root. diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6c8533fba7c..3e16cca72b4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -569,6 +569,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; + fs_info->total_pinned = 0; INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); BTRFS_I(fs_info->btree_inode)->root = tree_root; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7405bd5301a..4ef6dc3d7d3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -277,7 +277,8 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (shint && (shint->data == data || shint->data == BTRFS_BLOCK_GROUP_MIXED)) { used = btrfs_block_group_used(&shint->item); - if (used < div_factor(shint->key.offset, factor)) { + if (used + shint->pinned < + div_factor(shint->key.offset, factor)) { return shint; } } @@ -285,7 +286,8 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (hint && (hint->data == data || hint->data == BTRFS_BLOCK_GROUP_MIXED)) { used = btrfs_block_group_used(&hint->item); - if (used < div_factor(hint->key.offset, factor)) { + if (used + hint->pinned < + div_factor(hint->key.offset, factor)) { return hint; } last = hint->key.objectid + hint->key.offset; @@ -317,8 +319,7 @@ again: free_check = cache->key.offset; else free_check = div_factor(cache->key.offset, factor); - - if (used < free_check) { + if (used + cache->pinned < free_check) { found_group = cache; goto found; } @@ -664,6 +665,37 @@ static int update_block_group(struct btrfs_trans_handle *trans, } return 0; } +static int update_pinned_extents(struct btrfs_root *root, + u64 bytenr, u64 num, int pin) +{ + u64 len; + struct btrfs_block_group_cache *cache; + struct btrfs_fs_info *fs_info = root->fs_info; + + if (pin) { + set_extent_dirty(&fs_info->pinned_extents, + bytenr, bytenr + num - 1, GFP_NOFS); + } else { + clear_extent_dirty(&fs_info->pinned_extents, + bytenr, bytenr + num - 1, GFP_NOFS); + } + while (num > 0) { + cache = btrfs_lookup_block_group(fs_info, bytenr); + WARN_ON(!cache); + len = min(num, cache->key.offset - + (bytenr - cache->key.objectid)); + if (pin) { + cache->pinned += len; + fs_info->total_pinned += len; + } else { + cache->pinned -= len; + fs_info->total_pinned -= len; + } + bytenr += len; + num -= len; + } + return 0; +} int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy) { @@ -691,9 +723,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, u64 start; u64 end; int ret; - struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents; struct extent_map_tree *free_space_cache; - free_space_cache = &root->fs_info->free_space_cache; while(1) { @@ -701,9 +731,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, EXTENT_DIRTY); if (ret) break; - - clear_extent_dirty(pinned_extents, start, end, - GFP_NOFS); + update_pinned_extents(root, start, end + 1 - start, 0); clear_extent_dirty(unpin, start, end, GFP_NOFS); set_extent_dirty(free_space_cache, start, end, GFP_NOFS); } @@ -761,8 +789,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, } free_extent_buffer(buf); } - set_extent_dirty(&root->fs_info->pinned_extents, - bytenr, bytenr + num_bytes - 1, GFP_NOFS); + update_pinned_extents(root, bytenr, num_bytes, 1); } else { set_extent_bits(&root->fs_info->pending_del, bytenr, bytenr + num_bytes - 1, @@ -866,8 +893,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct EXTENT_LOCKED); if (ret) break; - - set_extent_dirty(pinned_extents, start, end, GFP_NOFS); + update_pinned_extents(extent_root, start, end + 1 - start, 1); clear_extent_bits(pending_del, start, end, EXTENT_LOCKED, GFP_NOFS); ret = __free_extent(trans, extent_root, @@ -1579,7 +1605,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) sizeof(cache->item)); memcpy(&cache->key, &found_key, sizeof(found_key)); cache->cached = 0; - + cache->pinned = 0; key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); -- cgit v1.2.3-70-g09d2 From 5e5745dcaf73c5860a16c09572d59383ecee6d28 Mon Sep 17 00:00:00 2001 From: Yan Date: Fri, 16 Nov 2007 14:57:09 -0500 Subject: Btrfs: Add full_scan parameter to find_search_start This patch adds a new parameter 'full_scan' to 'find_search_start', thereby 'find_search_start' can know whether 'find_free_extent' is in full scan phrase. I feel that 'find_search_start' should skip calling 'btrfs_find_block_group' when 'find_free_extent' is in full scan phrase. In my test on a 2GB volume, Oops occurs when space usage is about 76%. After apply the patch, Oops occurs when space usage is near 100%. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4ef6dc3d7d3..e5340677d6c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -158,10 +158,10 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct return NULL; } - static u64 find_search_start(struct btrfs_root *root, struct btrfs_block_group_cache **cache_ret, - u64 search_start, int num, int data) + u64 search_start, int num, + int data, int full_scan) { int ret; struct btrfs_block_group_cache *cache = *cache_ret; @@ -218,10 +218,10 @@ wrapped: if (cache_miss && !cache->cached) { cache_block_group(root, cache); last = cache_miss; - cache = btrfs_lookup_block_group(root->fs_info, last); } - cache = btrfs_find_block_group(root, cache, last, data, 0); + if (!full_scan) + cache = btrfs_find_block_group(root, cache, last, data, 0); *cache_ret = cache; cache_miss = 0; goto again; @@ -979,12 +979,10 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root total_needed += empty_size; path = btrfs_alloc_path(); - check_failed: - search_start = find_search_start(root, &block_group, - search_start, total_needed, data); + search_start = find_search_start(root, &block_group, search_start, + total_needed, data, full_scan); cached_start = search_start; - btrfs_init_path(path); ins->objectid = search_start; ins->offset = 0; -- cgit v1.2.3-70-g09d2 From 5cf664263b05beb080bf95037e4c3bd50c80b096 Mon Sep 17 00:00:00 2001 From: Yan Date: Fri, 16 Nov 2007 14:57:09 -0500 Subject: Btrfs: Off by one fixes for extent-tree.c Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e5340677d6c..6a7a9ba75fd 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -150,12 +150,9 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct return NULL; block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr; - - - if (block_group->key.objectid <= bytenr && bytenr <= + if (block_group->key.objectid <= bytenr && bytenr < block_group->key.objectid + block_group->key.offset) return block_group; - return NULL; } static u64 find_search_start(struct btrfs_root *root, @@ -195,7 +192,7 @@ again: continue; } if (data != BTRFS_BLOCK_GROUP_MIXED && - start + num >= cache->key.objectid + cache->key.offset) + start + num > cache->key.objectid + cache->key.offset) goto new_group; return start; } @@ -1088,9 +1085,8 @@ check_pending: if (ins->objectid + num_bytes >= search_end) goto enospc; - if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED && - ins->objectid + num_bytes >= block_group-> + ins->objectid + num_bytes > block_group-> key.objectid + block_group->key.offset) { search_start = block_group->key.objectid + block_group->key.offset; -- cgit v1.2.3-70-g09d2 From 0e4de58432f552c0ecead965b0fad8a9fb1d31ec Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Nov 2007 10:55:49 -0500 Subject: Btrfs: Add check for null block group to find_search_start Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6a7a9ba75fd..39be6baccc3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -204,6 +204,7 @@ new_group: wrapped: cache = btrfs_lookup_block_group(root->fs_info, last); if (!cache) { +no_cache: if (!wrapped) { wrapped = 1; last = search_start; @@ -219,6 +220,8 @@ wrapped: } if (!full_scan) cache = btrfs_find_block_group(root, cache, last, data, 0); + if (!cache) + goto no_cache; *cache_ret = cache; cache_miss = 0; goto again; -- cgit v1.2.3-70-g09d2 From 015a739c7c238768fbfa4eea8ea2ebc1a35e7bb1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Nov 2007 16:15:16 -0800 Subject: Btrfs: Handle writeback under high memory pressure better Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/extent_map.c | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 39be6baccc3..3c00f967ecc 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -961,7 +961,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root level = btrfs_header_level(root->node); - if (num_bytes >= 96 * 1024 * 1024 && hint_byte) { + if (num_bytes >= 32 * 1024 * 1024 && hint_byte) { data = BTRFS_BLOCK_GROUP_MIXED; } diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index a4e9096754f..55f272c335c 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1861,13 +1861,25 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, struct writeback_control *wbc) { int ret; + struct address_space *mapping = page->mapping; struct extent_page_data epd = { .bio = NULL, .tree = tree, .get_extent = get_extent, }; + struct writeback_control wbc_writepages = { + .bdi = wbc->bdi, + .sync_mode = WB_SYNC_NONE, + .older_than_this = NULL, + .nr_to_write = 64, + .range_start = page_offset(page) + PAGE_CACHE_SIZE, + .range_end = (loff_t)-1, + }; + ret = __extent_writepage(page, wbc, &epd); + + write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd); if (epd.bio) submit_one_bio(WRITE, epd.bio); return ret; -- cgit v1.2.3-70-g09d2 From 00f5c795fca47d038fedd3f0c9311da3be710c9f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 30 Nov 2007 10:09:33 -0500 Subject: btrfs_drop_extents: make sure the item is getting smaller before truncate Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 8 ++++++++ fs/btrfs/file.c | 30 +++++++++++++++--------------- 3 files changed, 24 insertions(+), 16 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 56b977ffe91..b29b911dd82 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1037,7 +1037,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end); extern struct file_operations btrfs_file_operations; int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 start, u64 end, u64 inline_end, u64 *hint_block); + u64 start, u64 end, u64 inline_limit, u64 *hint_block); /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3c00f967ecc..55abdf997ca 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -45,6 +45,9 @@ static int cache_block_group(struct btrfs_root *root, u64 first_free; int found = 0; + if (!block_group) + return 0; + root = root->fs_info->extent_root; free_space_cache = &root->fs_info->free_space_cache; @@ -168,6 +171,11 @@ static u64 find_search_start(struct btrfs_root *root, u64 cache_miss = 0; int wrapped = 0; + if (!cache) { + cache = btrfs_lookup_block_group(root->fs_info, search_start); + if (!cache) + return search_start; + } again: ret = cache_block_group(root, cache); if (ret) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ba624ae16e6..b0d63778726 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -377,23 +377,23 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) */ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 start, u64 end, u64 inline_end, u64 *hint_byte) + u64 start, u64 end, u64 inline_limit, u64 *hint_byte) { - int ret; - struct btrfs_key key; + u64 extent_end = 0; + u64 search_start = start; struct extent_buffer *leaf; - int slot; struct btrfs_file_extent_item *extent; - u64 extent_end = 0; - int keep; - struct btrfs_file_extent_item old; struct btrfs_path *path; - u64 search_start = start; + struct btrfs_key key; + struct btrfs_file_extent_item old; + int keep; + int slot; int bookend; int found_type; int found_extent; int found_inline; int recow; + int ret; btrfs_drop_extent_cache(inode, start, end - 1); @@ -502,7 +502,7 @@ next_slot: } bookend = 1; if (found_inline && start <= key.offset && - inline_end < extent_end) + inline_limit < extent_end) keep = 1; } /* truncate existing extent */ @@ -526,12 +526,12 @@ next_slot: btrfs_set_file_extent_num_bytes(leaf, extent, new_num); btrfs_mark_buffer_dirty(leaf); - } else if (end > extent_end && - key.offset < inline_end && - inline_end < extent_end) { + } else if (key.offset < inline_limit && + (end > extent_end) && + (inline_limit < extent_end)) { u32 new_size; new_size = btrfs_file_extent_calc_inline_size( - inline_end - key.offset); + inline_limit - key.offset); btrfs_truncate_item(trans, root, path, new_size, 1); } @@ -575,10 +575,10 @@ next_slot: continue; } if (bookend && found_inline && start <= key.offset && - inline_end < extent_end) { + inline_limit < extent_end && key.offset <= inline_limit) { u32 new_size; new_size = btrfs_file_extent_calc_inline_size( - extent_end - inline_end); + extent_end - inline_limit); btrfs_truncate_item(trans, root, path, new_size, 0); } /* create bookend, splitting the extent in two */ -- cgit v1.2.3-70-g09d2 From 87ee04eb0f2f0c63314cef4a76bd1adac748425e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 30 Nov 2007 11:30:34 -0500 Subject: Btrfs: Add simple stripe size parameter Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 5 +++++ fs/btrfs/disk-io.c | 15 ++++++++++----- fs/btrfs/extent-tree.c | 36 ++++++++++++++++++++++++++++-------- 3 files changed, 43 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b29b911dd82..9bc1b0a8615 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -135,6 +135,7 @@ struct btrfs_super_block { __le32 sectorsize; __le32 nodesize; __le32 leafsize; + __le32 stripesize; u8 root_level; } __attribute__ ((__packed__)); @@ -353,6 +354,8 @@ struct btrfs_root { /* leaf allocations are done in leafsize units */ u32 leafsize; + u32 stripesize; + u32 type; u64 highest_inode; u64 last_inode_alloc; @@ -776,6 +779,8 @@ BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, nodesize, 32); BTRFS_SETGET_STACK_FUNCS(super_leafsize, struct btrfs_super_block, leafsize, 32); +BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block, + stripesize, 32); BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block, root_dir_objectid, 64); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index eef4ab56b9c..60a30da6af0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -355,7 +355,7 @@ int wait_on_tree_block_writeback(struct btrfs_root *root, } static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, - struct btrfs_root *root, + u32 stripesize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) { @@ -365,6 +365,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->sectorsize = sectorsize; root->nodesize = nodesize; root->leafsize = leafsize; + root->stripesize = stripesize; root->ref_cows = 0; root->fs_info = fs_info; root->objectid = objectid; @@ -393,7 +394,8 @@ static int find_and_setup_root(struct btrfs_root *tree_root, u32 blocksize; __setup_root(tree_root->nodesize, tree_root->leafsize, - tree_root->sectorsize, root, fs_info, objectid); + tree_root->sectorsize, tree_root->stripesize, + root, fs_info, objectid); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); @@ -430,8 +432,8 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, } __setup_root(tree_root->nodesize, tree_root->leafsize, - tree_root->sectorsize, root, fs_info, - location->objectid); + tree_root->sectorsize, tree_root->stripesize, + root, fs_info, location->objectid); path = btrfs_alloc_path(); BUG_ON(!path); @@ -537,6 +539,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) u32 nodesize; u32 leafsize; u32 blocksize; + u32 stripesize; struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), @@ -607,7 +610,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) goto fail_iput; } #endif - __setup_root(512, 512, 512, tree_root, + __setup_root(512, 512, 512, 512, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); fs_info->sb_buffer = read_tree_block(tree_root, @@ -630,9 +633,11 @@ struct btrfs_root *open_ctree(struct super_block *sb) nodesize = btrfs_super_nodesize(disk_super); leafsize = btrfs_super_leafsize(disk_super); sectorsize = btrfs_super_sectorsize(disk_super); + stripesize = btrfs_super_stripesize(disk_super); tree_root->nodesize = nodesize; tree_root->leafsize = leafsize; tree_root->sectorsize = sectorsize; + tree_root->stripesize = stripesize; sb_set_blocksize(sb, sectorsize); i_size_write(fs_info->btree_inode, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 55abdf997ca..91397e98939 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -932,6 +932,13 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root return ret ? ret : pending_ret; } +static u64 stripe_align(struct btrfs_root *root, u64 val) +{ + u64 mask = ((u64)root->stripesize - 1); + u64 ret = (val + mask) & ~mask; + return ret; +} + /* * walks the btree of allocated extents and find a hole of a given size. * The key ins is changed to record the hole: @@ -948,8 +955,9 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root { struct btrfs_path *path; struct btrfs_key key; - int ret; u64 hole_size = 0; + u64 aligned; + int ret; int slot = 0; u64 last_byte = 0; u64 orig_search_start = search_start; @@ -990,6 +998,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root check_failed: search_start = find_search_start(root, &block_group, search_start, total_needed, data, full_scan); + search_start = stripe_align(root, search_start); cached_start = search_start; btrfs_init_path(path); ins->objectid = search_start; @@ -1039,13 +1048,23 @@ check_failed: search_start = max(search_start, block_group->key.objectid); if (!start_found) { - ins->objectid = search_start; - ins->offset = search_end - search_start; + aligned = stripe_align(root, search_start); + ins->objectid = aligned; + if (aligned >= search_end) { + ret = -ENOSPC; + goto error; + } + ins->offset = search_end - aligned; start_found = 1; goto check_pending; } - ins->objectid = last_byte > search_start ? - last_byte : search_start; + ins->objectid = stripe_align(root, + last_byte > search_start ? + last_byte : search_start); + if (search_end <= ins->objectid) { + ret = -ENOSPC; + goto error; + } ins->offset = search_end - ins->objectid; BUG_ON(ins->objectid >= search_end); goto check_pending; @@ -1056,9 +1075,10 @@ check_failed: start_found) { if (last_byte < search_start) last_byte = search_start; - hole_size = key.objectid - last_byte; - if (hole_size >= num_bytes) { - ins->objectid = last_byte; + aligned = stripe_align(root, last_byte); + hole_size = key.objectid - aligned; + if (key.objectid > aligned && hole_size >= num_bytes) { + ins->objectid = aligned; ins->offset = hole_size; goto check_pending; } -- cgit v1.2.3-70-g09d2 From 1a2b2ac78a4151a49267a1e3f48a131f5810814e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 4 Dec 2007 13:18:24 -0500 Subject: Btrfs: Fix extent allocation for btree blocks as the disk fills Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 91397e98939..8ab4954f6ad 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -172,9 +172,7 @@ static u64 find_search_start(struct btrfs_root *root, int wrapped = 0; if (!cache) { - cache = btrfs_lookup_block_group(root->fs_info, search_start); - if (!cache) - return search_start; + goto out; } again: ret = cache_block_group(root, cache); @@ -205,6 +203,13 @@ again: return start; } out: + cache = btrfs_lookup_block_group(root->fs_info, search_start); + if (!cache) { + printk("Unable to find block group for %Lu\n", + search_start); + WARN_ON(1); + return search_start; + } return search_start; new_group: @@ -219,15 +224,14 @@ no_cache: data = BTRFS_BLOCK_GROUP_MIXED; goto wrapped; } - return search_start; + goto out; } if (cache_miss && !cache->cached) { cache_block_group(root, cache); last = cache_miss; cache = btrfs_lookup_block_group(root->fs_info, last); } - if (!full_scan) - cache = btrfs_find_block_group(root, cache, last, data, 0); + cache = btrfs_find_block_group(root, cache, last, data, 0); if (!cache) goto no_cache; *cache_ret = cache; @@ -985,12 +989,14 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root search_end = btrfs_super_total_bytes(&info->super_copy); if (hint_byte) { block_group = btrfs_lookup_block_group(info, hint_byte); + if (!block_group) + hint_byte = search_start; block_group = btrfs_find_block_group(root, block_group, hint_byte, data, 1); } else { block_group = btrfs_find_block_group(root, - trans->block_group, 0, - data, 1); + trans->block_group, + search_start, data, 1); } total_needed += empty_size; @@ -1159,14 +1165,14 @@ enospc: if (!full_scan) total_needed -= empty_size; full_scan = 1; + data = BTRFS_BLOCK_GROUP_MIXED; } else wrapped = 1; } block_group = btrfs_lookup_block_group(info, search_start); cond_resched(); - if (!full_scan) - block_group = btrfs_find_block_group(root, block_group, - search_start, data, 0); + block_group = btrfs_find_block_group(root, block_group, + search_start, data, 0); goto check_failed; error: -- cgit v1.2.3-70-g09d2 From 74493f7a59bfd4d1c7029c74ab2cd0e400612c6b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 11 Dec 2007 09:25:06 -0500 Subject: Btrfs: Implement generation numbers in block pointers Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 16 ++++++++++++++ fs/btrfs/ctree.h | 47 ++++++++++++++++++++++++++++++++++----- fs/btrfs/extent-tree.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 115 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 3b16051b121..5697705f753 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -114,6 +114,9 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, } else { btrfs_set_node_blockptr(parent, parent_slot, cow->start); + WARN_ON(trans->transid == 0); + btrfs_set_node_ptr_generation(parent, parent_slot, + trans->transid); btrfs_mark_buffer_dirty(parent); WARN_ON(btrfs_header_generation(parent) != trans->transid); btrfs_free_extent(trans, root, buf->start, buf->len, 1); @@ -967,6 +970,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root { struct extent_buffer *b; u64 bytenr; + u64 ptr_gen; int slot; int ret; int level; @@ -1031,10 +1035,18 @@ again: if (level == lowest_level) break; bytenr = btrfs_node_blockptr(b, slot); + ptr_gen = btrfs_node_ptr_generation(b, slot); if (should_reada) reada_for_search(root, p, level, slot); b = read_tree_block(root, bytenr, btrfs_level_size(root, level - 1)); + if (ptr_gen != btrfs_header_generation(b)) { + printk("block %llu bad gen wanted %llu " + "found %llu\n", + (unsigned long long)b->start, + (unsigned long long)ptr_gen, + (unsigned long long)btrfs_header_generation(b)); + } } else { p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, b) < @@ -1218,6 +1230,8 @@ static int insert_new_root(struct btrfs_trans_handle *trans, btrfs_node_key(lower, &lower_key, 0); btrfs_set_node_key(c, &lower_key, 0); btrfs_set_node_blockptr(c, 0, lower->start); + WARN_ON(btrfs_header_generation(lower) == 0); + btrfs_set_node_ptr_generation(c, 0, btrfs_header_generation(lower)); btrfs_mark_buffer_dirty(c); @@ -1261,6 +1275,8 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root } btrfs_set_node_key(lower, key, slot); btrfs_set_node_blockptr(lower, slot, bytenr); + WARN_ON(trans->transid == 0); + btrfs_set_node_ptr_generation(lower, slot, trans->transid); btrfs_set_header_nritems(lower, nritems + 1); btrfs_mark_buffer_dirty(lower); return 0; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9bc1b0a8615..fd58dd846e6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -110,7 +110,7 @@ struct btrfs_header { #define BTRFS_MAX_LEVEL 8 #define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \ sizeof(struct btrfs_header)) / \ - (sizeof(struct btrfs_disk_key) + sizeof(u64))) + sizeof(struct btrfs_key_ptr)) #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) #define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->leafsize)) #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ @@ -168,6 +168,7 @@ struct btrfs_leaf { struct btrfs_key_ptr { struct btrfs_disk_key key; __le64 blockptr; + __le64 generation; } __attribute__ ((__packed__)); struct btrfs_node { @@ -196,7 +197,13 @@ struct btrfs_path { */ struct btrfs_extent_item { __le32 refs; - __le64 owner; +} __attribute__ ((__packed__)); + +struct btrfs_extent_ref { + __le64 root; + __le64 generation; + __le64 objectid; + __le64 offset; } __attribute__ ((__packed__)); struct btrfs_inode_timespec { @@ -402,12 +409,13 @@ struct btrfs_root { * are used, and how many references there are to each block */ #define BTRFS_EXTENT_ITEM_KEY 33 +#define BTRFS_EXTENT_REF_KEY 34 /* * block groups give us hints into the extent allocation trees. Which * blocks are free etc etc */ -#define BTRFS_BLOCK_GROUP_ITEM_KEY 34 +#define BTRFS_BLOCK_GROUP_ITEM_KEY 50 /* * string items are for debugging. They just store a short string of @@ -529,15 +537,25 @@ BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_inode_timespec, nsec, 32); /* struct btrfs_extent_item */ BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); -BTRFS_SETGET_FUNCS(extent_owner, struct btrfs_extent_item, owner, 64); + +/* struct btrfs_extent_ref */ +BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64); +BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64); +BTRFS_SETGET_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64); +BTRFS_SETGET_FUNCS(ref_offset, struct btrfs_extent_ref, offset, 64); + +BTRFS_SETGET_STACK_FUNCS(ref_root, struct btrfs_extent_ref, root, 64); +BTRFS_SETGET_STACK_FUNCS(ref_generation, struct btrfs_extent_ref, + generation, 64); +BTRFS_SETGET_STACK_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64); +BTRFS_SETGET_STACK_FUNCS(ref_offset, struct btrfs_extent_ref, offset, 64); BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, refs, 32); -BTRFS_SETGET_STACK_FUNCS(stack_extent_owner, struct btrfs_extent_item, - owner, 64); /* struct btrfs_node */ BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); +BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64); static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr) { @@ -556,6 +574,23 @@ static inline void btrfs_set_node_blockptr(struct extent_buffer *eb, btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val); } +static inline u64 btrfs_node_ptr_generation(struct extent_buffer *eb, int nr) +{ + unsigned long ptr; + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + return btrfs_key_generation(eb, (struct btrfs_key_ptr *)ptr); +} + +static inline void btrfs_set_node_ptr_generation(struct extent_buffer *eb, + int nr, u64 val) +{ + unsigned long ptr; + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + btrfs_set_key_generation(eb, (struct btrfs_key_ptr *)ptr, val); +} + static inline unsigned long btrfs_node_key_ptr_offset(int nr) { return offsetof(struct btrfs_node, ptrs) + diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8ab4954f6ad..0f1ebdd4e92 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -17,6 +17,7 @@ */ #include +#include "hash.h" #include "ctree.h" #include "disk-io.h" #include "print-tree.h" @@ -352,9 +353,63 @@ found: return found_group; } +static u64 hash_extent_ref(u64 root_objectid, u64 root_generation, + u64 owner, u64 owner_offset) +{ + u32 high_crc = ~(u32)0; + u32 low_crc = ~(u32)0; + __le64 lenum; + + lenum = cpu_to_le64(root_objectid); + high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); + lenum = cpu_to_le64(root_generation); + high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); + + lenum = cpu_to_le64(owner); + low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); + + lenum = cpu_to_le64(owner_offset); + low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); + + return ((u64)high_crc << 32) | (u64)low_crc; +} + +int insert_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, + u64 root_objectid, u64 root_generation, + u64 owner, u64 owner_offset) +{ + u64 hash; + struct btrfs_key key; + struct btrfs_extent_ref ref; + struct extent_buffer *l; + struct btrfs_extent_item *item; + int ret; + + btrfs_set_stack_ref_root(&ref, root_objectid); + btrfs_set_stack_ref_generation(&ref, root_generation); + btrfs_set_stack_ref_objectid(&ref, owner); + btrfs_set_stack_ref_offset(&ref, owner_offset); + + ret = btrfs_name_hash(&ref, sizeof(ref), &hash); + key.offset = hash; + key.objectid = bytenr; + key.type = BTRFS_EXTENT_REF_KEY; + + ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref)); + while (ret == -EEXIST) { + + } + +} + int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 bytenr, u64 num_bytes) + u64 bytenr, u64 num_bytes, + u64 root_objectid, u64 root_generation, + u64 owner, u64 owner_offset) { struct btrfs_path *path; int ret; @@ -386,9 +441,10 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(root->fs_info->extent_root, path); - btrfs_free_path(path); finish_current_insert(trans, root->fs_info->extent_root); del_pending_extents(trans, root->fs_info->extent_root); + + btrfs_free_path(path); return 0; } -- cgit v1.2.3-70-g09d2 From 7bb86316c3961d1bc401ef184fd996f999556c7f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 11 Dec 2007 09:25:06 -0500 Subject: Btrfs: Add back pointers from extents to the btree or file referencing them Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 196 +++++++++++++++++++++++--- fs/btrfs/ctree.h | 41 ++++-- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 373 +++++++++++++++++++++++++++++++++++++++++-------- fs/btrfs/file.c | 20 ++- fs/btrfs/inode.c | 38 +++-- fs/btrfs/print-tree.c | 10 ++ fs/btrfs/tree-defrag.c | 2 + 8 files changed, 577 insertions(+), 105 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 5697705f753..fd8233e05cf 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -77,13 +77,37 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct extent_buffer **cow_ret, u64 search_start, u64 empty_size) { + u64 root_gen; struct extent_buffer *cow; + u32 nritems; int ret = 0; int different_trans = 0; + int level; + struct btrfs_key first_key; + + if (root->ref_cows) { + root_gen = trans->transid; + } else { + root_gen = 0; + } + WARN_ON(root->ref_cows && trans->transid != + root->fs_info->running_transaction->transid); WARN_ON(root->ref_cows && trans->transid != root->last_trans); - cow = btrfs_alloc_free_block(trans, root, buf->len, + level = btrfs_header_level(buf); + nritems = btrfs_header_nritems(buf); + if (nritems) { + if (level == 0) + btrfs_item_key_to_cpu(buf, &first_key, 0); + else + btrfs_node_key_to_cpu(buf, &first_key, 0); + } else { + first_key.objectid = 0; + } + cow = __btrfs_alloc_free_block(trans, root, buf->len, + root->root_key.objectid, + root_gen, first_key.objectid, level, search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); @@ -104,14 +128,17 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, } if (buf == root->node) { + root_gen = btrfs_header_generation(buf); root->node = cow; extent_buffer_get(cow); if (buf != root->commit_root) { btrfs_free_extent(trans, root, buf->start, - buf->len, 1); + buf->len, root->root_key.objectid, + root_gen, 0, 0, 1); } free_extent_buffer(buf); } else { + root_gen = btrfs_header_generation(parent); btrfs_set_node_blockptr(parent, parent_slot, cow->start); WARN_ON(trans->transid == 0); @@ -119,7 +146,9 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, trans->transid); btrfs_mark_buffer_dirty(parent); WARN_ON(btrfs_header_generation(parent) != trans->transid); - btrfs_free_extent(trans, root, buf->start, buf->len, 1); + btrfs_free_extent(trans, root, buf->start, buf->len, + btrfs_header_owner(parent), root_gen, + 0, 0, 1); } free_extent_buffer(buf); btrfs_mark_buffer_dirty(cow); @@ -606,6 +635,8 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root return 0; mid = path->nodes[level]; + WARN_ON(btrfs_header_generation(mid) != trans->transid); + orig_ptr = btrfs_node_blockptr(mid, orig_slot); if (level < BTRFS_MAX_LEVEL - 1) @@ -631,7 +662,9 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root wait_on_tree_block_writeback(root, mid); /* once for the path */ free_extent_buffer(mid); - ret = btrfs_free_extent(trans, root, mid->start, mid->len, 1); + ret = btrfs_free_extent(trans, root, mid->start, mid->len, + root->root_key.objectid, + btrfs_header_generation(mid), 0, 0, 1); /* once for the root ptr */ free_extent_buffer(mid); return ret; @@ -681,6 +714,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; if (btrfs_header_nritems(right) == 0) { u64 bytenr = right->start; + u64 generation = btrfs_header_generation(parent); u32 blocksize = right->len; clean_tree_block(trans, root, right); @@ -692,7 +726,9 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (wret) ret = wret; wret = btrfs_free_extent(trans, root, bytenr, - blocksize, 1); + blocksize, + btrfs_header_owner(parent), + generation, 0, 0, 1); if (wret) ret = wret; } else { @@ -722,6 +758,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root } if (btrfs_header_nritems(mid) == 0) { /* we've managed to empty the middle node, drop it */ + u64 root_gen = btrfs_header_generation(parent); u64 bytenr = mid->start; u32 blocksize = mid->len; clean_tree_block(trans, root, mid); @@ -731,7 +768,9 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root wret = del_ptr(trans, root, path, level + 1, pslot); if (wret) ret = wret; - wret = btrfs_free_extent(trans, root, bytenr, blocksize, 1); + wret = btrfs_free_extent(trans, root, bytenr, blocksize, + btrfs_header_owner(parent), + root_gen, 0, 0, 1); if (wret) ret = wret; } else { @@ -788,6 +827,7 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, return 1; mid = path->nodes[level]; + WARN_ON(btrfs_header_generation(mid) != trans->transid); orig_ptr = btrfs_node_blockptr(mid, orig_slot); if (level < BTRFS_MAX_LEVEL - 1) @@ -1113,6 +1153,8 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root src_nritems = btrfs_header_nritems(src); dst_nritems = btrfs_header_nritems(dst); push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; + WARN_ON(btrfs_header_generation(src) != trans->transid); + WARN_ON(btrfs_header_generation(dst) != trans->transid); if (push_items <= 0) { return 1; @@ -1159,6 +1201,9 @@ static int balance_node_right(struct btrfs_trans_handle *trans, int dst_nritems; int ret = 0; + WARN_ON(btrfs_header_generation(src) != trans->transid); + WARN_ON(btrfs_header_generation(dst) != trans->transid); + src_nritems = btrfs_header_nritems(src); dst_nritems = btrfs_header_nritems(dst); push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; @@ -1202,6 +1247,8 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { + u64 root_gen; + u64 lower_gen; struct extent_buffer *lower; struct extent_buffer *c; struct btrfs_disk_key lower_key; @@ -1209,7 +1256,20 @@ static int insert_new_root(struct btrfs_trans_handle *trans, BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - c = btrfs_alloc_free_block(trans, root, root->nodesize, + if (root->ref_cows) + root_gen = trans->transid; + else + root_gen = 0; + + lower = path->nodes[level-1]; + if (level == 1) + btrfs_item_key(lower, &lower_key, 0); + else + btrfs_node_key(lower, &lower_key, 0); + + c = __btrfs_alloc_free_block(trans, root, root->nodesize, + root->root_key.objectid, + root_gen, lower_key.objectid, level, root->node->start, 0); if (IS_ERR(c)) return PTR_ERR(c); @@ -1219,19 +1279,16 @@ static int insert_new_root(struct btrfs_trans_handle *trans, btrfs_set_header_bytenr(c, c->start); btrfs_set_header_generation(c, trans->transid); btrfs_set_header_owner(c, root->root_key.objectid); - lower = path->nodes[level-1]; write_extent_buffer(c, root->fs_info->fsid, (unsigned long)btrfs_header_fsid(c), BTRFS_FSID_SIZE); - if (level == 1) - btrfs_item_key(lower, &lower_key, 0); - else - btrfs_node_key(lower, &lower_key, 0); btrfs_set_node_key(c, &lower_key, 0); btrfs_set_node_blockptr(c, 0, lower->start); - WARN_ON(btrfs_header_generation(lower) == 0); - btrfs_set_node_ptr_generation(c, 0, btrfs_header_generation(lower)); + lower_gen = btrfs_header_generation(lower); + WARN_ON(lower_gen == 0); + + btrfs_set_node_ptr_generation(c, 0, lower_gen); btrfs_mark_buffer_dirty(c); @@ -1241,6 +1298,18 @@ static int insert_new_root(struct btrfs_trans_handle *trans, extent_buffer_get(c); path->nodes[level] = c; path->slots[level] = 0; + + if (root->ref_cows && lower_gen != trans->transid) { + struct btrfs_path *back_path = btrfs_alloc_path(); + int ret; + ret = btrfs_insert_extent_backref(trans, + root->fs_info->extent_root, + path, lower->start, + root->root_key.objectid, + trans->transid, 0, 0); + BUG_ON(ret); + btrfs_free_path(back_path); + } return 0; } @@ -1294,6 +1363,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { + u64 root_gen; struct extent_buffer *c; struct extent_buffer *split; struct btrfs_disk_key disk_key; @@ -1303,6 +1373,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root u32 c_nritems; c = path->nodes[level]; + WARN_ON(btrfs_header_generation(c) != trans->transid); if (c == root->node) { /* trying to split the root, lets make a new one */ ret = insert_new_root(trans, root, path, level + 1); @@ -1319,8 +1390,17 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root } c_nritems = btrfs_header_nritems(c); - split = btrfs_alloc_free_block(trans, root, root->nodesize, - c->start, 0); + if (root->ref_cows) + root_gen = trans->transid; + else + root_gen = 0; + + btrfs_node_key(c, &disk_key, 0); + split = __btrfs_alloc_free_block(trans, root, root->nodesize, + root->root_key.objectid, + root_gen, + btrfs_disk_key_objectid(&disk_key), + level, c->start, 0); if (IS_ERR(split)) return PTR_ERR(split); @@ -1789,6 +1869,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *ins_key, struct btrfs_path *path, int data_size, int extend) { + u64 root_gen; struct extent_buffer *l; u32 nritems; int mid; @@ -1807,6 +1888,11 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (extend) space_needed = data_size; + if (root->ref_cows) + root_gen = trans->transid; + else + root_gen = 0; + /* first try to make some room by pushing left and right */ if (ins_key->type != BTRFS_DIR_ITEM_KEY) { wret = push_leaf_right(trans, root, path, data_size, 0); @@ -1837,8 +1923,12 @@ again: nritems = btrfs_header_nritems(l); mid = (nritems + 1)/ 2; - right = btrfs_alloc_free_block(trans, root, root->leafsize, - l->start, 0); + btrfs_item_key(l, &disk_key, 0); + + right = __btrfs_alloc_free_block(trans, root, root->leafsize, + root->root_key.objectid, + root_gen, disk_key.objectid, 0, + l->start, 0); if (IS_ERR(right)) return PTR_ERR(right); @@ -2413,13 +2503,16 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (leaf == root->node) { btrfs_set_header_level(leaf, 0); } else { + u64 root_gen = btrfs_header_generation(path->nodes[1]); clean_tree_block(trans, root, leaf); wait_on_tree_block_writeback(root, leaf); wret = del_ptr(trans, root, path, 1, path->slots[1]); if (wret) ret = wret; wret = btrfs_free_extent(trans, root, - leaf->start, leaf->len, 1); + leaf->start, leaf->len, + btrfs_header_owner(path->nodes[1]), + root_gen, 0, 0, 1); if (wret) ret = wret; } @@ -2456,9 +2549,13 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, } if (btrfs_header_nritems(leaf) == 0) { + u64 root_gen; u64 bytenr = leaf->start; u32 blocksize = leaf->len; + root_gen = btrfs_header_generation( + path->nodes[1]); + clean_tree_block(trans, root, leaf); wait_on_tree_block_writeback(root, leaf); @@ -2468,7 +2565,9 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, free_extent_buffer(leaf); wret = btrfs_free_extent(trans, root, bytenr, - blocksize, 1); + blocksize, + btrfs_header_owner(path->nodes[1]), + root_gen, 0, 0, 1); if (wret) ret = wret; } else { @@ -2482,6 +2581,61 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, return ret; } +/* + * walk up the tree as far as required to find the previous leaf. + * returns 0 if it found something or 1 if there are no lesser leaves. + * returns < 0 on io errors. + */ +int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) +{ + int slot; + int level = 1; + u64 bytenr; + struct extent_buffer *c; + struct extent_buffer *next = NULL; + + while(level < BTRFS_MAX_LEVEL) { + if (!path->nodes[level]) + return 1; + + slot = path->slots[level]; + c = path->nodes[level]; + if (slot == 0) { + level++; + if (level == BTRFS_MAX_LEVEL) + return 1; + continue; + } + slot--; + + bytenr = btrfs_node_blockptr(c, slot); + if (next) + free_extent_buffer(next); + + if (path->reada < 0) + reada_for_search(root, path, level, slot); + + next = read_tree_block(root, bytenr, + btrfs_level_size(root, level - 1)); + break; + } + path->slots[level] = slot; + while(1) { + level--; + c = path->nodes[level]; + free_extent_buffer(c); + path->nodes[level] = next; + path->slots[level] = 0; + if (!level) + break; + if (path->reada) + reada_for_search(root, path, level, 0); + next = read_tree_block(root, btrfs_node_blockptr(next, 0), + btrfs_level_size(root, level - 1)); + } + return 0; +} + /* * walk up the tree as far as required to find the next leaf. * returns 0 if it found something or 1 if there are no greater leaves. @@ -2503,6 +2657,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) c = path->nodes[level]; if (slot >= btrfs_header_nritems(c)) { level++; + if (level == BTRFS_MAX_LEVEL) + return 1; continue; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index fd58dd846e6..cb1b156d954 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -544,11 +544,12 @@ BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64); BTRFS_SETGET_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64); BTRFS_SETGET_FUNCS(ref_offset, struct btrfs_extent_ref, offset, 64); -BTRFS_SETGET_STACK_FUNCS(ref_root, struct btrfs_extent_ref, root, 64); -BTRFS_SETGET_STACK_FUNCS(ref_generation, struct btrfs_extent_ref, +BTRFS_SETGET_STACK_FUNCS(stack_ref_root, struct btrfs_extent_ref, root, 64); +BTRFS_SETGET_STACK_FUNCS(stack_ref_generation, struct btrfs_extent_ref, generation, 64); -BTRFS_SETGET_STACK_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64); -BTRFS_SETGET_STACK_FUNCS(ref_offset, struct btrfs_extent_ref, offset, 64); +BTRFS_SETGET_STACK_FUNCS(stack_ref_objectid, struct btrfs_extent_ref, + objectid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_ref_offset, struct btrfs_extent_ref, offset, 64); BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, refs, 32); @@ -914,24 +915,45 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, *hint, u64 search_start, int data, int owner); int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root); + struct btrfs_root *root, u64 owner_objectid); struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 size, + u64 root_objectid, u64 hint, u64 empty_size); +struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u32 blocksize, + u64 root_objectid, + u64 ref_generation, + u64 first_objectid, + int level, + u64 hint, + u64 empty_size); +int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 bytenr, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 owner, - u64 num_bytes, u64 empty_size, u64 search_start, + struct btrfs_root *root, + u64 num_bytes, u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, int data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 bytenr, u64 num_bytes, int pin); + *root, u64 bytenr, u64 num_bytes, + u64 root_objectid, u64 ref_generation, + u64 owner_objectid, u64 owner_offset, int pin); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_map_tree *unpin); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 bytenr, u64 num_bytes); + u64 bytenr, u64 num_bytes, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset); int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_free_block_groups(struct btrfs_fs_info *info); @@ -966,6 +988,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *cpu_key, u32 data_size); int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); +int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *root); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 60a30da6af0..0ac21e3aac8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -210,7 +210,7 @@ static int btree_writepages(struct address_space *mapping, { struct extent_map_tree *tree; tree = &BTRFS_I(mapping->host)->extent_tree; - if (wbc->sync_mode == WB_SYNC_NONE) { + if (0 && wbc->sync_mode == WB_SYNC_NONE) { u64 num_dirty; u64 start = 0; unsigned long thresh = 96 * 1024 * 1024; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0f1ebdd4e92..32991f73e9d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -17,6 +17,7 @@ */ #include +#include #include "hash.h" #include "ctree.h" #include "disk-io.h" @@ -89,7 +90,8 @@ static int cache_block_group(struct btrfs_root *root, btrfs_item_key_to_cpu(leaf, &key, slot); if (key.objectid < block_group->key.objectid) { - if (key.objectid + key.offset > first_free) + if (btrfs_key_type(&key) != BTRFS_EXTENT_REF_KEY && + key.objectid + key.offset > first_free) first_free = key.objectid + key.offset; goto next; } @@ -353,7 +355,7 @@ found: return found_group; } -static u64 hash_extent_ref(u64 root_objectid, u64 root_generation, +static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset) { u32 high_crc = ~(u32)0; @@ -362,53 +364,149 @@ static u64 hash_extent_ref(u64 root_objectid, u64 root_generation, lenum = cpu_to_le64(root_objectid); high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); - lenum = cpu_to_le64(root_generation); - high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); + lenum = cpu_to_le64(ref_generation); + low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); +#if 0 lenum = cpu_to_le64(owner); low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); - lenum = cpu_to_le64(owner_offset); low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); - +#endif return ((u64)high_crc << 32) | (u64)low_crc; } -int insert_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - u64 bytenr, - u64 root_objectid, u64 root_generation, - u64 owner, u64 owner_offset) +static int match_extent_ref(struct extent_buffer *leaf, + struct btrfs_extent_ref *disk_ref, + struct btrfs_extent_ref *cpu_ref) +{ + int ret; + int len; + + if (cpu_ref->objectid) + len = sizeof(*cpu_ref); + else + len = 2 * sizeof(u64); + ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref, + len); + return ret == 0; +} + +static int lookup_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 bytenr, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, int del) { u64 hash; struct btrfs_key key; + struct btrfs_key found_key; struct btrfs_extent_ref ref; - struct extent_buffer *l; - struct btrfs_extent_item *item; + struct extent_buffer *leaf; + struct btrfs_extent_ref *disk_ref; + int ret; + int ret2; + + btrfs_set_stack_ref_root(&ref, root_objectid); + btrfs_set_stack_ref_generation(&ref, ref_generation); + btrfs_set_stack_ref_objectid(&ref, owner); + btrfs_set_stack_ref_offset(&ref, owner_offset); + + hash = hash_extent_ref(root_objectid, ref_generation, owner, + owner_offset); + key.offset = hash; + key.objectid = bytenr; + key.type = BTRFS_EXTENT_REF_KEY; + + while (1) { + ret = btrfs_search_slot(trans, root, &key, path, + del ? -1 : 0, del); + if (ret < 0) + goto out; + leaf = path->nodes[0]; + if (ret != 0) { + u32 nritems = btrfs_header_nritems(leaf); + if (path->slots[0] >= nritems) { + ret2 = btrfs_next_leaf(root, path); + if (ret2) + goto out; + leaf = path->nodes[0]; + } + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.objectid != bytenr || + found_key.type != BTRFS_EXTENT_REF_KEY) + goto out; + key.offset = found_key.offset; + if (del) { + btrfs_release_path(root, path); + continue; + } + } + disk_ref = btrfs_item_ptr(path->nodes[0], + path->slots[0], + struct btrfs_extent_ref); + if (match_extent_ref(path->nodes[0], disk_ref, &ref)) { + ret = 0; + goto out; + } + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + key.offset = found_key.offset + 1; + btrfs_release_path(root, path); + } +out: + return ret; +} + +int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 bytenr, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset) +{ + u64 hash; + struct btrfs_key key; + struct btrfs_extent_ref ref; + struct btrfs_extent_ref *disk_ref; int ret; btrfs_set_stack_ref_root(&ref, root_objectid); - btrfs_set_stack_ref_generation(&ref, root_generation); + btrfs_set_stack_ref_generation(&ref, ref_generation); btrfs_set_stack_ref_objectid(&ref, owner); btrfs_set_stack_ref_offset(&ref, owner_offset); - ret = btrfs_name_hash(&ref, sizeof(ref), &hash); + hash = hash_extent_ref(root_objectid, ref_generation, owner, + owner_offset); key.offset = hash; key.objectid = bytenr; key.type = BTRFS_EXTENT_REF_KEY; ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref)); while (ret == -EEXIST) { - + disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_extent_ref); + if (match_extent_ref(path->nodes[0], disk_ref, &ref)) + goto out; + key.offset++; + btrfs_release_path(root, path); + ret = btrfs_insert_empty_item(trans, root, path, &key, + sizeof(ref)); } - + if (ret) + goto out; + disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_extent_ref); + write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref, + sizeof(ref)); + btrfs_mark_buffer_dirty(path->nodes[0]); +out: + btrfs_release_path(root, path); + return ret; } int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, - u64 root_objectid, u64 root_generation, + u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset) { struct btrfs_path *path; @@ -441,6 +539,11 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(root->fs_info->extent_root, path); + + ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root, + path, bytenr, root_objectid, + ref_generation, owner, owner_offset); + BUG_ON(ret); finish_current_insert(trans, root->fs_info->extent_root); del_pending_extents(trans, root->fs_info->extent_root); @@ -489,10 +592,29 @@ out: } int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root) + struct btrfs_root *root, u64 owner_objectid) { + u64 generation; + u64 key_objectid; + u64 level; + u32 nritems; + struct btrfs_disk_key disk_key; + + level = btrfs_header_level(root->node); + generation = trans->transid; + nritems = btrfs_header_nritems(root->node); + if (nritems > 0) { + if (level == 0) + btrfs_item_key(root->node, &disk_key, 0); + else + btrfs_node_key(root->node, &disk_key, 0); + key_objectid = btrfs_disk_key_objectid(&disk_key); + } else { + key_objectid = 0; + } return btrfs_inc_extent_ref(trans, root, root->node->start, - root->node->len); + root->node->len, owner_objectid, + generation, 0, 0); } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -506,7 +628,6 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, int level; int ret; int faili; - int err; if (!root->ref_cows) return 0; @@ -528,7 +649,9 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (disk_bytenr == 0) continue; ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, - btrfs_file_extent_disk_num_bytes(buf, fi)); + btrfs_file_extent_disk_num_bytes(buf, fi), + root->root_key.objectid, trans->transid, + key.objectid, key.offset); if (ret) { faili = i; goto fail; @@ -536,7 +659,9 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, } else { bytenr = btrfs_node_blockptr(buf, i); ret = btrfs_inc_extent_ref(trans, root, bytenr, - btrfs_level_size(root, level - 1)); + btrfs_level_size(root, level - 1), + root->root_key.objectid, + trans->transid, 0, 0); if (ret) { faili = i; goto fail; @@ -546,6 +671,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, return 0; fail: WARN_ON(1); +#if 0 for (i =0; i < faili; i++) { if (level == 0) { u64 disk_bytenr; @@ -571,6 +697,7 @@ fail: BUG_ON(err); } } +#endif return ret; } @@ -809,18 +936,18 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root) { + u64 start; + u64 end; + struct btrfs_fs_info *info = extent_root->fs_info; + struct btrfs_path *path; struct btrfs_key ins; struct btrfs_extent_item extent_item; int ret; int err = 0; - u64 start; - u64 end; - struct btrfs_fs_info *info = extent_root->fs_info; btrfs_set_stack_extent_refs(&extent_item, 1); btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); - btrfs_set_stack_extent_owner(&extent_item, - extent_root->root_key.objectid); + path = btrfs_alloc_path(); while(1) { ret = find_first_extent_bit(&info->extent_ins, 0, &start, @@ -834,7 +961,12 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct &extent_item, sizeof(extent_item)); clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, GFP_NOFS); + err = btrfs_insert_extent_backref(trans, extent_root, path, + start, extent_root->root_key.objectid, + 0, 0, 0); + BUG_ON(err); } + btrfs_free_path(path); return 0; } @@ -871,7 +1003,9 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, * remove an extent from the root, returns 0 on success */ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 bytenr, u64 num_bytes, int pin, + *root, u64 bytenr, u64 num_bytes, + u64 root_objectid, u64 ref_generation, + u64 owner_objectid, u64 owner_offset, int pin, int mark_free) { struct btrfs_path *path; @@ -891,6 +1025,24 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (!path) return -ENOMEM; + if (ref_generation && owner_objectid == 0 && root_objectid == 3) { +//printk("drop backref root %Lu gen %Lu byte %Lu\n", root_objectid, ref_generation, bytenr ); + } + ret = lookup_extent_backref(trans, extent_root, path, + bytenr, root_objectid, + ref_generation, + owner_objectid, owner_offset, 1); + if (ret == 0) { + ret = btrfs_del_item(trans, extent_root, path); + } else { + btrfs_print_leaf(extent_root, path->nodes[0]); + WARN_ON(1); + printk("Unable to find ref byte nr %Lu root %Lu " + " gen %Lu owner %Lu offset %Lu\n", bytenr, + root_objectid, ref_generation, owner_objectid, + owner_offset); + } + btrfs_release_path(extent_root, path); ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); if (ret < 0) return ret; @@ -965,7 +1117,9 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct clear_extent_bits(pending_del, start, end, EXTENT_LOCKED, GFP_NOFS); ret = __free_extent(trans, extent_root, - start, end + 1 - start, 0, 0); + start, end + 1 - start, + extent_root->root_key.objectid, + 0, 0, 0, 0, 0); if (ret) err = ret; } @@ -976,18 +1130,25 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct * remove an extent from the root, returns 0 on success */ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 bytenr, u64 num_bytes, int pin) + *root, u64 bytenr, u64 num_bytes, + u64 root_objectid, u64 ref_generation, + u64 owner_objectid, u64 owner_offset, int pin) { struct btrfs_root *extent_root = root->fs_info->extent_root; int pending_ret; int ret; WARN_ON(num_bytes < root->sectorsize); + if (!root->ref_cows) + ref_generation = 0; + if (root == extent_root) { pin_down_bytes(root, bytenr, num_bytes, 1); return 0; } - ret = __free_extent(trans, root, bytenr, num_bytes, pin, pin == 0); + ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid, + ref_generation, owner_objectid, owner_offset, + pin, pin == 0); pending_ret = del_pending_extents(trans, root->fs_info->extent_root); return ret ? ret : pending_ret; } @@ -1080,23 +1241,26 @@ check_failed: btrfs_item_key_to_cpu(l, &key, path->slots[0]); /* - * a rare case, go back one key if we hit a block group item - * instead of an extent item + * walk backwards to find the first extent item key */ - if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY && - key.objectid + key.offset >= search_start) { - ins->objectid = key.objectid; - ins->offset = key.offset - 1; - btrfs_release_path(root, path); - ret = btrfs_search_slot(trans, root, ins, path, 0, 0); - if (ret < 0) - goto error; - - if (path->slots[0] > 0) { + while(btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) { + if (path->slots[0] == 0) { + ret = btrfs_prev_leaf(root, path); + if (ret != 0) { + ret = btrfs_search_slot(trans, root, ins, + path, 0, 0); + if (ret < 0) + goto error; + if (path->slots[0] > 0) + path->slots[0]--; + break; + } + } else { path->slots[0]--; } + l = path->nodes[0]; + btrfs_item_key_to_cpu(l, &key, path->slots[0]); } - while (1) { l = path->nodes[0]; slot = path->slots[0]; @@ -1146,7 +1310,8 @@ check_failed: } } if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) { - if (!start_found) { + if (!start_found && btrfs_key_type(&key) == + BTRFS_BLOCK_GROUP_ITEM_KEY) { last_byte = key.objectid; start_found = 1; } @@ -1244,8 +1409,10 @@ error: * returns 0 if everything worked, non-zero otherwise. */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 owner, - u64 num_bytes, u64 empty_size, u64 hint_byte, + struct btrfs_root *root, + u64 num_bytes, u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, int data) { int ret; @@ -1255,9 +1422,9 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; struct btrfs_extent_item extent_item; + struct btrfs_path *path; btrfs_set_stack_extent_refs(&extent_item, 1); - btrfs_set_stack_extent_owner(&extent_item, owner); WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, @@ -1296,8 +1463,16 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, trans->alloc_exclude_start = 0; trans->alloc_exclude_nr = 0; + BUG_ON(ret); + + path = btrfs_alloc_path(); + BUG_ON(!path); + ret = btrfs_insert_extent_backref(trans, extent_root, path, + ins->objectid, root_objectid, + ref_generation, owner, owner_offset); BUG_ON(ret); + btrfs_free_path(path); finish_current_insert(trans, extent_root); pending_ret = del_pending_extents(trans, extent_root); @@ -1321,15 +1496,43 @@ update_block: */ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u32 blocksize, u64 hint, + u32 blocksize, + u64 root_objectid, u64 hint, + u64 empty_size) +{ + u64 ref_generation; + + if (root->ref_cows) + ref_generation = trans->transid; + else + ref_generation = 0; + + + return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid, + ref_generation, 0, 0, hint, empty_size); +} + +/* + * helper function to allocate a block for a given tree + * returns the tree buffer or NULL. + */ +struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u32 blocksize, + u64 root_objectid, + u64 ref_generation, + u64 first_objectid, + int level, + u64 hint, u64 empty_size) { struct btrfs_key ins; int ret; struct extent_buffer *buf; - ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - blocksize, empty_size, hint, + ret = btrfs_alloc_extent(trans, root, blocksize, + root_objectid, ref_generation, + first_objectid, level, empty_size, hint, (u64)-1, &ins, 0); if (ret) { BUG_ON(ret > 0); @@ -1337,7 +1540,9 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, } buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize); if (!buf) { - btrfs_free_extent(trans, root, ins.objectid, blocksize, 0); + btrfs_free_extent(trans, root, ins.objectid, blocksize, + root->root_key.objectid, ref_generation, + 0, 0, 0); return ERR_PTR(-ENOMEM); } btrfs_set_buffer_uptodate(buf); @@ -1355,6 +1560,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, static int drop_leaf_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *leaf) { + u64 leaf_owner; + u64 leaf_generation; struct btrfs_key key; struct btrfs_file_extent_item *fi; int i; @@ -1363,6 +1570,9 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, BUG_ON(!btrfs_is_leaf(leaf)); nritems = btrfs_header_nritems(leaf); + leaf_owner = btrfs_header_owner(leaf); + leaf_generation = btrfs_header_generation(leaf); + for (i = 0; i < nritems; i++) { u64 disk_bytenr; @@ -1381,7 +1591,9 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, if (disk_bytenr == 0) continue; ret = btrfs_free_extent(trans, root, disk_bytenr, - btrfs_file_extent_disk_num_bytes(leaf, fi), 0); + btrfs_file_extent_disk_num_bytes(leaf, fi), + leaf_owner, leaf_generation, + key.objectid, key.offset, 0); BUG_ON(ret); } return 0; @@ -1423,9 +1635,12 @@ static void reada_walk_down(struct btrfs_root *root, static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level) { + u64 root_owner; + u64 root_gen; + u64 bytenr; struct extent_buffer *next; struct extent_buffer *cur; - u64 bytenr; + struct extent_buffer *parent; u32 blocksize; int ret; u32 refs; @@ -1466,9 +1681,13 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs); BUG_ON(ret); if (refs != 1) { + parent = path->nodes[*level]; + root_owner = btrfs_header_owner(parent); + root_gen = btrfs_header_generation(parent); path->slots[*level]++; ret = btrfs_free_extent(trans, root, bytenr, - blocksize, 1); + blocksize, root_owner, + root_gen, 0, 0, 1); BUG_ON(ret); continue; } @@ -1484,10 +1703,16 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root blocksize, &refs); BUG_ON(ret); if (refs != 1) { + parent = path->nodes[*level]; + root_owner = btrfs_header_owner(parent); + root_gen = btrfs_header_generation(parent); + path->slots[*level]++; free_extent_buffer(next); - ret = btrfs_free_extent(trans, root, - bytenr, blocksize, 1); + ret = btrfs_free_extent(trans, root, bytenr, + blocksize, + root_owner, + root_gen, 0, 0, 1); BUG_ON(ret); continue; } @@ -1502,8 +1727,19 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root out: WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); + + if (path->nodes[*level] == root->node) { + root_owner = root->root_key.objectid; + parent = path->nodes[*level]; + } else { + parent = path->nodes[*level + 1]; + root_owner = btrfs_header_owner(parent); + } + + root_gen = btrfs_header_generation(parent); ret = btrfs_free_extent(trans, root, path->nodes[*level]->start, - path->nodes[*level]->len, 1); + path->nodes[*level]->len, + root_owner, root_gen, 0, 0, 1); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -1519,10 +1755,12 @@ out: static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level) { + u64 root_owner; + u64 root_gen; + struct btrfs_root_item *root_item = &root->root_item; int i; int slot; int ret; - struct btrfs_root_item *root_item = &root->root_item; for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; @@ -1539,9 +1777,20 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root root_item->drop_level = i; return 0; } else { + if (path->nodes[*level] == root->node) { + root_owner = root->root_key.objectid; + root_gen = + btrfs_header_generation(path->nodes[*level]); + } else { + struct extent_buffer *node; + node = path->nodes[*level + 1]; + root_owner = btrfs_header_owner(node); + root_gen = btrfs_header_generation(node); + } ret = btrfs_free_extent(trans, root, path->nodes[*level]->start, - path->nodes[*level]->len, 1); + path->nodes[*level]->len, + root_owner, root_gen, 0, 0, 1); BUG_ON(ret); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b0d63778726..5b1f90f06e0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -496,7 +496,10 @@ next_slot: sizeof(old)); if (disk_bytenr != 0) { ret = btrfs_inc_extent_ref(trans, root, - disk_bytenr, disk_num_bytes); + disk_bytenr, disk_num_bytes, + root->root_key.objectid, + trans->transid, + key.objectid, end); BUG_ON(ret); } } @@ -541,6 +544,14 @@ next_slot: u64 disk_bytenr = 0; u64 disk_num_bytes = 0; u64 extent_num_bytes = 0; + u64 root_gen; + + if (leaf != root->node) { + root_gen = + btrfs_header_generation(path->nodes[1]); + } else { + root_gen = btrfs_header_generation(leaf); + } if (found_extent) { disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, @@ -562,8 +573,11 @@ next_slot: if (found_extent && disk_bytenr != 0) { inode->i_blocks -= extent_num_bytes >> 9; ret = btrfs_free_extent(trans, root, - disk_bytenr, - disk_num_bytes, 0); + disk_bytenr, + disk_num_bytes, + root->root_key.objectid, + root_gen, inode->i_ino, + key.offset, 0); } BUG_ON(ret); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d0af0807d9f..bb70db0c9df 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -93,7 +93,9 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) if (alloc_hint == EXTENT_MAP_INLINE) goto out; - ret = btrfs_alloc_extent(trans, root, inode->i_ino, num_bytes, 0, + ret = btrfs_alloc_extent(trans, root, num_bytes, + root->root_key.objectid, trans->transid, + inode->i_ino, start, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { WARN_ON(1); @@ -560,6 +562,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, u64 extent_start = 0; u64 extent_num_bytes = 0; u64 item_end = 0; + u64 root_gen = 0; int found_extent; int del_item; int extent_type = -1; @@ -670,6 +673,15 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, found_extent = 1; inode->i_blocks -= num_dec; } + if (leaf == root->node) { + root_gen = + btrfs_header_generation(leaf); + } else { + struct extent_buffer *parent; + parent = path->nodes[1]; + root_gen = + btrfs_header_generation(parent); + } } } else if (extent_type == BTRFS_FILE_EXTENT_INLINE && !del_item) { @@ -690,7 +702,10 @@ delete: btrfs_release_path(root, path); if (found_extent) { ret = btrfs_free_extent(trans, root, extent_start, - extent_num_bytes, 0); + extent_num_bytes, + root->root_key.objectid, + root_gen, inode->i_ino, + found_key.offset, 0); BUG_ON(ret); } } @@ -1900,7 +1915,14 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, 0); + ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, + 0, &objectid); + if (ret) + goto fail; + + leaf = __btrfs_alloc_free_block(trans, root, root->leafsize, + objectid, trans->transid, 0, 0, + 0, 0); if (IS_ERR(leaf)) return PTR_ERR(leaf); @@ -1908,7 +1930,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_header_level(leaf, 0); btrfs_set_header_bytenr(leaf, leaf->start); btrfs_set_header_generation(leaf, trans->transid); - btrfs_set_header_owner(leaf, root->root_key.objectid); + btrfs_set_header_owner(leaf, objectid); + write_extent_buffer(leaf, root->fs_info->fsid, (unsigned long)btrfs_header_fsid(leaf), BTRFS_FSID_SIZE); @@ -1933,11 +1956,6 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) free_extent_buffer(leaf); leaf = NULL; - ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, - 0, &objectid); - if (ret) - goto fail; - btrfs_set_root_dirid(&root_item, new_dirid); key.objectid = objectid; @@ -2056,7 +2074,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) if (ret) goto fail; - ret = btrfs_inc_root_ref(trans, root); + ret = btrfs_inc_root_ref(trans, root, objectid); if (ret) goto fail; fail: diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 030324febf6..da0b4dcf361 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -33,6 +33,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) struct btrfs_file_extent_item *fi; struct btrfs_key key; struct btrfs_key found_key; + struct btrfs_extent_ref *ref; u32 type; printk("leaf %llu total ptrs %d free space %d\n", @@ -73,6 +74,15 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) printk("\t\textent data refs %u\n", btrfs_extent_refs(l, ei)); break; + case BTRFS_EXTENT_REF_KEY: + ref = btrfs_item_ptr(l, i, struct btrfs_extent_ref); + printk("\t\textent back ref root %llu gen %llu " + "owner %llu offset %llu\n", + (unsigned long long)btrfs_ref_root(l, ref), + (unsigned long long)btrfs_ref_generation(l, ref), + (unsigned long long)btrfs_ref_objectid(l, ref), + (unsigned long long)btrfs_ref_offset(l, ref)); + break; case BTRFS_EXTENT_DATA_KEY: fi = btrfs_item_ptr(l, i, diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 3994795edfe..5c58630dce0 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -78,6 +78,8 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, break; if (*level == 1) { + WARN_ON(btrfs_header_generation(path->nodes[*level]) != + trans->transid); ret = btrfs_realloc_node(trans, root, path->nodes[*level], path->slots[*level], -- cgit v1.2.3-70-g09d2 From d8d5f3e16d1ae4fe9b93312e083f2c04a95520f0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 11 Dec 2007 12:42:00 -0500 Subject: Btrfs: Add lowest key information to back refs for extent tree blocks as well. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++++- fs/btrfs/file.c | 11 ++---- fs/btrfs/inode.c | 14 ++----- 4 files changed, 110 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0ac21e3aac8..60a30da6af0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -210,7 +210,7 @@ static int btree_writepages(struct address_space *mapping, { struct extent_map_tree *tree; tree = &BTRFS_I(mapping->host)->extent_tree; - if (0 && wbc->sync_mode == WB_SYNC_NONE) { + if (wbc->sync_mode == WB_SYNC_NONE) { u64 num_dirty; u64 start = 0; unsigned long thresh = 96 * 1024 * 1024; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 32991f73e9d..187be401247 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -457,6 +457,94 @@ out: return ret; } +/* + * Back reference rules. Back refs have three main goals: + * + * 1) differentiate between all holders of references to an extent so that + * when a reference is dropped we can make sure it was a valid reference + * before freeing the extent. + * + * 2) Provide enough information to quickly find the holders of an extent + * if we notice a given block is corrupted or bad. + * + * 3) Make it easy to migrate blocks for FS shrinking or storage pool + * maintenance. This is actually the same as #2, but with a slightly + * different use case. + * + * File extents can be referenced by: + * + * - multiple snapshots, subvolumes, or different generations in one subvol + * - different files inside a single subvolume (in theory, not implemented yet) + * - different offsets inside a file (bookend extents in file.c) + * + * The extent ref structure has fields for: + * + * - Objectid of the subvolume root + * - Generation number of the tree holding the reference + * - objectid of the file holding the reference + * - offset in the file corresponding to the key holding the reference + * + * When a file extent is allocated the fields are filled in: + * (root_key.objectid, trans->transid, inode objectid, offset in file) + * + * When a leaf is cow'd new references are added for every file extent found + * in the leaf. It looks the same as the create case, but trans->transid + * will be different when the block is cow'd. + * + * (root_key.objectid, trans->transid, inode objectid, offset in file) + * + * When a file extent is removed either during snapshot deletion or file + * truncation, the corresponding back reference is found + * by searching for: + * + * (btrfs_header_owner(leaf), btrfs_header_generation(leaf), + * inode objectid, offset in file) + * + * Btree extents can be referenced by: + * + * - Different subvolumes + * - Different generations of the same subvolume + * + * Storing sufficient information for a full reverse mapping of a btree + * block would require storing the lowest key of the block in the backref, + * and it would require updating that lowest key either before write out or + * every time it changed. Instead, the objectid of the lowest key is stored + * along with the level of the tree block. This provides a hint + * about where in the btree the block can be found. Searches through the + * btree only need to look for a pointer to that block, so they stop one + * level higher than the level recorded in the backref. + * + * Some btrees do not do reference counting on their extents. These + * include the extent tree and the tree of tree roots. Backrefs for these + * trees always have a generation of zero. + * + * When a tree block is created, back references are inserted: + * + * (root->root_key.objectid, trans->transid or zero, lowest_key_objectid, level) + * + * When a tree block is cow'd in a reference counted root, + * new back references are added for all the blocks it points to. + * These are of the form (trans->transid will have increased since creation): + * + * (root->root_key.objectid, trans->transid, lowest_key_objectid, level) + * + * Because the lowest_key_objectid and the level are just hints + * they are not used when backrefs are deleted. When a backref is deleted: + * + * if backref was for a tree root: + * root_objectid = root->root_key.objectid + * else + * root_objectid = btrfs_header_owner(parent) + * + * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0) + * + * Back Reference Key hashing: + * + * Back references have four fields, each 64 bits long. Unfortunately, + * This is hashed into a single 64 bit number and placed into the key offset. + * The key objectid corresponds to the first byte in the extent, and the + * key type is set to BTRFS_EXTENT_REF_KEY + */ int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 bytenr, @@ -939,10 +1027,13 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct u64 start; u64 end; struct btrfs_fs_info *info = extent_root->fs_info; + struct extent_buffer *eb; struct btrfs_path *path; struct btrfs_key ins; + struct btrfs_disk_key first; struct btrfs_extent_item extent_item; int ret; + int level; int err = 0; btrfs_set_stack_extent_refs(&extent_item, 1); @@ -961,10 +1052,19 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct &extent_item, sizeof(extent_item)); clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, GFP_NOFS); + eb = read_tree_block(extent_root, ins.objectid, ins.offset); + level = btrfs_header_level(eb); + if (level == 0) { + btrfs_item_key(eb, &first, 0); + } else { + btrfs_node_key(eb, &first, 0); + } err = btrfs_insert_extent_backref(trans, extent_root, path, start, extent_root->root_key.objectid, - 0, 0, 0); + 0, btrfs_disk_key_objectid(&first), + level); BUG_ON(err); + free_extent_buffer(eb); } btrfs_free_path(path); return 0; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5b1f90f06e0..1cc4d285951 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -545,13 +545,10 @@ next_slot: u64 disk_num_bytes = 0; u64 extent_num_bytes = 0; u64 root_gen; + u64 root_owner; - if (leaf != root->node) { - root_gen = - btrfs_header_generation(path->nodes[1]); - } else { - root_gen = btrfs_header_generation(leaf); - } + root_gen = btrfs_header_generation(leaf); + root_owner = btrfs_header_owner(leaf); if (found_extent) { disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, @@ -575,7 +572,7 @@ next_slot: ret = btrfs_free_extent(trans, root, disk_bytenr, disk_num_bytes, - root->root_key.objectid, + root_owner, root_gen, inode->i_ino, key.offset, 0); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bb70db0c9df..03fea037667 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -563,6 +563,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, u64 extent_num_bytes = 0; u64 item_end = 0; u64 root_gen = 0; + u64 root_owner = 0; int found_extent; int del_item; int extent_type = -1; @@ -673,15 +674,8 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, found_extent = 1; inode->i_blocks -= num_dec; } - if (leaf == root->node) { - root_gen = - btrfs_header_generation(leaf); - } else { - struct extent_buffer *parent; - parent = path->nodes[1]; - root_gen = - btrfs_header_generation(parent); - } + root_gen = btrfs_header_generation(leaf); + root_owner = btrfs_header_owner(leaf); } } else if (extent_type == BTRFS_FILE_EXTENT_INLINE && !del_item) { @@ -703,7 +697,7 @@ delete: if (found_extent) { ret = btrfs_free_extent(trans, root, extent_start, extent_num_bytes, - root->root_key.objectid, + root_owner, root_gen, inode->i_ino, found_key.offset, 0); BUG_ON(ret); -- cgit v1.2.3-70-g09d2 From 70b043f0c7d7c78d8a0a01e65067c7c8fe338485 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 13 Dec 2007 09:02:46 -0500 Subject: Btrfs: Extra NULL block group checks in find_free_extent Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 187be401247..7c953b2ecf1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1319,6 +1319,12 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root total_needed += empty_size; path = btrfs_alloc_path(); check_failed: + if (!block_group) { + block_group = btrfs_lookup_block_group(info, search_start); + if (!block_group) + block_group = btrfs_lookup_block_group(info, + orig_search_start); + } search_start = find_search_start(root, &block_group, search_start, total_needed, data, full_scan); search_start = stripe_align(root, search_start); -- cgit v1.2.3-70-g09d2 From 6caab489c5866788ae64a72986520b1f0e40c706 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 13 Dec 2007 09:48:07 -0500 Subject: Fix btrfs_inc_ref to add backref hints Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7c953b2ecf1..057cdd49c09 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -702,7 +702,7 @@ int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, } return btrfs_inc_extent_ref(trans, root, root->node->start, root->node->len, owner_objectid, - generation, 0, 0); + generation, key_objectid, level); } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -746,10 +746,12 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, } } else { bytenr = btrfs_node_blockptr(buf, i); + btrfs_node_key_to_cpu(buf, &key, i); ret = btrfs_inc_extent_ref(trans, root, bytenr, btrfs_level_size(root, level - 1), root->root_key.objectid, - trans->transid, 0, 0); + trans->transid, key.objectid, + level - 1); if (ret) { faili = i; goto fail; -- cgit v1.2.3-70-g09d2 From f6dbff55d77dee363c22873481db54d3bada3ea6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 13 Dec 2007 11:13:32 -0500 Subject: Btrfs: Reorder extent back refs to differentiate btree blocks from file data Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++-- fs/btrfs/extent-tree.c | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 035fa155034..052555ced44 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -37,11 +37,12 @@ extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_MAGIC "_B2RfS_M" +#define BTRFS_MAX_LEVEL 8 #define BTRFS_ROOT_TREE_OBJECTID 1ULL #define BTRFS_EXTENT_TREE_OBJECTID 2ULL #define BTRFS_FS_TREE_OBJECTID 3ULL #define BTRFS_ROOT_TREE_DIR_OBJECTID 4ULL -#define BTRFS_FIRST_FREE_OBJECTID 5ULL +#define BTRFS_FIRST_FREE_OBJECTID 256ULL /* * we can actually store much bigger names, but lets not confuse the rest @@ -107,7 +108,6 @@ struct btrfs_header { u8 level; } __attribute__ ((__packed__)); -#define BTRFS_MAX_LEVEL 8 #define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \ sizeof(struct btrfs_header)) / \ sizeof(struct btrfs_key_ptr)) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 057cdd49c09..00414836e96 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -520,13 +520,13 @@ out: * * When a tree block is created, back references are inserted: * - * (root->root_key.objectid, trans->transid or zero, lowest_key_objectid, level) + * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid) * * When a tree block is cow'd in a reference counted root, * new back references are added for all the blocks it points to. * These are of the form (trans->transid will have increased since creation): * - * (root->root_key.objectid, trans->transid, lowest_key_objectid, level) + * (root->root_key.objectid, trans->transid, level, lowest_key_objectid) * * Because the lowest_key_objectid and the level are just hints * they are not used when backrefs are deleted. When a backref is deleted: @@ -702,7 +702,7 @@ int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, } return btrfs_inc_extent_ref(trans, root, root->node->start, root->node->len, owner_objectid, - generation, key_objectid, level); + generation, level, key_objectid); } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -750,8 +750,8 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = btrfs_inc_extent_ref(trans, root, bytenr, btrfs_level_size(root, level - 1), root->root_key.objectid, - trans->transid, key.objectid, - level - 1); + trans->transid, + level - 1, key.objectid); if (ret) { faili = i; goto fail; @@ -1063,8 +1063,8 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct } err = btrfs_insert_extent_backref(trans, extent_root, path, start, extent_root->root_key.objectid, - 0, btrfs_disk_key_objectid(&first), - level); + 0, level, + btrfs_disk_key_objectid(&first)); BUG_ON(err); free_extent_buffer(eb); } @@ -1640,7 +1640,7 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, ret = btrfs_alloc_extent(trans, root, blocksize, root_objectid, ref_generation, - first_objectid, level, empty_size, hint, + level, first_objectid, empty_size, hint, (u64)-1, &ins, 0); if (ret) { BUG_ON(ret > 0); -- cgit v1.2.3-70-g09d2 From be20aa9dbadc8c06283784ee12bbc0d97dea3418 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 17 Dec 2007 20:14:01 -0500 Subject: Btrfs: Add mount option to turn off data cow A number of workloads do not require copy on write data or checksumming. mount -o nodatasum to disable checksums and -o nodatacow to disable both copy on write and checksumming. In nodatacow mode, copy on write is still performed when a given extent is under snapshot. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 53 ++++++++++++++++++++- fs/btrfs/ctree.h | 8 ++++ fs/btrfs/extent-tree.c | 101 ++++++++++++++++++++++++++++++++++++++-- fs/btrfs/inode.c | 122 +++++++++++++++++++++++++++++++++++++++++++------ fs/btrfs/super.c | 34 +++++++++++--- 5 files changed, 293 insertions(+), 25 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index fd8233e05cf..585f279d111 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -70,7 +70,58 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) memset(p, 0, sizeof(*p)); } -static int __btrfs_cow_block(struct btrfs_trans_handle *trans, +int btrfs_copy_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + struct extent_buffer **cow_ret, u64 new_root_objectid) +{ + struct extent_buffer *cow; + u32 nritems; + int ret = 0; + int level; + struct btrfs_key first_key; + struct btrfs_root new_root; + + memcpy(&new_root, root, sizeof(new_root)); + new_root.root_key.objectid = new_root_objectid; + + WARN_ON(root->ref_cows && trans->transid != + root->fs_info->running_transaction->transid); + WARN_ON(root->ref_cows && trans->transid != root->last_trans); + + level = btrfs_header_level(buf); + nritems = btrfs_header_nritems(buf); + if (nritems) { + if (level == 0) + btrfs_item_key_to_cpu(buf, &first_key, 0); + else + btrfs_node_key_to_cpu(buf, &first_key, 0); + } else { + first_key.objectid = 0; + } + cow = __btrfs_alloc_free_block(trans, &new_root, buf->len, + new_root_objectid, + trans->transid, first_key.objectid, + level, buf->start, 0); + if (IS_ERR(cow)) + return PTR_ERR(cow); + + copy_extent_buffer(cow, buf, 0, 0, cow->len); + btrfs_set_header_bytenr(cow, cow->start); + btrfs_set_header_generation(cow, trans->transid); + btrfs_set_header_owner(cow, new_root_objectid); + + WARN_ON(btrfs_header_generation(buf) > trans->transid); + ret = btrfs_inc_ref(trans, &new_root, buf); + if (ret) + return ret; + + btrfs_mark_buffer_dirty(cow); + *cow_ret = cow; + return 0; +} + +int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5e255cabfd1..b51b021fff8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -431,6 +431,7 @@ struct btrfs_root { #define BTRFS_STRING_ITEM_KEY 253 #define BTRFS_MOUNT_NODATASUM 0x1 +#define BTRFS_MOUNT_NODATACOW 0x2 #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) @@ -915,6 +916,9 @@ static inline u32 btrfs_level_size(struct btrfs_root *root, int level) { btrfs_item_offset_nr(leaf, slot))) /* extent-tree.c */ +u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, + struct btrfs_path *count_path, + u64 first_extent); int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy); @@ -974,6 +978,10 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, struct extent_buffer **cow_ret); +int btrfs_copy_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + struct extent_buffer **cow_ret, u64 new_root_objectid); int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 data_size); int btrfs_truncate_item(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 00414836e96..1412d556313 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -679,6 +679,104 @@ out: return 0; } +u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, + struct btrfs_path *count_path, + u64 first_extent) +{ + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_path *path; + u64 bytenr; + u64 found_objectid; + u64 root_objectid = 0; + u32 total_count = 0; + u32 cur_count; + u32 refs; + u32 nritems; + int ret; + struct btrfs_key key; + struct btrfs_key found_key; + struct extent_buffer *l; + struct btrfs_extent_item *item; + struct btrfs_extent_ref *ref_item; + int level = -1; + + path = btrfs_alloc_path(); +again: + if (level == -1) + bytenr = first_extent; + else + bytenr = count_path->nodes[level]->start; + + cur_count = 0; + key.objectid = bytenr; + key.offset = 0; + + btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); + if (ret < 0) + goto out; + BUG_ON(ret == 0); + + l = path->nodes[0]; + btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); + + if (found_key.objectid != bytenr || + found_key.type != BTRFS_EXTENT_ITEM_KEY) { + goto out; + } + + item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); + refs = btrfs_extent_refs(l, item); + while (1) { + nritems = btrfs_header_nritems(l); + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(extent_root, path); + if (ret == 0) + continue; + break; + } + btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); + if (found_key.objectid != bytenr) + break; + if (found_key.type != BTRFS_EXTENT_REF_KEY) { + path->slots[0]++; + continue; + } + + cur_count++; + ref_item = btrfs_item_ptr(l, path->slots[0], + struct btrfs_extent_ref); + found_objectid = btrfs_ref_root(l, ref_item); + + if (found_objectid != root_objectid) + total_count++; + + if (total_count > 1) + goto out; + + if (root_objectid == 0) + root_objectid = found_objectid; + + path->slots[0]++; + } + if (cur_count == 0) { + total_count = 0; + goto out; + } + if (total_count > 1) + goto out; + if (level >= 0 && root->node == count_path->nodes[level]) + goto out; + level++; + btrfs_release_path(root, path); + goto again; + +out: + btrfs_free_path(path); + return total_count; + +} + int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner_objectid) { @@ -1127,9 +1225,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (!path) return -ENOMEM; - if (ref_generation && owner_objectid == 0 && root_objectid == 3) { -//printk("drop backref root %Lu gen %Lu byte %Lu\n", root_objectid, ref_generation, bytenr ); - } ret = lookup_extent_backref(trans, extent_root, path, bytenr, root_objectid, ref_generation, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 11885cb114e..91f3fc43e2a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -72,21 +72,22 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, }; -static int run_delalloc_range(struct inode *inode, u64 start, u64 end) +static int cow_file_range(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; - struct btrfs_key ins; u64 alloc_hint = 0; u64 num_bytes; - int ret; u64 blocksize = root->sectorsize; + struct btrfs_key ins; + int ret; - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); BUG_ON(!trans); + btrfs_set_trans_block_group(trans, inode); + num_bytes = (end - start + blocksize) & ~(blocksize - 1); + num_bytes = max(blocksize, num_bytes); ret = btrfs_drop_extents(trans, root, inode, start, start + num_bytes, start, &alloc_hint); @@ -106,6 +107,101 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) ins.offset); out: btrfs_end_transaction(trans, root); + return ret; +} + +static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end) +{ + u64 extent_start; + u64 extent_end; + u64 bytenr; + u64 cow_end; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_buffer *leaf; + int found_type; + struct btrfs_path *path; + struct btrfs_file_extent_item *item; + int ret; + int err; + struct btrfs_key found_key; + + path = btrfs_alloc_path(); + BUG_ON(!path); +again: + ret = btrfs_lookup_file_extent(NULL, root, path, + inode->i_ino, start, 0); + if (ret < 0) { + btrfs_free_path(path); + return ret; + } + + cow_end = end; + if (ret != 0) { + if (path->slots[0] == 0) + goto not_found; + path->slots[0]--; + } + + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + + /* are we inside the extent that was found? */ + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + found_type = btrfs_key_type(&found_key); + if (found_key.objectid != inode->i_ino || + found_type != BTRFS_EXTENT_DATA_KEY) { + goto not_found; + } + + found_type = btrfs_file_extent_type(leaf, item); + extent_start = found_key.offset; + if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_end = extent_start + + btrfs_file_extent_num_bytes(leaf, item); + err = 0; + + if (start < extent_start || start >= extent_end) + goto not_found; + + cow_end = min(end, extent_end - 1); + bytenr = btrfs_file_extent_disk_bytenr(leaf, item); + if (bytenr == 0) + goto not_found; + + bytenr += btrfs_file_extent_offset(leaf, item); + if (btrfs_count_snapshots_in_path(root, path, bytenr) != 1) { + goto not_found; + } + + start = extent_end; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + goto not_found; + } +loop: + if (start > end) { + btrfs_free_path(path); + return 0; + } + btrfs_release_path(root, path); + goto again; + +not_found: + cow_file_range(inode, start, cow_end); + start = cow_end + 1; + goto loop; +} + +static int run_delalloc_range(struct inode *inode, u64 start, u64 end) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + + mutex_lock(&root->fs_info->fs_mutex); + if (btrfs_test_opt(root, NODATACOW)) + ret = run_delalloc_nocow(inode, start, end); + else + ret = cow_file_range(inode, start, end); mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -1907,9 +2003,6 @@ int btrfs_commit_write(struct file *file, struct page *page, btrfs_cow_one_page(inode, page, PAGE_CACHE_SIZE); - set_page_extent_mapped(page); - set_page_dirty(page); - if (pos > inode->i_size) { i_size_write(inode, pos); mark_inode_dirty(inode); @@ -2078,13 +2171,18 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) key.objectid = objectid; key.offset = 1; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + extent_buffer_get(root->node); btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); free_extent_buffer(tmp); - btrfs_set_root_bytenr(&new_root_item, root->node->start); - btrfs_set_root_level(&new_root_item, btrfs_header_level(root->node)); + + btrfs_copy_root(trans, root, root->node, &tmp, objectid); + + btrfs_set_root_bytenr(&new_root_item, tmp->start); + btrfs_set_root_level(&new_root_item, btrfs_header_level(tmp)); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); + free_extent_buffer(tmp); if (ret) goto fail; @@ -2106,10 +2204,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) if (ret) goto fail; - - ret = btrfs_inc_root_ref(trans, root, objectid); - if (ret) - goto fail; fail: nr = trans->blocks_used; err = btrfs_commit_transaction(trans, root); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ad4f280ec77..2116728d1f9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -61,12 +61,13 @@ static void btrfs_put_super (struct super_block * sb) } enum { - Opt_subvol, Opt_nodatasum, Opt_err, + Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_err, }; static match_table_t tokens = { {Opt_subvol, "subvol=%s"}, {Opt_nodatasum, "nodatasum"}, + {Opt_nodatacow, "nodatacow"}, {Opt_err, NULL} }; @@ -78,12 +79,20 @@ static int parse_options (char * options, struct btrfs_fs_info *info = NULL; substring_t args[MAX_OPT_ARGS]; - if (root) - info = root->fs_info; - if (!options) return 1; + /* + * strsep changes the string, duplicate it because parse_options + * gets called twice + */ + options = kstrdup(options, GFP_NOFS); + if (!options) + return -ENOMEM; + + if (root) + info = root->fs_info; + while ((p = strsep (&options, ",")) != NULL) { int token; if (!*p) @@ -92,17 +101,28 @@ static int parse_options (char * options, token = match_token(p, tokens, args); switch (token) { case Opt_subvol: - if (subvol_name) + if (subvol_name) { *subvol_name = match_strdup(&args[0]); + } break; case Opt_nodatasum: - if (root) + if (info) { + printk("btrfs: setting nodatacsum\n"); btrfs_set_opt(info->mount_opt, NODATASUM); + } + break; + case Opt_nodatacow: + if (info) { + printk("btrfs: setting nodatacow\n"); + btrfs_set_opt(info->mount_opt, NODATACOW); + btrfs_set_opt(info->mount_opt, NODATASUM); + } break; default: - return 0; + break; } } + kfree(options); return 1; } -- cgit v1.2.3-70-g09d2 From edbd8d4efe4ddaf29a175ae504e2c9a05a96ebee Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 21 Dec 2007 16:27:24 -0500 Subject: Btrfs: Support for online FS resize (grow and shrink) Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 10 ++ fs/btrfs/disk-io.c | 32 +++- fs/btrfs/disk-io.h | 2 + fs/btrfs/extent-tree.c | 480 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/inode.c | 105 ++++++++++- fs/btrfs/ioctl.h | 2 + fs/btrfs/root-tree.c | 5 + fs/btrfs/super.c | 7 +- 8 files changed, 629 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 10129cc6656..b83a1bec346 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -508,6 +508,8 @@ BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, used, 64); BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item, used, 64); +BTRFS_SETGET_FUNCS(disk_block_group_flags, struct btrfs_block_group_item, + flags, 8); /* struct btrfs_inode_ref */ BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); @@ -960,6 +962,9 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, int level, u64 hint, u64 empty_size); +int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 new_size); +int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size); int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 bytenr, @@ -1117,6 +1122,9 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 isize); /* inode.c */ +unsigned long btrfs_force_ra(struct address_space *mapping, + struct file_ra_state *ra, struct file *file, + pgoff_t offset, pgoff_t last_index); int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, int for_del); int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); @@ -1162,4 +1170,6 @@ void btrfs_sysfs_del_super(struct btrfs_fs_info *root); ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); int btrfs_delete_xattrs(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); +/* super.c */ +u64 btrfs_parse_size(char *str); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index eebb4fb65c6..de026d9d9b2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -471,13 +471,17 @@ insert: return root; } -struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, - struct btrfs_key *location, - const char *name, int namelen) +struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, + struct btrfs_key *location) { struct btrfs_root *root; int ret; + if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) + return fs_info->tree_root; + if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID) + return fs_info->extent_root; + root = radix_tree_lookup(&fs_info->fs_roots_radix, (unsigned long)location->objectid); if (root) @@ -494,6 +498,23 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } + ret = btrfs_find_dead_roots(fs_info->tree_root, + root->root_key.objectid, root); + BUG_ON(ret); + + return root; +} + +struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *location, + const char *name, int namelen) +{ + struct btrfs_root *root; + int ret; + + root = btrfs_read_fs_root_no_name(fs_info, location); + if (!root) + return NULL; ret = btrfs_set_root_name(root, name, namelen); if (ret) { @@ -509,11 +530,6 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } - - ret = btrfs_find_dead_roots(fs_info->tree_root, - root->root_key.objectid, root); - BUG_ON(ret); - return root; } #if 0 diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 33d2ccfd74d..8c3cfd02901 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -39,6 +39,8 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, const char *name, int namelen); struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_key *location); +struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, + struct btrfs_key *location); int btrfs_insert_dev_radix(struct btrfs_root *root, struct block_device *bdev, u64 device_id, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1412d556313..de0fb0743cf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -18,6 +18,7 @@ #include #include +#include #include "hash.h" #include "ctree.h" #include "disk-io.h" @@ -1622,6 +1623,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, int pending_ret; u64 super_used, root_used; u64 search_start = 0; + u64 new_hint; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; struct btrfs_extent_item extent_item; @@ -1629,6 +1631,10 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, btrfs_set_stack_extent_refs(&extent_item, 1); + new_hint = max(hint_byte, 16ULL * 1024 * 1024 * 1024); + if (new_hint < btrfs_super_total_bytes(&info->super_copy)) + hint_byte = new_hint; + WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, search_start, search_end, hint_byte, ins, @@ -2100,6 +2106,480 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return 0; } +static int relocate_inode_pages(struct inode *inode, u64 start, u64 len) +{ + u64 page_start; + u64 page_end; + u64 delalloc_start; + u64 existing_delalloc; + unsigned long last_index; + unsigned long first_index; + unsigned long i; + struct page *page; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct file_ra_state ra; + + mutex_lock(&inode->i_mutex); + first_index = start >> PAGE_CACHE_SHIFT; + last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; + + memset(&ra, 0, sizeof(ra)); + file_ra_state_init(&ra, inode->i_mapping); + btrfs_force_ra(inode->i_mapping, &ra, NULL, first_index, last_index); + + for (i = first_index; i <= last_index; i++) { + page = grab_cache_page(inode->i_mapping, i); + if (!page) + goto out_unlock; + if (!PageUptodate(page)) { + btrfs_readpage(NULL, page); + lock_page(page); + if (!PageUptodate(page)) { + unlock_page(page); + page_cache_release(page); + goto out_unlock; + } + } + page_start = (u64)page->index << PAGE_CACHE_SHIFT; + page_end = page_start + PAGE_CACHE_SIZE - 1; + + lock_extent(em_tree, page_start, page_end, GFP_NOFS); + + delalloc_start = page_start; + existing_delalloc = + count_range_bits(&BTRFS_I(inode)->extent_tree, + &delalloc_start, page_end, + PAGE_CACHE_SIZE, EXTENT_DELALLOC); + + set_extent_delalloc(em_tree, page_start, + page_end, GFP_NOFS); + + spin_lock(&root->fs_info->delalloc_lock); + root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE - + existing_delalloc; + spin_unlock(&root->fs_info->delalloc_lock); + + unlock_extent(em_tree, page_start, page_end, GFP_NOFS); + set_page_dirty(page); + unlock_page(page); + page_cache_release(page); + } + +out_unlock: + mutex_unlock(&inode->i_mutex); + return 0; +} + +static int relocate_one_reference(struct btrfs_root *extent_root, + struct btrfs_path *path, + struct btrfs_key *extent_key, + u64 ref_root, u64 ref_gen, u64 ref_objectid, + u64 ref_offset) +{ + struct inode *inode; + struct btrfs_root *found_root; + struct btrfs_key root_location; + int ret; + + root_location.objectid = ref_root; + if (ref_gen == 0) + root_location.offset = 0; + else + root_location.offset = (u64)-1; + root_location.type = BTRFS_ROOT_ITEM_KEY; + + found_root = btrfs_read_fs_root_no_name(extent_root->fs_info, + &root_location); + BUG_ON(!found_root); + + if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) { + mutex_unlock(&extent_root->fs_info->fs_mutex); + inode = btrfs_iget_locked(extent_root->fs_info->sb, + ref_objectid, found_root); + if (inode->i_state & I_NEW) { + /* the inode and parent dir are two different roots */ + BTRFS_I(inode)->root = found_root; + BTRFS_I(inode)->location.objectid = ref_objectid; + BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; + BTRFS_I(inode)->location.offset = 0; + btrfs_read_locked_inode(inode); + unlock_new_inode(inode); + + } + /* this can happen if the reference is not against + * the latest version of the tree root + */ + if (is_bad_inode(inode)) { + mutex_lock(&extent_root->fs_info->fs_mutex); + goto out; + } + relocate_inode_pages(inode, ref_offset, extent_key->offset); + /* FIXME, data=ordered will help get rid of this */ + filemap_fdatawrite(inode->i_mapping); + iput(inode); + mutex_lock(&extent_root->fs_info->fs_mutex); + } else { + struct btrfs_trans_handle *trans; + struct btrfs_key found_key; + struct extent_buffer *eb; + int level; + int i; + + trans = btrfs_start_transaction(found_root, 1); + eb = read_tree_block(found_root, extent_key->objectid, + extent_key->offset); + level = btrfs_header_level(eb); + + if (level == 0) + btrfs_item_key_to_cpu(eb, &found_key, 0); + else + btrfs_node_key_to_cpu(eb, &found_key, 0); + + free_extent_buffer(eb); + + path->lowest_level = level; + path->reada = 0; + ret = btrfs_search_slot(trans, found_root, &found_key, path, + 0, 1); + path->lowest_level = 0; + for (i = level; i < BTRFS_MAX_LEVEL; i++) { + if (!path->nodes[i]) + break; + free_extent_buffer(path->nodes[i]); + path->nodes[i] = NULL; + } + btrfs_release_path(found_root, path); + btrfs_end_transaction(trans, found_root); + } + +out: + return 0; +} + +static int relocate_one_extent(struct btrfs_root *extent_root, + struct btrfs_path *path, + struct btrfs_key *extent_key) +{ + struct btrfs_key key; + struct btrfs_key found_key; + struct btrfs_extent_ref *ref; + struct extent_buffer *leaf; + u64 ref_root; + u64 ref_gen; + u64 ref_objectid; + u64 ref_offset; + u32 nritems; + u32 item_size; + int ret = 0; + + key.objectid = extent_key->objectid; + key.type = BTRFS_EXTENT_REF_KEY; + key.offset = 0; + + while(1) { + ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); + + BUG_ON(ret == 0); + + if (ret < 0) + goto out; + + ret = 0; + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + if (path->slots[0] == nritems) + goto out; + + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.objectid != extent_key->objectid) + break; + + if (found_key.type != BTRFS_EXTENT_REF_KEY) + break; + + key.offset = found_key.offset + 1; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_ref); + ref_root = btrfs_ref_root(leaf, ref); + ref_gen = btrfs_ref_generation(leaf, ref); + ref_objectid = btrfs_ref_objectid(leaf, ref); + ref_offset = btrfs_ref_offset(leaf, ref); + btrfs_release_path(extent_root, path); + + ret = relocate_one_reference(extent_root, path, + extent_key, ref_root, ref_gen, + ref_objectid, ref_offset); + if (ret) + goto out; + } + ret = 0; +out: + btrfs_release_path(extent_root, path); + return ret; +} + +static int find_overlapping_extent(struct btrfs_root *root, + struct btrfs_path *path, u64 new_size) +{ + struct btrfs_key found_key; + struct extent_buffer *leaf; + int ret; + + while(1) { + if (path->slots[0] == 0) { + ret = btrfs_prev_leaf(root, path); + if (ret == 1) { + return 1; + } + if (ret < 0) + return ret; + } else { + path->slots[0]--; + } + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.type == BTRFS_EXTENT_ITEM_KEY) { + if (found_key.objectid + found_key.offset > new_size) + return 0; + else + return 1; + } + } + return 1; +} + +int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *tree_root = root->fs_info->tree_root; + struct btrfs_path *path; + u64 cur_byte; + u64 total_found; + u64 ptr; + struct btrfs_fs_info *info = root->fs_info; + struct extent_map_tree *block_group_cache; + struct btrfs_key key; + struct btrfs_key found_key = { 0, 0, 0 }; + struct extent_buffer *leaf; + u32 nritems; + int ret; + int slot; + + btrfs_set_super_total_bytes(&info->super_copy, new_size); + block_group_cache = &info->block_group_cache; + path = btrfs_alloc_path(); + root = root->fs_info->extent_root; + +again: + total_found = 0; + key.objectid = new_size; + cur_byte = key.objectid; + key.offset = 0; + key.type = 0; + while(1) { + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; +next: + leaf = path->nodes[0]; + if (key.objectid == new_size - 1) { + ret = find_overlapping_extent(root, path, new_size); + if (ret != 0) { + btrfs_release_path(root, path); + ret = btrfs_search_slot(NULL, root, &key, + path, 0, 0); + if (ret < 0) + goto out; + } + } + nritems = btrfs_header_nritems(leaf); + ret = 0; + slot = path->slots[0]; + if (slot < nritems) + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (slot == nritems || + btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY) { + path->slots[0]++; + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + if (ret == 1) { + ret = 0; + break; + } + } + goto next; + } + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (found_key.objectid + found_key.offset <= cur_byte) + continue; + total_found++; + cur_byte = found_key.objectid + found_key.offset; + key.objectid = cur_byte; + btrfs_release_path(root, path); + ret = relocate_one_extent(root, path, &found_key); + } + + btrfs_release_path(root, path); + + if (total_found > 0) { + trans = btrfs_start_transaction(tree_root, 1); + btrfs_commit_transaction(trans, tree_root); + + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_clean_old_snapshots(tree_root); + mutex_lock(&root->fs_info->fs_mutex); + + trans = btrfs_start_transaction(tree_root, 1); + btrfs_commit_transaction(trans, tree_root); + goto again; + } + + trans = btrfs_start_transaction(root, 1); + key.objectid = new_size; + key.offset = 0; + key.type = 0; + while(1) { + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) + goto out; +bg_next: + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + ret = 0; + slot = path->slots[0]; + if (slot < nritems) + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (slot == nritems || + btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) { + if (slot < nritems) { + printk("shrinker found key %Lu %u %Lu\n", + found_key.objectid, found_key.type, + found_key.offset); + path->slots[0]++; + } + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + break; + if (ret == 1) { + ret = 0; + break; + } + } + goto bg_next; + } + btrfs_item_key_to_cpu(leaf, &found_key, slot); + ret = get_state_private(&info->block_group_cache, + found_key.objectid, &ptr); + if (!ret) + kfree((void *)(unsigned long)ptr); + + clear_extent_bits(&info->block_group_cache, found_key.objectid, + found_key.objectid + found_key.offset - 1, + (unsigned int)-1, GFP_NOFS); + + key.objectid = found_key.objectid + 1; + btrfs_del_item(trans, root, path); + btrfs_release_path(root, path); + } + clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1, + GFP_NOFS); + btrfs_commit_transaction(trans, root); +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 new_size) +{ + struct btrfs_path *path; + u64 nr = 0; + u64 cur_byte; + u64 old_size; + struct btrfs_block_group_cache *cache; + struct btrfs_block_group_item *item; + struct btrfs_fs_info *info = root->fs_info; + struct extent_map_tree *block_group_cache; + struct btrfs_key key; + struct extent_buffer *leaf; + int ret; + int bit; + + old_size = btrfs_super_total_bytes(&info->super_copy); + block_group_cache = &info->block_group_cache; + + root = info->extent_root; + + cache = btrfs_lookup_block_group(root->fs_info, old_size - 1); + + cur_byte = cache->key.objectid + cache->key.offset; + if (cur_byte >= new_size) + goto set_size; + + key.offset = BTRFS_BLOCK_GROUP_SIZE; + btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + while(cur_byte < new_size) { + key.objectid = cur_byte; + ret = btrfs_insert_empty_item(trans, root, path, &key, + sizeof(struct btrfs_block_group_item)); + BUG_ON(ret); + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_block_group_item); + + btrfs_set_disk_block_group_used(leaf, item, 0); + if (nr % 3) { + btrfs_set_disk_block_group_flags(leaf, item, + BTRFS_BLOCK_GROUP_DATA); + } else { + btrfs_set_disk_block_group_flags(leaf, item, 0); + } + nr++; + + cache = kmalloc(sizeof(*cache), GFP_NOFS); + BUG_ON(!cache); + + read_extent_buffer(leaf, &cache->item, (unsigned long)item, + sizeof(cache->item)); + + memcpy(&cache->key, &key, sizeof(key)); + cache->cached = 0; + cache->pinned = 0; + cur_byte = key.objectid + key.offset; + btrfs_release_path(root, path); + + if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { + bit = BLOCK_GROUP_DATA; + cache->data = BTRFS_BLOCK_GROUP_DATA; + } else { + bit = BLOCK_GROUP_METADATA; + cache->data = 0; + } + + /* use EXTENT_LOCKED to prevent merging */ + set_extent_bits(block_group_cache, key.objectid, + key.objectid + key.offset - 1, + bit | EXTENT_LOCKED, GFP_NOFS); + set_state_private(block_group_cache, key.objectid, + (unsigned long)cache); + } + btrfs_free_path(path); +set_size: + btrfs_set_super_total_bytes(&info->super_copy, new_size); + return 0; +} + int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5f5b7b89b14..f6a20112d9e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2346,7 +2346,7 @@ fail_unlock: return ret; } -static unsigned long force_ra(struct address_space *mapping, +unsigned long btrfs_force_ra(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t offset, pgoff_t last_index) { @@ -2372,6 +2372,8 @@ int btrfs_defrag_file(struct file *file) { unsigned long ra_index = 0; u64 page_start; u64 page_end; + u64 delalloc_start; + u64 existing_delalloc; unsigned long i; int ret; @@ -2385,8 +2387,9 @@ int btrfs_defrag_file(struct file *file) { last_index = inode->i_size >> PAGE_CACHE_SHIFT; for (i = 0; i <= last_index; i++) { if (i == ra_index) { - ra_index = force_ra(inode->i_mapping, &file->f_ra, - file, ra_index, last_index); + ra_index = btrfs_force_ra(inode->i_mapping, + &file->f_ra, + file, ra_index, last_index); } page = grab_cache_page(inode->i_mapping, i); if (!page) @@ -2404,8 +2407,19 @@ int btrfs_defrag_file(struct file *file) { page_end = page_start + PAGE_CACHE_SIZE - 1; lock_extent(em_tree, page_start, page_end, GFP_NOFS); + delalloc_start = page_start; + existing_delalloc = + count_range_bits(&BTRFS_I(inode)->extent_tree, + &delalloc_start, page_end, + PAGE_CACHE_SIZE, EXTENT_DELALLOC); set_extent_delalloc(em_tree, page_start, page_end, GFP_NOFS); + + spin_lock(&root->fs_info->delalloc_lock); + root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE - + existing_delalloc; + spin_unlock(&root->fs_info->delalloc_lock); + unlock_extent(em_tree, page_start, page_end, GFP_NOFS); set_page_dirty(page); unlock_page(page); @@ -2418,6 +2432,89 @@ out_unlock: return 0; } +static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) +{ + u64 new_size; + u64 old_size; + struct btrfs_ioctl_vol_args *vol_args; + struct btrfs_trans_handle *trans; + char *sizestr; + int ret = 0; + int namelen; + int mod = 0; + + vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); + + if (!vol_args) + return -ENOMEM; + + if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { + ret = -EFAULT; + goto out; + } + namelen = strlen(vol_args->name); + if (namelen > BTRFS_VOL_NAME_MAX) { + ret = -EINVAL; + goto out; + } + + sizestr = vol_args->name; + if (!strcmp(sizestr, "max")) + new_size = root->fs_info->sb->s_bdev->bd_inode->i_size; + else { + if (sizestr[0] == '-') { + mod = -1; + sizestr++; + } else if (sizestr[0] == '+') { + mod = 1; + sizestr++; + } + new_size = btrfs_parse_size(sizestr); + if (new_size == 0) { + ret = -EINVAL; + goto out; + } + } + + mutex_lock(&root->fs_info->fs_mutex); + old_size = btrfs_super_total_bytes(&root->fs_info->super_copy); + + if (mod < 0) { + if (new_size > old_size) { + ret = -EINVAL; + goto out_unlock; + } + new_size = old_size - new_size; + } else if (mod > 0) { + new_size = old_size + new_size; + } + + if (new_size < 256 * 1024 * 1024) { + ret = -EINVAL; + goto out_unlock; + } + if (new_size > root->fs_info->sb->s_bdev->bd_inode->i_size) { + ret = -EFBIG; + goto out_unlock; + } + new_size = (new_size / root->sectorsize) * root->sectorsize; + +printk("new size is %Lu\n", new_size); + if (new_size > old_size) { + trans = btrfs_start_transaction(root, 1); + ret = btrfs_grow_extent_tree(trans, root, new_size); + btrfs_commit_transaction(trans, root); + } else { + ret = btrfs_shrink_extent_tree(root, new_size); + } + +out_unlock: + mutex_unlock(&root->fs_info->fs_mutex); +out: + kfree(vol_args); + return ret; +} + static int btrfs_ioctl_snap_create(struct btrfs_root *root, void __user *arg) { struct btrfs_ioctl_vol_args *vol_args; @@ -2510,6 +2607,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_snap_create(root, (void __user *)arg); case BTRFS_IOC_DEFRAG: return btrfs_ioctl_defrag(file); + case BTRFS_IOC_RESIZE: + return btrfs_ioctl_resize(root, (void __user *)arg); } return -ENOTTY; diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 8bc47dec286..8c6290665d4 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -30,4 +30,6 @@ struct btrfs_ioctl_vol_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \ + struct btrfs_ioctl_vol_args) #endif diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 88bcdd33f56..e2b9b86af69 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -179,6 +179,11 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = btrfs_search_slot(trans, root, key, path, -1, 1); if (ret < 0) goto out; + if (ret) { +btrfs_print_leaf(root, path->nodes[0]); +printk("failed to del %Lu %u %Lu\n", key->objectid, key->type, key->offset); + + } BUG_ON(ret != 0); leaf = path->nodes[0]; ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7cab680418a..3bbd7f66569 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -74,9 +74,9 @@ static match_table_t tokens = { {Opt_err, NULL} }; -static unsigned long parse_size(char *str) +u64 btrfs_parse_size(char *str) { - unsigned long res; + u64 res; int mult = 1; char *end; char last; @@ -150,7 +150,8 @@ static int parse_options (char * options, if (info) { char *num = match_strdup(&args[0]); if (num) { - info->max_extent = parse_size(num); + info->max_extent = + btrfs_parse_size(num); kfree(num); info->max_extent = max_t(u64, -- cgit v1.2.3-70-g09d2 From e52ec0eb62e76ea584c8438ef92933e9b815db80 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 21 Dec 2007 16:36:24 -0500 Subject: Btrfs: Fix NULL block groups on reading the inode Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/inode.c | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index de0fb0743cf..a421069e998 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1630,11 +1630,11 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_path *path; btrfs_set_stack_extent_refs(&extent_item, 1); - +#if 0 new_hint = max(hint_byte, 16ULL * 1024 * 1024 * 1024); if (new_hint < btrfs_super_total_bytes(&info->super_copy)) hint_byte = new_hint; - +#endif WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, search_start, search_end, hint_byte, ins, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f6a20112d9e..3c7bf3bddca 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -402,6 +402,10 @@ void btrfs_read_locked_inode(struct inode *inode) BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info, alloc_group_block); + if (!BTRFS_I(inode)->block_group) { + BTRFS_I(inode)->block_group = btrfs_find_block_group(root, + NULL, 0, 0, 0); + } btrfs_free_path(path); inode_item = NULL; -- cgit v1.2.3-70-g09d2 From 8f662a76c6af8eb367fa519e9bb9766040d9cea8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 2 Jan 2008 10:01:11 -0500 Subject: Btrfs: Add readahead to the online shrinker, and a mount -o alloc_start= for testing Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 9 ++++++--- fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 9 +++++---- fs/btrfs/inode.c | 2 +- fs/btrfs/super.c | 16 +++++++++++++++- 5 files changed, 28 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 2b3ffa707ea..8fa92a2d981 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2654,9 +2654,9 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, */ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) { + u64 bytenr; int slot; int level = 1; - u64 bytenr; struct extent_buffer *c; struct extent_buffer *next = NULL; @@ -2687,11 +2687,14 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) level--; c = path->nodes[level]; free_extent_buffer(c); + slot = btrfs_header_nritems(next); + if (slot != 0) + slot--; path->nodes[level] = next; - path->slots[level] = 0; + path->slots[level] = slot; if (!level) break; - next = read_tree_block(root, btrfs_node_blockptr(next, 0), + next = read_tree_block(root, btrfs_node_blockptr(next, slot), btrfs_level_size(root, level - 1)); } return 0; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b83a1bec346..d6e5c19969b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -324,6 +324,7 @@ struct btrfs_fs_info { u64 last_trans_committed; unsigned long mount_opt; u64 max_extent; + u64 alloc_start; struct btrfs_transaction *running_transaction; struct btrfs_super_block super_copy; struct extent_buffer *sb_buffer; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a421069e998..d6243c2afc7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1630,11 +1630,11 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_path *path; btrfs_set_stack_extent_refs(&extent_item, 1); -#if 0 - new_hint = max(hint_byte, 16ULL * 1024 * 1024 * 1024); + + new_hint = max(hint_byte, root->fs_info->alloc_start); if (new_hint < btrfs_super_total_bytes(&info->super_copy)) hint_byte = new_hint; -#endif + WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, search_start, search_end, hint_byte, ins, @@ -2239,7 +2239,7 @@ static int relocate_one_reference(struct btrfs_root *extent_root, free_extent_buffer(eb); path->lowest_level = level; - path->reada = 0; + path->reada = 2; ret = btrfs_search_slot(trans, found_root, &found_key, path, 0, 1); path->lowest_level = 0; @@ -2372,6 +2372,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) block_group_cache = &info->block_group_cache; path = btrfs_alloc_path(); root = root->fs_info->extent_root; + path->reada = 2; again: total_found = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3c7bf3bddca..d6de4696188 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2058,7 +2058,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); - mutex_lock(&root->fs_info->fs_mutex); + mutex_unlock(&root->fs_info->fs_mutex); if (ret) goto out; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3bbd7f66569..f1b56eb77b1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -63,7 +63,8 @@ static void btrfs_put_super (struct super_block * sb) } enum { - Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, Opt_err, + Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, + Opt_alloc_start, Opt_err, }; static match_table_t tokens = { @@ -71,6 +72,7 @@ static match_table_t tokens = { {Opt_nodatasum, "nodatasum"}, {Opt_nodatacow, "nodatacow"}, {Opt_max_extent, "max_extent=%s"}, + {Opt_alloc_start, "alloc_start=%s"}, {Opt_err, NULL} }; @@ -162,6 +164,18 @@ static int parse_options (char * options, } } break; + case Opt_alloc_start: + if (info) { + char *num = match_strdup(&args[0]); + if (num) { + info->alloc_start = + btrfs_parse_size(num); + kfree(num); + printk("btrfs: allocations start at " + "%Lu\n", info->alloc_start); + } + } + break; default: break; } -- cgit v1.2.3-70-g09d2 From 56b453c92fdf51fd3283a2dc2dfbedf36f516031 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Jan 2008 09:08:27 -0500 Subject: Btrfs: Explicitly send a root objectid to count_snapshots_in_path Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 16 ++++++---------- fs/btrfs/inode.c | 39 +++++---------------------------------- 2 files changed, 11 insertions(+), 44 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d6243c2afc7..91c2b27d968 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -688,7 +688,7 @@ u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, struct btrfs_path *path; u64 bytenr; u64 found_objectid; - u64 root_objectid = 0; + u64 root_objectid = root->root_key.objectid; u32 total_count = 0; u32 cur_count; u32 refs; @@ -749,15 +749,11 @@ again: struct btrfs_extent_ref); found_objectid = btrfs_ref_root(l, ref_item); - if (found_objectid != root_objectid) - total_count++; - - if (total_count > 1) - goto out; - - if (root_objectid == 0) - root_objectid = found_objectid; - + if (found_objectid != root_objectid) { + total_count = 2; + break; + } + total_count = 1; path->slots[0]++; } if (cur_count == 0) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d6de4696188..5003a86510a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1770,6 +1770,11 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, again: em = lookup_extent_mapping(em_tree, start, end); if (em) { + if (em->start > start) { + printk("get_extent start %Lu em start %Lu\n", + start, em->start); + WARN_ON(1); + } goto out; } if (!em) { @@ -1952,23 +1957,6 @@ static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) return extent_bmap(mapping, iblock, btrfs_get_extent); } -static int btrfs_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; - int err; - - mutex_lock(&root->fs_info->fs_mutex); - err = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); - mutex_unlock(&root->fs_info->fs_mutex); - if (err) - return -ENOSPC; - - return extent_prepare_write(&BTRFS_I(page->mapping->host)->extent_tree, - page->mapping->host, page, from, to, - btrfs_get_extent); -} - int btrfs_readpage(struct file *file, struct page *page) { struct extent_map_tree *tree; @@ -2120,21 +2108,6 @@ static void btrfs_truncate(struct inode *inode) btrfs_btree_balance_dirty(root, nr); } -int btrfs_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - struct inode *inode = page->mapping->host; - - btrfs_cow_one_page(inode, page, PAGE_CACHE_SIZE); - - if (pos > inode->i_size) { - i_size_write(inode, pos); - mark_inode_dirty(inode); - } - return 0; -} - static int create_subvol(struct btrfs_root *root, char *name, int namelen) { struct btrfs_trans_handle *trans; @@ -2930,8 +2903,6 @@ static struct address_space_operations btrfs_aops = { .writepages = btrfs_writepages, .readpages = btrfs_readpages, .sync_page = block_sync_page, - .prepare_write = btrfs_prepare_write, - .commit_write = btrfs_commit_write, .bmap = btrfs_bmap, .invalidatepage = btrfs_invalidatepage, .releasepage = btrfs_releasepage, -- cgit v1.2.3-70-g09d2 From 4313b3994d719fcdeb7e661473019ca3d62e829b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Jan 2008 09:08:48 -0500 Subject: Btrfs: Reduce stack usage in the resizer, fix 32 bit compiles Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 5 ++++ fs/btrfs/extent-tree.c | 74 ++++++++++++++++++++++++++------------------------ fs/btrfs/inode.c | 6 ++-- fs/btrfs/transaction.c | 20 ++++++++++---- 5 files changed, 63 insertions(+), 43 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d6e5c19969b..9873975ce0e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -386,6 +386,7 @@ struct btrfs_root { int defrag_running; int defrag_level; char *name; + int in_sysfs; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index de026d9d9b2..67d9fd72886 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -374,6 +374,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->highest_inode = 0; root->last_inode_alloc = 0; root->name = NULL; + root->in_sysfs = 0; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); @@ -516,6 +517,9 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, if (!root) return NULL; + if (root->in_sysfs) + return root; + ret = btrfs_set_root_name(root, name, namelen); if (ret) { free_extent_buffer(root->node); @@ -530,6 +534,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } + root->in_sysfs = 1; return root; } #if 0 diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 91c2b27d968..6137f06091e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -751,7 +751,7 @@ again: if (found_objectid != root_objectid) { total_count = 2; - break; + goto out; } total_count = 1; path->slots[0]++; @@ -760,8 +760,6 @@ again: total_count = 0; goto out; } - if (total_count > 1) - goto out; if (level >= 0 && root->node == count_path->nodes[level]) goto out; level++; @@ -2109,22 +2107,23 @@ static int relocate_inode_pages(struct inode *inode, u64 start, u64 len) u64 delalloc_start; u64 existing_delalloc; unsigned long last_index; - unsigned long first_index; unsigned long i; struct page *page; struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - struct file_ra_state ra; + struct file_ra_state *ra; + + ra = kzalloc(sizeof(*ra), GFP_NOFS); mutex_lock(&inode->i_mutex); - first_index = start >> PAGE_CACHE_SHIFT; + i = start >> PAGE_CACHE_SHIFT; last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; - memset(&ra, 0, sizeof(ra)); - file_ra_state_init(&ra, inode->i_mapping); - btrfs_force_ra(inode->i_mapping, &ra, NULL, first_index, last_index); + file_ra_state_init(ra, inode->i_mapping); + btrfs_force_ra(inode->i_mapping, ra, NULL, i, last_index); + kfree(ra); - for (i = first_index; i <= last_index; i++) { + for (; i <= last_index; i++) { page = grab_cache_page(inode->i_mapping, i); if (!page) goto out_unlock; @@ -2167,27 +2166,43 @@ out_unlock: return 0; } +/* + * note, this releases the path + */ static int relocate_one_reference(struct btrfs_root *extent_root, struct btrfs_path *path, - struct btrfs_key *extent_key, - u64 ref_root, u64 ref_gen, u64 ref_objectid, - u64 ref_offset) + struct btrfs_key *extent_key) { struct inode *inode; struct btrfs_root *found_root; - struct btrfs_key root_location; + struct btrfs_key *root_location; + struct btrfs_extent_ref *ref; + u64 ref_root; + u64 ref_gen; + u64 ref_objectid; + u64 ref_offset; int ret; - root_location.objectid = ref_root; + ref = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_extent_ref); + ref_root = btrfs_ref_root(path->nodes[0], ref); + ref_gen = btrfs_ref_generation(path->nodes[0], ref); + ref_objectid = btrfs_ref_objectid(path->nodes[0], ref); + ref_offset = btrfs_ref_offset(path->nodes[0], ref); + btrfs_release_path(extent_root, path); + + root_location = kmalloc(sizeof(*root_location), GFP_NOFS); + root_location->objectid = ref_root; if (ref_gen == 0) - root_location.offset = 0; + root_location->offset = 0; else - root_location.offset = (u64)-1; - root_location.type = BTRFS_ROOT_ITEM_KEY; + root_location->offset = (u64)-1; + root_location->type = BTRFS_ROOT_ITEM_KEY; found_root = btrfs_read_fs_root_no_name(extent_root->fs_info, - &root_location); + root_location); BUG_ON(!found_root); + kfree(root_location); if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) { mutex_unlock(&extent_root->fs_info->fs_mutex); @@ -2259,12 +2274,7 @@ static int relocate_one_extent(struct btrfs_root *extent_root, { struct btrfs_key key; struct btrfs_key found_key; - struct btrfs_extent_ref *ref; struct extent_buffer *leaf; - u64 ref_root; - u64 ref_gen; - u64 ref_objectid; - u64 ref_offset; u32 nritems; u32 item_size; int ret = 0; @@ -2297,17 +2307,7 @@ static int relocate_one_extent(struct btrfs_root *extent_root, key.offset = found_key.offset + 1; item_size = btrfs_item_size_nr(leaf, path->slots[0]); - ref = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_extent_ref); - ref_root = btrfs_ref_root(leaf, ref); - ref_gen = btrfs_ref_generation(leaf, ref); - ref_objectid = btrfs_ref_objectid(leaf, ref); - ref_offset = btrfs_ref_offset(leaf, ref); - btrfs_release_path(extent_root, path); - - ret = relocate_one_reference(extent_root, path, - extent_key, ref_root, ref_gen, - ref_objectid, ref_offset); + ret = relocate_one_reference(extent_root, path, extent_key); if (ret) goto out; } @@ -2354,7 +2354,6 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) struct btrfs_path *path; u64 cur_byte; u64 total_found; - u64 ptr; struct btrfs_fs_info *info = root->fs_info; struct extent_map_tree *block_group_cache; struct btrfs_key key; @@ -2377,6 +2376,7 @@ again: key.offset = 0; key.type = 0; while(1) { + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; @@ -2441,6 +2441,8 @@ next: key.offset = 0; key.type = 0; while(1) { + u64 ptr; + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) goto out; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5003a86510a..b62f35e862b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2108,7 +2108,8 @@ static void btrfs_truncate(struct inode *inode) btrfs_btree_balance_dirty(root, nr); } -static int create_subvol(struct btrfs_root *root, char *name, int namelen) +static int noinline create_subvol(struct btrfs_root *root, char *name, + int namelen) { struct btrfs_trans_handle *trans; struct btrfs_key key; @@ -2492,7 +2493,8 @@ out: return ret; } -static int btrfs_ioctl_snap_create(struct btrfs_root *root, void __user *arg) +static int noinline btrfs_ioctl_snap_create(struct btrfs_root *root, + void __user *arg) { struct btrfs_ioctl_vol_args *vol_args; struct btrfs_dir_item *di; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 1ad611b9f61..02721eea9a7 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -481,12 +481,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans; struct btrfs_transaction *prev_trans = NULL; struct list_head dirty_fs_roots; - struct extent_map_tree pinned_copy; + struct extent_map_tree *pinned_copy; DEFINE_WAIT(wait); int ret; - extent_map_tree_init(&pinned_copy, - root->fs_info->btree_inode->i_mapping, GFP_NOFS); INIT_LIST_HEAD(&dirty_fs_roots); mutex_lock(&root->fs_info->trans_mutex); @@ -507,6 +505,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->fs_mutex); return 0; } + + pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS); + if (!pinned_copy) + return -ENOMEM; + + extent_map_tree_init(pinned_copy, + root->fs_info->btree_inode->i_mapping, GFP_NOFS); + trans->transaction->in_commit = 1; cur_trans = trans->transaction; if (cur_trans->list.prev != &root->fs_info->trans_list) { @@ -568,16 +574,20 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, &root->fs_info->super_copy, 0, sizeof(root->fs_info->super_copy)); - btrfs_copy_pinned(root, &pinned_copy); + btrfs_copy_pinned(root, pinned_copy); mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->fs_mutex); ret = btrfs_write_and_wait_transaction(trans, root); BUG_ON(ret); write_ctree_super(trans, root); + mutex_lock(&root->fs_info->fs_mutex); - btrfs_finish_extent_commit(trans, root, &pinned_copy); + btrfs_finish_extent_commit(trans, root, pinned_copy); mutex_lock(&root->fs_info->trans_mutex); + + kfree(pinned_copy); + cur_trans->commit_done = 1; root->fs_info->last_trans_committed = cur_trans->transid; wake_up(&cur_trans->commit_wait); -- cgit v1.2.3-70-g09d2 From f9ef6604ace23a6fcd698e08b58a883d6009157b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Jan 2008 09:22:38 -0500 Subject: Btrfs: 32 bit compile fixes for the resizer and enospc checks Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 +++- fs/btrfs/inode.c | 10 +++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6137f06091e..54f2b1d8bdf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2501,6 +2501,7 @@ int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, u64 nr = 0; u64 cur_byte; u64 old_size; + unsigned long rem; struct btrfs_block_group_cache *cache; struct btrfs_block_group_item *item; struct btrfs_fs_info *info = root->fs_info; @@ -2538,7 +2539,8 @@ int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, struct btrfs_block_group_item); btrfs_set_disk_block_group_used(leaf, item, 0); - if (nr % 3) { + div_long_long_rem(nr, 3, &rem); + if (rem) { btrfs_set_disk_block_group_flags(leaf, item, BTRFS_BLOCK_GROUP_DATA); } else { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b62f35e862b..8a2001bbf10 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -81,9 +81,11 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, int ret = 0; if (for_del) - thresh = (total * 90) / 100; + thresh = total * 90; else - thresh = (total * 85) / 100; + thresh = total * 85; + + do_div(thresh, 100); spin_lock(&root->fs_info->delalloc_lock); if (used + root->fs_info->delalloc_bytes + num_required > thresh) @@ -2475,7 +2477,9 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) ret = -EFBIG; goto out_unlock; } - new_size = (new_size / root->sectorsize) * root->sectorsize; + + do_div(new_size, root->sectorsize); + new_size *= root->sectorsize; printk("new size is %Lu\n", new_size); if (new_size > old_size) { -- cgit v1.2.3-70-g09d2 From 98ed51747b63435b9987ef12692a75c223818bbe Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Jan 2008 10:01:48 -0500 Subject: Btrfs: Force inlining off in a few places to save stack usage Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 21 ++++++++-------- fs/btrfs/extent-tree.c | 66 ++++++++++++++++++++++++++++---------------------- fs/btrfs/file.c | 16 +++++------- 3 files changed, 54 insertions(+), 49 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 8fa92a2d981..35c57074a37 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -548,8 +548,8 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, return 0; } -static int check_block(struct btrfs_root *root, struct btrfs_path *path, - int level) +static int noinline check_block(struct btrfs_root *root, + struct btrfs_path *path, int level) { return 0; #if 0 @@ -676,8 +676,9 @@ static struct extent_buffer *read_node_slot(struct btrfs_root *root, btrfs_level_size(root, btrfs_header_level(parent) - 1)); } -static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int level) +static int balance_level(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int level) { struct extent_buffer *right = NULL; struct extent_buffer *mid; @@ -868,9 +869,9 @@ enospc: } /* returns zero if the push worked, non-zero otherwise */ -static int push_nodes_for_insert(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, int level) +static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int level) { struct extent_buffer *right = NULL; struct extent_buffer *mid; @@ -1207,8 +1208,8 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, * returns 0 if some ptrs were pushed left, < 0 if there was some horrible * error, and > 0 if there was no room in the left hand block. */ -static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct extent_buffer *dst, +static int push_node_left(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *dst, struct extent_buffer *src) { int push_items = 0; @@ -1309,7 +1310,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans, * * returns zero on success or < 0 on failure. */ -static int insert_new_root(struct btrfs_trans_handle *trans, +static int noinline insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 54f2b1d8bdf..4957cface9a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -162,10 +162,11 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct return block_group; return NULL; } -static u64 find_search_start(struct btrfs_root *root, - struct btrfs_block_group_cache **cache_ret, - u64 search_start, int num, - int data, int full_scan) + +static u64 noinline find_search_start(struct btrfs_root *root, + struct btrfs_block_group_cache **cache_ret, + u64 search_start, int num, + int data, int full_scan) { int ret; struct btrfs_block_group_cache *cache = *cache_ret; @@ -393,11 +394,12 @@ static int match_extent_ref(struct extent_buffer *leaf, return ret == 0; } -static int lookup_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, u64 bytenr, - u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset, int del) +static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 bytenr, + u64 root_objectid, + u64 ref_generation, u64 owner, + u64 owner_offset, int del) { u64 hash; struct btrfs_key key; @@ -1116,8 +1118,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, return 0; } -static int finish_current_insert(struct btrfs_trans_handle *trans, struct - btrfs_root *extent_root) +static int finish_current_insert(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root) { u64 start; u64 end; @@ -1360,11 +1362,13 @@ static u64 stripe_align(struct btrfs_root *root, u64 val) * ins->offset == number of blocks * Any available blocks before search_start are skipped. */ -static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *orig_root, u64 num_bytes, u64 empty_size, - u64 search_start, u64 search_end, u64 hint_byte, - struct btrfs_key *ins, u64 exclude_start, - u64 exclude_nr, int data) +static int noinline find_free_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *orig_root, + u64 num_bytes, u64 empty_size, + u64 search_start, u64 search_end, + u64 hint_byte, struct btrfs_key *ins, + u64 exclude_start, u64 exclude_nr, + int data) { struct btrfs_path *path; struct btrfs_key key; @@ -1760,8 +1764,9 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return buf; } -static int drop_leaf_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct extent_buffer *leaf) +static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *leaf) { u64 leaf_owner; u64 leaf_generation; @@ -1802,8 +1807,8 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, return 0; } -static void reada_walk_down(struct btrfs_root *root, - struct extent_buffer *node) +static void noinline reada_walk_down(struct btrfs_root *root, + struct extent_buffer *node) { int i; u32 nritems; @@ -1835,8 +1840,9 @@ static void reada_walk_down(struct btrfs_root *root, * helper function for drop_snapshot, this walks down the tree dropping ref * counts as it goes. */ -static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int *level) +static int noinline walk_down_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level) { u64 root_owner; u64 root_gen; @@ -1955,8 +1961,9 @@ out: * to find the first node higher up where we haven't yet gone through * all the slots */ -static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int *level) +static int noinline walk_up_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level) { u64 root_owner; u64 root_gen; @@ -2100,7 +2107,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return 0; } -static int relocate_inode_pages(struct inode *inode, u64 start, u64 len) +static int noinline relocate_inode_pages(struct inode *inode, u64 start, + u64 len) { u64 page_start; u64 page_end; @@ -2169,7 +2177,7 @@ out_unlock: /* * note, this releases the path */ -static int relocate_one_reference(struct btrfs_root *extent_root, +static int noinline relocate_one_reference(struct btrfs_root *extent_root, struct btrfs_path *path, struct btrfs_key *extent_key) { @@ -2268,9 +2276,9 @@ out: return 0; } -static int relocate_one_extent(struct btrfs_root *extent_root, - struct btrfs_path *path, - struct btrfs_key *extent_key) +static int noinline relocate_one_extent(struct btrfs_root *extent_root, + struct btrfs_path *path, + struct btrfs_key *extent_key) { struct btrfs_key key; struct btrfs_key found_key; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 71dc2d33b6c..c9ebd4fe8f7 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -80,7 +80,7 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) } } -static int insert_inline_extent(struct btrfs_trans_handle *trans, +static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 offset, size_t size, struct page **pages, size_t page_offset, @@ -221,7 +221,7 @@ fail: return err; } -static int dirty_and_release_pages(struct btrfs_trans_handle *trans, +static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct file *file, struct page **pages, @@ -653,14 +653,10 @@ out: /* * this gets pages into the page cache and locks them down */ -static int prepare_pages(struct btrfs_root *root, - struct file *file, - struct page **pages, - size_t num_pages, - loff_t pos, - unsigned long first_index, - unsigned long last_index, - size_t write_bytes) +static int prepare_pages(struct btrfs_root *root, struct file *file, + struct page **pages, size_t num_pages, + loff_t pos, unsigned long first_index, + unsigned long last_index, size_t write_bytes) { int i; unsigned long index = pos >> PAGE_CACHE_SHIFT; -- cgit v1.2.3-70-g09d2 From bd09835d9aad9c7b664cddc8435cc37b86077971 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Jan 2008 13:23:19 -0500 Subject: count_snapshots: Properly update the leaf pointer after btrfs_next_leaf Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/inode.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4957cface9a..b801eb4b3ee 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -693,7 +693,6 @@ u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, u64 root_objectid = root->root_key.objectid; u32 total_count = 0; u32 cur_count; - u32 refs; u32 nritems; int ret; struct btrfs_key key; @@ -729,8 +728,8 @@ again: } item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); - refs = btrfs_extent_refs(l, item); while (1) { + l = path->nodes[0]; nritems = btrfs_header_nritems(l); if (path->slots[0] >= nritems) { ret = btrfs_next_leaf(extent_root, path); @@ -741,6 +740,7 @@ again: btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); if (found_key.objectid != bytenr) break; + if (found_key.type != BTRFS_EXTENT_REF_KEY) { path->slots[0]++; continue; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8a2001bbf10..6e2d03626fb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -208,7 +208,7 @@ again: } start = extent_end; - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + } else { goto not_found; } loop: -- cgit v1.2.3-70-g09d2 From d548ee5182bda42c87bdd4e28b539d5e436e206a Mon Sep 17 00:00:00 2001 From: Yan Date: Thu, 3 Jan 2008 13:56:30 -0500 Subject: Btrfs: Add a helper that finds previous extent item Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 80 +++++++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 44 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b801eb4b3ee..cc89df4493a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -33,6 +33,28 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); +static int find_previous_extent(struct btrfs_root *root, + struct btrfs_path *path) +{ + struct btrfs_key found_key; + struct extent_buffer *leaf; + int ret; + + while(1) { + if (path->slots[0] == 0) { + ret = btrfs_prev_leaf(root, path); + if (ret != 0) + return ret; + } else { + path->slots[0]--; + } + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.type == BTRFS_EXTENT_ITEM_KEY) + return 0; + } + return 1; +} static int cache_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *block_group) @@ -65,16 +87,19 @@ static int cache_block_group(struct btrfs_root *root, first_free = block_group->key.objectid; key.objectid = block_group->key.objectid; key.offset = 0; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) return ret; - - if (ret && path->slots[0] > 0) - path->slots[0]--; - + ret = find_previous_extent(root, path); + if (ret < 0) + return ret; + if (ret == 0) { + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid + key.offset > first_free) + first_free = key.objectid + key.offset; + } while(1) { leaf = path->nodes[0]; slot = path->slots[0]; @@ -88,15 +113,10 @@ static int cache_block_group(struct btrfs_root *root, break; } } - btrfs_item_key_to_cpu(leaf, &key, slot); if (key.objectid < block_group->key.objectid) { - if (btrfs_key_type(&key) != BTRFS_EXTENT_REF_KEY && - key.objectid + key.offset > first_free) - first_free = key.objectid + key.offset; goto next; } - if (key.objectid >= block_group->key.objectid + block_group->key.offset) { break; @@ -162,11 +182,9 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct return block_group; return NULL; } - static u64 noinline find_search_start(struct btrfs_root *root, struct btrfs_block_group_cache **cache_ret, - u64 search_start, int num, - int data, int full_scan) + u64 search_start, int num, int data) { int ret; struct btrfs_block_group_cache *cache = *cache_ret; @@ -771,9 +789,7 @@ again: out: btrfs_free_path(path); return total_count; - } - int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner_objectid) { @@ -1422,7 +1438,7 @@ check_failed: orig_search_start); } search_start = find_search_start(root, &block_group, search_start, - total_needed, data, full_scan); + total_needed, data); search_start = stripe_align(root, search_start); cached_start = search_start; btrfs_init_path(path); @@ -1434,35 +1450,11 @@ check_failed: ret = btrfs_search_slot(trans, root, ins, path, 0, 0); if (ret < 0) goto error; - - if (path->slots[0] > 0) { - path->slots[0]--; - } - + ret = find_previous_extent(root, path); + if (ret < 0) + goto error; l = path->nodes[0]; btrfs_item_key_to_cpu(l, &key, path->slots[0]); - - /* - * walk backwards to find the first extent item key - */ - while(btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) { - if (path->slots[0] == 0) { - ret = btrfs_prev_leaf(root, path); - if (ret != 0) { - ret = btrfs_search_slot(trans, root, ins, - path, 0, 0); - if (ret < 0) - goto error; - if (path->slots[0] > 0) - path->slots[0]--; - break; - } - } else { - path->slots[0]--; - } - l = path->nodes[0]; - btrfs_item_key_to_cpu(l, &key, path->slots[0]); - } while (1) { l = path->nodes[0]; slot = path->slots[0]; -- cgit v1.2.3-70-g09d2 From 73e48b277a3fcd647f1936c71aea2f8d450ecc74 Mon Sep 17 00:00:00 2001 From: Yan Date: Thu, 3 Jan 2008 14:14:39 -0500 Subject: Btrfs: Properly handle overlapping extent in shrink_extent_tree The patch fixes the overlapping extent issue in shrink_extent_tree. It checks whether there is an overlapping extent by using find_previous_extent. If there is an overlapping extent, it setups key.objectid and cur_byte properly. --- Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 140 ++++++++++++++++++++----------------------------- 1 file changed, 56 insertions(+), 84 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cc89df4493a..426a0222e12 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2317,36 +2317,6 @@ out: return ret; } -static int find_overlapping_extent(struct btrfs_root *root, - struct btrfs_path *path, u64 new_size) -{ - struct btrfs_key found_key; - struct extent_buffer *leaf; - int ret; - - while(1) { - if (path->slots[0] == 0) { - ret = btrfs_prev_leaf(root, path); - if (ret == 1) { - return 1; - } - if (ret < 0) - return ret; - } else { - path->slots[0]--; - } - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.type == BTRFS_EXTENT_ITEM_KEY) { - if (found_key.objectid + found_key.offset > new_size) - return 0; - else - return 1; - } - } - return 1; -} - int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) { struct btrfs_trans_handle *trans; @@ -2357,11 +2327,10 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) struct btrfs_fs_info *info = root->fs_info; struct extent_map_tree *block_group_cache; struct btrfs_key key; - struct btrfs_key found_key = { 0, 0, 0 }; + struct btrfs_key found_key; struct extent_buffer *leaf; u32 nritems; int ret; - int slot; btrfs_set_super_total_bytes(&info->super_copy, new_size); block_group_cache = &info->block_group_cache; @@ -2372,48 +2341,54 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) again: total_found = 0; key.objectid = new_size; - cur_byte = key.objectid; key.offset = 0; key.type = 0; - while(1) { + cur_byte = key.objectid; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + + ret = find_previous_extent(root, path); + if (ret < 0) + goto out; + if (ret == 0) { + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.objectid + found_key.offset > new_size) { + cur_byte = found_key.objectid; + key.objectid = cur_byte; + } + } + btrfs_release_path(root, path); + + while(1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; -next: + leaf = path->nodes[0]; - if (key.objectid == new_size - 1) { - ret = find_overlapping_extent(root, path, new_size); - if (ret != 0) { - btrfs_release_path(root, path); - ret = btrfs_search_slot(NULL, root, &key, - path, 0, 0); - if (ret < 0) - goto out; + nritems = btrfs_header_nritems(leaf); +next: + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + if (ret == 1) { + ret = 0; + break; } + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); } - nritems = btrfs_header_nritems(leaf); - ret = 0; - slot = path->slots[0]; - if (slot < nritems) - btrfs_item_key_to_cpu(leaf, &found_key, slot); - if (slot == nritems || - btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY) { + + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY || + found_key.objectid + found_key.offset <= cur_byte) { path->slots[0]++; - if (path->slots[0] >= nritems) { - ret = btrfs_next_leaf(root, path); - if (ret < 0) - goto out; - if (ret == 1) { - ret = 0; - break; - } - } goto next; } - btrfs_item_key_to_cpu(leaf, &found_key, slot); - if (found_key.objectid + found_key.offset <= cur_byte) - continue; + total_found++; cur_byte = found_key.objectid + found_key.offset; key.objectid = cur_byte; @@ -2446,33 +2421,30 @@ next: ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) goto out; -bg_next: + leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); - ret = 0; - slot = path->slots[0]; - if (slot < nritems) - btrfs_item_key_to_cpu(leaf, &found_key, slot); - if (slot == nritems || - btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) { - if (slot < nritems) { - printk("shrinker found key %Lu %u %Lu\n", - found_key.objectid, found_key.type, - found_key.offset); - path->slots[0]++; - } - if (path->slots[0] >= nritems) { - ret = btrfs_next_leaf(root, path); - if (ret < 0) - break; - if (ret == 1) { - ret = 0; - break; - } +bg_next: + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + break; + if (ret == 1) { + ret = 0; + break; } + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + } + + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) { + printk("shrinker found key %Lu %u %Lu\n", + found_key.objectid, found_key.type, + found_key.offset); + path->slots[0]++; goto bg_next; } - btrfs_item_key_to_cpu(leaf, &found_key, slot); ret = get_state_private(&info->block_group_cache, found_key.objectid, &ptr); if (!ret) -- cgit v1.2.3-70-g09d2 From 1372f8e609d260c2352bf74e125614eef7aee8ef Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 4 Jan 2008 09:34:54 -0500 Subject: Properly call btrfs_search_slot while shrinking The shrinking code used btrfs_next_leaf to find the next item, but this does not cow the blocks it touches. This fix calls search_slot after finding the next item to do appropriate cow and balancing. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 426a0222e12..c390fd847a2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2434,7 +2434,15 @@ bg_next: break; } leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + /* + * btrfs_next_leaf doesn't cow buffers, we have to + * do the search again + */ + memcpy(&key, &found_key, sizeof(key)); + btrfs_release_path(root, path); + continue; } btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); -- cgit v1.2.3-70-g09d2 From 725c8463ea9bcfc2b56cff09b93a90e5bcca59c4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 4 Jan 2008 16:47:16 -0500 Subject: Btrfs: resizer: don't hold the fs_mutex for long periods of time Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c390fd847a2..c906bb19b21 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2331,6 +2331,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) struct extent_buffer *leaf; u32 nritems; int ret; + int progress = 0; btrfs_set_super_total_bytes(&info->super_copy, new_size); block_group_cache = &info->block_group_cache; @@ -2383,6 +2384,19 @@ next: } btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + if (progress && need_resched()) { + memcpy(&key, &found_key, sizeof(key)); + mutex_unlock(&root->fs_info->fs_mutex); + cond_resched(); + mutex_lock(&root->fs_info->fs_mutex); + btrfs_release_path(root, path); + btrfs_search_slot(NULL, root, &key, path, 0, 0); + progress = 0; + goto next; + } + progress = 1; + if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY || found_key.objectid + found_key.offset <= cur_byte) { path->slots[0]++; @@ -2442,7 +2456,7 @@ bg_next: */ memcpy(&key, &found_key, sizeof(key)); btrfs_release_path(root, path); - continue; + goto resched_check; } btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); @@ -2465,6 +2479,12 @@ bg_next: key.objectid = found_key.objectid + 1; btrfs_del_item(trans, root, path); btrfs_release_path(root, path); +resched_check: + if (need_resched()) { + mutex_unlock(&root->fs_info->fs_mutex); + cond_resched(); + mutex_lock(&root->fs_info->fs_mutex); + } } clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1, GFP_NOFS); -- cgit v1.2.3-70-g09d2 From dc17ff8f11d129db9e83ab7244769e4eae05e14d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 8 Jan 2008 15:46:30 -0500 Subject: Btrfs: Add data=ordered support This forces file data extents down the disk along with the metadata that references them. The current implementation is fairly simple, and just writes out all of the dirty pages in an inode before the commit. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/btrfs_inode.h | 1 + fs/btrfs/ctree.c | 6 +- fs/btrfs/ctree.h | 7 +- fs/btrfs/disk-io.c | 16 +++- fs/btrfs/disk-io.h | 2 + fs/btrfs/extent-tree.c | 4 +- fs/btrfs/file.c | 5 +- fs/btrfs/inode.c | 46 ++++++---- fs/btrfs/ordered-data.c | 221 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/ordered-data.h | 39 +++++++++ fs/btrfs/transaction.c | 58 +++++++++++++ fs/btrfs/transaction.h | 8 +- 13 files changed, 387 insertions(+), 28 deletions(-) create mode 100644 fs/btrfs/ordered-data.c create mode 100644 fs/btrfs/ordered-data.h (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index d5804c5ca07..ab9a9f8edbf 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -5,7 +5,7 @@ obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ - extent_map.o sysfs.o struct-funcs.o xattr.o acl.o + extent_map.o sysfs.o struct-funcs.o xattr.o acl.o ordered-data.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index d1d5af471c3..f27e633f174 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -29,6 +29,7 @@ struct btrfs_inode { struct extent_map_tree extent_tree; struct inode vfs_inode; + u64 ordered_trans; /* * transid of the trans_handle that last modified this inode */ diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 35c57074a37..43d23148a4f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -221,7 +221,9 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, struct extent_buffer **cow_ret) { u64 search_start; + u64 header_trans; int ret; + if (trans->transaction != root->fs_info->running_transaction) { printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, root->fs_info->running_transaction->transid); @@ -232,7 +234,9 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, root->fs_info->generation); WARN_ON(1); } - if (btrfs_header_generation(buf) == trans->transid) { + + header_trans = btrfs_header_generation(buf); + if (header_trans == trans->transid) { *cow_ret = buf; return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9873975ce0e..b55dba58dfa 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -16,8 +16,8 @@ * Boston, MA 021110-1307, USA. */ -#ifndef __BTRFS__ -#define __BTRFS__ +#ifndef __BTRFS_CTREE__ +#define __BTRFS_CTREE__ #include #include @@ -363,7 +363,6 @@ struct btrfs_root { struct inode *inode; struct kobject root_kobj; struct completion kobj_unregister; - struct rw_semaphore snap_sem; u64 objectid; u64 last_trans; @@ -1142,6 +1141,8 @@ void btrfs_destroy_cachep(void); long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, struct btrfs_root *root); +struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, + u64 root_objectid); int btrfs_commit_write(struct file *file, struct page *page, unsigned from, unsigned to); struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a6170ff19e7..34cf1f1f47b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -406,7 +406,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); memset(&root->root_kobj, 0, sizeof(root->root_kobj)); init_completion(&root->kobj_unregister); - init_rwsem(&root->snap_sem); root->defrag_running = 0; root->defrag_level = 0; root->root_key.objectid = objectid; @@ -498,6 +497,21 @@ insert: return root; } +struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_objectid) +{ + struct btrfs_root *root; + + if (root_objectid == BTRFS_ROOT_TREE_OBJECTID) + return fs_info->tree_root; + if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID) + return fs_info->extent_root; + + root = radix_tree_lookup(&fs_info->fs_roots_radix, + (unsigned long)root_objectid); + return root; +} + struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, struct btrfs_key *location) { diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 8c3cfd02901..dae9fba8efc 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -34,6 +34,8 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); +struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_objectid); struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location, const char *name, int namelen); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c906bb19b21..68137cd8506 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1195,7 +1195,9 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, if (btrfs_buffer_uptodate(buf)) { u64 transid = root->fs_info->running_transaction->transid; - if (btrfs_header_generation(buf) == transid) { + u64 header_transid = + btrfs_header_generation(buf); + if (header_transid == transid) { free_extent_buffer(buf); return 1; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 94c93373cb7..0a5f4defe59 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -34,6 +34,7 @@ #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" +#include "ordered-data.h" #include "ioctl.h" #include "print-tree.h" @@ -329,6 +330,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, root->fs_info->delalloc_bytes += (end_of_last_block + 1 - start_pos) - existing_delalloc; spin_unlock(&root->fs_info->delalloc_lock); + btrfs_add_ordered_inode(inode); } else { u64 aligned_end; /* step one, delete the existing extents in this range */ @@ -724,8 +726,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); - down_read(&BTRFS_I(inode)->root->snap_sem); - mutex_lock(&inode->i_mutex); first_index = pos >> PAGE_CACHE_SHIFT; last_index = (pos + count) >> PAGE_CACHE_SHIFT; @@ -804,7 +804,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } out: mutex_unlock(&inode->i_mutex); - up_read(&BTRFS_I(inode)->root->snap_sem); out_nolock: kfree(pages); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6d0cd9a7a61..6d6e1ac0a9a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -135,6 +135,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) alloc_hint = ins.objectid + ins.offset; start += cur_alloc_size; } + btrfs_add_ordered_inode(inode); out: btrfs_end_transaction(trans, root); return ret; @@ -367,8 +368,8 @@ void btrfs_read_locked_inode(struct inode *inode) path = btrfs_alloc_path(); BUG_ON(!path); mutex_lock(&root->fs_info->fs_mutex); - memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); + ret = btrfs_lookup_inode(NULL, root, path, &location, 0); if (ret) goto make_bad; @@ -898,7 +899,6 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) if ((offset & (blocksize - 1)) == 0) goto out; - down_read(&root->snap_sem); ret = -ENOMEM; page = grab_cache_page(mapping, index); if (!page) @@ -917,7 +917,6 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) unlock_page(page); page_cache_release(page); - up_read(&BTRFS_I(inode)->root->snap_sem); out: return ret; } @@ -1146,6 +1145,19 @@ static int btrfs_find_actor(struct inode *inode, void *opaque) args->root == BTRFS_I(inode)->root); } +struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, + u64 root_objectid) +{ + struct btrfs_iget_args args; + args.ino = objectid; + args.root = btrfs_lookup_fs_root(btrfs_sb(s)->fs_info, root_objectid); + + if (!args.root) + return NULL; + + return ilookup5(s, objectid, btrfs_find_actor, (void *)&args); +} + struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, struct btrfs_root *root) { @@ -1336,7 +1348,6 @@ read_dir_items: d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; btrfs_dir_item_key_to_cpu(leaf, di, &location); - over = filldir(dirent, name_ptr, name_len, found_key.offset, location.objectid, @@ -2054,7 +2065,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) ret = -EINVAL; - down_read(&BTRFS_I(inode)->root->snap_sem); lock_page(page); wait_on_page_writeback(page); size = i_size_read(inode); @@ -2075,7 +2085,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) ret = btrfs_cow_one_page(inode, page, end); out_unlock: - up_read(&BTRFS_I(inode)->root->snap_sem); unlock_page(page); out: return ret; @@ -2118,7 +2127,7 @@ static int noinline create_subvol(struct btrfs_root *root, char *name, struct btrfs_root_item root_item; struct btrfs_inode_item *inode_item; struct extent_buffer *leaf; - struct btrfs_root *new_root; + struct btrfs_root *new_root = root; struct inode *inode; struct inode *dir; int ret; @@ -2230,7 +2239,7 @@ static int noinline create_subvol(struct btrfs_root *root, char *name, goto fail; fail: nr = trans->blocks_used; - err = btrfs_commit_transaction(trans, root); + err = btrfs_commit_transaction(trans, new_root); if (err && !ret) ret = err; fail_commit: @@ -2253,10 +2262,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) if (!root->ref_cows) return -EINVAL; - down_write(&root->snap_sem); - freeze_bdev(root->fs_info->sb->s_bdev); - thaw_bdev(root->fs_info->sb->s_bdev, root->fs_info->sb); - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, 1, 0); if (ret) @@ -2264,6 +2269,9 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); + err = btrfs_commit_transaction(trans, root); + + trans = btrfs_start_transaction(root, 1); ret = btrfs_update_inode(trans, root, root->inode); if (ret) @@ -2272,9 +2280,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, 0, &objectid); if (ret) - goto fail; - - memcpy(&new_root_item, &root->root_item, + goto fail; memcpy(&new_root_item, &root->root_item, sizeof(new_root_item)); key.objectid = objectid; @@ -2285,12 +2291,20 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); free_extent_buffer(tmp); + /* write the ordered inodes to force all delayed allocations to + * be filled. Once this is done, we can copy the root + */ + mutex_lock(&root->fs_info->trans_mutex); + btrfs_write_ordered_inodes(trans, root); + mutex_unlock(&root->fs_info->trans_mutex); + btrfs_copy_root(trans, root, root->node, &tmp, objectid); btrfs_set_root_bytenr(&new_root_item, tmp->start); btrfs_set_root_level(&new_root_item, btrfs_header_level(tmp)); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); +printk("new root %Lu node %Lu\n", objectid, tmp->start); free_extent_buffer(tmp); if (ret) goto fail; @@ -2321,7 +2335,6 @@ fail: ret = err; fail_unlock: mutex_unlock(&root->fs_info->fs_mutex); - up_write(&root->snap_sem); btrfs_btree_balance_dirty(root, nr); return ret; } @@ -2608,6 +2621,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) if (!ei) return NULL; ei->last_trans = 0; + ei->ordered_trans = 0; return &ei->vfs_inode; } diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c new file mode 100644 index 00000000000..411aba84d30 --- /dev/null +++ b/fs/btrfs/ordered-data.c @@ -0,0 +1,221 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include +#include "ctree.h" +#include "transaction.h" +#include "btrfs_inode.h" + +struct tree_entry { + u64 root_objectid; + u64 objectid; + struct rb_node rb_node; +}; + +/* + * returns > 0 if entry passed (root, objectid) is > entry, + * < 0 if (root, objectid) < entry and zero if they are equal + */ +static int comp_entry(struct tree_entry *entry, u64 root_objectid, + u64 objectid) +{ + if (root_objectid < entry->root_objectid) + return -1; + if (root_objectid > entry->root_objectid) + return 1; + if (objectid < entry->objectid) + return -1; + if (objectid > entry->objectid) + return 1; + return 0; +} + +static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid, + u64 objectid, struct rb_node *node) +{ + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; + struct tree_entry *entry; + int comp; + + while(*p) { + parent = *p; + entry = rb_entry(parent, struct tree_entry, rb_node); + + comp = comp_entry(entry, root_objectid, objectid); + if (comp < 0) + p = &(*p)->rb_left; + else if (comp > 0) + p = &(*p)->rb_right; + else + return parent; + } + + rb_link_node(node, parent, p); + rb_insert_color(node, root); + return NULL; +} + +static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid, + u64 objectid, struct rb_node **prev_ret) +{ + struct rb_node * n = root->rb_node; + struct rb_node *prev = NULL; + struct tree_entry *entry; + struct tree_entry *prev_entry = NULL; + int comp; + + while(n) { + entry = rb_entry(n, struct tree_entry, rb_node); + prev = n; + prev_entry = entry; + comp = comp_entry(entry, root_objectid, objectid); + + if (comp < 0) + n = n->rb_left; + else if (comp > 0) + n = n->rb_right; + else + return n; + } + if (!prev_ret) + return NULL; + + while(prev && comp_entry(prev_entry, root_objectid, objectid) >= 0) { + prev = rb_next(prev); + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + } + *prev_ret = prev; + return NULL; +} + +static inline struct rb_node *tree_search(struct rb_root *root, + u64 root_objectid, u64 objectid) +{ + struct rb_node *prev; + struct rb_node *ret; + ret = __tree_search(root, root_objectid, objectid, &prev); + if (!ret) + return prev; + return ret; +} + +int btrfs_add_ordered_inode(struct inode *inode) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 root_objectid = root->root_key.objectid; + u64 transid = root->fs_info->running_transaction->transid; + struct tree_entry *entry; + struct rb_node *node; + struct btrfs_ordered_inode_tree *tree; + + if (transid <= BTRFS_I(inode)->ordered_trans) + return 0; + + tree = &root->fs_info->running_transaction->ordered_inode_tree; + + read_lock(&tree->lock); + node = __tree_search(&tree->tree, root_objectid, inode->i_ino, NULL); + read_unlock(&tree->lock); + if (node) { + return 0; + } + + entry = kmalloc(sizeof(*entry), GFP_NOFS); + if (!entry) + return -ENOMEM; + + write_lock(&tree->lock); + entry->objectid = inode->i_ino; + entry->root_objectid = root_objectid; + + node = tree_insert(&tree->tree, root_objectid, + inode->i_ino, &entry->rb_node); + + BTRFS_I(inode)->ordered_trans = transid; + + write_unlock(&tree->lock); + if (node) + kfree(entry); + return 0; +} + +int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, + u64 *root_objectid, u64 *objectid) +{ + struct tree_entry *entry; + struct rb_node *node; + + write_lock(&tree->lock); + node = tree_search(&tree->tree, *root_objectid, *objectid); + if (!node) { + write_unlock(&tree->lock); + return 0; + } + entry = rb_entry(node, struct tree_entry, rb_node); + + while(comp_entry(entry, *root_objectid, *objectid) >= 0) { + node = rb_next(node); + if (!node) + break; + entry = rb_entry(node, struct tree_entry, rb_node); + } + if (!node) { + write_unlock(&tree->lock); + return 0; + } + + *root_objectid = entry->root_objectid; + *objectid = entry->objectid; + write_unlock(&tree->lock); + return 1; +} + +int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, + u64 *root_objectid, u64 *objectid) +{ + struct tree_entry *entry; + struct rb_node *node; + + write_lock(&tree->lock); + node = tree_search(&tree->tree, *root_objectid, *objectid); + if (!node) { + write_unlock(&tree->lock); + return 0; + } + + entry = rb_entry(node, struct tree_entry, rb_node); + while(comp_entry(entry, *root_objectid, *objectid) >= 0) { + node = rb_next(node); + if (!node) + break; + entry = rb_entry(node, struct tree_entry, rb_node); + } + if (!node) { + write_unlock(&tree->lock); + return 0; + } + + *root_objectid = entry->root_objectid; + *objectid = entry->objectid; + rb_erase(node, &tree->tree); + write_unlock(&tree->lock); + kfree(entry); + return 1; +} diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h new file mode 100644 index 00000000000..aaf9eb14271 --- /dev/null +++ b/fs/btrfs/ordered-data.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_ORDERED_DATA__ +#define __BTRFS_ORDERED_DATA__ + +struct btrfs_ordered_inode_tree { + rwlock_t lock; + struct rb_root tree; +}; + +static inline void +btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t) +{ + rwlock_init(&t->lock); + t->tree.rb_node = NULL; +} + +int btrfs_add_ordered_inode(struct inode *inode); +int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, + u64 *root_objectid, u64 *objectid); +int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, + u64 *root_objectid, u64 *objectid); +#endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 02721eea9a7..3ed5868e7c0 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -67,6 +67,7 @@ static int join_transaction(struct btrfs_root *root) cur_trans->commit_done = 0; cur_trans->start_time = get_seconds(); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); + btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree); extent_map_tree_init(&cur_trans->dirty_pages, root->fs_info->btree_inode->i_mapping, GFP_NOFS); @@ -473,6 +474,60 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, return ret; } +int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_transaction *cur_trans = trans->transaction; + struct inode *inode; + u64 root_objectid = 0; + u64 objectid = 0; + u64 transid = trans->transid; + int ret; + +printk("write ordered trans %Lu\n", transid); + while(1) { + ret = btrfs_find_first_ordered_inode( + &cur_trans->ordered_inode_tree, + &root_objectid, &objectid); + if (!ret) + break; + + mutex_unlock(&root->fs_info->trans_mutex); + mutex_unlock(&root->fs_info->fs_mutex); + inode = btrfs_ilookup(root->fs_info->sb, objectid, + root_objectid); + if (inode) { + if (S_ISREG(inode->i_mode)) + filemap_fdatawrite(inode->i_mapping); + iput(inode); + } + mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->trans_mutex); + } + while(1) { + root_objectid = 0; + objectid = 0; + ret = btrfs_find_del_first_ordered_inode( + &cur_trans->ordered_inode_tree, + &root_objectid, &objectid); + if (!ret) + break; + mutex_unlock(&root->fs_info->trans_mutex); + mutex_unlock(&root->fs_info->fs_mutex); + inode = btrfs_ilookup(root->fs_info->sb, objectid, + root_objectid); + if (inode) { + if (S_ISREG(inode->i_mode)) + filemap_write_and_wait(inode->i_mapping); + iput(inode); + } + mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->trans_mutex); + } +printk("done write ordered trans %Lu\n", transid); + return 0; +} + int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -550,10 +605,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); finish_wait(&cur_trans->writer_wait, &wait); + ret = btrfs_write_ordered_inodes(trans, root); + } while (cur_trans->num_writers > 1 || (cur_trans->num_joined != joined)); WARN_ON(cur_trans != trans->transaction); + ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, &dirty_fs_roots); BUG_ON(ret); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index eef840bca91..c157ddbe9d1 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -16,9 +16,10 @@ * Boston, MA 021110-1307, USA. */ -#ifndef __TRANSACTION__ -#define __TRANSACTION__ +#ifndef __BTRFS_TRANSACTION__ +#define __BTRFS_TRANSACTION__ #include "btrfs_inode.h" +#include "ordered-data.h" struct btrfs_transaction { u64 transid; @@ -30,6 +31,7 @@ struct btrfs_transaction { struct list_head list; struct extent_map_tree dirty_pages; unsigned long start_time; + struct btrfs_ordered_inode_tree ordered_inode_tree; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; }; @@ -90,4 +92,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); int btrfs_clean_old_snapshots(struct btrfs_root *root); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, + struct btrfs_root *root); #endif -- cgit v1.2.3-70-g09d2 From b0331a4c4c339ba7786472b137d6ece9e7f810ec Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 8 Jan 2008 15:46:31 -0500 Subject: Btrfs: Disable btree reada during extent backref lookups. This reada is generally not effective. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 68137cd8506..660b05a4baf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -630,6 +630,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; + path->reada = 0; key.objectid = bytenr; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_bytes; @@ -649,6 +650,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_release_path(root->fs_info->extent_root, path); + path->reada = 0; ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root, path, bytenr, root_objectid, ref_generation, owner, owner_offset); @@ -680,6 +682,7 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, WARN_ON(num_bytes < root->sectorsize); path = btrfs_alloc_path(); + path->reada = 0; key.objectid = bytenr; key.offset = num_bytes; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -1240,6 +1243,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (!path) return -ENOMEM; + path->reada = 0; ret = lookup_extent_backref(trans, extent_root, path, bytenr, root_objectid, ref_generation, -- cgit v1.2.3-70-g09d2 From c31f8830f0dfd31e7e196b85ca1b39aef8b77d57 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 8 Jan 2008 15:46:31 -0500 Subject: Btrfs: online shrinking fixes While shrinking the FS, the allocation functions need to make sure they don't try to allocate bytes past the end of the FS. nodatacow needed an extra check to force cows when the existing extents are past the end of the FS. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 32 +++++++++++++++++++++++--------- fs/btrfs/inode.c | 15 +++++++++++++-- 2 files changed, 36 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 660b05a4baf..99a8b0f0d31 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -192,11 +192,13 @@ static u64 noinline find_search_start(struct btrfs_root *root, u64 start = 0; u64 end = 0; u64 cache_miss = 0; + u64 total_fs_bytes; int wrapped = 0; if (!cache) { goto out; } + total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); again: ret = cache_block_group(root, cache); if (ret) @@ -223,6 +225,8 @@ again: if (data != BTRFS_BLOCK_GROUP_MIXED && start + num > cache->key.objectid + cache->key.offset) goto new_group; + if (start + num > total_fs_bytes) + goto new_group; return start; } out: @@ -239,7 +243,7 @@ new_group: last = cache->key.objectid + cache->key.offset; wrapped: cache = btrfs_lookup_block_group(root->fs_info, last); - if (!cache) { + if (!cache || cache->key.objectid >= total_fs_bytes) { no_cache: if (!wrapped) { wrapped = 1; @@ -287,6 +291,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, u64 end; u64 free_check; u64 ptr; + u64 total_fs_bytes; int bit; int ret; int full_search = 0; @@ -294,6 +299,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int data_swap = 0; block_group_cache = &info->block_group_cache; + total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); if (!owner) factor = 8; @@ -306,7 +312,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, else bit = BLOCK_GROUP_METADATA; - if (search_start) { + if (search_start && search_start < total_fs_bytes) { struct btrfs_block_group_cache *shint; shint = btrfs_lookup_block_group(info, search_start); if (shint && (shint->data == data || @@ -318,8 +324,8 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, } } } - if (hint && (hint->data == data || - hint->data == BTRFS_BLOCK_GROUP_MIXED)) { + if (hint && hint->key.objectid < total_fs_bytes && + (hint->data == data || hint->data == BTRFS_BLOCK_GROUP_MIXED)) { used = btrfs_block_group_used(&hint->item); if (used + hint->pinned < div_factor(hint->key.offset, factor)) { @@ -333,6 +339,8 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, else hint_last = search_start; + if (hint_last >= total_fs_bytes) + hint_last = search_start; last = hint_last; } again: @@ -350,6 +358,9 @@ again: last = cache->key.objectid + cache->key.offset; used = btrfs_block_group_used(&cache->item); + if (cache->key.objectid > total_fs_bytes) + break; + if (full_search) free_check = cache->key.offset; else @@ -1420,8 +1431,8 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, data = BTRFS_BLOCK_GROUP_MIXED; } - if (search_end == (u64)-1) - search_end = btrfs_super_total_bytes(&info->super_copy); + search_end = min(search_end, + btrfs_super_total_bytes(&info->super_copy)); if (hint_byte) { block_group = btrfs_lookup_block_group(info, hint_byte); if (!block_group) @@ -1617,7 +1628,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, { int ret; int pending_ret; - u64 super_used, root_used; + u64 super_used; + u64 root_used; u64 search_start = 0; u64 new_hint; struct btrfs_fs_info *info = root->fs_info; @@ -1636,6 +1648,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, search_start, search_end, hint_byte, ins, trans->alloc_exclude_start, trans->alloc_exclude_nr, data); +if (ret) +printk("find free extent returns %d\n", ret); BUG_ON(ret); if (ret) return ret; @@ -2292,8 +2306,6 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, while(1) { ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); - BUG_ON(ret == 0); - if (ret < 0) goto out; @@ -2340,6 +2352,8 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) int progress = 0; btrfs_set_super_total_bytes(&info->super_copy, new_size); + clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1, + GFP_NOFS); block_group_cache = &info->block_group_cache; path = btrfs_alloc_path(); root = root->fs_info->extent_root; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2cb2dd32407..281757007dd 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -148,6 +148,7 @@ static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end) u64 bytenr; u64 cow_end; u64 loops = 0; + u64 total_fs_bytes; struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_buffer *leaf; int found_type; @@ -157,6 +158,7 @@ static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end) int err; struct btrfs_key found_key; + total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); path = btrfs_alloc_path(); BUG_ON(!path); again: @@ -189,8 +191,10 @@ again: found_type = btrfs_file_extent_type(leaf, item); extent_start = found_key.offset; if (found_type == BTRFS_FILE_EXTENT_REG) { - extent_end = extent_start + - btrfs_file_extent_num_bytes(leaf, item); + u64 extent_num_bytes; + + extent_num_bytes = btrfs_file_extent_num_bytes(leaf, item); + extent_end = extent_start + extent_num_bytes; err = 0; if (loops && start != extent_start) @@ -204,6 +208,13 @@ again: if (bytenr == 0) goto not_found; + /* + * we may be called by the resizer, make sure we're inside + * the limits of the FS + */ + if (bytenr + extent_num_bytes > total_fs_bytes) + goto not_found; + if (btrfs_count_snapshots_in_path(root, path, bytenr) != 1) { goto not_found; } -- cgit v1.2.3-70-g09d2 From 55c69072d6bd5be170a85467f64a20963cddf490 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 Jan 2008 15:55:33 -0500 Subject: Btrfs: Fix extent_buffer usage when nodesize != leafsize Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 26 +++++++++++++++++++++++--- fs/btrfs/extent-tree.c | 13 ++++++++++--- fs/btrfs/extent_map.c | 30 +++++++++++++++++++++--------- 3 files changed, 54 insertions(+), 15 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a481b970608..0338f8fd382 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -197,9 +197,23 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) if (found_start != start) { printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", start, found_start, len); + WARN_ON(1); + goto err; + } + if (eb->first_page != page) { + printk("bad first page %lu %lu\n", eb->first_page->index, + page->index); + WARN_ON(1); + goto err; + } + if (!PageUptodate(page)) { + printk("csum not up to date page %lu\n", page->index); + WARN_ON(1); + goto err; } found_level = btrfs_header_level(eb); csum_tree_block(root, eb, 0); +err: free_extent_buffer(eb); out: return 0; @@ -368,7 +382,10 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf) { struct inode *btree_inode = root->fs_info->btree_inode; - clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); + if (btrfs_header_generation(buf) == + root->fs_info->running_transaction->transid) + clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, + buf); return 0; } @@ -897,8 +914,11 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) void btrfs_throttle(struct btrfs_root *root) { - if (root->fs_info->throttles) - congestion_wait(WRITE, HZ/10); + struct backing_dev_info *bdi; + + bdi = root->fs_info->sb->s_bdev->bd_inode->i_mapping->backing_dev_info; + if (root->fs_info->throttles && bdi_write_congested(bdi)) + congestion_wait(WRITE, HZ/20); } void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 99a8b0f0d31..2c569b4d59d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1212,6 +1212,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, u64 header_transid = btrfs_header_generation(buf); if (header_transid == transid) { + clean_tree_block(NULL, root, buf); free_extent_buffer(buf); return 1; } @@ -1249,7 +1250,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root key.objectid = bytenr; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_bytes; - path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -1648,8 +1648,6 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, search_start, search_end, hint_byte, ins, trans->alloc_exclude_start, trans->alloc_exclude_nr, data); -if (ret) -printk("find free extent returns %d\n", ret); BUG_ON(ret); if (ret) return ret; @@ -1764,7 +1762,16 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 0, 0, 0); return ERR_PTR(-ENOMEM); } + btrfs_set_header_generation(buf, trans->transid); + clean_tree_block(trans, root, buf); + wait_on_tree_block_writeback(root, buf); btrfs_set_buffer_uptodate(buf); + + if (PageDirty(buf->first_page)) { + printk("page %lu dirty\n", buf->first_page->index); + WARN_ON(1); + } + set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree, diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 9d6aefa937c..f3a384ed700 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1663,6 +1663,13 @@ void set_page_extent_mapped(struct page *page) } } +void set_page_extent_head(struct page *page, unsigned long len) +{ + WARN_ON(page->private && page->private == EXTENT_PAGE_PRIVATE && + PageDirty(page)); + set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); +} + /* * basic readpage implementation. Locked extent state structs are inserted * into the tree that are removed when the IO is done (by the end_io @@ -2490,8 +2497,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, mark_page_accessed(page0); set_page_extent_mapped(page0); WARN_ON(!PageUptodate(page0)); - set_page_private(page0, EXTENT_PAGE_PRIVATE_FIRST_PAGE | - len << 2); + set_page_extent_head(page0, len); } else { i = 0; } @@ -2505,8 +2511,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, mark_page_accessed(p); if (i == 0) { eb->first_page = p; - set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE | - len << 2); + set_page_extent_head(p, len); } else { set_page_private(p, EXTENT_PAGE_PRIVATE); } @@ -2569,8 +2574,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, if (i == 0) { eb->first_page = p; - set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE | - len << 2); + set_page_extent_head(p, len); } else { set_page_private(p, EXTENT_PAGE_PRIVATE); } @@ -2643,6 +2647,11 @@ int clear_extent_buffer_dirty(struct extent_map_tree *tree, for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); lock_page(page); + if (i == 0) + set_page_extent_head(page, eb->len); + else + set_page_private(page, EXTENT_PAGE_PRIVATE); + /* * if we're on the last page or the first page and the * block isn't aligned on a page boundary, do extra checks @@ -2697,9 +2706,12 @@ int set_extent_buffer_dirty(struct extent_map_tree *tree, */ if (i == 0) { lock_page(page); - set_page_private(page, - EXTENT_PAGE_PRIVATE_FIRST_PAGE | - eb->len << 2); + set_page_extent_head(page, eb->len); + } else if (PagePrivate(page) && + page->private != EXTENT_PAGE_PRIVATE) { + lock_page(page); + set_page_extent_mapped(page); + unlock_page(page); } __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); if (i == 0) -- cgit v1.2.3-70-g09d2 From e18e4809b10e6c9efb5fe10c1ddcb4ebb690d517 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Jan 2008 10:54:22 -0500 Subject: Btrfs: Add mount -o ssd, which includes optimizations for seek free storage Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/disk-io.c | 3 +++ fs/btrfs/extent-tree.c | 31 ++++++++++++++++++++++++++++++- fs/btrfs/super.c | 9 ++++++++- fs/btrfs/transaction.c | 1 + fs/btrfs/tree-defrag.c | 3 +++ 6 files changed, 47 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index fa65fe027e4..7a588ba2b74 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -351,6 +351,7 @@ struct btrfs_fs_info { spinlock_t delalloc_lock; spinlock_t new_trans_lock; u64 delalloc_bytes; + u64 last_alloc; }; /* * in ram representation of the tree. extent_root is used for all allocations @@ -444,6 +445,7 @@ struct btrfs_root { #define BTRFS_MOUNT_NODATASUM (1 << 0) #define BTRFS_MOUNT_NODATACOW (1 << 1) #define BTRFS_MOUNT_NOBARRIER (1 << 2) +#define BTRFS_MOUNT_SSD (1 << 3) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 731a534f81f..5d1f9bca271 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -193,6 +193,7 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) } eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1); + btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); if (found_start != start) { printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", @@ -676,6 +677,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->do_barriers = 1; fs_info->closing = 0; fs_info->total_pinned = 0; + fs_info->last_alloc = 0; + #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); #else diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2c569b4d59d..b69a46691a9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1431,6 +1431,19 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, data = BTRFS_BLOCK_GROUP_MIXED; } + /* for SSD, cluster allocations together as much as possible */ + if (btrfs_test_opt(root, SSD)) { + if (!data) { + if (root->fs_info->last_alloc) + hint_byte = root->fs_info->last_alloc; + else { + hint_byte = hint_byte & + ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); + empty_size += 16 * 1024 * 1024; + } + } + } + search_end = min(search_end, btrfs_super_total_bytes(&info->super_copy)); if (hint_byte) { @@ -1456,6 +1469,19 @@ check_failed: } search_start = find_search_start(root, &block_group, search_start, total_needed, data); + + if (!data && btrfs_test_opt(root, SSD) && info->last_alloc && + search_start != info->last_alloc) { + info->last_alloc = 0; + if (!empty_size) { + empty_size += 16 * 1024 * 1024; + total_needed += empty_size; + } + search_start = find_search_start(root, &block_group, + search_start, total_needed, + data); + } + search_start = stripe_align(root, search_start); cached_start = search_start; btrfs_init_path(path); @@ -1610,6 +1636,8 @@ enospc: error: btrfs_release_path(root, path); btrfs_free_path(path); + if (btrfs_test_opt(root, SSD) && !ret && !data) + info->last_alloc = ins->objectid + ins->offset; return ret; } /* @@ -1778,7 +1806,8 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, buf->start, buf->start + buf->len - 1, EXTENT_CSUM, GFP_NOFS); buf->flags |= EXTENT_CSUM; - btrfs_set_buffer_defrag(buf); + if (!btrfs_test_opt(root, SSD)) + btrfs_set_buffer_defrag(buf); trans->blocks_used++; return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index e506de3168b..b6982f0133c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -64,7 +64,7 @@ static void btrfs_put_super (struct super_block * sb) enum { Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, - Opt_alloc_start, Opt_nobarrier, Opt_err, + Opt_alloc_start, Opt_nobarrier, Opt_ssd, Opt_err, }; static match_table_t tokens = { @@ -74,6 +74,7 @@ static match_table_t tokens = { {Opt_nobarrier, "nobarrier"}, {Opt_max_extent, "max_extent=%s"}, {Opt_alloc_start, "alloc_start=%s"}, + {Opt_ssd, "ssd"}, {Opt_err, NULL} }; @@ -149,6 +150,12 @@ static int parse_options (char * options, btrfs_set_opt(info->mount_opt, NODATASUM); } break; + case Opt_ssd: + if (info) { + printk("btrfs: use ssd allocation scheme\n"); + btrfs_set_opt(info->mount_opt, SSD); + } + break; case Opt_nobarrier: if (info) { printk("btrfs: turning off barriers\n"); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b6bbfc179c2..163c01a2449 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -57,6 +57,7 @@ static int join_transaction(struct btrfs_root *root) BUG_ON(!cur_trans); root->fs_info->generation++; root->fs_info->running_transaction = cur_trans; + root->fs_info->last_alloc = 0; cur_trans->num_writers = 1; cur_trans->num_joined = 0; cur_trans->transid = root->fs_info->generation; diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 5c58630dce0..5935cbd8f2b 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -179,6 +179,9 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, if (root->ref_cows == 0 && !is_extent) goto out; + if (btrfs_test_opt(root, SSD)) + goto out; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; -- cgit v1.2.3-70-g09d2 From d1310b2e0cd98eb1348553e69b73827b436dca7b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 Jan 2008 16:13:08 -0500 Subject: Btrfs: Split the extent_map code into two parts There is now extent_map for mapping offsets in the file to disk and extent_io for state tracking, IO submission and extent_bufers. The new extent_map code shifts from [start,end] pairs to [start,len], and pushes the locking out into the caller. This allows a few performance optimizations and is easier to use. A number of extent_map usage bugs were fixed, mostly with failing to remove extent_map entries when changing the file. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 3 +- fs/btrfs/btrfs_inode.h | 2 + fs/btrfs/ctree.h | 15 +- fs/btrfs/disk-io.c | 123 +- fs/btrfs/extent-tree.c | 43 +- fs/btrfs/extent_io.c | 3089 +++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/extent_io.h | 193 +++ fs/btrfs/extent_map.c | 3119 ++---------------------------------------------- fs/btrfs/extent_map.h | 214 +--- fs/btrfs/file.c | 29 +- fs/btrfs/inode.c | 175 +-- fs/btrfs/super.c | 10 +- fs/btrfs/transaction.c | 8 +- fs/btrfs/transaction.h | 2 +- 14 files changed, 3612 insertions(+), 3413 deletions(-) create mode 100644 fs/btrfs/extent_io.c create mode 100644 fs/btrfs/extent_io.h (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 72cc3e16741..9c61609f371 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -5,7 +5,8 @@ obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ - extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o + extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ + extent_io.o ifeq ($(CONFIG_FS_POSIX_ACL),y) btrfs-y += acl.o diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 9fc99883c70..9a0647dc5a0 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -20,6 +20,7 @@ #define __BTRFS_I__ #include "extent_map.h" +#include "extent_io.h" /* in memory btrfs inode */ struct btrfs_inode { @@ -27,6 +28,7 @@ struct btrfs_inode { struct btrfs_block_group_cache *block_group; struct btrfs_key location; struct extent_map_tree extent_tree; + struct extent_io_tree io_tree; struct inode vfs_inode; u64 ordered_trans; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b9f2975b55c..6c65473e0fe 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -27,6 +27,7 @@ #include #include #include "bit-radix.h" +#include "extent_io.h" #include "extent_map.h" struct btrfs_trans_handle; @@ -314,11 +315,11 @@ struct btrfs_fs_info { struct btrfs_root *tree_root; struct radix_tree_root fs_roots_radix; - struct extent_map_tree free_space_cache; - struct extent_map_tree block_group_cache; - struct extent_map_tree pinned_extents; - struct extent_map_tree pending_del; - struct extent_map_tree extent_ins; + struct extent_io_tree free_space_cache; + struct extent_io_tree block_group_cache; + struct extent_io_tree pinned_extents; + struct extent_io_tree pending_del; + struct extent_io_tree extent_ins; u64 generation; u64 last_trans_committed; @@ -956,7 +957,7 @@ u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, u64 first_extent); int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy); +int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 bytenr); @@ -1001,7 +1002,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root u64 owner_objectid, u64 owner_offset, int pin); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_map_tree *unpin); + struct extent_io_tree *unpin); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5d1f9bca271..4c4ebea0b2a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -43,14 +43,14 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) } #endif -static struct extent_map_ops btree_extent_map_ops; +static struct extent_io_ops btree_extent_io_ops; struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; - eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr, blocksize, GFP_NOFS); return eb; } @@ -61,13 +61,13 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; - eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr, blocksize, NULL, GFP_NOFS); return eb; } struct extent_map *btree_get_extent(struct inode *inode, struct page *page, - size_t page_offset, u64 start, u64 end, + size_t page_offset, u64 start, u64 len, int create) { struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; @@ -75,7 +75,9 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, int ret; again: - em = lookup_extent_mapping(em_tree, start, end); + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, start, len); + spin_unlock(&em_tree->lock); if (em) { goto out; } @@ -85,11 +87,14 @@ again: goto out; } em->start = 0; - em->end = (i_size_read(inode) & ~((u64)PAGE_CACHE_SIZE -1)) - 1; + em->len = i_size_read(inode); em->block_start = 0; - em->block_end = em->end; em->bdev = inode->i_sb->s_bdev; + + spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + if (ret == -EEXIST) { free_extent_map(em); em = NULL; @@ -175,13 +180,13 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int csum_dirty_buffer(struct btrfs_root *root, struct page *page) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 found_start; int found_level; unsigned long len; struct extent_buffer *eb; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; if (page->private == EXTENT_PAGE_PRIVATE) goto out; @@ -230,16 +235,16 @@ static int btree_writepage_io_hook(struct page *page, u64 start, u64 end) static int btree_writepage(struct page *page, struct writeback_control *wbc) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; return extent_write_full_page(tree, page, btree_get_extent, wbc); } static int btree_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct extent_map_tree *tree; - tree = &BTRFS_I(mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(mapping->host)->io_tree; if (wbc->sync_mode == WB_SYNC_NONE) { u64 num_dirty; u64 start = 0; @@ -264,18 +269,20 @@ static int btree_writepages(struct address_space *mapping, int btree_readpage(struct file *file, struct page *page) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; return extent_read_full_page(tree, page, btree_get_extent); } static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; + struct extent_map_tree *map; int ret; - tree = &BTRFS_I(page->mapping->host)->extent_tree; - ret = try_release_extent_mapping(tree, page); + tree = &BTRFS_I(page->mapping->host)->io_tree; + map = &BTRFS_I(page->mapping->host)->extent_tree; + ret = try_release_extent_mapping(map, tree, page); if (ret == 1) { ClearPagePrivate(page); set_page_private(page, 0); @@ -286,8 +293,8 @@ static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) static void btree_invalidatepage(struct page *page, unsigned long offset) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; extent_invalidatepage(tree, page, offset); btree_releasepage(page, GFP_NOFS); } @@ -331,7 +338,7 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return 0; - read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 0); free_extent_buffer(buf); return ret; @@ -342,40 +349,39 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; - struct extent_map_tree *extent_tree; + struct extent_io_tree *io_tree; u64 end; int ret; - extent_tree = &BTRFS_I(btree_inode)->extent_tree; + io_tree = &BTRFS_I(btree_inode)->io_tree; buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return NULL; - read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, - buf, 0, 1); + read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1); if (buf->flags & EXTENT_CSUM) return buf; end = buf->start + PAGE_CACHE_SIZE - 1; - if (test_range_bit(extent_tree, buf->start, end, EXTENT_CSUM, 1)) { + if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { buf->flags |= EXTENT_CSUM; return buf; } - lock_extent(extent_tree, buf->start, end, GFP_NOFS); + lock_extent(io_tree, buf->start, end, GFP_NOFS); - if (test_range_bit(extent_tree, buf->start, end, EXTENT_CSUM, 1)) { + if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { buf->flags |= EXTENT_CSUM; goto out_unlock; } ret = csum_tree_block(root, buf, 1); - set_extent_bits(extent_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS); + set_extent_bits(io_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS); buf->flags |= EXTENT_CSUM; out_unlock: - unlock_extent(extent_tree, buf->start, end, GFP_NOFS); + unlock_extent(io_tree, buf->start, end, GFP_NOFS); return buf; } @@ -385,7 +391,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *btree_inode = root->fs_info->btree_inode; if (btrfs_header_generation(buf) == root->fs_info->running_transaction->transid) - clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, + clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); return 0; } @@ -394,7 +400,7 @@ int wait_on_tree_block_writeback(struct btrfs_root *root, struct extent_buffer *buf) { struct inode *btree_inode = root->fs_info->btree_inode; - wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->extent_tree, + wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->io_tree, buf); return 0; } @@ -659,20 +665,23 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; - extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, + extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, fs_info->btree_inode->i_mapping, GFP_NOFS); - BTRFS_I(fs_info->btree_inode)->extent_tree.ops = &btree_extent_map_ops; + extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, + GFP_NOFS); + + BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; - extent_map_tree_init(&fs_info->free_space_cache, + extent_io_tree_init(&fs_info->free_space_cache, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->block_group_cache, + extent_io_tree_init(&fs_info->block_group_cache, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->pinned_extents, + extent_io_tree_init(&fs_info->pinned_extents, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->pending_del, + extent_io_tree_init(&fs_info->pending_del, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->extent_ins, + extent_io_tree_init(&fs_info->extent_ins, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; @@ -787,7 +796,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root if (!btrfs_test_opt(root, NOBARRIER)) blkdev_issue_flush(sb->s_bdev, NULL); - set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, super); + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, super); ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping, super->start, super->len); if (!btrfs_test_opt(root, NOBARRIER)) @@ -864,12 +873,12 @@ int close_ctree(struct btrfs_root *root) filemap_write_and_wait(fs_info->btree_inode->i_mapping); - extent_map_tree_empty_lru(&fs_info->free_space_cache); - extent_map_tree_empty_lru(&fs_info->block_group_cache); - extent_map_tree_empty_lru(&fs_info->pinned_extents); - extent_map_tree_empty_lru(&fs_info->pending_del); - extent_map_tree_empty_lru(&fs_info->extent_ins); - extent_map_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->extent_tree); + extent_io_tree_empty_lru(&fs_info->free_space_cache); + extent_io_tree_empty_lru(&fs_info->block_group_cache); + extent_io_tree_empty_lru(&fs_info->pinned_extents); + extent_io_tree_empty_lru(&fs_info->pending_del); + extent_io_tree_empty_lru(&fs_info->extent_ins); + extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); @@ -892,13 +901,13 @@ int close_ctree(struct btrfs_root *root) int btrfs_buffer_uptodate(struct extent_buffer *buf) { struct inode *btree_inode = buf->first_page->mapping->host; - return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); + return extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { struct inode *btree_inode = buf->first_page->mapping->host; - return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, + return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); } @@ -914,7 +923,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) transid, root->fs_info->generation); WARN_ON(1); } - set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); } void btrfs_throttle(struct btrfs_root *root) @@ -941,7 +950,7 @@ void btrfs_set_buffer_defrag(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, + set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); } @@ -949,7 +958,7 @@ void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, + set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, GFP_NOFS); } @@ -958,7 +967,7 @@ int btrfs_buffer_defrag(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, + return test_range_bit(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0); } @@ -966,7 +975,7 @@ int btrfs_buffer_defrag_done(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, + return test_range_bit(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, 0); } @@ -975,7 +984,7 @@ int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, + return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, GFP_NOFS); } @@ -984,7 +993,7 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, + return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); } @@ -993,10 +1002,10 @@ int btrfs_read_buffer(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + return read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1); } -static struct extent_map_ops btree_extent_map_ops = { +static struct extent_io_ops btree_extent_io_ops = { .writepage_io_hook = btree_writepage_io_hook, }; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b69a46691a9..1cf125ab782 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -63,7 +63,7 @@ static int cache_block_group(struct btrfs_root *root, int ret; struct btrfs_key key; struct extent_buffer *leaf; - struct extent_map_tree *free_space_cache; + struct extent_io_tree *free_space_cache; int slot; u64 last = 0; u64 hole_size; @@ -158,7 +158,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 bytenr) { - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_block_group_cache *block_group = NULL; u64 ptr; u64 start; @@ -281,7 +281,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int data, int owner) { struct btrfs_block_group_cache *cache; - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_block_group_cache *found_group = NULL; struct btrfs_fs_info *info = root->fs_info; u64 used; @@ -951,7 +951,7 @@ fail: int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_block_group_cache *cache; int ret; int err = 0; @@ -1107,12 +1107,12 @@ static int update_pinned_extents(struct btrfs_root *root, return 0; } -int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy) +int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) { u64 last = 0; u64 start; u64 end; - struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents; + struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; int ret; while(1) { @@ -1128,12 +1128,12 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy) int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_map_tree *unpin) + struct extent_io_tree *unpin) { u64 start; u64 end; int ret; - struct extent_map_tree *free_space_cache; + struct extent_io_tree *free_space_cache; free_space_cache = &root->fs_info->free_space_cache; while(1) { @@ -1329,8 +1329,8 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct int err = 0; u64 start; u64 end; - struct extent_map_tree *pending_del; - struct extent_map_tree *pinned_extents; + struct extent_io_tree *pending_del; + struct extent_io_tree *pinned_extents; pending_del = &extent_root->fs_info->pending_del; pinned_extents = &extent_root->fs_info->pinned_extents; @@ -1802,7 +1802,7 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); - set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree, + set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_CSUM, GFP_NOFS); buf->flags |= EXTENT_CSUM; @@ -2166,7 +2166,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, unsigned long i; struct page *page; struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct file_ra_state *ra; ra = kzalloc(sizeof(*ra), GFP_NOFS); @@ -2195,15 +2195,14 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; - lock_extent(em_tree, page_start, page_end, GFP_NOFS); + lock_extent(io_tree, page_start, page_end, GFP_NOFS); delalloc_start = page_start; - existing_delalloc = - count_range_bits(&BTRFS_I(inode)->extent_tree, - &delalloc_start, page_end, - PAGE_CACHE_SIZE, EXTENT_DELALLOC); + existing_delalloc = count_range_bits(io_tree, + &delalloc_start, page_end, + PAGE_CACHE_SIZE, EXTENT_DELALLOC); - set_extent_delalloc(em_tree, page_start, + set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); spin_lock(&root->fs_info->delalloc_lock); @@ -2211,7 +2210,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, existing_delalloc; spin_unlock(&root->fs_info->delalloc_lock); - unlock_extent(em_tree, page_start, page_end, GFP_NOFS); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); set_page_dirty(page); unlock_page(page); page_cache_release(page); @@ -2379,7 +2378,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) u64 cur_byte; u64 total_found; struct btrfs_fs_info *info = root->fs_info; - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; @@ -2561,7 +2560,7 @@ int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *cache; struct btrfs_block_group_item *item; struct btrfs_fs_info *info = root->fs_info; - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_key key; struct extent_buffer *leaf; int ret; @@ -2645,7 +2644,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) int bit; struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c new file mode 100644 index 00000000000..15cc158a049 --- /dev/null +++ b/fs/btrfs/extent_io.c @@ -0,0 +1,3089 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "extent_io.h" +#include "extent_map.h" + +/* temporary define until extent_map moves out of btrfs */ +struct kmem_cache *btrfs_cache_create(const char *name, size_t size, + unsigned long extra_flags, + void (*ctor)(void *, struct kmem_cache *, + unsigned long)); + +static struct kmem_cache *extent_state_cache; +static struct kmem_cache *extent_buffer_cache; + +static LIST_HEAD(buffers); +static LIST_HEAD(states); + +static spinlock_t state_lock = SPIN_LOCK_UNLOCKED; +#define BUFFER_LRU_MAX 64 + +struct tree_entry { + u64 start; + u64 end; + int in_tree; + struct rb_node rb_node; +}; + +struct extent_page_data { + struct bio *bio; + struct extent_io_tree *tree; + get_extent_t *get_extent; +}; + +int __init extent_io_init(void) +{ + extent_state_cache = btrfs_cache_create("extent_state", + sizeof(struct extent_state), 0, + NULL); + if (!extent_state_cache) + return -ENOMEM; + + extent_buffer_cache = btrfs_cache_create("extent_buffers", + sizeof(struct extent_buffer), 0, + NULL); + if (!extent_buffer_cache) + goto free_state_cache; + return 0; + +free_state_cache: + kmem_cache_destroy(extent_state_cache); + return -ENOMEM; +} + +void extent_io_exit(void) +{ + struct extent_state *state; + + while (!list_empty(&states)) { + state = list_entry(states.next, struct extent_state, list); + printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs)); + list_del(&state->list); + kmem_cache_free(extent_state_cache, state); + + } + + if (extent_state_cache) + kmem_cache_destroy(extent_state_cache); + if (extent_buffer_cache) + kmem_cache_destroy(extent_buffer_cache); +} + +void extent_io_tree_init(struct extent_io_tree *tree, + struct address_space *mapping, gfp_t mask) +{ + tree->state.rb_node = NULL; + tree->ops = NULL; + tree->dirty_bytes = 0; + rwlock_init(&tree->lock); + spin_lock_init(&tree->lru_lock); + tree->mapping = mapping; + INIT_LIST_HEAD(&tree->buffer_lru); + tree->lru_size = 0; +} +EXPORT_SYMBOL(extent_io_tree_init); + +void extent_io_tree_empty_lru(struct extent_io_tree *tree) +{ + struct extent_buffer *eb; + while(!list_empty(&tree->buffer_lru)) { + eb = list_entry(tree->buffer_lru.next, struct extent_buffer, + lru); + list_del_init(&eb->lru); + free_extent_buffer(eb); + } +} +EXPORT_SYMBOL(extent_io_tree_empty_lru); + +struct extent_state *alloc_extent_state(gfp_t mask) +{ + struct extent_state *state; + unsigned long flags; + + state = kmem_cache_alloc(extent_state_cache, mask); + if (!state || IS_ERR(state)) + return state; + state->state = 0; + state->in_tree = 0; + state->private = 0; + + spin_lock_irqsave(&state_lock, flags); + list_add(&state->list, &states); + spin_unlock_irqrestore(&state_lock, flags); + + atomic_set(&state->refs, 1); + init_waitqueue_head(&state->wq); + return state; +} +EXPORT_SYMBOL(alloc_extent_state); + +void free_extent_state(struct extent_state *state) +{ + unsigned long flags; + if (!state) + return; + if (atomic_dec_and_test(&state->refs)) { + WARN_ON(state->in_tree); + spin_lock_irqsave(&state_lock, flags); + list_del(&state->list); + spin_unlock_irqrestore(&state_lock, flags); + kmem_cache_free(extent_state_cache, state); + } +} +EXPORT_SYMBOL(free_extent_state); + +static struct rb_node *tree_insert(struct rb_root *root, u64 offset, + struct rb_node *node) +{ + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; + struct tree_entry *entry; + + while(*p) { + parent = *p; + entry = rb_entry(parent, struct tree_entry, rb_node); + + if (offset < entry->start) + p = &(*p)->rb_left; + else if (offset > entry->end) + p = &(*p)->rb_right; + else + return parent; + } + + entry = rb_entry(node, struct tree_entry, rb_node); + entry->in_tree = 1; + rb_link_node(node, parent, p); + rb_insert_color(node, root); + return NULL; +} + +static struct rb_node *__tree_search(struct rb_root *root, u64 offset, + struct rb_node **prev_ret, + struct rb_node **next_ret) +{ + struct rb_node * n = root->rb_node; + struct rb_node *prev = NULL; + struct rb_node *orig_prev = NULL; + struct tree_entry *entry; + struct tree_entry *prev_entry = NULL; + + while(n) { + entry = rb_entry(n, struct tree_entry, rb_node); + prev = n; + prev_entry = entry; + + if (offset < entry->start) + n = n->rb_left; + else if (offset > entry->end) + n = n->rb_right; + else + return n; + } + + if (prev_ret) { + orig_prev = prev; + while(prev && offset > prev_entry->end) { + prev = rb_next(prev); + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + } + *prev_ret = prev; + prev = orig_prev; + } + + if (next_ret) { + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + while(prev && offset < prev_entry->start) { + prev = rb_prev(prev); + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + } + *next_ret = prev; + } + return NULL; +} + +static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) +{ + struct rb_node *prev; + struct rb_node *ret; + ret = __tree_search(root, offset, &prev, NULL); + if (!ret) + return prev; + return ret; +} + +/* + * utility function to look for merge candidates inside a given range. + * Any extents with matching state are merged together into a single + * extent in the tree. Extents with EXTENT_IO in their state field + * are not merged because the end_io handlers need to be able to do + * operations on them without sleeping (or doing allocations/splits). + * + * This should be called with the tree lock held. + */ +static int merge_state(struct extent_io_tree *tree, + struct extent_state *state) +{ + struct extent_state *other; + struct rb_node *other_node; + + if (state->state & EXTENT_IOBITS) + return 0; + + other_node = rb_prev(&state->rb_node); + if (other_node) { + other = rb_entry(other_node, struct extent_state, rb_node); + if (other->end == state->start - 1 && + other->state == state->state) { + state->start = other->start; + other->in_tree = 0; + rb_erase(&other->rb_node, &tree->state); + free_extent_state(other); + } + } + other_node = rb_next(&state->rb_node); + if (other_node) { + other = rb_entry(other_node, struct extent_state, rb_node); + if (other->start == state->end + 1 && + other->state == state->state) { + other->start = state->start; + state->in_tree = 0; + rb_erase(&state->rb_node, &tree->state); + free_extent_state(state); + } + } + return 0; +} + +/* + * insert an extent_state struct into the tree. 'bits' are set on the + * struct before it is inserted. + * + * This may return -EEXIST if the extent is already there, in which case the + * state struct is freed. + * + * The tree lock is not taken internally. This is a utility function and + * probably isn't what you want to call (see set/clear_extent_bit). + */ +static int insert_state(struct extent_io_tree *tree, + struct extent_state *state, u64 start, u64 end, + int bits) +{ + struct rb_node *node; + + if (end < start) { + printk("end < start %Lu %Lu\n", end, start); + WARN_ON(1); + } + if (bits & EXTENT_DIRTY) + tree->dirty_bytes += end - start + 1; + state->state |= bits; + state->start = start; + state->end = end; + node = tree_insert(&tree->state, end, &state->rb_node); + if (node) { + struct extent_state *found; + found = rb_entry(node, struct extent_state, rb_node); + printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); + free_extent_state(state); + return -EEXIST; + } + merge_state(tree, state); + return 0; +} + +/* + * split a given extent state struct in two, inserting the preallocated + * struct 'prealloc' as the newly created second half. 'split' indicates an + * offset inside 'orig' where it should be split. + * + * Before calling, + * the tree has 'orig' at [orig->start, orig->end]. After calling, there + * are two extent state structs in the tree: + * prealloc: [orig->start, split - 1] + * orig: [ split, orig->end ] + * + * The tree locks are not taken by this function. They need to be held + * by the caller. + */ +static int split_state(struct extent_io_tree *tree, struct extent_state *orig, + struct extent_state *prealloc, u64 split) +{ + struct rb_node *node; + prealloc->start = orig->start; + prealloc->end = split - 1; + prealloc->state = orig->state; + orig->start = split; + + node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); + if (node) { + struct extent_state *found; + found = rb_entry(node, struct extent_state, rb_node); + printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); + free_extent_state(prealloc); + return -EEXIST; + } + return 0; +} + +/* + * utility function to clear some bits in an extent state struct. + * it will optionally wake up any one waiting on this state (wake == 1), or + * forcibly remove the state from the tree (delete == 1). + * + * If no bits are set on the state struct after clearing things, the + * struct is freed and removed from the tree + */ +static int clear_state_bit(struct extent_io_tree *tree, + struct extent_state *state, int bits, int wake, + int delete) +{ + int ret = state->state & bits; + + if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { + u64 range = state->end - state->start + 1; + WARN_ON(range > tree->dirty_bytes); + tree->dirty_bytes -= range; + } + state->state &= ~bits; + if (wake) + wake_up(&state->wq); + if (delete || state->state == 0) { + if (state->in_tree) { + rb_erase(&state->rb_node, &tree->state); + state->in_tree = 0; + free_extent_state(state); + } else { + WARN_ON(1); + } + } else { + merge_state(tree, state); + } + return ret; +} + +/* + * clear some bits on a range in the tree. This may require splitting + * or inserting elements in the tree, so the gfp mask is used to + * indicate which allocations or sleeping are allowed. + * + * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove + * the given range from the tree regardless of state (ie for truncate). + * + * the range [start, end] is inclusive. + * + * This takes the tree lock, and returns < 0 on error, > 0 if any of the + * bits were already set, or zero if none of the bits were already set. + */ +int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int wake, int delete, gfp_t mask) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct rb_node *node; + unsigned long flags; + int err; + int set = 0; + +again: + if (!prealloc && (mask & __GFP_WAIT)) { + prealloc = alloc_extent_state(mask); + if (!prealloc) + return -ENOMEM; + } + + write_lock_irqsave(&tree->lock, flags); + /* + * this search will find the extents that end after + * our range starts + */ + node = tree_search(&tree->state, start); + if (!node) + goto out; + state = rb_entry(node, struct extent_state, rb_node); + if (state->start > end) + goto out; + WARN_ON(state->end < start); + + /* + * | ---- desired range ---- | + * | state | or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip + * bits on second half. + * + * If the extent we found extends past our range, we + * just split and search again. It'll get split again + * the next time though. + * + * If the extent we found is inside our range, we clear + * the desired bit on it. + */ + + if (state->start < start) { + err = split_state(tree, state, prealloc, start); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + if (state->end <= end) { + start = state->end + 1; + set |= clear_state_bit(tree, state, bits, + wake, delete); + } else { + start = state->start; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and clear the bit + * on the first half + */ + if (state->start <= end && state->end > end) { + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + if (wake) + wake_up(&state->wq); + set |= clear_state_bit(tree, prealloc, bits, + wake, delete); + prealloc = NULL; + goto out; + } + + start = state->end + 1; + set |= clear_state_bit(tree, state, bits, wake, delete); + goto search_again; + +out: + write_unlock_irqrestore(&tree->lock, flags); + if (prealloc) + free_extent_state(prealloc); + + return set; + +search_again: + if (start > end) + goto out; + write_unlock_irqrestore(&tree->lock, flags); + if (mask & __GFP_WAIT) + cond_resched(); + goto again; +} +EXPORT_SYMBOL(clear_extent_bit); + +static int wait_on_state(struct extent_io_tree *tree, + struct extent_state *state) +{ + DEFINE_WAIT(wait); + prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); + read_unlock_irq(&tree->lock); + schedule(); + read_lock_irq(&tree->lock); + finish_wait(&state->wq, &wait); + return 0; +} + +/* + * waits for one or more bits to clear on a range in the state tree. + * The range [start, end] is inclusive. + * The tree lock is taken by this function + */ +int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits) +{ + struct extent_state *state; + struct rb_node *node; + + read_lock_irq(&tree->lock); +again: + while (1) { + /* + * this search will find all the extents that end after + * our range starts + */ + node = tree_search(&tree->state, start); + if (!node) + break; + + state = rb_entry(node, struct extent_state, rb_node); + + if (state->start > end) + goto out; + + if (state->state & bits) { + start = state->start; + atomic_inc(&state->refs); + wait_on_state(tree, state); + free_extent_state(state); + goto again; + } + start = state->end + 1; + + if (start > end) + break; + + if (need_resched()) { + read_unlock_irq(&tree->lock); + cond_resched(); + read_lock_irq(&tree->lock); + } + } +out: + read_unlock_irq(&tree->lock); + return 0; +} +EXPORT_SYMBOL(wait_extent_bit); + +static void set_state_bits(struct extent_io_tree *tree, + struct extent_state *state, + int bits) +{ + if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { + u64 range = state->end - state->start + 1; + tree->dirty_bytes += range; + } + state->state |= bits; +} + +/* + * set some bits on a range in the tree. This may require allocations + * or sleeping, so the gfp mask is used to indicate what is allowed. + * + * If 'exclusive' == 1, this will fail with -EEXIST if some part of the + * range already has the desired bits set. The start of the existing + * range is returned in failed_start in this case. + * + * [start, end] is inclusive + * This takes the tree lock. + */ +int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, + int exclusive, u64 *failed_start, gfp_t mask) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct rb_node *node; + unsigned long flags; + int err = 0; + int set; + u64 last_start; + u64 last_end; +again: + if (!prealloc && (mask & __GFP_WAIT)) { + prealloc = alloc_extent_state(mask); + if (!prealloc) + return -ENOMEM; + } + + write_lock_irqsave(&tree->lock, flags); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node) { + err = insert_state(tree, prealloc, start, end, bits); + prealloc = NULL; + BUG_ON(err == -EEXIST); + goto out; + } + + state = rb_entry(node, struct extent_state, rb_node); + last_start = state->start; + last_end = state->end; + + /* + * | ---- desired range ---- | + * | state | + * + * Just lock what we found and keep going + */ + if (state->start == start && state->end <= end) { + set = state->state & bits; + if (set && exclusive) { + *failed_start = state->start; + err = -EEXIST; + goto out; + } + set_state_bits(tree, state, bits); + start = state->end + 1; + merge_state(tree, state); + goto search_again; + } + + /* + * | ---- desired range ---- | + * | state | + * or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip bits on + * second half. + * + * If the extent we found extends past our + * range, we just split and search again. It'll get split + * again the next time though. + * + * If the extent we found is inside our range, we set the + * desired bit on it. + */ + if (state->start < start) { + set = state->state & bits; + if (exclusive && set) { + *failed_start = start; + err = -EEXIST; + goto out; + } + err = split_state(tree, state, prealloc, start); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + if (state->end <= end) { + set_state_bits(tree, state, bits); + start = state->end + 1; + merge_state(tree, state); + } else { + start = state->start; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | or | state | + * + * There's a hole, we need to insert something in it and + * ignore the extent we found. + */ + if (state->start > start) { + u64 this_end; + if (end < last_start) + this_end = end; + else + this_end = last_start -1; + err = insert_state(tree, prealloc, start, this_end, + bits); + prealloc = NULL; + BUG_ON(err == -EEXIST); + if (err) + goto out; + start = this_end + 1; + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and set the bit + * on the first half + */ + if (state->start <= end && state->end > end) { + set = state->state & bits; + if (exclusive && set) { + *failed_start = start; + err = -EEXIST; + goto out; + } + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + set_state_bits(tree, prealloc, bits); + merge_state(tree, prealloc); + prealloc = NULL; + goto out; + } + + goto search_again; + +out: + write_unlock_irqrestore(&tree->lock, flags); + if (prealloc) + free_extent_state(prealloc); + + return err; + +search_again: + if (start > end) + goto out; + write_unlock_irqrestore(&tree->lock, flags); + if (mask & __GFP_WAIT) + cond_resched(); + goto again; +} +EXPORT_SYMBOL(set_extent_bit); + +/* wrappers around set/clear extent bit */ +int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_dirty); + +int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + int bits, gfp_t mask) +{ + return set_extent_bit(tree, start, end, bits, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_bits); + +int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + int bits, gfp_t mask) +{ + return clear_extent_bit(tree, start, end, bits, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_bits); + +int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, + EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_delalloc); + +int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_dirty); + +int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_new); + +int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_new); + +int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_uptodate); + +int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_uptodate); + +int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, + 0, NULL, mask); +} +EXPORT_SYMBOL(set_extent_writeback); + +int clear_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask); +} +EXPORT_SYMBOL(clear_extent_writeback); + +int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) +{ + return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK); +} +EXPORT_SYMBOL(wait_on_extent_writeback); + +/* + * locks a range in ascending order, waiting for any locked regions + * it hits on the way. [start,end] are inclusive, and this will sleep. + */ +int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) +{ + int err; + u64 failed_start; + while (1) { + err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, + &failed_start, mask); + if (err == -EEXIST && (mask & __GFP_WAIT)) { + wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); + start = failed_start; + } else { + break; + } + WARN_ON(start > end); + } + return err; +} +EXPORT_SYMBOL(lock_extent); + +int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); +} +EXPORT_SYMBOL(unlock_extent); + +/* + * helper function to set pages and extents in the tree dirty + */ +int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + BUG_ON(!page); + __set_page_dirty_nobuffers(page); + page_cache_release(page); + index++; + } + set_extent_dirty(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(set_range_dirty); + +/* + * helper function to set both pages and extents in the tree writeback + */ +int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + BUG_ON(!page); + set_page_writeback(page); + page_cache_release(page); + index++; + } + set_extent_writeback(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(set_range_writeback); + +int find_first_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 1; + + read_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->end >= start && (state->state & bits)) { + *start_ret = state->start; + *end_ret = state->end; + ret = 0; + break; + } + node = rb_next(node); + if (!node) + break; + } +out: + read_unlock_irq(&tree->lock); + return ret; +} +EXPORT_SYMBOL(find_first_extent_bit); + +u64 find_lock_delalloc_range(struct extent_io_tree *tree, + u64 *start, u64 *end, u64 max_bytes) +{ + struct rb_node *node; + struct extent_state *state; + u64 cur_start = *start; + u64 found = 0; + u64 total_bytes = 0; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ +search_again: + node = tree_search(&tree->state, cur_start); + if (!node || IS_ERR(node)) { + *end = (u64)-1; + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (found && state->start != cur_start) { + goto out; + } + if (!(state->state & EXTENT_DELALLOC)) { + if (!found) + *end = state->end; + goto out; + } + if (!found) { + struct extent_state *prev_state; + struct rb_node *prev_node = node; + while(1) { + prev_node = rb_prev(prev_node); + if (!prev_node) + break; + prev_state = rb_entry(prev_node, + struct extent_state, + rb_node); + if (!(prev_state->state & EXTENT_DELALLOC)) + break; + state = prev_state; + node = prev_node; + } + } + if (state->state & EXTENT_LOCKED) { + DEFINE_WAIT(wait); + atomic_inc(&state->refs); + prepare_to_wait(&state->wq, &wait, + TASK_UNINTERRUPTIBLE); + write_unlock_irq(&tree->lock); + schedule(); + write_lock_irq(&tree->lock); + finish_wait(&state->wq, &wait); + free_extent_state(state); + goto search_again; + } + state->state |= EXTENT_LOCKED; + if (!found) + *start = state->start; + found++; + *end = state->end; + cur_start = state->end + 1; + node = rb_next(node); + if (!node) + break; + total_bytes += state->end - state->start + 1; + if (total_bytes >= max_bytes) + break; + } +out: + write_unlock_irq(&tree->lock); + return found; +} + +u64 count_range_bits(struct extent_io_tree *tree, + u64 *start, u64 search_end, u64 max_bytes, + unsigned long bits) +{ + struct rb_node *node; + struct extent_state *state; + u64 cur_start = *start; + u64 total_bytes = 0; + int found = 0; + + if (search_end <= cur_start) { + printk("search_end %Lu start %Lu\n", search_end, cur_start); + WARN_ON(1); + return 0; + } + + write_lock_irq(&tree->lock); + if (cur_start == 0 && bits == EXTENT_DIRTY) { + total_bytes = tree->dirty_bytes; + goto out; + } + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, cur_start); + if (!node || IS_ERR(node)) { + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->start > search_end) + break; + if (state->end >= cur_start && (state->state & bits)) { + total_bytes += min(search_end, state->end) + 1 - + max(cur_start, state->start); + if (total_bytes >= max_bytes) + break; + if (!found) { + *start = state->start; + found = 1; + } + } + node = rb_next(node); + if (!node) + break; + } +out: + write_unlock_irq(&tree->lock); + return total_bytes; +} +/* + * helper function to lock both pages and extents in the tree. + * pages must be locked first. + */ +int lock_range(struct extent_io_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + int err; + + while (index <= end_index) { + page = grab_cache_page(tree->mapping, index); + if (!page) { + err = -ENOMEM; + goto failed; + } + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto failed; + } + index++; + } + lock_extent(tree, start, end, GFP_NOFS); + return 0; + +failed: + /* + * we failed above in getting the page at 'index', so we undo here + * up to but not including the page at 'index' + */ + end_index = index; + index = start >> PAGE_CACHE_SHIFT; + while (index < end_index) { + page = find_get_page(tree->mapping, index); + unlock_page(page); + page_cache_release(page); + index++; + } + return err; +} +EXPORT_SYMBOL(lock_range); + +/* + * helper function to unlock both pages and extents in the tree. + */ +int unlock_range(struct extent_io_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + unlock_page(page); + page_cache_release(page); + index++; + } + unlock_extent(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(unlock_range); + +int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 0; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + ret = -ENOENT; + goto out; + } + state = rb_entry(node, struct extent_state, rb_node); + if (state->start != start) { + ret = -ENOENT; + goto out; + } + state->private = private; +out: + write_unlock_irq(&tree->lock); + return ret; +} + +int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 0; + + read_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + ret = -ENOENT; + goto out; + } + state = rb_entry(node, struct extent_state, rb_node); + if (state->start != start) { + ret = -ENOENT; + goto out; + } + *private = state->private; +out: + read_unlock_irq(&tree->lock); + return ret; +} + +/* + * searches a range in the state tree for a given mask. + * If 'filled' == 1, this returns 1 only if ever extent in the tree + * has the bits set. Otherwise, 1 is returned if any bit in the + * range is found set. + */ +int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int filled) +{ + struct extent_state *state = NULL; + struct rb_node *node; + int bitset = 0; + unsigned long flags; + + read_lock_irqsave(&tree->lock, flags); + node = tree_search(&tree->state, start); + while (node && start <= end) { + state = rb_entry(node, struct extent_state, rb_node); + + if (filled && state->start > start) { + bitset = 0; + break; + } + + if (state->start > end) + break; + + if (state->state & bits) { + bitset = 1; + if (!filled) + break; + } else if (filled) { + bitset = 0; + break; + } + start = state->end + 1; + if (start > end) + break; + node = rb_next(node); + if (!node) { + if (filled) + bitset = 0; + break; + } + } + read_unlock_irqrestore(&tree->lock, flags); + return bitset; +} +EXPORT_SYMBOL(test_range_bit); + +/* + * helper function to set a given page up to date if all the + * extents in the tree for that page are up to date + */ +static int check_page_uptodate(struct extent_io_tree *tree, + struct page *page) +{ + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) + SetPageUptodate(page); + return 0; +} + +/* + * helper function to unlock a page if all the extents in the tree + * for that page are unlocked + */ +static int check_page_locked(struct extent_io_tree *tree, + struct page *page) +{ + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) + unlock_page(page); + return 0; +} + +/* + * helper function to end page writeback if all the extents + * in the tree for that page are done with writeback + */ +static int check_page_writeback(struct extent_io_tree *tree, + struct page *page) +{ + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) + end_page_writeback(page); + return 0; +} + +/* lots and lots of room for performance fixes in the end_bio funcs */ + +/* + * after a writepage IO is done, we need to: + * clear the uptodate bits on error + * clear the writeback bits in the extent tree for this IO + * end_page_writeback if the page has no more pending IO + * + * Scheduling is not allowed, so the extent state tree is expected + * to have one and only one object corresponding to this IO. + */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_bio_extent_writepage(struct bio *bio, int err) +#else +static int end_bio_extent_writepage(struct bio *bio, + unsigned int bytes_done, int err) +#endif +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_io_tree *tree = bio->bi_private; + u64 start; + u64 end; + int whole_page; + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + if (bio->bi_size) + return 1; +#endif + + do { + struct page *page = bvec->bv_page; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) + whole_page = 1; + else + whole_page = 0; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (!uptodate) { + clear_extent_uptodate(tree, start, end, GFP_ATOMIC); + ClearPageUptodate(page); + SetPageError(page); + } + clear_extent_writeback(tree, start, end, GFP_ATOMIC); + + if (whole_page) + end_page_writeback(page); + else + check_page_writeback(tree, page); + if (tree->ops && tree->ops->writepage_end_io_hook) + tree->ops->writepage_end_io_hook(page, start, end); + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + return 0; +#endif +} + +/* + * after a readpage IO is done, we need to: + * clear the uptodate bits on error + * set the uptodate bits if things worked + * set the page up to date if all extents in the tree are uptodate + * clear the lock bit in the extent tree + * unlock the page if there are no other extents locked for it + * + * Scheduling is not allowed, so the extent state tree is expected + * to have one and only one object corresponding to this IO. + */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_bio_extent_readpage(struct bio *bio, int err) +#else +static int end_bio_extent_readpage(struct bio *bio, + unsigned int bytes_done, int err) +#endif +{ + int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_io_tree *tree = bio->bi_private; + u64 start; + u64 end; + int whole_page; + int ret; + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + if (bio->bi_size) + return 1; +#endif + + do { + struct page *page = bvec->bv_page; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) + whole_page = 1; + else + whole_page = 0; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { + ret = tree->ops->readpage_end_io_hook(page, start, end); + if (ret) + uptodate = 0; + } + if (uptodate) { + set_extent_uptodate(tree, start, end, GFP_ATOMIC); + if (whole_page) + SetPageUptodate(page); + else + check_page_uptodate(tree, page); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + + unlock_extent(tree, start, end, GFP_ATOMIC); + + if (whole_page) + unlock_page(page); + else + check_page_locked(tree, page); + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + return 0; +#endif +} + +/* + * IO done from prepare_write is pretty simple, we just unlock + * the structs in the extent tree when done, and set the uptodate bits + * as appropriate. + */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_bio_extent_preparewrite(struct bio *bio, int err) +#else +static int end_bio_extent_preparewrite(struct bio *bio, + unsigned int bytes_done, int err) +#endif +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_io_tree *tree = bio->bi_private; + u64 start; + u64 end; + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + if (bio->bi_size) + return 1; +#endif + + do { + struct page *page = bvec->bv_page; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (uptodate) { + set_extent_uptodate(tree, start, end, GFP_ATOMIC); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + + unlock_extent(tree, start, end, GFP_ATOMIC); + + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + return 0; +#endif +} + +static struct bio * +extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, + gfp_t gfp_flags) +{ + struct bio *bio; + + bio = bio_alloc(gfp_flags, nr_vecs); + + if (bio == NULL && (current->flags & PF_MEMALLOC)) { + while (!bio && (nr_vecs /= 2)) + bio = bio_alloc(gfp_flags, nr_vecs); + } + + if (bio) { + bio->bi_bdev = bdev; + bio->bi_sector = first_sector; + } + return bio; +} + +static int submit_one_bio(int rw, struct bio *bio) +{ + u64 maxsector; + int ret = 0; + + bio_get(bio); + + maxsector = bio->bi_bdev->bd_inode->i_size >> 9; + if (maxsector < bio->bi_sector) { + printk("sector too large max %Lu got %llu\n", maxsector, + (unsigned long long)bio->bi_sector); + WARN_ON(1); + } + + submit_bio(rw, bio); + if (bio_flagged(bio, BIO_EOPNOTSUPP)) + ret = -EOPNOTSUPP; + bio_put(bio); + return ret; +} + +static int submit_extent_page(int rw, struct extent_io_tree *tree, + struct page *page, sector_t sector, + size_t size, unsigned long offset, + struct block_device *bdev, + struct bio **bio_ret, + unsigned long max_pages, + bio_end_io_t end_io_func) +{ + int ret = 0; + struct bio *bio; + int nr; + + if (bio_ret && *bio_ret) { + bio = *bio_ret; + if (bio->bi_sector + (bio->bi_size >> 9) != sector || + bio_add_page(bio, page, size, offset) < size) { + ret = submit_one_bio(rw, bio); + bio = NULL; + } else { + return 0; + } + } + nr = min_t(int, max_pages, bio_get_nr_vecs(bdev)); + bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); + if (!bio) { + printk("failed to allocate bio nr %d\n", nr); + } + bio_add_page(bio, page, size, offset); + bio->bi_end_io = end_io_func; + bio->bi_private = tree; + if (bio_ret) { + *bio_ret = bio; + } else { + ret = submit_one_bio(rw, bio); + } + + return ret; +} + +void set_page_extent_mapped(struct page *page) +{ + if (!PagePrivate(page)) { + SetPagePrivate(page); + WARN_ON(!page->mapping->a_ops->invalidatepage); + set_page_private(page, EXTENT_PAGE_PRIVATE); + page_cache_get(page); + } +} + +void set_page_extent_head(struct page *page, unsigned long len) +{ + set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); +} + +/* + * basic readpage implementation. Locked extent state structs are inserted + * into the tree that are removed when the IO is done (by the end_io + * handlers) + */ +static int __extent_read_full_page(struct extent_io_tree *tree, + struct page *page, + get_extent_t *get_extent, + struct bio **bio) +{ + struct inode *inode = page->mapping->host; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 page_end = start + PAGE_CACHE_SIZE - 1; + u64 end; + u64 cur = start; + u64 extent_offset; + u64 last_byte = i_size_read(inode); + u64 block_start; + u64 cur_end; + sector_t sector; + struct extent_map *em; + struct block_device *bdev; + int ret; + int nr = 0; + size_t page_offset = 0; + size_t iosize; + size_t blocksize = inode->i_sb->s_blocksize; + + set_page_extent_mapped(page); + + end = page_end; + lock_extent(tree, start, end, GFP_NOFS); + + while (cur <= end) { + if (cur >= last_byte) { + char *userpage; + iosize = PAGE_CACHE_SIZE - page_offset; + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + page_offset, 0, iosize); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); + set_extent_uptodate(tree, cur, cur + iosize - 1, + GFP_NOFS); + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + break; + } + em = get_extent(inode, page, page_offset, cur, + end - cur + 1, 0); + if (IS_ERR(em) || !em) { + SetPageError(page); + unlock_extent(tree, cur, end, GFP_NOFS); + break; + } + + extent_offset = cur - em->start; + BUG_ON(extent_map_end(em) <= cur); + BUG_ON(end < cur); + + iosize = min(extent_map_end(em) - cur, end - cur + 1); + cur_end = min(extent_map_end(em) - 1, end); + iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); + sector = (em->block_start + extent_offset) >> 9; + bdev = em->bdev; + block_start = em->block_start; + free_extent_map(em); + em = NULL; + + /* we've found a hole, just zero and go on */ + if (block_start == EXTENT_MAP_HOLE) { + char *userpage; + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + page_offset, 0, iosize); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); + + set_extent_uptodate(tree, cur, cur + iosize - 1, + GFP_NOFS); + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + cur = cur + iosize; + page_offset += iosize; + continue; + } + /* the get_extent function already copied into the page */ + if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + cur = cur + iosize; + page_offset += iosize; + continue; + } + + ret = 0; + if (tree->ops && tree->ops->readpage_io_hook) { + ret = tree->ops->readpage_io_hook(page, cur, + cur + iosize - 1); + } + if (!ret) { + unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1; + nr -= page->index; + ret = submit_extent_page(READ, tree, page, + sector, iosize, page_offset, + bdev, bio, nr, + end_bio_extent_readpage); + } + if (ret) + SetPageError(page); + cur = cur + iosize; + page_offset += iosize; + nr++; + } + if (!nr) { + if (!PageError(page)) + SetPageUptodate(page); + unlock_page(page); + } + return 0; +} + +int extent_read_full_page(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent) +{ + struct bio *bio = NULL; + int ret; + + ret = __extent_read_full_page(tree, page, get_extent, &bio); + if (bio) + submit_one_bio(READ, bio); + return ret; +} +EXPORT_SYMBOL(extent_read_full_page); + +/* + * the writepage semantics are similar to regular writepage. extent + * records are inserted to lock ranges in the tree, and as dirty areas + * are found, they are marked writeback. Then the lock bits are removed + * and the end_io handler clears the writeback ranges + */ +static int __extent_writepage(struct page *page, struct writeback_control *wbc, + void *data) +{ + struct inode *inode = page->mapping->host; + struct extent_page_data *epd = data; + struct extent_io_tree *tree = epd->tree; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 delalloc_start; + u64 page_end = start + PAGE_CACHE_SIZE - 1; + u64 end; + u64 cur = start; + u64 extent_offset; + u64 last_byte = i_size_read(inode); + u64 block_start; + u64 iosize; + sector_t sector; + struct extent_map *em; + struct block_device *bdev; + int ret; + int nr = 0; + size_t page_offset = 0; + size_t blocksize; + loff_t i_size = i_size_read(inode); + unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; + u64 nr_delalloc; + u64 delalloc_end; + + WARN_ON(!PageLocked(page)); + if (page->index > end_index) { + clear_extent_dirty(tree, start, page_end, GFP_NOFS); + unlock_page(page); + return 0; + } + + if (page->index == end_index) { + char *userpage; + + size_t offset = i_size & (PAGE_CACHE_SIZE - 1); + + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); + } + + set_page_extent_mapped(page); + + delalloc_start = start; + delalloc_end = 0; + while(delalloc_end < page_end) { + nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start, + &delalloc_end, + 128 * 1024 * 1024); + if (nr_delalloc == 0) { + delalloc_start = delalloc_end + 1; + continue; + } + tree->ops->fill_delalloc(inode, delalloc_start, + delalloc_end); + clear_extent_bit(tree, delalloc_start, + delalloc_end, + EXTENT_LOCKED | EXTENT_DELALLOC, + 1, 0, GFP_NOFS); + delalloc_start = delalloc_end + 1; + } + lock_extent(tree, start, page_end, GFP_NOFS); + + end = page_end; + if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { + printk("found delalloc bits after lock_extent\n"); + } + + if (last_byte <= start) { + clear_extent_dirty(tree, start, page_end, GFP_NOFS); + goto done; + } + + set_extent_uptodate(tree, start, page_end, GFP_NOFS); + blocksize = inode->i_sb->s_blocksize; + + while (cur <= end) { + if (cur >= last_byte) { + clear_extent_dirty(tree, cur, page_end, GFP_NOFS); + break; + } + em = epd->get_extent(inode, page, page_offset, cur, + end - cur + 1, 1); + if (IS_ERR(em) || !em) { + SetPageError(page); + break; + } + + extent_offset = cur - em->start; + BUG_ON(extent_map_end(em) <= cur); + BUG_ON(end < cur); + iosize = min(extent_map_end(em) - cur, end - cur + 1); + iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); + sector = (em->block_start + extent_offset) >> 9; + bdev = em->bdev; + block_start = em->block_start; + free_extent_map(em); + em = NULL; + + if (block_start == EXTENT_MAP_HOLE || + block_start == EXTENT_MAP_INLINE) { + clear_extent_dirty(tree, cur, + cur + iosize - 1, GFP_NOFS); + cur = cur + iosize; + page_offset += iosize; + continue; + } + + /* leave this out until we have a page_mkwrite call */ + if (0 && !test_range_bit(tree, cur, cur + iosize - 1, + EXTENT_DIRTY, 0)) { + cur = cur + iosize; + page_offset += iosize; + continue; + } + clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); + if (tree->ops && tree->ops->writepage_io_hook) { + ret = tree->ops->writepage_io_hook(page, cur, + cur + iosize - 1); + } else { + ret = 0; + } + if (ret) + SetPageError(page); + else { + unsigned long max_nr = end_index + 1; + set_range_writeback(tree, cur, cur + iosize - 1); + if (!PageWriteback(page)) { + printk("warning page %lu not writeback, " + "cur %llu end %llu\n", page->index, + (unsigned long long)cur, + (unsigned long long)end); + } + + ret = submit_extent_page(WRITE, tree, page, sector, + iosize, page_offset, bdev, + &epd->bio, max_nr, + end_bio_extent_writepage); + if (ret) + SetPageError(page); + } + cur = cur + iosize; + page_offset += iosize; + nr++; + } +done: + if (nr == 0) { + /* make sure the mapping tag for page dirty gets cleared */ + set_page_writeback(page); + end_page_writeback(page); + } + unlock_extent(tree, start, page_end, GFP_NOFS); + unlock_page(page); + return 0; +} + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + +/* Taken directly from 2.6.23 for 2.6.18 back port */ +typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, + void *data); + +/** + * write_cache_pages - walk the list of dirty pages of the given address space + * and write all of them. + * @mapping: address space structure to write + * @wbc: subtract the number of written pages from *@wbc->nr_to_write + * @writepage: function called for each page + * @data: data passed to writepage function + * + * If a page is already under I/O, write_cache_pages() skips it, even + * if it's dirty. This is desirable behaviour for memory-cleaning writeback, + * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() + * and msync() need to guarantee that all the data which was dirty at the time + * the call was made get new I/O started against them. If wbc->sync_mode is + * WB_SYNC_ALL then we were called for data integrity and we must wait for + * existing IO to complete. + */ +static int write_cache_pages(struct address_space *mapping, + struct writeback_control *wbc, writepage_t writepage, + void *data) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + int ret = 0; + int done = 0; + struct pagevec pvec; + int nr_pages; + pgoff_t index; + pgoff_t end; /* Inclusive */ + int scanned = 0; + int range_whole = 0; + + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + return 0; + } + + pagevec_init(&pvec, 0); + if (wbc->range_cyclic) { + index = mapping->writeback_index; /* Start from prev offset */ + end = -1; + } else { + index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; + scanned = 1; + } +retry: + while (!done && (index <= end) && + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + unsigned i; + + scanned = 1; + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + /* + * At this point we hold neither mapping->tree_lock nor + * lock on the page itself: the page may be truncated or + * invalidated (changing page->mapping to NULL), or even + * swizzled back from swapper_space to tmpfs file + * mapping + */ + lock_page(page); + + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + continue; + } + + if (!wbc->range_cyclic && page->index > end) { + done = 1; + unlock_page(page); + continue; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || + !clear_page_dirty_for_io(page)) { + unlock_page(page); + continue; + } + + ret = (*writepage)(page, wbc, data); + + if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { + unlock_page(page); + ret = 0; + } + if (ret || (--(wbc->nr_to_write) <= 0)) + done = 1; + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + done = 1; + } + } + pagevec_release(&pvec); + cond_resched(); + } + if (!scanned && !done) { + /* + * We hit the last page and there is more work to be done: wrap + * back to the start of the file + */ + scanned = 1; + index = 0; + goto retry; + } + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) + mapping->writeback_index = index; + return ret; +} +#endif + +int extent_write_full_page(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent, + struct writeback_control *wbc) +{ + int ret; + struct address_space *mapping = page->mapping; + struct extent_page_data epd = { + .bio = NULL, + .tree = tree, + .get_extent = get_extent, + }; + struct writeback_control wbc_writepages = { + .bdi = wbc->bdi, + .sync_mode = WB_SYNC_NONE, + .older_than_this = NULL, + .nr_to_write = 64, + .range_start = page_offset(page) + PAGE_CACHE_SIZE, + .range_end = (loff_t)-1, + }; + + + ret = __extent_writepage(page, wbc, &epd); + + write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd); + if (epd.bio) { + submit_one_bio(WRITE, epd.bio); + } + return ret; +} +EXPORT_SYMBOL(extent_write_full_page); + + +int extent_writepages(struct extent_io_tree *tree, + struct address_space *mapping, + get_extent_t *get_extent, + struct writeback_control *wbc) +{ + int ret = 0; + struct extent_page_data epd = { + .bio = NULL, + .tree = tree, + .get_extent = get_extent, + }; + + ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd); + if (epd.bio) { + submit_one_bio(WRITE, epd.bio); + } + return ret; +} +EXPORT_SYMBOL(extent_writepages); + +int extent_readpages(struct extent_io_tree *tree, + struct address_space *mapping, + struct list_head *pages, unsigned nr_pages, + get_extent_t get_extent) +{ + struct bio *bio = NULL; + unsigned page_idx; + struct pagevec pvec; + + pagevec_init(&pvec, 0); + for (page_idx = 0; page_idx < nr_pages; page_idx++) { + struct page *page = list_entry(pages->prev, struct page, lru); + + prefetchw(&page->flags); + list_del(&page->lru); + /* + * what we want to do here is call add_to_page_cache_lru, + * but that isn't exported, so we reproduce it here + */ + if (!add_to_page_cache(page, mapping, + page->index, GFP_KERNEL)) { + + /* open coding of lru_cache_add, also not exported */ + page_cache_get(page); + if (!pagevec_add(&pvec, page)) + __pagevec_lru_add(&pvec); + __extent_read_full_page(tree, page, get_extent, &bio); + } + page_cache_release(page); + } + if (pagevec_count(&pvec)) + __pagevec_lru_add(&pvec); + BUG_ON(!list_empty(pages)); + if (bio) + submit_one_bio(READ, bio); + return 0; +} +EXPORT_SYMBOL(extent_readpages); + +/* + * basic invalidatepage code, this waits on any locked or writeback + * ranges corresponding to the page, and then deletes any extent state + * records from the tree + */ +int extent_invalidatepage(struct extent_io_tree *tree, + struct page *page, unsigned long offset) +{ + u64 start = ((u64)page->index << PAGE_CACHE_SHIFT); + u64 end = start + PAGE_CACHE_SIZE - 1; + size_t blocksize = page->mapping->host->i_sb->s_blocksize; + + start += (offset + blocksize -1) & ~(blocksize - 1); + if (start > end) + return 0; + + lock_extent(tree, start, end, GFP_NOFS); + wait_on_extent_writeback(tree, start, end); + clear_extent_bit(tree, start, end, + EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, + 1, 1, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(extent_invalidatepage); + +/* + * simple commit_write call, set_range_dirty is used to mark both + * the pages and the extent records as dirty + */ +int extent_commit_write(struct extent_io_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to) +{ + loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + + set_page_extent_mapped(page); + set_page_dirty(page); + + if (pos > inode->i_size) { + i_size_write(inode, pos); + mark_inode_dirty(inode); + } + return 0; +} +EXPORT_SYMBOL(extent_commit_write); + +int extent_prepare_write(struct extent_io_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to, get_extent_t *get_extent) +{ + u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + u64 block_start; + u64 orig_block_start; + u64 block_end; + u64 cur_end; + struct extent_map *em; + unsigned blocksize = 1 << inode->i_blkbits; + size_t page_offset = 0; + size_t block_off_start; + size_t block_off_end; + int err = 0; + int iocount = 0; + int ret = 0; + int isnew; + + set_page_extent_mapped(page); + + block_start = (page_start + from) & ~((u64)blocksize - 1); + block_end = (page_start + to - 1) | (blocksize - 1); + orig_block_start = block_start; + + lock_extent(tree, page_start, page_end, GFP_NOFS); + while(block_start <= block_end) { + em = get_extent(inode, page, page_offset, block_start, + block_end - block_start + 1, 1); + if (IS_ERR(em) || !em) { + goto err; + } + cur_end = min(block_end, extent_map_end(em) - 1); + block_off_start = block_start & (PAGE_CACHE_SIZE - 1); + block_off_end = block_off_start + blocksize; + isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS); + + if (!PageUptodate(page) && isnew && + (block_off_end > to || block_off_start < from)) { + void *kaddr; + + kaddr = kmap_atomic(page, KM_USER0); + if (block_off_end > to) + memset(kaddr + to, 0, block_off_end - to); + if (block_off_start < from) + memset(kaddr + block_off_start, 0, + from - block_off_start); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + } + if ((em->block_start != EXTENT_MAP_HOLE && + em->block_start != EXTENT_MAP_INLINE) && + !isnew && !PageUptodate(page) && + (block_off_end > to || block_off_start < from) && + !test_range_bit(tree, block_start, cur_end, + EXTENT_UPTODATE, 1)) { + u64 sector; + u64 extent_offset = block_start - em->start; + size_t iosize; + sector = (em->block_start + extent_offset) >> 9; + iosize = (cur_end - block_start + blocksize) & + ~((u64)blocksize - 1); + /* + * we've already got the extent locked, but we + * need to split the state such that our end_bio + * handler can clear the lock. + */ + set_extent_bit(tree, block_start, + block_start + iosize - 1, + EXTENT_LOCKED, 0, NULL, GFP_NOFS); + ret = submit_extent_page(READ, tree, page, + sector, iosize, page_offset, em->bdev, + NULL, 1, + end_bio_extent_preparewrite); + iocount++; + block_start = block_start + iosize; + } else { + set_extent_uptodate(tree, block_start, cur_end, + GFP_NOFS); + unlock_extent(tree, block_start, cur_end, GFP_NOFS); + block_start = cur_end + 1; + } + page_offset = block_start & (PAGE_CACHE_SIZE - 1); + free_extent_map(em); + } + if (iocount) { + wait_extent_bit(tree, orig_block_start, + block_end, EXTENT_LOCKED); + } + check_page_uptodate(tree, page); +err: + /* FIXME, zero out newly allocated blocks on error */ + return err; +} +EXPORT_SYMBOL(extent_prepare_write); + +/* + * a helper for releasepage. As long as there are no locked extents + * in the range corresponding to the page, both state records and extent + * map records are removed + */ +int try_release_extent_mapping(struct extent_map_tree *map, + struct extent_io_tree *tree, struct page *page) +{ + struct extent_map *em; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + u64 orig_start = start; + int ret = 1; + + while (start <= end) { + spin_lock(&map->lock); + em = lookup_extent_mapping(map, start, end); + if (!em || IS_ERR(em)) { + spin_unlock(&map->lock); + break; + } + if (!test_range_bit(tree, em->start, extent_map_end(em) - 1, + EXTENT_LOCKED, 0)) { + remove_extent_mapping(map, em); + /* once for the rb tree */ + free_extent_map(em); + } + start = extent_map_end(em); + spin_unlock(&map->lock); + + /* once for us */ + free_extent_map(em); + } + if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0)) + ret = 0; + else + clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, + 1, 1, GFP_NOFS); + return ret; +} +EXPORT_SYMBOL(try_release_extent_mapping); + +sector_t extent_bmap(struct address_space *mapping, sector_t iblock, + get_extent_t *get_extent) +{ + struct inode *inode = mapping->host; + u64 start = iblock << inode->i_blkbits; + sector_t sector = 0; + struct extent_map *em; + + em = get_extent(inode, NULL, 0, start, (1 << inode->i_blkbits), 0); + if (!em || IS_ERR(em)) + return 0; + + if (em->block_start == EXTENT_MAP_INLINE || + em->block_start == EXTENT_MAP_HOLE) + goto out; + + sector = (em->block_start + start - em->start) >> inode->i_blkbits; +printk("bmap finds %Lu %Lu block %Lu\n", em->start, em->len, em->block_start); +out: + free_extent_map(em); + return sector; +} + +static int add_lru(struct extent_io_tree *tree, struct extent_buffer *eb) +{ + if (list_empty(&eb->lru)) { + extent_buffer_get(eb); + list_add(&eb->lru, &tree->buffer_lru); + tree->lru_size++; + if (tree->lru_size >= BUFFER_LRU_MAX) { + struct extent_buffer *rm; + rm = list_entry(tree->buffer_lru.prev, + struct extent_buffer, lru); + tree->lru_size--; + list_del_init(&rm->lru); + free_extent_buffer(rm); + } + } else + list_move(&eb->lru, &tree->buffer_lru); + return 0; +} +static struct extent_buffer *find_lru(struct extent_io_tree *tree, + u64 start, unsigned long len) +{ + struct list_head *lru = &tree->buffer_lru; + struct list_head *cur = lru->next; + struct extent_buffer *eb; + + if (list_empty(lru)) + return NULL; + + do { + eb = list_entry(cur, struct extent_buffer, lru); + if (eb->start == start && eb->len == len) { + extent_buffer_get(eb); + return eb; + } + cur = cur->next; + } while (cur != lru); + return NULL; +} + +static inline unsigned long num_extent_pages(u64 start, u64 len) +{ + return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - + (start >> PAGE_CACHE_SHIFT); +} + +static inline struct page *extent_buffer_page(struct extent_buffer *eb, + unsigned long i) +{ + struct page *p; + struct address_space *mapping; + + if (i == 0) + return eb->first_page; + i += eb->start >> PAGE_CACHE_SHIFT; + mapping = eb->first_page->mapping; + read_lock_irq(&mapping->tree_lock); + p = radix_tree_lookup(&mapping->page_tree, i); + read_unlock_irq(&mapping->tree_lock); + return p; +} + +static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, + u64 start, + unsigned long len, + gfp_t mask) +{ + struct extent_buffer *eb = NULL; + + spin_lock(&tree->lru_lock); + eb = find_lru(tree, start, len); + spin_unlock(&tree->lru_lock); + if (eb) { + return eb; + } + + eb = kmem_cache_zalloc(extent_buffer_cache, mask); + INIT_LIST_HEAD(&eb->lru); + eb->start = start; + eb->len = len; + atomic_set(&eb->refs, 1); + + return eb; +} + +static void __free_extent_buffer(struct extent_buffer *eb) +{ + kmem_cache_free(extent_buffer_cache, eb); +} + +struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, + u64 start, unsigned long len, + struct page *page0, + gfp_t mask) +{ + unsigned long num_pages = num_extent_pages(start, len); + unsigned long i; + unsigned long index = start >> PAGE_CACHE_SHIFT; + struct extent_buffer *eb; + struct page *p; + struct address_space *mapping = tree->mapping; + int uptodate = 1; + + eb = __alloc_extent_buffer(tree, start, len, mask); + if (!eb || IS_ERR(eb)) + return NULL; + + if (eb->flags & EXTENT_BUFFER_FILLED) + goto lru_add; + + if (page0) { + eb->first_page = page0; + i = 1; + index++; + page_cache_get(page0); + mark_page_accessed(page0); + set_page_extent_mapped(page0); + WARN_ON(!PageUptodate(page0)); + set_page_extent_head(page0, len); + } else { + i = 0; + } + for (; i < num_pages; i++, index++) { + p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); + if (!p) { + WARN_ON(1); + goto fail; + } + set_page_extent_mapped(p); + mark_page_accessed(p); + if (i == 0) { + eb->first_page = p; + set_page_extent_head(p, len); + } else { + set_page_private(p, EXTENT_PAGE_PRIVATE); + } + if (!PageUptodate(p)) + uptodate = 0; + unlock_page(p); + } + if (uptodate) + eb->flags |= EXTENT_UPTODATE; + eb->flags |= EXTENT_BUFFER_FILLED; + +lru_add: + spin_lock(&tree->lru_lock); + add_lru(tree, eb); + spin_unlock(&tree->lru_lock); + return eb; + +fail: + spin_lock(&tree->lru_lock); + list_del_init(&eb->lru); + spin_unlock(&tree->lru_lock); + if (!atomic_dec_and_test(&eb->refs)) + return NULL; + for (index = 1; index < i; index++) { + page_cache_release(extent_buffer_page(eb, index)); + } + if (i > 0) + page_cache_release(extent_buffer_page(eb, 0)); + __free_extent_buffer(eb); + return NULL; +} +EXPORT_SYMBOL(alloc_extent_buffer); + +struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, + u64 start, unsigned long len, + gfp_t mask) +{ + unsigned long num_pages = num_extent_pages(start, len); + unsigned long i; + unsigned long index = start >> PAGE_CACHE_SHIFT; + struct extent_buffer *eb; + struct page *p; + struct address_space *mapping = tree->mapping; + int uptodate = 1; + + eb = __alloc_extent_buffer(tree, start, len, mask); + if (!eb || IS_ERR(eb)) + return NULL; + + if (eb->flags & EXTENT_BUFFER_FILLED) + goto lru_add; + + for (i = 0; i < num_pages; i++, index++) { + p = find_lock_page(mapping, index); + if (!p) { + goto fail; + } + set_page_extent_mapped(p); + mark_page_accessed(p); + + if (i == 0) { + eb->first_page = p; + set_page_extent_head(p, len); + } else { + set_page_private(p, EXTENT_PAGE_PRIVATE); + } + + if (!PageUptodate(p)) + uptodate = 0; + unlock_page(p); + } + if (uptodate) + eb->flags |= EXTENT_UPTODATE; + eb->flags |= EXTENT_BUFFER_FILLED; + +lru_add: + spin_lock(&tree->lru_lock); + add_lru(tree, eb); + spin_unlock(&tree->lru_lock); + return eb; +fail: + spin_lock(&tree->lru_lock); + list_del_init(&eb->lru); + spin_unlock(&tree->lru_lock); + if (!atomic_dec_and_test(&eb->refs)) + return NULL; + for (index = 1; index < i; index++) { + page_cache_release(extent_buffer_page(eb, index)); + } + if (i > 0) + page_cache_release(extent_buffer_page(eb, 0)); + __free_extent_buffer(eb); + return NULL; +} +EXPORT_SYMBOL(find_extent_buffer); + +void free_extent_buffer(struct extent_buffer *eb) +{ + unsigned long i; + unsigned long num_pages; + + if (!eb) + return; + + if (!atomic_dec_and_test(&eb->refs)) + return; + + WARN_ON(!list_empty(&eb->lru)); + num_pages = num_extent_pages(eb->start, eb->len); + + for (i = 1; i < num_pages; i++) { + page_cache_release(extent_buffer_page(eb, i)); + } + page_cache_release(extent_buffer_page(eb, 0)); + __free_extent_buffer(eb); +} +EXPORT_SYMBOL(free_extent_buffer); + +int clear_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + int set; + unsigned long i; + unsigned long num_pages; + struct page *page; + + u64 start = eb->start; + u64 end = start + eb->len - 1; + + set = clear_extent_dirty(tree, start, end, GFP_NOFS); + num_pages = num_extent_pages(eb->start, eb->len); + + for (i = 0; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + lock_page(page); + if (i == 0) + set_page_extent_head(page, eb->len); + else + set_page_private(page, EXTENT_PAGE_PRIVATE); + + /* + * if we're on the last page or the first page and the + * block isn't aligned on a page boundary, do extra checks + * to make sure we don't clean page that is partially dirty + */ + if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || + ((i == num_pages - 1) && + ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { + start = (u64)page->index << PAGE_CACHE_SHIFT; + end = start + PAGE_CACHE_SIZE - 1; + if (test_range_bit(tree, start, end, + EXTENT_DIRTY, 0)) { + unlock_page(page); + continue; + } + } + clear_page_dirty_for_io(page); + write_lock_irq(&page->mapping->tree_lock); + if (!PageDirty(page)) { + radix_tree_tag_clear(&page->mapping->page_tree, + page_index(page), + PAGECACHE_TAG_DIRTY); + } + write_unlock_irq(&page->mapping->tree_lock); + unlock_page(page); + } + return 0; +} +EXPORT_SYMBOL(clear_extent_buffer_dirty); + +int wait_on_extent_buffer_writeback(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + return wait_on_extent_writeback(tree, eb->start, + eb->start + eb->len - 1); +} +EXPORT_SYMBOL(wait_on_extent_buffer_writeback); + +int set_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + unsigned long i; + unsigned long num_pages; + + num_pages = num_extent_pages(eb->start, eb->len); + for (i = 0; i < num_pages; i++) { + struct page *page = extent_buffer_page(eb, i); + /* writepage may need to do something special for the + * first page, we have to make sure page->private is + * properly set. releasepage may drop page->private + * on us if the page isn't already dirty. + */ + if (i == 0) { + lock_page(page); + set_page_extent_head(page, eb->len); + } else if (PagePrivate(page) && + page->private != EXTENT_PAGE_PRIVATE) { + lock_page(page); + set_page_extent_mapped(page); + unlock_page(page); + } + __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); + if (i == 0) + unlock_page(page); + } + return set_extent_dirty(tree, eb->start, + eb->start + eb->len - 1, GFP_NOFS); +} +EXPORT_SYMBOL(set_extent_buffer_dirty); + +int set_extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + unsigned long i; + struct page *page; + unsigned long num_pages; + + num_pages = num_extent_pages(eb->start, eb->len); + + set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, + GFP_NOFS); + for (i = 0; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || + ((i == num_pages - 1) && + ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { + check_page_uptodate(tree, page); + continue; + } + SetPageUptodate(page); + } + return 0; +} +EXPORT_SYMBOL(set_extent_buffer_uptodate); + +int extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + if (eb->flags & EXTENT_UPTODATE) + return 1; + return test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1); +} +EXPORT_SYMBOL(extent_buffer_uptodate); + +int read_extent_buffer_pages(struct extent_io_tree *tree, + struct extent_buffer *eb, + u64 start, + int wait) +{ + unsigned long i; + unsigned long start_i; + struct page *page; + int err; + int ret = 0; + unsigned long num_pages; + + if (eb->flags & EXTENT_UPTODATE) + return 0; + + if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1)) { + return 0; + } + + if (start) { + WARN_ON(start < eb->start); + start_i = (start >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT); + } else { + start_i = 0; + } + + num_pages = num_extent_pages(eb->start, eb->len); + for (i = start_i; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + if (PageUptodate(page)) { + continue; + } + if (!wait) { + if (TestSetPageLocked(page)) { + continue; + } + } else { + lock_page(page); + } + if (!PageUptodate(page)) { + err = page->mapping->a_ops->readpage(NULL, page); + if (err) { + ret = err; + } + } else { + unlock_page(page); + } + } + + if (ret || !wait) { + return ret; + } + + for (i = start_i; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + wait_on_page_locked(page); + if (!PageUptodate(page)) { + ret = -EIO; + } + } + if (!ret) + eb->flags |= EXTENT_UPTODATE; + return ret; +} +EXPORT_SYMBOL(read_extent_buffer_pages); + +void read_extent_buffer(struct extent_buffer *eb, void *dstv, + unsigned long start, + unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *dst = (char *)dstv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + unsigned long num_pages = num_extent_pages(eb->start, eb->len); + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(eb, i); + if (!PageUptodate(page)) { + printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len); + WARN_ON(1); + } + WARN_ON(!PageUptodate(page)); + + cur = min(len, (PAGE_CACHE_SIZE - offset)); + kaddr = kmap_atomic(page, KM_USER1); + memcpy(dst, kaddr + offset, cur); + kunmap_atomic(kaddr, KM_USER1); + + dst += cur; + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(read_extent_buffer); + +int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, + unsigned long min_len, char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km) +{ + size_t offset = start & (PAGE_CACHE_SIZE - 1); + char *kaddr; + struct page *p; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + unsigned long end_i = (start_offset + start + min_len - 1) >> + PAGE_CACHE_SHIFT; + + if (i != end_i) + return -EINVAL; + + if (i == 0) { + offset = start_offset; + *map_start = 0; + } else { + offset = 0; + *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset; + } + if (start + min_len > eb->len) { +printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); + WARN_ON(1); + } + + p = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(p)); + kaddr = kmap_atomic(p, km); + *token = kaddr; + *map = kaddr + offset; + *map_len = PAGE_CACHE_SIZE - offset; + return 0; +} +EXPORT_SYMBOL(map_private_extent_buffer); + +int map_extent_buffer(struct extent_buffer *eb, unsigned long start, + unsigned long min_len, + char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km) +{ + int err; + int save = 0; + if (eb->map_token) { + unmap_extent_buffer(eb, eb->map_token, km); + eb->map_token = NULL; + save = 1; + } + err = map_private_extent_buffer(eb, start, min_len, token, map, + map_start, map_len, km); + if (!err && save) { + eb->map_token = *token; + eb->kaddr = *map; + eb->map_start = *map_start; + eb->map_len = *map_len; + } + return err; +} +EXPORT_SYMBOL(map_extent_buffer); + +void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) +{ + kunmap_atomic(token, km); +} +EXPORT_SYMBOL(unmap_extent_buffer); + +int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, + unsigned long start, + unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *ptr = (char *)ptrv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + int ret = 0; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(page)); + + cur = min(len, (PAGE_CACHE_SIZE - offset)); + + kaddr = kmap_atomic(page, KM_USER0); + ret = memcmp(ptr, kaddr + offset, cur); + kunmap_atomic(kaddr, KM_USER0); + if (ret) + break; + + ptr += cur; + len -= cur; + offset = 0; + i++; + } + return ret; +} +EXPORT_SYMBOL(memcmp_extent_buffer); + +void write_extent_buffer(struct extent_buffer *eb, const void *srcv, + unsigned long start, unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *src = (char *)srcv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(page)); + + cur = min(len, PAGE_CACHE_SIZE - offset); + kaddr = kmap_atomic(page, KM_USER1); + memcpy(kaddr + offset, src, cur); + kunmap_atomic(kaddr, KM_USER1); + + src += cur; + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(write_extent_buffer); + +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(page)); + + cur = min(len, PAGE_CACHE_SIZE - offset); + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + offset, c, cur); + kunmap_atomic(kaddr, KM_USER0); + + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(memset_extent_buffer); + +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len) +{ + u64 dst_len = dst->len; + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; + + WARN_ON(src->len != dst_len); + + offset = (start_offset + dst_offset) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(dst, i); + WARN_ON(!PageUptodate(page)); + + cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); + + kaddr = kmap_atomic(page, KM_USER0); + read_extent_buffer(src, kaddr + offset, src_offset, cur); + kunmap_atomic(kaddr, KM_USER0); + + src_offset += cur; + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(copy_extent_buffer); + +static void move_pages(struct page *dst_page, struct page *src_page, + unsigned long dst_off, unsigned long src_off, + unsigned long len) +{ + char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); + if (dst_page == src_page) { + memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); + } else { + char *src_kaddr = kmap_atomic(src_page, KM_USER1); + char *p = dst_kaddr + dst_off + len; + char *s = src_kaddr + src_off + len; + + while (len--) + *--p = *--s; + + kunmap_atomic(src_kaddr, KM_USER1); + } + kunmap_atomic(dst_kaddr, KM_USER0); +} + +static void copy_pages(struct page *dst_page, struct page *src_page, + unsigned long dst_off, unsigned long src_off, + unsigned long len) +{ + char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); + char *src_kaddr; + + if (dst_page != src_page) + src_kaddr = kmap_atomic(src_page, KM_USER1); + else + src_kaddr = dst_kaddr; + + memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); + kunmap_atomic(dst_kaddr, KM_USER0); + if (dst_page != src_page) + kunmap_atomic(src_kaddr, KM_USER1); +} + +void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len) +{ + size_t cur; + size_t dst_off_in_page; + size_t src_off_in_page; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long dst_i; + unsigned long src_i; + + if (src_offset + len > dst->len) { + printk("memmove bogus src_offset %lu move len %lu len %lu\n", + src_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset + len > dst->len) { + printk("memmove bogus dst_offset %lu move len %lu len %lu\n", + dst_offset, len, dst->len); + BUG_ON(1); + } + + while(len > 0) { + dst_off_in_page = (start_offset + dst_offset) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + src_off_in_page = (start_offset + src_offset) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + + dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; + src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; + + cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - + src_off_in_page)); + cur = min_t(unsigned long, cur, + (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); + + copy_pages(extent_buffer_page(dst, dst_i), + extent_buffer_page(dst, src_i), + dst_off_in_page, src_off_in_page, cur); + + src_offset += cur; + dst_offset += cur; + len -= cur; + } +} +EXPORT_SYMBOL(memcpy_extent_buffer); + +void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len) +{ + size_t cur; + size_t dst_off_in_page; + size_t src_off_in_page; + unsigned long dst_end = dst_offset + len - 1; + unsigned long src_end = src_offset + len - 1; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long dst_i; + unsigned long src_i; + + if (src_offset + len > dst->len) { + printk("memmove bogus src_offset %lu move len %lu len %lu\n", + src_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset + len > dst->len) { + printk("memmove bogus dst_offset %lu move len %lu len %lu\n", + dst_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset < src_offset) { + memcpy_extent_buffer(dst, dst_offset, src_offset, len); + return; + } + while(len > 0) { + dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; + src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; + + dst_off_in_page = (start_offset + dst_end) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + src_off_in_page = (start_offset + src_end) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + + cur = min_t(unsigned long, len, src_off_in_page + 1); + cur = min(cur, dst_off_in_page + 1); + move_pages(extent_buffer_page(dst, dst_i), + extent_buffer_page(dst, src_i), + dst_off_in_page - cur + 1, + src_off_in_page - cur + 1, cur); + + dst_end -= cur; + src_end -= cur; + len -= cur; + } +} +EXPORT_SYMBOL(memmove_extent_buffer); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h new file mode 100644 index 00000000000..06be1fe84b2 --- /dev/null +++ b/fs/btrfs/extent_io.h @@ -0,0 +1,193 @@ +#ifndef __EXTENTIO__ +#define __EXTENTIO__ + +#include + +/* bits for the extent state */ +#define EXTENT_DIRTY 1 +#define EXTENT_WRITEBACK (1 << 1) +#define EXTENT_UPTODATE (1 << 2) +#define EXTENT_LOCKED (1 << 3) +#define EXTENT_NEW (1 << 4) +#define EXTENT_DELALLOC (1 << 5) +#define EXTENT_DEFRAG (1 << 6) +#define EXTENT_DEFRAG_DONE (1 << 7) +#define EXTENT_BUFFER_FILLED (1 << 8) +#define EXTENT_CSUM (1 << 9) +#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) + +/* + * page->private values. Every page that is controlled by the extent + * map has page->private set to one. + */ +#define EXTENT_PAGE_PRIVATE 1 +#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 + +struct extent_io_ops { + int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); + int (*writepage_io_hook)(struct page *page, u64 start, u64 end); + int (*readpage_io_hook)(struct page *page, u64 start, u64 end); + int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end); + void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end); +}; + +struct extent_io_tree { + struct rb_root state; + struct address_space *mapping; + u64 dirty_bytes; + rwlock_t lock; + struct extent_io_ops *ops; + spinlock_t lru_lock; + struct list_head buffer_lru; + int lru_size; +}; + +struct extent_state { + u64 start; + u64 end; /* inclusive */ + int in_tree; + struct rb_node rb_node; + wait_queue_head_t wq; + atomic_t refs; + unsigned long state; + + /* for use by the FS */ + u64 private; + + struct list_head list; +}; + +struct extent_buffer { + u64 start; + unsigned long len; + char *map_token; + char *kaddr; + unsigned long map_start; + unsigned long map_len; + struct page *first_page; + struct list_head lru; + atomic_t refs; + int flags; +}; + +struct extent_map_tree; + +typedef struct extent_map *(get_extent_t)(struct inode *inode, + struct page *page, + size_t page_offset, + u64 start, u64 len, + int create); + +void extent_io_tree_init(struct extent_io_tree *tree, + struct address_space *mapping, gfp_t mask); +void extent_io_tree_empty_lru(struct extent_io_tree *tree); +int try_release_extent_mapping(struct extent_map_tree *map, + struct extent_io_tree *tree, struct page *page); +int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int extent_read_full_page(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent); +int __init extent_io_init(void); +void extent_io_exit(void); + +u64 count_range_bits(struct extent_io_tree *tree, + u64 *start, u64 search_end, + u64 max_bytes, unsigned long bits); + +int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int filled); +int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + int bits, gfp_t mask); +int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + int bits, gfp_t mask); +int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int find_first_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits); +int extent_invalidatepage(struct extent_io_tree *tree, + struct page *page, unsigned long offset); +int extent_write_full_page(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent, + struct writeback_control *wbc); +int extent_writepages(struct extent_io_tree *tree, + struct address_space *mapping, + get_extent_t *get_extent, + struct writeback_control *wbc); +int extent_readpages(struct extent_io_tree *tree, + struct address_space *mapping, + struct list_head *pages, unsigned nr_pages, + get_extent_t get_extent); +int extent_prepare_write(struct extent_io_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to, get_extent_t *get_extent); +int extent_commit_write(struct extent_io_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to); +sector_t extent_bmap(struct address_space *mapping, sector_t iblock, + get_extent_t *get_extent); +int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end); +int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); +int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); +void set_page_extent_mapped(struct page *page); + +struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, + u64 start, unsigned long len, + struct page *page0, + gfp_t mask); +struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, + u64 start, unsigned long len, + gfp_t mask); +void free_extent_buffer(struct extent_buffer *eb); +int read_extent_buffer_pages(struct extent_io_tree *tree, + struct extent_buffer *eb, u64 start, int wait); + +static inline void extent_buffer_get(struct extent_buffer *eb) +{ + atomic_inc(&eb->refs); +} + +int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, + unsigned long start, + unsigned long len); +void read_extent_buffer(struct extent_buffer *eb, void *dst, + unsigned long start, + unsigned long len); +void write_extent_buffer(struct extent_buffer *eb, const void *src, + unsigned long start, unsigned long len); +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len); +void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len); +void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len); +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len); +int wait_on_extent_buffer_writeback(struct extent_io_tree *tree, + struct extent_buffer *eb); +int clear_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb); +int set_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb); +int set_extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb); +int extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb); +int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, + unsigned long min_len, char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km); +int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, + unsigned long min_len, char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km); +void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); +#endif diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 010a287fbd7..268ad8facf6 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1,17 +1,10 @@ -#include -#include -#include -#include +#include #include -#include -#include +#include #include #include -#include -#include #include -#include -#include +#include #include "extent_map.h" /* temporary define until extent_map moves out of btrfs */ @@ -21,27 +14,6 @@ struct kmem_cache *btrfs_cache_create(const char *name, size_t size, unsigned long)); static struct kmem_cache *extent_map_cache; -static struct kmem_cache *extent_state_cache; -static struct kmem_cache *extent_buffer_cache; - -static LIST_HEAD(buffers); -static LIST_HEAD(states); - -static spinlock_t state_lock = SPIN_LOCK_UNLOCKED; -#define BUFFER_LRU_MAX 64 - -struct tree_entry { - u64 start; - u64 end; - int in_tree; - struct rb_node rb_node; -}; - -struct extent_page_data { - struct bio *bio; - struct extent_map_tree *tree; - get_extent_t *get_extent; -}; int __init extent_map_init(void) { @@ -50,72 +22,23 @@ int __init extent_map_init(void) NULL); if (!extent_map_cache) return -ENOMEM; - extent_state_cache = btrfs_cache_create("extent_state", - sizeof(struct extent_state), 0, - NULL); - if (!extent_state_cache) - goto free_map_cache; - extent_buffer_cache = btrfs_cache_create("extent_buffers", - sizeof(struct extent_buffer), 0, - NULL); - if (!extent_buffer_cache) - goto free_state_cache; return 0; - -free_state_cache: - kmem_cache_destroy(extent_state_cache); -free_map_cache: - kmem_cache_destroy(extent_map_cache); - return -ENOMEM; } void extent_map_exit(void) { - struct extent_state *state; - - while (!list_empty(&states)) { - state = list_entry(states.next, struct extent_state, list); - printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs)); - list_del(&state->list); - kmem_cache_free(extent_state_cache, state); - - } - if (extent_map_cache) kmem_cache_destroy(extent_map_cache); - if (extent_state_cache) - kmem_cache_destroy(extent_state_cache); - if (extent_buffer_cache) - kmem_cache_destroy(extent_buffer_cache); } -void extent_map_tree_init(struct extent_map_tree *tree, - struct address_space *mapping, gfp_t mask) +void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) { tree->map.rb_node = NULL; - tree->state.rb_node = NULL; - tree->ops = NULL; - tree->dirty_bytes = 0; - rwlock_init(&tree->lock); - spin_lock_init(&tree->lru_lock); - tree->mapping = mapping; - INIT_LIST_HEAD(&tree->buffer_lru); - tree->lru_size = 0; + tree->last = NULL; + spin_lock_init(&tree->lock); } EXPORT_SYMBOL(extent_map_tree_init); -void extent_map_tree_empty_lru(struct extent_map_tree *tree) -{ - struct extent_buffer *eb; - while(!list_empty(&tree->buffer_lru)) { - eb = list_entry(tree->buffer_lru.next, struct extent_buffer, - lru); - list_del_init(&eb->lru); - free_extent_buffer(eb); - } -} -EXPORT_SYMBOL(extent_map_tree_empty_lru); - struct extent_map *alloc_extent_map(gfp_t mask) { struct extent_map *em; @@ -123,6 +46,7 @@ struct extent_map *alloc_extent_map(gfp_t mask) if (!em || IS_ERR(em)) return em; em->in_tree = 0; + em->flags = 0; atomic_set(&em->refs, 1); return em; } @@ -132,6 +56,7 @@ void free_extent_map(struct extent_map *em) { if (!em) return; + WARN_ON(atomic_read(&em->refs) == 0); if (atomic_dec_and_test(&em->refs)) { WARN_ON(em->in_tree); kmem_cache_free(extent_map_cache, em); @@ -139,64 +64,28 @@ void free_extent_map(struct extent_map *em) } EXPORT_SYMBOL(free_extent_map); - -struct extent_state *alloc_extent_state(gfp_t mask) -{ - struct extent_state *state; - unsigned long flags; - - state = kmem_cache_alloc(extent_state_cache, mask); - if (!state || IS_ERR(state)) - return state; - state->state = 0; - state->in_tree = 0; - state->private = 0; - - spin_lock_irqsave(&state_lock, flags); - list_add(&state->list, &states); - spin_unlock_irqrestore(&state_lock, flags); - - atomic_set(&state->refs, 1); - init_waitqueue_head(&state->wq); - return state; -} -EXPORT_SYMBOL(alloc_extent_state); - -void free_extent_state(struct extent_state *state) -{ - unsigned long flags; - if (!state) - return; - if (atomic_dec_and_test(&state->refs)) { - WARN_ON(state->in_tree); - spin_lock_irqsave(&state_lock, flags); - list_del(&state->list); - spin_unlock_irqrestore(&state_lock, flags); - kmem_cache_free(extent_state_cache, state); - } -} -EXPORT_SYMBOL(free_extent_state); - static struct rb_node *tree_insert(struct rb_root *root, u64 offset, struct rb_node *node) { struct rb_node ** p = &root->rb_node; struct rb_node * parent = NULL; - struct tree_entry *entry; + struct extent_map *entry; while(*p) { parent = *p; - entry = rb_entry(parent, struct tree_entry, rb_node); + entry = rb_entry(parent, struct extent_map, rb_node); + + WARN_ON(!entry->in_tree); if (offset < entry->start) p = &(*p)->rb_left; - else if (offset > entry->end) + else if (offset >= extent_map_end(entry)) p = &(*p)->rb_right; else return parent; } - entry = rb_entry(node, struct tree_entry, rb_node); + entry = rb_entry(node, struct extent_map, rb_node); entry->in_tree = 1; rb_link_node(node, parent, p); rb_insert_color(node, root); @@ -210,17 +99,19 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, struct rb_node * n = root->rb_node; struct rb_node *prev = NULL; struct rb_node *orig_prev = NULL; - struct tree_entry *entry; - struct tree_entry *prev_entry = NULL; + struct extent_map *entry; + struct extent_map *prev_entry = NULL; while(n) { - entry = rb_entry(n, struct tree_entry, rb_node); + entry = rb_entry(n, struct extent_map, rb_node); prev = n; prev_entry = entry; + WARN_ON(!entry->in_tree); + if (offset < entry->start) n = n->rb_left; - else if (offset > entry->end) + else if (offset >= extent_map_end(entry)) n = n->rb_right; else return n; @@ -228,19 +119,19 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, if (prev_ret) { orig_prev = prev; - while(prev && offset > prev_entry->end) { + while(prev && offset >= extent_map_end(prev_entry)) { prev = rb_next(prev); - prev_entry = rb_entry(prev, struct tree_entry, rb_node); + prev_entry = rb_entry(prev, struct extent_map, rb_node); } *prev_ret = prev; prev = orig_prev; } if (next_ret) { - prev_entry = rb_entry(prev, struct tree_entry, rb_node); + prev_entry = rb_entry(prev, struct extent_map, rb_node); while(prev && offset < prev_entry->start) { prev = rb_prev(prev); - prev_entry = rb_entry(prev, struct tree_entry, rb_node); + prev_entry = rb_entry(prev, struct extent_map, rb_node); } *next_ret = prev; } @@ -257,22 +148,26 @@ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) return ret; } -static int tree_delete(struct rb_root *root, u64 offset) +static int mergable_maps(struct extent_map *prev, struct extent_map *next) { - struct rb_node *node; - struct tree_entry *entry; - - node = __tree_search(root, offset, NULL, NULL); - if (!node) - return -ENOENT; - entry = rb_entry(node, struct tree_entry, rb_node); - entry->in_tree = 0; - rb_erase(node, root); + if (extent_map_end(prev) == next->start && + prev->flags == next->flags && + prev->bdev == next->bdev && + ((next->block_start == EXTENT_MAP_HOLE && + prev->block_start == EXTENT_MAP_HOLE) || + (next->block_start == EXTENT_MAP_INLINE && + prev->block_start == EXTENT_MAP_INLINE) || + (next->block_start == EXTENT_MAP_DELALLOC && + prev->block_start == EXTENT_MAP_DELALLOC) || + (next->block_start < EXTENT_MAP_LAST_BYTE - 1 && + next->block_start == extent_map_block_end(prev)))) { + return 1; + } return 0; } /* - * add_extent_mapping tries a simple backward merge with existing + * add_extent_mapping tries a simple forward/backward merge with existing * mappings. The extent_map struct passed in will be inserted into * the tree directly (no copies made, just a reference taken). */ @@ -280,13 +175,12 @@ int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) { int ret = 0; - struct extent_map *prev = NULL; + struct extent_map *merge = NULL; struct rb_node *rb; - write_lock_irq(&tree->lock); - rb = tree_insert(&tree->map, em->end, &em->rb_node); + rb = tree_insert(&tree->map, em->start, &em->rb_node); if (rb) { - prev = rb_entry(rb, struct extent_map, rb_node); + merge = rb_entry(rb, struct extent_map, rb_node); ret = -EEXIST; goto out; } @@ -294,53 +188,60 @@ int add_extent_mapping(struct extent_map_tree *tree, if (em->start != 0) { rb = rb_prev(&em->rb_node); if (rb) - prev = rb_entry(rb, struct extent_map, rb_node); - if (prev && prev->end + 1 == em->start && - ((em->block_start == EXTENT_MAP_HOLE && - prev->block_start == EXTENT_MAP_HOLE) || - (em->block_start == EXTENT_MAP_INLINE && - prev->block_start == EXTENT_MAP_INLINE) || - (em->block_start == EXTENT_MAP_DELALLOC && - prev->block_start == EXTENT_MAP_DELALLOC) || - (em->block_start < EXTENT_MAP_DELALLOC - 1 && - em->block_start == prev->block_end + 1))) { - em->start = prev->start; - em->block_start = prev->block_start; - rb_erase(&prev->rb_node, &tree->map); - prev->in_tree = 0; - free_extent_map(prev); + merge = rb_entry(rb, struct extent_map, rb_node); + if (rb && mergable_maps(merge, em)) { + em->start = merge->start; + em->len += merge->len; + em->block_start = merge->block_start; + merge->in_tree = 0; + rb_erase(&merge->rb_node, &tree->map); + free_extent_map(merge); } } + rb = rb_next(&em->rb_node); + if (rb) + merge = rb_entry(rb, struct extent_map, rb_node); + if (rb && mergable_maps(em, merge)) { + em->len += merge->len; + rb_erase(&merge->rb_node, &tree->map); + merge->in_tree = 0; + free_extent_map(merge); + } + tree->last = em; out: - write_unlock_irq(&tree->lock); return ret; } EXPORT_SYMBOL(add_extent_mapping); +static u64 range_end(u64 start, u64 len) +{ + if (start + len < start) + return (u64)-1; + return start + len; +} + /* * lookup_extent_mapping returns the first extent_map struct in the - * tree that intersects the [start, end] (inclusive) range. There may + * tree that intersects the [start, len] range. There may * be additional objects in the tree that intersect, so check the object * returned carefully to make sure you don't need additional lookups. */ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, - u64 start, u64 end) + u64 start, u64 len) { struct extent_map *em; struct rb_node *rb_node; - struct rb_node *prev = NULL; - struct rb_node *next = NULL; + struct rb_node *prev = NULL; struct rb_node *next = NULL; u64 end = range_end(start, len); em = tree->last; if (em && end > em->start && start < extent_map_end(em)) goto found; - read_lock_irq(&tree->lock); rb_node = __tree_search(&tree->map, start, &prev, &next); if (!rb_node && prev) { em = rb_entry(prev, struct extent_map, rb_node); - if (em->start <= end && em->end >= start) + if (end > em->start && start < extent_map_end(em)) goto found; } if (!rb_node && next) { em = rb_entry(next, struct extent_map, rb_node); - if (em->start <= end && em->end >= start) + if (end > em->start && start < extent_map_end(em)) goto found; } if (!rb_node) { @@ -352,14 +253,16 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, goto out; } em = rb_entry(rb_node, struct extent_map, rb_node); - if (em->end < start || em->start > end) { - em = NULL; - goto out; - } + if (end > em->start && start < extent_map_end(em)) + goto found; + + em = NULL; + goto out; + found: atomic_inc(&em->refs); + tree->last = em; out: - read_unlock_irq(&tree->lock); return em; } EXPORT_SYMBOL(lookup_extent_mapping); @@ -370,2866 +273,12 @@ EXPORT_SYMBOL(lookup_extent_mapping); */ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) { - int ret; + int ret = 0; - write_lock_irq(&tree->lock); - ret = tree_delete(&tree->map, em->end); - write_unlock_irq(&tree->lock); + rb_erase(&em->rb_node, &tree->map); + em->in_tree = 0; + if (tree->last == em) + tree->last = NULL; return ret; } EXPORT_SYMBOL(remove_extent_mapping); - -/* - * utility function to look for merge candidates inside a given range. - * Any extents with matching state are merged together into a single - * extent in the tree. Extents with EXTENT_IO in their state field - * are not merged because the end_io handlers need to be able to do - * operations on them without sleeping (or doing allocations/splits). - * - * This should be called with the tree lock held. - */ -static int merge_state(struct extent_map_tree *tree, - struct extent_state *state) -{ - struct extent_state *other; - struct rb_node *other_node; - - if (state->state & EXTENT_IOBITS) - return 0; - - other_node = rb_prev(&state->rb_node); - if (other_node) { - other = rb_entry(other_node, struct extent_state, rb_node); - if (other->end == state->start - 1 && - other->state == state->state) { - state->start = other->start; - other->in_tree = 0; - rb_erase(&other->rb_node, &tree->state); - free_extent_state(other); - } - } - other_node = rb_next(&state->rb_node); - if (other_node) { - other = rb_entry(other_node, struct extent_state, rb_node); - if (other->start == state->end + 1 && - other->state == state->state) { - other->start = state->start; - state->in_tree = 0; - rb_erase(&state->rb_node, &tree->state); - free_extent_state(state); - } - } - return 0; -} - -/* - * insert an extent_state struct into the tree. 'bits' are set on the - * struct before it is inserted. - * - * This may return -EEXIST if the extent is already there, in which case the - * state struct is freed. - * - * The tree lock is not taken internally. This is a utility function and - * probably isn't what you want to call (see set/clear_extent_bit). - */ -static int insert_state(struct extent_map_tree *tree, - struct extent_state *state, u64 start, u64 end, - int bits) -{ - struct rb_node *node; - - if (end < start) { - printk("end < start %Lu %Lu\n", end, start); - WARN_ON(1); - } - if (bits & EXTENT_DIRTY) - tree->dirty_bytes += end - start + 1; - state->state |= bits; - state->start = start; - state->end = end; - node = tree_insert(&tree->state, end, &state->rb_node); - if (node) { - struct extent_state *found; - found = rb_entry(node, struct extent_state, rb_node); - printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); - free_extent_state(state); - return -EEXIST; - } - merge_state(tree, state); - return 0; -} - -/* - * split a given extent state struct in two, inserting the preallocated - * struct 'prealloc' as the newly created second half. 'split' indicates an - * offset inside 'orig' where it should be split. - * - * Before calling, - * the tree has 'orig' at [orig->start, orig->end]. After calling, there - * are two extent state structs in the tree: - * prealloc: [orig->start, split - 1] - * orig: [ split, orig->end ] - * - * The tree locks are not taken by this function. They need to be held - * by the caller. - */ -static int split_state(struct extent_map_tree *tree, struct extent_state *orig, - struct extent_state *prealloc, u64 split) -{ - struct rb_node *node; - prealloc->start = orig->start; - prealloc->end = split - 1; - prealloc->state = orig->state; - orig->start = split; - - node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); - if (node) { - struct extent_state *found; - found = rb_entry(node, struct extent_state, rb_node); - printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); - free_extent_state(prealloc); - return -EEXIST; - } - return 0; -} - -/* - * utility function to clear some bits in an extent state struct. - * it will optionally wake up any one waiting on this state (wake == 1), or - * forcibly remove the state from the tree (delete == 1). - * - * If no bits are set on the state struct after clearing things, the - * struct is freed and removed from the tree - */ -static int clear_state_bit(struct extent_map_tree *tree, - struct extent_state *state, int bits, int wake, - int delete) -{ - int ret = state->state & bits; - - if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { - u64 range = state->end - state->start + 1; - WARN_ON(range > tree->dirty_bytes); - tree->dirty_bytes -= range; - } - state->state &= ~bits; - if (wake) - wake_up(&state->wq); - if (delete || state->state == 0) { - if (state->in_tree) { - rb_erase(&state->rb_node, &tree->state); - state->in_tree = 0; - free_extent_state(state); - } else { - WARN_ON(1); - } - } else { - merge_state(tree, state); - } - return ret; -} - -/* - * clear some bits on a range in the tree. This may require splitting - * or inserting elements in the tree, so the gfp mask is used to - * indicate which allocations or sleeping are allowed. - * - * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove - * the given range from the tree regardless of state (ie for truncate). - * - * the range [start, end] is inclusive. - * - * This takes the tree lock, and returns < 0 on error, > 0 if any of the - * bits were already set, or zero if none of the bits were already set. - */ -int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, - int bits, int wake, int delete, gfp_t mask) -{ - struct extent_state *state; - struct extent_state *prealloc = NULL; - struct rb_node *node; - unsigned long flags; - int err; - int set = 0; - -again: - if (!prealloc && (mask & __GFP_WAIT)) { - prealloc = alloc_extent_state(mask); - if (!prealloc) - return -ENOMEM; - } - - write_lock_irqsave(&tree->lock, flags); - /* - * this search will find the extents that end after - * our range starts - */ - node = tree_search(&tree->state, start); - if (!node) - goto out; - state = rb_entry(node, struct extent_state, rb_node); - if (state->start > end) - goto out; - WARN_ON(state->end < start); - - /* - * | ---- desired range ---- | - * | state | or - * | ------------- state -------------- | - * - * We need to split the extent we found, and may flip - * bits on second half. - * - * If the extent we found extends past our range, we - * just split and search again. It'll get split again - * the next time though. - * - * If the extent we found is inside our range, we clear - * the desired bit on it. - */ - - if (state->start < start) { - err = split_state(tree, state, prealloc, start); - BUG_ON(err == -EEXIST); - prealloc = NULL; - if (err) - goto out; - if (state->end <= end) { - start = state->end + 1; - set |= clear_state_bit(tree, state, bits, - wake, delete); - } else { - start = state->start; - } - goto search_again; - } - /* - * | ---- desired range ---- | - * | state | - * We need to split the extent, and clear the bit - * on the first half - */ - if (state->start <= end && state->end > end) { - err = split_state(tree, state, prealloc, end + 1); - BUG_ON(err == -EEXIST); - - if (wake) - wake_up(&state->wq); - set |= clear_state_bit(tree, prealloc, bits, - wake, delete); - prealloc = NULL; - goto out; - } - - start = state->end + 1; - set |= clear_state_bit(tree, state, bits, wake, delete); - goto search_again; - -out: - write_unlock_irqrestore(&tree->lock, flags); - if (prealloc) - free_extent_state(prealloc); - - return set; - -search_again: - if (start > end) - goto out; - write_unlock_irqrestore(&tree->lock, flags); - if (mask & __GFP_WAIT) - cond_resched(); - goto again; -} -EXPORT_SYMBOL(clear_extent_bit); - -static int wait_on_state(struct extent_map_tree *tree, - struct extent_state *state) -{ - DEFINE_WAIT(wait); - prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); - read_unlock_irq(&tree->lock); - schedule(); - read_lock_irq(&tree->lock); - finish_wait(&state->wq, &wait); - return 0; -} - -/* - * waits for one or more bits to clear on a range in the state tree. - * The range [start, end] is inclusive. - * The tree lock is taken by this function - */ -int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits) -{ - struct extent_state *state; - struct rb_node *node; - - read_lock_irq(&tree->lock); -again: - while (1) { - /* - * this search will find all the extents that end after - * our range starts - */ - node = tree_search(&tree->state, start); - if (!node) - break; - - state = rb_entry(node, struct extent_state, rb_node); - - if (state->start > end) - goto out; - - if (state->state & bits) { - start = state->start; - atomic_inc(&state->refs); - wait_on_state(tree, state); - free_extent_state(state); - goto again; - } - start = state->end + 1; - - if (start > end) - break; - - if (need_resched()) { - read_unlock_irq(&tree->lock); - cond_resched(); - read_lock_irq(&tree->lock); - } - } -out: - read_unlock_irq(&tree->lock); - return 0; -} -EXPORT_SYMBOL(wait_extent_bit); - -static void set_state_bits(struct extent_map_tree *tree, - struct extent_state *state, - int bits) -{ - if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { - u64 range = state->end - state->start + 1; - tree->dirty_bytes += range; - } - state->state |= bits; -} - -/* - * set some bits on a range in the tree. This may require allocations - * or sleeping, so the gfp mask is used to indicate what is allowed. - * - * If 'exclusive' == 1, this will fail with -EEXIST if some part of the - * range already has the desired bits set. The start of the existing - * range is returned in failed_start in this case. - * - * [start, end] is inclusive - * This takes the tree lock. - */ -int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits, - int exclusive, u64 *failed_start, gfp_t mask) -{ - struct extent_state *state; - struct extent_state *prealloc = NULL; - struct rb_node *node; - unsigned long flags; - int err = 0; - int set; - u64 last_start; - u64 last_end; -again: - if (!prealloc && (mask & __GFP_WAIT)) { - prealloc = alloc_extent_state(mask); - if (!prealloc) - return -ENOMEM; - } - - write_lock_irqsave(&tree->lock, flags); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(&tree->state, start); - if (!node) { - err = insert_state(tree, prealloc, start, end, bits); - prealloc = NULL; - BUG_ON(err == -EEXIST); - goto out; - } - - state = rb_entry(node, struct extent_state, rb_node); - last_start = state->start; - last_end = state->end; - - /* - * | ---- desired range ---- | - * | state | - * - * Just lock what we found and keep going - */ - if (state->start == start && state->end <= end) { - set = state->state & bits; - if (set && exclusive) { - *failed_start = state->start; - err = -EEXIST; - goto out; - } - set_state_bits(tree, state, bits); - start = state->end + 1; - merge_state(tree, state); - goto search_again; - } - - /* - * | ---- desired range ---- | - * | state | - * or - * | ------------- state -------------- | - * - * We need to split the extent we found, and may flip bits on - * second half. - * - * If the extent we found extends past our - * range, we just split and search again. It'll get split - * again the next time though. - * - * If the extent we found is inside our range, we set the - * desired bit on it. - */ - if (state->start < start) { - set = state->state & bits; - if (exclusive && set) { - *failed_start = start; - err = -EEXIST; - goto out; - } - err = split_state(tree, state, prealloc, start); - BUG_ON(err == -EEXIST); - prealloc = NULL; - if (err) - goto out; - if (state->end <= end) { - set_state_bits(tree, state, bits); - start = state->end + 1; - merge_state(tree, state); - } else { - start = state->start; - } - goto search_again; - } - /* - * | ---- desired range ---- | - * | state | or | state | - * - * There's a hole, we need to insert something in it and - * ignore the extent we found. - */ - if (state->start > start) { - u64 this_end; - if (end < last_start) - this_end = end; - else - this_end = last_start -1; - err = insert_state(tree, prealloc, start, this_end, - bits); - prealloc = NULL; - BUG_ON(err == -EEXIST); - if (err) - goto out; - start = this_end + 1; - goto search_again; - } - /* - * | ---- desired range ---- | - * | state | - * We need to split the extent, and set the bit - * on the first half - */ - if (state->start <= end && state->end > end) { - set = state->state & bits; - if (exclusive && set) { - *failed_start = start; - err = -EEXIST; - goto out; - } - err = split_state(tree, state, prealloc, end + 1); - BUG_ON(err == -EEXIST); - - set_state_bits(tree, prealloc, bits); - merge_state(tree, prealloc); - prealloc = NULL; - goto out; - } - - goto search_again; - -out: - write_unlock_irqrestore(&tree->lock, flags); - if (prealloc) - free_extent_state(prealloc); - - return err; - -search_again: - if (start > end) - goto out; - write_unlock_irqrestore(&tree->lock, flags); - if (mask & __GFP_WAIT) - cond_resched(); - goto again; -} -EXPORT_SYMBOL(set_extent_bit); - -/* wrappers around set/clear extent bit */ -int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_dirty); - -int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, - int bits, gfp_t mask) -{ - return set_extent_bit(tree, start, end, bits, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_bits); - -int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, - int bits, gfp_t mask) -{ - return clear_extent_bit(tree, start, end, bits, 0, 0, mask); -} -EXPORT_SYMBOL(clear_extent_bits); - -int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, - EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_delalloc); - -int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, - EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); -} -EXPORT_SYMBOL(clear_extent_dirty); - -int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_new); - -int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); -} -EXPORT_SYMBOL(clear_extent_new); - -int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_uptodate); - -int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); -} -EXPORT_SYMBOL(clear_extent_uptodate); - -int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, - 0, NULL, mask); -} -EXPORT_SYMBOL(set_extent_writeback); - -int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask); -} -EXPORT_SYMBOL(clear_extent_writeback); - -int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end) -{ - return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK); -} -EXPORT_SYMBOL(wait_on_extent_writeback); - -/* - * locks a range in ascending order, waiting for any locked regions - * it hits on the way. [start,end] are inclusive, and this will sleep. - */ -int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask) -{ - int err; - u64 failed_start; - while (1) { - err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, - &failed_start, mask); - if (err == -EEXIST && (mask & __GFP_WAIT)) { - wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); - start = failed_start; - } else { - break; - } - WARN_ON(start > end); - } - return err; -} -EXPORT_SYMBOL(lock_extent); - -int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); -} -EXPORT_SYMBOL(unlock_extent); - -/* - * helper function to set pages and extents in the tree dirty - */ -int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - - while (index <= end_index) { - page = find_get_page(tree->mapping, index); - BUG_ON(!page); - __set_page_dirty_nobuffers(page); - page_cache_release(page); - index++; - } - set_extent_dirty(tree, start, end, GFP_NOFS); - return 0; -} -EXPORT_SYMBOL(set_range_dirty); - -/* - * helper function to set both pages and extents in the tree writeback - */ -int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - - while (index <= end_index) { - page = find_get_page(tree->mapping, index); - BUG_ON(!page); - set_page_writeback(page); - page_cache_release(page); - index++; - } - set_extent_writeback(tree, start, end, GFP_NOFS); - return 0; -} -EXPORT_SYMBOL(set_range_writeback); - -int find_first_extent_bit(struct extent_map_tree *tree, u64 start, - u64 *start_ret, u64 *end_ret, int bits) -{ - struct rb_node *node; - struct extent_state *state; - int ret = 1; - - read_lock_irq(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(&tree->state, start); - if (!node || IS_ERR(node)) { - goto out; - } - - while(1) { - state = rb_entry(node, struct extent_state, rb_node); - if (state->end >= start && (state->state & bits)) { - *start_ret = state->start; - *end_ret = state->end; - ret = 0; - break; - } - node = rb_next(node); - if (!node) - break; - } -out: - read_unlock_irq(&tree->lock); - return ret; -} -EXPORT_SYMBOL(find_first_extent_bit); - -u64 find_lock_delalloc_range(struct extent_map_tree *tree, - u64 *start, u64 *end, u64 max_bytes) -{ - struct rb_node *node; - struct extent_state *state; - u64 cur_start = *start; - u64 found = 0; - u64 total_bytes = 0; - - write_lock_irq(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ -search_again: - node = tree_search(&tree->state, cur_start); - if (!node || IS_ERR(node)) { - *end = (u64)-1; - goto out; - } - - while(1) { - state = rb_entry(node, struct extent_state, rb_node); - if (found && state->start != cur_start) { - goto out; - } - if (!(state->state & EXTENT_DELALLOC)) { - if (!found) - *end = state->end; - goto out; - } - if (!found) { - struct extent_state *prev_state; - struct rb_node *prev_node = node; - while(1) { - prev_node = rb_prev(prev_node); - if (!prev_node) - break; - prev_state = rb_entry(prev_node, - struct extent_state, - rb_node); - if (!(prev_state->state & EXTENT_DELALLOC)) - break; - state = prev_state; - node = prev_node; - } - } - if (state->state & EXTENT_LOCKED) { - DEFINE_WAIT(wait); - atomic_inc(&state->refs); - prepare_to_wait(&state->wq, &wait, - TASK_UNINTERRUPTIBLE); - write_unlock_irq(&tree->lock); - schedule(); - write_lock_irq(&tree->lock); - finish_wait(&state->wq, &wait); - free_extent_state(state); - goto search_again; - } - state->state |= EXTENT_LOCKED; - if (!found) - *start = state->start; - found++; - *end = state->end; - cur_start = state->end + 1; - node = rb_next(node); - if (!node) - break; - total_bytes += state->end - state->start + 1; - if (total_bytes >= max_bytes) - break; - } -out: - write_unlock_irq(&tree->lock); - return found; -} - -u64 count_range_bits(struct extent_map_tree *tree, - u64 *start, u64 search_end, u64 max_bytes, - unsigned long bits) -{ - struct rb_node *node; - struct extent_state *state; - u64 cur_start = *start; - u64 total_bytes = 0; - int found = 0; - - if (search_end <= cur_start) { - printk("search_end %Lu start %Lu\n", search_end, cur_start); - WARN_ON(1); - return 0; - } - - write_lock_irq(&tree->lock); - if (cur_start == 0 && bits == EXTENT_DIRTY) { - total_bytes = tree->dirty_bytes; - goto out; - } - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(&tree->state, cur_start); - if (!node || IS_ERR(node)) { - goto out; - } - - while(1) { - state = rb_entry(node, struct extent_state, rb_node); - if (state->start > search_end) - break; - if (state->end >= cur_start && (state->state & bits)) { - total_bytes += min(search_end, state->end) + 1 - - max(cur_start, state->start); - if (total_bytes >= max_bytes) - break; - if (!found) { - *start = state->start; - found = 1; - } - } - node = rb_next(node); - if (!node) - break; - } -out: - write_unlock_irq(&tree->lock); - return total_bytes; -} -/* - * helper function to lock both pages and extents in the tree. - * pages must be locked first. - */ -int lock_range(struct extent_map_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - int err; - - while (index <= end_index) { - page = grab_cache_page(tree->mapping, index); - if (!page) { - err = -ENOMEM; - goto failed; - } - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto failed; - } - index++; - } - lock_extent(tree, start, end, GFP_NOFS); - return 0; - -failed: - /* - * we failed above in getting the page at 'index', so we undo here - * up to but not including the page at 'index' - */ - end_index = index; - index = start >> PAGE_CACHE_SHIFT; - while (index < end_index) { - page = find_get_page(tree->mapping, index); - unlock_page(page); - page_cache_release(page); - index++; - } - return err; -} -EXPORT_SYMBOL(lock_range); - -/* - * helper function to unlock both pages and extents in the tree. - */ -int unlock_range(struct extent_map_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - - while (index <= end_index) { - page = find_get_page(tree->mapping, index); - unlock_page(page); - page_cache_release(page); - index++; - } - unlock_extent(tree, start, end, GFP_NOFS); - return 0; -} -EXPORT_SYMBOL(unlock_range); - -int set_state_private(struct extent_map_tree *tree, u64 start, u64 private) -{ - struct rb_node *node; - struct extent_state *state; - int ret = 0; - - write_lock_irq(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(&tree->state, start); - if (!node || IS_ERR(node)) { - ret = -ENOENT; - goto out; - } - state = rb_entry(node, struct extent_state, rb_node); - if (state->start != start) { - ret = -ENOENT; - goto out; - } - state->private = private; -out: - write_unlock_irq(&tree->lock); - return ret; -} - -int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private) -{ - struct rb_node *node; - struct extent_state *state; - int ret = 0; - - read_lock_irq(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(&tree->state, start); - if (!node || IS_ERR(node)) { - ret = -ENOENT; - goto out; - } - state = rb_entry(node, struct extent_state, rb_node); - if (state->start != start) { - ret = -ENOENT; - goto out; - } - *private = state->private; -out: - read_unlock_irq(&tree->lock); - return ret; -} - -/* - * searches a range in the state tree for a given mask. - * If 'filled' == 1, this returns 1 only if ever extent in the tree - * has the bits set. Otherwise, 1 is returned if any bit in the - * range is found set. - */ -int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, - int bits, int filled) -{ - struct extent_state *state = NULL; - struct rb_node *node; - int bitset = 0; - - read_lock_irq(&tree->lock); - node = tree_search(&tree->state, start); - while (node && start <= end) { - state = rb_entry(node, struct extent_state, rb_node); - - if (filled && state->start > start) { - bitset = 0; - break; - } - - if (state->start > end) - break; - - if (state->state & bits) { - bitset = 1; - if (!filled) - break; - } else if (filled) { - bitset = 0; - break; - } - start = state->end + 1; - if (start > end) - break; - node = rb_next(node); - if (!node) { - if (filled) - bitset = 0; - break; - } - } - read_unlock_irq(&tree->lock); - return bitset; -} -EXPORT_SYMBOL(test_range_bit); - -/* - * helper function to set a given page up to date if all the - * extents in the tree for that page are up to date - */ -static int check_page_uptodate(struct extent_map_tree *tree, - struct page *page) -{ - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 end = start + PAGE_CACHE_SIZE - 1; - if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) - SetPageUptodate(page); - return 0; -} - -/* - * helper function to unlock a page if all the extents in the tree - * for that page are unlocked - */ -static int check_page_locked(struct extent_map_tree *tree, - struct page *page) -{ - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 end = start + PAGE_CACHE_SIZE - 1; - if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) - unlock_page(page); - return 0; -} - -/* - * helper function to end page writeback if all the extents - * in the tree for that page are done with writeback - */ -static int check_page_writeback(struct extent_map_tree *tree, - struct page *page) -{ - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 end = start + PAGE_CACHE_SIZE - 1; - if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) - end_page_writeback(page); - return 0; -} - -/* lots and lots of room for performance fixes in the end_bio funcs */ - -/* - * after a writepage IO is done, we need to: - * clear the uptodate bits on error - * clear the writeback bits in the extent tree for this IO - * end_page_writeback if the page has no more pending IO - * - * Scheduling is not allowed, so the extent state tree is expected - * to have one and only one object corresponding to this IO. - */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) -static void end_bio_extent_writepage(struct bio *bio, int err) -#else -static int end_bio_extent_writepage(struct bio *bio, - unsigned int bytes_done, int err) -#endif -{ - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_map_tree *tree = bio->bi_private; - u64 start; - u64 end; - int whole_page; - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif - - do { - struct page *page = bvec->bv_page; - start = ((u64)page->index << PAGE_CACHE_SHIFT) + - bvec->bv_offset; - end = start + bvec->bv_len - 1; - - if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) - whole_page = 1; - else - whole_page = 0; - - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - - if (!uptodate) { - clear_extent_uptodate(tree, start, end, GFP_ATOMIC); - ClearPageUptodate(page); - SetPageError(page); - } - clear_extent_writeback(tree, start, end, GFP_ATOMIC); - - if (whole_page) - end_page_writeback(page); - else - check_page_writeback(tree, page); - if (tree->ops && tree->ops->writepage_end_io_hook) - tree->ops->writepage_end_io_hook(page, start, end); - } while (bvec >= bio->bi_io_vec); - - bio_put(bio); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif -} - -/* - * after a readpage IO is done, we need to: - * clear the uptodate bits on error - * set the uptodate bits if things worked - * set the page up to date if all extents in the tree are uptodate - * clear the lock bit in the extent tree - * unlock the page if there are no other extents locked for it - * - * Scheduling is not allowed, so the extent state tree is expected - * to have one and only one object corresponding to this IO. - */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) -static void end_bio_extent_readpage(struct bio *bio, int err) -#else -static int end_bio_extent_readpage(struct bio *bio, - unsigned int bytes_done, int err) -#endif -{ - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_map_tree *tree = bio->bi_private; - u64 start; - u64 end; - int whole_page; - int ret; - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif - - do { - struct page *page = bvec->bv_page; - start = ((u64)page->index << PAGE_CACHE_SHIFT) + - bvec->bv_offset; - end = start + bvec->bv_len - 1; - - if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) - whole_page = 1; - else - whole_page = 0; - - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - - if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { - ret = tree->ops->readpage_end_io_hook(page, start, end); - if (ret) - uptodate = 0; - } - if (uptodate) { - set_extent_uptodate(tree, start, end, GFP_ATOMIC); - if (whole_page) - SetPageUptodate(page); - else - check_page_uptodate(tree, page); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - - unlock_extent(tree, start, end, GFP_ATOMIC); - - if (whole_page) - unlock_page(page); - else - check_page_locked(tree, page); - } while (bvec >= bio->bi_io_vec); - - bio_put(bio); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif -} - -/* - * IO done from prepare_write is pretty simple, we just unlock - * the structs in the extent tree when done, and set the uptodate bits - * as appropriate. - */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) -static void end_bio_extent_preparewrite(struct bio *bio, int err) -#else -static int end_bio_extent_preparewrite(struct bio *bio, - unsigned int bytes_done, int err) -#endif -{ - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_map_tree *tree = bio->bi_private; - u64 start; - u64 end; - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif - - do { - struct page *page = bvec->bv_page; - start = ((u64)page->index << PAGE_CACHE_SHIFT) + - bvec->bv_offset; - end = start + bvec->bv_len - 1; - - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - - if (uptodate) { - set_extent_uptodate(tree, start, end, GFP_ATOMIC); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - - unlock_extent(tree, start, end, GFP_ATOMIC); - - } while (bvec >= bio->bi_io_vec); - - bio_put(bio); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif -} - -static struct bio * -extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, - gfp_t gfp_flags) -{ - struct bio *bio; - - bio = bio_alloc(gfp_flags, nr_vecs); - - if (bio == NULL && (current->flags & PF_MEMALLOC)) { - while (!bio && (nr_vecs /= 2)) - bio = bio_alloc(gfp_flags, nr_vecs); - } - - if (bio) { - bio->bi_bdev = bdev; - bio->bi_sector = first_sector; - } - return bio; -} - -static int submit_one_bio(int rw, struct bio *bio) -{ - u64 maxsector; - int ret = 0; - - bio_get(bio); - - maxsector = bio->bi_bdev->bd_inode->i_size >> 9; - if (maxsector < bio->bi_sector) { - printk("sector too large max %Lu got %llu\n", maxsector, - (unsigned long long)bio->bi_sector); - WARN_ON(1); - } - - submit_bio(rw, bio); - if (bio_flagged(bio, BIO_EOPNOTSUPP)) - ret = -EOPNOTSUPP; - bio_put(bio); - return ret; -} - -static int submit_extent_page(int rw, struct extent_map_tree *tree, - struct page *page, sector_t sector, - size_t size, unsigned long offset, - struct block_device *bdev, - struct bio **bio_ret, - unsigned long max_pages, - bio_end_io_t end_io_func) -{ - int ret = 0; - struct bio *bio; - int nr; - - if (bio_ret && *bio_ret) { - bio = *bio_ret; - if (bio->bi_sector + (bio->bi_size >> 9) != sector || - bio_add_page(bio, page, size, offset) < size) { - ret = submit_one_bio(rw, bio); - bio = NULL; - } else { - return 0; - } - } - nr = min_t(int, max_pages, bio_get_nr_vecs(bdev)); - bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); - if (!bio) { - printk("failed to allocate bio nr %d\n", nr); - } - bio_add_page(bio, page, size, offset); - bio->bi_end_io = end_io_func; - bio->bi_private = tree; - if (bio_ret) { - *bio_ret = bio; - } else { - ret = submit_one_bio(rw, bio); - } - - return ret; -} - -void set_page_extent_mapped(struct page *page) -{ - if (!PagePrivate(page)) { - SetPagePrivate(page); - WARN_ON(!page->mapping->a_ops->invalidatepage); - set_page_private(page, EXTENT_PAGE_PRIVATE); - page_cache_get(page); - } -} - -void set_page_extent_head(struct page *page, unsigned long len) -{ - set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); -} - -/* - * basic readpage implementation. Locked extent state structs are inserted - * into the tree that are removed when the IO is done (by the end_io - * handlers) - */ -static int __extent_read_full_page(struct extent_map_tree *tree, - struct page *page, - get_extent_t *get_extent, - struct bio **bio) -{ - struct inode *inode = page->mapping->host; - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 page_end = start + PAGE_CACHE_SIZE - 1; - u64 end; - u64 cur = start; - u64 extent_offset; - u64 last_byte = i_size_read(inode); - u64 block_start; - u64 cur_end; - sector_t sector; - struct extent_map *em; - struct block_device *bdev; - int ret; - int nr = 0; - size_t page_offset = 0; - size_t iosize; - size_t blocksize = inode->i_sb->s_blocksize; - - set_page_extent_mapped(page); - - end = page_end; - lock_extent(tree, start, end, GFP_NOFS); - - while (cur <= end) { - if (cur >= last_byte) { - char *userpage; - iosize = PAGE_CACHE_SIZE - page_offset; - userpage = kmap_atomic(page, KM_USER0); - memset(userpage + page_offset, 0, iosize); - flush_dcache_page(page); - kunmap_atomic(userpage, KM_USER0); - set_extent_uptodate(tree, cur, cur + iosize - 1, - GFP_NOFS); - unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); - break; - } - em = get_extent(inode, page, page_offset, cur, end, 0); - if (IS_ERR(em) || !em) { - SetPageError(page); - unlock_extent(tree, cur, end, GFP_NOFS); - break; - } - - extent_offset = cur - em->start; - BUG_ON(em->end < cur); - BUG_ON(end < cur); - - iosize = min(em->end - cur, end - cur) + 1; - cur_end = min(em->end, end); - iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); - sector = (em->block_start + extent_offset) >> 9; - bdev = em->bdev; - block_start = em->block_start; - free_extent_map(em); - em = NULL; - - /* we've found a hole, just zero and go on */ - if (block_start == EXTENT_MAP_HOLE) { - char *userpage; - userpage = kmap_atomic(page, KM_USER0); - memset(userpage + page_offset, 0, iosize); - flush_dcache_page(page); - kunmap_atomic(userpage, KM_USER0); - - set_extent_uptodate(tree, cur, cur + iosize - 1, - GFP_NOFS); - unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); - cur = cur + iosize; - page_offset += iosize; - continue; - } - /* the get_extent function already copied into the page */ - if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { - unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); - cur = cur + iosize; - page_offset += iosize; - continue; - } - - ret = 0; - if (tree->ops && tree->ops->readpage_io_hook) { - ret = tree->ops->readpage_io_hook(page, cur, - cur + iosize - 1); - } - if (!ret) { - unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1; - nr -= page->index; - ret = submit_extent_page(READ, tree, page, - sector, iosize, page_offset, - bdev, bio, nr, - end_bio_extent_readpage); - } - if (ret) - SetPageError(page); - cur = cur + iosize; - page_offset += iosize; - nr++; - } - if (!nr) { - if (!PageError(page)) - SetPageUptodate(page); - unlock_page(page); - } - return 0; -} - -int extent_read_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent) -{ - struct bio *bio = NULL; - int ret; - - ret = __extent_read_full_page(tree, page, get_extent, &bio); - if (bio) - submit_one_bio(READ, bio); - return ret; -} -EXPORT_SYMBOL(extent_read_full_page); - -/* - * the writepage semantics are similar to regular writepage. extent - * records are inserted to lock ranges in the tree, and as dirty areas - * are found, they are marked writeback. Then the lock bits are removed - * and the end_io handler clears the writeback ranges - */ -static int __extent_writepage(struct page *page, struct writeback_control *wbc, - void *data) -{ - struct inode *inode = page->mapping->host; - struct extent_page_data *epd = data; - struct extent_map_tree *tree = epd->tree; - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 delalloc_start; - u64 page_end = start + PAGE_CACHE_SIZE - 1; - u64 end; - u64 cur = start; - u64 extent_offset; - u64 last_byte = i_size_read(inode); - u64 block_start; - u64 iosize; - sector_t sector; - struct extent_map *em; - struct block_device *bdev; - int ret; - int nr = 0; - size_t page_offset = 0; - size_t blocksize; - loff_t i_size = i_size_read(inode); - unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; - u64 nr_delalloc; - u64 delalloc_end; - - WARN_ON(!PageLocked(page)); - if (page->index > end_index) { - clear_extent_dirty(tree, start, page_end, GFP_NOFS); - unlock_page(page); - return 0; - } - - if (page->index == end_index) { - char *userpage; - - size_t offset = i_size & (PAGE_CACHE_SIZE - 1); - - userpage = kmap_atomic(page, KM_USER0); - memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - kunmap_atomic(userpage, KM_USER0); - } - - set_page_extent_mapped(page); - - delalloc_start = start; - delalloc_end = 0; - while(delalloc_end < page_end) { - nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start, - &delalloc_end, - 128 * 1024 * 1024); - if (nr_delalloc == 0) { - delalloc_start = delalloc_end + 1; - continue; - } - tree->ops->fill_delalloc(inode, delalloc_start, - delalloc_end); - clear_extent_bit(tree, delalloc_start, - delalloc_end, - EXTENT_LOCKED | EXTENT_DELALLOC, - 1, 0, GFP_NOFS); - delalloc_start = delalloc_end + 1; - } - lock_extent(tree, start, page_end, GFP_NOFS); - - end = page_end; - if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { - printk("found delalloc bits after lock_extent\n"); - } - - if (last_byte <= start) { - clear_extent_dirty(tree, start, page_end, GFP_NOFS); - goto done; - } - - set_extent_uptodate(tree, start, page_end, GFP_NOFS); - blocksize = inode->i_sb->s_blocksize; - - while (cur <= end) { - if (cur >= last_byte) { - clear_extent_dirty(tree, cur, page_end, GFP_NOFS); - break; - } - em = epd->get_extent(inode, page, page_offset, cur, end, 1); - if (IS_ERR(em) || !em) { - SetPageError(page); - break; - } - - extent_offset = cur - em->start; - BUG_ON(em->end < cur); - BUG_ON(end < cur); - iosize = min(em->end - cur, end - cur) + 1; - iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); - sector = (em->block_start + extent_offset) >> 9; - bdev = em->bdev; - block_start = em->block_start; - free_extent_map(em); - em = NULL; - - if (block_start == EXTENT_MAP_HOLE || - block_start == EXTENT_MAP_INLINE) { - clear_extent_dirty(tree, cur, - cur + iosize - 1, GFP_NOFS); - cur = cur + iosize; - page_offset += iosize; - continue; - } - - /* leave this out until we have a page_mkwrite call */ - if (0 && !test_range_bit(tree, cur, cur + iosize - 1, - EXTENT_DIRTY, 0)) { - cur = cur + iosize; - page_offset += iosize; - continue; - } - clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); - if (tree->ops && tree->ops->writepage_io_hook) { - ret = tree->ops->writepage_io_hook(page, cur, - cur + iosize - 1); - } else { - ret = 0; - } - if (ret) - SetPageError(page); - else { - unsigned long max_nr = end_index + 1; - set_range_writeback(tree, cur, cur + iosize - 1); - if (!PageWriteback(page)) { - printk("warning page %lu not writeback, " - "cur %llu end %llu\n", page->index, - (unsigned long long)cur, - (unsigned long long)end); - } - - ret = submit_extent_page(WRITE, tree, page, sector, - iosize, page_offset, bdev, - &epd->bio, max_nr, - end_bio_extent_writepage); - if (ret) - SetPageError(page); - } - cur = cur + iosize; - page_offset += iosize; - nr++; - } -done: - if (nr == 0) { - /* make sure the mapping tag for page dirty gets cleared */ - set_page_writeback(page); - end_page_writeback(page); - } - unlock_extent(tree, start, page_end, GFP_NOFS); - unlock_page(page); - return 0; -} - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - -/* Taken directly from 2.6.23 for 2.6.18 back port */ -typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, - void *data); - -/** - * write_cache_pages - walk the list of dirty pages of the given address space - * and write all of them. - * @mapping: address space structure to write - * @wbc: subtract the number of written pages from *@wbc->nr_to_write - * @writepage: function called for each page - * @data: data passed to writepage function - * - * If a page is already under I/O, write_cache_pages() skips it, even - * if it's dirty. This is desirable behaviour for memory-cleaning writeback, - * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() - * and msync() need to guarantee that all the data which was dirty at the time - * the call was made get new I/O started against them. If wbc->sync_mode is - * WB_SYNC_ALL then we were called for data integrity and we must wait for - * existing IO to complete. - */ -static int write_cache_pages(struct address_space *mapping, - struct writeback_control *wbc, writepage_t writepage, - void *data) -{ - struct backing_dev_info *bdi = mapping->backing_dev_info; - int ret = 0; - int done = 0; - struct pagevec pvec; - int nr_pages; - pgoff_t index; - pgoff_t end; /* Inclusive */ - int scanned = 0; - int range_whole = 0; - - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - return 0; - } - - pagevec_init(&pvec, 0); - if (wbc->range_cyclic) { - index = mapping->writeback_index; /* Start from prev offset */ - end = -1; - } else { - index = wbc->range_start >> PAGE_CACHE_SHIFT; - end = wbc->range_end >> PAGE_CACHE_SHIFT; - if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) - range_whole = 1; - scanned = 1; - } -retry: - while (!done && (index <= end) && - (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { - unsigned i; - - scanned = 1; - for (i = 0; i < nr_pages; i++) { - struct page *page = pvec.pages[i]; - - /* - * At this point we hold neither mapping->tree_lock nor - * lock on the page itself: the page may be truncated or - * invalidated (changing page->mapping to NULL), or even - * swizzled back from swapper_space to tmpfs file - * mapping - */ - lock_page(page); - - if (unlikely(page->mapping != mapping)) { - unlock_page(page); - continue; - } - - if (!wbc->range_cyclic && page->index > end) { - done = 1; - unlock_page(page); - continue; - } - - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(page); - - if (PageWriteback(page) || - !clear_page_dirty_for_io(page)) { - unlock_page(page); - continue; - } - - ret = (*writepage)(page, wbc, data); - - if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { - unlock_page(page); - ret = 0; - } - if (ret || (--(wbc->nr_to_write) <= 0)) - done = 1; - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - done = 1; - } - } - pagevec_release(&pvec); - cond_resched(); - } - if (!scanned && !done) { - /* - * We hit the last page and there is more work to be done: wrap - * back to the start of the file - */ - scanned = 1; - index = 0; - goto retry; - } - if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) - mapping->writeback_index = index; - return ret; -} -#endif - -int extent_write_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent, - struct writeback_control *wbc) -{ - int ret; - struct address_space *mapping = page->mapping; - struct extent_page_data epd = { - .bio = NULL, - .tree = tree, - .get_extent = get_extent, - }; - struct writeback_control wbc_writepages = { - .bdi = wbc->bdi, - .sync_mode = WB_SYNC_NONE, - .older_than_this = NULL, - .nr_to_write = 64, - .range_start = page_offset(page) + PAGE_CACHE_SIZE, - .range_end = (loff_t)-1, - }; - - - ret = __extent_writepage(page, wbc, &epd); - - write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd); - if (epd.bio) { - submit_one_bio(WRITE, epd.bio); - } - return ret; -} -EXPORT_SYMBOL(extent_write_full_page); - - -int extent_writepages(struct extent_map_tree *tree, - struct address_space *mapping, - get_extent_t *get_extent, - struct writeback_control *wbc) -{ - int ret = 0; - struct extent_page_data epd = { - .bio = NULL, - .tree = tree, - .get_extent = get_extent, - }; - - ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd); - if (epd.bio) { - submit_one_bio(WRITE, epd.bio); - } - return ret; -} -EXPORT_SYMBOL(extent_writepages); - -int extent_readpages(struct extent_map_tree *tree, - struct address_space *mapping, - struct list_head *pages, unsigned nr_pages, - get_extent_t get_extent) -{ - struct bio *bio = NULL; - unsigned page_idx; - struct pagevec pvec; - - pagevec_init(&pvec, 0); - for (page_idx = 0; page_idx < nr_pages; page_idx++) { - struct page *page = list_entry(pages->prev, struct page, lru); - - prefetchw(&page->flags); - list_del(&page->lru); - /* - * what we want to do here is call add_to_page_cache_lru, - * but that isn't exported, so we reproduce it here - */ - if (!add_to_page_cache(page, mapping, - page->index, GFP_KERNEL)) { - - /* open coding of lru_cache_add, also not exported */ - page_cache_get(page); - if (!pagevec_add(&pvec, page)) - __pagevec_lru_add(&pvec); - __extent_read_full_page(tree, page, get_extent, &bio); - } - page_cache_release(page); - } - if (pagevec_count(&pvec)) - __pagevec_lru_add(&pvec); - BUG_ON(!list_empty(pages)); - if (bio) - submit_one_bio(READ, bio); - return 0; -} -EXPORT_SYMBOL(extent_readpages); - -/* - * basic invalidatepage code, this waits on any locked or writeback - * ranges corresponding to the page, and then deletes any extent state - * records from the tree - */ -int extent_invalidatepage(struct extent_map_tree *tree, - struct page *page, unsigned long offset) -{ - u64 start = ((u64)page->index << PAGE_CACHE_SHIFT); - u64 end = start + PAGE_CACHE_SIZE - 1; - size_t blocksize = page->mapping->host->i_sb->s_blocksize; - - start += (offset + blocksize -1) & ~(blocksize - 1); - if (start > end) - return 0; - - lock_extent(tree, start, end, GFP_NOFS); - wait_on_extent_writeback(tree, start, end); - clear_extent_bit(tree, start, end, - EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, - 1, 1, GFP_NOFS); - return 0; -} -EXPORT_SYMBOL(extent_invalidatepage); - -/* - * simple commit_write call, set_range_dirty is used to mark both - * the pages and the extent records as dirty - */ -int extent_commit_write(struct extent_map_tree *tree, - struct inode *inode, struct page *page, - unsigned from, unsigned to) -{ - loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - - set_page_extent_mapped(page); - set_page_dirty(page); - - if (pos > inode->i_size) { - i_size_write(inode, pos); - mark_inode_dirty(inode); - } - return 0; -} -EXPORT_SYMBOL(extent_commit_write); - -int extent_prepare_write(struct extent_map_tree *tree, - struct inode *inode, struct page *page, - unsigned from, unsigned to, get_extent_t *get_extent) -{ - u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 page_end = page_start + PAGE_CACHE_SIZE - 1; - u64 block_start; - u64 orig_block_start; - u64 block_end; - u64 cur_end; - struct extent_map *em; - unsigned blocksize = 1 << inode->i_blkbits; - size_t page_offset = 0; - size_t block_off_start; - size_t block_off_end; - int err = 0; - int iocount = 0; - int ret = 0; - int isnew; - - set_page_extent_mapped(page); - - block_start = (page_start + from) & ~((u64)blocksize - 1); - block_end = (page_start + to - 1) | (blocksize - 1); - orig_block_start = block_start; - - lock_extent(tree, page_start, page_end, GFP_NOFS); - while(block_start <= block_end) { - em = get_extent(inode, page, page_offset, block_start, - block_end, 1); - if (IS_ERR(em) || !em) { - goto err; - } - cur_end = min(block_end, em->end); - block_off_start = block_start & (PAGE_CACHE_SIZE - 1); - block_off_end = block_off_start + blocksize; - isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS); - - if (!PageUptodate(page) && isnew && - (block_off_end > to || block_off_start < from)) { - void *kaddr; - - kaddr = kmap_atomic(page, KM_USER0); - if (block_off_end > to) - memset(kaddr + to, 0, block_off_end - to); - if (block_off_start < from) - memset(kaddr + block_off_start, 0, - from - block_off_start); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - } - if ((em->block_start != EXTENT_MAP_HOLE && - em->block_start != EXTENT_MAP_INLINE) && - !isnew && !PageUptodate(page) && - (block_off_end > to || block_off_start < from) && - !test_range_bit(tree, block_start, cur_end, - EXTENT_UPTODATE, 1)) { - u64 sector; - u64 extent_offset = block_start - em->start; - size_t iosize; - sector = (em->block_start + extent_offset) >> 9; - iosize = (cur_end - block_start + blocksize) & - ~((u64)blocksize - 1); - /* - * we've already got the extent locked, but we - * need to split the state such that our end_bio - * handler can clear the lock. - */ - set_extent_bit(tree, block_start, - block_start + iosize - 1, - EXTENT_LOCKED, 0, NULL, GFP_NOFS); - ret = submit_extent_page(READ, tree, page, - sector, iosize, page_offset, em->bdev, - NULL, 1, - end_bio_extent_preparewrite); - iocount++; - block_start = block_start + iosize; - } else { - set_extent_uptodate(tree, block_start, cur_end, - GFP_NOFS); - unlock_extent(tree, block_start, cur_end, GFP_NOFS); - block_start = cur_end + 1; - } - page_offset = block_start & (PAGE_CACHE_SIZE - 1); - free_extent_map(em); - } - if (iocount) { - wait_extent_bit(tree, orig_block_start, - block_end, EXTENT_LOCKED); - } - check_page_uptodate(tree, page); -err: - /* FIXME, zero out newly allocated blocks on error */ - return err; -} -EXPORT_SYMBOL(extent_prepare_write); - -/* - * a helper for releasepage. As long as there are no locked extents - * in the range corresponding to the page, both state records and extent - * map records are removed - */ -int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page) -{ - struct extent_map *em; - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 end = start + PAGE_CACHE_SIZE - 1; - u64 orig_start = start; - int ret = 1; - - while (start <= end) { - em = lookup_extent_mapping(tree, start, end); - if (!em || IS_ERR(em)) - break; - if (!test_range_bit(tree, em->start, em->end, - EXTENT_LOCKED, 0)) { - remove_extent_mapping(tree, em); - /* once for the rb tree */ - free_extent_map(em); - } - start = em->end + 1; - /* once for us */ - free_extent_map(em); - } - if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0)) - ret = 0; - else - clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, - 1, 1, GFP_NOFS); - return ret; -} -EXPORT_SYMBOL(try_release_extent_mapping); - -sector_t extent_bmap(struct address_space *mapping, sector_t iblock, - get_extent_t *get_extent) -{ - struct inode *inode = mapping->host; - u64 start = iblock << inode->i_blkbits; - u64 end = start + (1 << inode->i_blkbits) - 1; - sector_t sector = 0; - struct extent_map *em; - - em = get_extent(inode, NULL, 0, start, end, 0); - if (!em || IS_ERR(em)) - return 0; - - if (em->block_start == EXTENT_MAP_INLINE || - em->block_start == EXTENT_MAP_HOLE) - goto out; - - sector = (em->block_start + start - em->start) >> inode->i_blkbits; -out: - free_extent_map(em); - return sector; -} - -static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb) -{ - if (list_empty(&eb->lru)) { - extent_buffer_get(eb); - list_add(&eb->lru, &tree->buffer_lru); - tree->lru_size++; - if (tree->lru_size >= BUFFER_LRU_MAX) { - struct extent_buffer *rm; - rm = list_entry(tree->buffer_lru.prev, - struct extent_buffer, lru); - tree->lru_size--; - list_del_init(&rm->lru); - free_extent_buffer(rm); - } - } else - list_move(&eb->lru, &tree->buffer_lru); - return 0; -} -static struct extent_buffer *find_lru(struct extent_map_tree *tree, - u64 start, unsigned long len) -{ - struct list_head *lru = &tree->buffer_lru; - struct list_head *cur = lru->next; - struct extent_buffer *eb; - - if (list_empty(lru)) - return NULL; - - do { - eb = list_entry(cur, struct extent_buffer, lru); - if (eb->start == start && eb->len == len) { - extent_buffer_get(eb); - return eb; - } - cur = cur->next; - } while (cur != lru); - return NULL; -} - -static inline unsigned long num_extent_pages(u64 start, u64 len) -{ - return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - - (start >> PAGE_CACHE_SHIFT); -} - -static inline struct page *extent_buffer_page(struct extent_buffer *eb, - unsigned long i) -{ - struct page *p; - struct address_space *mapping; - - if (i == 0) - return eb->first_page; - i += eb->start >> PAGE_CACHE_SHIFT; - mapping = eb->first_page->mapping; - read_lock_irq(&mapping->tree_lock); - p = radix_tree_lookup(&mapping->page_tree, i); - read_unlock_irq(&mapping->tree_lock); - return p; -} - -static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree, - u64 start, - unsigned long len, - gfp_t mask) -{ - struct extent_buffer *eb = NULL; - - spin_lock(&tree->lru_lock); - eb = find_lru(tree, start, len); - spin_unlock(&tree->lru_lock); - if (eb) { - return eb; - } - - eb = kmem_cache_zalloc(extent_buffer_cache, mask); - INIT_LIST_HEAD(&eb->lru); - eb->start = start; - eb->len = len; - atomic_set(&eb->refs, 1); - - return eb; -} - -static void __free_extent_buffer(struct extent_buffer *eb) -{ - kmem_cache_free(extent_buffer_cache, eb); -} - -struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, - u64 start, unsigned long len, - struct page *page0, - gfp_t mask) -{ - unsigned long num_pages = num_extent_pages(start, len); - unsigned long i; - unsigned long index = start >> PAGE_CACHE_SHIFT; - struct extent_buffer *eb; - struct page *p; - struct address_space *mapping = tree->mapping; - int uptodate = 1; - - eb = __alloc_extent_buffer(tree, start, len, mask); - if (!eb || IS_ERR(eb)) - return NULL; - - if (eb->flags & EXTENT_BUFFER_FILLED) - goto lru_add; - - if (page0) { - eb->first_page = page0; - i = 1; - index++; - page_cache_get(page0); - mark_page_accessed(page0); - set_page_extent_mapped(page0); - WARN_ON(!PageUptodate(page0)); - set_page_extent_head(page0, len); - } else { - i = 0; - } - for (; i < num_pages; i++, index++) { - p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); - if (!p) { - WARN_ON(1); - goto fail; - } - set_page_extent_mapped(p); - mark_page_accessed(p); - if (i == 0) { - eb->first_page = p; - set_page_extent_head(p, len); - } else { - set_page_private(p, EXTENT_PAGE_PRIVATE); - } - if (!PageUptodate(p)) - uptodate = 0; - unlock_page(p); - } - if (uptodate) - eb->flags |= EXTENT_UPTODATE; - eb->flags |= EXTENT_BUFFER_FILLED; - -lru_add: - spin_lock(&tree->lru_lock); - add_lru(tree, eb); - spin_unlock(&tree->lru_lock); - return eb; - -fail: - spin_lock(&tree->lru_lock); - list_del_init(&eb->lru); - spin_unlock(&tree->lru_lock); - if (!atomic_dec_and_test(&eb->refs)) - return NULL; - for (index = 1; index < i; index++) { - page_cache_release(extent_buffer_page(eb, index)); - } - if (i > 0) - page_cache_release(extent_buffer_page(eb, 0)); - __free_extent_buffer(eb); - return NULL; -} -EXPORT_SYMBOL(alloc_extent_buffer); - -struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, - u64 start, unsigned long len, - gfp_t mask) -{ - unsigned long num_pages = num_extent_pages(start, len); - unsigned long i; - unsigned long index = start >> PAGE_CACHE_SHIFT; - struct extent_buffer *eb; - struct page *p; - struct address_space *mapping = tree->mapping; - int uptodate = 1; - - eb = __alloc_extent_buffer(tree, start, len, mask); - if (!eb || IS_ERR(eb)) - return NULL; - - if (eb->flags & EXTENT_BUFFER_FILLED) - goto lru_add; - - for (i = 0; i < num_pages; i++, index++) { - p = find_lock_page(mapping, index); - if (!p) { - goto fail; - } - set_page_extent_mapped(p); - mark_page_accessed(p); - - if (i == 0) { - eb->first_page = p; - set_page_extent_head(p, len); - } else { - set_page_private(p, EXTENT_PAGE_PRIVATE); - } - - if (!PageUptodate(p)) - uptodate = 0; - unlock_page(p); - } - if (uptodate) - eb->flags |= EXTENT_UPTODATE; - eb->flags |= EXTENT_BUFFER_FILLED; - -lru_add: - spin_lock(&tree->lru_lock); - add_lru(tree, eb); - spin_unlock(&tree->lru_lock); - return eb; -fail: - spin_lock(&tree->lru_lock); - list_del_init(&eb->lru); - spin_unlock(&tree->lru_lock); - if (!atomic_dec_and_test(&eb->refs)) - return NULL; - for (index = 1; index < i; index++) { - page_cache_release(extent_buffer_page(eb, index)); - } - if (i > 0) - page_cache_release(extent_buffer_page(eb, 0)); - __free_extent_buffer(eb); - return NULL; -} -EXPORT_SYMBOL(find_extent_buffer); - -void free_extent_buffer(struct extent_buffer *eb) -{ - unsigned long i; - unsigned long num_pages; - - if (!eb) - return; - - if (!atomic_dec_and_test(&eb->refs)) - return; - - WARN_ON(!list_empty(&eb->lru)); - num_pages = num_extent_pages(eb->start, eb->len); - - for (i = 1; i < num_pages; i++) { - page_cache_release(extent_buffer_page(eb, i)); - } - page_cache_release(extent_buffer_page(eb, 0)); - __free_extent_buffer(eb); -} -EXPORT_SYMBOL(free_extent_buffer); - -int clear_extent_buffer_dirty(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - int set; - unsigned long i; - unsigned long num_pages; - struct page *page; - - u64 start = eb->start; - u64 end = start + eb->len - 1; - - set = clear_extent_dirty(tree, start, end, GFP_NOFS); - num_pages = num_extent_pages(eb->start, eb->len); - - for (i = 0; i < num_pages; i++) { - page = extent_buffer_page(eb, i); - lock_page(page); - if (i == 0) - set_page_extent_head(page, eb->len); - else - set_page_private(page, EXTENT_PAGE_PRIVATE); - - /* - * if we're on the last page or the first page and the - * block isn't aligned on a page boundary, do extra checks - * to make sure we don't clean page that is partially dirty - */ - if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || - ((i == num_pages - 1) && - ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { - start = (u64)page->index << PAGE_CACHE_SHIFT; - end = start + PAGE_CACHE_SIZE - 1; - if (test_range_bit(tree, start, end, - EXTENT_DIRTY, 0)) { - unlock_page(page); - continue; - } - } - clear_page_dirty_for_io(page); - write_lock_irq(&page->mapping->tree_lock); - if (!PageDirty(page)) { - radix_tree_tag_clear(&page->mapping->page_tree, - page_index(page), - PAGECACHE_TAG_DIRTY); - } - write_unlock_irq(&page->mapping->tree_lock); - unlock_page(page); - } - return 0; -} -EXPORT_SYMBOL(clear_extent_buffer_dirty); - -int wait_on_extent_buffer_writeback(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - return wait_on_extent_writeback(tree, eb->start, - eb->start + eb->len - 1); -} -EXPORT_SYMBOL(wait_on_extent_buffer_writeback); - -int set_extent_buffer_dirty(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - unsigned long i; - unsigned long num_pages; - - num_pages = num_extent_pages(eb->start, eb->len); - for (i = 0; i < num_pages; i++) { - struct page *page = extent_buffer_page(eb, i); - /* writepage may need to do something special for the - * first page, we have to make sure page->private is - * properly set. releasepage may drop page->private - * on us if the page isn't already dirty. - */ - if (i == 0) { - lock_page(page); - set_page_extent_head(page, eb->len); - } else if (PagePrivate(page) && - page->private != EXTENT_PAGE_PRIVATE) { - lock_page(page); - set_page_extent_mapped(page); - unlock_page(page); - } - __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); - if (i == 0) - unlock_page(page); - } - return set_extent_dirty(tree, eb->start, - eb->start + eb->len - 1, GFP_NOFS); -} -EXPORT_SYMBOL(set_extent_buffer_dirty); - -int set_extent_buffer_uptodate(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - unsigned long i; - struct page *page; - unsigned long num_pages; - - num_pages = num_extent_pages(eb->start, eb->len); - - set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, - GFP_NOFS); - for (i = 0; i < num_pages; i++) { - page = extent_buffer_page(eb, i); - if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || - ((i == num_pages - 1) && - ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { - check_page_uptodate(tree, page); - continue; - } - SetPageUptodate(page); - } - return 0; -} -EXPORT_SYMBOL(set_extent_buffer_uptodate); - -int extent_buffer_uptodate(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - if (eb->flags & EXTENT_UPTODATE) - return 1; - return test_range_bit(tree, eb->start, eb->start + eb->len - 1, - EXTENT_UPTODATE, 1); -} -EXPORT_SYMBOL(extent_buffer_uptodate); - -int read_extent_buffer_pages(struct extent_map_tree *tree, - struct extent_buffer *eb, - u64 start, - int wait) -{ - unsigned long i; - unsigned long start_i; - struct page *page; - int err; - int ret = 0; - unsigned long num_pages; - - if (eb->flags & EXTENT_UPTODATE) - return 0; - - if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1, - EXTENT_UPTODATE, 1)) { - return 0; - } - - if (start) { - WARN_ON(start < eb->start); - start_i = (start >> PAGE_CACHE_SHIFT) - - (eb->start >> PAGE_CACHE_SHIFT); - } else { - start_i = 0; - } - - num_pages = num_extent_pages(eb->start, eb->len); - for (i = start_i; i < num_pages; i++) { - page = extent_buffer_page(eb, i); - if (PageUptodate(page)) { - continue; - } - if (!wait) { - if (TestSetPageLocked(page)) { - continue; - } - } else { - lock_page(page); - } - if (!PageUptodate(page)) { - err = page->mapping->a_ops->readpage(NULL, page); - if (err) { - ret = err; - } - } else { - unlock_page(page); - } - } - - if (ret || !wait) { - return ret; - } - - for (i = start_i; i < num_pages; i++) { - page = extent_buffer_page(eb, i); - wait_on_page_locked(page); - if (!PageUptodate(page)) { - ret = -EIO; - } - } - if (!ret) - eb->flags |= EXTENT_UPTODATE; - return ret; -} -EXPORT_SYMBOL(read_extent_buffer_pages); - -void read_extent_buffer(struct extent_buffer *eb, void *dstv, - unsigned long start, - unsigned long len) -{ - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - char *dst = (char *)dstv; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - unsigned long num_pages = num_extent_pages(eb->start, eb->len); - - WARN_ON(start > eb->len); - WARN_ON(start + len > eb->start + eb->len); - - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) { - page = extent_buffer_page(eb, i); - if (!PageUptodate(page)) { - printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len); - WARN_ON(1); - } - WARN_ON(!PageUptodate(page)); - - cur = min(len, (PAGE_CACHE_SIZE - offset)); - kaddr = kmap_atomic(page, KM_USER1); - memcpy(dst, kaddr + offset, cur); - kunmap_atomic(kaddr, KM_USER1); - - dst += cur; - len -= cur; - offset = 0; - i++; - } -} -EXPORT_SYMBOL(read_extent_buffer); - -int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, - unsigned long min_len, char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km) -{ - size_t offset = start & (PAGE_CACHE_SIZE - 1); - char *kaddr; - struct page *p; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - unsigned long end_i = (start_offset + start + min_len - 1) >> - PAGE_CACHE_SHIFT; - - if (i != end_i) - return -EINVAL; - - if (i == 0) { - offset = start_offset; - *map_start = 0; - } else { - offset = 0; - *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset; - } - if (start + min_len > eb->len) { -printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); - WARN_ON(1); - } - - p = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(p)); - kaddr = kmap_atomic(p, km); - *token = kaddr; - *map = kaddr + offset; - *map_len = PAGE_CACHE_SIZE - offset; - return 0; -} -EXPORT_SYMBOL(map_private_extent_buffer); - -int map_extent_buffer(struct extent_buffer *eb, unsigned long start, - unsigned long min_len, - char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km) -{ - int err; - int save = 0; - if (eb->map_token) { - unmap_extent_buffer(eb, eb->map_token, km); - eb->map_token = NULL; - save = 1; - } - err = map_private_extent_buffer(eb, start, min_len, token, map, - map_start, map_len, km); - if (!err && save) { - eb->map_token = *token; - eb->kaddr = *map; - eb->map_start = *map_start; - eb->map_len = *map_len; - } - return err; -} -EXPORT_SYMBOL(map_extent_buffer); - -void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) -{ - kunmap_atomic(token, km); -} -EXPORT_SYMBOL(unmap_extent_buffer); - -int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, - unsigned long start, - unsigned long len) -{ - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - char *ptr = (char *)ptrv; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - int ret = 0; - - WARN_ON(start > eb->len); - WARN_ON(start + len > eb->start + eb->len); - - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) { - page = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(page)); - - cur = min(len, (PAGE_CACHE_SIZE - offset)); - - kaddr = kmap_atomic(page, KM_USER0); - ret = memcmp(ptr, kaddr + offset, cur); - kunmap_atomic(kaddr, KM_USER0); - if (ret) - break; - - ptr += cur; - len -= cur; - offset = 0; - i++; - } - return ret; -} -EXPORT_SYMBOL(memcmp_extent_buffer); - -void write_extent_buffer(struct extent_buffer *eb, const void *srcv, - unsigned long start, unsigned long len) -{ - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - char *src = (char *)srcv; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - - WARN_ON(start > eb->len); - WARN_ON(start + len > eb->start + eb->len); - - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) { - page = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(page)); - - cur = min(len, PAGE_CACHE_SIZE - offset); - kaddr = kmap_atomic(page, KM_USER1); - memcpy(kaddr + offset, src, cur); - kunmap_atomic(kaddr, KM_USER1); - - src += cur; - len -= cur; - offset = 0; - i++; - } -} -EXPORT_SYMBOL(write_extent_buffer); - -void memset_extent_buffer(struct extent_buffer *eb, char c, - unsigned long start, unsigned long len) -{ - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - - WARN_ON(start > eb->len); - WARN_ON(start + len > eb->start + eb->len); - - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) { - page = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(page)); - - cur = min(len, PAGE_CACHE_SIZE - offset); - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, c, cur); - kunmap_atomic(kaddr, KM_USER0); - - len -= cur; - offset = 0; - i++; - } -} -EXPORT_SYMBOL(memset_extent_buffer); - -void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, - unsigned long dst_offset, unsigned long src_offset, - unsigned long len) -{ - u64 dst_len = dst->len; - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; - - WARN_ON(src->len != dst_len); - - offset = (start_offset + dst_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) { - page = extent_buffer_page(dst, i); - WARN_ON(!PageUptodate(page)); - - cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); - - kaddr = kmap_atomic(page, KM_USER0); - read_extent_buffer(src, kaddr + offset, src_offset, cur); - kunmap_atomic(kaddr, KM_USER0); - - src_offset += cur; - len -= cur; - offset = 0; - i++; - } -} -EXPORT_SYMBOL(copy_extent_buffer); - -static void move_pages(struct page *dst_page, struct page *src_page, - unsigned long dst_off, unsigned long src_off, - unsigned long len) -{ - char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); - if (dst_page == src_page) { - memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); - } else { - char *src_kaddr = kmap_atomic(src_page, KM_USER1); - char *p = dst_kaddr + dst_off + len; - char *s = src_kaddr + src_off + len; - - while (len--) - *--p = *--s; - - kunmap_atomic(src_kaddr, KM_USER1); - } - kunmap_atomic(dst_kaddr, KM_USER0); -} - -static void copy_pages(struct page *dst_page, struct page *src_page, - unsigned long dst_off, unsigned long src_off, - unsigned long len) -{ - char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); - char *src_kaddr; - - if (dst_page != src_page) - src_kaddr = kmap_atomic(src_page, KM_USER1); - else - src_kaddr = dst_kaddr; - - memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); - kunmap_atomic(dst_kaddr, KM_USER0); - if (dst_page != src_page) - kunmap_atomic(src_kaddr, KM_USER1); -} - -void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, - unsigned long src_offset, unsigned long len) -{ - size_t cur; - size_t dst_off_in_page; - size_t src_off_in_page; - size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long dst_i; - unsigned long src_i; - - if (src_offset + len > dst->len) { - printk("memmove bogus src_offset %lu move len %lu len %lu\n", - src_offset, len, dst->len); - BUG_ON(1); - } - if (dst_offset + len > dst->len) { - printk("memmove bogus dst_offset %lu move len %lu len %lu\n", - dst_offset, len, dst->len); - BUG_ON(1); - } - - while(len > 0) { - dst_off_in_page = (start_offset + dst_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - src_off_in_page = (start_offset + src_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - - dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; - src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; - - cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - - src_off_in_page)); - cur = min_t(unsigned long, cur, - (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); - - copy_pages(extent_buffer_page(dst, dst_i), - extent_buffer_page(dst, src_i), - dst_off_in_page, src_off_in_page, cur); - - src_offset += cur; - dst_offset += cur; - len -= cur; - } -} -EXPORT_SYMBOL(memcpy_extent_buffer); - -void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, - unsigned long src_offset, unsigned long len) -{ - size_t cur; - size_t dst_off_in_page; - size_t src_off_in_page; - unsigned long dst_end = dst_offset + len - 1; - unsigned long src_end = src_offset + len - 1; - size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long dst_i; - unsigned long src_i; - - if (src_offset + len > dst->len) { - printk("memmove bogus src_offset %lu move len %lu len %lu\n", - src_offset, len, dst->len); - BUG_ON(1); - } - if (dst_offset + len > dst->len) { - printk("memmove bogus dst_offset %lu move len %lu len %lu\n", - dst_offset, len, dst->len); - BUG_ON(1); - } - if (dst_offset < src_offset) { - memcpy_extent_buffer(dst, dst_offset, src_offset, len); - return; - } - while(len > 0) { - dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; - src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; - - dst_off_in_page = (start_offset + dst_end) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - src_off_in_page = (start_offset + src_end) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - - cur = min_t(unsigned long, len, src_off_in_page + 1); - cur = min(cur, dst_off_in_page + 1); - move_pages(extent_buffer_page(dst, dst_i), - extent_buffer_page(dst, src_i), - dst_off_in_page - cur + 1, - src_off_in_page - cur + 1, cur); - - dst_end -= cur; - src_end -= cur; - len -= cur; - } -} -EXPORT_SYMBOL(memmove_extent_buffer); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index ea60f5447b5..56314217cfc 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -3,215 +3,53 @@ #include +#define EXTENT_MAP_LAST_BYTE (u64)-4 #define EXTENT_MAP_HOLE (u64)-3 #define EXTENT_MAP_INLINE (u64)-2 #define EXTENT_MAP_DELALLOC (u64)-1 -/* bits for the extent state */ -#define EXTENT_DIRTY 1 -#define EXTENT_WRITEBACK (1 << 1) -#define EXTENT_UPTODATE (1 << 2) -#define EXTENT_LOCKED (1 << 3) -#define EXTENT_NEW (1 << 4) -#define EXTENT_DELALLOC (1 << 5) -#define EXTENT_DEFRAG (1 << 6) -#define EXTENT_DEFRAG_DONE (1 << 7) -#define EXTENT_BUFFER_FILLED (1 << 8) -#define EXTENT_CSUM (1 << 9) -#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) - -/* - * page->private values. Every page that is controlled by the extent - * map has page->private set to one. - */ -#define EXTENT_PAGE_PRIVATE 1 -#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 - - -struct extent_map_ops { - int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); - int (*writepage_io_hook)(struct page *page, u64 start, u64 end); - int (*readpage_io_hook)(struct page *page, u64 start, u64 end); - int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end); - void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end); -}; - -struct extent_map_tree { - struct rb_root map; - struct rb_root state; - struct address_space *mapping; - u64 dirty_bytes; - rwlock_t lock; - struct extent_map_ops *ops; - spinlock_t lru_lock; - struct list_head buffer_lru; - int lru_size; -}; - -/* note, this must start with the same fields as fs/extent_map.c:tree_entry */ struct extent_map { - u64 start; - u64 end; /* inclusive */ - int in_tree; struct rb_node rb_node; - /* block_start and block_end are in bytes */ + + /* all of these are in bytes */ + u64 start; + u64 len; u64 block_start; - u64 block_end; /* inclusive */ + unsigned long flags; struct block_device *bdev; atomic_t refs; -}; - -/* note, this must start with the same fields as fs/extent_map.c:tree_entry */ -struct extent_state { - u64 start; - u64 end; /* inclusive */ int in_tree; - struct rb_node rb_node; - wait_queue_head_t wq; - atomic_t refs; - unsigned long state; - - /* for use by the FS */ - u64 private; - - struct list_head list; }; -struct extent_buffer { - u64 start; - unsigned long len; - char *map_token; - char *kaddr; - unsigned long map_start; - unsigned long map_len; - struct page *first_page; - struct list_head lru; - atomic_t refs; - int flags; +struct extent_map_tree { + struct rb_root map; + struct extent_map *last; + spinlock_t lock; }; -typedef struct extent_map *(get_extent_t)(struct inode *inode, - struct page *page, - size_t page_offset, - u64 start, u64 end, - int create); +static inline u64 extent_map_end(struct extent_map *em) +{ + if (em->start + em->len < em->start) + return (u64)-1; + return em->start + em->len; +} + +static inline u64 extent_map_block_end(struct extent_map *em) +{ + if (em->block_start + em->len < em->block_start) + return (u64)-1; + return em->block_start + em->len; +} -void extent_map_tree_init(struct extent_map_tree *tree, - struct address_space *mapping, gfp_t mask); -void extent_map_tree_empty_lru(struct extent_map_tree *tree); +void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask); struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, - u64 start, u64 end); + u64 start, u64 len); int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); -int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page); -int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); -int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); + struct extent_map *alloc_extent_map(gfp_t mask); void free_extent_map(struct extent_map *em); -int extent_read_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent); int __init extent_map_init(void); void extent_map_exit(void); - -u64 count_range_bits(struct extent_map_tree *tree, - u64 *start, u64 search_end, - u64 max_bytes, unsigned long bits); - -int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, - int bits, int filled); -int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, - int bits, gfp_t mask); -int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, - int bits, gfp_t mask); -int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); -int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); -int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); -int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); -int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); -int find_first_extent_bit(struct extent_map_tree *tree, u64 start, - u64 *start_ret, u64 *end_ret, int bits); -int extent_invalidatepage(struct extent_map_tree *tree, - struct page *page, unsigned long offset); -int extent_write_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent, - struct writeback_control *wbc); -int extent_writepages(struct extent_map_tree *tree, - struct address_space *mapping, - get_extent_t *get_extent, - struct writeback_control *wbc); -int extent_readpages(struct extent_map_tree *tree, - struct address_space *mapping, - struct list_head *pages, unsigned nr_pages, - get_extent_t get_extent); -int extent_prepare_write(struct extent_map_tree *tree, - struct inode *inode, struct page *page, - unsigned from, unsigned to, get_extent_t *get_extent); -int extent_commit_write(struct extent_map_tree *tree, - struct inode *inode, struct page *page, - unsigned from, unsigned to); -sector_t extent_bmap(struct address_space *mapping, sector_t iblock, - get_extent_t *get_extent); -int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end); -int set_state_private(struct extent_map_tree *tree, u64 start, u64 private); -int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private); -void set_page_extent_mapped(struct page *page); - -struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, - u64 start, unsigned long len, - struct page *page0, - gfp_t mask); -struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, - u64 start, unsigned long len, - gfp_t mask); -void free_extent_buffer(struct extent_buffer *eb); -int read_extent_buffer_pages(struct extent_map_tree *tree, - struct extent_buffer *eb, u64 start, int wait); - -static inline void extent_buffer_get(struct extent_buffer *eb) -{ - atomic_inc(&eb->refs); -} - -int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, - unsigned long start, - unsigned long len); -void read_extent_buffer(struct extent_buffer *eb, void *dst, - unsigned long start, - unsigned long len); -void write_extent_buffer(struct extent_buffer *eb, const void *src, - unsigned long start, unsigned long len); -void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, - unsigned long dst_offset, unsigned long src_offset, - unsigned long len); -void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, - unsigned long src_offset, unsigned long len); -void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, - unsigned long src_offset, unsigned long len); -void memset_extent_buffer(struct extent_buffer *eb, char c, - unsigned long start, unsigned long len); -int wait_on_extent_buffer_writeback(struct extent_map_tree *tree, - struct extent_buffer *eb); -int clear_extent_buffer_dirty(struct extent_map_tree *tree, - struct extent_buffer *eb); -int set_extent_buffer_dirty(struct extent_map_tree *tree, - struct extent_buffer *eb); -int set_extent_buffer_uptodate(struct extent_map_tree *tree, - struct extent_buffer *eb); -int extent_buffer_uptodate(struct extent_map_tree *tree, - struct extent_buffer *eb); -int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, - unsigned long min_len, char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km); -int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, - unsigned long min_len, char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km); -void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); #endif diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1cd8c908811..c5bb00f9239 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -233,8 +233,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, int err = 0; int i; struct inode *inode = fdentry(file)->d_inode; - struct extent_map *em; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; u64 hint_byte; u64 num_bytes; u64 start_pos; @@ -242,11 +241,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 end_pos = pos + write_bytes; u64 inline_size; loff_t isize = i_size_read(inode); - em = alloc_extent_map(GFP_NOFS); - if (!em) - return -ENOMEM; - - em->bdev = inode->i_sb->s_bdev; start_pos = pos & ~((u64)root->sectorsize - 1); num_bytes = (write_bytes + pos - start_pos + @@ -254,7 +248,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, end_of_last_block = start_pos + num_bytes - 1; - lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); + lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); if (!trans) { @@ -268,7 +262,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, if ((end_of_last_block & 4095) == 0) { printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block); } - set_extent_uptodate(em_tree, start_pos, end_of_last_block, GFP_NOFS); + set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS); /* FIXME...EIEIO, ENOSPC and more */ @@ -293,6 +287,8 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inode->i_ino, last_pos_in_file, 0, 0, hole_size); + btrfs_drop_extent_cache(inode, last_pos_in_file, + last_pos_in_file + hole_size -1); btrfs_check_file(root, inode); } if (err) @@ -320,12 +316,12 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, last_end += PAGE_CACHE_SIZE - 1; if (start_pos < isize) { u64 delalloc_start = start_pos; - existing_delalloc = count_range_bits(em_tree, + existing_delalloc = count_range_bits(io_tree, &delalloc_start, end_of_last_block, (u64)-1, EXTENT_DELALLOC); } - set_extent_delalloc(em_tree, start_pos, end_of_last_block, + set_extent_delalloc(io_tree, start_pos, end_of_last_block, GFP_NOFS); spin_lock(&root->fs_info->delalloc_lock); root->fs_info->delalloc_bytes += (end_of_last_block + 1 - @@ -346,6 +342,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inline_size -= start_pos; err = insert_inline_extent(trans, root, inode, start_pos, inline_size, pages, 0, num_pages); + btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1); BUG_ON(err); } if (end_pos > isize) { @@ -356,8 +353,7 @@ failed: err = btrfs_end_transaction(trans, root); out_unlock: mutex_unlock(&root->fs_info->fs_mutex); - unlock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); - free_extent_map(em); + unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); return err; } @@ -367,10 +363,15 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; while(1) { + spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, end); - if (!em) + if (!em) { + spin_unlock(&em_tree->lock); break; + } remove_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + /* once for us */ free_extent_map(em); /* once for the tree*/ diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 67005480e13..16d3aef45d1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -53,7 +53,7 @@ static struct inode_operations btrfs_file_inode_operations; static struct address_space_operations btrfs_aops; static struct address_space_operations btrfs_symlink_aops; static struct file_operations btrfs_dir_file_operations; -static struct extent_map_ops btrfs_extent_map_ops; +static struct extent_io_ops btrfs_extent_io_ops; static struct kmem_cache *btrfs_inode_cachep; struct kmem_cache *btrfs_trans_handle_cachep; @@ -104,6 +104,8 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) u64 num_bytes; u64 cur_alloc_size; u64 blocksize = root->sectorsize; + u64 orig_start = start; + u64 orig_num_bytes; struct btrfs_key ins; int ret; @@ -115,6 +117,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) num_bytes = max(blocksize, num_bytes); ret = btrfs_drop_extents(trans, root, inode, start, start + num_bytes, start, &alloc_hint); + orig_num_bytes = num_bytes; if (alloc_hint == EXTENT_MAP_INLINE) goto out; @@ -138,6 +141,8 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) alloc_hint = ins.objectid + ins.offset; start += cur_alloc_size; } + btrfs_drop_extent_cache(inode, orig_start, + orig_start + orig_num_bytes - 1); btrfs_add_ordered_inode(inode); out: btrfs_end_transaction(trans, root); @@ -297,7 +302,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) int ret = 0; struct inode *inode = page->mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_csum_item *item; struct btrfs_path *path = NULL; u32 csum; @@ -317,7 +322,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) } read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, BTRFS_CRC32_SIZE); - set_state_private(em_tree, start, csum); + set_state_private(io_tree, start, csum); out: if (path) btrfs_free_path(path); @@ -329,17 +334,19 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) { size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT); struct inode *inode = page->mapping->host; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; char *kaddr; u64 private; int ret; struct btrfs_root *root = BTRFS_I(inode)->root; u32 csum = ~(u32)0; unsigned long flags; + if (btrfs_test_opt(root, NODATASUM) || btrfs_test_flag(inode, NODATASUM)) return 0; - ret = get_state_private(em_tree, start, &private); + + ret = get_state_private(io_tree, start, &private); local_irq_save(flags); kaddr = kmap_atomic(page, KM_IRQ0); if (ret) { @@ -428,7 +435,7 @@ void btrfs_read_locked_inode(struct inode *inode) switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_mapping->a_ops = &btrfs_aops; - BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops; + BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; break; @@ -873,7 +880,7 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, size_t zero_start) { char *kaddr; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_root *root = BTRFS_I(inode)->root; u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; u64 page_end = page_start + PAGE_CACHE_SIZE - 1; @@ -884,12 +891,12 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, WARN_ON(!PageLocked(page)); set_page_extent_mapped(page); - lock_extent(em_tree, page_start, page_end, GFP_NOFS); + lock_extent(io_tree, page_start, page_end, GFP_NOFS); delalloc_start = page_start; - existing_delalloc = count_range_bits(&BTRFS_I(inode)->extent_tree, + existing_delalloc = count_range_bits(&BTRFS_I(inode)->io_tree, &delalloc_start, page_end, PAGE_CACHE_SIZE, EXTENT_DELALLOC); - set_extent_delalloc(&BTRFS_I(inode)->extent_tree, page_start, + set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); spin_lock(&root->fs_info->delalloc_lock); @@ -903,7 +910,7 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, kunmap(page); } set_page_dirty(page); - unlock_extent(em_tree, page_start, page_end, GFP_NOFS); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); return ret; } @@ -961,7 +968,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; u64 mask = root->sectorsize - 1; u64 pos = (inode->i_size + mask) & ~mask; @@ -986,7 +993,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_truncate_page(inode->i_mapping, inode->i_size); - lock_extent(em_tree, pos, block_end, GFP_NOFS); + lock_extent(io_tree, pos, block_end, GFP_NOFS); hole_size = block_end - hole_start; mutex_lock(&root->fs_info->fs_mutex); @@ -1001,11 +1008,13 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) inode->i_ino, hole_start, 0, 0, hole_size); + btrfs_drop_extent_cache(inode, hole_start, + hole_size - 1); btrfs_check_file(root, inode); } btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - unlock_extent(em_tree, pos, block_end, GFP_NOFS); + unlock_extent(io_tree, pos, block_end, GFP_NOFS); if (err) return err; } @@ -1189,7 +1198,8 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) struct btrfs_iget_args *args = p; inode->i_ino = args->ino; BTRFS_I(inode)->root = args->root; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); return 0; } @@ -1485,7 +1495,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (!inode) return ERR_PTR(-ENOMEM); - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); BTRFS_I(inode)->root = root; @@ -1672,9 +1683,10 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &btrfs_aops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); - BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops; + BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -1816,7 +1828,7 @@ out_unlock: } struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, - size_t page_offset, u64 start, u64 end, + size_t page_offset, u64 start, u64 len, int create) { int ret; @@ -1826,7 +1838,6 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, u64 extent_end = 0; u64 objectid = inode->i_ino; u32 found_type; - int failed_insert = 0; struct btrfs_path *path; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_file_extent_item *item; @@ -1834,6 +1845,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, struct btrfs_key found_key; struct extent_map *em = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_trans_handle *trans = NULL; path = btrfs_alloc_path(); @@ -1841,24 +1853,26 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, mutex_lock(&root->fs_info->fs_mutex); again: - em = lookup_extent_mapping(em_tree, start, end); + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, start, len); + spin_unlock(&em_tree->lock); + if (em) { if (em->start > start) { - printk("get_extent start %Lu em start %Lu\n", - start, em->start); + printk("get_extent lookup [%Lu %Lu] em [%Lu %Lu]\n", + start, len, em->start, em->len); WARN_ON(1); } goto out; } + em = alloc_extent_map(GFP_NOFS); if (!em) { - em = alloc_extent_map(GFP_NOFS); - if (!em) { - err = -ENOMEM; - goto out; - } - em->start = EXTENT_MAP_HOLE; - em->end = EXTENT_MAP_HOLE; + err = -ENOMEM; + goto out; } + + em->start = EXTENT_MAP_HOLE; + em->len = (u64)-1; em->bdev = inode->i_sb->s_bdev; ret = btrfs_lookup_file_extent(trans, root, path, objectid, start, trans != NULL); @@ -1893,28 +1907,25 @@ again: if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { - if (end < extent_start) + if (start + len <= extent_start) goto not_found; - em->end = extent_end - 1; + em->len = extent_end - extent_start; } else { - em->end = end; + em->len = len; } goto not_found_em; } bytenr = btrfs_file_extent_disk_bytenr(leaf, item); if (bytenr == 0) { em->start = extent_start; - em->end = extent_end - 1; + em->len = extent_end - extent_start; em->block_start = EXTENT_MAP_HOLE; - em->block_end = EXTENT_MAP_HOLE; goto insert; } bytenr += btrfs_file_extent_offset(leaf, item); em->block_start = bytenr; - em->block_end = em->block_start + - btrfs_file_extent_num_bytes(leaf, item) - 1; em->start = extent_start; - em->end = extent_end - 1; + em->len = extent_end - extent_start; goto insert; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { unsigned long ptr; @@ -1925,25 +1936,24 @@ again: size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, path->slots[0])); - extent_end = (extent_start + size - 1) | - ((u64)root->sectorsize - 1); + extent_end = (extent_start + size + root->sectorsize - 1) & + ~((u64)root->sectorsize - 1); if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { - if (end < extent_start) + if (start + len <= extent_start) goto not_found; - em->end = extent_end; + em->len = extent_end - extent_start; } else { - em->end = end; + em->len = len; } goto not_found_em; } em->block_start = EXTENT_MAP_INLINE; - em->block_end = EXTENT_MAP_INLINE; if (!page) { em->start = extent_start; - em->end = extent_start + size - 1; + em->len = size; goto out; } @@ -1952,8 +1962,7 @@ again: copy_size = min_t(u64, PAGE_CACHE_SIZE - page_offset, size - extent_offset); em->start = extent_start + extent_offset; - em->end = (em->start + copy_size -1) | - ((u64)root->sectorsize -1); + em->len = copy_size; map = kmap(page); ptr = btrfs_file_extent_inline_start(item) + extent_offset; if (create == 0 && !PageUptodate(page)) { @@ -1974,7 +1983,8 @@ again: btrfs_mark_buffer_dirty(leaf); } kunmap(page); - set_extent_uptodate(em_tree, em->start, em->end, GFP_NOFS); + set_extent_uptodate(io_tree, em->start, + extent_map_end(em) - 1, GFP_NOFS); goto insert; } else { printk("unkknown found_type %d\n", found_type); @@ -1982,33 +1992,29 @@ again: } not_found: em->start = start; - em->end = end; + em->len = len; not_found_em: em->block_start = EXTENT_MAP_HOLE; - em->block_end = EXTENT_MAP_HOLE; insert: btrfs_release_path(root, path); - if (em->start > start || em->end < start) { - printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->end, start, end); + if (em->start > start || extent_map_end(em) <= start) { + printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->len, start, len); err = -EIO; goto out; } + + err = 0; + spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); if (ret == -EEXIST) { free_extent_map(em); - em = NULL; - if (0 && failed_insert == 1) { - btrfs_drop_extent_cache(inode, start, end); - } - failed_insert++; - if (failed_insert > 5) { - printk("failing to insert %Lu %Lu\n", start, end); + em = lookup_extent_mapping(em_tree, start, len); + if (!em) { err = -EIO; - goto out; + printk("failing to insert %Lu %Lu\n", start, len); } - goto again; } - err = 0; + spin_unlock(&em_tree->lock); out: btrfs_free_path(path); if (trans) { @@ -2032,14 +2038,14 @@ static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) int btrfs_readpage(struct file *file, struct page *page) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; return extent_read_full_page(tree, page, btrfs_get_extent); } static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; if (current->flags & PF_MEMALLOC) { @@ -2047,15 +2053,15 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) unlock_page(page); return 0; } - tree = &BTRFS_I(page->mapping->host)->extent_tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; return extent_write_full_page(tree, page, btrfs_get_extent, wbc); } static int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct extent_map_tree *tree; - tree = &BTRFS_I(mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(mapping->host)->io_tree; return extent_writepages(tree, mapping, btrfs_get_extent, wbc); } @@ -2063,19 +2069,21 @@ static int btrfs_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { - struct extent_map_tree *tree; - tree = &BTRFS_I(mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(mapping->host)->io_tree; return extent_readpages(tree, mapping, pages, nr_pages, btrfs_get_extent); } static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; + struct extent_map_tree *map; int ret; - tree = &BTRFS_I(page->mapping->host)->extent_tree; - ret = try_release_extent_mapping(tree, page); + tree = &BTRFS_I(page->mapping->host)->io_tree; + map = &BTRFS_I(page->mapping->host)->extent_tree; + ret = try_release_extent_mapping(map, tree, page); if (ret == 1) { ClearPagePrivate(page); set_page_private(page, 0); @@ -2086,9 +2094,9 @@ static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) static void btrfs_invalidatepage(struct page *page, unsigned long offset) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; extent_invalidatepage(tree, page, offset); btrfs_releasepage(page, GFP_NOFS); } @@ -2374,7 +2382,7 @@ unsigned long btrfs_force_ra(struct address_space *mapping, int btrfs_defrag_file(struct file *file) { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct page *page; unsigned long last_index; unsigned long ra_index = 0; @@ -2414,13 +2422,13 @@ int btrfs_defrag_file(struct file *file) { page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; - lock_extent(em_tree, page_start, page_end, GFP_NOFS); + lock_extent(io_tree, page_start, page_end, GFP_NOFS); delalloc_start = page_start; existing_delalloc = - count_range_bits(&BTRFS_I(inode)->extent_tree, + count_range_bits(&BTRFS_I(inode)->io_tree, &delalloc_start, page_end, PAGE_CACHE_SIZE, EXTENT_DELALLOC); - set_extent_delalloc(em_tree, page_start, + set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); spin_lock(&root->fs_info->delalloc_lock); @@ -2428,7 +2436,7 @@ int btrfs_defrag_file(struct file *file) { existing_delalloc; spin_unlock(&root->fs_info->delalloc_lock); - unlock_extent(em_tree, page_start, page_end, GFP_NOFS); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); set_page_dirty(page); unlock_page(page); page_cache_release(page); @@ -2842,9 +2850,10 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &btrfs_aops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); - BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops; + BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -2934,7 +2943,7 @@ static struct file_operations btrfs_dir_file_operations = { #endif }; -static struct extent_map_ops btrfs_extent_map_ops = { +static struct extent_io_ops btrfs_extent_io_ops = { .fill_delalloc = run_delalloc_range, .writepage_io_hook = btrfs_writepage_io_hook, .readpage_io_hook = btrfs_readpage_io_hook, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8b52c69fda2..f8a1016600b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -468,10 +468,15 @@ static int __init init_btrfs_fs(void) err = btrfs_init_cachep(); if (err) goto free_transaction_sys; - err = extent_map_init(); + + err = extent_io_init(); if (err) goto free_cachep; + err = extent_map_init(); + if (err) + goto free_extent_io; + err = register_filesystem(&btrfs_fs_type); if (err) goto free_extent_map; @@ -479,6 +484,8 @@ static int __init init_btrfs_fs(void) free_extent_map: extent_map_exit(); +free_extent_io: + extent_io_exit(); free_cachep: btrfs_destroy_cachep(); free_transaction_sys: @@ -492,6 +499,7 @@ static void __exit exit_btrfs_fs(void) btrfs_exit_transaction_sys(); btrfs_destroy_cachep(); extent_map_exit(); + extent_io_exit(); unregister_filesystem(&btrfs_fs_type); btrfs_exit_sysfs(); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 163c01a2449..b4a1bc62a78 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -70,7 +70,7 @@ static int join_transaction(struct btrfs_root *root) INIT_LIST_HEAD(&cur_trans->pending_snapshots); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree); - extent_map_tree_init(&cur_trans->dirty_pages, + extent_io_tree_init(&cur_trans->dirty_pages, root->fs_info->btree_inode->i_mapping, GFP_NOFS); } else { @@ -153,7 +153,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, int ret; int err; int werr = 0; - struct extent_map_tree *dirty_pages; + struct extent_io_tree *dirty_pages; struct page *page; struct inode *btree_inode = root->fs_info->btree_inode; u64 start; @@ -610,7 +610,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans; struct btrfs_transaction *prev_trans = NULL; struct list_head dirty_fs_roots; - struct extent_map_tree *pinned_copy; + struct extent_io_tree *pinned_copy; DEFINE_WAIT(wait); int ret; @@ -639,7 +639,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, if (!pinned_copy) return -ENOMEM; - extent_map_tree_init(pinned_copy, + extent_io_tree_init(pinned_copy, root->fs_info->btree_inode->i_mapping, GFP_NOFS); trans->transaction->in_commit = 1; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index fd52e9b2392..c3172ddb332 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -29,7 +29,7 @@ struct btrfs_transaction { int use_count; int commit_done; struct list_head list; - struct extent_map_tree dirty_pages; + struct extent_io_tree dirty_pages; unsigned long start_time; struct btrfs_ordered_inode_tree ordered_inode_tree; wait_queue_head_t writer_wait; -- cgit v1.2.3-70-g09d2 From bea495e5b4d9746f26cf5b4100d24c77c3dcbd84 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 Jan 2008 16:13:14 -0500 Subject: Btrfs: Tune readahead during defrag to avoid reading too much at once Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1cf125ab782..c4fe37791d3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1856,27 +1856,44 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, } static void noinline reada_walk_down(struct btrfs_root *root, - struct extent_buffer *node) + struct extent_buffer *node, + int slot) { - int i; - u32 nritems; u64 bytenr; - int ret; + u64 last = 0; + u32 nritems; u32 refs; - int level; u32 blocksize; + int ret; + int i; + int level; + int skipped = 0; nritems = btrfs_header_nritems(node); level = btrfs_header_level(node); - for (i = 0; i < nritems; i++) { + if (level) + return; + + for (i = slot; i < nritems && skipped < 32; i++) { bytenr = btrfs_node_blockptr(node, i); - blocksize = btrfs_level_size(root, level - 1); - ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs); - BUG_ON(ret); - if (refs != 1) + if (last && ((bytenr > last && bytenr - last > 32 * 1024) || + (last > bytenr && last - bytenr > 32 * 1024))) { + skipped++; continue; + } + blocksize = btrfs_level_size(root, level - 1); + if (i != slot) { + ret = lookup_extent_ref(NULL, root, bytenr, + blocksize, &refs); + BUG_ON(ret); + if (refs != 1) { + skipped++; + continue; + } + } mutex_unlock(&root->fs_info->fs_mutex); ret = readahead_tree_block(root, bytenr, blocksize); + last = bytenr + blocksize; cond_resched(); mutex_lock(&root->fs_info->fs_mutex); if (ret) @@ -1919,9 +1936,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, WARN_ON(*level >= BTRFS_MAX_LEVEL); cur = path->nodes[*level]; - if (*level > 0 && path->slots[*level] == 0) - reada_walk_down(root, cur); - if (btrfs_header_level(cur) != *level) WARN_ON(1); @@ -1951,6 +1965,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next || !btrfs_buffer_uptodate(next)) { free_extent_buffer(next); + reada_walk_down(root, cur, path->slots[*level]); mutex_unlock(&root->fs_info->fs_mutex); next = read_tree_block(root, bytenr, blocksize); mutex_lock(&root->fs_info->fs_mutex); -- cgit v1.2.3-70-g09d2 From 291d673e6a22d9c6834e939f66c7cfef90669021 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Jan 2008 15:55:23 -0500 Subject: Btrfs: Do delalloc accounting via hooks in the extent_state code Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 6 ----- fs/btrfs/extent_io.c | 25 +++++++++++++++++++++ fs/btrfs/extent_io.h | 4 ++++ fs/btrfs/file.c | 4 ---- fs/btrfs/inode.c | 60 ++++++++++++++++++++------------------------------ 5 files changed, 53 insertions(+), 46 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c4fe37791d3..90718f77bbc 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2180,7 +2180,6 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, unsigned long last_index; unsigned long i; struct page *page; - struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct file_ra_state *ra; @@ -2220,11 +2219,6 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); - spin_lock(&root->fs_info->delalloc_lock); - root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE - - existing_delalloc; - spin_unlock(&root->fs_info->delalloc_lock); - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); set_page_dirty(page); unlock_page(page); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8aec72253a1..e81b35b0618 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -256,6 +256,26 @@ static int merge_state(struct extent_io_tree *tree, return 0; } +static void set_state_cb(struct extent_io_tree *tree, + struct extent_state *state, + unsigned long bits) +{ + if (tree->ops && tree->ops->set_bit_hook) { + tree->ops->set_bit_hook(tree->mapping->host, state->start, + state->end, bits); + } +} + +static void clear_state_cb(struct extent_io_tree *tree, + struct extent_state *state, + unsigned long bits) +{ + if (tree->ops && tree->ops->set_bit_hook) { + tree->ops->clear_bit_hook(tree->mapping->host, state->start, + state->end, bits); + } +} + /* * insert an extent_state struct into the tree. 'bits' are set on the * struct before it is inserted. @@ -281,6 +301,7 @@ static int insert_state(struct extent_io_tree *tree, state->state |= bits; state->start = start; state->end = end; + set_state_cb(tree, state, bits); node = tree_insert(&tree->state, end, &state->rb_node); if (node) { struct extent_state *found; @@ -349,6 +370,7 @@ static int clear_state_bit(struct extent_io_tree *tree, tree->dirty_bytes -= range; } state->state &= ~bits; + clear_state_cb(tree, state, bits); if (wake) wake_up(&state->wq); if (delete || state->state == 0) { @@ -553,6 +575,7 @@ static void set_state_bits(struct extent_io_tree *tree, tree->dirty_bytes += range; } state->state |= bits; + set_state_cb(tree, state, bits); } /* @@ -975,6 +998,7 @@ search_again: goto search_again; } state->state |= EXTENT_LOCKED; + set_state_cb(tree, state, EXTENT_LOCKED); if (!found) *start = state->start; found++; @@ -1474,6 +1498,7 @@ static int end_bio_extent_readpage(struct bio *bio, state = NULL; } clear->state |= EXTENT_UPTODATE; + set_state_cb(tree, clear, EXTENT_UPTODATE); clear_state_bit(tree, clear, EXTENT_LOCKED, 1, 0); if (cur == start) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index f1e8bf251f3..a96c5a14134 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -33,6 +33,10 @@ struct extent_io_ops { struct extent_state *state); void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state); + int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, + unsigned long bits); + int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end, + unsigned long bits); }; struct extent_io_tree { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c5bb00f9239..8e210616d70 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -323,10 +323,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, } set_extent_delalloc(io_tree, start_pos, end_of_last_block, GFP_NOFS); - spin_lock(&root->fs_info->delalloc_lock); - root->fs_info->delalloc_bytes += (end_of_last_block + 1 - - start_pos) - existing_delalloc; - spin_unlock(&root->fs_info->delalloc_lock); btrfs_add_ordered_inode(inode); } else { u64 aligned_end; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 413b1012de5..5a38443a24e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -80,8 +80,6 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, u64 thresh; int ret = 0; - return 0; - if (for_del) thresh = total * 90; else @@ -249,7 +247,6 @@ not_found: static int run_delalloc_range(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; - u64 num_bytes; int ret; mutex_lock(&root->fs_info->fs_mutex); if (btrfs_test_opt(root, NODATACOW) || @@ -258,20 +255,34 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) else ret = cow_file_range(inode, start, end); - spin_lock(&root->fs_info->delalloc_lock); - num_bytes = end + 1 - start; - if (root->fs_info->delalloc_bytes < num_bytes) { - printk("delalloc accounting error total %llu sub %llu\n", - root->fs_info->delalloc_bytes, num_bytes); - } else { - root->fs_info->delalloc_bytes -= num_bytes; - } - spin_unlock(&root->fs_info->delalloc_lock); - mutex_unlock(&root->fs_info->fs_mutex); return ret; } +int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, + unsigned long bits) +{ + if ((bits & EXTENT_DELALLOC)) { + struct btrfs_root *root = BTRFS_I(inode)->root; + spin_lock(&root->fs_info->delalloc_lock); + root->fs_info->delalloc_bytes += end - start + 1; + spin_unlock(&root->fs_info->delalloc_lock); + } + return 0; +} + +int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, + unsigned long bits) +{ + if ((bits & EXTENT_DELALLOC)) { + struct btrfs_root *root = BTRFS_I(inode)->root; + spin_lock(&root->fs_info->delalloc_lock); + root->fs_info->delalloc_bytes -= end - start + 1; + spin_unlock(&root->fs_info->delalloc_lock); + } + return 0; +} + int btrfs_writepage_io_hook(struct page *page, u64 start, u64 end) { struct inode *inode = page->mapping->host; @@ -908,28 +919,17 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, { char *kaddr; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - struct btrfs_root *root = BTRFS_I(inode)->root; u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; u64 page_end = page_start + PAGE_CACHE_SIZE - 1; - u64 existing_delalloc; - u64 delalloc_start; int ret = 0; WARN_ON(!PageLocked(page)); set_page_extent_mapped(page); lock_extent(io_tree, page_start, page_end, GFP_NOFS); - delalloc_start = page_start; - existing_delalloc = count_range_bits(&BTRFS_I(inode)->io_tree, - &delalloc_start, page_end, - PAGE_CACHE_SIZE, EXTENT_DELALLOC); set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); - spin_lock(&root->fs_info->delalloc_lock); - root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE - existing_delalloc; - spin_unlock(&root->fs_info->delalloc_lock); - if (zero_start != PAGE_CACHE_SIZE) { kaddr = kmap(page); memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start); @@ -2456,8 +2456,6 @@ int btrfs_defrag_file(struct file *file) { unsigned long ra_index = 0; u64 page_start; u64 page_end; - u64 delalloc_start; - u64 existing_delalloc; unsigned long i; int ret; @@ -2491,19 +2489,9 @@ int btrfs_defrag_file(struct file *file) { page_end = page_start + PAGE_CACHE_SIZE - 1; lock_extent(io_tree, page_start, page_end, GFP_NOFS); - delalloc_start = page_start; - existing_delalloc = - count_range_bits(&BTRFS_I(inode)->io_tree, - &delalloc_start, page_end, - PAGE_CACHE_SIZE, EXTENT_DELALLOC); set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); - spin_lock(&root->fs_info->delalloc_lock); - root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE - - existing_delalloc; - spin_unlock(&root->fs_info->delalloc_lock); - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); set_page_dirty(page); unlock_page(page); -- cgit v1.2.3-70-g09d2 From 4529ba495c6fd0d79247784d0df55ae6512fa883 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 31 Jan 2008 16:45:07 -0500 Subject: Btrfs: Add data block hints to SSD mode too Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 28 ++++++++++++++++------------ fs/btrfs/transaction.c | 1 + 4 files changed, 19 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8f93e066bf8..35e9a7af10a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -354,6 +354,7 @@ struct btrfs_fs_info { spinlock_t new_trans_lock; u64 delalloc_bytes; u64 last_alloc; + u64 last_data_alloc; }; /* * in ram representation of the tree. extent_root is used for all allocations diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ad72e219201..dda8ad6c007 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -688,6 +688,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->closing = 0; fs_info->total_pinned = 0; fs_info->last_alloc = 0; + fs_info->last_data_alloc = 0; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 90718f77bbc..fa54ea59007 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1410,6 +1410,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, int ret; int slot = 0; u64 last_byte = 0; + u64 *last_ptr = NULL; u64 orig_search_start = search_start; int start_found; struct extent_buffer *l; @@ -1433,14 +1434,17 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, /* for SSD, cluster allocations together as much as possible */ if (btrfs_test_opt(root, SSD)) { - if (!data) { - if (root->fs_info->last_alloc) - hint_byte = root->fs_info->last_alloc; - else { - hint_byte = hint_byte & - ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); - empty_size += 16 * 1024 * 1024; - } + if (data) + last_ptr = &root->fs_info->last_data_alloc; + else + last_ptr = &root->fs_info->last_alloc; + if (*last_ptr) { + hint_byte = *last_ptr; + } + else { + hint_byte = hint_byte & + ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); + empty_size += 16 * 1024 * 1024; } } @@ -1470,8 +1474,8 @@ check_failed: search_start = find_search_start(root, &block_group, search_start, total_needed, data); - if (!data && btrfs_test_opt(root, SSD) && info->last_alloc && - search_start != info->last_alloc) { + if (btrfs_test_opt(root, SSD) && *last_ptr && + search_start != *last_ptr) { info->last_alloc = 0; if (!empty_size) { empty_size += 16 * 1024 * 1024; @@ -1609,6 +1613,8 @@ check_pending: } ins->offset = num_bytes; btrfs_free_path(path); + if (btrfs_test_opt(root, SSD)) + *last_ptr = ins->objectid + ins->offset; return 0; new_group: @@ -1636,8 +1642,6 @@ enospc: error: btrfs_release_path(root, path); btrfs_free_path(path); - if (btrfs_test_opt(root, SSD) && !ret && !data) - info->last_alloc = ins->objectid + ins->offset; return ret; } /* diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b4a1bc62a78..3f64d0c7ddb 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -58,6 +58,7 @@ static int join_transaction(struct btrfs_root *root) root->fs_info->generation++; root->fs_info->running_transaction = cur_trans; root->fs_info->last_alloc = 0; + root->fs_info->last_data_alloc = 0; cur_trans->num_writers = 1; cur_trans->num_joined = 0; cur_trans->transid = root->fs_info->generation; -- cgit v1.2.3-70-g09d2 From 47e4bb988c853d9af79d76fc5135aee9eeffed77 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 1 Feb 2008 14:51:59 -0500 Subject: Btrfs: Insert extent record and the first backref in a single balance Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fa54ea59007..0fc8dfd58da 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1664,12 +1664,13 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, u64 root_used; u64 search_start = 0; u64 new_hint; + u32 sizes[2]; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; - struct btrfs_extent_item extent_item; + struct btrfs_extent_item *extent_item; + struct btrfs_extent_ref *ref; struct btrfs_path *path; - - btrfs_set_stack_extent_refs(&extent_item, 1); + struct btrfs_key keys[2]; new_hint = max(hint_byte, root->fs_info->alloc_start); if (new_hint < btrfs_super_total_bytes(&info->super_copy)) @@ -1707,20 +1708,37 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, WARN_ON(trans->alloc_exclude_nr); trans->alloc_exclude_start = ins->objectid; trans->alloc_exclude_nr = ins->offset; - ret = btrfs_insert_item(trans, extent_root, ins, &extent_item, - sizeof(extent_item)); - trans->alloc_exclude_start = 0; - trans->alloc_exclude_nr = 0; - BUG_ON(ret); + memcpy(&keys[0], ins, sizeof(*ins)); + keys[1].offset = hash_extent_ref(root_objectid, ref_generation, + owner, owner_offset); + keys[1].objectid = ins->objectid; + keys[1].type = BTRFS_EXTENT_REF_KEY; + sizes[0] = sizeof(*extent_item); + sizes[1] = sizeof(*ref); path = btrfs_alloc_path(); BUG_ON(!path); - ret = btrfs_insert_extent_backref(trans, extent_root, path, - ins->objectid, root_objectid, - ref_generation, owner, owner_offset); + + ret = btrfs_insert_empty_items(trans, extent_root, path, keys, + sizes, 2); BUG_ON(ret); + extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_extent_item); + btrfs_set_extent_refs(path->nodes[0], extent_item, 1); + ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, + struct btrfs_extent_ref); + + btrfs_set_ref_root(path->nodes[0], ref, root_objectid); + btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); + btrfs_set_ref_objectid(path->nodes[0], ref, owner); + btrfs_set_ref_offset(path->nodes[0], ref, owner_offset); + + btrfs_mark_buffer_dirty(path->nodes[0]); + + trans->alloc_exclude_start = 0; + trans->alloc_exclude_nr = 0; btrfs_free_path(path); finish_current_insert(trans, extent_root); pending_ret = del_pending_extents(trans, extent_root); -- cgit v1.2.3-70-g09d2 From 21a4989d26d5ce43aac452fd67be592463a5996d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 1 Feb 2008 14:51:59 -0500 Subject: Btrfs: Hash in the offset and owner for file extent backref keys This makes searches for backrefs and backref insertion much more efficient when there are many backrefs for a single extent Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0fc8dfd58da..8761aec59e3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -397,13 +397,12 @@ static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation, high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); lenum = cpu_to_le64(ref_generation); low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); - -#if 0 - lenum = cpu_to_le64(owner); - low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); - lenum = cpu_to_le64(owner_offset); - low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); -#endif + if (owner >= BTRFS_FIRST_FREE_OBJECTID) { + lenum = cpu_to_le64(owner); + low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); + lenum = cpu_to_le64(owner_offset); + low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); + } return ((u64)high_crc << 32) | (u64)low_crc; } -- cgit v1.2.3-70-g09d2 From 60cde612c8eb51ba79dabb4088f149c6a9802fdf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 1 Feb 2008 14:51:59 -0500 Subject: Btrfs: Use last_alloc optimizations for metadata, even without -o ssd Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8761aec59e3..6f161abe286 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1431,19 +1431,19 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, data = BTRFS_BLOCK_GROUP_MIXED; } - /* for SSD, cluster allocations together as much as possible */ - if (btrfs_test_opt(root, SSD)) { - if (data) - last_ptr = &root->fs_info->last_data_alloc; - else - last_ptr = &root->fs_info->last_alloc; - if (*last_ptr) { + if (!data) + last_ptr = &root->fs_info->last_alloc; + + if (data && btrfs_test_opt(root, SSD)) + last_ptr = &root->fs_info->last_data_alloc; + + if (last_ptr) { + if (*last_ptr) hint_byte = *last_ptr; - } else { hint_byte = hint_byte & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); - empty_size += 16 * 1024 * 1024; + empty_size += 2 * 1024 * 1024; } } @@ -1473,9 +1473,8 @@ check_failed: search_start = find_search_start(root, &block_group, search_start, total_needed, data); - if (btrfs_test_opt(root, SSD) && *last_ptr && - search_start != *last_ptr) { - info->last_alloc = 0; + if (last_ptr && *last_ptr && search_start != *last_ptr) { + *last_ptr = 0; if (!empty_size) { empty_size += 16 * 1024 * 1024; total_needed += empty_size; @@ -1612,7 +1611,7 @@ check_pending: } ins->offset = num_bytes; btrfs_free_path(path); - if (btrfs_test_opt(root, SSD)) + if (last_ptr) *last_ptr = ins->objectid + ins->offset; return 0; @@ -1827,8 +1826,7 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, buf->start, buf->start + buf->len - 1, EXTENT_CSUM, GFP_NOFS); buf->flags |= EXTENT_CSUM; - if (!btrfs_test_opt(root, SSD)) - btrfs_set_buffer_defrag(buf); + btrfs_set_buffer_defrag(buf); trans->blocks_used++; return buf; } -- cgit v1.2.3-70-g09d2 From f594706643607d7d24901479e985ec62e0ce129a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 4 Feb 2008 10:10:13 -0500 Subject: Btrfs: Add debugging for block group update failure Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6f161abe286..a340eeedd6c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1751,7 +1751,11 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, update_block: ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0, data); - BUG_ON(ret); + if (ret) { + printk("update block group failed for %Lu %Lu\n", + ins->objectid, ins->offset); + BUG(); + } return 0; } -- cgit v1.2.3-70-g09d2 From 068fe39fa18e8921062e2e16e6aa2b46768b2a35 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 5 Feb 2008 10:20:17 -0500 Subject: Btrfs: Add checks for last byte in disk to allocator grouping Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a340eeedd6c..9e0869959cd 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1611,8 +1611,13 @@ check_pending: } ins->offset = num_bytes; btrfs_free_path(path); - if (last_ptr) + if (last_ptr) { *last_ptr = ins->objectid + ins->offset; + if (*last_ptr == + btrfs_super_total_bytes(&root->fs_info->super_copy)) { + *last_ptr = 0; + } + } return 0; new_group: -- cgit v1.2.3-70-g09d2 From 5d196fc15dbe1cc54b81801694aafe218121e054 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 6 Feb 2008 10:00:57 -0500 Subject: Btrfs: Use 2MB as the empty_size for clustered allocations Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9e0869959cd..220e9be9ad8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1476,7 +1476,7 @@ check_failed: if (last_ptr && *last_ptr && search_start != *last_ptr) { *last_ptr = 0; if (!empty_size) { - empty_size += 16 * 1024 * 1024; + empty_size += 2 * 1024 * 1024; total_needed += empty_size; } search_start = find_search_start(root, &block_group, -- cgit v1.2.3-70-g09d2 From 9afbb0b752ef30a429c45b9de6706e28ad1a36e1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Feb 2008 13:19:35 -0500 Subject: Btrfs: Disable tree defrag in SSD mode Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 3 ++- fs/btrfs/tree-defrag.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 220e9be9ad8..af5d4a08528 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1835,7 +1835,8 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, buf->start, buf->start + buf->len - 1, EXTENT_CSUM, GFP_NOFS); buf->flags |= EXTENT_CSUM; - btrfs_set_buffer_defrag(buf); + if (!btrfs_test_opt(root, SSD)) + btrfs_set_buffer_defrag(buf); trans->blocks_used++; return buf; } diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 5c58630dce0..5935cbd8f2b 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -179,6 +179,9 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, if (root->ref_cows == 0 && !is_extent) goto out; + if (btrfs_test_opt(root, SSD)) + goto out; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; -- cgit v1.2.3-70-g09d2 From d7fc640e6fed46932f7c74e14f9b58b8637c66cf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 18 Feb 2008 12:12:38 -0500 Subject: Btrfs: Allocator improvements Reduce CPU time searching for free blocks by optimizing find_first_extent_bit Fix find_free_extent to make better use of the last_alloc hint. Before it was often finding blocks just before the hint. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 34 ++++++++++++++++++++++++---------- fs/btrfs/extent_io.c | 38 ++++++++++++++++++++++++++++++++++---- fs/btrfs/extent_io.h | 11 +++++++++++ 4 files changed, 70 insertions(+), 15 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6f0825b617d..88e21bdbc47 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -951,7 +951,7 @@ void btrfs_throttle(struct btrfs_root *root) void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) { balance_dirty_pages_ratelimited_nr( - root->fs_info->btree_inode->i_mapping, 1); + root->fs_info->btree_inode->i_mapping, 1); } void btrfs_set_buffer_defrag(struct extent_buffer *buf) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index af5d4a08528..239e9d8669c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -188,9 +188,10 @@ static u64 noinline find_search_start(struct btrfs_root *root, { int ret; struct btrfs_block_group_cache *cache = *cache_ret; + struct extent_io_tree *free_space_cache; + struct extent_state *state; u64 last; u64 start = 0; - u64 end = 0; u64 cache_miss = 0; u64 total_fs_bytes; int wrapped = 0; @@ -199,6 +200,8 @@ static u64 noinline find_search_start(struct btrfs_root *root, goto out; } total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); + free_space_cache = &root->fs_info->free_space_cache; + again: ret = cache_block_group(root, cache); if (ret) @@ -206,22 +209,27 @@ again: last = max(search_start, cache->key.objectid); + spin_lock_irq(&free_space_cache->lock); + state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY); while(1) { - ret = find_first_extent_bit(&root->fs_info->free_space_cache, - last, &start, &end, EXTENT_DIRTY); - if (ret) { + if (!state) { if (!cache_miss) cache_miss = last; + spin_unlock_irq(&free_space_cache->lock); goto new_group; } - start = max(last, start); - last = end + 1; + start = max(last, state->start); + last = state->end + 1; if (last - start < num) { if (last == cache->key.objectid + cache->key.offset) cache_miss = start; + do { + state = extent_state_next(state); + } while(state && !(state->state & EXTENT_DIRTY)); continue; } + spin_unlock_irq(&free_space_cache->lock); if (data != BTRFS_BLOCK_GROUP_MIXED && start + num > cache->key.objectid + cache->key.offset) goto new_group; @@ -1420,6 +1428,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; + int empty_cluster; u64 cached_start; WARN_ON(num_bytes < root->sectorsize); @@ -1431,11 +1440,15 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, data = BTRFS_BLOCK_GROUP_MIXED; } - if (!data) + if (!data) { last_ptr = &root->fs_info->last_alloc; + empty_cluster = 128 * 1024; + } - if (data && btrfs_test_opt(root, SSD)) + if (data && btrfs_test_opt(root, SSD)) { last_ptr = &root->fs_info->last_data_alloc; + empty_cluster = 2 * 1024 * 1024; + } if (last_ptr) { if (*last_ptr) @@ -1443,8 +1456,9 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, else { hint_byte = hint_byte & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); - empty_size += 2 * 1024 * 1024; + empty_size += empty_cluster; } + search_start = max(search_start, hint_byte); } search_end = min(search_end, @@ -1476,7 +1490,7 @@ check_failed: if (last_ptr && *last_ptr && search_start != *last_ptr) { *last_ptr = 0; if (!empty_size) { - empty_size += 2 * 1024 * 1024; + empty_size += empty_cluster; total_needed += empty_size; } search_start = find_search_start(root, &block_group, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 9262ab37a7c..fb6400895ed 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -255,7 +255,7 @@ static int merge_state(struct extent_io_tree *tree, state->start = other->start; other->tree = NULL; if (tree->last == other) - tree->last = NULL; + tree->last = state; rb_erase(&other->rb_node, &tree->state); free_extent_state(other); } @@ -268,7 +268,7 @@ static int merge_state(struct extent_io_tree *tree, other->start = state->start; state->tree = NULL; if (tree->last == state) - tree->last = NULL; + tree->last = other; rb_erase(&state->rb_node, &tree->state); free_extent_state(state); } @@ -397,8 +397,9 @@ static int clear_state_bit(struct extent_io_tree *tree, if (delete || state->state == 0) { if (state->tree) { clear_state_cb(tree, state, state->state); - if (tree->last == state) - tree->last = NULL; + if (tree->last == state) { + tree->last = extent_state_next(state); + } rb_erase(&state->rb_node, &tree->state); state->tree = NULL; free_extent_state(state); @@ -962,6 +963,35 @@ out: } EXPORT_SYMBOL(find_first_extent_bit); +struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, + u64 start, int bits) +{ + struct rb_node *node; + struct extent_state *state; + + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(tree, start); + if (!node || IS_ERR(node)) { + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->end >= start && (state->state & bits)) { + return state; + } + node = rb_next(node); + if (!node) + break; + } +out: + return NULL; +} +EXPORT_SYMBOL(find_first_extent_bit_state); + u64 find_lock_delalloc_range(struct extent_io_tree *tree, u64 *start, u64 *end, u64 max_bytes) { diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 151fdada4dc..fcc4bb078c2 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -81,6 +81,15 @@ struct extent_buffer { struct extent_map_tree; +static inline struct extent_state *extent_state_next(struct extent_state *state) +{ + struct rb_node *node; + node = rb_next(&state->rb_node); + if (!node) + return NULL; + return rb_entry(node, struct extent_state, rb_node); +} + typedef struct extent_map *(get_extent_t)(struct inode *inode, struct page *page, size_t page_offset, @@ -122,6 +131,8 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int find_first_extent_bit(struct extent_io_tree *tree, u64 start, u64 *start_ret, u64 *end_ret, int bits); +struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, + u64 start, int bits); int extent_invalidatepage(struct extent_io_tree *tree, struct page *page, unsigned long offset); int extent_write_full_page(struct extent_io_tree *tree, struct page *page, -- cgit v1.2.3-70-g09d2 From 952fccac50350481742425cac0c80f36ba8b83f2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 18 Feb 2008 16:33:44 -0500 Subject: Btrfs: Remove extent back refs in batches, and avoid duplicate searches Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 63 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 239e9d8669c..8f441783a6e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1251,6 +1251,9 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_root *extent_root = info->extent_root; struct extent_buffer *leaf; int ret; + int extent_slot = 0; + int found_extent = 0; + int num_to_del = 1; struct btrfs_extent_item *ei; u32 refs; @@ -1267,7 +1270,24 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ref_generation, owner_objectid, owner_offset, 1); if (ret == 0) { - ret = btrfs_del_item(trans, extent_root, path); + struct btrfs_key found_key; + extent_slot = path->slots[0]; + while(extent_slot > 0) { + extent_slot--; + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + extent_slot); + if (found_key.objectid != bytenr) + break; + if (found_key.type == BTRFS_EXTENT_ITEM_KEY && + found_key.offset == num_bytes) { + found_extent = 1; + break; + } + if (path->slots[0] - extent_slot > 5) + break; + } + if (!found_extent) + ret = btrfs_del_item(trans, extent_root, path); } else { btrfs_print_leaf(extent_root, path->nodes[0]); WARN_ON(1); @@ -1276,21 +1296,46 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root root_objectid, ref_generation, owner_objectid, owner_offset); } - btrfs_release_path(extent_root, path); - ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); - if (ret < 0) - return ret; - BUG_ON(ret); + if (!found_extent) { + btrfs_release_path(extent_root, path); + ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); + if (ret < 0) + return ret; + BUG_ON(ret); + extent_slot = path->slots[0]; + } leaf = path->nodes[0]; - ei = btrfs_item_ptr(leaf, path->slots[0], + ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item); refs = btrfs_extent_refs(leaf, ei); BUG_ON(refs == 0); refs -= 1; btrfs_set_extent_refs(leaf, ei, refs); + btrfs_mark_buffer_dirty(leaf); + if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) { + /* if the back ref and the extent are next to each other + * they get deleted below in one shot + */ + path->slots[0] = extent_slot; + num_to_del = 2; + } else if (found_extent) { + /* otherwise delete the extent back ref */ + ret = btrfs_del_item(trans, extent_root, path); + BUG_ON(ret); + /* if refs are 0, we need to setup the path for deletion */ + if (refs == 0) { + btrfs_release_path(extent_root, path); + ret = btrfs_search_slot(trans, extent_root, &key, path, + -1, 1); + if (ret < 0) + return ret; + BUG_ON(ret); + } + } + if (refs == 0) { u64 super_used; u64 root_used; @@ -1311,8 +1356,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root root_used = btrfs_root_used(&root->root_item); btrfs_set_root_used(&root->root_item, root_used - num_bytes); - - ret = btrfs_del_item(trans, extent_root, path); + ret = btrfs_del_items(trans, extent_root, path, path->slots[0], + num_to_del); if (ret) { return ret; } -- cgit v1.2.3-70-g09d2 From 7f93bf8d27653726e3721c01fefc523487ecf2af Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:01:28 -0400 Subject: Match the extent tree code to btrfs-progs for multi-device merging Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 59 ++++++-------------------------------------------- 1 file changed, 7 insertions(+), 52 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8f441783a6e..ebfd304138c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -209,27 +209,22 @@ again: last = max(search_start, cache->key.objectid); - spin_lock_irq(&free_space_cache->lock); - state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY); while(1) { - if (!state) { + ret = find_first_extent_bit(&root->fs_info->free_space_cache, + last, &start, &end, EXTENT_DIRTY); + if (ret) { if (!cache_miss) cache_miss = last; - spin_unlock_irq(&free_space_cache->lock); goto new_group; } - start = max(last, state->start); - last = state->end + 1; + start = max(last, start); + last = end + 1; if (last - start < num) { if (last == cache->key.objectid + cache->key.offset) cache_miss = start; - do { - state = extent_state_next(state); - } while(state && !(state->state & EXTENT_DIRTY)); continue; } - spin_unlock_irq(&free_space_cache->lock); if (data != BTRFS_BLOCK_GROUP_MIXED && start + num > cache->key.objectid + cache->key.offset) goto new_group; @@ -1485,29 +1480,8 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, data = BTRFS_BLOCK_GROUP_MIXED; } - if (!data) { - last_ptr = &root->fs_info->last_alloc; - empty_cluster = 128 * 1024; - } - - if (data && btrfs_test_opt(root, SSD)) { - last_ptr = &root->fs_info->last_data_alloc; - empty_cluster = 2 * 1024 * 1024; - } - - if (last_ptr) { - if (*last_ptr) - hint_byte = *last_ptr; - else { - hint_byte = hint_byte & - ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); - empty_size += empty_cluster; - } - search_start = max(search_start, hint_byte); - } - - search_end = min(search_end, - btrfs_super_total_bytes(&info->super_copy)); + if (search_end == (u64)-1) + search_end = btrfs_super_total_bytes(&info->super_copy); if (hint_byte) { block_group = btrfs_lookup_block_group(info, hint_byte); if (!block_group) @@ -1531,18 +1505,6 @@ check_failed: } search_start = find_search_start(root, &block_group, search_start, total_needed, data); - - if (last_ptr && *last_ptr && search_start != *last_ptr) { - *last_ptr = 0; - if (!empty_size) { - empty_size += empty_cluster; - total_needed += empty_size; - } - search_start = find_search_start(root, &block_group, - search_start, total_needed, - data); - } - search_start = stripe_align(root, search_start); cached_start = search_start; btrfs_init_path(path); @@ -1670,13 +1632,6 @@ check_pending: } ins->offset = num_bytes; btrfs_free_path(path); - if (last_ptr) { - *last_ptr = ins->objectid + ins->offset; - if (*last_ptr == - btrfs_super_total_bytes(&root->fs_info->super_copy)) { - *last_ptr = 0; - } - } return 0; new_group: -- cgit v1.2.3-70-g09d2 From 0b86a832a1f38abec695864ec2eaedc9d2383f1b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:01:56 -0400 Subject: Btrfs: Add support for multiple devices per filesystem Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.c | 38 ++- fs/btrfs/ctree.h | 313 ++++++++++++++++-- fs/btrfs/disk-io.c | 97 +++++- fs/btrfs/disk-io.h | 2 + fs/btrfs/extent-tree.c | 429 +++++++------------------ fs/btrfs/extent_io.c | 6 +- fs/btrfs/extent_io.h | 2 +- fs/btrfs/inode.c | 26 +- fs/btrfs/print-tree.c | 48 +++ fs/btrfs/transaction.c | 50 ++- fs/btrfs/volumes.c | 852 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/volumes.h | 78 +++++ 13 files changed, 1563 insertions(+), 380 deletions(-) create mode 100644 fs/btrfs/volumes.c create mode 100644 fs/btrfs/volumes.h (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 9c61609f371..5ac6f2946c2 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -6,7 +6,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ - extent_io.o + extent_io.o volumes.o ifeq ($(CONFIG_FS_POSIX_ACL),y) btrfs-y += acl.o diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ef8809c1e69..70b6ddfe15a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -70,6 +70,14 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) memset(p, 0, sizeof(*p)); } +static void add_root_to_dirty_list(struct btrfs_root *root) +{ + if (root->track_dirty && list_empty(&root->dirty_list)) { + list_add(&root->dirty_list, + &root->fs_info->dirty_cowonly_roots); + } +} + int btrfs_copy_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, @@ -196,6 +204,7 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, root_gen, 0, 0, 1); } free_extent_buffer(buf); + add_root_to_dirty_list(root); } else { root_gen = btrfs_header_generation(parent); btrfs_set_node_blockptr(parent, parent_slot, @@ -241,7 +250,7 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, return 0; } - search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); + search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); ret = __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); return ret; @@ -724,6 +733,7 @@ static int balance_level(struct btrfs_trans_handle *trans, BUG_ON(ret); root->node = child; + add_root_to_dirty_list(root); path->nodes[level] = NULL; clean_tree_block(trans, root, mid); wait_on_tree_block_writeback(root, mid); @@ -1369,6 +1379,7 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, /* the super has an extra ref to root->node */ free_extent_buffer(root->node); root->node = c; + add_root_to_dirty_list(root); extent_buffer_get(c); path->nodes[level] = c; path->slots[level] = 0; @@ -2777,3 +2788,28 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) } return 0; } + +int btrfs_previous_item(struct btrfs_root *root, + struct btrfs_path *path, u64 min_objectid, + int type) +{ + struct btrfs_key found_key; + struct extent_buffer *leaf; + int ret; + + while(1) { + if (path->slots[0] == 0) { + ret = btrfs_prev_leaf(root, path); + if (ret != 0) + return ret; + } else { + path->slots[0]--; + } + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.type == type) + return 0; + } + return 1; +} + diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 92d892f9207..1453d995fef 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -40,12 +40,44 @@ extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_MAGIC "_B4RfS_M" #define BTRFS_MAX_LEVEL 8 + +/* holds pointers to all of the tree roots */ #define BTRFS_ROOT_TREE_OBJECTID 1ULL + +/* stores information about which extents are in use, and reference counts */ #define BTRFS_EXTENT_TREE_OBJECTID 2ULL + +/* one per subvolume, storing files and directories */ #define BTRFS_FS_TREE_OBJECTID 3ULL + +/* directory objectid inside the root tree */ #define BTRFS_ROOT_TREE_DIR_OBJECTID 4ULL + + +/* + * chunk tree stores translations from logical -> physical block numbering + * the super block points to the chunk tree + */ +#define BTRFS_CHUNK_TREE_OBJECTID 5ULL + +/* + * stores information about which areas of a given device are in use. + * one per device. The tree of tree roots points to the device tree + */ +#define BTRFS_DEV_TREE_OBJECTID 6ULL + +/* + * All files have objectids higher than this. + */ #define BTRFS_FIRST_FREE_OBJECTID 256ULL + +/* + * the device items go into the chunk tree. The key is in the form + * [ 1 BTRFS_DEV_ITEM_KEY device_id ] + */ +#define BTRFS_DEV_ITEMS_OBJECTID 1ULL + /* * we can actually store much bigger names, but lets not confuse the rest * of linux @@ -95,6 +127,81 @@ struct btrfs_key { u64 offset; } __attribute__ ((__packed__)); +struct btrfs_mapping_tree { + struct extent_map_tree map_tree; +}; + +#define BTRFS_DEV_UUID_SIZE 16 +struct btrfs_dev_item { + /* the internal btrfs device id */ + __le64 devid; + + /* size of the device */ + __le64 total_bytes; + + /* bytes used */ + __le64 bytes_used; + + /* optimal io alignment for this device */ + __le32 io_align; + + /* optimal io width for this device */ + __le32 io_width; + + /* minimal io size for this device */ + __le32 sector_size; + + /* the kernel device number */ + __le64 rdev; + + /* type and info about this device */ + __le64 type; + + /* partition number, 0 for whole dev */ + __le32 partition; + + /* length of the name data at the end of the item */ + __le16 name_len; + + /* physical drive uuid (or lvm uuid) */ + u8 uuid[BTRFS_DEV_UUID_SIZE]; + /* name goes here */ +} __attribute__ ((__packed__)); + +struct btrfs_stripe { + __le64 devid; + __le64 offset; +} __attribute__ ((__packed__)); + +struct btrfs_chunk { + __le64 owner; + __le64 stripe_len; + __le64 type; + + /* optimal io alignment for this chunk */ + __le32 io_align; + + /* optimal io width for this chunk */ + __le32 io_width; + + /* minimal io size for this chunk */ + __le32 sector_size; + + /* 2^16 stripes is quite a lot, a second limit is the size of a single + * item in the btree + */ + __le16 num_stripes; + struct btrfs_stripe stripe; + /* additional stripes go here */ +} __attribute__ ((__packed__)); + +static inline unsigned long btrfs_chunk_item_size(int num_stripes) +{ + BUG_ON(num_stripes == 0); + return sizeof(struct btrfs_chunk) + + sizeof(struct btrfs_stripe) * (num_stripes - 1); +} + #define BTRFS_FSID_SIZE 16 /* * every tree block (leaf or node) starts with this header. @@ -119,6 +226,13 @@ struct btrfs_header { sizeof(struct btrfs_item) - \ sizeof(struct btrfs_file_extent_item)) + +/* + * this is a very generous portion of the super block, giving us + * room to translate 14 chunks with 3 stripes each. + */ +#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048 + /* * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc @@ -131,6 +245,7 @@ struct btrfs_super_block { __le64 magic; __le64 generation; __le64 root; + __le64 chunk_root; __le64 total_bytes; __le64 bytes_used; __le64 root_dir_objectid; @@ -138,7 +253,10 @@ struct btrfs_super_block { __le32 nodesize; __le32 leafsize; __le32 stripesize; + __le32 sys_chunk_array_size; u8 root_level; + u8 chunk_root_level; + u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; } __attribute__ ((__packed__)); /* @@ -208,12 +326,22 @@ struct btrfs_extent_ref { __le64 offset; } __attribute__ ((__packed__)); +/* dev extents record free space on individual devices. The owner + * field points back to the chunk allocation mapping tree that allocated + * the extent + */ +struct btrfs_dev_extent { + __le64 owner; + __le64 length; +} __attribute__ ((__packed__)); + + struct btrfs_inode_ref { __le16 name_len; /* name goes here */ } __attribute__ ((__packed__)); -struct btrfs_inode_timespec { +struct btrfs_timespec { __le64 sec; __le32 nsec; } __attribute__ ((__packed__)); @@ -231,13 +359,13 @@ struct btrfs_inode_item { __le32 uid; __le32 gid; __le32 mode; - __le32 rdev; + __le64 rdev; __le16 flags; __le16 compat_flags; - struct btrfs_inode_timespec atime; - struct btrfs_inode_timespec ctime; - struct btrfs_inode_timespec mtime; - struct btrfs_inode_timespec otime; + struct btrfs_timespec atime; + struct btrfs_timespec ctime; + struct btrfs_timespec mtime; + struct btrfs_timespec otime; } __attribute__ ((__packed__)); struct btrfs_dir_item { @@ -290,29 +418,34 @@ struct btrfs_csum_item { u8 csum; } __attribute__ ((__packed__)); -/* tag for the radix tree of block groups in ram */ -#define BTRFS_BLOCK_GROUP_SIZE (256 * 1024 * 1024) - +/* different types of block groups (and chunks) */ +#define BTRFS_BLOCK_GROUP_DATA (1 << 0) +#define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1) +#define BTRFS_BLOCK_GROUP_METADATA (1 << 2) -#define BTRFS_BLOCK_GROUP_DATA 1 -#define BTRFS_BLOCK_GROUP_MIXED 2 struct btrfs_block_group_item { __le64 used; - u8 flags; + __le64 chunk_tree; + __le64 chunk_objectid; + __le64 flags; } __attribute__ ((__packed__)); struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; - int data; - int cached; u64 pinned; + u64 flags; + int cached; }; + +struct btrfs_device; struct btrfs_fs_info { u8 fsid[BTRFS_FSID_SIZE]; struct btrfs_root *extent_root; struct btrfs_root *tree_root; + struct btrfs_root *chunk_root; + struct btrfs_root *dev_root; struct radix_tree_root fs_roots_radix; struct extent_io_tree free_space_cache; @@ -321,6 +454,9 @@ struct btrfs_fs_info { struct extent_io_tree pending_del; struct extent_io_tree extent_ins; + /* logical->physical extent mapping */ + struct btrfs_mapping_tree mapping_tree; + u64 generation; u64 last_trans_committed; unsigned long mount_opt; @@ -330,6 +466,7 @@ struct btrfs_fs_info { struct btrfs_transaction *running_transaction; struct btrfs_super_block super_copy; struct extent_buffer *sb_buffer; + struct block_device *__bdev; struct super_block *sb; struct inode *btree_inode; spinlock_t hash_lock; @@ -350,12 +487,17 @@ struct btrfs_fs_info { unsigned long throttles; u64 total_pinned; + struct list_head dirty_cowonly_roots; + + struct list_head devices; + struct list_head *last_device; spinlock_t delalloc_lock; spinlock_t new_trans_lock; u64 delalloc_bytes; u64 last_alloc; u64 last_data_alloc; }; + /* * in ram representation of the tree. extent_root is used for all allocations * and for the extent tree extent_root root. @@ -387,14 +529,19 @@ struct btrfs_root { u64 highest_inode; u64 last_inode_alloc; int ref_cows; + int track_dirty; struct btrfs_key defrag_progress; int defrag_running; int defrag_level; char *name; int in_sysfs; + + /* the dirty list is only used by non-reference counted roots */ + struct list_head dirty_list; }; /* + * inode items have the data typically returned from stat and store other * info about object characteristics. There is one for every file and dir in * the FS @@ -439,6 +586,10 @@ struct btrfs_root { */ #define BTRFS_BLOCK_GROUP_ITEM_KEY 50 +#define BTRFS_DEV_EXTENT_KEY 75 +#define BTRFS_DEV_ITEM_KEY 76 +#define BTRFS_CHUNK_ITEM_KEY 77 + /* * string items are for debugging. They just store a short string of * data in the FS @@ -518,13 +669,104 @@ static inline void btrfs_set_##name(type *s, u##bits val) \ s->member = cpu_to_le##bits(val); \ } +BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64); +BTRFS_SETGET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64); +BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64); +BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32); +BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32); +BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32); +BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64); +BTRFS_SETGET_FUNCS(device_rdev, struct btrfs_dev_item, rdev, 64); +BTRFS_SETGET_FUNCS(device_partition, struct btrfs_dev_item, partition, 32); +BTRFS_SETGET_FUNCS(device_name_len, struct btrfs_dev_item, name_len, 16); + +static inline char *btrfs_device_uuid(struct btrfs_dev_item *d) +{ + return (char *)d + offsetof(struct btrfs_dev_item, uuid); +} + +static inline char *btrfs_device_name(struct btrfs_dev_item *d) +{ + return (char *)(d + 1); +} + +BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64); +BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64); +BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32); +BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32); +BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32); +BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64); +BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16); +BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64); +BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk, + stripe_len, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, struct btrfs_chunk, + io_align, 32); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk, + io_width, 32); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk, + sector_size, 32); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk, + num_stripes, 16); +BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64); + +static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c, + int nr) +{ + unsigned long offset = (unsigned long)c; + offset += offsetof(struct btrfs_chunk, stripe); + offset += nr * sizeof(struct btrfs_stripe); + return (struct btrfs_stripe *)offset; +} + +static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr) +{ + return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr)); +} + +static inline void btrfs_set_stripe_offset_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr, + u64 val) +{ + btrfs_set_stripe_offset(eb, btrfs_stripe_nr(c, nr), val); +} + +static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr) +{ + return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr)); +} + +static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr, + u64 val) +{ + btrfs_set_stripe_devid(eb, btrfs_stripe_nr(c, nr), val); +} + /* struct btrfs_block_group_item */ BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, used, 64); BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item, used, 64); -BTRFS_SETGET_FUNCS(disk_block_group_flags, struct btrfs_block_group_item, - flags, 8); +BTRFS_SETGET_STACK_FUNCS(block_group_chunk_tree, struct btrfs_block_group_item, + chunk_tree, 64); +BTRFS_SETGET_FUNCS(disk_block_group_chunk_tree, struct btrfs_block_group_item, + chunk_tree, 64); +BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid, + struct btrfs_block_group_item, chunk_objectid, 64); +BTRFS_SETGET_FUNCS(disk_block_group_chunk_objecitd, + struct btrfs_block_group_item, chunk_objectid, 64); +BTRFS_SETGET_FUNCS(disk_block_group_flags, + struct btrfs_block_group_item, flags, 64); +BTRFS_SETGET_STACK_FUNCS(block_group_flags, + struct btrfs_block_group_item, flags, 64); /* struct btrfs_inode_ref */ BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); @@ -538,49 +780,53 @@ BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32); BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32); BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32); BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); -BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 32); +BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64); BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 16); BTRFS_SETGET_FUNCS(inode_compat_flags, struct btrfs_inode_item, compat_flags, 16); -static inline struct btrfs_inode_timespec * +static inline struct btrfs_timespec * btrfs_inode_atime(struct btrfs_inode_item *inode_item) { unsigned long ptr = (unsigned long)inode_item; ptr += offsetof(struct btrfs_inode_item, atime); - return (struct btrfs_inode_timespec *)ptr; + return (struct btrfs_timespec *)ptr; } -static inline struct btrfs_inode_timespec * +static inline struct btrfs_timespec * btrfs_inode_mtime(struct btrfs_inode_item *inode_item) { unsigned long ptr = (unsigned long)inode_item; ptr += offsetof(struct btrfs_inode_item, mtime); - return (struct btrfs_inode_timespec *)ptr; + return (struct btrfs_timespec *)ptr; } -static inline struct btrfs_inode_timespec * +static inline struct btrfs_timespec * btrfs_inode_ctime(struct btrfs_inode_item *inode_item) { unsigned long ptr = (unsigned long)inode_item; ptr += offsetof(struct btrfs_inode_item, ctime); - return (struct btrfs_inode_timespec *)ptr; + return (struct btrfs_timespec *)ptr; } -static inline struct btrfs_inode_timespec * +static inline struct btrfs_timespec * btrfs_inode_otime(struct btrfs_inode_item *inode_item) { unsigned long ptr = (unsigned long)inode_item; ptr += offsetof(struct btrfs_inode_item, otime); - return (struct btrfs_inode_timespec *)ptr; + return (struct btrfs_timespec *)ptr; } -BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_inode_timespec, sec, 64); -BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_inode_timespec, nsec, 32); +BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); +BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); /* struct btrfs_extent_item */ BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); +/* struct btrfs_dev_extent */ +BTRFS_SETGET_FUNCS(dev_extent_owner, struct btrfs_dev_extent, owner, 64); +BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64); + /* struct btrfs_extent_ref */ BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64); BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64); @@ -846,8 +1092,14 @@ BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, generation, 64); BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); +BTRFS_SETGET_STACK_FUNCS(super_sys_array_size, + struct btrfs_super_block, sys_chunk_array_size, 32); BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block, root_level, 8); +BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block, + chunk_root, 64); +BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block, + chunk_root_level, 64); BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block, total_bytes, 64); BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block, @@ -1009,7 +1261,14 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); +int btrfs_make_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytes_used, + u64 type, u64 chunk_tree, u64 chunk_objectid, + u64 size); /* ctree.c */ +int btrfs_previous_item(struct btrfs_root *root, + struct btrfs_path *path, u64 min_objectid, + int type); int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 88e21bdbc47..8e37fa120cc 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -28,6 +28,7 @@ #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" +#include "volumes.h" #include "print-tree.h" #if 0 @@ -234,6 +235,19 @@ static int btree_writepage_io_hook(struct page *page, u64 start, u64 end) return 0; } +static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 offset; + offset = bio->bi_sector << 9; + if (offset == BTRFS_SUPER_INFO_OFFSET) { + bio->bi_bdev = root->fs_info->sb->s_bdev; + submit_bio(rw, bio); + return 0; + } + return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio); +} + static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct extent_io_tree *tree; @@ -345,6 +359,23 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) return ret; } +static int close_all_devices(struct btrfs_fs_info *fs_info) +{ + struct list_head *list; + struct list_head *next; + struct btrfs_device *device; + + list = &fs_info->devices; + while(!list_empty(list)) { + next = list->next; + list_del(next); + device = list_entry(next, struct btrfs_device, dev_list); + kfree(device->name); + kfree(device); + } + return 0; +} + struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { @@ -420,6 +451,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->leafsize = leafsize; root->stripesize = stripesize; root->ref_cows = 0; + root->track_dirty = 0; + root->fs_info = fs_info; root->objectid = objectid; root->last_trans = 0; @@ -427,6 +460,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->last_inode_alloc = 0; root->name = NULL; root->in_sysfs = 0; + + INIT_LIST_HEAD(&root->dirty_list); memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); @@ -634,6 +669,10 @@ struct btrfs_root *open_ctree(struct super_block *sb) GFP_NOFS); struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), GFP_NOFS); + struct btrfs_root *chunk_root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); + struct btrfs_root *dev_root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); int ret; int err = -EIO; struct btrfs_super_block *disk_super; @@ -657,6 +696,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->last_trans_committed = 0; fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; + fs_info->chunk_root = chunk_root; + fs_info->dev_root = dev_root; + INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); + INIT_LIST_HEAD(&fs_info->devices); + btrfs_mapping_init(&fs_info->mapping_tree); + fs_info->last_device = &fs_info->devices; fs_info->sb = sb; fs_info->throttles = 0; fs_info->mount_opt = 0; @@ -714,12 +759,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) goto fail_iput; } #endif - __setup_root(512, 512, 512, 512, tree_root, + __setup_root(4096, 4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); fs_info->sb_buffer = read_tree_block(tree_root, BTRFS_SUPER_INFO_OFFSET, - 512); + 4096); if (!fs_info->sb_buffer) goto fail_iput; @@ -730,6 +775,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) read_extent_buffer(fs_info->sb_buffer, fs_info->fsid, (unsigned long)btrfs_super_fsid(fs_info->sb_buffer), BTRFS_FSID_SIZE); + disk_super = &fs_info->super_copy; if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; @@ -753,23 +799,47 @@ struct btrfs_root *open_ctree(struct super_block *sb) goto fail_sb_buffer; } + mutex_lock(&fs_info->fs_mutex); + ret = btrfs_read_sys_array(tree_root); + BUG_ON(ret); + + blocksize = btrfs_level_size(tree_root, + btrfs_super_chunk_root_level(disk_super)); + + __setup_root(nodesize, leafsize, sectorsize, stripesize, + chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); + + chunk_root->node = read_tree_block(chunk_root, + btrfs_super_chunk_root(disk_super), + blocksize); + BUG_ON(!chunk_root->node); + + ret = btrfs_read_chunk_tree(chunk_root); + BUG_ON(ret); + blocksize = btrfs_level_size(tree_root, btrfs_super_root_level(disk_super)); + tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super), blocksize); if (!tree_root->node) goto fail_sb_buffer; - mutex_lock(&fs_info->fs_mutex); ret = find_and_setup_root(tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); - if (ret) { - mutex_unlock(&fs_info->fs_mutex); + if (ret) goto fail_tree_root; - } + extent_root->track_dirty = 1; + + ret = find_and_setup_root(tree_root, fs_info, + BTRFS_DEV_TREE_OBJECTID, dev_root); + dev_root->track_dirty = 1; + + if (ret) + goto fail_extent_root; btrfs_read_block_groups(extent_root); @@ -777,7 +847,10 @@ struct btrfs_root *open_ctree(struct super_block *sb) mutex_unlock(&fs_info->fs_mutex); return tree_root; +fail_extent_root: + free_extent_buffer(extent_root->node); fail_tree_root: + mutex_unlock(&fs_info->fs_mutex); free_extent_buffer(tree_root->node); fail_sb_buffer: free_extent_buffer(fs_info->sb_buffer); @@ -874,6 +947,12 @@ int close_ctree(struct btrfs_root *root) if (fs_info->tree_root->node) free_extent_buffer(fs_info->tree_root->node); + if (root->fs_info->chunk_root->node); + free_extent_buffer(root->fs_info->chunk_root->node); + + if (root->fs_info->dev_root->node); + free_extent_buffer(root->fs_info->dev_root->node); + free_extent_buffer(fs_info->sb_buffer); btrfs_free_block_groups(root->fs_info); @@ -901,8 +980,13 @@ int close_ctree(struct btrfs_root *root) kfree(hasher); } #endif + close_all_devices(fs_info); + btrfs_mapping_tree_free(&fs_info->mapping_tree); + kfree(fs_info->extent_root); kfree(fs_info->tree_root); + kfree(fs_info->chunk_root); + kfree(fs_info->dev_root); return 0; } @@ -1016,4 +1100,5 @@ int btrfs_read_buffer(struct extent_buffer *buf) static struct extent_io_ops btree_extent_io_ops = { .writepage_io_hook = btree_writepage_io_hook, + .submit_bio_hook = btree_submit_bio_hook, }; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 828f3a2081b..206cb48638f 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -20,6 +20,7 @@ #define __DISKIO__ #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) +struct btrfs_device; struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); @@ -65,4 +66,5 @@ int btrfs_read_buffer(struct extent_buffer *buf); u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); void btrfs_csum_final(u32 crc, char *result); void btrfs_throttle(struct btrfs_root *root); +int btrfs_open_device(struct btrfs_device *dev); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ebfd304138c..2cd957d6e8d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -24,37 +24,19 @@ #include "disk-io.h" #include "print-tree.h" #include "transaction.h" +#include "volumes.h" -#define BLOCK_GROUP_DATA EXTENT_WRITEBACK +#define BLOCK_GROUP_DATA EXTENT_WRITEBACK #define BLOCK_GROUP_METADATA EXTENT_UPTODATE +#define BLOCK_GROUP_SYSTEM EXTENT_NEW + #define BLOCK_GROUP_DIRTY EXTENT_DIRTY static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); -static int find_previous_extent(struct btrfs_root *root, - struct btrfs_path *path) -{ - struct btrfs_key found_key; - struct extent_buffer *leaf; - int ret; - while(1) { - if (path->slots[0] == 0) { - ret = btrfs_prev_leaf(root, path); - if (ret != 0) - return ret; - } else { - path->slots[0]--; - } - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.type == BTRFS_EXTENT_ITEM_KEY) - return 0; - } - return 1; -} static int cache_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *block_group) @@ -91,7 +73,7 @@ static int cache_block_group(struct btrfs_root *root, ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) return ret; - ret = find_previous_extent(root, path); + ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY); if (ret < 0) return ret; if (ret == 0) { @@ -168,7 +150,8 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct block_group_cache = &info->block_group_cache; ret = find_first_extent_bit(block_group_cache, bytenr, &start, &end, - BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA); + BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | + BLOCK_GROUP_SYSTEM); if (ret) { return NULL; } @@ -182,23 +165,38 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct return block_group; return NULL; } -static u64 noinline find_search_start(struct btrfs_root *root, + +static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) +{ + if ((bits & BLOCK_GROUP_DATA) && + (cache->flags & BTRFS_BLOCK_GROUP_DATA)) + return 1; + if ((bits & BLOCK_GROUP_METADATA) && + (cache->flags & BTRFS_BLOCK_GROUP_METADATA)) + return 1; + if ((bits & BLOCK_GROUP_SYSTEM) && + (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM)) + return 1; + return 0; +} + +static int noinline find_search_start(struct btrfs_root *root, struct btrfs_block_group_cache **cache_ret, - u64 search_start, int num, int data) + u64 *start_ret, int num, int data) { int ret; struct btrfs_block_group_cache *cache = *cache_ret; struct extent_io_tree *free_space_cache; - struct extent_state *state; u64 last; u64 start = 0; + u64 end = 0; u64 cache_miss = 0; u64 total_fs_bytes; + u64 search_start = *start_ret; int wrapped = 0; - if (!cache) { + if (!cache) goto out; - } total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); free_space_cache = &root->fs_info->free_space_cache; @@ -208,6 +206,9 @@ again: goto out; last = max(search_start, cache->key.objectid); + if (!block_group_bits(cache, data)) { + goto new_group; + } while(1) { ret = find_first_extent_bit(&root->fs_info->free_space_cache, @@ -225,22 +226,20 @@ again: cache_miss = start; continue; } - if (data != BTRFS_BLOCK_GROUP_MIXED && - start + num > cache->key.objectid + cache->key.offset) + if (start + num > cache->key.objectid + cache->key.offset) goto new_group; if (start + num > total_fs_bytes) goto new_group; - return start; + *start_ret = start; + return 0; } out: cache = btrfs_lookup_block_group(root->fs_info, search_start); if (!cache) { - printk("Unable to find block group for %Lu\n", - search_start); + printk("Unable to find block group for %Lu\n", search_start); WARN_ON(1); - return search_start; } - return search_start; + return -ENOSPC; new_group: last = cache->key.objectid + cache->key.offset; @@ -251,7 +250,6 @@ no_cache: if (!wrapped) { wrapped = 1; last = search_start; - data = BTRFS_BLOCK_GROUP_MIXED; goto wrapped; } goto out; @@ -299,7 +297,6 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int ret; int full_search = 0; int factor = 8; - int data_swap = 0; block_group_cache = &info->block_group_cache; total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); @@ -307,19 +304,12 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (!owner) factor = 8; - if (data == BTRFS_BLOCK_GROUP_MIXED) { - bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; - factor = 10; - } else if (data) - bit = BLOCK_GROUP_DATA; - else - bit = BLOCK_GROUP_METADATA; + bit = data; if (search_start && search_start < total_fs_bytes) { struct btrfs_block_group_cache *shint; shint = btrfs_lookup_block_group(info, search_start); - if (shint && (shint->data == data || - shint->data == BTRFS_BLOCK_GROUP_MIXED)) { + if (shint && block_group_bits(shint, data)) { used = btrfs_block_group_used(&shint->item); if (used + shint->pinned < div_factor(shint->key.offset, factor)) { @@ -327,8 +317,8 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, } } } - if (hint && hint->key.objectid < total_fs_bytes && - (hint->data == data || hint->data == BTRFS_BLOCK_GROUP_MIXED)) { + if (hint && block_group_bits(hint, data) && + hint->key.objectid < total_fs_bytes) { used = btrfs_block_group_used(&hint->item); if (used + hint->pinned < div_factor(hint->key.offset, factor)) { @@ -379,12 +369,6 @@ again: full_search = 1; goto again; } - if (!data_swap) { - data_swap = 1; - bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; - last = search_start; - goto again; - } found: return found_group; } @@ -1002,7 +986,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc, - int mark_free, int data) + int mark_free) { struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; @@ -1027,41 +1011,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, old_val = btrfs_block_group_used(&cache->item); num_bytes = min(total, cache->key.offset - byte_in_group); if (alloc) { - if (cache->data != data && - old_val < (cache->key.offset >> 1)) { - int bit_to_clear; - int bit_to_set; - cache->data = data; - if (data) { - bit_to_clear = BLOCK_GROUP_METADATA; - bit_to_set = BLOCK_GROUP_DATA; - cache->item.flags &= - ~BTRFS_BLOCK_GROUP_MIXED; - cache->item.flags |= - BTRFS_BLOCK_GROUP_DATA; - } else { - bit_to_clear = BLOCK_GROUP_DATA; - bit_to_set = BLOCK_GROUP_METADATA; - cache->item.flags &= - ~BTRFS_BLOCK_GROUP_MIXED; - cache->item.flags &= - ~BTRFS_BLOCK_GROUP_DATA; - } - clear_extent_bits(&info->block_group_cache, - start, end, bit_to_clear, - GFP_NOFS); - set_extent_bits(&info->block_group_cache, - start, end, bit_to_set, - GFP_NOFS); - } else if (cache->data != data && - cache->data != BTRFS_BLOCK_GROUP_MIXED) { - cache->data = BTRFS_BLOCK_GROUP_MIXED; - set_extent_bits(&info->block_group_cache, - start, end, - BLOCK_GROUP_DATA | - BLOCK_GROUP_METADATA, - GFP_NOFS); - } old_val += num_bytes; } else { old_val -= num_bytes; @@ -1357,7 +1306,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } ret = update_block_group(trans, root, bytenr, num_bytes, 0, - mark_free, 0); + mark_free); BUG_ON(ret); } btrfs_free_path(path); @@ -1450,38 +1399,21 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, u64 exclude_start, u64 exclude_nr, int data) { - struct btrfs_path *path; - struct btrfs_key key; - u64 hole_size = 0; - u64 aligned; int ret; - int slot = 0; - u64 last_byte = 0; - u64 *last_ptr = NULL; u64 orig_search_start = search_start; - int start_found; - struct extent_buffer *l; struct btrfs_root * root = orig_root->fs_info->extent_root; struct btrfs_fs_info *info = root->fs_info; u64 total_needed = num_bytes; - int level; struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; - int empty_cluster; - u64 cached_start; WARN_ON(num_bytes < root->sectorsize); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); - level = btrfs_header_level(root->node); - - if (num_bytes >= 32 * 1024 * 1024 && hint_byte) { - data = BTRFS_BLOCK_GROUP_MIXED; - } - if (search_end == (u64)-1) search_end = btrfs_super_total_bytes(&info->super_copy); + if (hint_byte) { block_group = btrfs_lookup_block_group(info, hint_byte); if (!block_group) @@ -1495,7 +1427,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, } total_needed += empty_size; - path = btrfs_alloc_path(); + check_failed: if (!block_group) { block_group = btrfs_lookup_block_group(info, search_start); @@ -1503,135 +1435,49 @@ check_failed: block_group = btrfs_lookup_block_group(info, orig_search_start); } - search_start = find_search_start(root, &block_group, search_start, - total_needed, data); - search_start = stripe_align(root, search_start); - cached_start = search_start; - btrfs_init_path(path); - ins->objectid = search_start; - ins->offset = 0; - start_found = 0; - path->reada = 2; - - ret = btrfs_search_slot(trans, root, ins, path, 0, 0); - if (ret < 0) - goto error; - ret = find_previous_extent(root, path); - if (ret < 0) + ret = find_search_start(root, &block_group, &search_start, + total_needed, data); + if (ret) goto error; - l = path->nodes[0]; - btrfs_item_key_to_cpu(l, &key, path->slots[0]); - while (1) { - l = path->nodes[0]; - slot = path->slots[0]; - if (slot >= btrfs_header_nritems(l)) { - ret = btrfs_next_leaf(root, path); - if (ret == 0) - continue; - if (ret < 0) - goto error; - search_start = max(search_start, - block_group->key.objectid); - if (!start_found) { - aligned = stripe_align(root, search_start); - ins->objectid = aligned; - if (aligned >= search_end) { - ret = -ENOSPC; - goto error; - } - ins->offset = search_end - aligned; - start_found = 1; - goto check_pending; - } - ins->objectid = stripe_align(root, - last_byte > search_start ? - last_byte : search_start); - if (search_end <= ins->objectid) { - ret = -ENOSPC; - goto error; - } - ins->offset = search_end - ins->objectid; - BUG_ON(ins->objectid >= search_end); - goto check_pending; - } - btrfs_item_key_to_cpu(l, &key, slot); - - if (key.objectid >= search_start && key.objectid > last_byte && - start_found) { - if (last_byte < search_start) - last_byte = search_start; - aligned = stripe_align(root, last_byte); - hole_size = key.objectid - aligned; - if (key.objectid > aligned && hole_size >= num_bytes) { - ins->objectid = aligned; - ins->offset = hole_size; - goto check_pending; - } - } - if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) { - if (!start_found && btrfs_key_type(&key) == - BTRFS_BLOCK_GROUP_ITEM_KEY) { - last_byte = key.objectid; - start_found = 1; - } - goto next; - } - - - start_found = 1; - last_byte = key.objectid + key.offset; - - if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED && - last_byte >= block_group->key.objectid + - block_group->key.offset) { - btrfs_release_path(root, path); - search_start = block_group->key.objectid + - block_group->key.offset; - goto new_group; - } -next: - path->slots[0]++; - cond_resched(); - } -check_pending: - /* we have to make sure we didn't find an extent that has already - * been allocated by the map tree or the original allocation - */ - btrfs_release_path(root, path); - BUG_ON(ins->objectid < search_start); + search_start = stripe_align(root, search_start); + ins->objectid = search_start; + ins->offset = num_bytes; if (ins->objectid + num_bytes >= search_end) goto enospc; - if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED && - ins->objectid + num_bytes > block_group-> - key.objectid + block_group->key.offset) { + + if (ins->objectid + num_bytes > + block_group->key.objectid + block_group->key.offset) { search_start = block_group->key.objectid + block_group->key.offset; goto new_group; } + if (test_range_bit(&info->extent_ins, ins->objectid, ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) { search_start = ins->objectid + num_bytes; goto new_group; } + if (test_range_bit(&info->pinned_extents, ins->objectid, ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) { search_start = ins->objectid + num_bytes; goto new_group; } + if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start && ins->objectid < exclude_start + exclude_nr)) { search_start = exclude_start + exclude_nr; goto new_group; } - if (!data) { + + if (!(data & BLOCK_GROUP_DATA)) { block_group = btrfs_lookup_block_group(info, ins->objectid); if (block_group) trans->block_group = block_group; } ins->offset = num_bytes; - btrfs_free_path(path); return 0; new_group: @@ -1646,7 +1492,6 @@ enospc: if (!full_scan) total_needed -= empty_size; full_scan = 1; - data = BTRFS_BLOCK_GROUP_MIXED; } else wrapped = 1; } @@ -1657,8 +1502,6 @@ enospc: goto check_failed; error: - btrfs_release_path(root, path); - btrfs_free_path(path); return ret; } /* @@ -1689,6 +1532,13 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_key keys[2]; + if (data) + data = BLOCK_GROUP_DATA; + else if (root == root->fs_info->chunk_root) + data = BLOCK_GROUP_SYSTEM; + else + data = BLOCK_GROUP_METADATA; + new_hint = max(hint_byte, root->fs_info->alloc_start); if (new_hint < btrfs_super_total_bytes(&info->super_copy)) hint_byte = new_hint; @@ -1718,7 +1568,6 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, set_extent_bits(&root->fs_info->extent_ins, ins->objectid, ins->objectid + ins->offset - 1, EXTENT_LOCKED, GFP_NOFS); - WARN_ON(data == 1); goto update_block; } @@ -1768,8 +1617,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, } update_block: - ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0, - data); + ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0); if (ret) { printk("update block group failed for %Lu %Lu\n", ins->objectid, ins->offset); @@ -2457,7 +2305,7 @@ again: if (ret < 0) goto out; - ret = find_previous_extent(root, path); + ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY); if (ret < 0) goto out; if (ret == 0) { @@ -2604,95 +2452,48 @@ out: int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 new_size) { - struct btrfs_path *path; - u64 nr = 0; - u64 cur_byte; - u64 old_size; - unsigned long rem; - struct btrfs_block_group_cache *cache; - struct btrfs_block_group_item *item; - struct btrfs_fs_info *info = root->fs_info; - struct extent_io_tree *block_group_cache; - struct btrfs_key key; - struct extent_buffer *leaf; - int ret; - int bit; - - old_size = btrfs_super_total_bytes(&info->super_copy); - block_group_cache = &info->block_group_cache; - - root = info->extent_root; - - cache = btrfs_lookup_block_group(root->fs_info, old_size - 1); - - cur_byte = cache->key.objectid + cache->key.offset; - if (cur_byte >= new_size) - goto set_size; - - key.offset = BTRFS_BLOCK_GROUP_SIZE; - btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); + btrfs_set_super_total_bytes(&root->fs_info->super_copy, new_size); + return 0; +} - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; +int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *key) +{ + int ret; + struct btrfs_key found_key; + struct extent_buffer *leaf; + int slot; - while(cur_byte < new_size) { - key.objectid = cur_byte; - ret = btrfs_insert_empty_item(trans, root, path, &key, - sizeof(struct btrfs_block_group_item)); - BUG_ON(ret); + ret = btrfs_search_slot(NULL, root, key, path, 0, 0); + if (ret < 0) + return ret; + while(1) { + slot = path->slots[0]; leaf = path->nodes[0]; - item = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_block_group_item); - - btrfs_set_disk_block_group_used(leaf, item, 0); - div_long_long_rem(nr, 3, &rem); - if (rem) { - btrfs_set_disk_block_group_flags(leaf, item, - BTRFS_BLOCK_GROUP_DATA); - } else { - btrfs_set_disk_block_group_flags(leaf, item, 0); - } - nr++; - - cache = kmalloc(sizeof(*cache), GFP_NOFS); - BUG_ON(!cache); - - read_extent_buffer(leaf, &cache->item, (unsigned long)item, - sizeof(cache->item)); - - memcpy(&cache->key, &key, sizeof(key)); - cache->cached = 0; - cache->pinned = 0; - cur_byte = key.objectid + key.offset; - btrfs_release_path(root, path); - - if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { - bit = BLOCK_GROUP_DATA; - cache->data = BTRFS_BLOCK_GROUP_DATA; - } else { - bit = BLOCK_GROUP_METADATA; - cache->data = 0; + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret == 0) + continue; + if (ret < 0) + goto error; + break; } + btrfs_item_key_to_cpu(leaf, &found_key, slot); - /* use EXTENT_LOCKED to prevent merging */ - set_extent_bits(block_group_cache, key.objectid, - key.objectid + key.offset - 1, - bit | EXTENT_LOCKED, GFP_NOFS); - set_state_private(block_group_cache, key.objectid, - (unsigned long)cache); + if (found_key.objectid >= key->objectid && + found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) + return 0; + path->slots[0]++; } - btrfs_free_path(path); -set_size: - btrfs_set_super_total_bytes(&info->super_copy, new_size); - return 0; + ret = -ENOENT; +error: + return ret; } int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; int ret; - int err = 0; int bit; struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; @@ -2702,28 +2503,28 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct extent_buffer *leaf; block_group_cache = &info->block_group_cache; - root = info->extent_root; key.objectid = 0; - key.offset = BTRFS_BLOCK_GROUP_SIZE; + key.offset = 0; btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); - path = btrfs_alloc_path(); if (!path) return -ENOMEM; while(1) { - ret = btrfs_search_slot(NULL, info->extent_root, - &key, path, 0, 0); - if (ret != 0) { - err = ret; - break; + ret = find_first_block_group(root, path, &key); + if (ret > 0) { + ret = 0; + goto error; } + if (ret != 0) + goto error; + leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); cache = kmalloc(sizeof(*cache), GFP_NOFS); if (!cache) { - err = -1; + ret = -ENOMEM; break; } @@ -2733,18 +2534,17 @@ int btrfs_read_block_groups(struct btrfs_root *root) memcpy(&cache->key, &found_key, sizeof(found_key)); cache->cached = 0; cache->pinned = 0; + key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); - - if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) { - bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; - cache->data = BTRFS_BLOCK_GROUP_MIXED; - } else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { + cache->flags = btrfs_block_group_flags(&cache->item); + bit = 0; + if (cache->flags & BTRFS_BLOCK_GROUP_DATA) { bit = BLOCK_GROUP_DATA; - cache->data = BTRFS_BLOCK_GROUP_DATA; - } else { + } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { + bit = BLOCK_GROUP_SYSTEM; + } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { bit = BLOCK_GROUP_METADATA; - cache->data = 0; } /* use EXTENT_LOCKED to prevent merging */ @@ -2758,7 +2558,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) btrfs_super_total_bytes(&info->super_copy)) break; } - + ret = 0; +error: btrfs_free_path(path); - return 0; + return ret; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e8130c87633..7e3a1ebde9f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1706,9 +1706,9 @@ static int submit_one_bio(int rw, struct bio *bio) WARN_ON(1); } if (tree->ops && tree->ops->submit_bio_hook) - tree->ops->submit_bio_hook(rw, bio); - - submit_bio(rw, bio); + tree->ops->submit_bio_hook(page->mapping->host, rw, bio); + else + submit_bio(rw, bio); if (bio_flagged(bio, BIO_EOPNOTSUPP)) ret = -EOPNOTSUPP; bio_put(bio); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 9d665466708..0dca89328f9 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -28,7 +28,7 @@ struct extent_state; struct extent_io_ops { int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); int (*writepage_io_hook)(struct page *page, u64 start, u64 end); - int (*submit_bio_hook)(int rw, struct bio *bio); + int (*submit_bio_hook)(struct inode *inode, int rw, struct bio *bio); int (*readpage_io_hook)(struct page *page, u64 start, u64 end); int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 315dd550d56..17063cd2cb7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -40,6 +40,7 @@ #include "btrfs_inode.h" #include "ioctl.h" #include "print-tree.h" +#include "volumes.h" struct btrfs_iget_args { u64 ino; @@ -295,20 +296,20 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, return 0; } -int btrfs_submit_bio_hook(int rw, struct bio *bio) +int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) { - struct bio_vec *bvec = bio->bi_io_vec; - struct inode *inode = bvec->bv_page->mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; int ret = 0; - if (rw != WRITE) - return 0; + if (rw != WRITE) { + goto mapit; + } if (btrfs_test_opt(root, NODATASUM) || - btrfs_test_flag(inode, NODATASUM)) - return 0; + btrfs_test_flag(inode, NODATASUM)) { + goto mapit; + } mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); @@ -317,7 +318,8 @@ int btrfs_submit_bio_hook(int rw, struct bio *bio) ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); - return ret; +mapit: + return btrfs_map_bio(root, rw, bio); } int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) @@ -406,7 +408,7 @@ void btrfs_read_locked_inode(struct inode *inode) struct btrfs_path *path; struct extent_buffer *leaf; struct btrfs_inode_item *inode_item; - struct btrfs_inode_timespec *tspec; + struct btrfs_timespec *tspec; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key location; u64 alloc_group_block; @@ -455,7 +457,8 @@ void btrfs_read_locked_inode(struct inode *inode) BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); if (!BTRFS_I(inode)->block_group) { BTRFS_I(inode)->block_group = btrfs_find_block_group(root, - NULL, 0, 0, 0); + NULL, 0, + BTRFS_BLOCK_GROUP_METADATA, 0); } btrfs_free_path(path); inode_item = NULL; @@ -1550,7 +1553,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, owner = 0; else owner = 1; - group = btrfs_find_block_group(root, group, 0, 0, owner); + group = btrfs_find_block_group(root, group, 0, + BTRFS_BLOCK_GROUP_METADATA, owner); BTRFS_I(inode)->block_group = group; BTRFS_I(inode)->flags = 0; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index da0b4dcf361..9c1335dad40 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -20,6 +20,40 @@ #include "disk-io.h" #include "print-tree.h" +static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk) +{ + int num_stripes = btrfs_chunk_num_stripes(eb, chunk); + int i; + printk("\t\tchunk owner %llu type %llu num_stripes %d\n", + (unsigned long long)btrfs_chunk_owner(eb, chunk), + (unsigned long long)btrfs_chunk_type(eb, chunk), + num_stripes); + for (i = 0 ; i < num_stripes ; i++) { + printk("\t\t\tstripe %d devid %llu offset %llu\n", i, + (unsigned long long)btrfs_stripe_devid_nr(eb, chunk, i), + (unsigned long long)btrfs_stripe_offset_nr(eb, chunk, i)); + } +} +static void print_dev_item(struct extent_buffer *eb, + struct btrfs_dev_item *dev_item) +{ + char *name; + int name_len; + + name_len = btrfs_device_name_len(eb, dev_item); + name = kmalloc(name_len, GFP_NOFS); + if (name) { + read_extent_buffer(eb, name, + (unsigned long)btrfs_device_name(dev_item), + name_len); + } + printk("\t\tdev item name %.*s devid %llu " + "total_bytes %llu bytes used %Lu\n", name_len, name, + (unsigned long long)btrfs_device_id(eb, dev_item), + (unsigned long long)btrfs_device_total_bytes(eb, dev_item), + (unsigned long long)btrfs_device_bytes_used(eb, dev_item)); + kfree(name); +} void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) { int i; @@ -34,6 +68,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) struct btrfs_key key; struct btrfs_key found_key; struct btrfs_extent_ref *ref; + struct btrfs_dev_extent *dev_extent; u32 type; printk("leaf %llu total ptrs %d free space %d\n", @@ -106,6 +141,19 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) printk("\t\tblock group used %llu\n", (unsigned long long)btrfs_disk_block_group_used(l, bi)); break; + case BTRFS_CHUNK_ITEM_KEY: + print_chunk(l, btrfs_item_ptr(l, i, struct btrfs_chunk)); + break; + case BTRFS_DEV_ITEM_KEY: + print_dev_item(l, btrfs_item_ptr(l, i, + struct btrfs_dev_item)); + break; + case BTRFS_DEV_EXTENT_KEY: + dev_extent = btrfs_item_ptr(l, i, + struct btrfs_dev_extent); + printk("\t\tdev extent owner %llu length %llu\n", + (unsigned long long)btrfs_dev_extent_owner(l, dev_extent), + (unsigned long long)btrfs_dev_extent_length(l, dev_extent)); }; } } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e9a0983897f..5e9f69244f9 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -198,29 +198,42 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, return werr; } -int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, - struct btrfs_root *root) +static int update_cowonly_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root) { int ret; - u64 old_extent_block; - struct btrfs_fs_info *fs_info = root->fs_info; - struct btrfs_root *tree_root = fs_info->tree_root; - struct btrfs_root *extent_root = fs_info->extent_root; + u64 old_root_bytenr; + struct btrfs_root *tree_root = root->fs_info->tree_root; - btrfs_write_dirty_block_groups(trans, extent_root); + btrfs_write_dirty_block_groups(trans, root); while(1) { - old_extent_block = btrfs_root_bytenr(&extent_root->root_item); - if (old_extent_block == extent_root->node->start) + old_root_bytenr = btrfs_root_bytenr(&root->root_item); + if (old_root_bytenr == root->node->start) break; - btrfs_set_root_bytenr(&extent_root->root_item, - extent_root->node->start); - btrfs_set_root_level(&extent_root->root_item, - btrfs_header_level(extent_root->node)); + btrfs_set_root_bytenr(&root->root_item, + root->node->start); + btrfs_set_root_level(&root->root_item, + btrfs_header_level(root->node)); ret = btrfs_update_root(trans, tree_root, - &extent_root->root_key, - &extent_root->root_item); + &root->root_key, + &root->root_item); BUG_ON(ret); - btrfs_write_dirty_block_groups(trans, extent_root); + btrfs_write_dirty_block_groups(trans, root); + } + return 0; +} + +int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct list_head *next; + + while(!list_empty(&fs_info->dirty_cowonly_roots)) { + next = fs_info->dirty_cowonly_roots.next; + list_del_init(next); + root = list_entry(next, struct btrfs_root, dirty_list); + update_cowonly_root(trans, root); } return 0; } @@ -616,6 +629,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, unsigned long timeout = 1; struct btrfs_transaction *cur_trans; struct btrfs_transaction *prev_trans = NULL; + struct btrfs_root *chunk_root = root->fs_info->chunk_root; struct list_head dirty_fs_roots; struct extent_io_tree *pinned_copy; DEFINE_WAIT(wait); @@ -714,6 +728,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_set_super_root_level(&root->fs_info->super_copy, btrfs_header_level(root->fs_info->tree_root->node)); + btrfs_set_super_chunk_root(&root->fs_info->super_copy, + chunk_root->node->start); + btrfs_set_super_chunk_root_level(&root->fs_info->super_copy, + btrfs_header_level(chunk_root->node)); write_extent_buffer(root->fs_info->sb_buffer, &root->fs_info->super_copy, 0, sizeof(root->fs_info->super_copy)); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c new file mode 100644 index 00000000000..90a8d45dc6d --- /dev/null +++ b/fs/btrfs/volumes.c @@ -0,0 +1,852 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ +#include +#include +#include "ctree.h" +#include "extent_map.h" +#include "disk-io.h" +#include "transaction.h" +#include "print-tree.h" +#include "volumes.h" + +struct map_lookup { + struct btrfs_device *dev; + u64 physical; +}; + +/* + * this uses a pretty simple search, the expectation is that it is + * called very infrequently and that a given device has a small number + * of extents + */ +static int find_free_dev_extent(struct btrfs_trans_handle *trans, + struct btrfs_device *device, + struct btrfs_path *path, + u64 num_bytes, u64 *start) +{ + struct btrfs_key key; + struct btrfs_root *root = device->dev_root; + struct btrfs_dev_extent *dev_extent = NULL; + u64 hole_size = 0; + u64 last_byte = 0; + u64 search_start = 0; + u64 search_end = device->total_bytes; + int ret; + int slot = 0; + int start_found; + struct extent_buffer *l; + + start_found = 0; + path->reada = 2; + + /* FIXME use last free of some kind */ + + key.objectid = device->devid; + key.offset = search_start; + key.type = BTRFS_DEV_EXTENT_KEY; + ret = btrfs_search_slot(trans, root, &key, path, 0, 0); + if (ret < 0) + goto error; + ret = btrfs_previous_item(root, path, 0, key.type); + if (ret < 0) + goto error; + l = path->nodes[0]; + btrfs_item_key_to_cpu(l, &key, path->slots[0]); + while (1) { + l = path->nodes[0]; + slot = path->slots[0]; + if (slot >= btrfs_header_nritems(l)) { + ret = btrfs_next_leaf(root, path); + if (ret == 0) + continue; + if (ret < 0) + goto error; +no_more_items: + if (!start_found) { + if (search_start >= search_end) { + ret = -ENOSPC; + goto error; + } + *start = search_start; + start_found = 1; + goto check_pending; + } + *start = last_byte > search_start ? + last_byte : search_start; + if (search_end <= *start) { + ret = -ENOSPC; + goto error; + } + goto check_pending; + } + btrfs_item_key_to_cpu(l, &key, slot); + + if (key.objectid < device->devid) + goto next; + + if (key.objectid > device->devid) + goto no_more_items; + + if (key.offset >= search_start && key.offset > last_byte && + start_found) { + if (last_byte < search_start) + last_byte = search_start; + hole_size = key.offset - last_byte; + if (key.offset > last_byte && + hole_size >= num_bytes) { + *start = last_byte; + goto check_pending; + } + } + if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) { + goto next; + } + + start_found = 1; + dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); + last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent); +next: + path->slots[0]++; + cond_resched(); + } +check_pending: + /* we have to make sure we didn't find an extent that has already + * been allocated by the map tree or the original allocation + */ + btrfs_release_path(root, path); + BUG_ON(*start < search_start); + + if (*start + num_bytes >= search_end) { + ret = -ENOSPC; + goto error; + } + /* check for pending inserts here */ + return 0; + +error: + btrfs_release_path(root, path); + return ret; +} + +int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, + struct btrfs_device *device, + u64 owner, u64 num_bytes, u64 *start) +{ + int ret; + struct btrfs_path *path; + struct btrfs_root *root = device->dev_root; + struct btrfs_dev_extent *extent; + struct extent_buffer *leaf; + struct btrfs_key key; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = find_free_dev_extent(trans, device, path, num_bytes, start); + if (ret) + goto err; + + key.objectid = device->devid; + key.offset = *start; + key.type = BTRFS_DEV_EXTENT_KEY; + ret = btrfs_insert_empty_item(trans, root, path, &key, + sizeof(*extent)); + BUG_ON(ret); + + leaf = path->nodes[0]; + extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_dev_extent); + btrfs_set_dev_extent_owner(leaf, extent, owner); + btrfs_set_dev_extent_length(leaf, extent, num_bytes); + btrfs_mark_buffer_dirty(leaf); +err: + btrfs_free_path(path); + return ret; +} + +static int find_next_chunk(struct btrfs_root *root, u64 *objectid) +{ + struct btrfs_path *path; + int ret; + struct btrfs_key key; + struct btrfs_key found_key; + + path = btrfs_alloc_path(); + BUG_ON(!path); + + key.objectid = (u64)-1; + key.offset = (u64)-1; + key.type = BTRFS_CHUNK_ITEM_KEY; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto error; + + BUG_ON(ret == 0); + + ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY); + if (ret) { + *objectid = 0; + } else { + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + *objectid = found_key.objectid + found_key.offset; + } + ret = 0; +error: + btrfs_free_path(path); + return ret; +} + +static struct btrfs_device *next_device(struct list_head *head, + struct list_head *last) +{ + struct list_head *next = last->next; + struct btrfs_device *dev; + + if (list_empty(head)) + return NULL; + + if (next == head) + next = next->next; + + dev = list_entry(next, struct btrfs_device, dev_list); + return dev; +} + +static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path, + u64 *objectid) +{ + int ret; + struct btrfs_key key; + struct btrfs_key found_key; + + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.type = BTRFS_DEV_ITEM_KEY; + key.offset = (u64)-1; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto error; + + BUG_ON(ret == 0); + + ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID, + BTRFS_DEV_ITEM_KEY); + if (ret) { + *objectid = 1; + } else { + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + *objectid = found_key.offset + 1; + } + ret = 0; +error: + btrfs_release_path(root, path); + return ret; +} + +/* + * the device information is stored in the chunk root + * the btrfs_device struct should be fully filled in + */ +int btrfs_add_device(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_device *device) +{ + int ret; + struct btrfs_path *path; + struct btrfs_dev_item *dev_item; + struct extent_buffer *leaf; + struct btrfs_key key; + unsigned long ptr; + u64 free_devid; + + root = root->fs_info->chunk_root; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = find_next_devid(root, path, &free_devid); + if (ret) + goto out; + + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.type = BTRFS_DEV_ITEM_KEY; + key.offset = free_devid; + + ret = btrfs_insert_empty_item(trans, root, path, &key, + sizeof(*dev_item) + device->name_len); + if (ret) + goto out; + + leaf = path->nodes[0]; + dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); + + btrfs_set_device_id(leaf, dev_item, device->devid); + btrfs_set_device_type(leaf, dev_item, device->type); + btrfs_set_device_io_align(leaf, dev_item, device->io_align); + btrfs_set_device_io_width(leaf, dev_item, device->io_width); + btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); + btrfs_set_device_rdev(leaf, dev_item, device->rdev); + btrfs_set_device_partition(leaf, dev_item, device->partition); + btrfs_set_device_name_len(leaf, dev_item, device->name_len); + btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); + btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); + + ptr = (unsigned long)btrfs_device_name(dev_item); + write_extent_buffer(leaf, device->name, ptr, device->name_len); + + ptr = (unsigned long)btrfs_device_uuid(dev_item); + write_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE); + btrfs_mark_buffer_dirty(leaf); + ret = 0; + +out: + btrfs_free_path(path); + return ret; +} +int btrfs_update_device(struct btrfs_trans_handle *trans, + struct btrfs_device *device) +{ + int ret; + struct btrfs_path *path; + struct btrfs_root *root; + struct btrfs_dev_item *dev_item; + struct extent_buffer *leaf; + struct btrfs_key key; + + root = device->dev_root->fs_info->chunk_root; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.type = BTRFS_DEV_ITEM_KEY; + key.offset = device->devid; + + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) + goto out; + + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + leaf = path->nodes[0]; + dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); + + btrfs_set_device_id(leaf, dev_item, device->devid); + btrfs_set_device_type(leaf, dev_item, device->type); + btrfs_set_device_io_align(leaf, dev_item, device->io_align); + btrfs_set_device_io_width(leaf, dev_item, device->io_width); + btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); + btrfs_set_device_rdev(leaf, dev_item, device->rdev); + btrfs_set_device_partition(leaf, dev_item, device->partition); + btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); + btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); + btrfs_mark_buffer_dirty(leaf); + +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_key *key, + struct btrfs_chunk *chunk, int item_size) +{ + struct btrfs_super_block *super_copy = &root->fs_info->super_copy; + struct btrfs_disk_key disk_key; + u32 array_size; + u8 *ptr; + + array_size = btrfs_super_sys_array_size(super_copy); + if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) + return -EFBIG; + + ptr = super_copy->sys_chunk_array + array_size; + btrfs_cpu_key_to_disk(&disk_key, key); + memcpy(ptr, &disk_key, sizeof(disk_key)); + ptr += sizeof(disk_key); + memcpy(ptr, chunk, item_size); + item_size += sizeof(disk_key); + btrfs_set_super_sys_array_size(super_copy, array_size + item_size); + return 0; +} + +int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, u64 *start, + u64 *num_bytes, u32 type) +{ + u64 dev_offset; + struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; + struct btrfs_stripe *stripes; + struct btrfs_device *device = NULL; + struct btrfs_chunk *chunk; + struct list_head *dev_list = &extent_root->fs_info->devices; + struct list_head *last_dev = extent_root->fs_info->last_device; + struct extent_map_tree *em_tree; + struct map_lookup *map; + struct extent_map *em; + u64 physical; + u64 calc_size = 1024 * 1024 * 1024; + int num_stripes; + int ret; + int index = 0; + struct btrfs_key key; + + + ret = find_next_chunk(chunk_root, &key.objectid); + if (ret) + return ret; + + num_stripes = 1; + chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS); + if (!chunk) + return -ENOMEM; + + stripes = &chunk->stripe; + + *num_bytes = calc_size; + while(index < num_stripes) { + device = next_device(dev_list, last_dev); + BUG_ON(!device); + last_dev = &device->dev_list; + extent_root->fs_info->last_device = last_dev; + + ret = btrfs_alloc_dev_extent(trans, device, + key.objectid, + calc_size, &dev_offset); + BUG_ON(ret); + + device->bytes_used += calc_size; + ret = btrfs_update_device(trans, device); + BUG_ON(ret); + + btrfs_set_stack_stripe_devid(stripes + index, device->devid); + btrfs_set_stack_stripe_offset(stripes + index, dev_offset); + physical = dev_offset; + index++; + } + + /* key.objectid was set above */ + key.offset = *num_bytes; + key.type = BTRFS_CHUNK_ITEM_KEY; + btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid); + btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024); + btrfs_set_stack_chunk_type(chunk, type); + btrfs_set_stack_chunk_num_stripes(chunk, num_stripes); + btrfs_set_stack_chunk_io_align(chunk, extent_root->sectorsize); + btrfs_set_stack_chunk_io_width(chunk, extent_root->sectorsize); + btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize); + + ret = btrfs_insert_item(trans, chunk_root, &key, chunk, + btrfs_chunk_item_size(num_stripes)); + BUG_ON(ret); + *start = key.objectid; + + em = alloc_extent_map(GFP_NOFS); + if (!em) + return -ENOMEM; + map = kmalloc(sizeof(*map), GFP_NOFS); + if (!map) { + free_extent_map(em); + return -ENOMEM; + } + + em->bdev = (struct block_device *)map; + em->start = key.objectid; + em->len = key.offset; + em->block_start = 0; + + map->physical = physical; + map->dev = device; + + if (!map->dev) { + kfree(map); + free_extent_map(em); + return -EIO; + } + kfree(chunk); + + em_tree = &extent_root->fs_info->mapping_tree.map_tree; + spin_lock(&em_tree->lock); + ret = add_extent_mapping(em_tree, em); + BUG_ON(ret); + spin_unlock(&em_tree->lock); + free_extent_map(em); + return ret; +} + +void btrfs_mapping_init(struct btrfs_mapping_tree *tree) +{ + extent_map_tree_init(&tree->map_tree, GFP_NOFS); +} + +void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) +{ + struct extent_map *em; + + while(1) { + spin_lock(&tree->map_tree.lock); + em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); + if (em) + remove_extent_mapping(&tree->map_tree, em); + spin_unlock(&tree->map_tree.lock); + if (!em) + break; + kfree(em->bdev); + /* once for us */ + free_extent_map(em); + /* once for the tree */ + free_extent_map(em); + } +} + +int btrfs_map_block(struct btrfs_mapping_tree *map_tree, + u64 logical, u64 *phys, u64 *length, + struct btrfs_device **dev) +{ + struct extent_map *em; + struct map_lookup *map; + struct extent_map_tree *em_tree = &map_tree->map_tree; + u64 offset; + + + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, logical, *length); + BUG_ON(!em); + + BUG_ON(em->start > logical || em->start + em->len < logical); + map = (struct map_lookup *)em->bdev; + offset = logical - em->start; + *phys = map->physical + offset; + *length = em->len - offset; + *dev = map->dev; + free_extent_map(em); + spin_unlock(&em_tree->lock); + return 0; +} + +int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio) +{ + struct btrfs_mapping_tree *map_tree; + struct btrfs_device *dev; + u64 logical = bio->bi_sector << 9; + u64 physical; + u64 length = 0; + u64 map_length; + struct bio_vec *bvec; + int i; + int ret; + + bio_for_each_segment(bvec, bio, i) { + length += bvec->bv_len; + } + map_tree = &root->fs_info->mapping_tree; + map_length = length; + ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev); + BUG_ON(map_length < length); + bio->bi_sector = physical >> 9; + bio->bi_bdev = dev->bdev; + submit_bio(rw, bio); + return 0; +} + +struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid) +{ + struct btrfs_device *dev; + struct list_head *cur = root->fs_info->devices.next; + struct list_head *head = &root->fs_info->devices; + + while(cur != head) { + dev = list_entry(cur, struct btrfs_device, dev_list); + if (dev->devid == devid) + return dev; + cur = cur->next; + } + return NULL; +} + +static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, + struct extent_buffer *leaf, + struct btrfs_chunk *chunk) +{ + struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; + struct map_lookup *map; + struct extent_map *em; + u64 logical; + u64 length; + u64 devid; + int ret; + + logical = key->objectid; + length = key->offset; + spin_lock(&map_tree->map_tree.lock); + em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); + + /* already mapped? */ + if (em && em->start <= logical && em->start + em->len > logical) { + free_extent_map(em); + spin_unlock(&map_tree->map_tree.lock); + return 0; + } else if (em) { + free_extent_map(em); + } + spin_unlock(&map_tree->map_tree.lock); + + map = kzalloc(sizeof(*map), GFP_NOFS); + if (!map) + return -ENOMEM; + + em = alloc_extent_map(GFP_NOFS); + if (!em) + return -ENOMEM; + map = kmalloc(sizeof(*map), GFP_NOFS); + if (!map) { + free_extent_map(em); + return -ENOMEM; + } + + em->bdev = (struct block_device *)map; + em->start = logical; + em->len = length; + em->block_start = 0; + + map->physical = btrfs_stripe_offset_nr(leaf, chunk, 0); + devid = btrfs_stripe_devid_nr(leaf, chunk, 0); + map->dev = btrfs_find_device(root, devid); + if (!map->dev) { + kfree(map); + free_extent_map(em); + return -EIO; + } + + spin_lock(&map_tree->map_tree.lock); + ret = add_extent_mapping(&map_tree->map_tree, em); + BUG_ON(ret); + spin_unlock(&map_tree->map_tree.lock); + free_extent_map(em); + + return 0; +} + +static int fill_device_from_item(struct extent_buffer *leaf, + struct btrfs_dev_item *dev_item, + struct btrfs_device *device) +{ + unsigned long ptr; + char *name; + + device->devid = btrfs_device_id(leaf, dev_item); + device->total_bytes = btrfs_device_total_bytes(leaf, dev_item); + device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); + device->type = btrfs_device_type(leaf, dev_item); + device->io_align = btrfs_device_io_align(leaf, dev_item); + device->io_width = btrfs_device_io_width(leaf, dev_item); + device->sector_size = btrfs_device_sector_size(leaf, dev_item); + device->rdev = btrfs_device_rdev(leaf, dev_item); + device->partition = btrfs_device_partition(leaf, dev_item); + device->name_len = btrfs_device_name_len(leaf, dev_item); + + ptr = (unsigned long)btrfs_device_uuid(dev_item); + read_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE); + + name = kmalloc(device->name_len + 1, GFP_NOFS); + if (!name) + return -ENOMEM; + device->name = name; + ptr = (unsigned long)btrfs_device_name(dev_item); + read_extent_buffer(leaf, name, ptr, device->name_len); + name[device->name_len] = '\0'; + return 0; +} + +static int read_one_dev(struct btrfs_root *root, struct btrfs_key *key, + struct extent_buffer *leaf, + struct btrfs_dev_item *dev_item) +{ + struct btrfs_device *device; + u64 devid; + int ret; + + devid = btrfs_device_id(leaf, dev_item); + if (btrfs_find_device(root, devid)) + return 0; + + device = kmalloc(sizeof(*device), GFP_NOFS); + if (!device) + return -ENOMEM; + + fill_device_from_item(leaf, dev_item, device); + device->dev_root = root->fs_info->dev_root; + device->bdev = root->fs_info->sb->s_bdev; + list_add(&device->dev_list, &root->fs_info->devices); + memcpy(&device->dev_key, key, sizeof(*key)); + ret = 0; +#if 0 + ret = btrfs_open_device(device); + if (ret) { + kfree(device); + } +#endif + return ret; +} + +int btrfs_read_sys_array(struct btrfs_root *root) +{ + struct btrfs_super_block *super_copy = &root->fs_info->super_copy; + struct extent_buffer *sb = root->fs_info->sb_buffer; + struct btrfs_disk_key *disk_key; + struct btrfs_dev_item *dev_item; + struct btrfs_chunk *chunk; + struct btrfs_key key; + u32 num_stripes; + u32 array_size; + u32 len = 0; + u8 *ptr; + unsigned long sb_ptr; + u32 cur; + int ret; + int dev_only = 1; + + array_size = btrfs_super_sys_array_size(super_copy); + + /* + * we do this loop twice, once for the device items and + * once for all of the chunks. This way there are device + * structs filled in for every chunk + */ +again: + ptr = super_copy->sys_chunk_array; + sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array); + cur = 0; + + while (cur < array_size) { + disk_key = (struct btrfs_disk_key *)ptr; + btrfs_disk_key_to_cpu(&key, disk_key); + + len = sizeof(*disk_key); + ptr += len; + sb_ptr += len; + cur += len; + + if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID && + key.type == BTRFS_DEV_ITEM_KEY) { + dev_item = (struct btrfs_dev_item *)sb_ptr; + if (dev_only) { + ret = read_one_dev(root, &key, sb, dev_item); + BUG_ON(ret); + } + len = sizeof(*dev_item); + len += btrfs_device_name_len(sb, dev_item); + } else if (key.type == BTRFS_CHUNK_ITEM_KEY) { + + chunk = (struct btrfs_chunk *)sb_ptr; + if (!dev_only) { + ret = read_one_chunk(root, &key, sb, chunk); + BUG_ON(ret); + } + num_stripes = btrfs_chunk_num_stripes(sb, chunk); + len = btrfs_chunk_item_size(num_stripes); + } else { + BUG(); + } + ptr += len; + sb_ptr += len; + cur += len; + } + if (dev_only == 1) { + dev_only = 0; + goto again; + } + return 0; +} + +int btrfs_read_chunk_tree(struct btrfs_root *root) +{ + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_key key; + struct btrfs_key found_key; + int ret; + int slot; + + root = root->fs_info->chunk_root; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + /* first we search for all of the device items, and then we + * read in all of the chunk items. This way we can create chunk + * mappings that reference all of the devices that are afound + */ + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.offset = 0; + key.type = 0; +again: + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + while(1) { + leaf = path->nodes[0]; + slot = path->slots[0]; + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret == 0) + continue; + if (ret < 0) + goto error; + break; + } + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { + if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID) + break; + if (found_key.type == BTRFS_DEV_ITEM_KEY) { + struct btrfs_dev_item *dev_item; + dev_item = btrfs_item_ptr(leaf, slot, + struct btrfs_dev_item); + ret = read_one_dev(root, &found_key, leaf, + dev_item); + BUG_ON(ret); + } + } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { + struct btrfs_chunk *chunk; + chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); + ret = read_one_chunk(root, &found_key, leaf, chunk); + } + path->slots[0]++; + } + if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { + key.objectid = 0; + btrfs_release_path(root, path); + goto again; + } + + btrfs_free_path(path); + ret = 0; +error: + return ret; +} + diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h new file mode 100644 index 00000000000..4a47dcb00a7 --- /dev/null +++ b/fs/btrfs/volumes.h @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_VOLUMES_ +#define __BTRFS_VOLUMES_ +struct btrfs_device { + struct list_head dev_list; + struct btrfs_root *dev_root; + struct btrfs_key dev_key; + + struct block_device *bdev; + + /* the internal btrfs device id */ + u64 devid; + + /* size of the device */ + u64 total_bytes; + + /* bytes used */ + u64 bytes_used; + + /* optimal io alignment for this device */ + u32 io_align; + + /* optimal io width for this device */ + u32 io_width; + + /* minimal io size for this device */ + u32 sector_size; + + /* the kernel device number */ + u64 rdev; + + /* type and info about this device */ + u64 type; + + /* partition number, 0 for whole dev */ + int partition; + + /* length of the name data at the end of the item */ + int name_len; + + /* physical drive uuid (or lvm uuid) */ + u8 uuid[BTRFS_DEV_UUID_SIZE]; + + char *name; +}; + +int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, + struct btrfs_device *device, + u64 owner, u64 num_bytes, u64 *start); +int btrfs_map_block(struct btrfs_mapping_tree *map_tree, + u64 logical, u64 *phys, u64 *length, + struct btrfs_device **dev); +int btrfs_read_sys_array(struct btrfs_root *root); +int btrfs_read_chunk_tree(struct btrfs_root *root); +int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, u64 *start, + u64 *num_bytes, u32 type); +void btrfs_mapping_init(struct btrfs_mapping_tree *tree); +void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); +int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio); +#endif -- cgit v1.2.3-70-g09d2 From 6324fbf334f4586325057197da7752f4ffa409d3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:01:59 -0400 Subject: Btrfs: Dynamic chunk and block group allocation Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 12 ++- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 208 ++++++++++++++++++++++++++++++++++++++++++++----- fs/btrfs/inode.c | 9 ++- fs/btrfs/volumes.c | 87 +++++++++++++-------- fs/btrfs/volumes.h | 2 +- 6 files changed, 263 insertions(+), 57 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1453d995fef..a877105f5c4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -431,9 +431,19 @@ struct btrfs_block_group_item { __le64 flags; } __attribute__ ((__packed__)); +struct btrfs_space_info { + u64 flags; + u64 total_bytes; + u64 bytes_used; + u64 bytes_pinned; + int full; + struct list_head list; +}; + struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; + struct btrfs_space_info *space_info; u64 pinned; u64 flags; int cached; @@ -490,7 +500,7 @@ struct btrfs_fs_info { struct list_head dirty_cowonly_roots; struct list_head devices; - struct list_head *last_device; + struct list_head space_info; spinlock_t delalloc_lock; spinlock_t new_trans_lock; u64 delalloc_bytes; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8e37fa120cc..2a239ae49f7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -700,8 +700,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->dev_root = dev_root; INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); INIT_LIST_HEAD(&fs_info->devices); + INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); - fs_info->last_device = &fs_info->devices; fs_info->sb = sb; fs_info->throttles = 0; fs_info->mount_opt = 0; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2cd957d6e8d..15082b1087b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -36,6 +36,10 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); +int btrfs_make_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytes_used, + u64 type, u64 chunk_tree, u64 chunk_objectid, + u64 size); static int cache_block_group(struct btrfs_root *root, @@ -168,16 +172,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) { - if ((bits & BLOCK_GROUP_DATA) && - (cache->flags & BTRFS_BLOCK_GROUP_DATA)) - return 1; - if ((bits & BLOCK_GROUP_METADATA) && - (cache->flags & BTRFS_BLOCK_GROUP_METADATA)) - return 1; - if ((bits & BLOCK_GROUP_SYSTEM) && - (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM)) - return 1; - return 0; + return (cache->flags & bits); } static int noinline find_search_start(struct btrfs_root *root, @@ -276,6 +271,18 @@ static u64 div_factor(u64 num, int factor) return num; } +static int block_group_state_bits(u64 flags) +{ + int bits = 0; + if (flags & BTRFS_BLOCK_GROUP_DATA) + bits |= BLOCK_GROUP_DATA; + if (flags & BTRFS_BLOCK_GROUP_METADATA) + bits |= BLOCK_GROUP_METADATA; + if (flags & BTRFS_BLOCK_GROUP_SYSTEM) + bits |= BLOCK_GROUP_SYSTEM; + return bits; +} + struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, @@ -304,7 +311,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (!owner) factor = 8; - bit = data; + bit = block_group_state_bits(data); if (search_start && search_start < total_fs_bytes) { struct btrfs_block_group_cache *shint; @@ -358,10 +365,15 @@ again: free_check = cache->key.offset; else free_check = div_factor(cache->key.offset, factor); + if (used + cache->pinned < free_check) { found_group = cache; goto found; } + if (full_search) { + printk("failed on cache %Lu used %Lu total %Lu\n", + cache->key.objectid, used, cache->key.offset); + } cond_resched(); } if (!full_search) { @@ -983,6 +995,58 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, return werr; } +static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, + u64 flags) +{ + struct list_head *head = &info->space_info; + struct list_head *cur; + struct btrfs_space_info *found; + list_for_each(cur, head) { + found = list_entry(cur, struct btrfs_space_info, list); + if (found->flags == flags) + return found; + } + return NULL; + +} + +static int do_chunk_alloc(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, u64 alloc_bytes, + u64 flags) +{ + struct btrfs_space_info *space_info; + u64 thresh; + u64 start; + u64 num_bytes; + int ret; + + space_info = __find_space_info(extent_root->fs_info, flags); + BUG_ON(!space_info); + + if (space_info->full) + return 0; + + thresh = div_factor(space_info->total_bytes, 7); + if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < + thresh) + return 0; + + ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags); + if (ret == -ENOSPC) { +printk("space info full %Lu\n", flags); + space_info->full = 1; + return 0; + } + + BUG_ON(ret); + + ret = btrfs_make_block_group(trans, extent_root, 0, flags, + extent_root->fs_info->chunk_root->root_key.objectid, + start, num_bytes); + BUG_ON(ret); + return 0; +} + static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc, @@ -1012,8 +1076,10 @@ static int update_block_group(struct btrfs_trans_handle *trans, num_bytes = min(total, cache->key.offset - byte_in_group); if (alloc) { old_val += num_bytes; + cache->space_info->bytes_used += num_bytes; } else { old_val -= num_bytes; + cache->space_info->bytes_used -= num_bytes; if (mark_free) { set_extent_dirty(&info->free_space_cache, bytenr, bytenr + num_bytes - 1, @@ -1026,6 +1092,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, } return 0; } + static int update_pinned_extents(struct btrfs_root *root, u64 bytenr, u64 num, int pin) { @@ -1047,9 +1114,11 @@ static int update_pinned_extents(struct btrfs_root *root, (bytenr - cache->key.objectid)); if (pin) { cache->pinned += len; + cache->space_info->bytes_pinned += len; fs_info->total_pinned += len; } else { cache->pinned -= len; + cache->space_info->bytes_pinned -= len; fs_info->total_pinned -= len; } bytenr += len; @@ -1472,7 +1541,7 @@ check_failed: goto new_group; } - if (!(data & BLOCK_GROUP_DATA)) { + if (!(data & BTRFS_BLOCK_GROUP_DATA)) { block_group = btrfs_lookup_block_group(info, ins->objectid); if (block_group) trans->block_group = block_group; @@ -1532,12 +1601,25 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_key keys[2]; - if (data) - data = BLOCK_GROUP_DATA; - else if (root == root->fs_info->chunk_root) - data = BLOCK_GROUP_SYSTEM; - else - data = BLOCK_GROUP_METADATA; + if (data) { + data = BTRFS_BLOCK_GROUP_DATA; + } else if (root == root->fs_info->chunk_root) { + data = BTRFS_BLOCK_GROUP_SYSTEM; + } else { + data = BTRFS_BLOCK_GROUP_METADATA; + } + + if (root->ref_cows) { + if (data != BTRFS_BLOCK_GROUP_METADATA) { + ret = do_chunk_alloc(trans, root->fs_info->extent_root, + num_bytes, + BTRFS_BLOCK_GROUP_METADATA); + BUG_ON(ret); + } + ret = do_chunk_alloc(trans, root->fs_info->extent_root, + num_bytes, data); + BUG_ON(ret); + } new_hint = max(hint_byte, root->fs_info->alloc_start); if (new_hint < btrfs_super_total_bytes(&info->super_copy)) @@ -2490,6 +2572,34 @@ error: return ret; } +static int update_space_info(struct btrfs_fs_info *info, u64 flags, + u64 total_bytes, u64 bytes_used, + struct btrfs_space_info **space_info) +{ + struct btrfs_space_info *found; + + found = __find_space_info(info, flags); + if (found) { + found->total_bytes += total_bytes; + found->bytes_used += bytes_used; + WARN_ON(found->total_bytes < found->bytes_used); + *space_info = found; + return 0; + } + found = kmalloc(sizeof(*found), GFP_NOFS); + if (!found) + return -ENOMEM; + + list_add(&found->list, &info->space_info); + found->flags = flags; + found->total_bytes = total_bytes; + found->bytes_used = bytes_used; + found->bytes_pinned = 0; + found->full = 0; + *space_info = found; + return 0; +} + int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; @@ -2497,6 +2607,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) int bit; struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; + struct btrfs_space_info *space_info; struct extent_io_tree *block_group_cache; struct btrfs_key key; struct btrfs_key found_key; @@ -2547,6 +2658,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) bit = BLOCK_GROUP_METADATA; } + ret = update_space_info(info, cache->flags, found_key.offset, + btrfs_block_group_used(&cache->item), + &space_info); + BUG_ON(ret); + cache->space_info = space_info; + /* use EXTENT_LOCKED to prevent merging */ set_extent_bits(block_group_cache, found_key.objectid, found_key.objectid + found_key.offset - 1, @@ -2563,3 +2680,58 @@ error: btrfs_free_path(path); return ret; } + +int btrfs_make_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytes_used, + u64 type, u64 chunk_tree, u64 chunk_objectid, + u64 size) +{ + int ret; + int bit = 0; + struct btrfs_root *extent_root; + struct btrfs_block_group_cache *cache; + struct extent_io_tree *block_group_cache; + + extent_root = root->fs_info->extent_root; + block_group_cache = &root->fs_info->block_group_cache; + + cache = kmalloc(sizeof(*cache), GFP_NOFS); + BUG_ON(!cache); + cache->key.objectid = chunk_objectid; + cache->key.offset = size; + cache->cached = 0; + cache->pinned = 0; + btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); + memset(&cache->item, 0, sizeof(cache->item)); + btrfs_set_block_group_used(&cache->item, bytes_used); + btrfs_set_block_group_chunk_tree(&cache->item, chunk_tree); + btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); + cache->flags = type; + btrfs_set_block_group_flags(&cache->item, type); + + ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, + &cache->space_info); + BUG_ON(ret); + + if (type & BTRFS_BLOCK_GROUP_DATA) { + bit = BLOCK_GROUP_DATA; + } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { + bit = BLOCK_GROUP_SYSTEM; + } else if (type & BTRFS_BLOCK_GROUP_METADATA) { + bit = BLOCK_GROUP_METADATA; + } + set_extent_bits(block_group_cache, chunk_objectid, + chunk_objectid + size - 1, + bit | EXTENT_LOCKED, GFP_NOFS); + set_state_private(block_group_cache, chunk_objectid, + (unsigned long)cache); + + ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, + sizeof(cache->item)); + BUG_ON(ret); + + finish_current_insert(trans, extent_root); + ret = del_pending_extents(trans, extent_root); + BUG_ON(ret); + return 0; +} diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 17063cd2cb7..109576b57f6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1527,6 +1527,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, { struct inode *inode; struct btrfs_inode_item *inode_item; + struct btrfs_block_group_cache *new_inode_group; struct btrfs_key *location; struct btrfs_path *path; struct btrfs_inode_ref *ref; @@ -1553,9 +1554,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, owner = 0; else owner = 1; - group = btrfs_find_block_group(root, group, 0, + new_inode_group = btrfs_find_block_group(root, group, 0, BTRFS_BLOCK_GROUP_METADATA, owner); - BTRFS_I(inode)->block_group = group; + if (!new_inode_group) { + printk("find_block group failed\n"); + new_inode_group = group; + } + BTRFS_I(inode)->block_group = new_inode_group; BTRFS_I(inode)->flags = 0; key[0].objectid = objectid; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 90a8d45dc6d..a52a13f365d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -131,7 +131,7 @@ check_pending: btrfs_release_path(root, path); BUG_ON(*start < search_start); - if (*start + num_bytes >= search_end) { + if (*start + num_bytes > search_end) { ret = -ENOSPC; goto error; } @@ -159,8 +159,9 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, return -ENOMEM; ret = find_free_dev_extent(trans, device, path, num_bytes, start); - if (ret) + if (ret) { goto err; + } key.objectid = device->devid; key.offset = *start; @@ -214,22 +215,6 @@ error: return ret; } -static struct btrfs_device *next_device(struct list_head *head, - struct list_head *last) -{ - struct list_head *next = last->next; - struct btrfs_device *dev; - - if (list_empty(head)) - return NULL; - - if (next == head) - next = next->next; - - dev = list_entry(next, struct btrfs_device, dev_list); - return dev; -} - static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path, u64 *objectid) { @@ -397,31 +382,63 @@ int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 *start, - u64 *num_bytes, u32 type) + u64 *num_bytes, u64 type) { u64 dev_offset; struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; struct btrfs_stripe *stripes; struct btrfs_device *device = NULL; struct btrfs_chunk *chunk; + struct list_head private_devs; struct list_head *dev_list = &extent_root->fs_info->devices; - struct list_head *last_dev = extent_root->fs_info->last_device; + struct list_head *cur; struct extent_map_tree *em_tree; struct map_lookup *map; struct extent_map *em; u64 physical; u64 calc_size = 1024 * 1024 * 1024; - int num_stripes; + u64 avail; + u64 max_avail = 0; + int num_stripes = 1; + int looped = 0; int ret; - int index = 0; + int index; struct btrfs_key key; + if (list_empty(dev_list)) + return -ENOSPC; +again: + INIT_LIST_HEAD(&private_devs); + cur = dev_list->next; + index = 0; + /* build a private list of devices we will allocate from */ + while(index < num_stripes) { + device = list_entry(cur, struct btrfs_device, dev_list); + avail = device->total_bytes - device->bytes_used; + cur = cur->next; + if (avail > max_avail) + max_avail = avail; + if (avail >= calc_size) { + list_move_tail(&device->dev_list, &private_devs); + index++; + } + if (cur == dev_list) + break; + } + if (index < num_stripes) { + list_splice(&private_devs, dev_list); + if (!looped && max_avail > 0) { + looped = 1; + calc_size = max_avail; + goto again; + } + return -ENOSPC; + } ret = find_next_chunk(chunk_root, &key.objectid); if (ret) return ret; - num_stripes = 1; chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS); if (!chunk) return -ENOMEM; @@ -429,11 +446,12 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, stripes = &chunk->stripe; *num_bytes = calc_size; + index = 0; while(index < num_stripes) { - device = next_device(dev_list, last_dev); - BUG_ON(!device); - last_dev = &device->dev_list; - extent_root->fs_info->last_device = last_dev; + BUG_ON(list_empty(&private_devs)); + cur = private_devs.next; + device = list_entry(cur, struct btrfs_device, dev_list); + list_move_tail(&device->dev_list, dev_list); ret = btrfs_alloc_dev_extent(trans, device, key.objectid, @@ -449,6 +467,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, physical = dev_offset; index++; } + BUG_ON(!list_empty(&private_devs)); /* key.objectid was set above */ key.offset = *num_bytes; @@ -692,17 +711,17 @@ static int read_one_dev(struct btrfs_root *root, struct btrfs_key *key, int ret; devid = btrfs_device_id(leaf, dev_item); - if (btrfs_find_device(root, devid)) - return 0; - - device = kmalloc(sizeof(*device), GFP_NOFS); - if (!device) - return -ENOMEM; + device = btrfs_find_device(root, devid); + if (!device) { + device = kmalloc(sizeof(*device), GFP_NOFS); + if (!device) + return -ENOMEM; + list_add(&device->dev_list, &root->fs_info->devices); + } fill_device_from_item(leaf, dev_item, device); device->dev_root = root->fs_info->dev_root; device->bdev = root->fs_info->sb->s_bdev; - list_add(&device->dev_list, &root->fs_info->devices); memcpy(&device->dev_key, key, sizeof(*key)); ret = 0; #if 0 diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 4a47dcb00a7..4ab9891878a 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -71,7 +71,7 @@ int btrfs_read_sys_array(struct btrfs_root *root); int btrfs_read_chunk_tree(struct btrfs_root *root); int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 *start, - u64 *num_bytes, u32 type); + u64 *num_bytes, u64 type); void btrfs_mapping_init(struct btrfs_mapping_tree *tree); void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio); -- cgit v1.2.3-70-g09d2 From 7d1660d411a24f9b5a2f38e41c9896d7151c934f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:02:03 -0400 Subject: Btrfs: Bring back find_free_extent CPU usage optimizations Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 15082b1087b..5e392804ab1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -182,9 +182,9 @@ static int noinline find_search_start(struct btrfs_root *root, int ret; struct btrfs_block_group_cache *cache = *cache_ret; struct extent_io_tree *free_space_cache; + struct extent_state *state; u64 last; u64 start = 0; - u64 end = 0; u64 cache_miss = 0; u64 total_fs_bytes; u64 search_start = *start_ret; @@ -205,30 +205,34 @@ again: goto new_group; } + spin_lock_irq(&free_space_cache->lock); + state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY); while(1) { - ret = find_first_extent_bit(&root->fs_info->free_space_cache, - last, &start, &end, EXTENT_DIRTY); - if (ret) { + if (!state) { if (!cache_miss) cache_miss = last; + spin_unlock_irq(&free_space_cache->lock); goto new_group; } - start = max(last, start); - last = end + 1; + start = max(last, state->start); + last = state->end + 1; if (last - start < num) { if (last == cache->key.objectid + cache->key.offset) cache_miss = start; + do { + state = extent_state_next(state); + } while(state && !(state->state & EXTENT_DIRTY)); continue; } + spin_unlock_irq(&free_space_cache->lock); if (start + num > cache->key.objectid + cache->key.offset) goto new_group; if (start + num > total_fs_bytes) goto new_group; *start_ret = start; return 0; - } -out: + } out: cache = btrfs_lookup_block_group(root->fs_info, search_start); if (!cache) { printk("Unable to find block group for %Lu\n", search_start); -- cgit v1.2.3-70-g09d2 From a9218f6b00ada101c9772ee9c44af04fa5125d2a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:02:04 -0400 Subject: Add /dev/btrfs-control for device scanning ioctls Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 ---- fs/btrfs/super.c | 41 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5e392804ab1..14eb8fc8701 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -374,10 +374,6 @@ again: found_group = cache; goto found; } - if (full_search) { - printk("failed on cache %Lu used %Lu total %Lu\n", - cache->key.objectid, used, cache->key.offset); - } cond_resched(); } if (!full_search) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4423a91206a..67ed216df47 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -444,6 +445,13 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } +static long btrfs_control_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + printk("btrfs control ioctl %d\n", cmd); + return 0; +} + static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", @@ -451,6 +459,7 @@ static struct file_system_type btrfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; + static void btrfs_write_super_lockfs(struct super_block *sb) { struct btrfs_root *root = btrfs_sb(sb); @@ -482,6 +491,30 @@ static struct super_operations btrfs_super_ops = { .write_super_lockfs = btrfs_write_super_lockfs, .unlockfs = btrfs_unlockfs, }; + +static const struct file_operations btrfs_ctl_fops = { + .unlocked_ioctl = btrfs_control_ioctl, + .compat_ioctl = btrfs_control_ioctl, + .owner = THIS_MODULE, +}; + +static struct miscdevice btrfs_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = "btrfs-control", + .fops = &btrfs_ctl_fops +}; + +static int btrfs_interface_init(void) +{ + return misc_register(&btrfs_misc); +} + +void btrfs_interface_exit(void) +{ + if (misc_deregister(&btrfs_misc) < 0) + printk("misc_deregister failed for control device"); +} + static int __init init_btrfs_fs(void) { int err; @@ -503,11 +536,16 @@ static int __init init_btrfs_fs(void) if (err) goto free_extent_io; - err = register_filesystem(&btrfs_fs_type); + err = btrfs_interface_init(); if (err) goto free_extent_map; + err = register_filesystem(&btrfs_fs_type); + if (err) + goto unregister_ioctl; return 0; +unregister_ioctl: + btrfs_interface_exit(); free_extent_map: extent_map_exit(); free_extent_io: @@ -526,6 +564,7 @@ static void __exit exit_btrfs_fs(void) btrfs_destroy_cachep(); extent_map_exit(); extent_io_exit(); + btrfs_interface_exit(); unregister_filesystem(&btrfs_fs_type); btrfs_exit_sysfs(); } -- cgit v1.2.3-70-g09d2 From 239b14b32dc39232ebf9cce29ff77c4c564355fd Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:02:07 -0400 Subject: Btrfs: Bring back mount -o ssd optimizations Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 +++ fs/btrfs/disk-io.c | 2 ++ fs/btrfs/extent-tree.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/extent_io.c | 2 ++ fs/btrfs/extent_io.h | 2 ++ fs/btrfs/inode.c | 29 +++++++++++++++++++++++++ fs/btrfs/volumes.c | 5 +++++ fs/btrfs/volumes.h | 3 +++ 8 files changed, 103 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 96a49321786..acf22ad6115 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1405,6 +1405,9 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 isize); /* inode.c */ +int btrfs_merge_bio_hook(struct page *page, unsigned long offset, + size_t size, struct bio *bio); + static inline void dec_i_blocks(struct inode *inode, u64 dec) { dec = dec >> 9; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 26185d46712..4890151cd68 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1103,4 +1103,6 @@ int btrfs_read_buffer(struct extent_buffer *buf) static struct extent_io_ops btree_extent_io_ops = { .writepage_io_hook = btree_writepage_io_hook, .submit_bio_hook = btree_submit_bio_hook, + /* note we're sharing with inode.c for the merge bio hook */ + .merge_bio_hook = btrfs_merge_bio_hook, }; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 14eb8fc8701..e9ef644ff56 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1473,13 +1473,31 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root * root = orig_root->fs_info->extent_root; struct btrfs_fs_info *info = root->fs_info; u64 total_needed = num_bytes; + u64 *last_ptr = NULL; struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; + int empty_cluster = 2 * 1024 * 1024; WARN_ON(num_bytes < root->sectorsize); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); + if (data & BTRFS_BLOCK_GROUP_METADATA) { + last_ptr = &root->fs_info->last_alloc; + } + + if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { + last_ptr = &root->fs_info->last_data_alloc; + } + + if (last_ptr) { + if (*last_ptr) + hint_byte = *last_ptr; + else { + empty_size += empty_cluster; + } + } + if (search_end == (u64)-1) search_end = btrfs_super_total_bytes(&info->super_copy); @@ -1489,11 +1507,14 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, hint_byte = search_start; block_group = btrfs_find_block_group(root, block_group, hint_byte, data, 1); + if (last_ptr && *last_ptr == 0 && block_group) + hint_byte = block_group->key.objectid; } else { block_group = btrfs_find_block_group(root, trans->block_group, search_start, data, 1); } + search_start = max(search_start, hint_byte); total_needed += empty_size; @@ -1506,9 +1527,36 @@ check_failed: } ret = find_search_start(root, &block_group, &search_start, total_needed, data); + if (ret == -ENOSPC && last_ptr && *last_ptr) { + *last_ptr = 0; + block_group = btrfs_lookup_block_group(info, + orig_search_start); + search_start = orig_search_start; + ret = find_search_start(root, &block_group, &search_start, + total_needed, data); + } + if (ret == -ENOSPC) + goto enospc; if (ret) goto error; + if (last_ptr && *last_ptr && search_start != *last_ptr) { + *last_ptr = 0; + if (!empty_size) { + empty_size += empty_cluster; + total_needed += empty_size; + } + block_group = btrfs_lookup_block_group(info, + orig_search_start); + search_start = orig_search_start; + ret = find_search_start(root, &block_group, + &search_start, total_needed, data); + if (ret == -ENOSPC) + goto enospc; + if (ret) + goto error; + } + search_start = stripe_align(root, search_start); ins->objectid = search_start; ins->offset = num_bytes; @@ -1547,6 +1595,13 @@ check_failed: trans->block_group = block_group; } ins->offset = num_bytes; + if (last_ptr) { + *last_ptr = ins->objectid + ins->offset; + if (*last_ptr == + btrfs_super_total_bytes(&root->fs_info->super_copy)) { + *last_ptr = 0; + } + } return 0; new_group: @@ -1612,12 +1667,12 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, if (root->ref_cows) { if (data != BTRFS_BLOCK_GROUP_METADATA) { ret = do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes, + 2 * 1024 * 1024, BTRFS_BLOCK_GROUP_METADATA); BUG_ON(ret); } ret = do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes, data); + num_bytes + 2 * 1024 * 1024, data); BUG_ON(ret); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 7e3a1ebde9f..6dab664529c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1730,6 +1730,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, if (bio_ret && *bio_ret) { bio = *bio_ret; if (bio->bi_sector + (bio->bi_size >> 9) != sector || + (tree->ops && tree->ops->merge_bio_hook && + tree->ops->merge_bio_hook(page, offset, size, bio)) || bio_add_page(bio, page, size, offset) < size) { ret = submit_one_bio(rw, bio); bio = NULL; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 0dca89328f9..8b5319db251 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -29,6 +29,8 @@ struct extent_io_ops { int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); int (*writepage_io_hook)(struct page *page, u64 start, u64 end); int (*submit_bio_hook)(struct inode *inode, int rw, struct bio *bio); + int (*merge_bio_hook)(struct page *page, unsigned long offset, + size_t size, struct bio *bio); int (*readpage_io_hook)(struct page *page, u64 start, u64 end); int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 109576b57f6..5140d680184 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -296,6 +296,34 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, return 0; } +int btrfs_merge_bio_hook(struct page *page, unsigned long offset, + size_t size, struct bio *bio) +{ + struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; + struct btrfs_mapping_tree *map_tree; + struct btrfs_device *dev; + u64 logical = bio->bi_sector << 9; + u64 physical; + u64 length = 0; + u64 map_length; + struct bio_vec *bvec; + int i; + int ret; + + bio_for_each_segment(bvec, bio, i) { + length += bvec->bv_len; + } + map_tree = &root->fs_info->mapping_tree; + map_length = length; + ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev); + if (map_length < length + size) { + printk("merge bio hook logical %Lu bio len %Lu physical %Lu " + "len %Lu\n", logical, length, physical, map_length); + return 1; + } + return 0; +} + int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -3033,6 +3061,7 @@ static struct file_operations btrfs_dir_file_operations = { static struct extent_io_ops btrfs_extent_io_ops = { .fill_delalloc = run_delalloc_range, .submit_bio_hook = btrfs_submit_bio_hook, + .merge_bio_hook = btrfs_merge_bio_hook, .readpage_io_hook = btrfs_readpage_io_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, .set_bit_hook = btrfs_set_bit_hook, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ae22d01ecf5..16fb6bbe6e2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -578,6 +578,11 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio) map_tree = &root->fs_info->mapping_tree; map_length = length; ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev); + if (map_length < length) { + printk("mapping failed logical %Lu bio len %Lu physical %Lu " + "len %Lu\n", logical, length, physical, map_length); + BUG(); + } BUG_ON(map_length < length); bio->bi_sector = physical >> 9; bio->bi_bdev = dev->bdev; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 77fa6efd79c..20259128152 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -64,4 +64,7 @@ void btrfs_mapping_init(struct btrfs_mapping_tree *tree); void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio); int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf); +int btrfs_map_block(struct btrfs_mapping_tree *map_tree, + u64 logical, u64 *phys, u64 *length, + struct btrfs_device **dev); #endif -- cgit v1.2.3-70-g09d2 From 593060d756e0c2382d59cf00d5f3b9e3a336c408 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 25 Mar 2008 16:50:33 -0400 Subject: Btrfs: Implement raid0 when multiple devices are present Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 ++ fs/btrfs/disk-io.c | 2 + fs/btrfs/extent-tree.c | 102 +++++++++++++++++++++++++------------- fs/btrfs/inode.c | 2 - fs/btrfs/volumes.c | 130 +++++++++++++++++++++++++++++++++++++------------ 5 files changed, 173 insertions(+), 66 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7556f8319c6..f43dfe7ffe6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -413,6 +413,7 @@ struct btrfs_csum_item { #define BTRFS_BLOCK_GROUP_DATA (1 << 0) #define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1) #define BTRFS_BLOCK_GROUP_METADATA (1 << 2) +#define BTRFS_BLOCK_GROUP_RAID0 (1 << 3) struct btrfs_block_group_item { @@ -498,6 +499,8 @@ struct btrfs_fs_info { u64 delalloc_bytes; u64 last_alloc; u64 last_data_alloc; + int extra_data_alloc_bits; + int extra_alloc_bits; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f971a29e4f2..d8474796dea 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -736,6 +736,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->total_pinned = 0; fs_info->last_alloc = 0; fs_info->last_data_alloc = 0; + fs_info->extra_alloc_bits = 0; + fs_info->extra_data_alloc_bits = 0; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e9ef644ff56..c226656f29b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -172,7 +172,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) { - return (cache->flags & bits); + return (cache->flags & bits) == bits; } static int noinline find_search_start(struct btrfs_root *root, @@ -1010,6 +1010,35 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, } +static int update_space_info(struct btrfs_fs_info *info, u64 flags, + u64 total_bytes, u64 bytes_used, + struct btrfs_space_info **space_info) +{ + struct btrfs_space_info *found; + + found = __find_space_info(info, flags); + if (found) { + found->total_bytes += total_bytes; + found->bytes_used += bytes_used; + WARN_ON(found->total_bytes < found->bytes_used); + *space_info = found; + return 0; + } + found = kmalloc(sizeof(*found), GFP_NOFS); + if (!found) + return -ENOMEM; + + list_add(&found->list, &info->space_info); + found->flags = flags; + found->total_bytes = total_bytes; + found->bytes_used = bytes_used; + found->bytes_pinned = 0; + found->full = 0; + *space_info = found; + return 0; +} + + static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, u64 flags) @@ -1021,6 +1050,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, int ret; space_info = __find_space_info(extent_root->fs_info, flags); + if (!space_info) { + ret = update_space_info(extent_root->fs_info, flags, + 0, 0, &space_info); + BUG_ON(ret); + } BUG_ON(!space_info); if (space_info->full) @@ -1044,6 +1078,17 @@ printk("space info full %Lu\n", flags); extent_root->fs_info->chunk_root->root_key.objectid, start, num_bytes); BUG_ON(ret); + + if (flags & BTRFS_BLOCK_GROUP_RAID0) { + if (flags & BTRFS_BLOCK_GROUP_DATA) { + extent_root->fs_info->extra_data_alloc_bits = + BTRFS_BLOCK_GROUP_RAID0; + } + if (flags & BTRFS_BLOCK_GROUP_METADATA) { + extent_root->fs_info->extra_alloc_bits = + BTRFS_BLOCK_GROUP_RAID0; + } + } return 0; } @@ -1655,24 +1700,31 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_extent_ref *ref; struct btrfs_path *path; struct btrfs_key keys[2]; + int extra_chunk_alloc_bits = 0; if (data) { - data = BTRFS_BLOCK_GROUP_DATA; + data = BTRFS_BLOCK_GROUP_DATA | info->extra_data_alloc_bits; } else if (root == root->fs_info->chunk_root) { data = BTRFS_BLOCK_GROUP_SYSTEM; } else { - data = BTRFS_BLOCK_GROUP_METADATA; + data = BTRFS_BLOCK_GROUP_METADATA | info->extra_alloc_bits; } + if (btrfs_super_num_devices(&info->super_copy) > 1 && + !(data & BTRFS_BLOCK_GROUP_SYSTEM)) + extra_chunk_alloc_bits = BTRFS_BLOCK_GROUP_RAID0; if (root->ref_cows) { - if (data != BTRFS_BLOCK_GROUP_METADATA) { + if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2 * 1024 * 1024, - BTRFS_BLOCK_GROUP_METADATA); + BTRFS_BLOCK_GROUP_METADATA | + info->extra_alloc_bits | + extra_chunk_alloc_bits); BUG_ON(ret); } ret = do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes + 2 * 1024 * 1024, data); + num_bytes + 2 * 1024 * 1024, data | + extra_chunk_alloc_bits); BUG_ON(ret); } @@ -2627,34 +2679,6 @@ error: return ret; } -static int update_space_info(struct btrfs_fs_info *info, u64 flags, - u64 total_bytes, u64 bytes_used, - struct btrfs_space_info **space_info) -{ - struct btrfs_space_info *found; - - found = __find_space_info(info, flags); - if (found) { - found->total_bytes += total_bytes; - found->bytes_used += bytes_used; - WARN_ON(found->total_bytes < found->bytes_used); - *space_info = found; - return 0; - } - found = kmalloc(sizeof(*found), GFP_NOFS); - if (!found) - return -ENOMEM; - - list_add(&found->list, &info->space_info); - found->flags = flags; - found->total_bytes = total_bytes; - found->bytes_used = bytes_used; - found->bytes_pinned = 0; - found->full = 0; - *space_info = found; - return 0; -} - int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; @@ -2712,6 +2736,16 @@ int btrfs_read_block_groups(struct btrfs_root *root) } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { bit = BLOCK_GROUP_METADATA; } + if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) { + if (cache->flags & BTRFS_BLOCK_GROUP_DATA) { + info->extra_data_alloc_bits = + BTRFS_BLOCK_GROUP_RAID0; + } + if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { + info->extra_alloc_bits = + BTRFS_BLOCK_GROUP_RAID0; + } + } ret = update_space_info(info, cache->flags, found_key.offset, btrfs_block_group_used(&cache->item), diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5140d680184..db60d85598c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -317,8 +317,6 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, map_length = length; ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev); if (map_length < length + size) { - printk("merge bio hook logical %Lu bio len %Lu physical %Lu " - "len %Lu\n", logical, length, physical, map_length); return 1; } return 0; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 263f01cc3db..d8fce32a3bb 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "ctree.h" #include "extent_map.h" #include "disk-io.h" @@ -25,10 +26,24 @@ #include "print-tree.h" #include "volumes.h" -struct map_lookup { +struct stripe { struct btrfs_device *dev; u64 physical; }; + +struct map_lookup { + u64 type; + int io_align; + int io_width; + int stripe_len; + int sector_size; + int num_stripes; + struct stripe stripes[]; +}; + +#define map_lookup_size(n) (sizeof(struct map_lookup) + \ + (sizeof(struct stripe) * (n))) + static DEFINE_MUTEX(uuid_mutex); static LIST_HEAD(fs_uuids); @@ -592,6 +607,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 *num_bytes, u64 type) { u64 dev_offset; + struct btrfs_fs_info *info = extent_root->fs_info; struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; struct btrfs_stripe *stripes; struct btrfs_device *device = NULL; @@ -610,10 +626,18 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, int looped = 0; int ret; int index; + int stripe_len = 64 * 1024; struct btrfs_key key; if (list_empty(dev_list)) return -ENOSPC; + + if (type & BTRFS_BLOCK_GROUP_RAID0) + num_stripes = btrfs_super_num_devices(&info->super_copy); + if (type & BTRFS_BLOCK_GROUP_DATA) + stripe_len = 64 * 1024; + if (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) + stripe_len = 32 * 1024; again: INIT_LIST_HEAD(&private_devs); cur = dev_list->next; @@ -650,9 +674,15 @@ again: if (!chunk) return -ENOMEM; + map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); + if (!map) { + kfree(chunk); + return -ENOMEM; + } + stripes = &chunk->stripe; - *num_bytes = calc_size; + *num_bytes = calc_size * num_stripes; index = 0; while(index < num_stripes) { BUG_ON(list_empty(&private_devs)); @@ -669,6 +699,8 @@ printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); ret = btrfs_update_device(trans, device); BUG_ON(ret); + map->stripes[index].dev = device; + map->stripes[index].physical = dev_offset; btrfs_set_stack_stripe_devid(stripes + index, device->devid); btrfs_set_stack_stripe_offset(stripes + index, dev_offset); physical = dev_offset; @@ -680,12 +712,18 @@ printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); key.offset = *num_bytes; key.type = BTRFS_CHUNK_ITEM_KEY; btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid); - btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024); + btrfs_set_stack_chunk_stripe_len(chunk, stripe_len); btrfs_set_stack_chunk_type(chunk, type); btrfs_set_stack_chunk_num_stripes(chunk, num_stripes); - btrfs_set_stack_chunk_io_align(chunk, extent_root->sectorsize); - btrfs_set_stack_chunk_io_width(chunk, extent_root->sectorsize); + btrfs_set_stack_chunk_io_align(chunk, stripe_len); + btrfs_set_stack_chunk_io_width(chunk, stripe_len); btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize); + map->sector_size = extent_root->sectorsize; + map->stripe_len = stripe_len; + map->io_align = stripe_len; + map->io_width = stripe_len; + map->type = type; + map->num_stripes = num_stripes; ret = btrfs_insert_item(trans, chunk_root, &key, chunk, btrfs_chunk_item_size(num_stripes)); @@ -695,25 +733,11 @@ printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); em = alloc_extent_map(GFP_NOFS); if (!em) return -ENOMEM; - map = kmalloc(sizeof(*map), GFP_NOFS); - if (!map) { - free_extent_map(em); - return -ENOMEM; - } - em->bdev = (struct block_device *)map; em->start = key.objectid; em->len = key.offset; em->block_start = 0; - map->physical = physical; - map->dev = device; - - if (!map->dev) { - kfree(map); - free_extent_map(em); - return -EIO; - } kfree(chunk); em_tree = &extent_root->fs_info->mapping_tree.map_tree; @@ -758,6 +782,9 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, struct map_lookup *map; struct extent_map_tree *em_tree = &map_tree->map_tree; u64 offset; + u64 stripe_offset; + u64 stripe_nr; + int stripe_index; spin_lock(&em_tree->lock); @@ -767,9 +794,40 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, BUG_ON(em->start > logical || em->start + em->len < logical); map = (struct map_lookup *)em->bdev; offset = logical - em->start; - *phys = map->physical + offset; - *length = em->len - offset; - *dev = map->dev; + + stripe_nr = offset; + /* + * stripe_nr counts the total number of stripes we have to stride + * to get to this block + */ + do_div(stripe_nr, map->stripe_len); + + stripe_offset = stripe_nr * map->stripe_len; + BUG_ON(offset < stripe_offset); + + /* stripe_offset is the offset of this block in its stripe*/ + stripe_offset = offset - stripe_offset; + + /* + * after this do_div call, stripe_nr is the number of stripes + * on this device we have to walk to find the data, and + * stripe_index is the number of our device in the stripe array + */ + stripe_index = do_div(stripe_nr, map->num_stripes); + + BUG_ON(stripe_index >= map->num_stripes); + + *phys = map->stripes[stripe_index].physical + stripe_offset + + stripe_nr * map->stripe_len; + + if (map->type & BTRFS_BLOCK_GROUP_RAID0) { + /* we limit the length of each bio to what fits in a stripe */ + *length = min_t(u64, em->len - offset, + map->stripe_len - stripe_offset); + } else { + *length = em->len - offset; + } + *dev = map->stripes[stripe_index].dev; free_extent_map(em); spin_unlock(&em_tree->lock); return 0; @@ -822,7 +880,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, u64 logical; u64 length; u64 devid; + int num_stripes; int ret; + int i; logical = key->objectid; length = key->offset; @@ -846,7 +906,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, em = alloc_extent_map(GFP_NOFS); if (!em) return -ENOMEM; - map = kmalloc(sizeof(*map), GFP_NOFS); + num_stripes = btrfs_chunk_num_stripes(leaf, chunk); + map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); if (!map) { free_extent_map(em); return -ENOMEM; @@ -857,13 +918,22 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, em->len = length; em->block_start = 0; - map->physical = btrfs_stripe_offset_nr(leaf, chunk, 0); - devid = btrfs_stripe_devid_nr(leaf, chunk, 0); - map->dev = btrfs_find_device(root, devid); - if (!map->dev) { - kfree(map); - free_extent_map(em); - return -EIO; + map->num_stripes = num_stripes; + map->io_width = btrfs_chunk_io_width(leaf, chunk); + map->io_align = btrfs_chunk_io_align(leaf, chunk); + map->sector_size = btrfs_chunk_sector_size(leaf, chunk); + map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk); + map->type = btrfs_chunk_type(leaf, chunk); + for (i = 0; i < num_stripes; i++) { + map->stripes[i].physical = + btrfs_stripe_offset_nr(leaf, chunk, i); + devid = btrfs_stripe_devid_nr(leaf, chunk, i); + map->stripes[i].dev = btrfs_find_device(root, devid); + if (!map->stripes[i].dev) { + kfree(map); + free_extent_map(em); + return -EIO; + } } spin_lock(&map_tree->map_tree.lock); -- cgit v1.2.3-70-g09d2 From ecbe2402cb4e4e7413544dc392c1a78d0f290292 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 1 Apr 2008 11:26:07 -0400 Subject: Btrfs: Keep fs_mutex during reads done by snapshot deletion There was an optimization to drop the fs_mutex when doing snapshot deletion reads, but this can lead to false positives on checksumming errors. Keep the lock for now. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c226656f29b..cf283b0271a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2046,11 +2046,12 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, if (!next || !btrfs_buffer_uptodate(next)) { free_extent_buffer(next); reada_walk_down(root, cur, path->slots[*level]); - mutex_unlock(&root->fs_info->fs_mutex); next = read_tree_block(root, bytenr, blocksize); - mutex_lock(&root->fs_info->fs_mutex); - /* we dropped the lock, check one more time */ + /* we used to drop the lock above, keep the + * code to double check so that we won't forget + * when we drop the lock again in the future + */ ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs); BUG_ON(ret); -- cgit v1.2.3-70-g09d2 From 0999df54f850fe1aba29b10d5c869493af107478 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 1 Apr 2008 13:48:14 -0400 Subject: Btrfs: Verify checksums on tree blocks found without read_tree_block Checksums were only verified by btrfs_read_tree_block, which meant the functions to probe the page cache for blocks were not validating checksums. Normally this is fine because the buffers will only be in cache if they have already been validated. But, there is a window while the buffer is being read from disk where it could be up to date in the cache but not yet verified. This patch makes sure all buffers go through checksum verification before they are used. This is safer, and it prevents modification of buffers before they go through the csum code. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 5 +++ fs/btrfs/disk-io.c | 86 +++++++++++++++++++++++++++++--------------------- fs/btrfs/disk-io.h | 2 ++ fs/btrfs/extent-tree.c | 2 ++ fs/btrfs/tree-defrag.c | 1 + 5 files changed, 60 insertions(+), 36 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6ba5394834b..df090bf2eec 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -158,6 +158,8 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, } else { root_gen = 0; } + if (!(buf->flags & EXTENT_CSUM)) + WARN_ON(1); WARN_ON(root->ref_cows && trans->transid != root->fs_info->running_transaction->transid); @@ -245,6 +247,8 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, root->fs_info->generation); WARN_ON(1); } + if (!(buf->flags & EXTENT_CSUM)) + WARN_ON(1); header_trans = btrfs_header_generation(buf); spin_lock(&root->fs_info->hash_lock); @@ -396,6 +400,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, if (search_start == 0) search_start = last_block; + btrfs_verify_block_csum(root, cur); err = __btrfs_cow_block(trans, root, cur, parent, i, &tmp, search_start, min(16 * blocksize, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5547607681f..e40fb318ad9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -46,27 +46,6 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) static struct extent_io_ops btree_extent_io_ops; -struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, - u64 bytenr, u32 blocksize) -{ - struct inode *btree_inode = root->fs_info->btree_inode; - struct extent_buffer *eb; - eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, - bytenr, blocksize, GFP_NOFS); - return eb; -} - -struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 bytenr, u32 blocksize) -{ - struct inode *btree_inode = root->fs_info->btree_inode; - struct extent_buffer *eb; - - eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, - bytenr, blocksize, NULL, GFP_NOFS); - return eb; -} - struct extent_map *btree_get_extent(struct inode *inode, struct page *page, size_t page_offset, u64 start, u64 len, int create) @@ -380,36 +359,29 @@ static int close_all_devices(struct btrfs_fs_info *fs_info) return 0; } -struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, - u32 blocksize) +int btrfs_verify_block_csum(struct btrfs_root *root, + struct extent_buffer *buf) { - struct extent_buffer *buf = NULL; - struct inode *btree_inode = root->fs_info->btree_inode; struct extent_io_tree *io_tree; u64 end; int ret; - io_tree = &BTRFS_I(btree_inode)->io_tree; - - buf = btrfs_find_create_tree_block(root, bytenr, blocksize); - if (!buf) - return NULL; - read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1, - btree_get_extent); - + io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; if (buf->flags & EXTENT_CSUM) - return buf; + return 0; - end = buf->start + PAGE_CACHE_SIZE - 1; + end = min_t(u64, buf->len, PAGE_CACHE_SIZE); + end = buf->start + end - 1; if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { buf->flags |= EXTENT_CSUM; - return buf; + return 0; } lock_extent(io_tree, buf->start, end, GFP_NOFS); if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { buf->flags |= EXTENT_CSUM; + ret = 0; goto out_unlock; } @@ -419,6 +391,48 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, out_unlock: unlock_extent(io_tree, buf->start, end, GFP_NOFS); + return ret; +} + +struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, + u64 bytenr, u32 blocksize) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + struct extent_buffer *eb; + eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, + bytenr, blocksize, GFP_NOFS); + return eb; +} + +struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, + u64 bytenr, u32 blocksize) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + struct extent_buffer *eb; + + eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, + bytenr, blocksize, NULL, GFP_NOFS); + return eb; +} + + +struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, + u32 blocksize) +{ + struct extent_buffer *buf = NULL; + struct inode *btree_inode = root->fs_info->btree_inode; + struct extent_io_tree *io_tree; + int ret; + + io_tree = &BTRFS_I(btree_inode)->io_tree; + + buf = btrfs_find_create_tree_block(root, bytenr, blocksize); + if (!buf) + return NULL; + read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1, + btree_get_extent); + + ret = btrfs_verify_block_csum(root, buf); return buf; } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index b7cbc58a555..05b88d0e75e 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -69,4 +69,6 @@ u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); void btrfs_csum_final(u32 crc, char *result); void btrfs_throttle(struct btrfs_root *root); int btrfs_open_device(struct btrfs_device *dev); +int btrfs_verify_block_csum(struct btrfs_root *root, + struct extent_buffer *buf); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cf283b0271a..a34c289aec2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2069,6 +2069,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, BUG_ON(ret); continue; } + } else if (next) { + btrfs_verify_block_csum(root, next); } WARN_ON(*level <= 0); if (path->nodes[*level-1]) diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 5935cbd8f2b..256af1870ee 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -101,6 +101,7 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, path->slots[*level]++; continue; } + btrfs_verify_block_csum(root, next); } else { next = read_tree_block(root, bytenr, btrfs_level_size(root, *level - 1)); -- cgit v1.2.3-70-g09d2 From 8790d502e4401a4a3a4175b83a3a47e8d595c771 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Apr 2008 16:29:03 -0400 Subject: Btrfs: Add support for mirroring across drives Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 9 ++- fs/btrfs/disk-io.c | 20 +++---- fs/btrfs/extent-tree.c | 93 +++++++++++++++-------------- fs/btrfs/inode.c | 4 +- fs/btrfs/volumes.c | 154 ++++++++++++++++++++++++++++++++++++++++--------- fs/btrfs/volumes.h | 11 ++-- 6 files changed, 198 insertions(+), 93 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 67d533cf8f4..0a207861472 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -418,6 +418,7 @@ struct btrfs_csum_item { #define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1) #define BTRFS_BLOCK_GROUP_METADATA (1 << 2) #define BTRFS_BLOCK_GROUP_RAID0 (1 << 3) +#define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) struct btrfs_block_group_item { @@ -504,8 +505,12 @@ struct btrfs_fs_info { u64 delalloc_bytes; u64 last_alloc; u64 last_data_alloc; - int extra_data_alloc_bits; - int extra_alloc_bits; + int avail_data_alloc_bits; + int avail_metadata_alloc_bits; + int avail_system_alloc_bits; + int data_alloc_profile; + int metadata_alloc_profile; + int system_alloc_profile; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e40fb318ad9..ff75ad58676 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -735,7 +735,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); - struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), + struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info), GFP_NOFS); struct btrfs_root *chunk_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); @@ -744,6 +744,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, int ret; int err = -EINVAL; struct btrfs_super_block *disk_super; + if (!extent_root || !tree_root || !fs_info) { err = -ENOMEM; goto fail; @@ -756,11 +757,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); - memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj)); init_completion(&fs_info->kobj_unregister); sb_set_blocksize(sb, 4096); - fs_info->running_transaction = NULL; - fs_info->last_trans_committed = 0; fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; fs_info->chunk_root = chunk_root; @@ -770,11 +768,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); fs_info->sb = sb; - fs_info->throttles = 0; - fs_info->mount_opt = 0; fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; - fs_info->delalloc_bytes = 0; setup_bdi(fs_info, &fs_info->bdi); fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; @@ -802,12 +797,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, extent_io_tree_init(&fs_info->extent_ins, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; - fs_info->closing = 0; - fs_info->total_pinned = 0; - fs_info->last_alloc = 0; - fs_info->last_data_alloc = 0; - fs_info->extra_alloc_bits = 0; - fs_info->extra_data_alloc_bits = 0; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); @@ -923,6 +912,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_read_block_groups(extent_root); fs_info->generation = btrfs_super_generation(disk_super) + 1; + if (btrfs_super_num_devices(disk_super) > 0) { + fs_info->data_alloc_profile = BTRFS_BLOCK_GROUP_RAID0; + fs_info->metadata_alloc_profile = BTRFS_BLOCK_GROUP_RAID1; + fs_info->system_alloc_profile = BTRFS_BLOCK_GROUP_RAID0; + } mutex_unlock(&fs_info->fs_mutex); return tree_root; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a34c289aec2..4ab98d8b73f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -230,9 +230,13 @@ again: goto new_group; if (start + num > total_fs_bytes) goto new_group; + if (!block_group_bits(cache, data)) { + printk("block group bits don't match %Lu %Lu\n", cache->flags, data); + } *start_ret = start; return 0; - } out: + } +out: cache = btrfs_lookup_block_group(root->fs_info, search_start); if (!cache) { printk("Unable to find block group for %Lu\n", search_start); @@ -365,14 +369,17 @@ again: if (cache->key.objectid > total_fs_bytes) break; - if (full_search) - free_check = cache->key.offset; - else - free_check = div_factor(cache->key.offset, factor); + if (block_group_bits(cache, data)) { + if (full_search) + free_check = cache->key.offset; + else + free_check = div_factor(cache->key.offset, + factor); - if (used + cache->pinned < free_check) { - found_group = cache; - goto found; + if (used + cache->pinned < free_check) { + found_group = cache; + goto found; + } } cond_resched(); } @@ -1038,6 +1045,19 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, return 0; } +static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) +{ + u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID1); + if (extra_flags) { + if (flags & BTRFS_BLOCK_GROUP_DATA) + fs_info->avail_data_alloc_bits |= extra_flags; + if (flags & BTRFS_BLOCK_GROUP_METADATA) + fs_info->avail_metadata_alloc_bits |= extra_flags; + if (flags & BTRFS_BLOCK_GROUP_SYSTEM) + fs_info->avail_system_alloc_bits |= extra_flags; + } +} static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, @@ -1060,7 +1080,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, if (space_info->full) return 0; - thresh = div_factor(space_info->total_bytes, 7); + thresh = div_factor(space_info->total_bytes, 6); if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < thresh) return 0; @@ -1079,16 +1099,7 @@ printk("space info full %Lu\n", flags); start, num_bytes); BUG_ON(ret); - if (flags & BTRFS_BLOCK_GROUP_RAID0) { - if (flags & BTRFS_BLOCK_GROUP_DATA) { - extent_root->fs_info->extra_data_alloc_bits = - BTRFS_BLOCK_GROUP_RAID0; - } - if (flags & BTRFS_BLOCK_GROUP_METADATA) { - extent_root->fs_info->extra_alloc_bits = - BTRFS_BLOCK_GROUP_RAID0; - } - } + set_avail_alloc_bits(extent_root->fs_info, flags); return 0; } @@ -1529,6 +1540,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, if (data & BTRFS_BLOCK_GROUP_METADATA) { last_ptr = &root->fs_info->last_alloc; + empty_cluster = 256 * 1024; } if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { @@ -1693,6 +1705,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, u64 root_used; u64 search_start = 0; u64 new_hint; + u64 alloc_profile; u32 sizes[2]; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; @@ -1700,31 +1713,32 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_extent_ref *ref; struct btrfs_path *path; struct btrfs_key keys[2]; - int extra_chunk_alloc_bits = 0; if (data) { - data = BTRFS_BLOCK_GROUP_DATA | info->extra_data_alloc_bits; + alloc_profile = info->avail_data_alloc_bits & + info->data_alloc_profile; + data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; } else if (root == root->fs_info->chunk_root) { - data = BTRFS_BLOCK_GROUP_SYSTEM; + alloc_profile = info->avail_system_alloc_bits & + info->system_alloc_profile; + data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; } else { - data = BTRFS_BLOCK_GROUP_METADATA | info->extra_alloc_bits; + alloc_profile = info->avail_metadata_alloc_bits & + info->metadata_alloc_profile; + data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; } - if (btrfs_super_num_devices(&info->super_copy) > 1 && - !(data & BTRFS_BLOCK_GROUP_SYSTEM)) - extra_chunk_alloc_bits = BTRFS_BLOCK_GROUP_RAID0; if (root->ref_cows) { if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2 * 1024 * 1024, BTRFS_BLOCK_GROUP_METADATA | - info->extra_alloc_bits | - extra_chunk_alloc_bits); + (info->metadata_alloc_profile & + info->avail_metadata_alloc_bits)); BUG_ON(ret); } ret = do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes + 2 * 1024 * 1024, data | - extra_chunk_alloc_bits); + num_bytes + 2 * 1024 * 1024, data); BUG_ON(ret); } @@ -2046,12 +2060,12 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, if (!next || !btrfs_buffer_uptodate(next)) { free_extent_buffer(next); reada_walk_down(root, cur, path->slots[*level]); + + mutex_unlock(&root->fs_info->fs_mutex); next = read_tree_block(root, bytenr, blocksize); + mutex_lock(&root->fs_info->fs_mutex); - /* we used to drop the lock above, keep the - * code to double check so that we won't forget - * when we drop the lock again in the future - */ + /* we've dropped the lock, double check */ ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs); BUG_ON(ret); @@ -2739,16 +2753,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { bit = BLOCK_GROUP_METADATA; } - if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) { - if (cache->flags & BTRFS_BLOCK_GROUP_DATA) { - info->extra_data_alloc_bits = - BTRFS_BLOCK_GROUP_RAID0; - } - if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { - info->extra_alloc_bits = - BTRFS_BLOCK_GROUP_RAID0; - } - } + set_avail_alloc_bits(info, cache->flags); ret = update_space_info(info, cache->flags, found_key.offset, btrfs_block_group_used(&cache->item), diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0fa7cf227f1..a8ae68c6fbb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -306,6 +306,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, u64 physical; u64 length = 0; u64 map_length; + int total_devs; struct bio_vec *bvec; int i; int ret; @@ -315,7 +316,8 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, } map_tree = &root->fs_info->mapping_tree; map_length = length; - ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev); + ret = btrfs_map_block(map_tree, READ, 0, logical, &physical, + &map_length, &dev, &total_devs); if (map_length < length + size) { return 1; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 169be0f7285..bc3c0b97588 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -31,6 +31,13 @@ struct stripe { u64 physical; }; +struct multi_bio { + atomic_t stripes; + bio_end_io_t *end_io; + void *private; + int error; +}; + struct map_lookup { u64 type; int io_align; @@ -632,12 +639,12 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (list_empty(dev_list)) return -ENOSPC; - if (type & BTRFS_BLOCK_GROUP_RAID0) + if (type & (BTRFS_BLOCK_GROUP_RAID0)) num_stripes = btrfs_super_num_devices(&info->super_copy); - if (type & BTRFS_BLOCK_GROUP_DATA) - stripe_len = 64 * 1024; - if (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) - stripe_len = 32 * 1024; + if (type & (BTRFS_BLOCK_GROUP_RAID1)) { + num_stripes = min_t(u64, 2, + btrfs_super_num_devices(&info->super_copy)); + } again: INIT_LIST_HEAD(&private_devs); cur = dev_list->next; @@ -682,7 +689,11 @@ again: stripes = &chunk->stripe; - *num_bytes = calc_size * num_stripes; + if (type & BTRFS_BLOCK_GROUP_RAID1) + *num_bytes = calc_size; + else + *num_bytes = calc_size * num_stripes; + index = 0; while(index < num_stripes) { BUG_ON(list_empty(&private_devs)); @@ -694,7 +705,7 @@ again: key.objectid, calc_size, &dev_offset); BUG_ON(ret); -printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); +printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key.objectid, calc_size, device->devid, type); device->bytes_used += calc_size; ret = btrfs_update_device(trans, device); BUG_ON(ret); @@ -774,9 +785,9 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) } } -int btrfs_map_block(struct btrfs_mapping_tree *map_tree, - u64 logical, u64 *phys, u64 *length, - struct btrfs_device **dev) +int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, + int dev_nr, u64 logical, u64 *phys, u64 *length, + struct btrfs_device **dev, int *total_devs) { struct extent_map *em; struct map_lookup *map; @@ -808,19 +819,39 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, /* stripe_offset is the offset of this block in its stripe*/ stripe_offset = offset - stripe_offset; - /* - * after this do_div call, stripe_nr is the number of stripes - * on this device we have to walk to find the data, and - * stripe_index is the number of our device in the stripe array - */ - stripe_index = do_div(stripe_nr, map->num_stripes); - + if (map->type & BTRFS_BLOCK_GROUP_RAID1) { + stripe_index = dev_nr; + if (rw & (1 << BIO_RW)) + *total_devs = map->num_stripes; + else { + int i; + u64 least = (u64)-1; + struct btrfs_device *cur; + + for (i = 0; i < map->num_stripes; i++) { + cur = map->stripes[i].dev; + spin_lock(&cur->io_lock); + if (cur->total_ios < least) { + least = cur->total_ios; + stripe_index = i; + } + spin_unlock(&cur->io_lock); + } + *total_devs = 1; + } + } else { + /* + * after this do_div call, stripe_nr is the number of stripes + * on this device we have to walk to find the data, and + * stripe_index is the number of our device in the stripe array + */ + stripe_index = do_div(stripe_nr, map->num_stripes); + } BUG_ON(stripe_index >= map->num_stripes); - *phys = map->stripes[stripe_index].physical + stripe_offset + stripe_nr * map->stripe_len; - if (map->type & BTRFS_BLOCK_GROUP_RAID0) { + if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1)) { /* we limit the length of each bio to what fits in a stripe */ *length = min_t(u64, em->len - offset, map->stripe_len - stripe_offset); @@ -833,33 +864,98 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, return 0; } +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_bio_multi_stripe(struct bio *bio, int err) +#else +static int end_bio_multi_stripe(struct bio *bio, + unsigned int bytes_done, int err) +#endif +{ + struct multi_bio *multi = bio->bi_private; + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + if (bio->bi_size) + return 1; +#endif + if (err) + multi->error = err; + + if (atomic_dec_and_test(&multi->stripes)) { + bio->bi_private = multi->private; + bio->bi_end_io = multi->end_io; + + if (!err && multi->error) + err = multi->error; + kfree(multi); + + bio_endio(bio, err); + } else { + bio_put(bio); + } +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + return 0; +#endif +} + int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio) { struct btrfs_mapping_tree *map_tree; struct btrfs_device *dev; + struct bio *first_bio = bio; u64 logical = bio->bi_sector << 9; u64 physical; u64 length = 0; u64 map_length; struct bio_vec *bvec; + struct multi_bio *multi = NULL; int i; int ret; + int dev_nr = 0; + int total_devs = 1; bio_for_each_segment(bvec, bio, i) { length += bvec->bv_len; } + map_tree = &root->fs_info->mapping_tree; map_length = length; - ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev); - if (map_length < length) { - printk("mapping failed logical %Lu bio len %Lu physical %Lu " - "len %Lu\n", logical, length, physical, map_length); - BUG(); + while(dev_nr < total_devs) { + ret = btrfs_map_block(map_tree, rw, dev_nr, logical, + &physical, &map_length, &dev, + &total_devs); + if (map_length < length) { + printk("mapping failed logical %Lu bio len %Lu physical %Lu " + "len %Lu\n", logical, length, physical, map_length); + BUG(); + } + BUG_ON(map_length < length); + if (total_devs > 1) { + if (!multi) { + multi = kmalloc(sizeof(*multi), GFP_NOFS); + atomic_set(&multi->stripes, 1); + multi->end_io = bio->bi_end_io; + multi->private = first_bio->bi_private; + multi->error = 0; + } else { + atomic_inc(&multi->stripes); + } + if (dev_nr < total_devs - 1) { + bio = bio_clone(first_bio, GFP_NOFS); + BUG_ON(!bio); + } else { + bio = first_bio; + } + bio->bi_private = multi; + bio->bi_end_io = end_bio_multi_stripe; + } + bio->bi_sector = physical >> 9; + bio->bi_bdev = dev->bdev; + spin_lock(&dev->io_lock); + dev->total_ios++; + spin_unlock(&dev->io_lock); + submit_bio(rw, bio); + dev_nr++; } - BUG_ON(map_length < length); - bio->bi_sector = physical >> 9; - bio->bi_bdev = dev->bdev; - submit_bio(rw, bio); return 0; } @@ -982,6 +1078,8 @@ static int read_one_dev(struct btrfs_root *root, return -ENOMEM; list_add(&device->dev_list, &root->fs_info->fs_devices->devices); + device->total_ios = 0; + spin_lock_init(&device->io_lock); } fill_device_from_item(leaf, dev_item, device); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 12f297eb055..35dec3efd78 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -18,12 +18,16 @@ #ifndef __BTRFS_VOLUMES_ #define __BTRFS_VOLUMES_ + struct btrfs_device { struct list_head dev_list; struct btrfs_root *dev_root; + spinlock_t io_lock; struct block_device *bdev; + u64 total_ios; + char *name; /* the internal btrfs device id */ @@ -68,9 +72,9 @@ struct btrfs_fs_devices { int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 owner, u64 num_bytes, u64 *start); -int btrfs_map_block(struct btrfs_mapping_tree *map_tree, +int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, int stripe_nr, u64 logical, u64 *phys, u64 *length, - struct btrfs_device **dev); + struct btrfs_device **dev, int *total_stripes); int btrfs_read_sys_array(struct btrfs_root *root); int btrfs_read_chunk_tree(struct btrfs_root *root); int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, @@ -80,9 +84,6 @@ void btrfs_mapping_init(struct btrfs_mapping_tree *tree); void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio); int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf); -int btrfs_map_block(struct btrfs_mapping_tree *map_tree, - u64 logical, u64 *phys, u64 *length, - struct btrfs_device **dev); int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int flags, void *holder); int btrfs_scan_one_device(const char *path, int flags, void *holder, -- cgit v1.2.3-70-g09d2 From 611f0e00a27fe0e5a571194a12443ecdc99a43ef Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Apr 2008 16:29:03 -0400 Subject: Btrfs: Add support for duplicate blocks on a single spindle Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 8 +++++--- fs/btrfs/extent-tree.c | 5 +++-- fs/btrfs/volumes.c | 32 ++++++++++++++++++++++++++++---- 4 files changed, 37 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0a207861472..72deae63ec2 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -419,6 +419,7 @@ struct btrfs_csum_item { #define BTRFS_BLOCK_GROUP_METADATA (1 << 2) #define BTRFS_BLOCK_GROUP_RAID0 (1 << 3) #define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) +#define BTRFS_BLOCK_GROUP_DUP (1 << 5) struct btrfs_block_group_item { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ff75ad58676..42522232fde 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -913,9 +913,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->generation = btrfs_super_generation(disk_super) + 1; if (btrfs_super_num_devices(disk_super) > 0) { - fs_info->data_alloc_profile = BTRFS_BLOCK_GROUP_RAID0; - fs_info->metadata_alloc_profile = BTRFS_BLOCK_GROUP_RAID1; - fs_info->system_alloc_profile = BTRFS_BLOCK_GROUP_RAID0; + fs_info->data_alloc_profile = BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID1; + fs_info->metadata_alloc_profile = BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_DUP; + fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; } mutex_unlock(&fs_info->fs_mutex); return tree_root; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4ab98d8b73f..1885ec4280c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -231,7 +231,7 @@ again: if (start + num > total_fs_bytes) goto new_group; if (!block_group_bits(cache, data)) { - printk("block group bits don't match %Lu %Lu\n", cache->flags, data); + printk("block group bits don't match %Lu %d\n", cache->flags, data); } *start_ret = start; return 0; @@ -1048,7 +1048,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) { u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | - BTRFS_BLOCK_GROUP_RAID1); + BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_DUP); if (extra_flags) { if (flags & BTRFS_BLOCK_GROUP_DATA) fs_info->avail_data_alloc_bits |= extra_flags; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bc3c0b97588..b9294e3c05f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -627,6 +627,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct extent_map *em; u64 physical; u64 calc_size = 1024 * 1024 * 1024; + u64 min_free = calc_size; u64 avail; u64 max_avail = 0; int num_stripes = 1; @@ -641,6 +642,8 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (type & (BTRFS_BLOCK_GROUP_RAID0)) num_stripes = btrfs_super_num_devices(&info->super_copy); + if (type & (BTRFS_BLOCK_GROUP_DUP)) + num_stripes = 2; if (type & (BTRFS_BLOCK_GROUP_RAID1)) { num_stripes = min_t(u64, 2, btrfs_super_num_devices(&info->super_copy)); @@ -649,16 +652,23 @@ again: INIT_LIST_HEAD(&private_devs); cur = dev_list->next; index = 0; + + if (type & BTRFS_BLOCK_GROUP_DUP) + min_free = calc_size * 2; + /* build a private list of devices we will allocate from */ while(index < num_stripes) { device = list_entry(cur, struct btrfs_device, dev_list); + avail = device->total_bytes - device->bytes_used; cur = cur->next; if (avail > max_avail) max_avail = avail; - if (avail >= calc_size) { + if (avail >= min_free) { list_move_tail(&device->dev_list, &private_devs); index++; + if (type & BTRFS_BLOCK_GROUP_DUP) + index++; } if (cur == dev_list) break; @@ -689,17 +699,22 @@ again: stripes = &chunk->stripe; - if (type & BTRFS_BLOCK_GROUP_RAID1) + if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP)) *num_bytes = calc_size; else *num_bytes = calc_size * num_stripes; index = 0; +printk("new chunk type %Lu start %Lu size %Lu\n", type, key.objectid, *num_bytes); while(index < num_stripes) { BUG_ON(list_empty(&private_devs)); cur = private_devs.next; device = list_entry(cur, struct btrfs_device, dev_list); - list_move_tail(&device->dev_list, dev_list); + + /* loop over this device again if we're doing a dup group */ + if (!(type & BTRFS_BLOCK_GROUP_DUP) || + (index == num_stripes - 1)) + list_move_tail(&device->dev_list, dev_list); ret = btrfs_alloc_dev_extent(trans, device, key.objectid, @@ -839,6 +854,14 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, } *total_devs = 1; } + } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { + if (rw == WRITE) { + *total_devs = map->num_stripes; + stripe_index = dev_nr; + } else { + stripe_index = 0; + *total_devs = 1; + } } else { /* * after this do_div call, stripe_nr is the number of stripes @@ -851,7 +874,8 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, *phys = map->stripes[stripe_index].physical + stripe_offset + stripe_nr * map->stripe_len; - if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1)) { + if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_DUP)) { /* we limit the length of each bio to what fits in a stripe */ *length = min_t(u64, em->len - offset, map->stripe_len - stripe_offset); -- cgit v1.2.3-70-g09d2 From 6bc34676c0b5836655ec1c7998e2647cabb933ec Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 4 Apr 2008 15:40:00 -0400 Subject: Btrfs: Don't allow written blocks from this transaction to be reallocated When a block is freed, it can be immediately reused if it is from the current transaction. But, an extra check is required to make sure the block had not been written yet. If it were reused after being written, the transid in the block header might match the transid of the next time the block was allocated. The parent node records the transaction ID of the block it is pointing to, and this is used as part of validating the block on reads. So, there can only be one version of a block per transaction. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1885ec4280c..0c0dde6d961 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1288,7 +1288,9 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, root->fs_info->running_transaction->transid; u64 header_transid = btrfs_header_generation(buf); - if (header_transid == transid) { + if (header_transid == transid && + !btrfs_header_flag(buf, + BTRFS_HEADER_FLAG_WRITTEN)) { clean_tree_block(NULL, root, buf); free_extent_buffer(buf); return 1; -- cgit v1.2.3-70-g09d2 From d18a2c447524751137a12cc8ccaf9d1e0b7fa1b3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 4 Apr 2008 15:40:00 -0400 Subject: Btrfs: Fix allocation profile init Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 13 +++++++------ fs/btrfs/disk-io.c | 11 ++++------- fs/btrfs/extent-tree.c | 10 ++-------- 3 files changed, 13 insertions(+), 21 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 72deae63ec2..4b3b20459f6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -506,12 +506,13 @@ struct btrfs_fs_info { u64 delalloc_bytes; u64 last_alloc; u64 last_data_alloc; - int avail_data_alloc_bits; - int avail_metadata_alloc_bits; - int avail_system_alloc_bits; - int data_alloc_profile; - int metadata_alloc_profile; - int system_alloc_profile; + + u64 avail_data_alloc_bits; + u64 avail_metadata_alloc_bits; + u64 avail_system_alloc_bits; + u64 data_alloc_profile; + u64 metadata_alloc_profile; + u64 system_alloc_profile; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 42522232fde..bc5bd5abb28 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -912,13 +912,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_read_block_groups(extent_root); fs_info->generation = btrfs_super_generation(disk_super) + 1; - if (btrfs_super_num_devices(disk_super) > 0) { - fs_info->data_alloc_profile = BTRFS_BLOCK_GROUP_RAID0 | - BTRFS_BLOCK_GROUP_RAID1; - fs_info->metadata_alloc_profile = BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_DUP; - fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; - } + fs_info->data_alloc_profile = (u64)-1; + fs_info->metadata_alloc_profile = (u64)-1; + fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; + mutex_unlock(&fs_info->fs_mutex); return tree_root; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0c0dde6d961..0cb742884cd 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1100,7 +1100,6 @@ printk("space info full %Lu\n", flags); start, num_bytes); BUG_ON(ret); - set_avail_alloc_bits(extent_root->fs_info, flags); return 0; } @@ -2813,13 +2812,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, &cache->space_info); BUG_ON(ret); - if (type & BTRFS_BLOCK_GROUP_DATA) { - bit = BLOCK_GROUP_DATA; - } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { - bit = BLOCK_GROUP_SYSTEM; - } else if (type & BTRFS_BLOCK_GROUP_METADATA) { - bit = BLOCK_GROUP_METADATA; - } + bit = block_group_state_bits(type); set_extent_bits(block_group_cache, chunk_objectid, chunk_objectid + size - 1, bit | EXTENT_LOCKED, GFP_NOFS); @@ -2833,5 +2826,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, finish_current_insert(trans, extent_root); ret = del_pending_extents(trans, extent_root); BUG_ON(ret); + set_avail_alloc_bits(extent_root->fs_info, type); return 0; } -- cgit v1.2.3-70-g09d2 From ce9adaa5a792c2099a83246265eb4055bc38b6b8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 Apr 2008 16:28:12 -0400 Subject: Btrfs: Do metadata checksums for reads via a workqueue Before, metadata checksumming was done by the callers of read_tree_block, which would set EXTENT_CSUM bits in the extent tree to show that a given range of pages was already checksummed and didn't need to be verified again. But, those bits could go away via try_to_releasepage, and the end result was bogus checksum failures on pages that never left the cache. The new code validates checksums when the page is read. It is a little tricky because metadata blocks can span pages and a single read may end up going via multiple bios. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 5 - fs/btrfs/ctree.h | 4 + fs/btrfs/disk-io.c | 258 ++++++++++++++++++++++++++++++++++++++++++------- fs/btrfs/extent-tree.c | 4 - fs/btrfs/extent_io.c | 100 ++++++++++++++----- fs/btrfs/extent_io.h | 4 +- fs/btrfs/transaction.c | 2 +- 7 files changed, 310 insertions(+), 67 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c92c6b0ee58..efce173a935 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -158,9 +158,6 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, } else { root_gen = 0; } - if (!(buf->flags & EXTENT_CSUM)) - WARN_ON(1); - WARN_ON(root->ref_cows && trans->transid != root->fs_info->running_transaction->transid); WARN_ON(root->ref_cows && trans->transid != root->last_trans); @@ -247,8 +244,6 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, root->fs_info->generation); WARN_ON(1); } - if (!(buf->flags & EXTENT_CSUM)) - WARN_ON(1); header_trans = btrfs_header_generation(buf); spin_lock(&root->fs_info->hash_lock); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4b3b20459f6..e803c4daad2 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -485,6 +485,10 @@ struct btrfs_fs_info { struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; + struct list_head end_io_work_list; + struct work_struct end_io_work; + spinlock_t end_io_work_lock; + #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) struct work_struct trans_work; #else diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e444b99e02d..82109204788 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -24,6 +24,7 @@ #include #include #include // for block_sync_page +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -45,6 +46,16 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) #endif static struct extent_io_ops btree_extent_io_ops; +static struct workqueue_struct *end_io_workqueue; + +struct end_io_wq { + struct bio *bio; + bio_end_io_t *end_io; + void *private; + struct btrfs_fs_info *info; + int error; + struct list_head list; +}; struct extent_map *btree_get_extent(struct inode *inode, struct page *page, size_t page_offset, u64 start, u64 len, @@ -219,11 +230,108 @@ static int btree_writepage_io_hook(struct page *page, u64 start, u64 end) return 0; } +int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, + struct extent_state *state) +{ + struct extent_io_tree *tree; + u64 found_start; + int found_level; + unsigned long len; + struct extent_buffer *eb; + struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; + int ret; + + tree = &BTRFS_I(page->mapping->host)->io_tree; + if (page->private == EXTENT_PAGE_PRIVATE) + goto out; + if (!page->private) + goto out; + len = page->private >> 2; + if (len == 0) { + WARN_ON(1); + } + eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); + read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1, + btree_get_extent); + btrfs_clear_buffer_defrag(eb); + found_start = btrfs_header_bytenr(eb); + if (found_start != start) { + printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", + start, found_start, len); + WARN_ON(1); + goto err; + } + if (eb->first_page != page) { + printk("bad first page %lu %lu\n", eb->first_page->index, + page->index); + WARN_ON(1); + goto err; + } + found_level = btrfs_header_level(eb); + + ret = csum_tree_block(root, eb, 1); + + end = min_t(u64, eb->len, PAGE_CACHE_SIZE); + end = eb->start + end - 1; + release_extent_buffer_tail_pages(eb); +err: + free_extent_buffer(eb); +out: + return 0; +} + +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_workqueue_bio(struct bio *bio, int err) +#else +static int end_workqueue_bio(struct bio *bio, + unsigned int bytes_done, int err) +#endif +{ + struct end_io_wq *end_io_wq = bio->bi_private; + struct btrfs_fs_info *fs_info; + unsigned long flags; + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + if (bio->bi_size) + return 1; +#endif + + fs_info = end_io_wq->info; + spin_lock_irqsave(&fs_info->end_io_work_lock, flags); + end_io_wq->error = err; + list_add_tail(&end_io_wq->list, &fs_info->end_io_work_list); + spin_unlock_irqrestore(&fs_info->end_io_work_lock, flags); + queue_work(end_io_workqueue, &fs_info->end_io_work); + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + return 0; +#endif +} + static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) { struct btrfs_root *root = BTRFS_I(inode)->root; + struct end_io_wq *end_io_wq; u64 offset; offset = bio->bi_sector << 9; + + if (rw & (1 << BIO_RW)) { + return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio); + } + + end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS); + if (!end_io_wq) + return -ENOMEM; + + end_io_wq->private = bio->bi_private; + end_io_wq->end_io = bio->bi_end_io; + end_io_wq->info = root->fs_info; + end_io_wq->error = 0; + end_io_wq->bio = bio; + + bio->bi_private = end_io_wq; + bio->bi_end_io = end_workqueue_bio; + if (offset == BTRFS_SUPER_INFO_OFFSET) { bio->bi_bdev = root->fs_info->sb->s_bdev; submit_bio(rw, bio); @@ -363,36 +471,7 @@ static int close_all_devices(struct btrfs_fs_info *fs_info) int btrfs_verify_block_csum(struct btrfs_root *root, struct extent_buffer *buf) { - struct extent_io_tree *io_tree; - u64 end; - int ret; - - io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; - if (buf->flags & EXTENT_CSUM) - return 0; - - end = min_t(u64, buf->len, PAGE_CACHE_SIZE); - end = buf->start + end - 1; - if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { - buf->flags |= EXTENT_CSUM; - return 0; - } - lock_extent(io_tree, buf->start, end, GFP_NOFS); - - if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { - buf->flags |= EXTENT_CSUM; - ret = 0; - goto out_unlock; - } -WARN_ON(buf->flags & EXTENT_CSUM); - - ret = csum_tree_block(root, buf, 1); - set_extent_bits(io_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS); - buf->flags |= EXTENT_CSUM; - -out_unlock: - unlock_extent(io_tree, buf->start, end, GFP_NOFS); - return ret; + return btrfs_buffer_uptodate(buf); } struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, @@ -430,11 +509,15 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return NULL; - read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1, - btree_get_extent); - ret = btrfs_verify_block_csum(root, buf); + ret = read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, + 1, btree_get_extent); + + if (ret == 0) { + buf->flags |= EXTENT_UPTODATE; + } return buf; + } int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -724,6 +807,99 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) return 0; } +static int bio_ready_for_csum(struct bio *bio) +{ + u64 length = 0; + u64 buf_len = 0; + u64 start = 0; + struct page *page; + struct extent_io_tree *io_tree = NULL; + struct btrfs_fs_info *info = NULL; + struct bio_vec *bvec; + int i; + int ret; + + bio_for_each_segment(bvec, bio, i) { + page = bvec->bv_page; + if (page->private == EXTENT_PAGE_PRIVATE) { + length += bvec->bv_len; + continue; + } + if (!page->private) { + length += bvec->bv_len; + continue; + } + length = bvec->bv_len; + buf_len = page->private >> 2; + start = page_offset(page) + bvec->bv_offset; + io_tree = &BTRFS_I(page->mapping->host)->io_tree; + info = BTRFS_I(page->mapping->host)->root->fs_info; + } + /* are we fully contained in this bio? */ + if (buf_len <= length) + return 1; + + ret = extent_range_uptodate(io_tree, start + length, + start + buf_len - 1); + if (ret == 1) + return ret; + return ret; +} + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) +void btrfs_end_io_csum(void *p) +#else +void btrfs_end_io_csum(struct work_struct *work) +#endif +{ +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + struct btrfs_fs_info *fs_info = p; +#else + struct btrfs_fs_info *fs_info = container_of(work, + struct btrfs_fs_info, + end_io_work); +#endif + unsigned long flags; + struct end_io_wq *end_io_wq; + struct bio *bio; + struct list_head *next; + int error; + int was_empty; + + while(1) { + spin_lock_irqsave(&fs_info->end_io_work_lock, flags); + if (list_empty(&fs_info->end_io_work_list)) { + spin_unlock_irqrestore(&fs_info->end_io_work_lock, + flags); + return; + } + next = fs_info->end_io_work_list.next; + list_del(next); + spin_unlock_irqrestore(&fs_info->end_io_work_lock, flags); + + end_io_wq = list_entry(next, struct end_io_wq, list); + + bio = end_io_wq->bio; + if (!bio_ready_for_csum(bio)) { + spin_lock_irqsave(&fs_info->end_io_work_lock, flags); + was_empty = list_empty(&fs_info->end_io_work_list); + list_add_tail(&end_io_wq->list, + &fs_info->end_io_work_list); + spin_unlock_irqrestore(&fs_info->end_io_work_lock, + flags); + if (was_empty) + return; + continue; + } + error = end_io_wq->error; + bio->bi_private = end_io_wq->private; + bio->bi_end_io = end_io_wq->end_io; + kfree(end_io_wq); + bio_endio(bio, error); + } +} + + struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices) { @@ -750,11 +926,16 @@ struct btrfs_root *open_ctree(struct super_block *sb, err = -ENOMEM; goto fail; } + end_io_workqueue = create_workqueue("btrfs-end-io"); + BUG_ON(!end_io_workqueue); + INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->hashers); + INIT_LIST_HEAD(&fs_info->end_io_work_list); spin_lock_init(&fs_info->hash_lock); + spin_lock_init(&fs_info->end_io_work_lock); spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); @@ -799,6 +980,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; + INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); #else @@ -1044,6 +1226,8 @@ int close_ctree(struct btrfs_root *root) extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); + flush_workqueue(end_io_workqueue); + destroy_workqueue(end_io_workqueue); iput(fs_info->btree_inode); #if 0 @@ -1171,12 +1355,18 @@ int btrfs_read_buffer(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, + int ret; + ret = read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1, btree_get_extent); + if (ret == 0) { + buf->flags |= EXTENT_UPTODATE; + } + return ret; } static struct extent_io_ops btree_extent_io_ops = { .writepage_io_hook = btree_writepage_io_hook, + .readpage_end_io_hook = btree_readpage_end_io_hook, .submit_bio_hook = btree_submit_bio_hook, /* note we're sharing with inode.c for the merge bio hook */ .merge_bio_hook = btrfs_merge_bio_hook, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0cb742884cd..283b08a32a4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1898,10 +1898,6 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); - set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, - EXTENT_CSUM, GFP_NOFS); - buf->flags |= EXTENT_CSUM; if (!btrfs_test_opt(root, SSD)) btrfs_set_buffer_defrag(buf); trans->blocks_used++; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 13cc2360e37..cfc383c17a3 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2592,6 +2592,22 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb, return p; } +int release_extent_buffer_tail_pages(struct extent_buffer *eb) +{ + unsigned long num_pages = num_extent_pages(eb->start, eb->len); + struct page *page; + unsigned long i; + + if (num_pages == 1) + return 0; + for (i = 1; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + page_cache_release(page); + } + return 0; +} + + int invalidate_extent_lru(struct extent_io_tree *tree, u64 start, unsigned long len) { @@ -2609,9 +2625,6 @@ int invalidate_extent_lru(struct extent_io_tree *tree, u64 start, if (eb->start <= start && eb->start + eb->len > start) { eb->flags &= ~EXTENT_UPTODATE; } - if (eb->start == start) { - eb->flags &= ~EXTENT_CSUM; - } cur = cur->next; } while (cur != lru); out: @@ -2682,7 +2695,6 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, page_cache_get(page0); mark_page_accessed(page0); set_page_extent_mapped(page0); - WARN_ON(!PageUptodate(page0)); set_page_extent_head(page0, len); } else { i = 0; @@ -2933,13 +2945,39 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree, } EXPORT_SYMBOL(set_extent_buffer_uptodate); +int extent_range_uptodate(struct extent_io_tree *tree, + u64 start, u64 end) +{ + struct page *page; + int ret; + int pg_uptodate = 1; + int uptodate; + unsigned long index; + + ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1); + if (ret) + return 1; + while(start <= end) { + index = start >> PAGE_CACHE_SHIFT; + page = find_get_page(tree->mapping, index); + uptodate = PageUptodate(page); + page_cache_release(page); + if (!uptodate) { + pg_uptodate = 0; + break; + } + start += PAGE_CACHE_SIZE; + } + return pg_uptodate; +} + int extent_buffer_uptodate(struct extent_io_tree *tree, - struct extent_buffer *eb) + struct extent_buffer *eb) { int ret = 0; int ret2; - int num_pages; - int i; + unsigned long num_pages; + unsigned long i; struct page *page; int pg_uptodate = 1; @@ -2975,13 +3013,16 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, struct page *page; int err; int ret = 0; + int locked_pages = 0; + int all_uptodate = 1; + int inc_all_pages = 0; unsigned long num_pages; struct bio *bio = NULL; if (eb->flags & EXTENT_UPTODATE) return 0; - if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1, + if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, EXTENT_UPTODATE, 1)) { return 0; } @@ -2997,17 +3038,30 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, num_pages = num_extent_pages(eb->start, eb->len); for (i = start_i; i < num_pages; i++) { page = extent_buffer_page(eb, i); - if (PageUptodate(page)) { - continue; - } if (!wait) { - if (TestSetPageLocked(page)) { - continue; - } + if (TestSetPageLocked(page)) + goto unlock_exit; } else { lock_page(page); } + locked_pages++; + if (!PageUptodate(page)) { + all_uptodate = 0; + } + } + if (all_uptodate) { + if (start_i == 0) + eb->flags |= EXTENT_UPTODATE; + goto unlock_exit; + } + + for (i = start_i; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + if (inc_all_pages) + page_cache_get(page); if (!PageUptodate(page)) { + if (start_i == 0) + inc_all_pages = 1; err = __extent_read_full_page(tree, page, get_extent, &bio); if (err) { @@ -3034,6 +3088,16 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, if (!ret) eb->flags |= EXTENT_UPTODATE; return ret; + +unlock_exit: + i = start_i; + while(locked_pages > 0) { + page = extent_buffer_page(eb, i); + i++; + unlock_page(page); + locked_pages--; + } + return ret; } EXPORT_SYMBOL(read_extent_buffer_pages); @@ -3048,7 +3112,6 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, char *dst = (char *)dstv; size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - unsigned long num_pages = num_extent_pages(eb->start, eb->len); WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); @@ -3057,11 +3120,6 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, while(len > 0) { page = extent_buffer_page(eb, i); - if (!PageUptodate(page)) { - printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len); - WARN_ON(1); - } - WARN_ON(!PageUptodate(page)); cur = min(len, (PAGE_CACHE_SIZE - offset)); kaddr = kmap_atomic(page, KM_USER1); @@ -3105,7 +3163,6 @@ printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, } p = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(p)); kaddr = kmap_atomic(p, km); *token = kaddr; *map = kaddr + offset; @@ -3165,7 +3222,6 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, while(len > 0) { page = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(page)); cur = min(len, (PAGE_CACHE_SIZE - offset)); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index d9f5bc4dbd7..9d2991d1d3c 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -13,7 +13,6 @@ #define EXTENT_DEFRAG (1 << 6) #define EXTENT_DEFRAG_DONE (1 << 7) #define EXTENT_BUFFER_FILLED (1 << 8) -#define EXTENT_CSUM (1 << 9) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) /* @@ -218,4 +217,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); int invalidate_extent_lru(struct extent_io_tree *tree, u64 start, unsigned long len); +int release_extent_buffer_tail_pages(struct extent_buffer *eb); +int extent_range_uptodate(struct extent_io_tree *tree, + u64 start, u64 end); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 5e9f69244f9..1ed179c020a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -843,7 +843,7 @@ void btrfs_transaction_flush_work(struct btrfs_root *root) void __init btrfs_init_transaction_sys(void) { - trans_wq = create_workqueue("btrfs"); + trans_wq = create_workqueue("btrfs-transaction"); } void btrfs_exit_transaction_sys(void) -- cgit v1.2.3-70-g09d2 From a5eb62e345fc1818d0d8b6181463200a9e8dfe39 Mon Sep 17 00:00:00 2001 From: Miguel Date: Fri, 11 Apr 2008 15:45:51 -0400 Subject: Btrfs: Endianess bug fix for v0.13 with kernels Fix for a endianess BUG when using btrfs v0.13 with kernels older than 2.6.23 Problem: Has of v0.13, btrfs-progs is using crc32c.c equivalent to the one found on linux-2.6.23/lib/libcrc32c.c Since crc32c_le() changed in linux-2.6.23, when running btrfs v0.13 with older kernels we have a missmatch between the versions of crc32c_le() from btrfs-progs and libcrc32c in the kernel. This missmatch causes a bug when using btrfs on big endian machines. Solution: btrfs_crc32c() macro that when compiling for kernels older than 2.6.23, does endianess conversion to parameters and return value of crc32c(). This endianess conversion nullifies the differences in implementation of crc32c_le(). If kernel 2.6.23 or better, it calls crc32c(). Signed-off-by: Miguel Sousa Filipe --- Signed-off-by: Chris Mason --- fs/btrfs/crc32c.h | 17 +++++++++++++++++ fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/extent-tree.c | 12 +++++------- 3 files changed, 24 insertions(+), 9 deletions(-) create mode 100644 fs/btrfs/crc32c.h (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/crc32c.h b/fs/btrfs/crc32c.h new file mode 100644 index 00000000000..a93255b4ee2 --- /dev/null +++ b/fs/btrfs/crc32c.h @@ -0,0 +1,17 @@ +#include +#include +#include + +/** + * implementation of crc32c_le() changed in linux-2.6.23, + * has of v0.13 btrfs-progs is using the latest version. + * We must workaround older implementations of crc32c_le() + * found on older kernel versions. + */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) +#define btrfs_crc32c(seed, data, length) \ + __cpu_to_le32( crc32c( __le32_to_cpu(seed), data, length) ) +#else +#define btrfs_crc32c(seed, data, length) \ + crc32c(seed, data, length) +#endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e22960e2350..0bfcc31d94d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -18,13 +18,13 @@ #include #include -#include #include #include #include #include #include // for block_sync_page #include +#include "crc32c.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -100,7 +100,7 @@ out: u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) { - return crc32c(seed, data, len); + return btrfs_crc32c(seed, data, len); } void btrfs_csum_final(u32 crc, char *result) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 283b08a32a4..a056a2df689 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -15,11 +15,10 @@ * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 021110-1307, USA. */ - #include -#include #include #include "hash.h" +#include "crc32c.h" #include "ctree.h" #include "disk-io.h" #include "print-tree.h" @@ -398,16 +397,15 @@ static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation, u32 high_crc = ~(u32)0; u32 low_crc = ~(u32)0; __le64 lenum; - lenum = cpu_to_le64(root_objectid); - high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); + high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum)); lenum = cpu_to_le64(ref_generation); - low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); + low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); if (owner >= BTRFS_FIRST_FREE_OBJECTID) { lenum = cpu_to_le64(owner); - low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); + low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); lenum = cpu_to_le64(owner_offset); - low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); + low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); } return ((u64)high_crc << 32) | (u64)low_crc; } -- cgit v1.2.3-70-g09d2 From 98d20f67cf99ccda638dbcdf7b3a9ee0a428d932 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 14 Apr 2008 09:46:10 -0400 Subject: Add a min size parameter to btrfs_alloc_extent On huge machines, delayed allocation may try to allocate massive extents. This change allows btrfs_alloc_extent to return something smaller than the caller asked for, and the data allocation routines will loop over the allocations until it fills the whole delayed alloc. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 ++- fs/btrfs/extent-tree.c | 12 +++++++++--- fs/btrfs/inode.c | 2 ++ 3 files changed, 13 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e803c4daad2..09d614fcafb 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1286,7 +1286,8 @@ int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, u64 owner, u64 owner_offset); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 num_bytes, u64 root_objectid, u64 ref_generation, + u64 num_bytes, u64 min_bytes, + u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, int data); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a056a2df689..e49147e767d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1694,7 +1694,8 @@ error: */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 num_bytes, u64 root_objectid, u64 ref_generation, + u64 num_bytes, u64 min_alloc_size, + u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, int data) @@ -1727,7 +1728,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, info->metadata_alloc_profile; data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; } - +again: if (root->ref_cows) { if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { ret = do_chunk_alloc(trans, root->fs_info->extent_root, @@ -1751,6 +1752,11 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, search_start, search_end, hint_byte, ins, trans->alloc_exclude_start, trans->alloc_exclude_nr, data); + if (ret == -ENOSPC && num_bytes > min_alloc_size) { + num_bytes = num_bytes >> 1; + num_bytes = max(num_bytes, min_alloc_size); + goto again; + } BUG_ON(ret); if (ret) return ret; @@ -1869,7 +1875,7 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, int ret; struct extent_buffer *buf; - ret = btrfs_alloc_extent(trans, root, blocksize, + ret = btrfs_alloc_extent(trans, root, blocksize, blocksize, root_objectid, ref_generation, level, first_objectid, empty_size, hint, (u64)-1, &ins, 0); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 46f54dfa2b0..7869c0157a1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -125,6 +125,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) while(num_bytes > 0) { cur_alloc_size = min(num_bytes, root->fs_info->max_extent); ret = btrfs_alloc_extent(trans, root, cur_alloc_size, + root->sectorsize, root->root_key.objectid, trans->transid, inode->i_ino, start, 0, @@ -133,6 +134,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) WARN_ON(1); goto out; } + cur_alloc_size = ins.offset; ret = btrfs_insert_file_extent(trans, root, inode->i_ino, start, ins.objectid, ins.offset, ins.offset); -- cgit v1.2.3-70-g09d2 From e17cade25ff8074101d653557a78df09c16ca276 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 15 Apr 2008 15:41:47 -0400 Subject: Btrfs: Add chunk uuids and update multi-device back references Block headers now store the chunk tree uuid Chunk items records the device uuid for each stripes Device extent items record better back refs to the chunk tree Block groups record better back refs to the chunk tree The chunk tree format has also changed. The objectid of BTRFS_CHUNK_ITEM_KEY used to be the logical offset of the chunk. Now it is a chunk tree id, with the logical offset being stored in the offset field of the key. This allows a single chunk tree to record multiple logical address spaces, upping the number of bytes indexed by a chunk tree from 2^64 to 2^128. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 12 ++++++++ fs/btrfs/ctree.h | 80 ++++++++++++++++++++++++++++++++++++++++++-------- fs/btrfs/disk-io.c | 6 +++- fs/btrfs/extent-tree.c | 22 +++++--------- fs/btrfs/print-tree.c | 20 +++++++++---- fs/btrfs/volumes.c | 76 +++++++++++++++++++++++++++++++---------------- fs/btrfs/volumes.h | 6 ++-- 7 files changed, 160 insertions(+), 62 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e8bf6c221e4..618e526c904 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1382,6 +1382,11 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, write_extent_buffer(c, root->fs_info->fsid, (unsigned long)btrfs_header_fsid(c), BTRFS_FSID_SIZE); + + write_extent_buffer(c, root->fs_info->chunk_tree_uuid, + (unsigned long)btrfs_header_chunk_tree_uuid(c), + BTRFS_UUID_SIZE); + btrfs_set_node_key(c, &lower_key, 0); btrfs_set_node_blockptr(c, 0, lower->start); lower_gen = btrfs_header_generation(lower); @@ -1513,6 +1518,9 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root write_extent_buffer(split, root->fs_info->fsid, (unsigned long)btrfs_header_fsid(split), BTRFS_FSID_SIZE); + write_extent_buffer(split, root->fs_info->chunk_tree_uuid, + (unsigned long)btrfs_header_chunk_tree_uuid(split), + BTRFS_UUID_SIZE); mid = (c_nritems + 1) / 2; @@ -2043,6 +2051,10 @@ again: write_extent_buffer(right, root->fs_info->fsid, (unsigned long)btrfs_header_fsid(right), BTRFS_FSID_SIZE); + + write_extent_buffer(right, root->fs_info->chunk_tree_uuid, + (unsigned long)btrfs_header_chunk_tree_uuid(right), + BTRFS_UUID_SIZE); if (mid <= slot) { if (nritems == 1 || leaf_space_used(l, mid, nritems - mid) + space_needed > diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 09d614fcafb..82d67c3db8b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -70,6 +70,7 @@ extern struct kmem_cache *btrfs_path_cachep; * All files have objectids higher than this. */ #define BTRFS_FIRST_FREE_OBJECTID 256ULL +#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL /* @@ -131,7 +132,7 @@ struct btrfs_mapping_tree { struct extent_map_tree map_tree; }; -#define BTRFS_DEV_UUID_SIZE 16 +#define BTRFS_UUID_SIZE 16 struct btrfs_dev_item { /* the internal btrfs device id */ __le64 devid; @@ -154,17 +155,32 @@ struct btrfs_dev_item { /* type and info about this device */ __le64 type; + /* grouping information for allocation decisions */ + __le32 dev_group; + + /* seek speed 0-100 where 100 is fastest */ + u8 seek_speed; + + /* bandwidth 0-100 where 100 is fastest */ + u8 bandwidth; + /* btrfs generated uuid for this device */ - u8 uuid[BTRFS_DEV_UUID_SIZE]; + u8 uuid[BTRFS_UUID_SIZE]; } __attribute__ ((__packed__)); struct btrfs_stripe { __le64 devid; __le64 offset; + u8 dev_uuid[BTRFS_UUID_SIZE]; } __attribute__ ((__packed__)); struct btrfs_chunk { + /* size of this chunk in bytes */ + __le64 length; + + /* objectid of the root referencing this chunk */ __le64 owner; + __le64 stripe_len; __le64 type; @@ -199,10 +215,14 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) * every tree block (leaf or node) starts with this header. */ struct btrfs_header { + /* these first four must match the super block */ u8 csum[BTRFS_CSUM_SIZE]; u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ __le64 bytenr; /* which block this node is supposed to live in */ __le64 flags; + + /* allowed to be different from the super from here on down */ + u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; __le64 generation; __le64 owner; __le32 nritems; @@ -235,6 +255,8 @@ struct btrfs_super_block { u8 fsid[16]; /* FS specific uuid */ __le64 bytenr; /* this block number */ __le64 flags; + + /* allowed to be different from the btrfs_header from here own down */ __le64 magic; __le64 generation; __le64 root; @@ -323,14 +345,16 @@ struct btrfs_extent_ref { /* dev extents record free space on individual devices. The owner * field points back to the chunk allocation mapping tree that allocated - * the extent + * the extent. The chunk tree uuid field is a way to double check the owner */ struct btrfs_dev_extent { - __le64 owner; + __le64 chunk_tree; + __le64 chunk_objectid; + __le64 chunk_offset; __le64 length; + u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; } __attribute__ ((__packed__)); - struct btrfs_inode_ref { __le16 name_len; /* name goes here */ @@ -424,7 +448,6 @@ struct btrfs_csum_item { struct btrfs_block_group_item { __le64 used; - __le64 chunk_tree; __le64 chunk_objectid; __le64 flags; } __attribute__ ((__packed__)); @@ -451,6 +474,7 @@ struct btrfs_device; struct btrfs_fs_devices; struct btrfs_fs_info { u8 fsid[BTRFS_FSID_SIZE]; + u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct btrfs_root *chunk_root; @@ -697,6 +721,9 @@ BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32); BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32); BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32); BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64); +BTRFS_SETGET_FUNCS(device_group, struct btrfs_dev_item, dev_group, 32); +BTRFS_SETGET_FUNCS(device_seek_speed, struct btrfs_dev_item, seek_speed, 8); +BTRFS_SETGET_FUNCS(device_bandwidth, struct btrfs_dev_item, bandwidth, 8); BTRFS_SETGET_STACK_FUNCS(stack_device_type, struct btrfs_dev_item, type, 64); BTRFS_SETGET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item, @@ -710,12 +737,19 @@ BTRFS_SETGET_STACK_FUNCS(stack_device_io_width, struct btrfs_dev_item, BTRFS_SETGET_STACK_FUNCS(stack_device_sector_size, struct btrfs_dev_item, sector_size, 32); BTRFS_SETGET_STACK_FUNCS(stack_device_id, struct btrfs_dev_item, devid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_group, struct btrfs_dev_item, + dev_group, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_seek_speed, struct btrfs_dev_item, + seek_speed, 8); +BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item, + bandwidth, 8); static inline char *btrfs_device_uuid(struct btrfs_dev_item *d) { return (char *)d + offsetof(struct btrfs_dev_item, uuid); } +BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64); BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64); BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64); BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32); @@ -726,6 +760,12 @@ BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16); BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64); BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64); +static inline char *btrfs_stripe_dev_uuid(struct btrfs_stripe *s) +{ + return (char *)s + offsetof(struct btrfs_stripe, dev_uuid); +} + +BTRFS_SETGET_STACK_FUNCS(stack_chunk_length, struct btrfs_chunk, length, 64); BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64); BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk, stripe_len, 64); @@ -781,13 +821,10 @@ BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, used, 64); BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item, used, 64); -BTRFS_SETGET_STACK_FUNCS(block_group_chunk_tree, struct btrfs_block_group_item, - chunk_tree, 64); -BTRFS_SETGET_FUNCS(disk_block_group_chunk_tree, struct btrfs_block_group_item, - chunk_tree, 64); BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid, struct btrfs_block_group_item, chunk_objectid, 64); -BTRFS_SETGET_FUNCS(disk_block_group_chunk_objecitd, + +BTRFS_SETGET_FUNCS(disk_block_group_chunk_objectid, struct btrfs_block_group_item, chunk_objectid, 64); BTRFS_SETGET_FUNCS(disk_block_group_flags, struct btrfs_block_group_item, flags, 64); @@ -850,9 +887,20 @@ BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); /* struct btrfs_dev_extent */ -BTRFS_SETGET_FUNCS(dev_extent_owner, struct btrfs_dev_extent, owner, 64); +BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent, + chunk_tree, 64); +BTRFS_SETGET_FUNCS(dev_extent_chunk_objectid, struct btrfs_dev_extent, + chunk_objectid, 64); +BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent, + chunk_offset, 64); BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64); +static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev) +{ + unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid); + return (u8 *)((unsigned long)dev + ptr); +} + /* struct btrfs_extent_ref */ BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64); BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64); @@ -1087,6 +1135,12 @@ static inline u8 *btrfs_header_fsid(struct extent_buffer *eb) return (u8 *)ptr; } +static inline u8 *btrfs_header_chunk_tree_uuid(struct extent_buffer *eb) +{ + unsigned long ptr = offsetof(struct btrfs_header, chunk_tree_uuid); + return (u8 *)ptr; +} + static inline u8 *btrfs_super_fsid(struct extent_buffer *eb) { unsigned long ptr = offsetof(struct btrfs_super_block, fsid); @@ -1311,7 +1365,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); int btrfs_make_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytes_used, - u64 type, u64 chunk_tree, u64 chunk_objectid, + u64 type, u64 chunk_objectid, u64 chunk_offset, u64 size); /* ctree.c */ int btrfs_previous_item(struct btrfs_root *root, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9c94dddde70..79c284c8728 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1125,6 +1125,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, blocksize); BUG_ON(!chunk_root->node); + read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, + (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), + BTRFS_UUID_SIZE); + ret = btrfs_read_chunk_tree(chunk_root); BUG_ON(ret); @@ -1229,7 +1233,7 @@ int write_all_supers(struct btrfs_root *root) btrfs_set_device_sector_size(sb, dev_item, dev->sector_size); write_extent_buffer(sb, dev->uuid, (unsigned long)btrfs_device_uuid(dev_item), - BTRFS_DEV_UUID_SIZE); + BTRFS_UUID_SIZE); btrfs_set_header_flag(sb, BTRFS_HEADER_FLAG_WRITTEN); csum_tree_block(root, sb, 0); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e49147e767d..71f045c6349 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -35,10 +35,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); -int btrfs_make_block_group(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytes_used, - u64 type, u64 chunk_tree, u64 chunk_objectid, - u64 size); static int cache_block_group(struct btrfs_root *root, @@ -980,7 +976,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, ret = get_state_private(block_group_cache, start, &ptr); if (ret) break; - cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; err = write_one_cache_group(trans, root, path, cache); @@ -1094,8 +1089,7 @@ printk("space info full %Lu\n", flags); BUG_ON(ret); ret = btrfs_make_block_group(trans, extent_root, 0, flags, - extent_root->fs_info->chunk_root->root_key.objectid, - start, num_bytes); + BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); BUG_ON(ret); return 0; @@ -2782,7 +2776,7 @@ error: int btrfs_make_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytes_used, - u64 type, u64 chunk_tree, u64 chunk_objectid, + u64 type, u64 chunk_objectid, u64 chunk_offset, u64 size) { int ret; @@ -2796,14 +2790,14 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, cache = kmalloc(sizeof(*cache), GFP_NOFS); BUG_ON(!cache); - cache->key.objectid = chunk_objectid; + cache->key.objectid = chunk_offset; cache->key.offset = size; cache->cached = 0; cache->pinned = 0; + btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); memset(&cache->item, 0, sizeof(cache->item)); btrfs_set_block_group_used(&cache->item, bytes_used); - btrfs_set_block_group_chunk_tree(&cache->item, chunk_tree); btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); cache->flags = type; btrfs_set_block_group_flags(&cache->item, type); @@ -2813,12 +2807,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, BUG_ON(ret); bit = block_group_state_bits(type); - set_extent_bits(block_group_cache, chunk_objectid, - chunk_objectid + size - 1, + set_extent_bits(block_group_cache, chunk_offset, + chunk_offset + size - 1, bit | EXTENT_LOCKED, GFP_NOFS); - set_state_private(block_group_cache, chunk_objectid, - (unsigned long)cache); + set_state_private(block_group_cache, chunk_offset, + (unsigned long)cache); ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, sizeof(cache->item)); BUG_ON(ret); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index ee0de112cf5..e99f3249d05 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -24,7 +24,8 @@ static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk) { int num_stripes = btrfs_chunk_num_stripes(eb, chunk); int i; - printk("\t\tchunk owner %llu type %llu num_stripes %d\n", + printk("\t\tchunk length %llu owner %llu type %llu num_stripes %d\n", + (unsigned long long)btrfs_chunk_length(eb, chunk), (unsigned long long)btrfs_chunk_owner(eb, chunk), (unsigned long long)btrfs_chunk_type(eb, chunk), num_stripes); @@ -140,17 +141,24 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) case BTRFS_DEV_EXTENT_KEY: dev_extent = btrfs_item_ptr(l, i, struct btrfs_dev_extent); - printk("\t\tdev extent owner %llu length %llu\n", - (unsigned long long)btrfs_dev_extent_owner(l, dev_extent), - (unsigned long long)btrfs_dev_extent_length(l, dev_extent)); + printk("\t\tdev extent chunk_tree %llu\n" + "\t\tchunk objectid %llu chunk offset %llu " + "length %llu\n", + (unsigned long long) + btrfs_dev_extent_chunk_tree(l, dev_extent), + (unsigned long long) + btrfs_dev_extent_chunk_objectid(l, dev_extent), + (unsigned long long) + btrfs_dev_extent_chunk_offset(l, dev_extent), + (unsigned long long) + btrfs_dev_extent_length(l, dev_extent)); }; } } void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) { - int i; - u32 nr; + int i; u32 nr; struct btrfs_key key; int level; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f81519f0e4a..23ebd95b25e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -180,7 +180,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, list_for_each(cur, head) { device = list_entry(cur, struct btrfs_device, dev_list); bdev = open_bdev_excl(device->name, flags, holder); -printk("opening %s devid %Lu\n", device->name, device->devid); + if (IS_ERR(bdev)) { printk("open %s failed\n", device->name); ret = PTR_ERR(bdev); @@ -190,7 +190,6 @@ printk("opening %s devid %Lu\n", device->name, device->devid); fs_devices->latest_bdev = bdev; if (device->devid == fs_devices->lowest_devid) { fs_devices->lowest_bdev = bdev; -printk("lowest bdev %s\n", device->name); } device->bdev = bdev; } @@ -372,7 +371,9 @@ error: int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, - u64 owner, u64 num_bytes, u64 *start) + u64 chunk_tree, u64 chunk_objectid, + u64 chunk_offset, + u64 num_bytes, u64 *start) { int ret; struct btrfs_path *path; @@ -400,7 +401,14 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent); - btrfs_set_dev_extent_owner(leaf, extent, owner); + btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree); + btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid); + btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset); + + write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, + (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent), + BTRFS_UUID_SIZE); + btrfs_set_dev_extent_length(leaf, extent, num_bytes); btrfs_mark_buffer_dirty(leaf); err: @@ -408,17 +416,18 @@ err: return ret; } -static int find_next_chunk(struct btrfs_root *root, u64 *objectid) +static int find_next_chunk(struct btrfs_root *root, u64 objectid, u64 *offset) { struct btrfs_path *path; int ret; struct btrfs_key key; + struct btrfs_chunk *chunk; struct btrfs_key found_key; path = btrfs_alloc_path(); BUG_ON(!path); - key.objectid = (u64)-1; + key.objectid = objectid; key.offset = (u64)-1; key.type = BTRFS_CHUNK_ITEM_KEY; @@ -430,11 +439,18 @@ static int find_next_chunk(struct btrfs_root *root, u64 *objectid) ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY); if (ret) { - *objectid = 0; + *offset = 0; } else { btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); - *objectid = found_key.objectid + found_key.offset; + if (found_key.objectid != objectid) + *offset = 0; + else { + chunk = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_chunk); + *offset = found_key.offset + + btrfs_chunk_length(path->nodes[0], chunk); + } } ret = 0; error: @@ -520,9 +536,12 @@ int btrfs_add_device(struct btrfs_trans_handle *trans, btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); + btrfs_set_device_group(leaf, dev_item, 0); + btrfs_set_device_seek_speed(leaf, dev_item, 0); + btrfs_set_device_bandwidth(leaf, dev_item, 0); ptr = (unsigned long)btrfs_device_uuid(dev_item); - write_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE); + write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); btrfs_mark_buffer_dirty(leaf); ret = 0; @@ -674,7 +693,10 @@ again: return -ENOSPC; } - ret = find_next_chunk(chunk_root, &key.objectid); + key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + key.type = BTRFS_CHUNK_ITEM_KEY; + ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID, + &key.offset); if (ret) return ret; @@ -696,8 +718,9 @@ again: *num_bytes = calc_size * num_stripes; index = 0; -printk("new chunk type %Lu start %Lu size %Lu\n", type, key.objectid, *num_bytes); +printk("new chunk type %Lu start %Lu size %Lu\n", type, key.offset, *num_bytes); while(index < num_stripes) { + struct btrfs_stripe *stripe; BUG_ON(list_empty(&private_devs)); cur = private_devs.next; device = list_entry(cur, struct btrfs_device, dev_list); @@ -708,26 +731,28 @@ printk("new chunk type %Lu start %Lu size %Lu\n", type, key.objectid, *num_bytes list_move_tail(&device->dev_list, dev_list); ret = btrfs_alloc_dev_extent(trans, device, - key.objectid, - calc_size, &dev_offset); + info->chunk_root->root_key.objectid, + BTRFS_FIRST_CHUNK_TREE_OBJECTID, key.offset, + calc_size, &dev_offset); BUG_ON(ret); -printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key.objectid, calc_size, device->devid, type); +printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key.offset, calc_size, device->devid, type); device->bytes_used += calc_size; ret = btrfs_update_device(trans, device); BUG_ON(ret); map->stripes[index].dev = device; map->stripes[index].physical = dev_offset; - btrfs_set_stack_stripe_devid(stripes + index, device->devid); - btrfs_set_stack_stripe_offset(stripes + index, dev_offset); + stripe = stripes + index; + btrfs_set_stack_stripe_devid(stripe, device->devid); + btrfs_set_stack_stripe_offset(stripe, dev_offset); + memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE); physical = dev_offset; index++; } BUG_ON(!list_empty(&private_devs)); - /* key.objectid was set above */ - key.offset = *num_bytes; - key.type = BTRFS_CHUNK_ITEM_KEY; + /* key was set above */ + btrfs_set_stack_chunk_length(chunk, *num_bytes); btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid); btrfs_set_stack_chunk_stripe_len(chunk, stripe_len); btrfs_set_stack_chunk_type(chunk, type); @@ -745,14 +770,14 @@ printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key.objectid, c ret = btrfs_insert_item(trans, chunk_root, &key, chunk, btrfs_chunk_item_size(num_stripes)); BUG_ON(ret); - *start = key.objectid; + *start = key.offset;; em = alloc_extent_map(GFP_NOFS); if (!em) return -ENOMEM; em->bdev = (struct block_device *)map; - em->start = key.objectid; - em->len = key.offset; + em->start = key.offset; + em->len = *num_bytes; em->block_start = 0; kfree(chunk); @@ -1056,8 +1081,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, int ret; int i; - logical = key->objectid; - length = key->offset; + logical = key->offset; + length = btrfs_chunk_length(leaf, chunk); spin_lock(&map_tree->map_tree.lock); em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); spin_unlock(&map_tree->map_tree.lock); @@ -1131,7 +1156,7 @@ static int fill_device_from_item(struct extent_buffer *leaf, device->sector_size = btrfs_device_sector_size(leaf, dev_item); ptr = (unsigned long)btrfs_device_uuid(dev_item); - read_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE); + read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); return 0; } @@ -1143,7 +1168,6 @@ static int read_one_dev(struct btrfs_root *root, struct btrfs_device *device; u64 devid; int ret; - devid = btrfs_device_id(leaf, dev_item); device = btrfs_find_device(root, devid); if (!device) { diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 89548837a1c..f9cae307217 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -57,7 +57,7 @@ struct btrfs_device { u64 type; /* physical drive uuid (or lvm uuid) */ - u8 uuid[BTRFS_DEV_UUID_SIZE]; + u8 uuid[BTRFS_UUID_SIZE]; }; struct btrfs_fs_devices { @@ -93,7 +93,9 @@ struct btrfs_multi_bio { int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, - u64 owner, u64 num_bytes, u64 *start); + u64 chunk_tree, u64 chunk_objectid, + u64 chunk_offset, + u64 num_bytes, u64 *start); int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 logical, u64 *length, struct btrfs_multi_bio **multi_ret, int mirror_num); -- cgit v1.2.3-70-g09d2 From 321aecc65671ae8136bd2ca6879b56f0221f8ac8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 16 Apr 2008 10:49:51 -0400 Subject: Btrfs: Add RAID10 support Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 7 +++++++ fs/btrfs/extent-tree.c | 1 + fs/btrfs/volumes.c | 46 +++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 49 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 82d67c3db8b..a22edcf4917 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -197,6 +197,9 @@ struct btrfs_chunk { * item in the btree */ __le16 num_stripes; + + /* sub stripes only matter for raid10 */ + __le16 sub_stripes; struct btrfs_stripe stripe; /* additional stripes go here */ } __attribute__ ((__packed__)); @@ -444,6 +447,7 @@ struct btrfs_csum_item { #define BTRFS_BLOCK_GROUP_RAID0 (1 << 3) #define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) #define BTRFS_BLOCK_GROUP_DUP (1 << 5) +#define BTRFS_BLOCK_GROUP_RAID10 (1 << 6) struct btrfs_block_group_item { @@ -757,6 +761,7 @@ BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32); BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32); BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64); BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16); +BTRFS_SETGET_FUNCS(chunk_sub_stripes, struct btrfs_chunk, sub_stripes, 16); BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64); BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64); @@ -778,6 +783,8 @@ BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk, BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64); BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk, num_stripes, 16); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_sub_stripes, struct btrfs_chunk, + sub_stripes, 16); BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64); BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 71f045c6349..4e5bd62e6e1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1042,6 +1042,7 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) { u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10 | BTRFS_BLOCK_GROUP_DUP); if (extra_flags) { if (flags & BTRFS_BLOCK_GROUP_DATA) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 23ebd95b25e..e6417a573d4 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -33,6 +33,7 @@ struct map_lookup { int stripe_len; int sector_size; int num_stripes; + int sub_stripes; struct btrfs_bio_stripe stripes[]; }; @@ -641,6 +642,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 avail; u64 max_avail = 0; int num_stripes = 1; + int sub_stripes = 0; int looped = 0; int ret; int index; @@ -658,6 +660,13 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, num_stripes = min_t(u64, 2, btrfs_super_num_devices(&info->super_copy)); } + if (type & (BTRFS_BLOCK_GROUP_RAID10)) { + num_stripes = btrfs_super_num_devices(&info->super_copy); + if (num_stripes < 4) + return -ENOSPC; + num_stripes &= ~(u32)1; + sub_stripes = 2; + } again: INIT_LIST_HEAD(&private_devs); cur = dev_list->next; @@ -714,6 +723,8 @@ again: if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP)) *num_bytes = calc_size; + else if (type & BTRFS_BLOCK_GROUP_RAID10) + *num_bytes = calc_size * num_stripes / sub_stripes; else *num_bytes = calc_size * num_stripes; @@ -760,12 +771,14 @@ printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key.offset, cal btrfs_set_stack_chunk_io_align(chunk, stripe_len); btrfs_set_stack_chunk_io_width(chunk, stripe_len); btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize); + btrfs_set_stack_chunk_sub_stripes(chunk, sub_stripes); map->sector_size = extent_root->sectorsize; map->stripe_len = stripe_len; map->io_align = stripe_len; map->io_width = stripe_len; map->type = type; map->num_stripes = num_stripes; + map->sub_stripes = sub_stripes; ret = btrfs_insert_item(trans, chunk_root, &key, chunk, btrfs_chunk_item_size(num_stripes)); @@ -832,6 +845,8 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) map = (struct map_lookup *)em->bdev; if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1)) ret = map->num_stripes; + else if (map->type & BTRFS_BLOCK_GROUP_RAID10) + ret = map->sub_stripes; else ret = 1; free_extent_map(em); @@ -849,6 +864,7 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 stripe_offset; u64 stripe_nr; int stripes_allocated = 8; + int stripes_required = 1; int stripe_index; int i; struct btrfs_multi_bio *multi = NULL; @@ -877,10 +893,16 @@ again: mirror_num = 0; /* if our multi bio struct is too small, back off and try again */ - if (multi_ret && (rw & (1 << BIO_RW)) && - stripes_allocated < map->num_stripes && - ((map->type & BTRFS_BLOCK_GROUP_RAID1) || - (map->type & BTRFS_BLOCK_GROUP_DUP))) { + if (rw & (1 << BIO_RW)) { + if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_DUP)) { + stripes_required = map->num_stripes; + } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { + stripes_required = map->sub_stripes; + } + } + if (multi_ret && rw == WRITE && + stripes_allocated < stripes_required) { stripes_allocated = map->num_stripes; free_extent_map(em); kfree(multi); @@ -900,6 +922,7 @@ again: stripe_offset = offset - stripe_offset; if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10 | BTRFS_BLOCK_GROUP_DUP)) { /* we limit the length of each bio to what fits in a stripe */ *length = min_t(u64, em->len - offset, @@ -937,6 +960,19 @@ again: multi->num_stripes = map->num_stripes; else if (mirror_num) stripe_index = mirror_num - 1; + } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { + int factor = map->num_stripes / map->sub_stripes; + int orig_stripe_nr = stripe_nr; + + stripe_index = do_div(stripe_nr, factor); + stripe_index *= map->sub_stripes; + + if (rw & (1 << BIO_RW)) + multi->num_stripes = map->sub_stripes; + else if (mirror_num) + stripe_index += mirror_num - 1; + else + stripe_index += orig_stripe_nr % map->sub_stripes; } else { /* * after this do_div call, stripe_nr is the number of stripes @@ -946,7 +982,6 @@ again: stripe_index = do_div(stripe_nr, map->num_stripes); } BUG_ON(stripe_index >= map->num_stripes); - BUG_ON(stripe_index != 0 && multi->num_stripes > 1); for (i = 0; i < multi->num_stripes; i++) { multi->stripes[i].physical = @@ -1120,6 +1155,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, map->sector_size = btrfs_chunk_sector_size(leaf, chunk); map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk); map->type = btrfs_chunk_type(leaf, chunk); + map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); for (i = 0; i < num_stripes; i++) { map->stripes[i].physical = btrfs_stripe_offset_nr(leaf, chunk, i); -- cgit v1.2.3-70-g09d2 From 699122f55904bec252ea4f8a64228ce98e3d9fda Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 16 Apr 2008 12:59:22 -0400 Subject: Btrfs: Don't wait on tree block writeback before freeing them anymore This isn't required anymore because we don't reallocate blocks that have already been written in this transaction. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 5 ----- fs/btrfs/extent-tree.c | 1 - fs/btrfs/inode.c | 2 ++ 3 files changed, 2 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 618e526c904..1c3d9d6fbda 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -751,7 +751,6 @@ static int balance_level(struct btrfs_trans_handle *trans, add_root_to_dirty_list(root); path->nodes[level] = NULL; clean_tree_block(trans, root, mid); - wait_on_tree_block_writeback(root, mid); /* once for the path */ free_extent_buffer(mid); ret = btrfs_free_extent(trans, root, mid->start, mid->len, @@ -810,7 +809,6 @@ static int balance_level(struct btrfs_trans_handle *trans, u32 blocksize = right->len; clean_tree_block(trans, root, right); - wait_on_tree_block_writeback(root, right); free_extent_buffer(right); right = NULL; wret = del_ptr(trans, root, path, level + 1, pslot + @@ -854,7 +852,6 @@ static int balance_level(struct btrfs_trans_handle *trans, u64 bytenr = mid->start; u32 blocksize = mid->len; clean_tree_block(trans, root, mid); - wait_on_tree_block_writeback(root, mid); free_extent_buffer(mid); mid = NULL; wret = del_ptr(trans, root, path, level + 1, pslot); @@ -2638,7 +2635,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, } else { u64 root_gen = btrfs_header_generation(path->nodes[1]); clean_tree_block(trans, root, leaf); - wait_on_tree_block_writeback(root, leaf); wret = del_ptr(trans, root, path, 1, path->slots[1]); if (wret) ret = wret; @@ -2690,7 +2686,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, path->nodes[1]); clean_tree_block(trans, root, leaf); - wait_on_tree_block_writeback(root, leaf); wret = del_ptr(trans, root, path, 1, slot); if (wret) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4e5bd62e6e1..76fd5d7146e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1887,7 +1887,6 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, } btrfs_set_header_generation(buf, trans->transid); clean_tree_block(trans, root, buf); - wait_on_tree_block_writeback(root, buf); btrfs_set_buffer_uptodate(buf); if (PageDirty(buf->first_page)) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3e1f21c632c..5632ea76007 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -383,9 +383,11 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) struct btrfs_csum_item *item; struct btrfs_path *path = NULL; u32 csum; + if (btrfs_test_opt(root, NODATASUM) || btrfs_test_flag(inode, NODATASUM)) return 0; + mutex_lock(&root->fs_info->fs_mutex); path = btrfs_alloc_path(); item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0); -- cgit v1.2.3-70-g09d2 From 3b951516ed703af0f6d82053937655ad69b60864 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Apr 2008 11:29:12 -0400 Subject: Btrfs: Use the extent map cache to find the logical disk block during data retries The data read retry code needs to find the logical disk block before it can resubmit new bios. But, finding this block isn't allowed to take the fs_mutex because that will deadlock with a number of different callers. This changes the retry code to use the extent map cache instead, but that requires the extent map cache to have the extent we're looking for. This is a problem because btrfs_drop_extent_cache just drops the entire extent instead of the little tiny part it is invalidating. The bulk of the code in this patch changes btrfs_drop_extent_cache to invalidate only a portion of the extent cache, and changes btrfs_get_extent to deal with the results. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 1 + fs/btrfs/extent_io.c | 12 +++-- fs/btrfs/file.c | 47 ++++++++++++++++++- fs/btrfs/inode.c | 120 ++++++++++++++++++++++++++++++++++++++++++++----- fs/btrfs/volumes.c | 3 ++ 5 files changed, 168 insertions(+), 15 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 76fd5d7146e..593011e5d45 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1747,6 +1747,7 @@ again: search_start, search_end, hint_byte, ins, trans->alloc_exclude_start, trans->alloc_exclude_nr, data); + if (ret == -ENOSPC && num_bytes > min_alloc_size) { num_bytes = num_bytes >> 1; num_bytes = max(num_bytes, min_alloc_size); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 88322684be6..21597bea21f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1025,7 +1025,8 @@ u64 find_lock_delalloc_range(struct extent_io_tree *tree, search_again: node = tree_search(tree, cur_start); if (!node) { - *end = (u64)-1; + if (!found) + *end = (u64)-1; goto out; } @@ -1540,6 +1541,8 @@ static int end_bio_extent_readpage(struct bio *bio, start, end, state); if (ret == 0) { state = NULL; + uptodate = + test_bit(BIO_UPTODATE, &bio->bi_flags); continue; } } @@ -1555,10 +1558,11 @@ static int end_bio_extent_readpage(struct bio *bio, !(state->state & EXTENT_LOCKED)) state = NULL; } - if (!state && uptodate) { + if (!state) { spin_unlock_irqrestore(&tree->lock, flags); - set_extent_uptodate(tree, start, end, - GFP_ATOMIC); + if (uptodate) + set_extent_uptodate(tree, start, end, + GFP_ATOMIC); unlock_extent(tree, start, end, GFP_ATOMIC); goto next_io; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9fbda655206..3f5525f0834 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -356,12 +356,23 @@ out_unlock: int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) { struct extent_map *em; + struct extent_map *split = NULL; + struct extent_map *split2 = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; u64 len = end - start + 1; + int ret; + int testend = 1; - if (end == (u64)-1) + if (end == (u64)-1) { len = (u64)-1; + testend = 0; + } while(1) { + if (!split) + split = alloc_extent_map(GFP_NOFS); + if (!split2) + split2 = alloc_extent_map(GFP_NOFS); + spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); if (!em) { @@ -369,6 +380,36 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) break; } remove_extent_mapping(em_tree, em); + + if (em->block_start < EXTENT_MAP_LAST_BYTE && + em->start < start) { + split->start = em->start; + split->len = start - em->start; + split->block_start = em->block_start; + split->bdev = em->bdev; + split->flags = em->flags; + ret = add_extent_mapping(em_tree, split); + BUG_ON(ret); + free_extent_map(split); + split = split2; + split2 = NULL; + } + if (em->block_start < EXTENT_MAP_LAST_BYTE && + testend && em->start + em->len > start + len) { + u64 diff = start + len - em->start; + + split->start = start + len; + split->len = em->start + em->len - (start + len); + split->bdev = em->bdev; + split->flags = em->flags; + + split->block_start = em->block_start + diff; + + ret = add_extent_mapping(em_tree, split); + BUG_ON(ret); + free_extent_map(split); + split = NULL; + } spin_unlock(&em_tree->lock); /* once for us */ @@ -376,6 +417,10 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) /* once for the tree*/ free_extent_map(em); } + if (split) + free_extent_map(split); + if (split2) + free_extent_map(split2); return 0; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5632ea76007..40f8da88409 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -122,6 +122,8 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) if (alloc_hint == EXTENT_MAP_INLINE) goto out; + BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy)); + while(num_bytes > 0) { cur_alloc_size = min(num_bytes, root->fs_info->max_extent); ret = btrfs_alloc_extent(trans, root, cur_alloc_size, @@ -140,6 +142,11 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) ins.offset); inode->i_blocks += ins.offset >> 9; btrfs_check_file(root, inode); + if (num_bytes < cur_alloc_size) { + printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes, + cur_alloc_size); + break; + } num_bytes -= cur_alloc_size; alloc_hint = ins.objectid + ins.offset; start += cur_alloc_size; @@ -427,6 +434,7 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio, struct extent_map *em; struct inode *inode = page->mapping->host; struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct bio *bio; int num_copies; int ret; @@ -434,7 +442,6 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio, ret = get_state_private(failure_tree, start, &private); if (ret) { - size_t pg_offset = start - page_offset(page); failrec = kmalloc(sizeof(*failrec), GFP_NOFS); if (!failrec) return -ENOMEM; @@ -442,8 +449,13 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio, failrec->len = end - start + 1; failrec->last_mirror = 0; - em = btrfs_get_extent(inode, NULL, pg_offset, start, - failrec->len, 0); + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, start, failrec->len); + if (em->start > start || em->start + em->len < start) { + free_extent_map(em); + em = NULL; + } + spin_unlock(&em_tree->lock); if (!em || IS_ERR(em)) { kfree(failrec); @@ -559,6 +571,8 @@ zeroit: flush_dcache_page(page); kunmap_atomic(kaddr, KM_IRQ0); local_irq_restore(flags); + if (private == 0) + return 0; return -EIO; } @@ -908,8 +922,9 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, int pending_del_nr = 0; int pending_del_slot = 0; int extent_type = -1; + u64 mask = root->sectorsize - 1; - btrfs_drop_extent_cache(inode, inode->i_size, (u64)-1); + btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1); path = btrfs_alloc_path(); path->reada = -1; BUG_ON(!path); @@ -1212,7 +1227,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) hole_start, 0, 0, hole_size); btrfs_drop_extent_cache(inode, hole_start, - hole_size - 1); + (u64)-1); btrfs_check_file(root, inode); } btrfs_end_transaction(trans, root); @@ -2083,6 +2098,68 @@ out_unlock: return err; } +static int merge_extent_mapping(struct extent_map_tree *em_tree, + struct extent_map *existing, + struct extent_map *em) +{ + u64 start_diff; + u64 new_end; + int ret = 0; + int real_blocks = existing->block_start < EXTENT_MAP_LAST_BYTE; + + if (real_blocks && em->block_start >= EXTENT_MAP_LAST_BYTE) + goto invalid; + + if (!real_blocks && em->block_start != existing->block_start) + goto invalid; + + new_end = max(existing->start + existing->len, em->start + em->len); + + if (existing->start >= em->start) { + if (em->start + em->len < existing->start) + goto invalid; + + start_diff = existing->start - em->start; + if (real_blocks && em->block_start + start_diff != + existing->block_start) + goto invalid; + + em->len = new_end - em->start; + + remove_extent_mapping(em_tree, existing); + /* free for the tree */ + free_extent_map(existing); + ret = add_extent_mapping(em_tree, em); + + } else if (em->start > existing->start) { + + if (existing->start + existing->len < em->start) + goto invalid; + + start_diff = em->start - existing->start; + if (real_blocks && existing->block_start + start_diff != + em->block_start) + goto invalid; + + remove_extent_mapping(em_tree, existing); + em->block_start = existing->block_start; + em->start = existing->start; + em->len = new_end - existing->start; + free_extent_map(existing); + + ret = add_extent_mapping(em_tree, em); + } else { + goto invalid; + } + return ret; + +invalid: + printk("invalid extent map merge [%Lu %Lu %Lu] [%Lu %Lu %Lu]\n", + existing->start, existing->len, existing->block_start, + em->start, em->len, em->block_start); + return -EIO; +} + struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, int create) @@ -2267,12 +2344,35 @@ insert: err = 0; spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); + + /* it is possible that someone inserted the extent into the tree + * while we had the lock dropped. It is also possible that + * an overlapping map exists in the tree + */ if (ret == -EEXIST) { - free_extent_map(em); - em = lookup_extent_mapping(em_tree, start, len); - if (!em) { - err = -EIO; - printk("failing to insert %Lu %Lu\n", start, len); + struct extent_map *existing; + existing = lookup_extent_mapping(em_tree, start, len); + if (!existing) { + existing = lookup_extent_mapping(em_tree, em->start, + em->len); + if (existing) { + err = merge_extent_mapping(em_tree, existing, + em); + free_extent_map(existing); + if (err) { + free_extent_map(em); + em = NULL; + } + } else { + err = -EIO; + printk("failing to insert %Lu %Lu\n", + start, len); + free_extent_map(em); + em = NULL; + } + } else { + free_extent_map(em); + em = existing; } } spin_unlock(&em_tree->lock); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e6417a573d4..0e658c1d821 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -883,6 +883,9 @@ again: spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, logical, *length); spin_unlock(&em_tree->lock); + if (!em) { + printk("unable to find logical %Lu\n", logical); + } BUG_ON(!em); BUG_ON(em->start > logical || em->start + em->len < logical); -- cgit v1.2.3-70-g09d2 From 3c12ac7205d4bd679fefa722aa9b61385e4b4749 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 21 Apr 2008 12:01:38 -0400 Subject: Btrfs: Simplify device selection for mirrored reads Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 8 ++++---- fs/btrfs/volumes.c | 23 +++++++---------------- 2 files changed, 11 insertions(+), 20 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 593011e5d45..cc0d7f30c36 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -640,7 +640,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - path->reada = 0; + path->reada = 1; key.objectid = bytenr; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_bytes; @@ -660,7 +660,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_release_path(root->fs_info->extent_root, path); - path->reada = 0; + path->reada = 1; ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root, path, bytenr, root_objectid, ref_generation, owner, owner_offset); @@ -692,7 +692,7 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, WARN_ON(num_bytes < root->sectorsize); path = btrfs_alloc_path(); - path->reada = 0; + path->reada = 1; key.objectid = bytenr; key.offset = num_bytes; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -1328,7 +1328,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (!path) return -ENOMEM; - path->reada = 0; + path->reada = 1; ret = lookup_extent_backref(trans, extent_root, path, bytenr, root_objectid, ref_generation, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 46024070650..107fc74c3ab 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1029,19 +1029,8 @@ again: else if (mirror_num) { stripe_index = mirror_num - 1; } else { - int i; - u64 least = (u64)-1; - struct btrfs_device *cur; - - for (i = 0; i < map->num_stripes; i++) { - cur = map->stripes[i].dev; - spin_lock(&cur->io_lock); - if (cur->total_ios < least) { - least = cur->total_ios; - stripe_index = i; - } - spin_unlock(&cur->io_lock); - } + u64 orig_stripe_nr = stripe_nr; + stripe_index = do_div(orig_stripe_nr, num_stripes); } } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { if (rw & (1 << BIO_RW)) @@ -1050,7 +1039,6 @@ again: stripe_index = mirror_num - 1; } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { int factor = map->num_stripes / map->sub_stripes; - int orig_stripe_nr = stripe_nr; stripe_index = do_div(stripe_nr, factor); stripe_index *= map->sub_stripes; @@ -1059,8 +1047,11 @@ again: num_stripes = map->sub_stripes; else if (mirror_num) stripe_index += mirror_num - 1; - else - stripe_index += orig_stripe_nr % map->sub_stripes; + else { + u64 orig_stripe_nr = stripe_nr; + stripe_index += do_div(orig_stripe_nr, + map->sub_stripes); + } } else { /* * after this do_div call, stripe_nr is the number of stripes -- cgit v1.2.3-70-g09d2 From bce4eae9864e6b6ebde5d7f05576ff0a6c3724d0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 Apr 2008 14:42:46 -0400 Subject: Btrfs: Fix balance_level to free the middle block if there is room in the left one balance level starts by trying to empty the middle block, and then pushes from the right to the middle. This might empty the right block and leave a small number of pointers in the middle. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 35 ++++++++++++++++++++++++++++------- fs/btrfs/extent-tree.c | 4 ++-- 2 files changed, 30 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 319f9f9bf8d..29e5674b043 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -789,7 +789,7 @@ static int balance_level(struct btrfs_trans_handle *trans, /* first, try to make some room in the middle buffer */ if (left) { orig_slot += btrfs_header_nritems(left); - wret = push_node_left(trans, root, left, mid, 0); + wret = push_node_left(trans, root, left, mid, 1); if (wret < 0) ret = wret; if (btrfs_header_nritems(mid) < 2) @@ -844,6 +844,11 @@ static int balance_level(struct btrfs_trans_handle *trans, ret = wret; goto enospc; } + if (wret == 1) { + wret = push_node_left(trans, root, left, mid, 1); + if (wret < 0) + ret = wret; + } BUG_ON(wret == 1); } if (btrfs_header_nritems(mid) == 0) { @@ -1252,17 +1257,27 @@ static int push_node_left(struct btrfs_trans_handle *trans, WARN_ON(btrfs_header_generation(src) != trans->transid); WARN_ON(btrfs_header_generation(dst) != trans->transid); - if (!empty && src_nritems <= 2) + if (!empty && src_nritems <= 8) return 1; if (push_items <= 0) { return 1; } - if (empty) + if (empty) { push_items = min(src_nritems, push_items); - else - push_items = min(src_nritems - 2, push_items); + if (push_items < src_nritems) { + /* leave at least 8 pointers in the node if + * we aren't going to empty it + */ + if (src_nritems - push_items < 8) { + if (push_items <= 8) + return 1; + push_items -= 8; + } + } + } else + push_items = min(src_nritems - 8, push_items); copy_extent_buffer(dst, src, btrfs_node_key_ptr_offset(dst_nritems), @@ -1308,13 +1323,19 @@ static int balance_node_right(struct btrfs_trans_handle *trans, src_nritems = btrfs_header_nritems(src); dst_nritems = btrfs_header_nritems(dst); push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; - if (push_items <= 0) + if (push_items <= 0) { return 1; + } + + if (src_nritems < 4) { + return 1; + } max_push = src_nritems / 2 + 1; /* don't try to empty the node */ - if (max_push >= src_nritems) + if (max_push >= src_nritems) { return 1; + } if (max_push < push_items) push_items = max_push; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cc0d7f30c36..c49592c5127 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -306,13 +306,13 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int bit; int ret; int full_search = 0; - int factor = 8; + int factor = 10; block_group_cache = &info->block_group_cache; total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); if (!owner) - factor = 8; + factor = 10; bit = block_group_state_bits(data); -- cgit v1.2.3-70-g09d2 From 8f18cf13396caae5a3d7ae91201cfb15181a9642 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 25 Apr 2008 16:53:30 -0400 Subject: Btrfs: Make the resizer work based on shrinking and growing devices Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 4 + fs/btrfs/extent-tree.c | 145 +++++++++------------- fs/btrfs/inode.c | 36 ++++-- fs/btrfs/volumes.c | 324 +++++++++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/volumes.h | 5 + 6 files changed, 407 insertions(+), 108 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f00c4be59ad..ac7106ec535 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -474,6 +474,7 @@ struct btrfs_block_group_cache { u64 pinned; u64 flags; int cached; + int ro; }; struct btrfs_device; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 71838264ca6..a9ce491d279 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -818,6 +818,10 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, return fs_info->tree_root; if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID) return fs_info->extent_root; + if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID) + return fs_info->chunk_root; + if (location->objectid == BTRFS_DEV_TREE_OBJECTID) + return fs_info->dev_root; root = radix_tree_lookup(&fs_info->fs_roots_radix, (unsigned long)location->objectid); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c49592c5127..6540095544e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -187,6 +187,7 @@ static int noinline find_search_start(struct btrfs_root *root, if (!cache) goto out; + total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); free_space_cache = &root->fs_info->free_space_cache; @@ -196,7 +197,7 @@ again: goto out; last = max(search_start, cache->key.objectid); - if (!block_group_bits(cache, data)) { + if (!block_group_bits(cache, data) || cache->ro) { goto new_group; } @@ -221,6 +222,8 @@ again: continue; } spin_unlock_irq(&free_space_cache->lock); + if (cache->ro) + goto new_group; if (start + num > cache->key.objectid + cache->key.offset) goto new_group; if (start + num > total_fs_bytes) @@ -319,7 +322,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (search_start && search_start < total_fs_bytes) { struct btrfs_block_group_cache *shint; shint = btrfs_lookup_block_group(info, search_start); - if (shint && block_group_bits(shint, data)) { + if (shint && block_group_bits(shint, data) && !shint->ro) { used = btrfs_block_group_used(&shint->item); if (used + shint->pinned < div_factor(shint->key.offset, factor)) { @@ -327,7 +330,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, } } } - if (hint && block_group_bits(hint, data) && + if (hint && !hint->ro && block_group_bits(hint, data) && hint->key.objectid < total_fs_bytes) { used = btrfs_block_group_used(&hint->item); if (used + hint->pinned < @@ -364,7 +367,7 @@ again: if (cache->key.objectid > total_fs_bytes) break; - if (block_group_bits(cache, data)) { + if (!cache->ro && block_group_bits(cache, data)) { if (full_search) free_check = cache->key.offset; else @@ -1020,6 +1023,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, if (found) { found->total_bytes += total_bytes; found->bytes_used += bytes_used; + found->full = 0; WARN_ON(found->total_bytes < found->bytes_used); *space_info = found; return 0; @@ -1700,7 +1704,6 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, u64 super_used; u64 root_used; u64 search_start = 0; - u64 new_hint; u64 alloc_profile; u32 sizes[2]; struct btrfs_fs_info *info = root->fs_info; @@ -1724,7 +1727,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; } again: - if (root->ref_cows) { + if (root != root->fs_info->extent_root) { if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2 * 1024 * 1024, @@ -1738,10 +1741,6 @@ again: BUG_ON(ret); } - new_hint = max(hint_byte, root->fs_info->alloc_start); - if (new_hint < btrfs_super_total_bytes(&info->super_copy)) - hint_byte = new_hint; - WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, search_start, search_end, hint_byte, ins, @@ -2473,15 +2472,16 @@ out: return ret; } -int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) +int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) { struct btrfs_trans_handle *trans; struct btrfs_root *tree_root = root->fs_info->tree_root; struct btrfs_path *path; u64 cur_byte; u64 total_found; + u64 shrink_last_byte; + struct btrfs_block_group_cache *shrink_block_group; struct btrfs_fs_info *info = root->fs_info; - struct extent_io_tree *block_group_cache; struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; @@ -2489,17 +2489,29 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) int ret; int progress = 0; - btrfs_set_super_total_bytes(&info->super_copy, new_size); - clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1, - GFP_NOFS); - block_group_cache = &info->block_group_cache; + shrink_block_group = btrfs_lookup_block_group(root->fs_info, + shrink_start); + BUG_ON(!shrink_block_group); + + shrink_last_byte = shrink_start + shrink_block_group->key.offset; + + shrink_block_group->space_info->total_bytes -= + shrink_block_group->key.offset; +printk("shrink_extent_tree %Lu -> %Lu type %Lu\n", shrink_start, shrink_last_byte, shrink_block_group->flags); path = btrfs_alloc_path(); root = root->fs_info->extent_root; path->reada = 2; again: + trans = btrfs_start_transaction(root, 1); + do_chunk_alloc(trans, root->fs_info->extent_root, + btrfs_block_group_used(&shrink_block_group->item) + + 2 * 1024 * 1024, shrink_block_group->flags); + btrfs_end_transaction(trans, root); + shrink_block_group->ro = 1; + total_found = 0; - key.objectid = new_size; + key.objectid = shrink_start; key.offset = 0; key.type = 0; cur_byte = key.objectid; @@ -2511,10 +2523,12 @@ again: ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY); if (ret < 0) goto out; + if (ret == 0) { leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid + found_key.offset > new_size) { + if (found_key.objectid + found_key.offset > shrink_start && + found_key.objectid < shrink_last_byte) { cur_byte = found_key.objectid; key.objectid = cur_byte; } @@ -2543,6 +2557,9 @@ next: btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.objectid >= shrink_last_byte) + break; + if (progress && need_resched()) { memcpy(&key, &found_key, sizeof(key)); mutex_unlock(&root->fs_info->fs_mutex); @@ -2583,68 +2600,31 @@ next: goto again; } + /* + * we've freed all the extents, now remove the block + * group item from the tree + */ trans = btrfs_start_transaction(root, 1); - key.objectid = new_size; - key.offset = 0; - key.type = 0; - while(1) { - u64 ptr; - - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) - goto out; - - leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); -bg_next: - if (path->slots[0] >= nritems) { - ret = btrfs_next_leaf(root, path); - if (ret < 0) - break; - if (ret == 1) { - ret = 0; - break; - } - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + memcpy(&key, &shrink_block_group->key, sizeof(key)); - /* - * btrfs_next_leaf doesn't cow buffers, we have to - * do the search again - */ - memcpy(&key, &found_key, sizeof(key)); - btrfs_release_path(root, path); - goto resched_check; - } + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) + ret = -EIO; + if (ret < 0) + goto out; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) { - printk("shrinker found key %Lu %u %Lu\n", - found_key.objectid, found_key.type, - found_key.offset); - path->slots[0]++; - goto bg_next; - } - ret = get_state_private(&info->block_group_cache, - found_key.objectid, &ptr); - if (!ret) - kfree((void *)(unsigned long)ptr); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + kfree(shrink_block_group); - clear_extent_bits(&info->block_group_cache, found_key.objectid, - found_key.objectid + found_key.offset - 1, - (unsigned int)-1, GFP_NOFS); + clear_extent_bits(&info->block_group_cache, found_key.objectid, + found_key.objectid + found_key.offset - 1, + (unsigned int)-1, GFP_NOFS); - key.objectid = found_key.objectid + 1; - btrfs_del_item(trans, root, path); - btrfs_release_path(root, path); -resched_check: - if (need_resched()) { - mutex_unlock(&root->fs_info->fs_mutex); - cond_resched(); - mutex_lock(&root->fs_info->fs_mutex); - } - } - clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1, + btrfs_del_item(trans, root, path); + clear_extent_dirty(&info->free_space_cache, + shrink_start, shrink_last_byte - 1, GFP_NOFS); btrfs_commit_transaction(trans, root); out: @@ -2652,13 +2632,6 @@ out: return ret; } -int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 new_size) -{ - btrfs_set_super_total_bytes(&root->fs_info->super_copy, new_size); - return 0; -} - int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key) { @@ -2726,7 +2699,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - cache = kmalloc(sizeof(*cache), GFP_NOFS); + cache = kzalloc(sizeof(*cache), GFP_NOFS); if (!cache) { ret = -ENOMEM; break; @@ -2736,8 +2709,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) btrfs_item_ptr_offset(leaf, path->slots[0]), sizeof(cache->item)); memcpy(&cache->key, &found_key, sizeof(found_key)); - cache->cached = 0; - cache->pinned = 0; key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); @@ -2789,12 +2760,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, extent_root = root->fs_info->extent_root; block_group_cache = &root->fs_info->block_group_cache; - cache = kmalloc(sizeof(*cache), GFP_NOFS); + cache = kzalloc(sizeof(*cache), GFP_NOFS); BUG_ON(!cache); cache->key.objectid = chunk_offset; cache->key.offset = size; - cache->cached = 0; - cache->pinned = 0; btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); memset(&cache->item, 0, sizeof(cache->item)); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b31f52d4f2c..4d12aa532c5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include #include #include #include @@ -2887,9 +2888,12 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) { u64 new_size; u64 old_size; + u64 devid = 1; struct btrfs_ioctl_vol_args *vol_args; struct btrfs_trans_handle *trans; + struct btrfs_device *device = NULL; char *sizestr; + char *devstr = NULL; int ret = 0; int namelen; int mod = 0; @@ -2909,9 +2913,25 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) goto out; } + mutex_lock(&root->fs_info->fs_mutex); sizestr = vol_args->name; + devstr = strchr(sizestr, ':'); + if (devstr) { + char *end; + sizestr = devstr + 1; + *devstr = '\0'; + devstr = vol_args->name; + devid = simple_strtoull(devstr, &end, 10); +printk("resizing devid %Lu\n", devid); + } + device = btrfs_find_device(root, devid, NULL); + if (!device) { + printk("resizer unable to find device %Lu\n", devid); + ret = -EINVAL; + goto out_unlock; + } if (!strcmp(sizestr, "max")) - new_size = root->fs_info->sb->s_bdev->bd_inode->i_size; + new_size = device->bdev->bd_inode->i_size; else { if (sizestr[0] == '-') { mod = -1; @@ -2923,12 +2943,11 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) new_size = btrfs_parse_size(sizestr); if (new_size == 0) { ret = -EINVAL; - goto out; + goto out_unlock; } } - mutex_lock(&root->fs_info->fs_mutex); - old_size = btrfs_super_total_bytes(&root->fs_info->super_copy); + old_size = device->total_bytes; if (mod < 0) { if (new_size > old_size) { @@ -2944,7 +2963,7 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) ret = -EINVAL; goto out_unlock; } - if (new_size > root->fs_info->sb->s_bdev->bd_inode->i_size) { + if (new_size > device->bdev->bd_inode->i_size) { ret = -EFBIG; goto out_unlock; } @@ -2952,13 +2971,14 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) do_div(new_size, root->sectorsize); new_size *= root->sectorsize; -printk("new size is %Lu\n", new_size); +printk("new size for %s is %llu\n", device->name, (unsigned long long)new_size); + if (new_size > old_size) { trans = btrfs_start_transaction(root, 1); - ret = btrfs_grow_extent_tree(trans, root, new_size); + ret = btrfs_grow_device(trans, device, new_size); btrfs_commit_transaction(trans, root); } else { - ret = btrfs_shrink_extent_tree(root, new_size); + ret = btrfs_shrink_device(device, new_size); } out_unlock: diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c63a982e31d..a2c56de1548 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -77,7 +77,7 @@ static struct btrfs_device *__find_device(struct list_head *head, u64 devid, list_for_each(cur, head) { dev = list_entry(cur, struct btrfs_device, dev_list); if (dev->devid == devid && - !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE)) { + (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) { return dev; } } @@ -293,6 +293,10 @@ static int find_free_dev_extent(struct btrfs_trans_handle *trans, * so we make sure to start at an offset of at least 1MB */ search_start = max((u64)1024 * 1024, search_start); + + if (root->fs_info->alloc_start + num_bytes <= device->total_bytes) + search_start = max(root->fs_info->alloc_start, search_start); + key.objectid = device->devid; key.offset = search_start; key.type = BTRFS_DEV_EXTENT_KEY; @@ -380,6 +384,33 @@ error: return ret; } +int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, + struct btrfs_device *device, + u64 start) +{ + int ret; + struct btrfs_path *path; + struct btrfs_root *root = device->dev_root; + struct btrfs_key key; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = device->devid; + key.offset = start; + key.type = BTRFS_DEV_EXTENT_KEY; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + BUG_ON(ret); + + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + + btrfs_free_path(path); + return ret; +} + int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 chunk_tree, u64 chunk_objectid, @@ -560,6 +591,7 @@ out: btrfs_free_path(path); return ret; } + int btrfs_update_device(struct btrfs_trans_handle *trans, struct btrfs_device *device) { @@ -606,6 +638,254 @@ out: return ret; } +int btrfs_grow_device(struct btrfs_trans_handle *trans, + struct btrfs_device *device, u64 new_size) +{ + struct btrfs_super_block *super_copy = + &device->dev_root->fs_info->super_copy; + u64 old_total = btrfs_super_total_bytes(super_copy); + u64 diff = new_size - device->total_bytes; + + btrfs_set_super_total_bytes(super_copy, old_total + diff); + return btrfs_update_device(trans, device); +} + +static int btrfs_free_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 chunk_tree, u64 chunk_objectid, + u64 chunk_offset) +{ + int ret; + struct btrfs_path *path; + struct btrfs_key key; + + root = root->fs_info->chunk_root; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = chunk_objectid; + key.offset = chunk_offset; + key.type = BTRFS_CHUNK_ITEM_KEY; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + BUG_ON(ret); + + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + + btrfs_free_path(path); + return 0; +} + +int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 + chunk_offset) +{ + struct btrfs_super_block *super_copy = &root->fs_info->super_copy; + struct btrfs_disk_key *disk_key; + struct btrfs_chunk *chunk; + u8 *ptr; + int ret = 0; + u32 num_stripes; + u32 array_size; + u32 len = 0; + u32 cur; + struct btrfs_key key; + + array_size = btrfs_super_sys_array_size(super_copy); + + ptr = super_copy->sys_chunk_array; + cur = 0; + + while (cur < array_size) { + disk_key = (struct btrfs_disk_key *)ptr; + btrfs_disk_key_to_cpu(&key, disk_key); + + len = sizeof(*disk_key); + + if (key.type == BTRFS_CHUNK_ITEM_KEY) { + chunk = (struct btrfs_chunk *)(ptr + len); + num_stripes = btrfs_stack_chunk_num_stripes(chunk); + len += btrfs_chunk_item_size(num_stripes); + } else { + ret = -EIO; + break; + } + if (key.objectid == chunk_objectid && + key.offset == chunk_offset) { + memmove(ptr, ptr + len, array_size - (cur + len)); + array_size -= len; + btrfs_set_super_sys_array_size(super_copy, array_size); + } else { + ptr += len; + cur += len; + } + } + return ret; +} + + +int btrfs_relocate_chunk(struct btrfs_root *root, + u64 chunk_tree, u64 chunk_objectid, + u64 chunk_offset) +{ + struct extent_map_tree *em_tree; + struct btrfs_root *extent_root; + struct btrfs_trans_handle *trans; + struct extent_map *em; + struct map_lookup *map; + int ret; + int i; + + root = root->fs_info->chunk_root; + extent_root = root->fs_info->extent_root; + em_tree = &root->fs_info->mapping_tree.map_tree; + + /* step one, relocate all the extents inside this chunk */ + ret = btrfs_shrink_extent_tree(extent_root, chunk_offset); + BUG_ON(ret); + + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + + /* + * step two, delete the device extents and the + * chunk tree entries + */ + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, chunk_offset, 1); + spin_unlock(&em_tree->lock); + + BUG_ON(em->start > chunk_offset || em->start + em->len < chunk_offset); + map = (struct map_lookup *)em->bdev; + + for (i = 0; i < map->num_stripes; i++) { + ret = btrfs_free_dev_extent(trans, map->stripes[i].dev, + map->stripes[i].physical); + BUG_ON(ret); + } + ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid, + chunk_offset); + + BUG_ON(ret); + + if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { + ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); + BUG_ON(ret); + goto out; + } + + + + spin_lock(&em_tree->lock); + remove_extent_mapping(em_tree, em); + kfree(map); + em->bdev = NULL; + + /* once for the tree */ + free_extent_map(em); + spin_unlock(&em_tree->lock); + +out: + /* once for us */ + free_extent_map(em); + + btrfs_end_transaction(trans, root); + return 0; +} + +/* + * shrinking a device means finding all of the device extents past + * the new size, and then following the back refs to the chunks. + * The chunk relocation code actually frees the device extent + */ +int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = device->dev_root; + struct btrfs_dev_extent *dev_extent = NULL; + struct btrfs_path *path; + u64 length; + u64 chunk_tree; + u64 chunk_objectid; + u64 chunk_offset; + int ret; + int slot; + struct extent_buffer *l; + struct btrfs_key key; + struct btrfs_super_block *super_copy = &root->fs_info->super_copy; + u64 old_total = btrfs_super_total_bytes(super_copy); + u64 diff = device->total_bytes - new_size; + + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + trans = btrfs_start_transaction(root, 1); + if (!trans) { + ret = -ENOMEM; + goto done; + } + + path->reada = 2; + + device->total_bytes = new_size; + ret = btrfs_update_device(trans, device); + if (ret) { + btrfs_end_transaction(trans, root); + goto done; + } + WARN_ON(diff > old_total); + btrfs_set_super_total_bytes(super_copy, old_total - diff); + btrfs_end_transaction(trans, root); + + key.objectid = device->devid; + key.offset = (u64)-1; + key.type = BTRFS_DEV_EXTENT_KEY; + + while (1) { + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto done; + + ret = btrfs_previous_item(root, path, 0, key.type); + if (ret < 0) + goto done; + if (ret) { + ret = 0; + goto done; + } + + l = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(l, &key, path->slots[0]); + + if (key.objectid != device->devid) + goto done; + + dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); + length = btrfs_dev_extent_length(l, dev_extent); + + if (key.offset + length <= new_size) + goto done; + + chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); + chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); + chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent); + btrfs_release_path(root, path); + + ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, + chunk_offset); + if (ret) + goto done; + } + +done: + btrfs_free_path(path); + return ret; +} + int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, @@ -658,6 +938,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 dev_offset; struct btrfs_fs_info *info = extent_root->fs_info; struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; + struct btrfs_path *path; struct btrfs_stripe *stripes; struct btrfs_device *device = NULL; struct btrfs_chunk *chunk; @@ -724,6 +1005,10 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, min_stripe_size = 1 * 1024 * 1024; } + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + /* we don't want a chunk larger than 10% of the FS */ percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1); max_chunk_size = min(percent_max, max_chunk_size); @@ -759,11 +1044,19 @@ again: avail = device->total_bytes - device->bytes_used; cur = cur->next; + if (avail >= min_free) { - list_move_tail(&device->dev_alloc_list, &private_devs); - index++; - if (type & BTRFS_BLOCK_GROUP_DUP) + u64 ignored_start = 0; + ret = find_free_dev_extent(trans, device, path, + min_free, + &ignored_start); + if (ret == 0) { + list_move_tail(&device->dev_alloc_list, + &private_devs); index++; + if (type & BTRFS_BLOCK_GROUP_DUP) + index++; + } } else if (avail > max_avail) max_avail = avail; if (cur == dev_list) @@ -785,30 +1078,37 @@ again: calc_size = max_avail; goto again; } + btrfs_free_path(path); return -ENOSPC; } key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; key.type = BTRFS_CHUNK_ITEM_KEY; ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID, &key.offset); - if (ret) + if (ret) { + btrfs_free_path(path); return ret; + } chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS); - if (!chunk) + if (!chunk) { + btrfs_free_path(path); return -ENOMEM; + } map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); if (!map) { kfree(chunk); + btrfs_free_path(path); return -ENOMEM; } + btrfs_free_path(path); + path = NULL; stripes = &chunk->stripe; *num_bytes = chunk_bytes_by_type(type, calc_size, num_stripes, sub_stripes); - index = 0; printk("new chunk type %Lu start %Lu size %Lu\n", type, key.offset, *num_bytes); while(index < num_stripes) { @@ -874,6 +1174,11 @@ printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key.offset, cal em->len = *num_bytes; em->block_start = 0; + if (type & BTRFS_BLOCK_GROUP_SYSTEM) { + ret = btrfs_add_system_chunk(trans, chunk_root, &key, + chunk, btrfs_chunk_item_size(num_stripes)); + BUG_ON(ret); + } kfree(chunk); em_tree = &extent_root->fs_info->mapping_tree.map_tree; @@ -1376,11 +1681,6 @@ int btrfs_read_sys_array(struct btrfs_root *root) array_size = btrfs_super_sys_array_size(super_copy); - /* - * we do this loop twice, once for the device items and - * once for all of the chunks. This way there are device - * structs filled in for every chunk - */ ptr = super_copy->sys_chunk_array; sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array); cur = 0; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index a2660d2b6b3..6fe8440b37a 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -128,4 +128,9 @@ int btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len); int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree, u64 logical, struct page *page); +int btrfs_grow_device(struct btrfs_trans_handle *trans, + struct btrfs_device *device, u64 new_size); +struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, + u8 *uuid); +int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); #endif -- cgit v1.2.3-70-g09d2 From 3bf3d9e9c256e1a249a47bb8ceff682e6430aeff Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 26 Apr 2008 11:03:32 -0400 Subject: Btrfs: Avoid recursive chunk allocations Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6540095544e..a589912fdd5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1727,7 +1727,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; } again: - if (root != root->fs_info->extent_root) { + if (root->ref_cows) { if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2 * 1024 * 1024, -- cgit v1.2.3-70-g09d2 From 8e7bf94fd5f44fa585e29fbe6a1bfabc04aea7cf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 28 Apr 2008 09:02:36 -0400 Subject: Btrfs: Do more optimal file RA during shrinking and defrag Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 16 +++++++++++++++- fs/btrfs/inode.c | 15 +++++++-------- 2 files changed, 22 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a589912fdd5..fe4fe709c31 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2263,6 +2263,12 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return 0; } +static unsigned long calc_ra(unsigned long start, unsigned long last, + unsigned long nr) +{ + return min(last, start + nr - 1); +} + static int noinline relocate_inode_pages(struct inode *inode, u64 start, u64 len) { @@ -2275,6 +2281,8 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, struct page *page; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct file_ra_state *ra; + unsigned long total_read = 0; + unsigned long ra_pages; ra = kzalloc(sizeof(*ra), GFP_NOFS); @@ -2282,11 +2290,17 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, i = start >> PAGE_CACHE_SHIFT; last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; + ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages; + file_ra_state_init(ra, inode->i_mapping); - btrfs_force_ra(inode->i_mapping, ra, NULL, i, last_index); kfree(ra); for (; i <= last_index; i++) { + if (total_read % ra_pages == 0) { + btrfs_force_ra(inode->i_mapping, ra, NULL, i, + calc_ra(i, last_index, ra_pages)); + } + total_read++; page = grab_cache_page(inode->i_mapping, i); if (!page) goto out_unlock; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4d12aa532c5..ab707c0930d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2814,14 +2814,12 @@ unsigned long btrfs_force_ra(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t offset, pgoff_t last_index) { - pgoff_t req_size; + pgoff_t req_size = last_index - offset + 1; #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) - req_size = last_index - offset + 1; offset = page_cache_readahead(mapping, ra, file, offset, req_size); return offset; #else - req_size = min(last_index - offset + 1, (pgoff_t)128); page_cache_sync_readahead(mapping, ra, file, offset, req_size); return offset + req_size; #endif @@ -2833,7 +2831,8 @@ int btrfs_defrag_file(struct file *file) { struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct page *page; unsigned long last_index; - unsigned long ra_index = 0; + unsigned long ra_pages = root->fs_info->bdi.ra_pages; + unsigned long total_read = 0; u64 page_start; u64 page_end; unsigned long i; @@ -2848,11 +2847,11 @@ int btrfs_defrag_file(struct file *file) { mutex_lock(&inode->i_mutex); last_index = inode->i_size >> PAGE_CACHE_SHIFT; for (i = 0; i <= last_index; i++) { - if (i == ra_index) { - ra_index = btrfs_force_ra(inode->i_mapping, - &file->f_ra, - file, ra_index, last_index); + if (total_read % ra_pages == 0) { + btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i, + min(last_index, i + ra_pages - 1)); } + total_read++; page = grab_cache_page(inode->i_mapping, i); if (!page) goto out_unlock; -- cgit v1.2.3-70-g09d2 From ec44a35cbeb26ab2da84cb280d778260f2312feb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 28 Apr 2008 15:29:52 -0400 Subject: Btrfs: Add balance ioctl to restripe the chunks Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 106 +++++++++++++++++++++++++++++++++++++-------- fs/btrfs/inode.c | 11 +++++ fs/btrfs/volumes.c | 115 +++++++++++++++++++++++++++++++++++++++++++++---- fs/btrfs/volumes.h | 1 + 5 files changed, 208 insertions(+), 27 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b9257b37bb9..73b92dd150f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1364,7 +1364,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, u64 empty_size, u64 hint_byte, - u64 search_end, struct btrfs_key *ins, int data); + u64 search_end, struct btrfs_key *ins, u64 data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fe4fe709c31..95aee5a2937 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -17,6 +17,7 @@ */ #include #include +#include #include "hash.h" #include "crc32c.h" #include "ctree.h" @@ -1058,6 +1059,26 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) } } +static u64 reduce_alloc_profile(u64 flags) +{ + if ((flags & BTRFS_BLOCK_GROUP_DUP) && + (flags & (BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10))) + flags &= ~BTRFS_BLOCK_GROUP_DUP; + + if ((flags & BTRFS_BLOCK_GROUP_RAID1) && + (flags & BTRFS_BLOCK_GROUP_RAID10)) + flags &= ~BTRFS_BLOCK_GROUP_RAID1; + + if ((flags & BTRFS_BLOCK_GROUP_RAID0) && + ((flags & BTRFS_BLOCK_GROUP_RAID1) | + (flags & BTRFS_BLOCK_GROUP_RAID10) | + (flags & BTRFS_BLOCK_GROUP_DUP))) + flags &= ~BTRFS_BLOCK_GROUP_RAID0; + return flags; +} + + static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, u64 flags) @@ -1068,6 +1089,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 num_bytes; int ret; + flags = reduce_alloc_profile(flags); + space_info = __find_space_info(extent_root->fs_info, flags); if (!space_info) { ret = update_space_info(extent_root->fs_info, flags, @@ -1684,6 +1707,7 @@ enospc: error: return ret; } + /* * finds a free extent and does all the dirty work required for allocation * returns the key for the extent through ins, and a tree buffer for @@ -1697,7 +1721,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, u64 empty_size, u64 hint_byte, - u64 search_end, struct btrfs_key *ins, int data) + u64 search_end, struct btrfs_key *ins, u64 data) { int ret; int pending_ret; @@ -1727,6 +1751,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; } again: + data = reduce_alloc_profile(data); if (root->ref_cows) { if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { ret = do_chunk_alloc(trans, root->fs_info->extent_root, @@ -1752,6 +1777,9 @@ again: num_bytes = max(num_bytes, min_alloc_size); goto again; } + if (ret) { + printk("allocation failed flags %Lu\n", data); + } BUG_ON(ret); if (ret) return ret; @@ -2274,8 +2302,6 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, { u64 page_start; u64 page_end; - u64 delalloc_start; - u64 existing_delalloc; unsigned long last_index; unsigned long i; struct page *page; @@ -2293,7 +2319,6 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages; file_ra_state_init(ra, inode->i_mapping); - kfree(ra); for (; i <= last_index; i++) { if (total_read % ra_pages == 0) { @@ -2313,26 +2338,30 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, goto out_unlock; } } +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + ClearPageDirty(page); +#else + cancel_dirty_page(page, PAGE_CACHE_SIZE); +#endif + wait_on_page_writeback(page); + set_page_extent_mapped(page); page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; lock_extent(io_tree, page_start, page_end, GFP_NOFS); - delalloc_start = page_start; - existing_delalloc = count_range_bits(io_tree, - &delalloc_start, page_end, - PAGE_CACHE_SIZE, EXTENT_DELALLOC); - + set_page_dirty(page); set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); unlock_extent(io_tree, page_start, page_end, GFP_NOFS); - set_page_dirty(page); unlock_page(page); page_cache_release(page); + balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); } out_unlock: + kfree(ra); mutex_unlock(&inode->i_mutex); return 0; } @@ -2397,8 +2426,6 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, goto out; } relocate_inode_pages(inode, ref_offset, extent_key->offset); - /* FIXME, data=ordered will help get rid of this */ - filemap_fdatawrite(inode->i_mapping); iput(inode); mutex_lock(&extent_root->fs_info->fs_mutex); } else { @@ -2486,6 +2513,47 @@ out: return ret; } +static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) +{ + u64 num_devices; + u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; + + num_devices = btrfs_super_num_devices(&root->fs_info->super_copy); + if (num_devices == 1) { + stripped |= BTRFS_BLOCK_GROUP_DUP; + stripped = flags & ~stripped; + + /* turn raid0 into single device chunks */ + if (flags & BTRFS_BLOCK_GROUP_RAID0) + return stripped; + + /* turn mirroring into duplication */ + if (flags & (BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10)) + return stripped | BTRFS_BLOCK_GROUP_DUP; + return flags; + } else { + /* they already had raid on here, just return */ + if ((flags & BTRFS_BLOCK_GROUP_DUP) && + (flags & BTRFS_BLOCK_GROUP_RAID1)) { + } + if (flags & stripped) + return flags; + + stripped |= BTRFS_BLOCK_GROUP_DUP; + stripped = flags & ~stripped; + + /* switch duplicated blocks with raid1 */ + if (flags & BTRFS_BLOCK_GROUP_DUP) + return stripped | BTRFS_BLOCK_GROUP_RAID1; + + /* turn single device chunks into raid0 */ + return stripped | BTRFS_BLOCK_GROUP_RAID0; + } + return flags; +} + int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) { struct btrfs_trans_handle *trans; @@ -2494,6 +2562,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) u64 cur_byte; u64 total_found; u64 shrink_last_byte; + u64 new_alloc_flags; struct btrfs_block_group_cache *shrink_block_group; struct btrfs_fs_info *info = root->fs_info; struct btrfs_key key; @@ -2511,17 +2580,20 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) shrink_block_group->space_info->total_bytes -= shrink_block_group->key.offset; -printk("shrink_extent_tree %Lu -> %Lu type %Lu\n", shrink_start, shrink_last_byte, shrink_block_group->flags); path = btrfs_alloc_path(); root = root->fs_info->extent_root; path->reada = 2; again: - trans = btrfs_start_transaction(root, 1); - do_chunk_alloc(trans, root->fs_info->extent_root, + if (btrfs_block_group_used(&shrink_block_group->item) > 0) { + trans = btrfs_start_transaction(root, 1); + new_alloc_flags = update_block_group_flags(root, + shrink_block_group->flags); + do_chunk_alloc(trans, root->fs_info->extent_root, btrfs_block_group_used(&shrink_block_group->item) + - 2 * 1024 * 1024, shrink_block_group->flags); - btrfs_end_transaction(trans, root); + 2 * 1024 * 1024, new_alloc_flags); + btrfs_end_transaction(trans, root); + } shrink_block_group->ro = 1; total_found = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1a74b501869..99483447459 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2864,6 +2864,15 @@ int btrfs_defrag_file(struct file *file) { goto out_unlock; } } + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + ClearPageDirty(page); +#else + cancel_dirty_page(page, PAGE_CACHE_SIZE); +#endif + wait_on_page_writeback(page); + set_page_extent_mapped(page); + page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; @@ -3105,6 +3114,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_resize(root, (void __user *)arg); case BTRFS_IOC_ADD_DEV: return btrfs_ioctl_add_dev(root, (void __user *)arg); + case BTRFS_IOC_BALANCE: + return btrfs_balance(root->fs_info->dev_root); } return -ENOTTY; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b93c15aa17d..6476ecbf132 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -869,6 +869,107 @@ out: return 0; } +static u64 div_factor(u64 num, int factor) +{ + if (factor == 10) + return num; + num *= factor; + do_div(num, 10); + return num; +} + + +int btrfs_balance(struct btrfs_root *dev_root) +{ + int ret; + struct list_head *cur; + struct list_head *devices = &dev_root->fs_info->fs_devices->devices; + struct btrfs_device *device; + u64 old_size; + u64 size_to_free; + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_chunk *chunk; + struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root; + struct btrfs_trans_handle *trans; + struct btrfs_key found_key; + + + dev_root = dev_root->fs_info->dev_root; + + mutex_lock(&dev_root->fs_info->fs_mutex); + /* step one make some room on all the devices */ + list_for_each(cur, devices) { + device = list_entry(cur, struct btrfs_device, dev_list); + old_size = device->total_bytes; + size_to_free = div_factor(old_size, 1); + size_to_free = min(size_to_free, (u64)1 * 1024 * 1024); + if (device->total_bytes - device->bytes_used > size_to_free) + continue; + + ret = btrfs_shrink_device(device, old_size - size_to_free); + BUG_ON(ret); + + trans = btrfs_start_transaction(dev_root, 1); + BUG_ON(!trans); + + ret = btrfs_grow_device(trans, device, old_size); + BUG_ON(ret); + + btrfs_end_transaction(trans, dev_root); + } + + /* step two, relocate all the chunks */ + path = btrfs_alloc_path(); + BUG_ON(!path); + + key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + key.offset = (u64)-1; + key.type = BTRFS_CHUNK_ITEM_KEY; + + while(1) { + ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); + if (ret < 0) + goto error; + + /* + * this shouldn't happen, it means the last relocate + * failed + */ + if (ret == 0) + break; + + ret = btrfs_previous_item(chunk_root, path, 0, + BTRFS_CHUNK_ITEM_KEY); + if (ret) { + break; + } + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + if (found_key.objectid != key.objectid) + break; + chunk = btrfs_item_ptr(path->nodes[0], + path->slots[0], + struct btrfs_chunk); + key.offset = found_key.offset; + /* chunk zero is special */ + if (key.offset == 0) + break; + + ret = btrfs_relocate_chunk(chunk_root, + chunk_root->root_key.objectid, + found_key.objectid, + found_key.offset); + BUG_ON(ret); + btrfs_release_path(chunk_root, path); + } + ret = 0; +error: + btrfs_free_path(path); + mutex_unlock(&dev_root->fs_info->fs_mutex); + return ret; +} + /* * shrinking a device means finding all of the device extents past * the new size, and then following the back refs to the chunks. @@ -985,15 +1086,6 @@ int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, return 0; } -static u64 div_factor(u64 num, int factor) -{ - if (factor == 10) - return num; - num *= factor; - do_div(num, 10); - return num; -} - static u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes, int sub_stripes) { @@ -1040,6 +1132,11 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, int stripe_len = 64 * 1024; struct btrfs_key key; + if ((type & BTRFS_BLOCK_GROUP_RAID1) && + (type & BTRFS_BLOCK_GROUP_DUP)) { + WARN_ON(1); + type &= ~BTRFS_BLOCK_GROUP_DUP; + } dev_list = &extent_root->fs_info->fs_devices->alloc_list; if (list_empty(dev_list)) return -ENOSPC; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 6f173450378..3f9a17f2e41 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -134,4 +134,5 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, u8 *uuid); int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); int btrfs_init_new_device(struct btrfs_root *root, char *path); +int btrfs_balance(struct btrfs_root *dev_root); #endif -- cgit v1.2.3-70-g09d2 From a236aed14ccb0661611d4416f6b573d892bdc60a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Apr 2008 09:38:00 -0400 Subject: Btrfs: Deal with failed writes in mirrored configurations Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 17 +++++++++++++++-- fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/volumes.c | 17 ++++++++++++++--- fs/btrfs/volumes.h | 3 ++- 4 files changed, 33 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1c7d84aff86..e35e70165b5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1385,7 +1385,10 @@ int write_all_supers(struct btrfs_root *root) struct buffer_head *bh; int ret; int do_barriers; + int max_errors; + int total_errors = 0; + max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; do_barriers = !btrfs_test_opt(root, NOBARRIER); sb = root->fs_info->sb_buffer; @@ -1433,8 +1436,14 @@ int write_all_supers(struct btrfs_root *root) } else { ret = submit_bh(WRITE, bh); } - BUG_ON(ret); + if (ret) + total_errors++; } + if (total_errors > max_errors) { + printk("btrfs: %d errors while writing supers\n", total_errors); + BUG(); + } + total_errors = 0; list_for_each(cur, head) { dev = list_entry(cur, struct btrfs_device, dev_list); @@ -1454,13 +1463,17 @@ int write_all_supers(struct btrfs_root *root) wait_on_buffer(bh); BUG_ON(!buffer_uptodate(bh)); } else { - BUG(); + total_errors++; } } dev->pending_io = NULL; brelse(bh); } + if (total_errors > max_errors) { + printk("btrfs: %d errors while writing supers\n", total_errors); + BUG(); + } return 0; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 95aee5a2937..f94794a9932 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -315,8 +315,8 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, block_group_cache = &info->block_group_cache; total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); - if (!owner) - factor = 10; + if (data & BTRFS_BLOCK_GROUP_METADATA) + factor = 9; bit = block_group_state_bits(data); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9a724113456..57ab755aca7 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1425,6 +1425,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, int stripe_index; int i; int num_stripes; + int max_errors = 0; struct btrfs_multi_bio *multi = NULL; if (multi_ret && !(rw & (1 << BIO_RW))) { @@ -1436,6 +1437,8 @@ again: GFP_NOFS); if (!multi) return -ENOMEM; + + atomic_set(&multi->error, 0); } spin_lock(&em_tree->lock); @@ -1462,8 +1465,10 @@ again: if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP)) { stripes_required = map->num_stripes; + max_errors = 1; } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { stripes_required = map->sub_stripes; + max_errors = 1; } } if (multi_ret && rw == WRITE && @@ -1561,6 +1566,7 @@ again: if (multi_ret) { *multi_ret = multi; multi->num_stripes = num_stripes; + multi->max_errors = max_errors; } out: free_extent_map(em); @@ -1598,14 +1604,19 @@ static int end_bio_multi_stripe(struct bio *bio, return 1; #endif if (err) - multi->error = err; + atomic_inc(&multi->error); if (atomic_dec_and_test(&multi->stripes_pending)) { bio->bi_private = multi->private; bio->bi_end_io = multi->end_io; - if (!err && multi->error) - err = multi->error; + /* only send an error to the higher layers if it is + * beyond the tolerance of the multi-bio + */ + if (atomic_read(&multi->error) > multi->max_errors) + err = -EIO; + else + err = 0; kfree(multi); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 3f9a17f2e41..a9663e92bb1 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -90,7 +90,8 @@ struct btrfs_multi_bio { atomic_t stripes_pending; bio_end_io_t *end_io; void *private; - int error; + atomic_t error; + int max_errors; int num_stripes; struct btrfs_bio_stripe stripes[]; }; -- cgit v1.2.3-70-g09d2 From a061fc8da7b990faa41ca503e66faef3ecdeead0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 7 May 2008 11:43:44 -0400 Subject: Btrfs: Add support for online device removal This required a few structural changes to the code that manages bdev pointers: The VFS super block now gets an anon-bdev instead of a pointer to the lowest bdev. This allows us to avoid swapping the super block bdev pointer around at run time. The code to read in the super block no longer goes through the extent buffer interface. Things got ugly keeping the mapping constant. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 +- fs/btrfs/disk-io.c | 101 ++++++++++------------ fs/btrfs/extent-tree.c | 139 +++++++++++++++++++++++++------ fs/btrfs/inode.c | 27 +++++- fs/btrfs/super.c | 35 ++------ fs/btrfs/transaction.c | 5 +- fs/btrfs/volumes.c | 221 +++++++++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/volumes.h | 3 + 8 files changed, 412 insertions(+), 122 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3b6f8524a4a..33ab165591c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -505,7 +505,7 @@ struct btrfs_fs_info { u64 alloc_start; struct btrfs_transaction *running_transaction; struct btrfs_super_block super_copy; - struct extent_buffer *sb_buffer; + struct btrfs_super_block super_for_commit; struct block_device *__bdev; struct super_block *sb; struct inode *btree_inode; @@ -1208,6 +1208,7 @@ BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); /* struct btrfs_super_block */ BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); +BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64); BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, generation, 64); BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fabc31b334b..9d5424ad01a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -78,9 +78,13 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); - spin_unlock(&em_tree->lock); - if (em) + if (em) { + em->bdev = + BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; + spin_unlock(&em_tree->lock); goto out; + } + spin_unlock(&em_tree->lock); em = alloc_extent_map(GFP_NOFS); if (!em) { @@ -90,7 +94,7 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, em->start = 0; em->len = (u64)-1; em->block_start = 0; - em->bdev = inode->i_sb->s_bdev; + em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); @@ -435,11 +439,6 @@ static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, ret = btrfs_bio_wq_end_io(root->fs_info, bio, 1); BUG_ON(ret); - if (offset == BTRFS_SUPER_INFO_OFFSET) { - bio->bi_bdev = root->fs_info->fs_devices->latest_bdev; - submit_bio(rw, bio); - return 0; - } return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num); } @@ -587,8 +586,7 @@ static int close_all_devices(struct btrfs_fs_info *fs_info) list = &fs_info->fs_devices->devices; list_for_each(next, list) { device = list_entry(next, struct btrfs_device, dev_list); - if (device->bdev && device->bdev != fs_info->sb->s_bdev) - close_bdev_excl(device->bdev); + close_bdev_excl(device->bdev); device->bdev = NULL; } return 0; @@ -1118,6 +1116,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, u32 leafsize; u32 blocksize; u32 stripesize; + struct buffer_head *bh; struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), @@ -1153,7 +1152,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, spin_lock_init(&fs_info->new_trans_lock); init_completion(&fs_info->kobj_unregister); - sb_set_blocksize(sb, BTRFS_SUPER_INFO_SIZE); fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; fs_info->chunk_root = chunk_root; @@ -1170,6 +1168,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; + sb->s_blocksize = 4096; + sb->s_blocksize_bits = blksize_bits(4096); + /* * we set the i_size on the btree inode to the max possible int. * the real end of the address space is determined by all of @@ -1229,19 +1230,16 @@ struct btrfs_root *open_ctree(struct super_block *sb, __setup_root(4096, 4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); - fs_info->sb_buffer = read_tree_block(tree_root, - BTRFS_SUPER_INFO_OFFSET, - 4096); - if (!fs_info->sb_buffer) + bh = __bread(fs_devices->latest_bdev, + BTRFS_SUPER_INFO_OFFSET / 4096, 4096); + if (!bh) goto fail_iput; - read_extent_buffer(fs_info->sb_buffer, &fs_info->super_copy, 0, - sizeof(fs_info->super_copy)); + memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); + brelse(bh); - read_extent_buffer(fs_info->sb_buffer, fs_info->fsid, - (unsigned long)btrfs_super_fsid(fs_info->sb_buffer), - BTRFS_FSID_SIZE); + memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_FSID_SIZE); disk_super = &fs_info->super_copy; if (!btrfs_super_root(disk_super)) @@ -1263,7 +1261,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, tree_root->leafsize = leafsize; tree_root->sectorsize = sectorsize; tree_root->stripesize = stripesize; - sb_set_blocksize(sb, sectorsize); + + sb->s_blocksize = sectorsize; + sb->s_blocksize_bits = blksize_bits(sectorsize); if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, sizeof(disk_super->magic))) { @@ -1339,7 +1339,6 @@ fail_tree_root: fail_sys_array: mutex_unlock(&fs_info->fs_mutex); fail_sb_buffer: - free_extent_buffer(fs_info->sb_buffer); extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); fail_iput: iput(fs_info->btree_inode); @@ -1380,41 +1379,44 @@ int write_all_supers(struct btrfs_root *root) struct list_head *cur; struct list_head *head = &root->fs_info->fs_devices->devices; struct btrfs_device *dev; - struct extent_buffer *sb; + struct btrfs_super_block *sb; struct btrfs_dev_item *dev_item; struct buffer_head *bh; int ret; int do_barriers; int max_errors; int total_errors = 0; + u32 crc; + u64 flags; max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; do_barriers = !btrfs_test_opt(root, NOBARRIER); - sb = root->fs_info->sb_buffer; - dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block, - dev_item); + sb = &root->fs_info->super_for_commit; + dev_item = &sb->dev_item; list_for_each(cur, head) { dev = list_entry(cur, struct btrfs_device, dev_list); - btrfs_set_device_type(sb, dev_item, dev->type); - btrfs_set_device_id(sb, dev_item, dev->devid); - btrfs_set_device_total_bytes(sb, dev_item, dev->total_bytes); - btrfs_set_device_bytes_used(sb, dev_item, dev->bytes_used); - btrfs_set_device_io_align(sb, dev_item, dev->io_align); - btrfs_set_device_io_width(sb, dev_item, dev->io_width); - btrfs_set_device_sector_size(sb, dev_item, dev->sector_size); - write_extent_buffer(sb, dev->uuid, - (unsigned long)btrfs_device_uuid(dev_item), - BTRFS_UUID_SIZE); - - btrfs_set_header_flag(sb, BTRFS_HEADER_FLAG_WRITTEN); - csum_tree_block(root, sb, 0); - - bh = __getblk(dev->bdev, BTRFS_SUPER_INFO_OFFSET / - root->fs_info->sb->s_blocksize, + btrfs_set_stack_device_type(dev_item, dev->type); + btrfs_set_stack_device_id(dev_item, dev->devid); + btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes); + btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); + btrfs_set_stack_device_io_align(dev_item, dev->io_align); + btrfs_set_stack_device_io_width(dev_item, dev->io_width); + btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); + memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE); + flags = btrfs_super_flags(sb); + btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN); + + + crc = ~(u32)0; + crc = btrfs_csum_data(root, (char *)sb + BTRFS_CSUM_SIZE, crc, + BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); + btrfs_csum_final(crc, sb->csum); + + bh = __getblk(dev->bdev, BTRFS_SUPER_INFO_OFFSET / 4096, BTRFS_SUPER_INFO_SIZE); - read_extent_buffer(sb, bh->b_data, 0, BTRFS_SUPER_INFO_SIZE); + memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); dev->pending_io = bh; get_bh(bh); @@ -1483,15 +1485,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root int ret; ret = write_all_supers(root); -#if 0 - if (!btrfs_test_opt(root, NOBARRIER)) - blkdev_issue_flush(sb->s_bdev, NULL); - set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, super); - ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping, - super->start, super->len); - if (!btrfs_test_opt(root, NOBARRIER)) - blkdev_issue_flush(sb->s_bdev, NULL); -#endif return ret; } @@ -1570,8 +1563,6 @@ int close_ctree(struct btrfs_root *root) if (root->fs_info->dev_root->node); free_extent_buffer(root->fs_info->dev_root->node); - free_extent_buffer(fs_info->sb_buffer); - btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); @@ -1652,7 +1643,7 @@ void btrfs_throttle(struct btrfs_root *root) { struct backing_dev_info *bdi; - bdi = root->fs_info->sb->s_bdev->bd_inode->i_mapping->backing_dev_info; + bdi = &root->fs_info->bdi; if (root->fs_info->throttles && bdi_write_congested(bdi)) { #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) congestion_wait(WRITE, HZ/20); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f94794a9932..c0e67bde842 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -147,6 +147,8 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct u64 end; int ret; + bytenr = max_t(u64, bytenr, + BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE); block_group_cache = &info->block_group_cache; ret = find_first_extent_bit(block_group_cache, bytenr, &start, &end, @@ -1059,16 +1061,25 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) } } -static u64 reduce_alloc_profile(u64 flags) +static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags) { + u64 num_devices = root->fs_info->fs_devices->num_devices; + + if (num_devices == 1) + flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); + if (num_devices < 4) + flags &= ~BTRFS_BLOCK_GROUP_RAID10; + if ((flags & BTRFS_BLOCK_GROUP_DUP) && (flags & (BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID10))) + BTRFS_BLOCK_GROUP_RAID10))) { flags &= ~BTRFS_BLOCK_GROUP_DUP; + } if ((flags & BTRFS_BLOCK_GROUP_RAID1) && - (flags & BTRFS_BLOCK_GROUP_RAID10)) + (flags & BTRFS_BLOCK_GROUP_RAID10)) { flags &= ~BTRFS_BLOCK_GROUP_RAID1; + } if ((flags & BTRFS_BLOCK_GROUP_RAID0) && ((flags & BTRFS_BLOCK_GROUP_RAID1) | @@ -1078,7 +1089,6 @@ static u64 reduce_alloc_profile(u64 flags) return flags; } - static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, u64 flags) @@ -1089,7 +1099,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 num_bytes; int ret; - flags = reduce_alloc_profile(flags); + flags = reduce_alloc_profile(extent_root, flags); space_info = __find_space_info(extent_root->fs_info, flags); if (!space_info) { @@ -1169,6 +1179,21 @@ static int update_block_group(struct btrfs_trans_handle *trans, return 0; } +static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) +{ + u64 start; + u64 end; + int ret; + ret = find_first_extent_bit(&root->fs_info->block_group_cache, + search_start, &start, &end, + BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | + BLOCK_GROUP_SYSTEM); + if (ret) + return 0; + return start; +} + + static int update_pinned_extents(struct btrfs_root *root, u64 bytenr, u64 num, int pin) { @@ -1185,16 +1210,25 @@ static int update_pinned_extents(struct btrfs_root *root, } while (num > 0) { cache = btrfs_lookup_block_group(fs_info, bytenr); - WARN_ON(!cache); - len = min(num, cache->key.offset - - (bytenr - cache->key.objectid)); + if (!cache) { + u64 first = first_logical_byte(root, bytenr); + WARN_ON(first < bytenr); + len = min(first - bytenr, num); + } else { + len = min(num, cache->key.offset - + (bytenr - cache->key.objectid)); + } if (pin) { - cache->pinned += len; - cache->space_info->bytes_pinned += len; + if (cache) { + cache->pinned += len; + cache->space_info->bytes_pinned += len; + } fs_info->total_pinned += len; } else { - cache->pinned -= len; - cache->space_info->bytes_pinned -= len; + if (cache) { + cache->pinned -= len; + cache->space_info->bytes_pinned -= len; + } fs_info->total_pinned -= len; } bytenr += len; @@ -1547,7 +1581,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, int data) { int ret; - u64 orig_search_start = search_start; + u64 orig_search_start; struct btrfs_root * root = orig_root->fs_info->extent_root; struct btrfs_fs_info *info = root->fs_info; u64 total_needed = num_bytes; @@ -1577,6 +1611,9 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, } } + search_start = max(search_start, first_logical_byte(root, 0)); + orig_search_start = search_start; + if (search_end == (u64)-1) search_end = btrfs_super_total_bytes(&info->super_copy); @@ -1751,7 +1788,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; } again: - data = reduce_alloc_profile(data); + data = reduce_alloc_profile(root, data); if (root->ref_cows) { if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { ret = do_chunk_alloc(trans, root->fs_info->extent_root, @@ -2309,6 +2346,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, struct file_ra_state *ra; unsigned long total_read = 0; unsigned long ra_pages; + struct btrfs_trans_handle *trans; ra = kzalloc(sizeof(*ra), GFP_NOFS); @@ -2326,9 +2364,13 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, calc_ra(i, last_index, ra_pages)); } total_read++; + if (((u64)i << PAGE_CACHE_SHIFT) > inode->i_size) + goto truncate_racing; + page = grab_cache_page(inode->i_mapping, i); - if (!page) + if (!page) { goto out_unlock; + } if (!PageUptodate(page)) { btrfs_readpage(NULL, page); lock_page(page); @@ -2350,20 +2392,33 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, lock_extent(io_tree, page_start, page_end, GFP_NOFS); - set_page_dirty(page); set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); + set_page_dirty(page); unlock_extent(io_tree, page_start, page_end, GFP_NOFS); unlock_page(page); page_cache_release(page); - balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); } + balance_dirty_pages_ratelimited_nr(inode->i_mapping, + total_read); out_unlock: kfree(ra); + trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); + if (trans) { + btrfs_add_ordered_inode(inode); + btrfs_end_transaction(trans, BTRFS_I(inode)->root); + mark_inode_dirty(inode); + } mutex_unlock(&inode->i_mutex); return 0; + +truncate_racing: + vmtruncate(inode, inode->i_size); + balance_dirty_pages_ratelimited_nr(inode->i_mapping, + total_read); + goto out_unlock; } /* @@ -2466,6 +2521,27 @@ out: return 0; } +static int noinline del_extent_zero(struct btrfs_root *extent_root, + struct btrfs_path *path, + struct btrfs_key *extent_key) +{ + int ret; + struct btrfs_trans_handle *trans; + + trans = btrfs_start_transaction(extent_root, 1); + ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); + if (ret > 0) { + ret = -EIO; + goto out; + } + if (ret < 0) + goto out; + ret = btrfs_del_item(trans, extent_root, path); +out: + btrfs_end_transaction(trans, extent_root); + return ret; +} + static int noinline relocate_one_extent(struct btrfs_root *extent_root, struct btrfs_path *path, struct btrfs_key *extent_key) @@ -2477,6 +2553,10 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, u32 item_size; int ret = 0; + if (extent_key->objectid == 0) { + ret = del_extent_zero(extent_root, path, extent_key); + goto out; + } key.objectid = extent_key->objectid; key.type = BTRFS_EXTENT_REF_KEY; key.offset = 0; @@ -2490,15 +2570,24 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, ret = 0; leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); - if (path->slots[0] == nritems) - goto out; + if (path->slots[0] == nritems) { + ret = btrfs_next_leaf(extent_root, path); + if (ret > 0) { + ret = 0; + goto out; + } + if (ret < 0) + goto out; + } btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid != extent_key->objectid) + if (found_key.objectid != extent_key->objectid) { break; + } - if (found_key.type != BTRFS_EXTENT_REF_KEY) + if (found_key.type != BTRFS_EXTENT_REF_KEY) { break; + } key.offset = found_key.offset + 1; item_size = btrfs_item_size_nr(leaf, path->slots[0]); @@ -2519,7 +2608,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; - num_devices = btrfs_super_num_devices(&root->fs_info->super_copy); + num_devices = root->fs_info->fs_devices->num_devices; if (num_devices == 1) { stripped |= BTRFS_BLOCK_GROUP_DUP; stripped = flags & ~stripped; @@ -2535,9 +2624,6 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) return flags; } else { /* they already had raid on here, just return */ - if ((flags & BTRFS_BLOCK_GROUP_DUP) && - (flags & BTRFS_BLOCK_GROUP_RAID1)) { - } if (flags & stripped) return flags; @@ -2570,7 +2656,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) struct extent_buffer *leaf; u32 nritems; int ret; - int progress = 0; + int progress; shrink_block_group = btrfs_lookup_block_group(root->fs_info, shrink_start); @@ -2597,6 +2683,7 @@ again: shrink_block_group->ro = 1; total_found = 0; + progress = 0; key.objectid = shrink_start; key.offset = 0; key.type = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f7beb9b0d37..b437d3bdf95 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2194,6 +2194,8 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, again: spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); + if (em) + em->bdev = root->fs_info->fs_devices->latest_bdev; spin_unlock(&em_tree->lock); if (em) { @@ -2212,7 +2214,7 @@ again: em->start = EXTENT_MAP_HOLE; em->len = (u64)-1; - em->bdev = inode->i_sb->s_bdev; + em->bdev = root->fs_info->fs_devices->latest_bdev; ret = btrfs_lookup_file_extent(trans, root, path, objectid, start, trans != NULL); if (ret < 0) { @@ -3101,6 +3103,27 @@ out: return ret; } +long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) +{ + struct btrfs_ioctl_vol_args *vol_args; + int ret; + + vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); + + if (!vol_args) + return -ENOMEM; + + if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { + ret = -EFAULT; + goto out; + } + ret = btrfs_rm_device(root, vol_args->name); + +out: + kfree(vol_args); + return ret; +} + int dup_item_to_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, @@ -3294,6 +3317,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_resize(root, (void __user *)arg); case BTRFS_IOC_ADD_DEV: return btrfs_ioctl_add_dev(root, (void __user *)arg); + case BTRFS_IOC_RM_DEV: + return btrfs_ioctl_rm_dev(root, (void __user *)arg); case BTRFS_IOC_BALANCE: return btrfs_balance(root->fs_info->dev_root); case BTRFS_IOC_CLONE: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7153dfaa340..020e5a83e31 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -315,24 +315,12 @@ static void btrfs_write_super(struct super_block *sb) sb->s_dirt = 0; } -/* - * This is almost a copy of get_sb_bdev in fs/super.c. - * We need the local copy to allow direct mounting of - * subvolumes, but this could be easily integrated back - * into the generic version. --hch - */ - -/* start copy & paste */ -static int set_bdev_super(struct super_block *s, void *data) +static int btrfs_test_super(struct super_block *s, void *data) { - s->s_bdev = data; - s->s_dev = s->s_bdev->bd_dev; - return 0; -} + struct btrfs_fs_devices *test_fs_devices = data; + struct btrfs_root *root = btrfs_sb(s); -static int test_bdev_super(struct super_block *s, void *data) -{ - return (void *)s->s_bdev == data; + return root->fs_info->fs_devices == test_fs_devices; } int btrfs_get_sb_bdev(struct file_system_type *fs_type, @@ -354,14 +342,9 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type, return error; bdev = fs_devices->lowest_bdev; - /* - * once the super is inserted into the list by sget, s_umount - * will protect the lockfs code from trying to start a snapshot - * while we are mounting - */ - down(&bdev->bd_mount_sem); - s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); - up(&bdev->bd_mount_sem); + btrfs_lock_volumes(); + s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); + btrfs_unlock_volumes(); if (IS_ERR(s)) goto error_s; @@ -373,13 +356,11 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type, goto error_bdev; } - close_bdev_excl(bdev); } else { char b[BDEVNAME_SIZE]; s->s_flags = flags; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); - sb_set_blocksize(s, block_size(bdev)); error = btrfs_fill_super(s, fs_devices, data, flags & MS_SILENT ? 1 : 0); if (error) { @@ -458,7 +439,7 @@ static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", .get_sb = btrfs_get_sb, - .kill_sb = kill_block_super, + .kill_sb = kill_anon_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 9826942fa18..57746c11eae 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -738,9 +738,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, chunk_root->node->start); btrfs_set_super_chunk_root_level(&root->fs_info->super_copy, btrfs_header_level(chunk_root->node)); - write_extent_buffer(root->fs_info->sb_buffer, - &root->fs_info->super_copy, 0, - sizeof(root->fs_info->super_copy)); + memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, + sizeof(root->fs_info->super_copy)); btrfs_copy_pinned(root, pinned_copy); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b3818757310..55da5f0c56e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -45,6 +45,16 @@ struct map_lookup { static DEFINE_MUTEX(uuid_mutex); static LIST_HEAD(fs_uuids); +void btrfs_lock_volumes(void) +{ + mutex_lock(&uuid_mutex); +} + +void btrfs_unlock_volumes(void) +{ + mutex_unlock(&uuid_mutex); +} + int btrfs_cleanup_fs_uuids(void) { struct btrfs_fs_devices *fs_devices; @@ -193,12 +203,14 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, ret = PTR_ERR(bdev); goto fail; } + set_blocksize(bdev, 4096); if (device->devid == fs_devices->latest_devid) fs_devices->latest_bdev = bdev; if (device->devid == fs_devices->lowest_devid) { fs_devices->lowest_bdev = bdev; } device->bdev = bdev; + } mutex_unlock(&uuid_mutex); return 0; @@ -393,6 +405,9 @@ int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_root *root = device->dev_root; struct btrfs_key key; + struct btrfs_key found_key; + struct extent_buffer *leaf = NULL; + struct btrfs_dev_extent *extent = NULL; path = btrfs_alloc_path(); if (!path) @@ -403,8 +418,25 @@ int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, key.type = BTRFS_DEV_EXTENT_KEY; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) { + ret = btrfs_previous_item(root, path, key.objectid, + BTRFS_DEV_EXTENT_KEY); + BUG_ON(ret); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_dev_extent); + BUG_ON(found_key.offset > start || found_key.offset + + btrfs_dev_extent_length(leaf, extent) < start); + ret = 0; + } else if (ret == 0) { + leaf = path->nodes[0]; + extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_dev_extent); + } BUG_ON(ret); + device->bytes_used -= btrfs_dev_extent_length(leaf, extent); ret = btrfs_del_item(trans, root, path); BUG_ON(ret); @@ -593,6 +625,170 @@ out: return ret; } +static int btrfs_rm_dev_item(struct btrfs_root *root, + struct btrfs_device *device) +{ + int ret; + struct btrfs_path *path; + struct block_device *bdev = device->bdev; + struct btrfs_device *next_dev; + struct btrfs_key key; + u64 total_bytes; + struct btrfs_fs_devices *fs_devices; + struct btrfs_trans_handle *trans; + + root = root->fs_info->chunk_root; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + trans = btrfs_start_transaction(root, 1); + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.type = BTRFS_DEV_ITEM_KEY; + key.offset = device->devid; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) + goto out; + + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + ret = btrfs_del_item(trans, root, path); + if (ret) + goto out; + + /* + * at this point, the device is zero sized. We want to + * remove it from the devices list and zero out the old super + */ + list_del_init(&device->dev_list); + list_del_init(&device->dev_alloc_list); + fs_devices = root->fs_info->fs_devices; + + next_dev = list_entry(fs_devices->devices.next, struct btrfs_device, + dev_list); + if (bdev == fs_devices->lowest_bdev) + fs_devices->lowest_bdev = next_dev->bdev; + if (bdev == root->fs_info->sb->s_bdev) + root->fs_info->sb->s_bdev = next_dev->bdev; + if (bdev == fs_devices->latest_bdev) + fs_devices->latest_bdev = next_dev->bdev; + + total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); + btrfs_set_super_total_bytes(&root->fs_info->super_copy, + total_bytes - device->total_bytes); + + total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); + btrfs_set_super_num_devices(&root->fs_info->super_copy, + total_bytes - 1); +out: + btrfs_free_path(path); + btrfs_commit_transaction(trans, root); + return ret; +} + +int btrfs_rm_device(struct btrfs_root *root, char *device_path) +{ + struct btrfs_device *device; + struct block_device *bdev; + struct buffer_head *bh; + struct btrfs_super_block *disk_super; + u64 all_avail; + u64 devid; + int ret = 0; + + mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&uuid_mutex); + + all_avail = root->fs_info->avail_data_alloc_bits | + root->fs_info->avail_system_alloc_bits | + root->fs_info->avail_metadata_alloc_bits; + + if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && + root->fs_info->fs_devices->num_devices <= 4) { + printk("btrfs: unable to go below four devices on raid10\n"); + ret = -EINVAL; + goto out; + } + + if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && + root->fs_info->fs_devices->num_devices <= 2) { + printk("btrfs: unable to go below two devices on raid1\n"); + ret = -EINVAL; + goto out; + } + + bdev = open_bdev_excl(device_path, 0, root->fs_info->bdev_holder); + if (IS_ERR(bdev)) { + ret = PTR_ERR(bdev); + goto out; + } + + bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096); + if (!bh) { + ret = -EIO; + goto error_close; + } + disk_super = (struct btrfs_super_block *)bh->b_data; + if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, + sizeof(disk_super->magic))) { + ret = -ENOENT; + goto error_brelse; + } + if (memcmp(disk_super->fsid, root->fs_info->fsid, BTRFS_FSID_SIZE)) { + ret = -ENOENT; + goto error_brelse; + } + devid = le64_to_cpu(disk_super->dev_item.devid); + device = btrfs_find_device(root, devid, NULL); + if (!device) { + ret = -ENOENT; + goto error_brelse; + } + + root->fs_info->fs_devices->num_devices--; + + ret = btrfs_shrink_device(device, 0); + if (ret) + goto error_brelse; + + + ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); + if (ret) + goto error_brelse; + + /* make sure this device isn't detected as part of the FS anymore */ + memset(&disk_super->magic, 0, sizeof(disk_super->magic)); + set_buffer_dirty(bh); + sync_dirty_buffer(bh); + + brelse(bh); + + /* one close for the device struct or super_block */ + close_bdev_excl(device->bdev); + + /* one close for us */ + close_bdev_excl(device->bdev); + + kfree(device->name); + kfree(device); + ret = 0; + goto out; + +error_brelse: + brelse(bh); +error_close: + close_bdev_excl(bdev); +out: + mutex_unlock(&uuid_mutex); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + int btrfs_init_new_device(struct btrfs_root *root, char *device_path) { struct btrfs_trans_handle *trans; @@ -831,13 +1027,17 @@ int btrfs_relocate_chunk(struct btrfs_root *root, em = lookup_extent_mapping(em_tree, chunk_offset, 1); spin_unlock(&em_tree->lock); - BUG_ON(em->start > chunk_offset || em->start + em->len < chunk_offset); + BUG_ON(em->start > chunk_offset || + em->start + em->len < chunk_offset); map = (struct map_lookup *)em->bdev; for (i = 0; i < map->num_stripes; i++) { ret = btrfs_free_dev_extent(trans, map->stripes[i].dev, map->stripes[i].physical); BUG_ON(ret); + + ret = btrfs_update_device(trans, map->stripes[i].dev); + BUG_ON(ret); } ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid, chunk_offset); @@ -847,11 +1047,8 @@ int btrfs_relocate_chunk(struct btrfs_root *root, if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); BUG_ON(ret); - goto out; } - - spin_lock(&em_tree->lock); remove_extent_mapping(em_tree, em); kfree(map); @@ -861,7 +1058,6 @@ int btrfs_relocate_chunk(struct btrfs_root *root, free_extent_map(em); spin_unlock(&em_tree->lock); -out: /* once for us */ free_extent_map(em); @@ -1449,7 +1645,7 @@ again: return 0; if (!em) { - printk("unable to find logical %Lu\n", logical); + printk("unable to find logical %Lu len %Lu\n", logical, *length); BUG(); } @@ -1712,6 +1908,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, logical = key->offset; length = btrfs_chunk_length(leaf, chunk); + spin_lock(&map_tree->map_tree.lock); em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); spin_unlock(&map_tree->map_tree.lock); @@ -1845,7 +2042,7 @@ int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf) int btrfs_read_sys_array(struct btrfs_root *root) { struct btrfs_super_block *super_copy = &root->fs_info->super_copy; - struct extent_buffer *sb = root->fs_info->sb_buffer; + struct extent_buffer *sb; struct btrfs_disk_key *disk_key; struct btrfs_chunk *chunk; u8 *ptr; @@ -1857,6 +2054,12 @@ int btrfs_read_sys_array(struct btrfs_root *root) u32 cur; struct btrfs_key key; + sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET, + BTRFS_SUPER_INFO_SIZE); + if (!sb) + return -ENOMEM; + btrfs_set_buffer_uptodate(sb); + write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); array_size = btrfs_super_sys_array_size(super_copy); ptr = super_copy->sys_chunk_array; @@ -1867,8 +2070,7 @@ int btrfs_read_sys_array(struct btrfs_root *root) disk_key = (struct btrfs_disk_key *)ptr; btrfs_disk_key_to_cpu(&key, disk_key); - len = sizeof(*disk_key); - ptr += len; + len = sizeof(*disk_key); ptr += len; sb_ptr += len; cur += len; @@ -1887,6 +2089,7 @@ int btrfs_read_sys_array(struct btrfs_root *root) sb_ptr += len; cur += len; } + free_extent_buffer(sb); return ret; } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index a9663e92bb1..0f94a69e6eb 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -125,6 +125,7 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); int btrfs_add_device(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_device *device); +int btrfs_rm_device(struct btrfs_root *root, char *device_path); int btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len); int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree, @@ -136,4 +137,6 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); int btrfs_init_new_device(struct btrfs_root *root, char *path); int btrfs_balance(struct btrfs_root *dev_root); +void btrfs_unlock_volumes(void); +void btrfs_lock_volumes(void); #endif -- cgit v1.2.3-70-g09d2 From bf4ef67924d87b0addb32f084e83a9283496350e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 8 May 2008 13:26:18 -0400 Subject: Btrfs: Properly find the root for snapshotted blocks during chunk relocation Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 + fs/btrfs/extent-tree.c | 120 ++++++++++++++++++++++++++++++++++++++++++++----- fs/btrfs/root-tree.c | 46 ++++++++++++++++++- 3 files changed, 156 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 33ab165591c..f4b4677bec7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1459,6 +1459,8 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *item); int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key); +int btrfs_search_root(struct btrfs_root *root, u64 search_start, + u64 *found_objectid); int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, struct btrfs_root *latest_root); /* dir-item.c */ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c0e67bde842..8e69b5acfef 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2421,6 +2421,85 @@ truncate_racing: goto out_unlock; } +/* + * The back references tell us which tree holds a ref on a block, + * but it is possible for the tree root field in the reference to + * reflect the original root before a snapshot was made. In this + * case we should search through all the children of a given root + * to find potential holders of references on a block. + * + * Instead, we do something a little less fancy and just search + * all the roots for a given key/block combination. + */ +static int find_root_for_ref(struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *key0, + int level, + int file_key, + struct btrfs_root **found_root, + u64 bytenr) +{ + struct btrfs_key root_location; + struct btrfs_root *cur_root = *found_root; + struct btrfs_file_extent_item *file_extent; + u64 root_search_start = BTRFS_FS_TREE_OBJECTID; + u64 found_bytenr; + int ret; + int i; + + root_location.offset = (u64)-1; + root_location.type = BTRFS_ROOT_ITEM_KEY; + path->lowest_level = level; + path->reada = 0; + while(1) { + ret = btrfs_search_slot(NULL, cur_root, key0, path, 0, 0); + found_bytenr = 0; + if (ret == 0 && file_key) { + struct extent_buffer *leaf = path->nodes[0]; + file_extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(leaf, file_extent) == + BTRFS_FILE_EXTENT_REG) { + found_bytenr = + btrfs_file_extent_disk_bytenr(leaf, + file_extent); + } + } else if (ret == 0) { + if (path->nodes[level]) + found_bytenr = path->nodes[level]->start; + } + + for (i = level; i < BTRFS_MAX_LEVEL; i++) { + if (!path->nodes[i]) + break; + free_extent_buffer(path->nodes[i]); + path->nodes[i] = NULL; + } + btrfs_release_path(cur_root, path); + + if (found_bytenr == bytenr) { + *found_root = cur_root; + ret = 0; + goto out; + } + ret = btrfs_search_root(root->fs_info->tree_root, + root_search_start, &root_search_start); + if (ret) + break; + + root_location.objectid = root_search_start; + cur_root = btrfs_read_fs_root_no_name(root->fs_info, + &root_location); + if (!cur_root) { + ret = 1; + break; + } + } +out: + path->lowest_level = 0; + return ret; +} + /* * note, this releases the path */ @@ -2430,13 +2509,15 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, { struct inode *inode; struct btrfs_root *found_root; - struct btrfs_key *root_location; + struct btrfs_key root_location; + struct btrfs_key found_key; struct btrfs_extent_ref *ref; u64 ref_root; u64 ref_gen; u64 ref_objectid; u64 ref_offset; int ret; + int level; ref = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_extent_ref); @@ -2446,20 +2527,30 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, ref_offset = btrfs_ref_offset(path->nodes[0], ref); btrfs_release_path(extent_root, path); - root_location = kmalloc(sizeof(*root_location), GFP_NOFS); - root_location->objectid = ref_root; + root_location.objectid = ref_root; if (ref_gen == 0) - root_location->offset = 0; + root_location.offset = 0; else - root_location->offset = (u64)-1; - root_location->type = BTRFS_ROOT_ITEM_KEY; + root_location.offset = (u64)-1; + root_location.type = BTRFS_ROOT_ITEM_KEY; found_root = btrfs_read_fs_root_no_name(extent_root->fs_info, - root_location); + &root_location); BUG_ON(!found_root); - kfree(root_location); if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) { + found_key.objectid = ref_objectid; + found_key.type = BTRFS_EXTENT_DATA_KEY; + found_key.offset = ref_offset; + level = 0; + + ret = find_root_for_ref(extent_root, path, &found_key, + level, 1, &found_root, + extent_key->objectid); + + if (ret) + goto out; + mutex_unlock(&extent_root->fs_info->fs_mutex); inode = btrfs_iget_locked(extent_root->fs_info->sb, ref_objectid, found_root); @@ -2485,12 +2576,9 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, mutex_lock(&extent_root->fs_info->fs_mutex); } else { struct btrfs_trans_handle *trans; - struct btrfs_key found_key; struct extent_buffer *eb; - int level; int i; - trans = btrfs_start_transaction(found_root, 1); eb = read_tree_block(found_root, extent_key->objectid, extent_key->offset); level = btrfs_header_level(eb); @@ -2502,6 +2590,15 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, free_extent_buffer(eb); + ret = find_root_for_ref(extent_root, path, &found_key, + level, 0, &found_root, + extent_key->objectid); + + if (ret) + goto out; + + trans = btrfs_start_transaction(found_root, 1); + path->lowest_level = level; path->reada = 2; ret = btrfs_search_slot(trans, found_root, &found_key, path, @@ -2578,6 +2675,7 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, } if (ret < 0) goto out; + leaf = path->nodes[0]; } btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 08f19ec8809..8bf21ba0a43 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -21,6 +21,51 @@ #include "disk-io.h" #include "print-tree.h" +/* + * returns 0 on finding something, 1 if no more roots are there + * and < 0 on error + */ +int btrfs_search_root(struct btrfs_root *root, u64 search_start, + u64 *found_objectid) +{ + struct btrfs_path *path; + struct btrfs_key search_key; + int ret; + + root = root->fs_info->tree_root; + search_key.objectid = search_start; + search_key.type = (u8)-1; + search_key.offset = (u64)-1; + + path = btrfs_alloc_path(); + BUG_ON(!path); +again: + ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); + if (ret < 0) + goto out; + if (ret == 0) { + ret = 1; + goto out; + } + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret) + goto out; + } + btrfs_item_key_to_cpu(path->nodes[0], &search_key, path->slots[0]); + if (search_key.type != BTRFS_ROOT_ITEM_KEY) { + search_key.offset++; + btrfs_release_path(root, path); + goto again; + } + ret = 0; + *found_objectid = search_key.objectid; + +out: + btrfs_free_path(path); + return ret; +} + int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key) { @@ -55,7 +100,6 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, memcpy(key, &found_key, sizeof(found_key)); ret = 0; out: - btrfs_release_path(root, path); btrfs_free_path(path); return ret; } -- cgit v1.2.3-70-g09d2 From a68d5933a0e409592860229b35230c8e87155472 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 8 May 2008 14:11:56 -0400 Subject: Btrfs: Update nodatacow mode to support cloned single files and resizing Before, nodatacow only checked to make sure multiple roots didn't have references on a single extent. This check makes sure that multiple inodes don't have references. nodatacow needed an extra check to see if the block group was currently readonly. This way cows forced by the chunk relocation code are honored. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 9 +++++++++ fs/btrfs/inode.c | 13 +++++++++---- 3 files changed, 19 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f4b4677bec7..6c2c2c4e4d2 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1325,7 +1325,7 @@ static inline struct dentry *fdentry(struct file *file) { /* extent-tree.c */ u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, struct btrfs_path *count_path, - u64 first_extent); + u64 expected_owner, u64 first_extent); int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8e69b5acfef..30a5094fffa 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -721,12 +721,14 @@ out: u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, struct btrfs_path *count_path, + u64 expected_owner, u64 first_extent) { struct btrfs_root *extent_root = root->fs_info->extent_root; struct btrfs_path *path; u64 bytenr; u64 found_objectid; + u64 found_owner; u64 root_objectid = root->root_key.objectid; u32 total_count = 0; u32 cur_count; @@ -792,6 +794,13 @@ again: total_count = 2; goto out; } + if (level == -1) { + found_owner = btrfs_ref_objectid(l, ref_item); + if (found_owner != expected_owner) { + total_count = 2; + goto out; + } + } total_count = 1; path->slots[0]++; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b437d3bdf95..1bf37d15b17 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -172,6 +172,7 @@ static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end) u64 loops = 0; u64 total_fs_bytes; struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_block_group_cache *block_group; struct extent_buffer *leaf; int found_type; struct btrfs_path *path; @@ -230,16 +231,20 @@ again: if (bytenr == 0) goto not_found; + if (btrfs_count_snapshots_in_path(root, path, inode->i_ino, + bytenr) != 1) { + goto not_found; + } + /* * we may be called by the resizer, make sure we're inside * the limits of the FS */ - if (bytenr + extent_num_bytes > total_fs_bytes) + block_group = btrfs_lookup_block_group(root->fs_info, + bytenr); + if (!block_group || block_group->ro) goto not_found; - if (btrfs_count_snapshots_in_path(root, path, bytenr) != 1) { - goto not_found; - } start = extent_end; } else { -- cgit v1.2.3-70-g09d2 From bbaf549e0c3d28399fc5abd68020d4025ae5c3a7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 8 May 2008 16:31:21 -0400 Subject: Btrfs: A number of nodatacow fixes Once part of a delalloc request fails the cow checks, just cow the entire range It is possible for the back references to all be from the same root, but still have snapshots against an extent. The checks are now more strict, forcing cow any time there are multiple refs against the data extent. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 20 ++++++++++++++++++++ fs/btrfs/inode.c | 8 +++----- 2 files changed, 23 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 30a5094fffa..db996f0edf0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -731,6 +731,7 @@ u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, u64 found_owner; u64 root_objectid = root->root_key.objectid; u32 total_count = 0; + u32 extent_refs; u32 cur_count; u32 nritems; int ret; @@ -767,6 +768,7 @@ again: } item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); + extent_refs = btrfs_extent_refs(l, item); while (1) { l = path->nodes[0]; nritems = btrfs_header_nritems(l); @@ -800,10 +802,28 @@ again: total_count = 2; goto out; } + /* + * nasty. we don't count a reference held by + * the running transaction. This allows nodatacow + * to avoid cow most of the time + */ + if (found_owner >= BTRFS_FIRST_FREE_OBJECTID && + btrfs_ref_generation(l, ref_item) == + root->fs_info->generation) { + extent_refs--; + } } total_count = 1; path->slots[0]++; } + /* + * if there is more than one reference against a data extent, + * we have to assume the other ref is another snapshot + */ + if (level == -1 && extent_refs > 1) { + total_count = 2; + goto out; + } if (cur_count == 0) { total_count = 0; goto out; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1bf37d15b17..a492fd238c8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -207,9 +207,8 @@ again: btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); found_type = btrfs_key_type(&found_key); if (found_key.objectid != inode->i_ino || - found_type != BTRFS_EXTENT_DATA_KEY) { + found_type != BTRFS_EXTENT_DATA_KEY) goto not_found; - } found_type = btrfs_file_extent_type(leaf, item); extent_start = found_key.offset; @@ -245,7 +244,6 @@ again: if (!block_group || block_group->ro) goto not_found; - start = extent_end; } else { goto not_found; @@ -260,8 +258,8 @@ loop: goto again; not_found: - cow_file_range(inode, start, cow_end); - start = cow_end + 1; + cow_file_range(inode, start, end); + start = end + 1; goto loop; } -- cgit v1.2.3-70-g09d2 From 323da79c9f096ed4da04e5ea00f766f75b28aeaa Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 9 May 2008 11:46:48 -0400 Subject: Btrfs: Chunk relocation fine tuning, and add a few printks to show progress Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 20 +++++++++++++++++--- fs/btrfs/volumes.c | 2 ++ 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index db996f0edf0..df95fb660d4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2493,7 +2493,7 @@ static int find_root_for_ref(struct btrfs_root *root, btrfs_file_extent_disk_bytenr(leaf, file_extent); } - } else if (ret == 0) { + } else if (!file_key) { if (path->nodes[level]) found_bytenr = path->nodes[level]->start; } @@ -2797,14 +2797,25 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) root = root->fs_info->extent_root; path->reada = 2; + printk("btrfs relocating block group %llu flags %llu\n", + (unsigned long long)shrink_start, + (unsigned long long)shrink_block_group->flags); + again: if (btrfs_block_group_used(&shrink_block_group->item) > 0) { + u64 calc; + trans = btrfs_start_transaction(root, 1); new_alloc_flags = update_block_group_flags(root, shrink_block_group->flags); + if (new_alloc_flags != shrink_block_group->flags) { + calc = + btrfs_block_group_used(&shrink_block_group->item); + } else { + calc = shrink_block_group->key.offset; + } do_chunk_alloc(trans, root->fs_info->extent_root, - btrfs_block_group_used(&shrink_block_group->item) + - 2 * 1024 * 1024, new_alloc_flags); + calc + 2 * 1024 * 1024, new_alloc_flags); btrfs_end_transaction(trans, root); } shrink_block_group->ro = 1; @@ -2888,6 +2899,9 @@ next: btrfs_release_path(root, path); if (total_found > 0) { + printk("btrfs relocate found %llu last extent was %llu\n", + (unsigned long long)total_found, + (unsigned long long)found_key.objectid); trans = btrfs_start_transaction(tree_root, 1); btrfs_commit_transaction(trans, tree_root); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index aa001b08ced..b5d7bd1915b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1011,6 +1011,8 @@ int btrfs_relocate_chunk(struct btrfs_root *root, int ret; int i; + printk("btrfs relocating chunk %llu\n", + (unsigned long long)chunk_offset); root = root->fs_info->chunk_root; extent_root = root->fs_info->extent_root; em_tree = &root->fs_info->mapping_tree.map_tree; -- cgit v1.2.3-70-g09d2 From ca7a79ad8dbe24669a8ced01f9fc0126b872745d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 12 May 2008 12:59:19 -0400 Subject: Btrfs: Pass down the expected generation number when reading tree blocks Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 55 +++++++++++++++++++------------------------------- fs/btrfs/disk-io.c | 30 ++++++++++++--------------- fs/btrfs/disk-io.h | 7 ++++--- fs/btrfs/extent-tree.c | 15 ++++++++------ fs/btrfs/print-tree.c | 3 ++- fs/btrfs/tree-defrag.c | 9 ++++++--- 6 files changed, 55 insertions(+), 64 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 29e5674b043..289d71d8653 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -302,6 +302,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct extent_buffer *cur; struct extent_buffer *tmp; u64 blocknr; + u64 gen; u64 search_start = *last_ret; u64 last_block = 0; u64 other; @@ -354,6 +355,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, progress_passed = 1; blocknr = btrfs_node_blockptr(parent, i); + gen = btrfs_node_ptr_generation(parent, i); if (last_block == 0) last_block = blocknr; @@ -387,15 +389,14 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, } if (!cur) { cur = read_tree_block(root, blocknr, - blocksize); + blocksize, gen); } else if (!uptodate) { - btrfs_read_buffer(cur); + btrfs_read_buffer(cur, gen); } } if (search_start == 0) search_start = last_block; - btrfs_verify_block_csum(root, cur); err = __btrfs_cow_block(trans, root, cur, parent, i, &tmp, search_start, min(16 * blocksize, @@ -696,12 +697,17 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, static struct extent_buffer *read_node_slot(struct btrfs_root *root, struct extent_buffer *parent, int slot) { + int level = btrfs_header_level(parent); if (slot < 0) return NULL; if (slot >= btrfs_header_nritems(parent)) return NULL; + + BUG_ON(level == 0); + return read_tree_block(root, btrfs_node_blockptr(parent, slot), - btrfs_level_size(root, btrfs_header_level(parent) - 1)); + btrfs_level_size(root, level - 1), + btrfs_node_ptr_generation(parent, slot)); } static int balance_level(struct btrfs_trans_handle *trans, @@ -1076,7 +1082,8 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, if ((search >= lowest_read && search <= highest_read) || (search < lowest_read && lowest_read - search <= 32768) || (search > highest_read && search - highest_read <= 32768)) { - readahead_tree_block(root, search, blocksize); + readahead_tree_block(root, search, blocksize, + btrfs_node_ptr_generation(node, nr)); nread += blocksize; } nscan++; @@ -1109,8 +1116,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root ins_len, int cow) { struct extent_buffer *b; - u64 bytenr; - u64 ptr_gen; int slot; int ret; int level; @@ -1174,20 +1179,12 @@ again: /* this is only true while dropping a snapshot */ if (level == lowest_level) break; - bytenr = btrfs_node_blockptr(b, slot); - ptr_gen = btrfs_node_ptr_generation(b, slot); + if (should_reada) reada_for_search(root, p, level, slot, key->objectid); - b = read_tree_block(root, bytenr, - btrfs_level_size(root, level - 1)); - if (ptr_gen != btrfs_header_generation(b)) { - printk("block %llu bad gen wanted %llu " - "found %llu\n", - (unsigned long long)b->start, - (unsigned long long)ptr_gen, - (unsigned long long)btrfs_header_generation(b)); - } + + b = read_node_slot(root, b, slot); } else { p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, b) < @@ -1650,8 +1647,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root if (slot >= btrfs_header_nritems(upper) - 1) return 1; - right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1), - root->leafsize); + right = read_node_slot(root, upper, slot + 1); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { free_extent_buffer(right); @@ -1826,8 +1822,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } - left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1], - slot - 1), root->leafsize); + left = read_node_slot(root, path->nodes[1], slot - 1); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { free_extent_buffer(left); @@ -2742,7 +2737,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, */ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) { - u64 bytenr; int slot; int level = 1; struct extent_buffer *c; @@ -2762,12 +2756,10 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) } slot--; - bytenr = btrfs_node_blockptr(c, slot); if (next) free_extent_buffer(next); - next = read_tree_block(root, bytenr, - btrfs_level_size(root, level - 1)); + next = read_node_slot(root, c, slot); break; } path->slots[level] = slot; @@ -2782,8 +2774,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) path->slots[level] = slot; if (!level) break; - next = read_tree_block(root, btrfs_node_blockptr(next, slot), - btrfs_level_size(root, level - 1)); + next = read_node_slot(root, next, slot); } return 0; } @@ -2797,7 +2788,6 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) { int slot; int level = 1; - u64 bytenr; struct extent_buffer *c; struct extent_buffer *next = NULL; @@ -2814,15 +2804,13 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) continue; } - bytenr = btrfs_node_blockptr(c, slot); if (next) free_extent_buffer(next); if (path->reada) reada_for_search(root, path, level, slot, 0); - next = read_tree_block(root, bytenr, - btrfs_level_size(root, level -1)); + next = read_node_slot(root, c, slot); break; } path->slots[level] = slot; @@ -2836,8 +2824,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) break; if (path->reada) reada_for_search(root, path, level, 0, 0); - next = read_tree_block(root, btrfs_node_blockptr(next, 0), - btrfs_level_size(root, level - 1)); + next = read_node_slot(root, next, 0); } return 0; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c3083165b17..edee7a44f86 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -207,7 +207,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, static int btree_read_extent_buffer_pages(struct btrfs_root *root, struct extent_buffer *eb, - u64 start) + u64 start, u64 parent_transid) { struct extent_io_tree *io_tree; int ret; @@ -254,7 +254,8 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) WARN_ON(1); } eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); - ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE); + ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, + btrfs_header_generation(eb)); BUG_ON(ret); btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); @@ -562,7 +563,8 @@ static struct address_space_operations btree_aops = { .sync_page = block_sync_page, }; -int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) +int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, + u64 parent_transid) { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; @@ -592,12 +594,6 @@ static int close_all_devices(struct btrfs_fs_info *fs_info) return 0; } -int btrfs_verify_block_csum(struct btrfs_root *root, - struct extent_buffer *buf) -{ - return btrfs_buffer_uptodate(buf); -} - struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { @@ -621,7 +617,7 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, - u32 blocksize) + u32 blocksize, u64 parent_transid) { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; @@ -634,7 +630,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, if (!buf) return NULL; - ret = btree_read_extent_buffer_pages(root, buf, 0); + ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); if (ret == 0) { buf->flags |= EXTENT_UPTODATE; @@ -715,7 +711,7 @@ static int find_and_setup_root(struct btrfs_root *tree_root, blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), - blocksize); + blocksize, 0); BUG_ON(!root->node); return 0; } @@ -771,7 +767,7 @@ out: } blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), - blocksize); + blocksize, 0); BUG_ON(!root->node); insert: root->ref_cows = 1; @@ -1288,7 +1284,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, chunk_root->node = read_tree_block(chunk_root, btrfs_super_chunk_root(disk_super), - blocksize); + blocksize, 0); BUG_ON(!chunk_root->node); read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, @@ -1304,7 +1300,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super), - blocksize); + blocksize, 0); if (!tree_root->node) goto fail_sb_buffer; @@ -1732,11 +1728,11 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf) EXTENT_DEFRAG, GFP_NOFS); } -int btrfs_read_buffer(struct extent_buffer *buf) +int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; int ret; - ret = btree_read_extent_buffer_pages(root, buf, 0); + ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); if (ret == 0) { buf->flags |= EXTENT_UPTODATE; } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 5d5f6979c41..e29c895d523 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -25,8 +25,9 @@ struct btrfs_device; struct btrfs_fs_devices; struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, - u32 blocksize); -int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); + u32 blocksize, u64 parent_transid); +int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, + u64 parent_transid); struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); int clean_tree_block(struct btrfs_trans_handle *trans, @@ -65,7 +66,7 @@ int btrfs_buffer_defrag(struct extent_buffer *buf); int btrfs_buffer_defrag_done(struct extent_buffer *buf); int btrfs_clear_buffer_defrag(struct extent_buffer *buf); int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf); -int btrfs_read_buffer(struct extent_buffer *buf); +int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); void btrfs_csum_final(u32 crc, char *result); void btrfs_throttle(struct btrfs_root *root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index df95fb660d4..db07dde4a87 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1338,7 +1338,8 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, &extent_item, sizeof(extent_item)); clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, GFP_NOFS); - eb = read_tree_block(extent_root, ins.objectid, ins.offset); + eb = read_tree_block(extent_root, ins.objectid, ins.offset, + trans->transid); level = btrfs_header_level(eb); if (level == 0) { btrfs_item_key(eb, &first, 0); @@ -2076,7 +2077,8 @@ static void noinline reada_walk_down(struct btrfs_root *root, } } mutex_unlock(&root->fs_info->fs_mutex); - ret = readahead_tree_block(root, bytenr, blocksize); + ret = readahead_tree_block(root, bytenr, blocksize, + btrfs_node_ptr_generation(node, i)); last = bytenr + blocksize; cond_resched(); mutex_lock(&root->fs_info->fs_mutex); @@ -2096,6 +2098,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, u64 root_owner; u64 root_gen; u64 bytenr; + u64 ptr_gen; struct extent_buffer *next; struct extent_buffer *cur; struct extent_buffer *parent; @@ -2132,6 +2135,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, break; } bytenr = btrfs_node_blockptr(cur, path->slots[*level]); + ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); blocksize = btrfs_level_size(root, *level - 1); ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs); BUG_ON(ret); @@ -2152,7 +2156,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, reada_walk_down(root, cur, path->slots[*level]); mutex_unlock(&root->fs_info->fs_mutex); - next = read_tree_block(root, bytenr, blocksize); + next = read_tree_block(root, bytenr, blocksize, + ptr_gen); mutex_lock(&root->fs_info->fs_mutex); /* we've dropped the lock, double check */ @@ -2173,8 +2178,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, BUG_ON(ret); continue; } - } else if (next) { - btrfs_verify_block_csum(root, next); } WARN_ON(*level <= 0); if (path->nodes[*level-1]) @@ -2609,7 +2612,7 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, int i; eb = read_tree_block(found_root, extent_key->objectid, - extent_key->offset); + extent_key->offset, 0); level = btrfs_header_level(eb); if (level == 0) diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index e99f3249d05..14d86372030 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -186,7 +186,8 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) for (i = 0; i < nr; i++) { struct extent_buffer *next = read_tree_block(root, btrfs_node_blockptr(c, i), - btrfs_level_size(root, level - 1)); + btrfs_level_size(root, level - 1), + btrfs_node_ptr_generation(c, i)); if (btrfs_is_leaf(next) && btrfs_header_level(c) != 1) BUG(); diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 256af1870ee..5085e9e693b 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -28,6 +28,7 @@ static void reada_defrag(struct btrfs_root *root, int i; u32 nritems; u64 bytenr; + u64 gen; u32 blocksize; int ret; @@ -35,7 +36,8 @@ static void reada_defrag(struct btrfs_root *root, nritems = btrfs_header_nritems(node); for (i = 0; i < nritems; i++) { bytenr = btrfs_node_blockptr(node, i); - ret = readahead_tree_block(root, bytenr, blocksize); + gen = btrfs_node_ptr_generation(node, i); + ret = readahead_tree_block(root, bytenr, blocksize, gen); if (ret) break; } @@ -101,10 +103,11 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, path->slots[*level]++; continue; } - btrfs_verify_block_csum(root, next); } else { next = read_tree_block(root, bytenr, - btrfs_level_size(root, *level - 1)); + btrfs_level_size(root, *level - 1), + btrfs_node_ptr_generation(cur, + path->slots[*level])); } ret = btrfs_cow_block(trans, root, next, path->nodes[*level], path->slots[*level], &next); -- cgit v1.2.3-70-g09d2 From 1259ab75c62462b8ffad90067b5e1f6312786a18 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 12 May 2008 13:39:03 -0400 Subject: Btrfs: Handle write errors on raid1 and raid10 When duplicate copies exist, writes are allowed to fail to one of those copies. This changeset includes a few changes that allow the FS to continue even when some IOs fail. It also adds verification of the parent generation number for btree blocks. This generation is stored in the pointer to a block, and it ensures that missed writes to are detected. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 2 +- fs/btrfs/disk-io.c | 56 ++++++++++++++++++++++++++++++++++++++---- fs/btrfs/disk-io.h | 2 +- fs/btrfs/extent-tree.c | 4 +-- fs/btrfs/extent_io.c | 49 +++++++++++++++++++++++++++++++------ fs/btrfs/extent_io.h | 7 +++++- fs/btrfs/inode.c | 66 +++++++++++++++++++++++++++++++------------------- fs/btrfs/tree-defrag.c | 7 +++--- fs/btrfs/volumes.c | 11 ++++++--- 9 files changed, 155 insertions(+), 49 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 289d71d8653..02e571e6ee6 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -379,7 +379,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, cur = btrfs_find_tree_block(root, blocknr, blocksize); if (cur) - uptodate = btrfs_buffer_uptodate(cur); + uptodate = btrfs_buffer_uptodate(cur, gen); else uptodate = 0; if (!cur || !uptodate) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index edee7a44f86..574b1245964 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -205,6 +205,33 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, return 0; } +static int verify_parent_transid(struct extent_io_tree *io_tree, + struct extent_buffer *eb, u64 parent_transid) +{ + int ret; + + if (!parent_transid || btrfs_header_generation(eb) == parent_transid) + return 0; + + lock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); + if (extent_buffer_uptodate(io_tree, eb) && + btrfs_header_generation(eb) == parent_transid) { + ret = 0; + goto out; + } + printk("parent transid verify failed on %llu wanted %llu found %llu\n", + (unsigned long long)eb->start, + (unsigned long long)parent_transid, + (unsigned long long)btrfs_header_generation(eb)); + ret = 1; +out: + clear_extent_buffer_uptodate(io_tree, eb); + unlock_extent(io_tree, eb->start, eb->start + eb->len - 1, + GFP_NOFS); + return ret; + +} + static int btree_read_extent_buffer_pages(struct btrfs_root *root, struct extent_buffer *eb, u64 start, u64 parent_transid) @@ -218,7 +245,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, while (1) { ret = read_extent_buffer_pages(io_tree, eb, start, 1, btree_get_extent, mirror_num); - if (!ret) + if (!ret && + !verify_parent_transid(io_tree, eb, parent_transid)) return ret; num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, @@ -330,6 +358,13 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, ret = -EIO; goto err; } + if (memcmp_extent_buffer(eb, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(eb), + BTRFS_FSID_SIZE)) { + printk("bad fsid on block %Lu\n", eb->start); + ret = -EIO; + goto err; + } found_level = btrfs_header_level(eb); ret = csum_tree_block(root, eb, 1); @@ -1363,7 +1398,9 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) "I/O error on %s\n", bdevname(bh->b_bdev, b)); } - set_buffer_write_io_error(bh); + /* note, we dont' set_buffer_write_io_error because we have + * our own ways of dealing with the IO errors + */ clear_buffer_uptodate(bh); } unlock_buffer(bh); @@ -1459,7 +1496,8 @@ int write_all_supers(struct btrfs_root *root) ret = submit_bh(WRITE, bh); BUG_ON(ret); wait_on_buffer(bh); - BUG_ON(!buffer_uptodate(bh)); + if (!buffer_uptodate(bh)) + total_errors++; } else { total_errors++; } @@ -1607,10 +1645,18 @@ int close_ctree(struct btrfs_root *root) return 0; } -int btrfs_buffer_uptodate(struct extent_buffer *buf) +int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) { + int ret; struct inode *btree_inode = buf->first_page->mapping->host; - return extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); + + ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); + if (!ret) + return ret; + + ret = verify_parent_transid(&BTRFS_I(btree_inode)->io_tree, buf, + parent_transid); + return !ret; } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index e29c895d523..30d1ed293c2 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -56,7 +56,7 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); void btrfs_mark_buffer_dirty(struct extent_buffer *buf); -int btrfs_buffer_uptodate(struct extent_buffer *buf); +int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); int btrfs_set_buffer_uptodate(struct extent_buffer *buf); int wait_on_tree_block_writeback(struct btrfs_root *root, struct extent_buffer *buf); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index db07dde4a87..605018c6045 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1366,7 +1366,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, if (!pending) { buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (buf) { - if (btrfs_buffer_uptodate(buf)) { + if (btrfs_buffer_uptodate(buf, 0)) { u64 transid = root->fs_info->running_transaction->transid; u64 header_transid = @@ -2151,7 +2151,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, continue; } next = btrfs_find_tree_block(root, bytenr, blocksize); - if (!next || !btrfs_buffer_uptodate(next)) { + if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { free_extent_buffer(next); reada_walk_down(root, cur, path->slots[*level]); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index dd403b426ff..2a3624adc0c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1366,7 +1366,7 @@ static int end_bio_extent_writepage(struct bio *bio, unsigned int bytes_done, int err) #endif { - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + int uptodate = err == 0; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; struct extent_state *state = bio->bi_private; struct extent_io_tree *tree = state->tree; @@ -1375,6 +1375,7 @@ static int end_bio_extent_writepage(struct bio *bio, u64 end; u64 cur; int whole_page; + int ret; unsigned long flags; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) @@ -1395,17 +1396,30 @@ static int end_bio_extent_writepage(struct bio *bio, if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); + if (tree->ops && tree->ops->writepage_end_io_hook) { + ret = tree->ops->writepage_end_io_hook(page, start, + end, state); + if (ret) + uptodate = 0; + } + + if (!uptodate && tree->ops && + tree->ops->writepage_io_failed_hook) { + ret = tree->ops->writepage_io_failed_hook(bio, page, + start, end, state); + if (ret == 0) { + state = NULL; + uptodate = (err == 0); + continue; + } + } + if (!uptodate) { clear_extent_uptodate(tree, start, end, GFP_ATOMIC); ClearPageUptodate(page); SetPageError(page); } - if (tree->ops && tree->ops->writepage_end_io_hook) { - tree->ops->writepage_end_io_hook(page, start, end, - state); - } - /* * bios can get merged in funny ways, and so we need to * be careful with the state variable. We know the @@ -2073,9 +2087,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, } else { ret = 0; } - if (ret) + if (ret) { SetPageError(page); - else { + } else { unsigned long max_nr = end_index + 1; set_range_writeback(tree, cur, cur + iosize - 1); if (!PageWriteback(page)) { @@ -2948,6 +2962,25 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, } EXPORT_SYMBOL(set_extent_buffer_dirty); +int clear_extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + unsigned long i; + struct page *page; + unsigned long num_pages; + + num_pages = num_extent_pages(eb->start, eb->len); + eb->flags &= ~EXTENT_UPTODATE; + + clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, + GFP_NOFS); + for (i = 0; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + ClearPageUptodate(page); + } + return 0; +} + int set_extent_buffer_uptodate(struct extent_io_tree *tree, struct extent_buffer *eb) { diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index e4834614751..f1960dafaa1 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -36,9 +36,12 @@ struct extent_io_ops { int (*readpage_io_failed_hook)(struct bio *bio, struct page *page, u64 start, u64 end, struct extent_state *state); + int (*writepage_io_failed_hook)(struct bio *bio, struct page *page, + u64 start, u64 end, + struct extent_state *state); int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state); - void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, + int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state); int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits); @@ -212,6 +215,8 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, struct extent_buffer *eb); int set_extent_buffer_uptodate(struct extent_io_tree *tree, struct extent_buffer *eb); +int clear_extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb); int extent_buffer_uptodate(struct extent_io_tree *tree, struct extent_buffer *eb); int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a492fd238c8..08760ff9bab 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -430,9 +430,9 @@ struct io_failure_record { int last_mirror; }; -int btrfs_readpage_io_failed_hook(struct bio *failed_bio, - struct page *page, u64 start, u64 end, - struct extent_state *state) +int btrfs_io_failed_hook(struct bio *failed_bio, + struct page *page, u64 start, u64 end, + struct extent_state *state) { struct io_failure_record *failrec = NULL; u64 private; @@ -443,6 +443,7 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio, struct bio *bio; int num_copies; int ret; + int rw; u64 logical; ret = get_state_private(failure_tree, start, &private); @@ -505,7 +506,41 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio, bio->bi_bdev = failed_bio->bi_bdev; bio->bi_size = 0; bio_add_page(bio, page, failrec->len, start - page_offset(page)); - btrfs_submit_bio_hook(inode, READ, bio, failrec->last_mirror); + if (failed_bio->bi_rw & (1 << BIO_RW)) + rw = WRITE; + else + rw = READ; + + BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, + failrec->last_mirror); + return 0; +} + +int btrfs_clean_io_failures(struct inode *inode, u64 start) +{ + u64 private; + u64 private_failure; + struct io_failure_record *failure; + int ret; + + private = 0; + if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, + (u64)-1, 1, EXTENT_DIRTY)) { + ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, + start, &private_failure); + if (ret == 0) { + failure = (struct io_failure_record *)(unsigned long) + private_failure; + set_state_private(&BTRFS_I(inode)->io_failure_tree, + failure->start, 0); + clear_extent_bits(&BTRFS_I(inode)->io_failure_tree, + failure->start, + failure->start + failure->len - 1, + EXTENT_DIRTY | EXTENT_LOCKED, + GFP_NOFS); + kfree(failure); + } + } return 0; } @@ -547,26 +582,7 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, /* if the io failure tree for this inode is non-empty, * check to see if we've recovered from a failed IO */ - private = 0; - if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, - (u64)-1, 1, EXTENT_DIRTY)) { - u64 private_failure; - struct io_failure_record *failure; - ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, - start, &private_failure); - if (ret == 0) { - failure = (struct io_failure_record *)(unsigned long) - private_failure; - set_state_private(&BTRFS_I(inode)->io_failure_tree, - failure->start, 0); - clear_extent_bits(&BTRFS_I(inode)->io_failure_tree, - failure->start, - failure->start + failure->len - 1, - EXTENT_DIRTY | EXTENT_LOCKED, - GFP_NOFS); - kfree(failure); - } - } + btrfs_clean_io_failures(inode, start); return 0; zeroit: @@ -3657,7 +3673,7 @@ static struct extent_io_ops btrfs_extent_io_ops = { .merge_bio_hook = btrfs_merge_bio_hook, .readpage_io_hook = btrfs_readpage_io_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, - .readpage_io_failed_hook = btrfs_readpage_io_failed_hook, + .readpage_io_failed_hook = btrfs_io_failed_hook, .set_bit_hook = btrfs_set_bit_hook, .clear_bit_hook = btrfs_clear_bit_hook, }; diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 5085e9e693b..c02e2bf2f02 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -51,6 +51,7 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, struct extent_buffer *next; struct extent_buffer *cur; u64 bytenr; + u64 ptr_gen; int ret = 0; int is_extent = 0; @@ -93,11 +94,12 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, break; } bytenr = btrfs_node_blockptr(cur, path->slots[*level]); + ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); if (cache_only) { next = btrfs_find_tree_block(root, bytenr, btrfs_level_size(root, *level - 1)); - if (!next || !btrfs_buffer_uptodate(next) || + if (!next || !btrfs_buffer_uptodate(next, ptr_gen) || !btrfs_buffer_defrag(next)) { free_extent_buffer(next); path->slots[*level]++; @@ -106,8 +108,7 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, } else { next = read_tree_block(root, bytenr, btrfs_level_size(root, *level - 1), - btrfs_node_ptr_generation(cur, - path->slots[*level])); + ptr_gen); } ret = btrfs_cow_block(trans, root, next, path->nodes[*level], path->slots[*level], &next); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b5d7bd1915b..5fc7fb48147 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1807,14 +1807,19 @@ static int end_bio_multi_stripe(struct bio *bio, if (atomic_dec_and_test(&multi->stripes_pending)) { bio->bi_private = multi->private; bio->bi_end_io = multi->end_io; - /* only send an error to the higher layers if it is * beyond the tolerance of the multi-bio */ - if (atomic_read(&multi->error) > multi->max_errors) + if (atomic_read(&multi->error) > multi->max_errors) { err = -EIO; - else + } else if (err) { + /* + * this bio is actually up to date, we didn't + * go over the max number of errors + */ + set_bit(BIO_UPTODATE, &bio->bi_flags); err = 0; + } kfree(multi); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) -- cgit v1.2.3-70-g09d2 From 0ef3e66b6700eb8f052daa8b89443ff872fbbdfc Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 24 May 2008 14:04:53 -0400 Subject: Btrfs: Allocator fix variety pack * Force chunk allocation when find_free_extent has to do a full scan * Record the max key at the start of defrag so it doesn't run forever * Block groups might not be contiguous, make a forward search for the next block group in extent-tree.c * Get rid of extra checks for total fs size * Fix relocate_one_reference to avoid relocating the same file data block twice when referenced by an older transaction * Use the open device count when allocating chunks so that we don't try to allocate from devices that don't exist Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 2 +- fs/btrfs/ctree.h | 2 + fs/btrfs/extent-tree.c | 273 +++++++++++++++++++++++++++++++++---------------- fs/btrfs/tree-defrag.c | 17 +++ fs/btrfs/volumes.c | 12 +-- 5 files changed, 209 insertions(+), 97 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 02e571e6ee6..1d404bde3cf 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -363,7 +363,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, other = btrfs_node_blockptr(parent, i - 1); close = close_blocks(blocknr, other, blocksize); } - if (close && i < end_slot - 2) { + if (!close && i < end_slot - 2) { other = btrfs_node_blockptr(parent, i + 1); close = close_blocks(blocknr, other, blocksize); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 293464c0c6b..7b73a9c3d86 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -464,6 +464,7 @@ struct btrfs_space_info { u64 bytes_used; u64 bytes_pinned; int full; + int force_alloc; struct list_head list; }; @@ -589,6 +590,7 @@ struct btrfs_root { int ref_cows; int track_dirty; struct btrfs_key defrag_progress; + struct btrfs_key defrag_max; int defrag_running; int defrag_level; char *name; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 605018c6045..41a63462d3e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -136,6 +136,35 @@ err: return 0; } +struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct + btrfs_fs_info *info, + u64 bytenr) +{ + struct extent_io_tree *block_group_cache; + struct btrfs_block_group_cache *block_group = NULL; + u64 ptr; + u64 start; + u64 end; + int ret; + + bytenr = max_t(u64, bytenr, + BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE); + block_group_cache = &info->block_group_cache; + ret = find_first_extent_bit(block_group_cache, + bytenr, &start, &end, + BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | + BLOCK_GROUP_SYSTEM); + if (ret) { + return NULL; + } + ret = get_state_private(block_group_cache, start, &ptr); + if (ret) + return NULL; + + block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr; + return block_group; +} + struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 bytenr) @@ -175,7 +204,7 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) static int noinline find_search_start(struct btrfs_root *root, struct btrfs_block_group_cache **cache_ret, - u64 *start_ret, int num, int data) + u64 *start_ret, u64 num, int data) { int ret; struct btrfs_block_group_cache *cache = *cache_ret; @@ -188,21 +217,21 @@ static int noinline find_search_start(struct btrfs_root *root, u64 search_start = *start_ret; int wrapped = 0; - if (!cache) - goto out; - total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); free_space_cache = &root->fs_info->free_space_cache; + if (!cache) + goto out; + again: ret = cache_block_group(root, cache); - if (ret) + if (ret) { goto out; + } last = max(search_start, cache->key.objectid); - if (!block_group_bits(cache, data) || cache->ro) { + if (!block_group_bits(cache, data) || cache->ro) goto new_group; - } spin_lock_irq(&free_space_cache->lock); state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY); @@ -217,20 +246,17 @@ again: start = max(last, state->start); last = state->end + 1; if (last - start < num) { - if (last == cache->key.objectid + cache->key.offset) - cache_miss = start; do { state = extent_state_next(state); } while(state && !(state->state & EXTENT_DIRTY)); continue; } spin_unlock_irq(&free_space_cache->lock); - if (cache->ro) + if (cache->ro) { goto new_group; + } if (start + num > cache->key.objectid + cache->key.offset) goto new_group; - if (start + num > total_fs_bytes) - goto new_group; if (!block_group_bits(cache, data)) { printk("block group bits don't match %Lu %d\n", cache->flags, data); } @@ -248,7 +274,7 @@ out: new_group: last = cache->key.objectid + cache->key.offset; wrapped: - cache = btrfs_lookup_block_group(root->fs_info, last); + cache = btrfs_lookup_first_block_group(root->fs_info, last); if (!cache || cache->key.objectid >= total_fs_bytes) { no_cache: if (!wrapped) { @@ -261,13 +287,13 @@ no_cache: if (cache_miss && !cache->cached) { cache_block_group(root, cache); last = cache_miss; - cache = btrfs_lookup_block_group(root->fs_info, last); + cache = btrfs_lookup_first_block_group(root->fs_info, last); } + cache_miss = 0; cache = btrfs_find_block_group(root, cache, last, data, 0); if (!cache) goto no_cache; *cache_ret = cache; - cache_miss = 0; goto again; } @@ -303,28 +329,26 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_fs_info *info = root->fs_info; u64 used; u64 last = 0; - u64 hint_last; u64 start; u64 end; u64 free_check; u64 ptr; - u64 total_fs_bytes; int bit; int ret; int full_search = 0; int factor = 10; + int wrapped = 0; block_group_cache = &info->block_group_cache; - total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); if (data & BTRFS_BLOCK_GROUP_METADATA) factor = 9; bit = block_group_state_bits(data); - if (search_start && search_start < total_fs_bytes) { + if (search_start) { struct btrfs_block_group_cache *shint; - shint = btrfs_lookup_block_group(info, search_start); + shint = btrfs_lookup_first_block_group(info, search_start); if (shint && block_group_bits(shint, data) && !shint->ro) { used = btrfs_block_group_used(&shint->item); if (used + shint->pinned < @@ -333,24 +357,18 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, } } } - if (hint && !hint->ro && block_group_bits(hint, data) && - hint->key.objectid < total_fs_bytes) { + if (hint && !hint->ro && block_group_bits(hint, data)) { used = btrfs_block_group_used(&hint->item); if (used + hint->pinned < div_factor(hint->key.offset, factor)) { return hint; } last = hint->key.objectid + hint->key.offset; - hint_last = last; } else { if (hint) - hint_last = max(hint->key.objectid, search_start); + last = max(hint->key.objectid, search_start); else - hint_last = search_start; - - if (hint_last >= total_fs_bytes) - hint_last = search_start; - last = hint_last; + last = search_start; } again: while(1) { @@ -360,23 +378,17 @@ again: break; ret = get_state_private(block_group_cache, start, &ptr); - if (ret) - break; + if (ret) { + last = end + 1; + continue; + } cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; last = cache->key.objectid + cache->key.offset; used = btrfs_block_group_used(&cache->item); - if (cache->key.objectid > total_fs_bytes) - break; - if (!cache->ro && block_group_bits(cache, data)) { - if (full_search) - free_check = cache->key.offset; - else - free_check = div_factor(cache->key.offset, - factor); - + free_check = div_factor(cache->key.offset, factor); if (used + cache->pinned < free_check) { found_group = cache; goto found; @@ -384,9 +396,15 @@ again: } cond_resched(); } - if (!full_search) { + if (!wrapped) { + last = search_start; + wrapped = 1; + goto again; + } + if (!full_search && factor < 10) { last = search_start; full_search = 1; + factor = 10; goto again; } found: @@ -1070,6 +1088,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->bytes_used = bytes_used; found->bytes_pinned = 0; found->full = 0; + found->force_alloc = 0; *space_info = found; return 0; } @@ -1120,7 +1139,7 @@ static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags) static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, - u64 flags) + u64 flags, int force) { struct btrfs_space_info *space_info; u64 thresh; @@ -1138,11 +1157,16 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, } BUG_ON(!space_info); + if (space_info->force_alloc) { + force = 1; + space_info->force_alloc = 0; + } if (space_info->full) return 0; thresh = div_factor(space_info->total_bytes, 6); - if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < + if (!force && + (space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < thresh) return 0; @@ -1152,7 +1176,6 @@ printk("space info full %Lu\n", flags); space_info->full = 1; return 0; } - BUG_ON(ret); ret = btrfs_make_block_group(trans, extent_root, 0, flags, @@ -1619,11 +1642,16 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; + int chunk_alloc_done = 0; int empty_cluster = 2 * 1024 * 1024; + int allowed_chunk_alloc = 0; WARN_ON(num_bytes < root->sectorsize); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); + if (orig_root->ref_cows || empty_size) + allowed_chunk_alloc = 1; + if (data & BTRFS_BLOCK_GROUP_METADATA) { last_ptr = &root->fs_info->last_alloc; empty_cluster = 256 * 1024; @@ -1648,7 +1676,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, search_end = btrfs_super_total_bytes(&info->super_copy); if (hint_byte) { - block_group = btrfs_lookup_block_group(info, hint_byte); + block_group = btrfs_lookup_first_block_group(info, hint_byte); if (!block_group) hint_byte = search_start; block_group = btrfs_find_block_group(root, block_group, @@ -1666,17 +1694,28 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, check_failed: if (!block_group) { - block_group = btrfs_lookup_block_group(info, search_start); + block_group = btrfs_lookup_first_block_group(info, + search_start); if (!block_group) - block_group = btrfs_lookup_block_group(info, + block_group = btrfs_lookup_first_block_group(info, orig_search_start); } + if (full_scan && !chunk_alloc_done) { + if (allowed_chunk_alloc) { + do_chunk_alloc(trans, root, + num_bytes + 2 * 1024 * 1024, data, 1); + allowed_chunk_alloc = 0; + } else if (block_group && block_group_bits(block_group, data)) { + block_group->space_info->force_alloc = 1; + } + chunk_alloc_done = 1; + } ret = find_search_start(root, &block_group, &search_start, total_needed, data); if (ret == -ENOSPC && last_ptr && *last_ptr) { *last_ptr = 0; - block_group = btrfs_lookup_block_group(info, - orig_search_start); + block_group = btrfs_lookup_first_block_group(info, + orig_search_start); search_start = orig_search_start; ret = find_search_start(root, &block_group, &search_start, total_needed, data); @@ -1692,7 +1731,7 @@ check_failed: empty_size += empty_cluster; total_needed += empty_size; } - block_group = btrfs_lookup_block_group(info, + block_group = btrfs_lookup_first_block_group(info, orig_search_start); search_start = orig_search_start; ret = find_search_start(root, &block_group, @@ -1765,7 +1804,7 @@ enospc: } else wrapped = 1; } - block_group = btrfs_lookup_block_group(info, search_start); + block_group = btrfs_lookup_first_block_group(info, search_start); cond_resched(); block_group = btrfs_find_block_group(root, block_group, search_start, data, 0); @@ -1819,17 +1858,21 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, } again: data = reduce_alloc_profile(root, data); - if (root->ref_cows) { + /* + * the only place that sets empty_size is btrfs_realloc_node, which + * is not called recursively on allocations + */ + if (empty_size || root->ref_cows) { if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { ret = do_chunk_alloc(trans, root->fs_info->extent_root, - 2 * 1024 * 1024, - BTRFS_BLOCK_GROUP_METADATA | - (info->metadata_alloc_profile & - info->avail_metadata_alloc_bits)); + 2 * 1024 * 1024, + BTRFS_BLOCK_GROUP_METADATA | + (info->metadata_alloc_profile & + info->avail_metadata_alloc_bits), 0); BUG_ON(ret); } ret = do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes + 2 * 1024 * 1024, data); + num_bytes + 2 * 1024 * 1024, data, 0); BUG_ON(ret); } @@ -1842,6 +1885,8 @@ again: if (ret == -ENOSPC && num_bytes > min_alloc_size) { num_bytes = num_bytes >> 1; num_bytes = max(num_bytes, min_alloc_size); + do_chunk_alloc(trans, root->fs_info->extent_root, + num_bytes, data, 1); goto again; } if (ret) { @@ -2537,7 +2582,11 @@ out: */ static int noinline relocate_one_reference(struct btrfs_root *extent_root, struct btrfs_path *path, - struct btrfs_key *extent_key) + struct btrfs_key *extent_key, + u64 *last_file_objectid, + u64 *last_file_offset, + u64 *last_file_root, + u64 last_extent) { struct inode *inode; struct btrfs_root *found_root; @@ -2576,6 +2625,12 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, found_key.offset = ref_offset; level = 0; + if (last_extent == extent_key->objectid && + *last_file_objectid == ref_objectid && + *last_file_offset == ref_offset && + *last_file_root == ref_root) + goto out; + ret = find_root_for_ref(extent_root, path, &found_key, level, 1, &found_root, extent_key->objectid); @@ -2583,6 +2638,12 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, if (ret) goto out; + if (last_extent == extent_key->objectid && + *last_file_objectid == ref_objectid && + *last_file_offset == ref_offset && + *last_file_root == ref_root) + goto out; + mutex_unlock(&extent_root->fs_info->fs_mutex); inode = btrfs_iget_locked(extent_root->fs_info->sb, ref_objectid, found_root); @@ -2603,6 +2664,10 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, mutex_lock(&extent_root->fs_info->fs_mutex); goto out; } + *last_file_objectid = inode->i_ino; + *last_file_root = found_root->root_key.objectid; + *last_file_offset = ref_offset; + relocate_inode_pages(inode, ref_offset, extent_key->offset); iput(inode); mutex_lock(&extent_root->fs_info->fs_mutex); @@ -2643,6 +2708,8 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, path->nodes[i] = NULL; } btrfs_release_path(found_root, path); + if (found_root == found_root->fs_info->extent_root) + btrfs_extent_post_op(trans, found_root); btrfs_end_transaction(trans, found_root); } @@ -2678,6 +2745,10 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; + u64 last_file_objectid = 0; + u64 last_file_root = 0; + u64 last_file_offset = (u64)-1; + u64 last_extent = 0; u32 nritems; u32 item_size; int ret = 0; @@ -2722,9 +2793,13 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, key.offset = found_key.offset + 1; item_size = btrfs_item_size_nr(leaf, path->slots[0]); - ret = relocate_one_reference(extent_root, path, extent_key); + ret = relocate_one_reference(extent_root, path, extent_key, + &last_file_objectid, + &last_file_offset, + &last_file_root, last_extent); if (ret) goto out; + last_extent = extent_key->objectid; } ret = 0; out: @@ -2770,6 +2845,32 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) return flags; } +int __alloc_chunk_for_shrink(struct btrfs_root *root, + struct btrfs_block_group_cache *shrink_block_group, + int force) +{ + struct btrfs_trans_handle *trans; + u64 new_alloc_flags; + u64 calc; + + if (btrfs_block_group_used(&shrink_block_group->item) > 0) { + + trans = btrfs_start_transaction(root, 1); + new_alloc_flags = update_block_group_flags(root, + shrink_block_group->flags); + if (new_alloc_flags != shrink_block_group->flags) { + calc = + btrfs_block_group_used(&shrink_block_group->item); + } else { + calc = shrink_block_group->key.offset; + } + do_chunk_alloc(trans, root->fs_info->extent_root, + calc + 2 * 1024 * 1024, new_alloc_flags, force); + btrfs_end_transaction(trans, root); + } + return 0; +} + int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) { struct btrfs_trans_handle *trans; @@ -2778,7 +2879,6 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) u64 cur_byte; u64 total_found; u64 shrink_last_byte; - u64 new_alloc_flags; struct btrfs_block_group_cache *shrink_block_group; struct btrfs_fs_info *info = root->fs_info; struct btrfs_key key; @@ -2792,7 +2892,8 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) shrink_start); BUG_ON(!shrink_block_group); - shrink_last_byte = shrink_start + shrink_block_group->key.offset; + shrink_last_byte = shrink_block_group->key.objectid + + shrink_block_group->key.offset; shrink_block_group->space_info->total_bytes -= shrink_block_group->key.offset; @@ -2804,23 +2905,10 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) (unsigned long long)shrink_start, (unsigned long long)shrink_block_group->flags); + __alloc_chunk_for_shrink(root, shrink_block_group, 1); + again: - if (btrfs_block_group_used(&shrink_block_group->item) > 0) { - u64 calc; - trans = btrfs_start_transaction(root, 1); - new_alloc_flags = update_block_group_flags(root, - shrink_block_group->flags); - if (new_alloc_flags != shrink_block_group->flags) { - calc = - btrfs_block_group_used(&shrink_block_group->item); - } else { - calc = shrink_block_group->key.offset; - } - do_chunk_alloc(trans, root->fs_info->extent_root, - calc + 2 * 1024 * 1024, new_alloc_flags); - btrfs_end_transaction(trans, root); - } shrink_block_group->ro = 1; total_found = 0; @@ -2888,6 +2976,8 @@ next: if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY || found_key.objectid + found_key.offset <= cur_byte) { + memcpy(&key, &found_key, sizeof(key)); + key.offset++; path->slots[0]++; goto next; } @@ -2897,6 +2987,7 @@ next: key.objectid = cur_byte; btrfs_release_path(root, path); ret = relocate_one_extent(root, path, &found_key); + __alloc_chunk_for_shrink(root, shrink_block_group, 0); } btrfs_release_path(root, path); @@ -2930,20 +3021,27 @@ next: if (ret < 0) goto out; - leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - kfree(shrink_block_group); - - clear_extent_bits(&info->block_group_cache, found_key.objectid, - found_key.objectid + found_key.offset - 1, + clear_extent_bits(&info->block_group_cache, key.objectid, + key.objectid + key.offset - 1, (unsigned int)-1, GFP_NOFS); + + clear_extent_bits(&info->free_space_cache, + key.objectid, key.objectid + key.offset - 1, + (unsigned int)-1, GFP_NOFS); + + memset(shrink_block_group, 0, sizeof(*shrink_block_group)); + kfree(shrink_block_group); + btrfs_del_item(trans, root, path); - clear_extent_dirty(&info->free_space_cache, - shrink_start, shrink_last_byte - 1, - GFP_NOFS); btrfs_commit_transaction(trans, root); + + /* the code to unpin extents might set a few bits in the free + * space cache for this range again + */ + clear_extent_bits(&info->free_space_cache, + key.objectid, key.objectid + key.offset - 1, + (unsigned int)-1, GFP_NOFS); out: btrfs_free_path(path); return ret; @@ -3081,9 +3179,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, BUG_ON(!cache); cache->key.objectid = chunk_offset; cache->key.offset = size; - btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); - memset(&cache->item, 0, sizeof(cache->item)); + btrfs_set_block_group_used(&cache->item, bytes_used); btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); cache->flags = type; diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index c02e2bf2f02..155961c7b4d 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -198,6 +198,13 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, goto out; } if (root->defrag_progress.objectid == 0) { + u32 nritems; + + nritems = btrfs_header_nritems(root->node); + root->defrag_max.objectid = 0; + /* from above we know this is not a leaf */ + btrfs_node_key_to_cpu(root->node, &root->defrag_max, + nritems - 1); extent_buffer_get(root->node); ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); BUG_ON(ret); @@ -254,6 +261,16 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, out: if (path) btrfs_free_path(path); + if (ret == -EAGAIN) { + if (root->defrag_max.objectid > root->defrag_progress.objectid) + goto done; + if (root->defrag_max.type > root->defrag_progress.type) + goto done; + if (root->defrag_max.offset > root->defrag_progress.offset) + goto done; + ret = 0; + } +done: if (ret != -EAGAIN) { memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 333e5269577..722eb455015 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -750,10 +750,6 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, if (bdev == fs_devices->latest_bdev) fs_devices->latest_bdev = next_dev->bdev; - total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); - btrfs_set_super_total_bytes(&root->fs_info->super_copy, - total_bytes - device->total_bytes); - total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); btrfs_set_super_num_devices(&root->fs_info->super_copy, total_bytes - 1); @@ -849,6 +845,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) } root->fs_info->fs_devices->num_devices--; + root->fs_info->fs_devices->open_devices--; ret = btrfs_shrink_device(device, 0); if (ret) @@ -873,7 +870,6 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) if (device->bdev) { /* one close for the device struct or super_block */ close_bdev_excl(device->bdev); - root->fs_info->fs_devices->open_devices--; } if (bdev) { /* one close for us */ @@ -1450,7 +1446,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, return -ENOSPC; if (type & (BTRFS_BLOCK_GROUP_RAID0)) { - num_stripes = btrfs_super_num_devices(&info->super_copy); + num_stripes = extent_root->fs_info->fs_devices->open_devices; min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_DUP)) { @@ -1459,13 +1455,13 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, } if (type & (BTRFS_BLOCK_GROUP_RAID1)) { num_stripes = min_t(u64, 2, - btrfs_super_num_devices(&info->super_copy)); + extent_root->fs_info->fs_devices->open_devices); if (num_stripes < 2) return -ENOSPC; min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID10)) { - num_stripes = btrfs_super_num_devices(&info->super_copy); + num_stripes = extent_root->fs_info->fs_devices->open_devices; if (num_stripes < 4) return -ENOSPC; num_stripes &= ~(u32)1; -- cgit v1.2.3-70-g09d2 From 925baeddc5b0764a53f2214a1253251bab0e0324 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:30 -0400 Subject: Btrfs: Start btree concurrency work. The allocation trees and the chunk trees are serialized via their own dedicated mutexes. This means allocation location is still not very fine grained. The main FS btree is protected by locks on each block in the btree. Locks are taken top / down, and as processing finishes on a given level of the tree, the lock is released after locking the lower level. The end result of a search is now a path where only the lowest level is locked. Releasing or freeing the path drops any locks held. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.c | 386 ++++++++++++++++++++++++++++++++++++------------- fs/btrfs/ctree.h | 23 ++- fs/btrfs/disk-io.c | 13 +- fs/btrfs/extent-tree.c | 252 +++++++++++++++++++++----------- fs/btrfs/extent_io.c | 8 - fs/btrfs/inode.c | 8 +- fs/btrfs/ioctl.c | 8 +- fs/btrfs/locking.c | 50 +++++++ fs/btrfs/locking.h | 26 ++++ fs/btrfs/transaction.c | 15 +- fs/btrfs/tree-defrag.c | 2 + 12 files changed, 579 insertions(+), 214 deletions(-) create mode 100644 fs/btrfs/locking.c create mode 100644 fs/btrfs/locking.h (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 7ed6b39e42d..0e2dcc718de 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -6,7 +6,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ - extent_io.o volumes.o async-thread.o ioctl.o + extent_io.o volumes.o async-thread.o ioctl.o locking.o btrfs-$(CONFIG_FS_POSIX_ACL) += acl.o else diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1d404bde3cf..75625c68fdd 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -21,6 +21,7 @@ #include "disk-io.h" #include "transaction.h" #include "print-tree.h" +#include "locking.h" static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level); @@ -64,12 +65,47 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) int i; for (i = 0; i < BTRFS_MAX_LEVEL; i++) { if (!p->nodes[i]) - break; + continue; + if (p->locks[i]) { + btrfs_tree_unlock(p->nodes[i]); + p->locks[i] = 0; + } free_extent_buffer(p->nodes[i]); } memset(p, 0, sizeof(*p)); } +struct extent_buffer *btrfs_root_node(struct btrfs_root *root) +{ + struct extent_buffer *eb; + spin_lock(&root->node_lock); + eb = root->node; + extent_buffer_get(eb); + spin_unlock(&root->node_lock); + return eb; +} + +struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) +{ + struct extent_buffer *eb; + + while(1) { + eb = btrfs_root_node(root); + btrfs_tree_lock(eb); + + spin_lock(&root->node_lock); + if (eb == root->node) { + spin_unlock(&root->node_lock); + break; + } + spin_unlock(&root->node_lock); + + btrfs_tree_unlock(eb); + free_extent_buffer(eb); + } + return eb; +} + static void add_root_to_dirty_list(struct btrfs_root *root) { if (root->track_dirty && list_empty(&root->dirty_list)) { @@ -111,7 +147,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, } else { first_key.objectid = 0; } - cow = __btrfs_alloc_free_block(trans, new_root, buf->len, + cow = btrfs_alloc_free_block(trans, new_root, buf->len, new_root_objectid, trans->transid, first_key.objectid, level, buf->start, 0); @@ -151,8 +187,14 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, int ret = 0; int different_trans = 0; int level; + int unlock_orig = 0; struct btrfs_key first_key; + if (*cow_ret == buf) + unlock_orig = 1; + + WARN_ON(!btrfs_tree_locked(buf)); + if (root->ref_cows) { root_gen = trans->transid; } else { @@ -172,7 +214,7 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, } else { first_key.objectid = 0; } - cow = __btrfs_alloc_free_block(trans, root, buf->len, + cow = btrfs_alloc_free_block(trans, root, buf->len, root->root_key.objectid, root_gen, first_key.objectid, level, search_start, empty_size); @@ -196,9 +238,14 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, } if (buf == root->node) { + WARN_ON(parent && parent != buf); root_gen = btrfs_header_generation(buf); + + spin_lock(&root->node_lock); root->node = cow; extent_buffer_get(cow); + spin_unlock(&root->node_lock); + if (buf != root->commit_root) { btrfs_free_extent(trans, root, buf->start, buf->len, root->root_key.objectid, @@ -219,6 +266,8 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, btrfs_header_owner(parent), root_gen, 0, 0, 1); } + if (unlock_orig) + btrfs_tree_unlock(buf); free_extent_buffer(buf); btrfs_mark_buffer_dirty(cow); *cow_ret = cow; @@ -316,6 +365,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, int progress_passed = 0; struct btrfs_disk_key disk_key; + /* FIXME this code needs locking */ + return 0; + parent_level = btrfs_header_level(parent); if (cache_only && parent_level != 1) return 0; @@ -729,6 +781,7 @@ static int balance_level(struct btrfs_trans_handle *trans, return 0; mid = path->nodes[level]; + WARN_ON(!path->locks[level]); WARN_ON(btrfs_header_generation(mid) != trans->transid); orig_ptr = btrfs_node_blockptr(mid, orig_slot); @@ -749,14 +802,21 @@ static int balance_level(struct btrfs_trans_handle *trans, /* promote the child to a root */ child = read_node_slot(root, mid, 0); + btrfs_tree_lock(child); BUG_ON(!child); ret = btrfs_cow_block(trans, root, child, mid, 0, &child); BUG_ON(ret); + spin_lock(&root->node_lock); root->node = child; + spin_unlock(&root->node_lock); + add_root_to_dirty_list(root); + btrfs_tree_unlock(child); + path->locks[level] = 0; path->nodes[level] = NULL; clean_tree_block(trans, root, mid); + btrfs_tree_unlock(mid); /* once for the path */ free_extent_buffer(mid); ret = btrfs_free_extent(trans, root, mid->start, mid->len, @@ -775,6 +835,7 @@ static int balance_level(struct btrfs_trans_handle *trans, left = read_node_slot(root, parent, pslot - 1); if (left) { + btrfs_tree_lock(left); wret = btrfs_cow_block(trans, root, left, parent, pslot - 1, &left); if (wret) { @@ -784,6 +845,7 @@ static int balance_level(struct btrfs_trans_handle *trans, } right = read_node_slot(root, parent, pslot + 1); if (right) { + btrfs_tree_lock(right); wret = btrfs_cow_block(trans, root, right, parent, pslot + 1, &right); if (wret) { @@ -815,6 +877,7 @@ static int balance_level(struct btrfs_trans_handle *trans, u32 blocksize = right->len; clean_tree_block(trans, root, right); + btrfs_tree_unlock(right); free_extent_buffer(right); right = NULL; wret = del_ptr(trans, root, path, level + 1, pslot + @@ -862,7 +925,9 @@ static int balance_level(struct btrfs_trans_handle *trans, u64 root_gen = btrfs_header_generation(parent); u64 bytenr = mid->start; u32 blocksize = mid->len; + clean_tree_block(trans, root, mid); + btrfs_tree_unlock(mid); free_extent_buffer(mid); mid = NULL; wret = del_ptr(trans, root, path, level + 1, pslot); @@ -885,11 +950,14 @@ static int balance_level(struct btrfs_trans_handle *trans, if (left) { if (btrfs_header_nritems(left) > orig_slot) { extent_buffer_get(left); + /* left was locked after cow */ path->nodes[level] = left; path->slots[level + 1] -= 1; path->slots[level] = orig_slot; - if (mid) + if (mid) { + btrfs_tree_unlock(mid); free_extent_buffer(mid); + } } else { orig_slot -= btrfs_header_nritems(left); path->slots[level] = orig_slot; @@ -901,10 +969,15 @@ static int balance_level(struct btrfs_trans_handle *trans, btrfs_node_blockptr(path->nodes[level], path->slots[level])) BUG(); enospc: - if (right) + if (right) { + btrfs_tree_unlock(right); free_extent_buffer(right); - if (left) + } + if (left) { + if (path->nodes[level] != left) + btrfs_tree_unlock(left); free_extent_buffer(left); + } return ret; } @@ -942,6 +1015,8 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, /* first, try to make some room in the middle buffer */ if (left) { u32 left_nr; + + btrfs_tree_lock(left); left_nr = btrfs_header_nritems(left); if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; @@ -967,24 +1042,28 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, path->nodes[level] = left; path->slots[level + 1] -= 1; path->slots[level] = orig_slot; + btrfs_tree_unlock(mid); free_extent_buffer(mid); } else { orig_slot -= btrfs_header_nritems(left); path->slots[level] = orig_slot; + btrfs_tree_unlock(left); free_extent_buffer(left); } return 0; } + btrfs_tree_unlock(left); free_extent_buffer(left); } - right= read_node_slot(root, parent, pslot + 1); + right = read_node_slot(root, parent, pslot + 1); /* * then try to empty the right most buffer into the middle */ if (right) { u32 right_nr; + btrfs_tree_lock(right); right_nr = btrfs_header_nritems(right); if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; @@ -1013,12 +1092,15 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, path->slots[level + 1] += 1; path->slots[level] = orig_slot - btrfs_header_nritems(mid); + btrfs_tree_unlock(mid); free_extent_buffer(mid); } else { + btrfs_tree_unlock(right); free_extent_buffer(right); } return 0; } + btrfs_tree_unlock(right); free_extent_buffer(right); } return 1; @@ -1050,6 +1132,8 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, return; node = path->nodes[level]; + WARN_ON(!path->skip_locking && !btrfs_tree_locked(node)); + search = btrfs_node_blockptr(node, slot); blocksize = btrfs_level_size(root, level - 1); eb = btrfs_find_tree_block(root, search, blocksize); @@ -1098,6 +1182,39 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, highest_read = search; } } + +static void unlock_up(struct btrfs_path *path, int level, int lowest_unlock) +{ + int i; + int skip_level = level; + struct extent_buffer *t; + + for (i = level; i < BTRFS_MAX_LEVEL; i++) { + if (!path->nodes[i]) + break; + if (!path->locks[i]) + break; + if (path->slots[i] == 0) { + skip_level = i + 1; + continue; + } + if (path->keep_locks) { + u32 nritems; + t = path->nodes[i]; + nritems = btrfs_header_nritems(t); + if (path->slots[i] >= nritems - 1) { + skip_level = i + 1; + continue; + } + } + t = path->nodes[i]; + if (i >= lowest_unlock && i > skip_level && path->locks[i]) { + btrfs_tree_unlock(t); + path->locks[i] = 0; + } + } +} + /* * look for key in the tree. path is filled in with nodes along the way * if key is found, we return zero and you can find the item in the leaf @@ -1120,15 +1237,27 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root int ret; int level; int should_reada = p->reada; + int lowest_unlock = 1; u8 lowest_level = 0; lowest_level = p->lowest_level; WARN_ON(lowest_level && ins_len); WARN_ON(p->nodes[0] != NULL); - WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); + // WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); + WARN_ON(root == root->fs_info->extent_root && + !mutex_is_locked(&root->fs_info->alloc_mutex)); + WARN_ON(root == root->fs_info->chunk_root && + !mutex_is_locked(&root->fs_info->chunk_mutex)); + WARN_ON(root == root->fs_info->dev_root && + !mutex_is_locked(&root->fs_info->chunk_mutex)); + if (ins_len < 0) + lowest_unlock = 2; again: - b = root->node; - extent_buffer_get(b); + if (!p->skip_locking) + b = btrfs_lock_root_node(root); + else + b = btrfs_root_node(root); + while (b) { level = btrfs_header_level(b); if (cow) { @@ -1147,9 +1276,12 @@ again: WARN_ON(1); level = btrfs_header_level(b); p->nodes[level] = b; + if (!p->skip_locking) + p->locks[level] = 1; ret = check_block(root, p, level); if (ret) return -1; + ret = bin_search(b, key, level, &slot); if (level != 0) { if (ret && slot > 0) @@ -1177,14 +1309,19 @@ again: BUG_ON(btrfs_header_nritems(b) == 1); } /* this is only true while dropping a snapshot */ - if (level == lowest_level) + if (level == lowest_level) { + unlock_up(p, level, lowest_unlock); break; + } if (should_reada) reada_for_search(root, p, level, slot, key->objectid); b = read_node_slot(root, b, slot); + if (!p->skip_locking) + btrfs_tree_lock(b); + unlock_up(p, level, lowest_unlock); } else { p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, b) < @@ -1195,6 +1332,7 @@ again: if (sret) return sret; } + unlock_up(p, level, lowest_unlock); return ret; } } @@ -1225,6 +1363,13 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, break; t = path->nodes[i]; btrfs_set_node_key(t, key, tslot); + if (!btrfs_tree_locked(path->nodes[i])) { + int ii; +printk("fixup without lock on level %d\n", btrfs_header_level(path->nodes[i])); + for (ii = 0; ii < BTRFS_MAX_LEVEL; ii++) { +printk("level %d slot %d\n", ii, path->slots[ii]); + } + } btrfs_mark_buffer_dirty(path->nodes[i]); if (tslot != 0) break; @@ -1370,6 +1515,7 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, u64 lower_gen; struct extent_buffer *lower; struct extent_buffer *c; + struct extent_buffer *old; struct btrfs_disk_key lower_key; BUG_ON(path->nodes[level]); @@ -1386,12 +1532,13 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, else btrfs_node_key(lower, &lower_key, 0); - c = __btrfs_alloc_free_block(trans, root, root->nodesize, + c = btrfs_alloc_free_block(trans, root, root->nodesize, root->root_key.objectid, root_gen, lower_key.objectid, level, root->node->start, 0); if (IS_ERR(c)) return PTR_ERR(c); + memset_extent_buffer(c, 0, 0, root->nodesize); btrfs_set_header_nritems(c, 1); btrfs_set_header_level(c, level); @@ -1416,23 +1563,31 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(c); - /* the super has an extra ref to root->node */ - free_extent_buffer(root->node); + spin_lock(&root->node_lock); + old = root->node; root->node = c; + spin_unlock(&root->node_lock); + + /* the super has an extra ref to root->node */ + free_extent_buffer(old); + add_root_to_dirty_list(root); extent_buffer_get(c); path->nodes[level] = c; + path->locks[level] = 1; path->slots[level] = 0; if (root->ref_cows && lower_gen != trans->transid) { struct btrfs_path *back_path = btrfs_alloc_path(); int ret; + mutex_lock(&root->fs_info->alloc_mutex); ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root, path, lower->start, root->root_key.objectid, trans->transid, 0, 0); BUG_ON(ret); + mutex_unlock(&root->fs_info->alloc_mutex); btrfs_free_path(back_path); } return 0; @@ -1521,7 +1676,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root root_gen = 0; btrfs_node_key(c, &disk_key, 0); - split = __btrfs_alloc_free_block(trans, root, root->nodesize, + split = btrfs_alloc_free_block(trans, root, root->nodesize, root->root_key.objectid, root_gen, btrfs_disk_key_objectid(&disk_key), @@ -1564,10 +1719,12 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root if (path->slots[level] >= mid) { path->slots[level] -= mid; + btrfs_tree_unlock(c); free_extent_buffer(c); path->nodes[level] = split; path->slots[level + 1] += 1; } else { + btrfs_tree_unlock(split); free_extent_buffer(split); } return ret; @@ -1648,30 +1805,24 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root return 1; right = read_node_slot(root, upper, slot + 1); + btrfs_tree_lock(right); free_space = btrfs_leaf_free_space(root, right); - if (free_space < data_size + sizeof(struct btrfs_item)) { - free_extent_buffer(right); - return 1; - } + if (free_space < data_size + sizeof(struct btrfs_item)) + goto out_unlock; /* cow and double check */ ret = btrfs_cow_block(trans, root, right, upper, slot + 1, &right); - if (ret) { - free_extent_buffer(right); - return 1; - } + if (ret) + goto out_unlock; + free_space = btrfs_leaf_free_space(root, right); - if (free_space < data_size + sizeof(struct btrfs_item)) { - free_extent_buffer(right); - return 1; - } + if (free_space < data_size + sizeof(struct btrfs_item)) + goto out_unlock; left_nritems = btrfs_header_nritems(left); - if (left_nritems == 0) { - free_extent_buffer(right); - return 1; - } + if (left_nritems == 0) + goto out_unlock; if (empty) nr = 0; @@ -1707,10 +1858,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root left->map_token = NULL; } - if (push_items == 0) { - free_extent_buffer(right); - return 1; - } + if (push_items == 0) + goto out_unlock; if (!empty && push_items == left_nritems) WARN_ON(1); @@ -1778,14 +1927,24 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left_nritems) { path->slots[0] -= left_nritems; + if (btrfs_header_nritems(path->nodes[0]) == 0) + clean_tree_block(trans, root, path->nodes[0]); + btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; path->slots[1] += 1; } else { + btrfs_tree_unlock(right); free_extent_buffer(right); } return 0; + +out_unlock: + btrfs_tree_unlock(right); + free_extent_buffer(right); + return 1; } + /* * push some data in the path leaf to the left, trying to free up at * least data_size bytes. returns zero if the push worked, nonzero otherwise @@ -1823,10 +1982,11 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root } left = read_node_slot(root, path->nodes[1], slot - 1); + btrfs_tree_lock(left); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { - free_extent_buffer(left); - return 1; + ret = 1; + goto out; } /* cow and double check */ @@ -1834,14 +1994,14 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root path->nodes[1], slot - 1, &left); if (ret) { /* we hit -ENOSPC, but it isn't fatal here */ - free_extent_buffer(left); - return 1; + ret = 1; + goto out; } free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { - free_extent_buffer(left); - return 1; + ret = 1; + goto out; } if (empty) @@ -1876,8 +2036,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root } if (push_items == 0) { - free_extent_buffer(left); - return 1; + ret = 1; + goto out; } if (!empty && push_items == btrfs_header_nritems(right)) WARN_ON(1); @@ -1975,15 +2135,23 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root /* then fixup the leaf pointer in the path */ if (path->slots[0] < push_items) { path->slots[0] += old_left_nritems; + if (btrfs_header_nritems(path->nodes[0]) == 0) + clean_tree_block(trans, root, path->nodes[0]); + btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = left; path->slots[1] -= 1; } else { + btrfs_tree_unlock(left); free_extent_buffer(left); path->slots[0] -= push_items; } BUG_ON(path->slots[0] < 0); return ret; +out: + btrfs_tree_unlock(left); + free_extent_buffer(left); + return ret; } /* @@ -2052,7 +2220,7 @@ again: btrfs_item_key(l, &disk_key, 0); - right = __btrfs_alloc_free_block(trans, root, root->leafsize, + right = btrfs_alloc_free_block(trans, root, root->leafsize, root->root_key.objectid, root_gen, disk_key.objectid, 0, l->start, 0); @@ -2085,6 +2253,8 @@ again: path->slots[1] + 1, 1); if (wret) ret = wret; + + btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; path->slots[0] = 0; @@ -2111,6 +2281,7 @@ again: path->slots[1], 1); if (wret) ret = wret; + btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; path->slots[0] = 0; @@ -2184,12 +2355,15 @@ again: BUG_ON(path->slots[0] != slot); if (mid <= slot) { + btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; path->slots[0] -= mid; path->slots[1] += 1; - } else + } else { + btrfs_tree_unlock(right); free_extent_buffer(right); + } BUG_ON(path->slots[0] < 0); @@ -2418,10 +2592,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, total_data += data_size[i]; } - /* create a root if there isn't one */ - if (!root->node) - BUG(); - total_size = total_data + (nr - 1) * sizeof(struct btrfs_item); ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); if (ret == 0) { @@ -2516,7 +2686,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, btrfs_print_leaf(root, leaf); BUG(); } - out: return ret; } @@ -2655,7 +2824,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, btrfs_set_header_level(leaf, 0); } else { u64 root_gen = btrfs_header_generation(path->nodes[1]); - clean_tree_block(trans, root, leaf); wret = del_ptr(trans, root, path, 1, path->slots[1]); if (wret) ret = wret; @@ -2706,8 +2874,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, root_gen = btrfs_header_generation( path->nodes[1]); - clean_tree_block(trans, root, leaf); - wret = del_ptr(trans, root, path, 1, slot); if (wret) ret = wret; @@ -2720,7 +2886,13 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; } else { - btrfs_mark_buffer_dirty(leaf); + /* if we're still in the path, make sure + * we're dirty. Otherwise, one of the + * push_leaf functions must have already + * dirtied this buffer + */ + if (path->nodes[0] == leaf) + btrfs_mark_buffer_dirty(leaf); free_extent_buffer(leaf); } } else { @@ -2731,56 +2903,40 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, } /* - * walk up the tree as far as required to find the previous leaf. + * search the tree again to find a leaf with lesser keys * returns 0 if it found something or 1 if there are no lesser leaves. * returns < 0 on io errors. */ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) { - int slot; - int level = 1; - struct extent_buffer *c; - struct extent_buffer *next = NULL; + struct btrfs_key key; + struct btrfs_disk_key found_key; + int ret; - while(level < BTRFS_MAX_LEVEL) { - if (!path->nodes[level]) - return 1; + btrfs_item_key_to_cpu(path->nodes[0], &key, 0); - slot = path->slots[level]; - c = path->nodes[level]; - if (slot == 0) { - level++; - if (level == BTRFS_MAX_LEVEL) - return 1; - continue; - } - slot--; - - if (next) - free_extent_buffer(next); + if (key.offset > 0) + key.offset--; + else if (key.type > 0) + key.type--; + else if (key.objectid > 0) + key.objectid--; + else + return 1; - next = read_node_slot(root, c, slot); - break; - } - path->slots[level] = slot; - while(1) { - level--; - c = path->nodes[level]; - free_extent_buffer(c); - slot = btrfs_header_nritems(next); - if (slot != 0) - slot--; - path->nodes[level] = next; - path->slots[level] = slot; - if (!level) - break; - next = read_node_slot(root, next, slot); - } - return 0; + btrfs_release_path(root, path); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + return ret; + btrfs_item_key(path->nodes[0], &found_key, 0); + ret = comp_keys(&found_key, &key); + if (ret < 0) + return 0; + return 1; } /* - * walk up the tree as far as required to find the next leaf. + * search the tree again to find a leaf with greater keys * returns 0 if it found something or 1 if there are no greater leaves. * returns < 0 on io errors. */ @@ -2790,6 +2946,28 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) int level = 1; struct extent_buffer *c; struct extent_buffer *next = NULL; + struct btrfs_key key; + u32 nritems; + int ret; + + nritems = btrfs_header_nritems(path->nodes[0]); + if (nritems == 0) { + return 1; + } + + btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); + + path->keep_locks = 1; + btrfs_release_path(root, path); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + path->keep_locks = 0; + + if (ret < 0) + return ret; + + if (path->slots[0] < nritems - 1) { + goto done; + } while(level < BTRFS_MAX_LEVEL) { if (!path->nodes[level]) @@ -2799,33 +2977,45 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) c = path->nodes[level]; if (slot >= btrfs_header_nritems(c)) { level++; - if (level == BTRFS_MAX_LEVEL) + if (level == BTRFS_MAX_LEVEL) { return 1; + } continue; } - if (next) + if (next) { + btrfs_tree_unlock(next); free_extent_buffer(next); + } - if (path->reada) + if (level == 1 && path->locks[1] && path->reada) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, c, slot); + if (!path->skip_locking) + btrfs_tree_lock(next); break; } path->slots[level] = slot; while(1) { level--; c = path->nodes[level]; + if (path->locks[level]) + btrfs_tree_unlock(c); free_extent_buffer(c); path->nodes[level] = next; path->slots[level] = 0; + path->locks[level] = 1; if (!level) break; - if (path->reada) - reada_for_search(root, path, level, 0, 0); + if (level == 1 && path->locks[1] && path->reada) + reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, next, 0); + if (!path->skip_locking) + btrfs_tree_lock(next); } +done: + unlock_up(path, 0, 1); return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index dcea9d706d9..50891b39f36 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -330,8 +330,13 @@ struct btrfs_node { struct btrfs_path { struct extent_buffer *nodes[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL]; + /* if there is real range locking, this locks field will change */ + int locks[BTRFS_MAX_LEVEL]; int reada; + /* keep some upper locks as we walk down */ + int keep_locks; int lowest_level; + int skip_locking; }; /* @@ -515,6 +520,8 @@ struct btrfs_fs_info { spinlock_t hash_lock; struct mutex trans_mutex; struct mutex fs_mutex; + struct mutex alloc_mutex; + struct mutex chunk_mutex; struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; @@ -576,6 +583,10 @@ struct btrfs_fs_info { */ struct btrfs_root { struct extent_buffer *node; + + /* the node lock is held while changing the node pointer */ + spinlock_t node_lock; + struct extent_buffer *commit_root; struct btrfs_root_item root_item; struct btrfs_key root_key; @@ -1353,13 +1364,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, int data, int owner); -int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 owner_objectid); struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u32 size, - u64 root_objectid, - u64 hint, u64 empty_size); -struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 blocksize, u64 root_objectid, @@ -1368,8 +1373,6 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, int level, u64 hint, u64 empty_size); -int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 new_size); int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size); int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -1409,6 +1412,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, int btrfs_previous_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid, int type); + +struct extent_buffer *btrfs_root_node(struct btrfs_root *root); +struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); + int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e5c758e306d..fe40bdd984f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -32,6 +32,7 @@ #include "volumes.h" #include "print-tree.h" #include "async-thread.h" +#include "locking.h" #if 0 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) @@ -681,9 +682,11 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, { struct inode *btree_inode = root->fs_info->btree_inode; if (btrfs_header_generation(buf) == - root->fs_info->running_transaction->transid) + root->fs_info->running_transaction->transid) { + WARN_ON(!btrfs_tree_locked(buf)); clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); + } return 0; } @@ -720,6 +723,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->in_sysfs = 0; INIT_LIST_HEAD(&root->dirty_list); + spin_lock_init(&root->node_lock); memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); @@ -1196,6 +1200,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); + mutex_init(&fs_info->alloc_mutex); + mutex_init(&fs_info->chunk_mutex); #if 0 ret = add_hasher(fs_info, "crc32c"); @@ -1274,7 +1280,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_lock(&fs_info->fs_mutex); + mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_sys_array(tree_root); + mutex_unlock(&fs_info->chunk_mutex); if (ret) { printk("btrfs: failed to read the system array on %s\n", sb->s_id); @@ -1296,7 +1304,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), BTRFS_UUID_SIZE); + mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_chunk_tree(chunk_root); + mutex_unlock(&fs_info->chunk_mutex); BUG_ON(ret); btrfs_close_extra_devices(fs_devices); @@ -1654,6 +1664,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) u64 transid = btrfs_header_generation(buf); struct inode *btree_inode = root->fs_info->btree_inode; + WARN_ON(!btrfs_tree_locked(buf)); if (transid != root->fs_info->generation) { printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", (unsigned long long)buf->start, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 41a63462d3e..7e40c516fe6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -25,6 +25,7 @@ #include "print-tree.h" #include "transaction.h" #include "volumes.h" +#include "locking.h" #define BLOCK_GROUP_DATA EXTENT_WRITEBACK #define BLOCK_GROUP_METADATA EXTENT_UPTODATE @@ -36,7 +37,28 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); +static struct btrfs_block_group_cache * +__btrfs_find_block_group(struct btrfs_root *root, + struct btrfs_block_group_cache *hint, + u64 search_start, int data, int owner); +void maybe_lock_mutex(struct btrfs_root *root) +{ + if (root != root->fs_info->extent_root && + root != root->fs_info->chunk_root && + root != root->fs_info->dev_root) { + mutex_lock(&root->fs_info->alloc_mutex); + } +} + +void maybe_unlock_mutex(struct btrfs_root *root) +{ + if (root != root->fs_info->extent_root && + root != root->fs_info->chunk_root && + root != root->fs_info->dev_root) { + mutex_unlock(&root->fs_info->alloc_mutex); + } +} static int cache_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *block_group) @@ -66,6 +88,7 @@ static int cache_block_group(struct btrfs_root *root, return -ENOMEM; path->reada = 2; + path->skip_locking = 1; first_free = block_group->key.objectid; key.objectid = block_group->key.objectid; key.offset = 0; @@ -290,7 +313,7 @@ no_cache: cache = btrfs_lookup_first_block_group(root->fs_info, last); } cache_miss = 0; - cache = btrfs_find_block_group(root, cache, last, data, 0); + cache = __btrfs_find_block_group(root, cache, last, data, 0); if (!cache) goto no_cache; *cache_ret = cache; @@ -318,10 +341,10 @@ static int block_group_state_bits(u64 flags) return bits; } -struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, - struct btrfs_block_group_cache - *hint, u64 search_start, - int data, int owner) +static struct btrfs_block_group_cache * +__btrfs_find_block_group(struct btrfs_root *root, + struct btrfs_block_group_cache *hint, + u64 search_start, int data, int owner) { struct btrfs_block_group_cache *cache; struct extent_io_tree *block_group_cache; @@ -411,6 +434,18 @@ found: return found_group; } +struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, + struct btrfs_block_group_cache + *hint, u64 search_start, + int data, int owner) +{ + + struct btrfs_block_group_cache *ret; + mutex_lock(&root->fs_info->alloc_mutex); + ret = __btrfs_find_block_group(root, hint, search_start, data, owner); + mutex_unlock(&root->fs_info->alloc_mutex); + return ret; +} static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset) { @@ -646,7 +681,7 @@ out: return ret; } -int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, +static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 root_objectid, u64 ref_generation, @@ -696,6 +731,22 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, return 0; } +int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset) +{ + int ret; + + mutex_lock(&root->fs_info->alloc_mutex); + ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, + root_objectid, ref_generation, + owner, owner_offset); + mutex_unlock(&root->fs_info->alloc_mutex); + return ret; +} + int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -760,6 +811,10 @@ u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, struct btrfs_extent_ref *ref_item; int level = -1; + /* FIXME, needs locking */ + BUG(); + + mutex_lock(&root->fs_info->alloc_mutex); path = btrfs_alloc_path(); again: if (level == -1) @@ -854,33 +909,9 @@ again: out: btrfs_free_path(path); + mutex_unlock(&root->fs_info->alloc_mutex); return total_count; } -int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 owner_objectid) -{ - u64 generation; - u64 key_objectid; - u64 level; - u32 nritems; - struct btrfs_disk_key disk_key; - - level = btrfs_header_level(root->node); - generation = trans->transid; - nritems = btrfs_header_nritems(root->node); - if (nritems > 0) { - if (level == 0) - btrfs_item_key(root->node, &disk_key, 0); - else - btrfs_node_key(root->node, &disk_key, 0); - key_objectid = btrfs_disk_key_objectid(&disk_key); - } else { - key_objectid = 0; - } - return btrfs_inc_extent_ref(trans, root, root->node->start, - root->node->len, owner_objectid, - generation, level, key_objectid); -} int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf) @@ -897,6 +928,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (!root->ref_cows) return 0; + mutex_lock(&root->fs_info->alloc_mutex); level = btrfs_header_level(buf); nritems = btrfs_header_nritems(buf); for (i = 0; i < nritems; i++) { @@ -913,7 +945,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); if (disk_bytenr == 0) continue; - ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, + ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr, btrfs_file_extent_disk_num_bytes(buf, fi), root->root_key.objectid, trans->transid, key.objectid, key.offset); @@ -924,7 +956,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, } else { bytenr = btrfs_node_blockptr(buf, i); btrfs_node_key_to_cpu(buf, &key, i); - ret = btrfs_inc_extent_ref(trans, root, bytenr, + ret = __btrfs_inc_extent_ref(trans, root, bytenr, btrfs_level_size(root, level - 1), root->root_key.objectid, trans->transid, @@ -935,6 +967,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, } } } + mutex_unlock(&root->fs_info->alloc_mutex); return 0; fail: WARN_ON(1); @@ -965,6 +998,7 @@ fail: } } #endif + mutex_unlock(&root->fs_info->alloc_mutex); return ret; } @@ -1019,6 +1053,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; + mutex_lock(&root->fs_info->alloc_mutex); while(1) { ret = find_first_extent_bit(block_group_cache, last, &start, &end, BLOCK_GROUP_DIRTY); @@ -1045,6 +1080,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, BLOCK_GROUP_DIRTY, GFP_NOFS); } btrfs_free_path(path); + mutex_unlock(&root->fs_info->alloc_mutex); return werr; } @@ -1162,26 +1198,28 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, space_info->force_alloc = 0; } if (space_info->full) - return 0; + goto out; thresh = div_factor(space_info->total_bytes, 6); if (!force && (space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < thresh) - return 0; + goto out; + mutex_lock(&extent_root->fs_info->chunk_mutex); ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags); if (ret == -ENOSPC) { printk("space info full %Lu\n", flags); space_info->full = 1; - return 0; + goto out; } BUG_ON(ret); ret = btrfs_make_block_group(trans, extent_root, 0, flags, BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); BUG_ON(ret); - + mutex_unlock(&extent_root->fs_info->chunk_mutex); +out: return 0; } @@ -1318,6 +1356,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct extent_io_tree *free_space_cache; free_space_cache = &root->fs_info->free_space_cache; + mutex_lock(&root->fs_info->alloc_mutex); while(1) { ret = find_first_extent_bit(unpin, 0, &start, &end, EXTENT_DIRTY); @@ -1327,6 +1366,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, clear_extent_dirty(unpin, start, end, GFP_NOFS); set_extent_dirty(free_space_cache, start, end, GFP_NOFS); } + mutex_unlock(&root->fs_info->alloc_mutex); return 0; } @@ -1363,18 +1403,24 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, GFP_NOFS); eb = read_tree_block(extent_root, ins.objectid, ins.offset, trans->transid); + btrfs_tree_lock(eb); level = btrfs_header_level(eb); if (level == 0) { btrfs_item_key(eb, &first, 0); } else { btrfs_node_key(eb, &first, 0); } + btrfs_tree_unlock(eb); + free_extent_buffer(eb); + /* + * the first key is just a hint, so the race we've created + * against reading it is fine + */ err = btrfs_insert_extent_backref(trans, extent_root, path, start, extent_root->root_key.objectid, 0, level, btrfs_disk_key_objectid(&first)); BUG_ON(err); - free_extent_buffer(eb); } btrfs_free_path(path); return 0; @@ -1384,12 +1430,14 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, int pending) { int err = 0; - struct extent_buffer *buf; if (!pending) { +#if 0 + struct extent_buffer *buf; buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (buf) { - if (btrfs_buffer_uptodate(buf, 0)) { + if (!btrfs_try_tree_lock(buf) && + btrfs_buffer_uptodate(buf, 0)) { u64 transid = root->fs_info->running_transaction->transid; u64 header_transid = @@ -1398,12 +1446,15 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { clean_tree_block(NULL, root, buf); + btrfs_tree_unlock(buf); free_extent_buffer(buf); return 1; } + btrfs_tree_unlock(buf); } free_extent_buffer(buf); } +#endif update_pinned_extents(root, bytenr, num_bytes, 1); } else { set_extent_bits(&root->fs_info->pending_del, @@ -1586,10 +1637,11 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct /* * remove an extent from the root, returns 0 on success */ -int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 bytenr, u64 num_bytes, - u64 root_objectid, u64 ref_generation, - u64 owner_objectid, u64 owner_offset, int pin) +static int __btrfs_free_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 num_bytes, u64 root_objectid, + u64 ref_generation, u64 owner_objectid, + u64 owner_offset, int pin) { struct btrfs_root *extent_root = root->fs_info->extent_root; int pending_ret; @@ -1610,6 +1662,22 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root return ret ? ret : pending_ret; } +int btrfs_free_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 num_bytes, u64 root_objectid, + u64 ref_generation, u64 owner_objectid, + u64 owner_offset, int pin) +{ + int ret; + + maybe_lock_mutex(root); + ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, + root_objectid, ref_generation, + owner_objectid, owner_offset, pin); + maybe_unlock_mutex(root); + return ret; +} + static u64 stripe_align(struct btrfs_root *root, u64 val) { u64 mask = ((u64)root->stripesize - 1); @@ -1679,12 +1747,12 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, block_group = btrfs_lookup_first_block_group(info, hint_byte); if (!block_group) hint_byte = search_start; - block_group = btrfs_find_block_group(root, block_group, + block_group = __btrfs_find_block_group(root, block_group, hint_byte, data, 1); if (last_ptr && *last_ptr == 0 && block_group) hint_byte = block_group->key.objectid; } else { - block_group = btrfs_find_block_group(root, + block_group = __btrfs_find_block_group(root, trans->block_group, search_start, data, 1); } @@ -1806,7 +1874,7 @@ enospc: } block_group = btrfs_lookup_first_block_group(info, search_start); cond_resched(); - block_group = btrfs_find_block_group(root, block_group, + block_group = __btrfs_find_block_group(root, block_group, search_start, data, 0); goto check_failed; @@ -1843,6 +1911,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_key keys[2]; + maybe_lock_mutex(root); + if (data) { alloc_profile = info->avail_data_alloc_bits & info->data_alloc_profile; @@ -1892,9 +1962,10 @@ again: if (ret) { printk("allocation failed flags %Lu\n", data); } - BUG_ON(ret); - if (ret) - return ret; + if (ret) { + BUG(); + goto out; + } /* block accounting for super block */ super_used = btrfs_super_bytes_used(&info->super_copy); @@ -1953,11 +2024,11 @@ again: finish_current_insert(trans, extent_root); pending_ret = del_pending_extents(trans, extent_root); - if (ret) { - return ret; - } + if (ret) + goto out; if (pending_ret) { - return pending_ret; + ret = pending_ret; + goto out; } update_block: @@ -1967,36 +2038,15 @@ update_block: ins->objectid, ins->offset); BUG(); } - return 0; +out: + maybe_unlock_mutex(root); + return ret; } - /* * helper function to allocate a block for a given tree * returns the tree buffer or NULL. */ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u32 blocksize, - u64 root_objectid, u64 hint, - u64 empty_size) -{ - u64 ref_generation; - - if (root->ref_cows) - ref_generation = trans->transid; - else - ref_generation = 0; - - - return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid, - ref_generation, 0, 0, hint, empty_size); -} - -/* - * helper function to allocate a block for a given tree - * returns the tree buffer or NULL. - */ -struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 blocksize, u64 root_objectid, @@ -2026,6 +2076,7 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); } btrfs_set_header_generation(buf, trans->transid); + btrfs_tree_lock(buf); clean_tree_block(trans, root, buf); btrfs_set_buffer_uptodate(buf); @@ -2076,7 +2127,7 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); if (disk_bytenr == 0) continue; - ret = btrfs_free_extent(trans, root, disk_bytenr, + ret = __btrfs_free_extent(trans, root, disk_bytenr, btrfs_file_extent_disk_num_bytes(leaf, fi), leaf_owner, leaf_generation, key.objectid, key.offset, 0); @@ -2151,6 +2202,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, int ret; u32 refs; + mutex_lock(&root->fs_info->alloc_mutex); + WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = lookup_extent_ref(trans, root, @@ -2182,6 +2235,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, bytenr = btrfs_node_blockptr(cur, path->slots[*level]); ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); blocksize = btrfs_level_size(root, *level - 1); + ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs); BUG_ON(ret); if (refs != 1) { @@ -2189,7 +2243,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, root_owner = btrfs_header_owner(parent); root_gen = btrfs_header_generation(parent); path->slots[*level]++; - ret = btrfs_free_extent(trans, root, bytenr, + ret = __btrfs_free_extent(trans, root, bytenr, blocksize, root_owner, root_gen, 0, 0, 1); BUG_ON(ret); @@ -2201,9 +2255,11 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, reada_walk_down(root, cur, path->slots[*level]); mutex_unlock(&root->fs_info->fs_mutex); + mutex_unlock(&root->fs_info->alloc_mutex); next = read_tree_block(root, bytenr, blocksize, ptr_gen); mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->alloc_mutex); /* we've dropped the lock, double check */ ret = lookup_extent_ref(trans, root, bytenr, @@ -2216,7 +2272,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, path->slots[*level]++; free_extent_buffer(next); - ret = btrfs_free_extent(trans, root, bytenr, + ret = __btrfs_free_extent(trans, root, bytenr, blocksize, root_owner, root_gen, 0, 0, 1); @@ -2244,13 +2300,14 @@ out: } root_gen = btrfs_header_generation(parent); - ret = btrfs_free_extent(trans, root, path->nodes[*level]->start, + ret = __btrfs_free_extent(trans, root, path->nodes[*level]->start, path->nodes[*level]->len, root_owner, root_gen, 0, 0, 1); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; BUG_ON(ret); + mutex_unlock(&root->fs_info->alloc_mutex); return 0; } @@ -2350,6 +2407,12 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_node_key(node, &found_key, path->slots[level]); WARN_ON(memcmp(&found_key, &root_item->drop_progress, sizeof(found_key))); + for (i = 0; i < BTRFS_MAX_LEVEL; i++) { + if (path->nodes[i] && path->locks[i]) { + path->locks[i] = 0; + btrfs_tree_unlock(path->nodes[i]); + } + } } while(1) { wret = walk_down_tree(trans, root, path, &level); @@ -2383,6 +2446,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) u64 end; u64 ptr; int ret; + + mutex_lock(&info->alloc_mutex); while(1) { ret = find_first_extent_bit(&info->block_group_cache, 0, &start, &end, (unsigned int)-1); @@ -2402,6 +2467,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) clear_extent_dirty(&info->free_space_cache, start, end, GFP_NOFS); } + mutex_unlock(&info->alloc_mutex); return 0; } @@ -2678,6 +2744,7 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, eb = read_tree_block(found_root, extent_key->objectid, extent_key->offset, 0); + btrfs_tree_lock(eb); level = btrfs_header_level(eb); if (level == 0) @@ -2685,6 +2752,7 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, else btrfs_node_key_to_cpu(eb, &found_key, 0); + btrfs_tree_unlock(eb); free_extent_buffer(eb); ret = find_root_for_ref(extent_root, path, &found_key, @@ -2888,6 +2956,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) int ret; int progress; + mutex_lock(&root->fs_info->alloc_mutex); shrink_block_group = btrfs_lookup_block_group(root->fs_info, shrink_start); BUG_ON(!shrink_block_group); @@ -3044,20 +3113,22 @@ next: (unsigned int)-1, GFP_NOFS); out: btrfs_free_path(path); + mutex_unlock(&root->fs_info->alloc_mutex); return ret; } int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key) { - int ret; + int ret = 0; struct btrfs_key found_key; struct extent_buffer *leaf; int slot; ret = btrfs_search_slot(NULL, root, key, path, 0, 0); if (ret < 0) - return ret; + goto out; + while(1) { slot = path->slots[0]; leaf = path->nodes[0]; @@ -3066,18 +3137,20 @@ int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path, if (ret == 0) continue; if (ret < 0) - goto error; + goto out; break; } btrfs_item_key_to_cpu(leaf, &found_key, slot); if (found_key.objectid >= key->objectid && - found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) - return 0; + found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { + ret = 0; + goto out; + } path->slots[0]++; } ret = -ENOENT; -error: +out: return ret; } @@ -3103,6 +3176,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) if (!path) return -ENOMEM; + mutex_lock(&root->fs_info->alloc_mutex); while(1) { ret = find_first_block_group(root, path, &key); if (ret > 0) { @@ -3158,6 +3232,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) ret = 0; error: btrfs_free_path(path); + mutex_unlock(&root->fs_info->alloc_mutex); return ret; } @@ -3205,5 +3280,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, ret = del_pending_extents(trans, extent_root); BUG_ON(ret); set_avail_alloc_bits(extent_root->fs_info, type); + return 0; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 17c508a941e..bd15cdcaba9 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2889,7 +2889,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); - lock_page(page); if (i == 0) set_page_extent_head(page, eb->len); else @@ -2907,7 +2906,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, end = start + PAGE_CACHE_SIZE - 1; if (test_range_bit(tree, start, end, EXTENT_DIRTY, 0)) { - unlock_page(page); continue; } } @@ -2919,7 +2917,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, PAGECACHE_TAG_DIRTY); } read_unlock_irq(&page->mapping->tree_lock); - unlock_page(page); } return 0; } @@ -2948,17 +2945,12 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, * on us if the page isn't already dirty. */ if (i == 0) { - lock_page(page); set_page_extent_head(page, eb->len); } else if (PagePrivate(page) && page->private != EXTENT_PAGE_PRIVATE) { - lock_page(page); set_page_extent_mapped(page); - unlock_page(page); } __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); - if (i == 0) - unlock_page(page); } return set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0c79346fd2c..61bd8953a68 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -115,6 +115,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); btrfs_set_trans_block_group(trans, inode); + mutex_unlock(&root->fs_info->fs_mutex); num_bytes = (end - start + blocksize) & ~(blocksize - 1); num_bytes = max(blocksize, num_bytes); @@ -159,6 +160,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) btrfs_add_ordered_inode(inode); btrfs_update_inode(trans, root, inode); out: + mutex_lock(&root->fs_info->fs_mutex); btrfs_end_transaction(trans, root); return ret; } @@ -349,10 +351,12 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + mutex_unlock(&root->fs_info->fs_mutex); btrfs_set_trans_block_group(trans, inode); btrfs_csum_file_blocks(trans, root, inode, bio, sums); + mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); @@ -807,6 +811,7 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, goto err; } ret = btrfs_delete_one_dir_name(trans, root, path, di); + btrfs_release_path(root, path); dentry->d_inode->i_ctime = dir->i_ctime; ret = btrfs_del_inode_ref(trans, root, name, name_len, @@ -881,8 +886,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) struct btrfs_trans_handle *trans; unsigned long nr = 0; - if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) + if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) { return -ENOTEMPTY; + } mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, 1, 1); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 6fb45580275..3fbf74e93db 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -43,6 +43,7 @@ #include "ioctl.h" #include "print-tree.h" #include "volumes.h" +#include "locking.h" @@ -75,9 +76,9 @@ static noinline int create_subvol(struct btrfs_root *root, char *name, if (ret) goto fail; - leaf = __btrfs_alloc_free_block(trans, root, root->leafsize, - objectid, trans->transid, 0, 0, - 0, 0); + leaf = btrfs_alloc_free_block(trans, root, root->leafsize, + objectid, trans->transid, 0, 0, + 0, 0); if (IS_ERR(leaf)) return PTR_ERR(leaf); @@ -108,6 +109,7 @@ static noinline int create_subvol(struct btrfs_root *root, char *name, memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); root_item.drop_level = 0; + btrfs_tree_unlock(leaf); free_extent_buffer(leaf); leaf = NULL; diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c new file mode 100644 index 00000000000..80813a307b4 --- /dev/null +++ b/fs/btrfs/locking.c @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ +#include +#include +#include +#include +#include +#include +#include "ctree.h" +#include "extent_io.h" +#include "locking.h" + +int btrfs_tree_lock(struct extent_buffer *eb) +{ + lock_page(eb->first_page); + return 0; +} + +int btrfs_try_tree_lock(struct extent_buffer *eb) +{ + return TestSetPageLocked(eb->first_page); +} + +int btrfs_tree_unlock(struct extent_buffer *eb) +{ + WARN_ON(!PageLocked(eb->first_page)); + unlock_page(eb->first_page); + return 0; +} + +int btrfs_tree_locked(struct extent_buffer *eb) +{ + return PageLocked(eb->first_page); +} + diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h new file mode 100644 index 00000000000..2dab96d8280 --- /dev/null +++ b/fs/btrfs/locking.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_LOCKING_ +#define __BTRFS_LOCKING_ + +int btrfs_tree_lock(struct extent_buffer *eb); +int btrfs_tree_unlock(struct extent_buffer *eb); +int btrfs_tree_locked(struct extent_buffer *eb); +int btrfs_try_tree_lock(struct extent_buffer *eb); +#endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f04684f7fea..1ed433a7149 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -23,6 +23,7 @@ #include "ctree.h" #include "disk-io.h" #include "transaction.h" +#include "locking.h" static int total_trans = 0; extern struct kmem_cache *btrfs_trans_handle_cachep; @@ -96,8 +97,7 @@ static noinline int record_root_in_trans(struct btrfs_root *root) radix_tree_tag_set(&root->fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_DEFRAG_TAG); - root->commit_root = root->node; - extent_buffer_get(root->node); + root->commit_root = btrfs_root_node(root); } else { WARN_ON(1); } @@ -559,6 +559,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *root = pending->root; struct extent_buffer *tmp; + struct extent_buffer *old; int ret; int namelen; u64 objectid; @@ -578,16 +579,18 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, key.offset = 1; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); - extent_buffer_get(root->node); - btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); - free_extent_buffer(tmp); + old = btrfs_lock_root_node(root); + btrfs_cow_block(trans, root, old, NULL, 0, &old); - btrfs_copy_root(trans, root, root->node, &tmp, objectid); + btrfs_copy_root(trans, root, old, &tmp, objectid); + btrfs_tree_unlock(old); + free_extent_buffer(old); btrfs_set_root_bytenr(new_root_item, tmp->start); btrfs_set_root_level(new_root_item, btrfs_header_level(tmp)); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, new_root_item); + btrfs_tree_unlock(tmp); free_extent_buffer(tmp); if (ret) goto fail; diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 155961c7b4d..fab851d8538 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -181,6 +181,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, if (root->fs_info->extent_root == root) is_extent = 1; + goto out; + if (root->ref_cows == 0 && !is_extent) goto out; -- cgit v1.2.3-70-g09d2 From a213501153fd66e2359e091b1612841305ba6551 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:30 -0400 Subject: Btrfs: Replace the big fs_mutex with a collection of other locks Extent alloctions are still protected by a large alloc_mutex. Objectid allocations are covered by a objectid mutex Other btree operations are protected by a lock on individual btree nodes Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 40 +++++++++++++++++++++----- fs/btrfs/ctree.h | 5 ++-- fs/btrfs/disk-io.c | 15 +++++----- fs/btrfs/extent-tree.c | 18 +++++------- fs/btrfs/file.c | 7 +---- fs/btrfs/inode-map.c | 8 ++++++ fs/btrfs/inode.c | 76 ++++---------------------------------------------- fs/btrfs/ioctl.c | 24 ++++------------ fs/btrfs/super.c | 2 -- fs/btrfs/transaction.c | 42 +++++++++++----------------- fs/btrfs/volumes.c | 19 +++++++++---- fs/btrfs/xattr.c | 10 +------ 12 files changed, 101 insertions(+), 165 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 75625c68fdd..dff4da082d0 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -63,6 +63,9 @@ void btrfs_free_path(struct btrfs_path *p) void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) { int i; + int skip = p->skip_locking; + int keep = p->keep_locks; + for (i = 0; i < BTRFS_MAX_LEVEL; i++) { if (!p->nodes[i]) continue; @@ -73,6 +76,8 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) free_extent_buffer(p->nodes[i]); } memset(p, 0, sizeof(*p)); + p->skip_locking = skip; + p->keep_locks = keep; } struct extent_buffer *btrfs_root_node(struct btrfs_root *root) @@ -1202,13 +1207,19 @@ static void unlock_up(struct btrfs_path *path, int level, int lowest_unlock) u32 nritems; t = path->nodes[i]; nritems = btrfs_header_nritems(t); - if (path->slots[i] >= nritems - 1) { + if (nritems < 2 || path->slots[i] >= nritems - 2) { +if (path->keep_locks) { +//printk("path %p skip level now %d\n", path, skip_level); +} skip_level = i + 1; continue; } } t = path->nodes[i]; if (i >= lowest_unlock && i > skip_level && path->locks[i]) { +if (path->keep_locks) { +//printk("path %p unlocking level %d slot %d nritems %d skip_level %d\n", path, i, path->slots[i], btrfs_header_nritems(t), skip_level); +} btrfs_tree_unlock(t); path->locks[i] = 0; } @@ -1243,7 +1254,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root lowest_level = p->lowest_level; WARN_ON(lowest_level && ins_len); WARN_ON(p->nodes[0] != NULL); - // WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); WARN_ON(root == root->fs_info->extent_root && !mutex_is_locked(&root->fs_info->alloc_mutex)); WARN_ON(root == root->fs_info->chunk_root && @@ -1321,7 +1331,7 @@ again: b = read_node_slot(root, b, slot); if (!p->skip_locking) btrfs_tree_lock(b); - unlock_up(p, level, lowest_unlock); + unlock_up(p, level + 1, lowest_unlock); } else { p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, b) < @@ -1804,6 +1814,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root if (slot >= btrfs_header_nritems(upper) - 1) return 1; + WARN_ON(!btrfs_tree_locked(path->nodes[1])); + right = read_node_slot(root, upper, slot + 1); btrfs_tree_lock(right); free_space = btrfs_leaf_free_space(root, right); @@ -1981,6 +1993,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } + WARN_ON(!btrfs_tree_locked(path->nodes[1])); + left = read_node_slot(root, path->nodes[1], slot - 1); btrfs_tree_lock(left); free_space = btrfs_leaf_free_space(root, left); @@ -2957,15 +2971,16 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); - path->keep_locks = 1; btrfs_release_path(root, path); + path->keep_locks = 1; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); path->keep_locks = 0; if (ret < 0) return ret; - if (path->slots[0] < nritems - 1) { + nritems = btrfs_header_nritems(path->nodes[0]); + if (nritems > 0 && path->slots[0] < nritems - 1) { goto done; } @@ -2992,8 +3007,17 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, c, slot); - if (!path->skip_locking) + if (!path->skip_locking) { + if (!btrfs_tree_locked(c)) { + int i; + WARN_ON(1); +printk("path %p no lock on level %d\n", path, level); +for (i = 0; i < BTRFS_MAX_LEVEL; i++) { +printk("path %p level %d slot %d nritems %d\n", path, i, path->slots[i], btrfs_header_nritems(path->nodes[i])); +} + } btrfs_tree_lock(next); + } break; } path->slots[level] = slot; @@ -3011,8 +3035,10 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) if (level == 1 && path->locks[1] && path->reada) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, next, 0); - if (!path->skip_locking) + if (!path->skip_locking) { + WARN_ON(!btrfs_tree_locked(path->nodes[level])); btrfs_tree_lock(next); + } } done: unlock_up(path, 0, 1); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 50891b39f36..692b8ea42de 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -519,9 +519,9 @@ struct btrfs_fs_info { struct backing_dev_info bdi; spinlock_t hash_lock; struct mutex trans_mutex; - struct mutex fs_mutex; struct mutex alloc_mutex; struct mutex chunk_mutex; + struct mutex drop_mutex; struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; @@ -554,7 +554,7 @@ struct btrfs_fs_info { struct completion kobj_unregister; int do_barriers; int closing; - unsigned long throttles; + atomic_t throttles; u64 total_pinned; struct list_head dirty_cowonly_roots; @@ -594,6 +594,7 @@ struct btrfs_root { struct inode *inode; struct kobject root_kobj; struct completion kobj_unregister; + struct mutex objectid_mutex; u64 objectid; u64 last_trans; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fe40bdd984f..f638803549e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -724,6 +724,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, INIT_LIST_HEAD(&root->dirty_list); spin_lock_init(&root->node_lock); + mutex_init(&root->objectid_mutex); memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); @@ -1146,6 +1147,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); atomic_set(&fs_info->nr_async_submits, 0); + atomic_set(&fs_info->throttles, 0); fs_info->sb = sb; fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; @@ -1199,7 +1201,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); mutex_init(&fs_info->trans_mutex); - mutex_init(&fs_info->fs_mutex); + mutex_init(&fs_info->drop_mutex); mutex_init(&fs_info->alloc_mutex); mutex_init(&fs_info->chunk_mutex); @@ -1278,8 +1280,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, goto fail_sb_buffer; } - mutex_lock(&fs_info->fs_mutex); - mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_sys_array(tree_root); mutex_unlock(&fs_info->chunk_mutex); @@ -1342,7 +1342,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->metadata_alloc_profile = (u64)-1; fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; - mutex_unlock(&fs_info->fs_mutex); return tree_root; fail_extent_root: @@ -1350,7 +1349,6 @@ fail_extent_root: fail_tree_root: free_extent_buffer(tree_root->node); fail_sys_array: - mutex_unlock(&fs_info->fs_mutex); fail_sb_buffer: extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); btrfs_stop_workers(&fs_info->workers); @@ -1562,8 +1560,9 @@ int close_ctree(struct btrfs_root *root) struct btrfs_fs_info *fs_info = root->fs_info; fs_info->closing = 1; + smp_mb(); + btrfs_transaction_flush_work(root); - mutex_lock(&fs_info->fs_mutex); btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); @@ -1574,7 +1573,6 @@ int close_ctree(struct btrfs_root *root) BUG_ON(ret); write_ctree_super(NULL, root); - mutex_unlock(&fs_info->fs_mutex); btrfs_transaction_flush_work(root); @@ -1679,7 +1677,8 @@ void btrfs_throttle(struct btrfs_root *root) struct backing_dev_info *bdi; bdi = &root->fs_info->bdi; - if (root->fs_info->throttles && bdi_write_congested(bdi)) { + if (atomic_read(&root->fs_info->throttles) && + bdi_write_congested(bdi)) { #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) congestion_wait(WRITE, HZ/20); #else diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7e40c516fe6..890b9e9d8e2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1577,9 +1577,11 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root } /* block accounting for super block */ + spin_lock_irq(&info->delalloc_lock); super_used = btrfs_super_bytes_used(&info->super_copy); btrfs_set_super_bytes_used(&info->super_copy, super_used - num_bytes); + spin_unlock_irq(&info->delalloc_lock); /* block accounting for root item */ root_used = btrfs_root_used(&root->root_item); @@ -1968,8 +1970,10 @@ again: } /* block accounting for super block */ + spin_lock_irq(&info->delalloc_lock); super_used = btrfs_super_bytes_used(&info->super_copy); btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes); + spin_unlock_irq(&info->delalloc_lock); /* block accounting for root item */ root_used = btrfs_root_used(&root->root_item); @@ -2172,12 +2176,12 @@ static void noinline reada_walk_down(struct btrfs_root *root, continue; } } - mutex_unlock(&root->fs_info->fs_mutex); + mutex_unlock(&root->fs_info->alloc_mutex); ret = readahead_tree_block(root, bytenr, blocksize, btrfs_node_ptr_generation(node, i)); last = bytenr + blocksize; cond_resched(); - mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->alloc_mutex); if (ret) break; } @@ -2254,11 +2258,9 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, free_extent_buffer(next); reada_walk_down(root, cur, path->slots[*level]); - mutex_unlock(&root->fs_info->fs_mutex); mutex_unlock(&root->fs_info->alloc_mutex); next = read_tree_block(root, bytenr, blocksize, ptr_gen); - mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->alloc_mutex); /* we've dropped the lock, double check */ @@ -2381,6 +2383,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root int orig_level; struct btrfs_root_item *root_item = &root->root_item; + WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex)); path = btrfs_alloc_path(); BUG_ON(!path); @@ -2710,7 +2713,6 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, *last_file_root == ref_root) goto out; - mutex_unlock(&extent_root->fs_info->fs_mutex); inode = btrfs_iget_locked(extent_root->fs_info->sb, ref_objectid, found_root); if (inode->i_state & I_NEW) { @@ -2727,7 +2729,6 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, * the latest version of the tree root */ if (is_bad_inode(inode)) { - mutex_lock(&extent_root->fs_info->fs_mutex); goto out; } *last_file_objectid = inode->i_ino; @@ -2736,7 +2737,6 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, relocate_inode_pages(inode, ref_offset, extent_key->offset); iput(inode); - mutex_lock(&extent_root->fs_info->fs_mutex); } else { struct btrfs_trans_handle *trans; struct extent_buffer *eb; @@ -3033,9 +3033,7 @@ next: if (progress && need_resched()) { memcpy(&key, &found_key, sizeof(key)); - mutex_unlock(&root->fs_info->fs_mutex); cond_resched(); - mutex_lock(&root->fs_info->fs_mutex); btrfs_release_path(root, path); btrfs_search_slot(NULL, root, &key, path, 0, 0); progress = 0; @@ -3068,9 +3066,7 @@ next: trans = btrfs_start_transaction(tree_root, 1); btrfs_commit_transaction(trans, tree_root); - mutex_unlock(&root->fs_info->fs_mutex); btrfs_clean_old_snapshots(tree_root); - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(tree_root, 1); btrfs_commit_transaction(trans, tree_root); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 73c6d085bd9..18bbe108a0e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -252,7 +252,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, end_of_last_block = start_pos + num_bytes - 1; lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); if (!trans) { err = -ENOMEM; @@ -341,7 +340,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, failed: err = btrfs_end_transaction(trans, root); out_unlock: - mutex_unlock(&root->fs_info->fs_mutex); unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); return err; } @@ -905,9 +903,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, WARN_ON(num_pages > nrptrs); memset(pages, 0, sizeof(pages)); - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, write_bytes, 0); - mutex_unlock(&root->fs_info->fs_mutex); if (ret) goto out; @@ -998,9 +994,9 @@ static int btrfs_sync_file(struct file *file, * check the transaction that last modified this inode * and see if its already been committed */ - mutex_lock(&root->fs_info->fs_mutex); if (!BTRFS_I(inode)->last_trans) goto out; + mutex_lock(&root->fs_info->trans_mutex); if (BTRFS_I(inode)->last_trans <= root->fs_info->last_trans_committed) { @@ -1023,7 +1019,6 @@ static int btrfs_sync_file(struct file *file, } ret = btrfs_commit_transaction(trans, root); out: - mutex_unlock(&root->fs_info->fs_mutex); return ret > 0 ? EIO : ret; } diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index a0925eabdaa..298346ae148 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -69,6 +69,12 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, struct btrfs_key search_key; u64 search_start = dirid; + mutex_lock(&root->objectid_mutex); + if (root->last_inode_alloc) { + *objectid = ++root->last_inode_alloc; + mutex_unlock(&root->objectid_mutex); + return 0; + } path = btrfs_alloc_path(); BUG_ON(!path); search_start = root->last_inode_alloc; @@ -124,9 +130,11 @@ found: btrfs_release_path(root, path); btrfs_free_path(path); BUG_ON(*objectid < search_start); + mutex_unlock(&root->objectid_mutex); return 0; error: btrfs_release_path(root, path); btrfs_free_path(path); + mutex_unlock(&root->objectid_mutex); return ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 61bd8953a68..b2251e27ac8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -79,12 +79,15 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, int for_del) { - u64 total = btrfs_super_total_bytes(&root->fs_info->super_copy); - u64 used = btrfs_super_bytes_used(&root->fs_info->super_copy); + u64 total; + u64 used; u64 thresh; unsigned long flags; int ret = 0; + spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); + total = btrfs_super_total_bytes(&root->fs_info->super_copy); + used = btrfs_super_bytes_used(&root->fs_info->super_copy); if (for_del) thresh = total * 90; else @@ -92,7 +95,6 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, do_div(thresh, 100); - spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); if (used + root->fs_info->delalloc_bytes + num_required > thresh) ret = -ENOSPC; spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); @@ -115,7 +117,6 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); btrfs_set_trans_block_group(trans, inode); - mutex_unlock(&root->fs_info->fs_mutex); num_bytes = (end - start + blocksize) & ~(blocksize - 1); num_bytes = max(blocksize, num_bytes); @@ -160,7 +161,6 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) btrfs_add_ordered_inode(inode); btrfs_update_inode(trans, root, inode); out: - mutex_lock(&root->fs_info->fs_mutex); btrfs_end_transaction(trans, root); return ret; } @@ -269,14 +269,13 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret; - mutex_lock(&root->fs_info->fs_mutex); + if (btrfs_test_opt(root, NODATACOW) || btrfs_test_flag(inode, NODATACOW)) ret = run_delalloc_nocow(inode, start, end); else ret = cow_file_range(inode, start, end); - mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -349,17 +348,13 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, ret = btrfs_csum_one_bio(root, bio, &sums); BUG_ON(ret); - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); - mutex_unlock(&root->fs_info->fs_mutex); btrfs_set_trans_block_group(trans, inode); btrfs_csum_file_blocks(trans, root, inode, bio, sums); - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); kfree(sums); @@ -404,7 +399,6 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) btrfs_test_flag(inode, NODATASUM)) return 0; - mutex_lock(&root->fs_info->fs_mutex); path = btrfs_alloc_path(); item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0); if (IS_ERR(item)) { @@ -422,7 +416,6 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) out: if (path) btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -616,7 +609,6 @@ void btrfs_read_locked_inode(struct inode *inode) path = btrfs_alloc_path(); BUG_ON(!path); - mutex_lock(&root->fs_info->fs_mutex); memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); ret = btrfs_lookup_inode(NULL, root, path, &location, 0); @@ -662,8 +654,6 @@ void btrfs_read_locked_inode(struct inode *inode) btrfs_free_path(path); inode_item = NULL; - mutex_unlock(&root->fs_info->fs_mutex); - switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_mapping->a_ops = &btrfs_aops; @@ -691,9 +681,7 @@ void btrfs_read_locked_inode(struct inode *inode) return; make_bad: - btrfs_release_path(root, path); btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); make_bad_inode(inode); } @@ -758,7 +746,6 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, btrfs_set_inode_last_trans(trans, inode); ret = 0; failed: - btrfs_release_path(root, path); btrfs_free_path(path); return ret; } @@ -849,7 +836,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) unsigned long nr = 0; root = BTRFS_I(dir)->root; - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, 1, 1); if (ret) @@ -871,7 +857,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) btrfs_end_transaction(trans, root); fail: - mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); btrfs_throttle(root); return ret; @@ -890,7 +875,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) return -ENOTEMPTY; } - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, 1, 1); if (ret) goto fail; @@ -907,7 +891,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) nr = trans->blocks_used; ret = btrfs_end_transaction(trans, root); fail: - mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); btrfs_throttle(root); @@ -1129,7 +1112,6 @@ error: ret = btrfs_del_items(trans, root, path, pending_del_slot, pending_del_nr); } - btrfs_release_path(root, path); btrfs_free_path(path); inode->i_sb->s_dirt = 1; return ret; @@ -1234,9 +1216,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_size <= hole_start) goto out; - mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, 1, 0); - mutex_unlock(&root->fs_info->fs_mutex); if (err) goto fail; @@ -1245,7 +1225,6 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); hole_size = block_end - hole_start; - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); err = btrfs_drop_extents(trans, root, inode, @@ -1262,7 +1241,6 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_check_file(root, inode); } btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); if (err) return err; @@ -1286,7 +1264,6 @@ void btrfs_delete_inode(struct inode *inode) } inode->i_size = 0; - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); @@ -1298,7 +1275,6 @@ void btrfs_delete_inode(struct inode *inode) clear_inode(inode); btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); btrfs_throttle(root); return; @@ -1306,7 +1282,6 @@ void btrfs_delete_inode(struct inode *inode) no_delete_lock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); btrfs_throttle(root); no_delete: @@ -1402,7 +1377,6 @@ static int fixup_tree_root_location(struct btrfs_root *root, path = btrfs_alloc_path(); BUG_ON(!path); - mutex_lock(&root->fs_info->fs_mutex); *sub_root = btrfs_read_fs_root(root->fs_info, location, dentry->d_name.name, @@ -1416,7 +1390,6 @@ static int fixup_tree_root_location(struct btrfs_root *root, location->offset = 0; btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); return 0; } @@ -1482,9 +1455,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_inode_by_name(dir, dentry, &location); - mutex_unlock(&root->fs_info->fs_mutex); if (ret < 0) return ERR_PTR(ret); @@ -1559,7 +1530,6 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) filp->f_pos = 1; } - mutex_lock(&root->fs_info->fs_mutex); key.objectid = inode->i_ino; path = btrfs_alloc_path(); path->reada = 2; @@ -1668,9 +1638,7 @@ read_dir_items: nopos: ret = 0; err: - btrfs_release_path(root, path); btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -1681,11 +1649,9 @@ int btrfs_write_inode(struct inode *inode, int wait) int ret = 0; if (wait) { - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); ret = btrfs_commit_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); } return ret; } @@ -1701,12 +1667,10 @@ void btrfs_dirty_inode(struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); } static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, @@ -1874,7 +1838,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (!new_valid_dev(rdev)) return -EINVAL; - mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, 1, 0); if (err) goto fail; @@ -1912,8 +1875,6 @@ out_unlock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); fail: - mutex_unlock(&root->fs_info->fs_mutex); - if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -1934,7 +1895,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, unsigned long nr = 0; u64 objectid; - mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, 1, 0); if (err) goto fail; @@ -1980,8 +1940,6 @@ out_unlock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); fail: - mutex_unlock(&root->fs_info->fs_mutex); - if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -2009,7 +1967,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, #else inc_nlink(inode); #endif - mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, 1, 0); if (err) goto fail; @@ -2032,8 +1989,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, nr = trans->blocks_used; btrfs_end_transaction(trans, root); fail: - mutex_unlock(&root->fs_info->fs_mutex); - if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -2053,7 +2008,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) u64 objectid = 0; unsigned long nr = 1; - mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, 1, 0); if (err) goto out_unlock; @@ -2106,7 +2060,6 @@ out_fail: btrfs_end_transaction(trans, root); out_unlock: - mutex_unlock(&root->fs_info->fs_mutex); if (drop_on_err) iput(inode); btrfs_btree_balance_dirty(root, nr); @@ -2199,7 +2152,6 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, path = btrfs_alloc_path(); BUG_ON(!path); - mutex_lock(&root->fs_info->fs_mutex); again: spin_lock(&em_tree->lock); @@ -2402,7 +2354,6 @@ out: if (!err) err = ret; } - mutex_unlock(&root->fs_info->fs_mutex); if (err) { free_extent_map(em); WARN_ON(1); @@ -2584,9 +2535,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) int ret; u64 page_start; - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); - mutex_unlock(&root->fs_info->fs_mutex); if (ret) goto out; @@ -2631,7 +2580,6 @@ static void btrfs_truncate(struct inode *inode) btrfs_truncate_page(inode->i_mapping, inode->i_size); - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); @@ -2643,7 +2591,6 @@ static void btrfs_truncate(struct inode *inode) ret = btrfs_end_transaction(trans, root); BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); btrfs_throttle(root); } @@ -2827,7 +2774,6 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, struct inode *new_inode = new_dentry->d_inode; struct inode *old_inode = old_dentry->d_inode; struct timespec ctime = CURRENT_TIME; - struct btrfs_path *path; int ret; if (S_ISDIR(old_inode->i_mode) && new_inode && @@ -2835,7 +2781,6 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, return -ENOTEMPTY; } - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, 1, 0); if (ret) goto out_unlock; @@ -2843,11 +2788,6 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, new_dir); - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out_fail; - } old_dentry->d_inode->i_nlink++; old_dir->i_ctime = old_dir->i_mtime = ctime; @@ -2869,10 +2809,8 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, goto out_fail; out_fail: - btrfs_free_path(path); btrfs_end_transaction(trans, root); out_unlock: - mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -2898,7 +2836,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) return -ENAMETOOLONG; - mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, 1, 0); if (err) goto out_fail; @@ -2979,7 +2916,6 @@ out_unlock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); out_fail: - mutex_unlock(&root->fs_info->fs_mutex); if (drop_inode) { inode_dec_link_count(inode); iput(inode); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 3fbf74e93db..6002eb64daf 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -63,7 +63,6 @@ static noinline int create_subvol(struct btrfs_root *root, char *name, u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; unsigned long nr = 1; - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, 1, 0); if (ret) goto fail_commit; @@ -164,7 +163,6 @@ fail: if (err && !ret) ret = err; fail_commit: - mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); btrfs_throttle(root); return ret; @@ -181,7 +179,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) if (!root->ref_cows) return -EINVAL; - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, 1, 0); if (ret) goto fail_unlock; @@ -208,7 +205,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) err = btrfs_commit_transaction(trans, root); fail_unlock: - mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); btrfs_throttle(root); return ret; @@ -228,9 +224,7 @@ int btrfs_defrag_file(struct file *file) unsigned long i; int ret; - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, inode->i_size, 0); - mutex_unlock(&root->fs_info->fs_mutex); if (ret) return -ENOSPC; @@ -315,7 +309,8 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) goto out; } - mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->alloc_mutex); + mutex_lock(&root->fs_info->chunk_mutex); sizestr = vol_args->name; devstr = strchr(sizestr, ':'); if (devstr) { @@ -385,7 +380,8 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) } out_unlock: - mutex_unlock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->alloc_mutex); + mutex_lock(&root->fs_info->chunk_mutex); out: kfree(vol_args); return ret; @@ -428,11 +424,9 @@ static noinline int btrfs_ioctl_snap_create(struct btrfs_root *root, } root_dirid = root->fs_info->sb->s_root->d_inode->i_ino, - mutex_lock(&root->fs_info->fs_mutex); di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, path, root_dirid, vol_args->name, namelen, 0); - mutex_unlock(&root->fs_info->fs_mutex); btrfs_free_path(path); if (di && !IS_ERR(di)) { @@ -445,10 +439,12 @@ static noinline int btrfs_ioctl_snap_create(struct btrfs_root *root, goto out; } + mutex_lock(&root->fs_info->drop_mutex); if (root == root->fs_info->tree_root) ret = create_subvol(root, vol_args->name, namelen); else ret = create_snapshot(root, vol_args->name, namelen); + mutex_unlock(&root->fs_info->drop_mutex); out: kfree(vol_args); return ret; @@ -461,10 +457,8 @@ static int btrfs_ioctl_defrag(struct file *file) switch (inode->i_mode & S_IFMT) { case S_IFDIR: - mutex_lock(&root->fs_info->fs_mutex); btrfs_defrag_root(root, 0); btrfs_defrag_root(root->fs_info->extent_root, 0); - mutex_unlock(&root->fs_info->fs_mutex); break; case S_IFREG: btrfs_defrag_file(file); @@ -588,7 +582,6 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); } - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 0); path = btrfs_alloc_path(); if (!path) { @@ -685,7 +678,6 @@ out: unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); out_unlock: mutex_unlock(&src->i_mutex); @@ -711,7 +703,6 @@ long btrfs_ioctl_trans_start(struct file *file) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - mutex_lock(&root->fs_info->fs_mutex); if (file->private_data) { ret = -EINPROGRESS; goto out; @@ -723,7 +714,6 @@ long btrfs_ioctl_trans_start(struct file *file) ret = -ENOMEM; /*printk(KERN_INFO "btrfs_ioctl_trans_start on %p\n", file);*/ out: - mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -740,7 +730,6 @@ long btrfs_ioctl_trans_end(struct file *file) struct btrfs_trans_handle *trans; int ret = 0; - mutex_lock(&root->fs_info->fs_mutex); trans = file->private_data; if (!trans) { ret = -EINVAL; @@ -749,7 +738,6 @@ long btrfs_ioctl_trans_end(struct file *file) btrfs_end_transaction(trans, root); file->private_data = 0; out: - mutex_unlock(&root->fs_info->fs_mutex); return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 196d0e280b1..b61ded7a20c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -366,12 +366,10 @@ int btrfs_sync_fs(struct super_block *sb, int wait) return 0; } btrfs_clean_old_snapshots(root); - mutex_lock(&root->fs_info->fs_mutex); btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sb->s_dirt = 0; - mutex_unlock(&root->fs_info->fs_mutex); return ret; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 1ed433a7149..5a1ee0665ae 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -370,6 +370,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) struct btrfs_trans_handle *trans; unsigned long nr; + smp_mb(); if (root->defrag_running) return 0; trans = btrfs_start_transaction(root, 1); @@ -378,16 +379,15 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) ret = btrfs_defrag_leaves(trans, root, cacheonly); nr = trans->blocks_used; btrfs_end_transaction(trans, root); - mutex_unlock(&info->fs_mutex); btrfs_btree_balance_dirty(info->tree_root, nr); cond_resched(); - mutex_lock(&info->fs_mutex); trans = btrfs_start_transaction(root, 1); if (ret != -EAGAIN) break; } root->defrag_running = 0; + smp_mb(); radix_tree_tag_clear(&info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_DEFRAG_TAG); @@ -435,14 +435,14 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, while(!list_empty(list)) { struct btrfs_root *root; - mutex_lock(&tree_root->fs_info->fs_mutex); dirty = list_entry(list->next, struct dirty_root, list); list_del_init(&dirty->list); num_bytes = btrfs_root_used(&dirty->root->root_item); root = dirty->latest_root; - root->fs_info->throttles++; + atomic_inc(&root->fs_info->throttles); + mutex_lock(&root->fs_info->drop_mutex); while(1) { trans = btrfs_start_transaction(tree_root, 1); ret = btrfs_drop_snapshot(trans, dirty->root); @@ -459,14 +459,16 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, nr = trans->blocks_used; ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); - mutex_unlock(&tree_root->fs_info->fs_mutex); + + mutex_unlock(&root->fs_info->drop_mutex); btrfs_btree_balance_dirty(tree_root, nr); cond_resched(); - mutex_lock(&tree_root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->drop_mutex); } BUG_ON(ret); - root->fs_info->throttles--; + atomic_dec(&root->fs_info->throttles); + mutex_lock(&root->fs_info->alloc_mutex); num_bytes -= btrfs_root_used(&dirty->root->root_item); bytes_used = btrfs_root_used(&root->root_item); if (num_bytes) { @@ -474,11 +476,15 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, btrfs_set_root_used(&root->root_item, bytes_used - num_bytes); } + mutex_unlock(&root->fs_info->alloc_mutex); + ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); if (ret) { BUG(); break; } + mutex_unlock(&root->fs_info->drop_mutex); + nr = trans->blocks_used; ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); @@ -486,7 +492,6 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, free_extent_buffer(dirty->root->node); kfree(dirty->root); kfree(dirty); - mutex_unlock(&tree_root->fs_info->fs_mutex); btrfs_btree_balance_dirty(tree_root, nr); cond_resched(); @@ -503,7 +508,7 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, u64 objectid = 0; int ret; - root->fs_info->throttles++; + atomic_inc(&root->fs_info->throttles); while(1) { ret = btrfs_find_first_ordered_inode( &cur_trans->ordered_inode_tree, @@ -512,7 +517,6 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, break; mutex_unlock(&root->fs_info->trans_mutex); - mutex_unlock(&root->fs_info->fs_mutex); if (S_ISREG(inode->i_mode)) { atomic_inc(&BTRFS_I(inode)->ordered_writeback); @@ -521,7 +525,6 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, } iput(inode); - mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); } while(1) { @@ -533,7 +536,6 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, if (!ret) break; mutex_unlock(&root->fs_info->trans_mutex); - mutex_unlock(&root->fs_info->fs_mutex); if (S_ISREG(inode->i_mode)) { atomic_inc(&BTRFS_I(inode)->ordered_writeback); @@ -543,10 +545,9 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, atomic_dec(&inode->i_count); iput(inode); - mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); } - root->fs_info->throttles--; + atomic_dec(&root->fs_info->throttles); return 0; } @@ -661,7 +662,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->trans_mutex); btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); ret = wait_for_commit(root, cur_trans); BUG_ON(ret); @@ -669,7 +669,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, put_transaction(cur_trans); mutex_unlock(&root->fs_info->trans_mutex); - mutex_lock(&root->fs_info->fs_mutex); return 0; } @@ -687,12 +686,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction, list); if (!prev_trans->commit_done) { prev_trans->use_count++; - mutex_unlock(&root->fs_info->fs_mutex); mutex_unlock(&root->fs_info->trans_mutex); wait_for_commit(root, prev_trans); - mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); put_transaction(prev_trans); } @@ -709,12 +706,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, else timeout = 1; - mutex_unlock(&root->fs_info->fs_mutex); mutex_unlock(&root->fs_info->trans_mutex); schedule_timeout(timeout); - mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); finish_wait(&cur_trans->writer_wait, &wait); ret = btrfs_write_ordered_inodes(trans, root); @@ -755,12 +750,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_copy_pinned(root, pinned_copy); mutex_unlock(&root->fs_info->trans_mutex); - mutex_unlock(&root->fs_info->fs_mutex); ret = btrfs_write_and_wait_transaction(trans, root); BUG_ON(ret); write_ctree_super(trans, root); - mutex_lock(&root->fs_info->fs_mutex); btrfs_finish_extent_commit(trans, root, pinned_copy); mutex_lock(&root->fs_info->trans_mutex); @@ -781,9 +774,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_trans_handle_cachep, trans); if (root->fs_info->closing) { - mutex_unlock(&root->fs_info->fs_mutex); drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots); - mutex_lock(&root->fs_info->fs_mutex); } return ret; } @@ -823,7 +814,7 @@ void btrfs_transaction_cleaner(struct work_struct *work) unsigned long delay = HZ * 30; int ret; - mutex_lock(&root->fs_info->fs_mutex); + smp_mb(); if (root->fs_info->closing) goto out; @@ -844,7 +835,6 @@ void btrfs_transaction_cleaner(struct work_struct *work) trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); out: - mutex_unlock(&root->fs_info->fs_mutex); btrfs_clean_old_snapshots(root); btrfs_transaction_queue_work(root, delay); } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ba396857102..869864ddcc2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -866,7 +866,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) u64 devid; int ret = 0; - mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->alloc_mutex); + mutex_lock(&root->fs_info->chunk_mutex); mutex_lock(&uuid_mutex); all_avail = root->fs_info->avail_data_alloc_bits | @@ -984,7 +985,8 @@ error_close: close_bdev_excl(bdev); out: mutex_unlock(&uuid_mutex); - mutex_unlock(&root->fs_info->fs_mutex); + mutex_unlock(&root->fs_info->chunk_mutex); + mutex_unlock(&root->fs_info->alloc_mutex); return ret; } @@ -1003,7 +1005,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if (!bdev) { return -EIO; } - mutex_lock(&root->fs_info->fs_mutex); + + mutex_lock(&root->fs_info->alloc_mutex); + mutex_lock(&root->fs_info->chunk_mutex); + trans = btrfs_start_transaction(root, 1); devices = &root->fs_info->fs_devices->devices; list_for_each(cur, devices) { @@ -1057,7 +1062,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) root->fs_info->fs_devices->open_devices++; out: btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); + mutex_unlock(&root->fs_info->chunk_mutex); + mutex_unlock(&root->fs_info->alloc_mutex); + return ret; out_close_bdev: @@ -1297,9 +1304,10 @@ int btrfs_balance(struct btrfs_root *dev_root) struct btrfs_key found_key; + BUG(); /* FIXME, needs locking */ + dev_root = dev_root->fs_info->dev_root; - mutex_lock(&dev_root->fs_info->fs_mutex); /* step one make some room on all the devices */ list_for_each(cur, devices) { device = list_entry(cur, struct btrfs_device, dev_list); @@ -1368,7 +1376,6 @@ int btrfs_balance(struct btrfs_root *dev_root) ret = 0; error: btrfs_free_path(path); - mutex_unlock(&dev_root->fs_info->fs_mutex); return ret; } diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index f63488dc2f1..3e9eb91c3c8 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -153,7 +153,6 @@ ssize_t btrfs_xattr_get(struct inode *inode, int name_index, return -ENOMEM; } - mutex_lock(&root->fs_info->fs_mutex); /* lookup the xattr by name */ di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name, strlen(name), 0); @@ -181,7 +180,6 @@ ssize_t btrfs_xattr_get(struct inode *inode, int name_index, ret = btrfs_dir_data_len(leaf, di); out: - mutex_unlock(&root->fs_info->fs_mutex); kfree(name); btrfs_free_path(path); return ret; @@ -210,7 +208,6 @@ int btrfs_xattr_set(struct inode *inode, int name_index, return -ENOMEM; } - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); @@ -260,7 +257,6 @@ out: } btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); kfree(name); btrfs_free_path(path); @@ -297,8 +293,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) return -ENOMEM; path->reada = 2; - mutex_lock(&root->fs_info->fs_mutex); - /* search for our xattrs */ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) @@ -379,15 +373,13 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) ret = total_size; err: - mutex_unlock(&root->fs_info->fs_mutex); btrfs_free_path(path); return ret; } /* - * delete all the xattrs associated with the inode. fs_mutex should be - * held when we come into here + * delete all the xattrs associated with the inode. */ int btrfs_delete_xattrs(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) -- cgit v1.2.3-70-g09d2 From 051e1b9f748ae673b7325d3fc049bb838606cffa Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:30 -0400 Subject: Drop locks in btrfs_search_slot when reading a tree block. One lock per btree block can make for significant congestion if everyone has to wait for IO at the high levels of the btree. This drops locks held by a path when doing reads during a tree search. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 74 +++++++++++++++++++++++++------------------------- fs/btrfs/ctree.h | 1 - fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 1 - 4 files changed, 38 insertions(+), 39 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index dff4da082d0..1b756fae279 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -63,7 +63,6 @@ void btrfs_free_path(struct btrfs_path *p) void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) { int i; - int skip = p->skip_locking; int keep = p->keep_locks; for (i = 0; i < BTRFS_MAX_LEVEL; i++) { @@ -76,7 +75,6 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) free_extent_buffer(p->nodes[i]); } memset(p, 0, sizeof(*p)); - p->skip_locking = skip; p->keep_locks = keep; } @@ -1137,7 +1135,6 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, return; node = path->nodes[level]; - WARN_ON(!path->skip_locking && !btrfs_tree_locked(node)); search = btrfs_node_blockptr(node, slot); blocksize = btrfs_level_size(root, level - 1); @@ -1192,6 +1189,7 @@ static void unlock_up(struct btrfs_path *path, int level, int lowest_unlock) { int i; int skip_level = level; + int no_skips = 0; struct extent_buffer *t; for (i = level; i < BTRFS_MAX_LEVEL; i++) { @@ -1199,27 +1197,24 @@ static void unlock_up(struct btrfs_path *path, int level, int lowest_unlock) break; if (!path->locks[i]) break; - if (path->slots[i] == 0) { + if (!no_skips && path->slots[i] == 0) { skip_level = i + 1; continue; } - if (path->keep_locks) { + if (!no_skips && path->keep_locks) { u32 nritems; t = path->nodes[i]; nritems = btrfs_header_nritems(t); - if (nritems < 2 || path->slots[i] >= nritems - 2) { -if (path->keep_locks) { -//printk("path %p skip level now %d\n", path, skip_level); -} + if (nritems < 1 || path->slots[i] >= nritems - 1) { skip_level = i + 1; continue; } } + if (skip_level < i && i >= lowest_unlock) + no_skips = 1; + t = path->nodes[i]; if (i >= lowest_unlock && i > skip_level && path->locks[i]) { -if (path->keep_locks) { -//printk("path %p unlocking level %d slot %d nritems %d skip_level %d\n", path, i, path->slots[i], btrfs_header_nritems(t), skip_level); -} btrfs_tree_unlock(t); path->locks[i] = 0; } @@ -1244,6 +1239,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root ins_len, int cow) { struct extent_buffer *b; + struct extent_buffer *tmp; int slot; int ret; int level; @@ -1263,10 +1259,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root if (ins_len < 0) lowest_unlock = 2; again: - if (!p->skip_locking) - b = btrfs_lock_root_node(root); - else - b = btrfs_root_node(root); + b = btrfs_lock_root_node(root); while (b) { level = btrfs_header_level(b); @@ -1286,8 +1279,7 @@ again: WARN_ON(1); level = btrfs_header_level(b); p->nodes[level] = b; - if (!p->skip_locking) - p->locks[level] = 1; + p->locks[level] = 1; ret = check_block(root, p, level); if (ret) return -1; @@ -1328,10 +1320,29 @@ again: reada_for_search(root, p, level, slot, key->objectid); - b = read_node_slot(root, b, slot); - if (!p->skip_locking) - btrfs_tree_lock(b); - unlock_up(p, level + 1, lowest_unlock); + tmp = btrfs_find_tree_block(root, + btrfs_node_blockptr(b, slot), + btrfs_level_size(root, level - 1)); + if (tmp && btrfs_buffer_uptodate(tmp, + btrfs_node_ptr_generation(b, slot))) { + b = tmp; + } else { + /* + * reduce lock contention at high levels + * of the btree by dropping locks before + * we read. + */ + if (level > 1) { + btrfs_release_path(NULL, p); + if (tmp) + free_extent_buffer(tmp); + goto again; + } else { + b = read_node_slot(root, b, slot); + } + } + btrfs_tree_lock(b); + unlock_up(p, level, lowest_unlock); } else { p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, b) < @@ -3007,17 +3018,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, c, slot); - if (!path->skip_locking) { - if (!btrfs_tree_locked(c)) { - int i; - WARN_ON(1); -printk("path %p no lock on level %d\n", path, level); -for (i = 0; i < BTRFS_MAX_LEVEL; i++) { -printk("path %p level %d slot %d nritems %d\n", path, i, path->slots[i], btrfs_header_nritems(path->nodes[i])); -} - } - btrfs_tree_lock(next); - } + WARN_ON(!btrfs_tree_locked(c)); + btrfs_tree_lock(next); break; } path->slots[level] = slot; @@ -3035,10 +3037,8 @@ printk("path %p level %d slot %d nritems %d\n", path, i, path->slots[i], btrfs_h if (level == 1 && path->locks[1] && path->reada) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, next, 0); - if (!path->skip_locking) { - WARN_ON(!btrfs_tree_locked(path->nodes[level])); - btrfs_tree_lock(next); - } + WARN_ON(!btrfs_tree_locked(path->nodes[level])); + btrfs_tree_lock(next); } done: unlock_up(path, 0, 1); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 692b8ea42de..9ea12d42741 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -336,7 +336,6 @@ struct btrfs_path { /* keep some upper locks as we walk down */ int keep_locks; int lowest_level; - int skip_locking; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f638803549e..ffc363d2fb2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1684,6 +1684,7 @@ void btrfs_throttle(struct btrfs_root *root) #else blk_congestion_wait(WRITE, HZ/20); #endif + } } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 890b9e9d8e2..0905653dd3f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -88,7 +88,6 @@ static int cache_block_group(struct btrfs_root *root, return -ENOMEM; path->reada = 2; - path->skip_locking = 1; first_free = block_group->key.objectid; key.objectid = block_group->key.objectid; key.offset = 0; -- cgit v1.2.3-70-g09d2 From 5cd57b2cbbb06a350df2698314e4e6a80805fc2f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:30 -0400 Subject: Btrfs: Add a skip_locking parameter to struct path, and make various funcs honor it Allocations may need to read in block groups from the extent allocation tree, which will require a tree search and take locks on the extent allocation tree. But, those locks might already be held in other places, leading to deadlocks. Since the alloc_mutex serializes everything right now, it is safe to skip the btree locking while caching block groups. A better fix will be to either create a recursive lock or find a way to back off existing locks while caching block groups. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 32 ++++++++++++++++++-------------- fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 6 ++++++ 3 files changed, 25 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b8f7aecf68d..fd68601b60b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -64,6 +64,7 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) { int i; int keep = p->keep_locks; + int skip = p->skip_locking; for (i = 0; i < BTRFS_MAX_LEVEL; i++) { if (!p->nodes[i]) @@ -76,6 +77,7 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) } memset(p, 0, sizeof(*p)); p->keep_locks = keep; + p->skip_locking = skip; } struct extent_buffer *btrfs_root_node(struct btrfs_root *root) @@ -1262,7 +1264,10 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root if (ins_len < 0) lowest_unlock = 2; again: - b = btrfs_lock_root_node(root); + if (p->skip_locking) + b = btrfs_root_node(root); + else + b = btrfs_lock_root_node(root); while (b) { level = btrfs_header_level(b); @@ -1282,7 +1287,8 @@ again: WARN_ON(1); level = btrfs_header_level(b); p->nodes[level] = b; - p->locks[level] = 1; + if (!p->skip_locking) + p->locks[level] = 1; ret = check_block(root, p, level); if (ret) return -1; @@ -1349,7 +1355,8 @@ again: b = read_node_slot(root, b, slot); } } - btrfs_tree_lock(b); + if (!p->skip_locking) + btrfs_tree_lock(b); unlock_up(p, level, lowest_unlock); } else { p->slots[level] = slot; @@ -1392,13 +1399,6 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, break; t = path->nodes[i]; btrfs_set_node_key(t, key, tslot); - if (!btrfs_tree_locked(path->nodes[i])) { - int ii; -printk("fixup without lock on level %d\n", btrfs_header_level(path->nodes[i])); - for (ii = 0; ii < BTRFS_MAX_LEVEL; ii++) { -printk("level %d slot %d\n", ii, path->slots[ii]); - } - } btrfs_mark_buffer_dirty(path->nodes[i]); if (tslot != 0) break; @@ -3033,8 +3033,10 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, c, slot); - WARN_ON(!btrfs_tree_locked(c)); - btrfs_tree_lock(next); + if (!path->skip_locking) { + WARN_ON(!btrfs_tree_locked(c)); + btrfs_tree_lock(next); + } break; } path->slots[level] = slot; @@ -3052,8 +3054,10 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) if (level == 1 && path->locks[1] && path->reada) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, next, 0); - WARN_ON(!btrfs_tree_locked(path->nodes[level])); - btrfs_tree_lock(next); + if (!path->skip_locking) { + WARN_ON(!btrfs_tree_locked(path->nodes[level])); + btrfs_tree_lock(next); + } } done: unlock_up(path, 0, 1); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9ea12d42741..e9bbb53eda6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -335,6 +335,7 @@ struct btrfs_path { int reada; /* keep some upper locks as we walk down */ int keep_locks; + int skip_locking; int lowest_level; }; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0905653dd3f..544fc3f2fe6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -88,6 +88,12 @@ static int cache_block_group(struct btrfs_root *root, return -ENOMEM; path->reada = 2; + /* + * we get into deadlocks with paths held by callers of this function. + * since the alloc_mutex is protecting things right now, just + * skip the locking here + */ + path->skip_locking = 1; first_free = block_group->key.objectid; key.objectid = block_group->key.objectid; key.offset = 0; -- cgit v1.2.3-70-g09d2 From 333db94cdde9e6dfdedab9290d04d812f83e0922 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:30 -0400 Subject: Btrfs: Fix snapshot deletion to release the alloc_mutex much more often. This lowers the impact of snapshot deletion on the rest of the FS. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 2 +- fs/btrfs/disk-io.c | 2 ++ fs/btrfs/extent-tree.c | 28 ++++++++++++++++++++-------- 3 files changed, 23 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index fd68601b60b..5edbcc09b3c 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1255,7 +1255,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root lowest_level = p->lowest_level; WARN_ON(lowest_level && ins_len); WARN_ON(p->nodes[0] != NULL); - WARN_ON(root == root->fs_info->extent_root && + WARN_ON(cow && root == root->fs_info->extent_root && !mutex_is_locked(&root->fs_info->alloc_mutex)); WARN_ON(root == root->fs_info->chunk_root && !mutex_is_locked(&root->fs_info->chunk_mutex)); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ffc363d2fb2..3cc480b8381 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1674,6 +1674,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) void btrfs_throttle(struct btrfs_root *root) { +#if 0 struct backing_dev_info *bdi; bdi = &root->fs_info->bdi; @@ -1686,6 +1687,7 @@ void btrfs_throttle(struct btrfs_root *root) #endif } +#endif } void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 544fc3f2fe6..6274f30031d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1223,8 +1223,8 @@ printk("space info full %Lu\n", flags); ret = btrfs_make_block_group(trans, extent_root, 0, flags, BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); BUG_ON(ret); - mutex_unlock(&extent_root->fs_info->chunk_mutex); out: + mutex_unlock(&extent_root->fs_info->chunk_mutex); return 0; } @@ -2181,17 +2181,29 @@ static void noinline reada_walk_down(struct btrfs_root *root, continue; } } - mutex_unlock(&root->fs_info->alloc_mutex); ret = readahead_tree_block(root, bytenr, blocksize, btrfs_node_ptr_generation(node, i)); last = bytenr + blocksize; cond_resched(); - mutex_lock(&root->fs_info->alloc_mutex); if (ret) break; } } +/* + * we want to avoid as much random IO as we can with the alloc mutex + * held, so drop the lock and do the lookup, then do it again with the + * lock held. + */ +int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len, + u32 *refs) +{ + mutex_unlock(&root->fs_info->alloc_mutex); + lookup_extent_ref(NULL, root, start, len, refs); + mutex_lock(&root->fs_info->alloc_mutex); + return lookup_extent_ref(NULL, root, start, len, refs); +} + /* * helper function for drop_snapshot, this walks down the tree dropping ref * counts as it goes. @@ -2215,8 +2227,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = lookup_extent_ref(trans, root, - path->nodes[*level]->start, + ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start, path->nodes[*level]->len, &refs); BUG_ON(ret); if (refs > 1) @@ -2245,7 +2256,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); blocksize = btrfs_level_size(root, *level - 1); - ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs); + ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); BUG_ON(ret); if (refs != 1) { parent = path->nodes[*level]; @@ -2261,15 +2272,16 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { free_extent_buffer(next); + mutex_unlock(&root->fs_info->alloc_mutex); + reada_walk_down(root, cur, path->slots[*level]); - mutex_unlock(&root->fs_info->alloc_mutex); next = read_tree_block(root, bytenr, blocksize, ptr_gen); mutex_lock(&root->fs_info->alloc_mutex); /* we've dropped the lock, double check */ - ret = lookup_extent_ref(trans, root, bytenr, + ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); BUG_ON(ret); if (refs != 1) { -- cgit v1.2.3-70-g09d2 From a74a4b97b61beede185b4b3ad359d7d378b0d312 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:31 -0400 Subject: Btrfs: Replace the transaction work queue with kthreads This creates one kthread for commits and one kthread for deleting old snapshots. All the work queues are removed. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 5 ++- fs/btrfs/ctree.h | 13 ++---- fs/btrfs/disk-io.c | 116 +++++++++++++++++++++++++++++++++++++++++++++---- fs/btrfs/extent-tree.c | 10 ++--- fs/btrfs/super.c | 16 +++---- fs/btrfs/transaction.c | 72 +----------------------------- fs/btrfs/transaction.h | 10 ----- fs/btrfs/volumes.c | 12 +++-- 8 files changed, 136 insertions(+), 118 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 5edbcc09b3c..40f0e0cb804 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1352,6 +1352,8 @@ again: free_extent_buffer(tmp); goto again; } else { + if (tmp) + free_extent_buffer(tmp); b = read_node_slot(root, b, slot); } } @@ -3048,7 +3050,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) free_extent_buffer(c); path->nodes[level] = next; path->slots[level] = 0; - path->locks[level] = 1; + if (!path->skip_locking) + path->locks[level] = 1; if (!level) break; if (level == 1 && path->locks[1] && path->reada) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e9bbb53eda6..244fe86bcc5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -519,15 +518,14 @@ struct btrfs_fs_info { struct backing_dev_info bdi; spinlock_t hash_lock; struct mutex trans_mutex; + struct mutex transaction_kthread_mutex; + struct mutex cleaner_mutex; struct mutex alloc_mutex; struct mutex chunk_mutex; struct mutex drop_mutex; struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; - struct list_head end_io_work_list; - struct work_struct end_io_work; - spinlock_t end_io_work_lock; atomic_t nr_async_submits; /* @@ -543,13 +541,10 @@ struct btrfs_fs_info { struct btrfs_workers workers; struct btrfs_workers endio_workers; struct btrfs_workers submit_workers; + struct task_struct *transaction_kthread; + struct task_struct *cleaner_kthread; int thread_pool_size; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - struct work_struct trans_work; -#else - struct delayed_work trans_work; -#endif struct kobject super_kobj; struct completion kobj_unregister; int do_barriers; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 52569b57692..31ca9f89388 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include #include #include #include @@ -24,6 +25,12 @@ #include #include // for block_sync_page #include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) +# include +#else +# include +#endif #include "crc32c.h" #include "ctree.h" #include "disk-io.h" @@ -1100,6 +1107,87 @@ static void end_workqueue_fn(struct btrfs_work *work) #endif } +static int cleaner_kthread(void *arg) +{ + struct btrfs_root *root = arg; + + do { + smp_mb(); + if (root->fs_info->closing) + break; + + vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); + mutex_lock(&root->fs_info->cleaner_mutex); +printk("cleaner awake\n"); + btrfs_clean_old_snapshots(root); +printk("cleaner done\n"); + mutex_unlock(&root->fs_info->cleaner_mutex); + + if (freezing(current)) { + refrigerator(); + } else { + smp_mb(); + if (root->fs_info->closing) + break; + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + __set_current_state(TASK_RUNNING); + } + } while (!kthread_should_stop()); + return 0; +} + +static int transaction_kthread(void *arg) +{ + struct btrfs_root *root = arg; + struct btrfs_trans_handle *trans; + struct btrfs_transaction *cur; + unsigned long now; + unsigned long delay; + int ret; + + do { + smp_mb(); + if (root->fs_info->closing) + break; + + delay = HZ * 30; + vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); + mutex_lock(&root->fs_info->transaction_kthread_mutex); + + mutex_lock(&root->fs_info->trans_mutex); + cur = root->fs_info->running_transaction; + if (!cur) { + mutex_unlock(&root->fs_info->trans_mutex); + goto sleep; + } + now = get_seconds(); + if (now < cur->start_time || now - cur->start_time < 30) { + mutex_unlock(&root->fs_info->trans_mutex); + delay = HZ * 5; + goto sleep; + } + mutex_unlock(&root->fs_info->trans_mutex); + btrfs_defrag_dirty_roots(root->fs_info); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_commit_transaction(trans, root); +sleep: + wake_up_process(root->fs_info->cleaner_kthread); + mutex_unlock(&root->fs_info->transaction_kthread_mutex); + + if (freezing(current)) { + refrigerator(); + } else { + if (root->fs_info->closing) + break; + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(delay); + __set_current_state(TASK_RUNNING); + } + } while (!kthread_should_stop()); + return 0; +} + struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices, char *options) @@ -1189,11 +1277,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); -#else - INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); -#endif BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); @@ -1204,6 +1287,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->drop_mutex); mutex_init(&fs_info->alloc_mutex); mutex_init(&fs_info->chunk_mutex); + mutex_init(&fs_info->transaction_kthread_mutex); + mutex_init(&fs_info->cleaner_mutex); #if 0 ret = add_hasher(fs_info, "crc32c"); @@ -1247,7 +1332,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_start_workers(&fs_info->submit_workers, 1); btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); - err = -EINVAL; if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) { printk("Btrfs: wanted %llu devices, but found %llu\n", @@ -1341,9 +1425,22 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->data_alloc_profile = (u64)-1; fs_info->metadata_alloc_profile = (u64)-1; fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; + fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, + "btrfs-cleaner"); + if (!fs_info->cleaner_kthread) + goto fail_extent_root; + + fs_info->transaction_kthread = kthread_run(transaction_kthread, + tree_root, + "btrfs-transaction"); + if (!fs_info->transaction_kthread) + goto fail_trans_kthread; + return tree_root; +fail_trans_kthread: + kthread_stop(fs_info->cleaner_kthread); fail_extent_root: free_extent_buffer(extent_root->node); fail_tree_root: @@ -1562,8 +1659,11 @@ int close_ctree(struct btrfs_root *root) fs_info->closing = 1; smp_mb(); - btrfs_transaction_flush_work(root); + kthread_stop(root->fs_info->transaction_kthread); + kthread_stop(root->fs_info->cleaner_kthread); + btrfs_defrag_dirty_roots(root->fs_info); + btrfs_clean_old_snapshots(root); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); /* run commit again to drop the original snapshot */ @@ -1574,8 +1674,6 @@ int close_ctree(struct btrfs_root *root) write_ctree_super(NULL, root); - btrfs_transaction_flush_work(root); - if (fs_info->delalloc_bytes) { printk("btrfs: at unmount delalloc count %Lu\n", fs_info->delalloc_bytes); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6274f30031d..89cc4f61186 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1216,15 +1216,16 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, if (ret == -ENOSPC) { printk("space info full %Lu\n", flags); space_info->full = 1; - goto out; + goto out_unlock; } BUG_ON(ret); ret = btrfs_make_block_group(trans, extent_root, 0, flags, BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); BUG_ON(ret); -out: +out_unlock: mutex_unlock(&extent_root->fs_info->chunk_mutex); +out: return 0; } @@ -2274,7 +2275,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, free_extent_buffer(next); mutex_unlock(&root->fs_info->alloc_mutex); - reada_walk_down(root, cur, path->slots[*level]); + if (path->slots[*level] == 0) + reada_walk_down(root, cur, path->slots[*level]); next = read_tree_block(root, bytenr, blocksize, ptr_gen); @@ -2446,8 +2448,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root break; if (wret < 0) ret = wret; - ret = -EAGAIN; - break; } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b61ded7a20c..726d6871fa1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -340,7 +340,6 @@ static int btrfs_fill_super(struct super_block * sb, goto fail_close; sb->s_root = root_dentry; - btrfs_transaction_queue_work(tree_root, HZ * 30); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25) save_mount_options(sb, data); @@ -416,9 +415,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, goto error_free_subvol_name; bdev = fs_devices->latest_bdev; - btrfs_lock_volumes(); s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); - btrfs_unlock_volumes(); if (IS_ERR(s)) goto error_s; @@ -530,13 +527,15 @@ out: static void btrfs_write_super_lockfs(struct super_block *sb) { struct btrfs_root *root = btrfs_sb(sb); - btrfs_transaction_flush_work(root); + mutex_lock(&root->fs_info->transaction_kthread_mutex); + mutex_lock(&root->fs_info->cleaner_mutex); } static void btrfs_unlockfs(struct super_block *sb) { struct btrfs_root *root = btrfs_sb(sb); - btrfs_transaction_queue_work(root, HZ * 30); + mutex_unlock(&root->fs_info->cleaner_mutex); + mutex_unlock(&root->fs_info->transaction_kthread_mutex); } static struct super_operations btrfs_super_ops = { @@ -589,10 +588,9 @@ static int __init init_btrfs_fs(void) if (err) return err; - btrfs_init_transaction_sys(); err = btrfs_init_cachep(); if (err) - goto free_transaction_sys; + goto free_sysfs; err = extent_io_init(); if (err) @@ -618,15 +616,13 @@ free_extent_io: extent_io_exit(); free_cachep: btrfs_destroy_cachep(); -free_transaction_sys: - btrfs_exit_transaction_sys(); +free_sysfs: btrfs_exit_sysfs(); return err; } static void __exit exit_btrfs_fs(void) { - btrfs_exit_transaction_sys(); btrfs_destroy_cachep(); extent_map_exit(); extent_io_exit(); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 69ed5f85a38..0c53ff775b9 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -29,8 +29,6 @@ static int total_trans = 0; extern struct kmem_cache *btrfs_trans_handle_cachep; extern struct kmem_cache *btrfs_transaction_cachep; -static struct workqueue_struct *trans_wq; - #define BTRFS_ROOT_TRANS_TAG 0 #define BTRFS_ROOT_DEFRAG_TAG 1 @@ -807,81 +805,15 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) { struct list_head dirty_roots; INIT_LIST_HEAD(&dirty_roots); - +again: mutex_lock(&root->fs_info->trans_mutex); list_splice_init(&root->fs_info->dead_roots, &dirty_roots); mutex_unlock(&root->fs_info->trans_mutex); if (!list_empty(&dirty_roots)) { drop_dirty_roots(root, &dirty_roots); + goto again; } return 0; } -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) -void btrfs_transaction_cleaner(void *p) -#else -void btrfs_transaction_cleaner(struct work_struct *work) -#endif -{ -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - struct btrfs_fs_info *fs_info = p; -#else - struct btrfs_fs_info *fs_info = container_of(work, - struct btrfs_fs_info, - trans_work.work); - -#endif - struct btrfs_root *root = fs_info->tree_root; - struct btrfs_transaction *cur; - struct btrfs_trans_handle *trans; - unsigned long now; - unsigned long delay = HZ * 30; - int ret; - - smp_mb(); - if (root->fs_info->closing) - goto out; - - mutex_lock(&root->fs_info->trans_mutex); - cur = root->fs_info->running_transaction; - if (!cur) { - mutex_unlock(&root->fs_info->trans_mutex); - goto out; - } - now = get_seconds(); - if (now < cur->start_time || now - cur->start_time < 30) { - mutex_unlock(&root->fs_info->trans_mutex); - delay = HZ * 5; - goto out; - } - mutex_unlock(&root->fs_info->trans_mutex); - btrfs_defrag_dirty_roots(root->fs_info); - trans = btrfs_start_transaction(root, 1); - ret = btrfs_commit_transaction(trans, root); -out: - btrfs_clean_old_snapshots(root); - btrfs_transaction_queue_work(root, delay); -} - -void btrfs_transaction_queue_work(struct btrfs_root *root, int delay) -{ - if (!root->fs_info->closing) - queue_delayed_work(trans_wq, &root->fs_info->trans_work, delay); -} - -void btrfs_transaction_flush_work(struct btrfs_root *root) -{ - cancel_delayed_work(&root->fs_info->trans_work); - flush_workqueue(trans_wq); -} - -void __init btrfs_init_transaction_sys(void) -{ - trans_wq = create_workqueue("btrfs-transaction"); -} - -void btrfs_exit_transaction_sys(void) -{ - destroy_workqueue(trans_wq); -} diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 52559b51b18..e1e5a06b65f 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -82,16 +82,6 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) -void btrfs_transaction_cleaner(void *p); -#else -void btrfs_transaction_cleaner(struct work_struct *work); -#endif - -void btrfs_transaction_flush_work(struct btrfs_root *root); -void btrfs_transaction_queue_work(struct btrfs_root *root, int delay); -void btrfs_init_transaction_sys(void); -void btrfs_exit_transaction_sys(void); int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, struct list_head *dead_list); int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 869864ddcc2..4e7cee27aab 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -271,13 +271,17 @@ again: list_for_each(cur, head) { device = list_entry(cur, struct btrfs_device, dev_list); if (!device->in_fs_metadata) { - if (device->bdev) { - close_bdev_excl(device->bdev); - fs_devices->open_devices--; - } + struct block_device *bdev; list_del(&device->dev_list); list_del(&device->dev_alloc_list); fs_devices->num_devices--; + if (device->bdev) { + bdev = device->bdev; + fs_devices->open_devices--; + mutex_unlock(&uuid_mutex); + close_bdev_excl(bdev); + mutex_lock(&uuid_mutex); + } kfree(device->name); kfree(device); goto again; -- cgit v1.2.3-70-g09d2 From e7a84565bcdb239caad29ccbe559ef978090ac7e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:31 -0400 Subject: Btrfs: Add btree locking to the tree defragmentation code The online btree defragger is simplified and rewritten to use standard btree searches instead of a walk up / down mechanism. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 46 ++++++++-- fs/btrfs/ctree.h | 2 + fs/btrfs/extent-tree.c | 7 ++ fs/btrfs/tree-defrag.c | 239 ++++++++++--------------------------------------- 4 files changed, 93 insertions(+), 201 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 40f0e0cb804..7f4cc2b88d0 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -354,7 +354,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_key *progress) { struct extent_buffer *cur; - struct extent_buffer *tmp; u64 blocknr; u64 gen; u64 search_start = *last_ret; @@ -370,9 +369,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, int progress_passed = 0; struct btrfs_disk_key disk_key; - /* FIXME this code needs locking */ - return 0; - parent_level = btrfs_header_level(parent); if (cache_only && parent_level != 1) return 0; @@ -454,20 +450,23 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, if (search_start == 0) search_start = last_block; + btrfs_tree_lock(cur); err = __btrfs_cow_block(trans, root, cur, parent, i, - &tmp, search_start, + &cur, search_start, min(16 * blocksize, (end_slot - i) * blocksize)); if (err) { + btrfs_tree_unlock(cur); free_extent_buffer(cur); break; } - search_start = tmp->start; - last_block = tmp->start; + search_start = cur->start; + last_block = cur->start; *last_ret = search_start; if (parent_level == 1) - btrfs_clear_buffer_defrag(tmp); - free_extent_buffer(tmp); + btrfs_clear_buffer_defrag(cur); + btrfs_tree_unlock(cur); + free_extent_buffer(cur); } if (parent->map_token) { unmap_extent_buffer(parent, parent->map_token, @@ -2970,6 +2969,35 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) return 1; } +int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *key, int lowest_level) +{ + int level = lowest_level; + int slot; + struct extent_buffer *c; + + while(level < BTRFS_MAX_LEVEL) { + if (!path->nodes[level]) + return 1; + + slot = path->slots[level] + 1; + c = path->nodes[level]; + if (slot >= btrfs_header_nritems(c)) { + level++; + if (level == BTRFS_MAX_LEVEL) { + return 1; + } + continue; + } + if (level == 0) + btrfs_item_key_to_cpu(c, key, slot); + else + btrfs_node_key_to_cpu(c, key, slot); + return 0; + } + return 1; +} + /* * search the tree again to find a leaf with greater keys * returns 0 if it found something or 1 if there are no greater leaves. diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 244fe86bcc5..ca8e6f15859 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1411,6 +1411,8 @@ int btrfs_previous_item(struct btrfs_root *root, struct extent_buffer *btrfs_root_node(struct btrfs_root *root); struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); +int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *key, int lowest_level); int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 89cc4f61186..a9b3a25a45b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2201,6 +2201,7 @@ int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len, { mutex_unlock(&root->fs_info->alloc_mutex); lookup_extent_ref(NULL, root, start, len, refs); + cond_resched(); mutex_lock(&root->fs_info->alloc_mutex); return lookup_extent_ref(NULL, root, start, len, refs); } @@ -2280,6 +2281,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, next = read_tree_block(root, bytenr, blocksize, ptr_gen); + cond_resched(); mutex_lock(&root->fs_info->alloc_mutex); /* we've dropped the lock, double check */ @@ -2329,6 +2331,7 @@ out: *level += 1; BUG_ON(ret); mutex_unlock(&root->fs_info->alloc_mutex); + cond_resched(); return 0; } @@ -2448,6 +2451,10 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root break; if (wret < 0) ret = wret; + if (trans->transaction->in_commit) { + ret = -EAGAIN; + break; + } } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index fab851d8538..1677e4edaf6 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -21,167 +21,26 @@ #include "disk-io.h" #include "print-tree.h" #include "transaction.h" - -static void reada_defrag(struct btrfs_root *root, - struct extent_buffer *node) -{ - int i; - u32 nritems; - u64 bytenr; - u64 gen; - u32 blocksize; - int ret; - - blocksize = btrfs_level_size(root, btrfs_header_level(node) - 1); - nritems = btrfs_header_nritems(node); - for (i = 0; i < nritems; i++) { - bytenr = btrfs_node_blockptr(node, i); - gen = btrfs_node_ptr_generation(node, i); - ret = readahead_tree_block(root, bytenr, blocksize, gen); - if (ret) - break; - } -} - -static int defrag_walk_down(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, int *level, - int cache_only, u64 *last_ret) -{ - struct extent_buffer *next; - struct extent_buffer *cur; - u64 bytenr; - u64 ptr_gen; - int ret = 0; - int is_extent = 0; - - WARN_ON(*level < 0); - WARN_ON(*level >= BTRFS_MAX_LEVEL); - - if (root->fs_info->extent_root == root) - is_extent = 1; - - if (*level == 1 && cache_only && path->nodes[1] && - !btrfs_buffer_defrag(path->nodes[1])) { - goto out; - } - while(*level > 0) { - WARN_ON(*level < 0); - WARN_ON(*level >= BTRFS_MAX_LEVEL); - cur = path->nodes[*level]; - - if (!cache_only && *level > 1 && path->slots[*level] == 0) - reada_defrag(root, cur); - - if (btrfs_header_level(cur) != *level) - WARN_ON(1); - - if (path->slots[*level] >= - btrfs_header_nritems(cur)) - break; - - if (*level == 1) { - WARN_ON(btrfs_header_generation(path->nodes[*level]) != - trans->transid); - ret = btrfs_realloc_node(trans, root, - path->nodes[*level], - path->slots[*level], - cache_only, last_ret, - &root->defrag_progress); - if (is_extent) - btrfs_extent_post_op(trans, root); - - break; - } - bytenr = btrfs_node_blockptr(cur, path->slots[*level]); - ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); - - if (cache_only) { - next = btrfs_find_tree_block(root, bytenr, - btrfs_level_size(root, *level - 1)); - if (!next || !btrfs_buffer_uptodate(next, ptr_gen) || - !btrfs_buffer_defrag(next)) { - free_extent_buffer(next); - path->slots[*level]++; - continue; - } - } else { - next = read_tree_block(root, bytenr, - btrfs_level_size(root, *level - 1), - ptr_gen); - } - ret = btrfs_cow_block(trans, root, next, path->nodes[*level], - path->slots[*level], &next); - BUG_ON(ret); - if (is_extent) - btrfs_extent_post_op(trans, root); - - WARN_ON(*level <= 0); - if (path->nodes[*level-1]) - free_extent_buffer(path->nodes[*level-1]); - path->nodes[*level-1] = next; - *level = btrfs_header_level(next); - path->slots[*level] = 0; - } - WARN_ON(*level < 0); - WARN_ON(*level >= BTRFS_MAX_LEVEL); - - btrfs_clear_buffer_defrag(path->nodes[*level]); -out: - free_extent_buffer(path->nodes[*level]); - path->nodes[*level] = NULL; - *level += 1; - WARN_ON(ret && ret != -EAGAIN); - return ret; -} - -static int defrag_walk_up(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, int *level, - int cache_only) -{ - int i; - int slot; - struct extent_buffer *node; - - for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { - slot = path->slots[i]; - if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { - path->slots[i]++; - *level = i; - node = path->nodes[i]; - WARN_ON(i == 0); - btrfs_node_key_to_cpu(node, &root->defrag_progress, - path->slots[i]); - root->defrag_level = i; - return 0; - } else { - btrfs_clear_buffer_defrag(path->nodes[*level]); - free_extent_buffer(path->nodes[*level]); - path->nodes[*level] = NULL; - *level = i + 1; - } - } - return 1; -} +#include "locking.h" int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only) { struct btrfs_path *path = NULL; - struct extent_buffer *tmp; + struct btrfs_key key; int ret = 0; int wret; int level; int orig_level; int i; int is_extent = 0; + int next_key_ret = 0; u64 last_ret = 0; - if (root->fs_info->extent_root == root) + if (root->fs_info->extent_root == root) { + mutex_lock(&root->fs_info->alloc_mutex); is_extent = 1; - - goto out; + } if (root->ref_cows == 0 && !is_extent) goto out; @@ -200,67 +59,63 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, goto out; } if (root->defrag_progress.objectid == 0) { + struct extent_buffer *root_node; u32 nritems; - nritems = btrfs_header_nritems(root->node); + root_node = btrfs_lock_root_node(root); + nritems = btrfs_header_nritems(root_node); root->defrag_max.objectid = 0; /* from above we know this is not a leaf */ - btrfs_node_key_to_cpu(root->node, &root->defrag_max, + btrfs_node_key_to_cpu(root_node, &root->defrag_max, nritems - 1); - extent_buffer_get(root->node); - ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); - BUG_ON(ret); - path->nodes[level] = root->node; - path->slots[level] = 0; - if (is_extent) - btrfs_extent_post_op(trans, root); + btrfs_tree_unlock(root_node); + free_extent_buffer(root_node); + memset(&key, 0, sizeof(key)); } else { - level = root->defrag_level; - path->lowest_level = level; - wret = btrfs_search_slot(trans, root, &root->defrag_progress, - path, 0, 1); - - if (is_extent) - btrfs_extent_post_op(trans, root); - - if (wret < 0) { - ret = wret; - goto out; - } - - while(level > 0 && !path->nodes[level]) - level--; - - if (!path->nodes[level]) { - ret = 0; - goto out; - } + memcpy(&key, &root->defrag_progress, sizeof(key)); } - while(1) { - wret = defrag_walk_down(trans, root, path, &level, cache_only, - &last_ret); - if (wret > 0) - break; - if (wret < 0) - ret = wret; + path->lowest_level = 1; + path->keep_locks = 1; + wret = btrfs_search_slot(trans, root, &key, path, 0, 1); - wret = defrag_walk_up(trans, root, path, &level, cache_only); - if (wret > 0) - break; - if (wret < 0) - ret = wret; - else - ret = -EAGAIN; - break; + if (wret < 0) { + ret = wret; + goto out; + } + if (!path->nodes[1]) { + ret = 0; + goto out; + } + path->slots[1] = btrfs_header_nritems(path->nodes[1]); + next_key_ret = btrfs_find_next_key(root, path, &key, 1); + ret = btrfs_realloc_node(trans, root, + path->nodes[1], 0, + cache_only, &last_ret, + &root->defrag_progress); + WARN_ON(ret && ret != -EAGAIN); + if (next_key_ret == 0) { + memcpy(&root->defrag_progress, &key, sizeof(key)); + ret = -EAGAIN; } - for (i = 0; i <= orig_level; i++) { + + for (i = 1; i < BTRFS_MAX_LEVEL; i++) { + if (path->locks[i]) { + btrfs_tree_unlock(path->nodes[i]); + path->locks[i] = 0; + } if (path->nodes[i]) { free_extent_buffer(path->nodes[i]); path->nodes[i] = NULL; } } + if (is_extent) + btrfs_extent_post_op(trans, root); + out: + if (is_extent) + mutex_unlock(&root->fs_info->alloc_mutex); + if (path) btrfs_free_path(path); if (ret == -EAGAIN) { -- cgit v1.2.3-70-g09d2 From 079899c2384023cd8efcd3806680b4f1d2abbd54 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:31 -0400 Subject: Btrfs: Change find_extent_buffer to use TestSetPageLocked This makes it possible for callers to check for extent_buffers in cache without deadlocking against any btree locks held. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 -- fs/btrfs/extent_io.c | 7 ++++++- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a9b3a25a45b..dc3c03c6612 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1438,7 +1438,6 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, int err = 0; if (!pending) { -#if 0 struct extent_buffer *buf; buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (buf) { @@ -1460,7 +1459,6 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, } free_extent_buffer(buf); } -#endif update_pinned_extents(root, bytenr, num_bytes, 1); } else { set_extent_bits(&root->fs_info->pending_del, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index bd15cdcaba9..40a5f53cb04 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2808,10 +2808,15 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, goto lru_add; for (i = 0; i < num_pages; i++, index++) { - p = find_lock_page(mapping, index); + p = find_get_page(mapping, index); if (!p) { goto fail; } + if (TestSetPageLocked(p)) { + page_cache_release(p); + goto fail; + } + set_page_extent_mapped(p); mark_page_accessed(p); -- cgit v1.2.3-70-g09d2 From 3f157a2fd2ad731e1ed9964fecdc5f459f04a4a4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:31 -0400 Subject: Btrfs: Online btree defragmentation fixes The btree defragger wasn't making forward progress because the new key wasn't being saved by the btrfs_search_forward function. This also disables the automatic btree defrag, it wasn't scaling well to huge filesystems. The auto-defrag needs to be done differently. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 170 ++++++++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/ctree.h | 7 +- fs/btrfs/disk-io.c | 61 +----------------- fs/btrfs/disk-io.h | 6 -- fs/btrfs/extent-tree.c | 2 - fs/btrfs/super.c | 1 - fs/btrfs/transaction.c | 35 +--------- fs/btrfs/transaction.h | 1 - fs/btrfs/tree-defrag.c | 36 ++++++----- 9 files changed, 190 insertions(+), 129 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7f4cc2b88d0..0cb80f32a9c 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -63,10 +63,9 @@ void btrfs_free_path(struct btrfs_path *p) void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) { int i; - int keep = p->keep_locks; - int skip = p->skip_locking; for (i = 0; i < BTRFS_MAX_LEVEL; i++) { + p->slots[i] = 0; if (!p->nodes[i]) continue; if (p->locks[i]) { @@ -74,10 +73,8 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) p->locks[i] = 0; } free_extent_buffer(p->nodes[i]); + p->nodes[i] = NULL; } - memset(p, 0, sizeof(*p)); - p->keep_locks = keep; - p->skip_locking = skip; } struct extent_buffer *btrfs_root_node(struct btrfs_root *root) @@ -463,8 +460,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, search_start = cur->start; last_block = cur->start; *last_ret = search_start; - if (parent_level == 1) - btrfs_clear_buffer_defrag(cur); btrfs_tree_unlock(cur); free_extent_buffer(cur); } @@ -2969,8 +2964,138 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) return 1; } +/* + * A helper function to walk down the tree starting at min_key, and looking + * for nodes or leaves that are either in cache or have a minimum + * transaction id. This is used by the btree defrag code, but could + * also be used to search for blocks that have changed since a given + * transaction id. + * + * This does not cow, but it does stuff the starting key it finds back + * into min_key, so you can call btrfs_search_slot with cow=1 on the + * key and get a writable path. + * + * This does lock as it descends, and path->keep_locks should be set + * to 1 by the caller. + * + * This honors path->lowest_level to prevent descent past a given level + * of the tree. + * + * returns zero if something useful was found, < 0 on error and 1 if there + * was nothing in the tree that matched the search criteria. + */ +int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, + struct btrfs_path *path, int cache_only, + u64 min_trans) +{ + struct extent_buffer *cur; + struct btrfs_key found_key; + int slot; + u32 nritems; + int level; + int ret = 1; + +again: + cur = btrfs_lock_root_node(root); + level = btrfs_header_level(cur); + path->nodes[level] = cur; + path->locks[level] = 1; + + if (btrfs_header_generation(cur) < min_trans) { + ret = 1; + goto out; + } + while(1) { + nritems = btrfs_header_nritems(cur); + level = btrfs_header_level(cur); + bin_search(cur, min_key, level, &slot); + + /* at level = 0, we're done, setup the path and exit */ + if (level == 0) { + ret = 0; + path->slots[level] = slot; + btrfs_item_key_to_cpu(cur, &found_key, slot); + goto out; + } + /* + * check this node pointer against the cache_only and + * min_trans parameters. If it isn't in cache or is too + * old, skip to the next one. + */ + while(slot < nritems) { + u64 blockptr; + u64 gen; + struct extent_buffer *tmp; + blockptr = btrfs_node_blockptr(cur, slot); + gen = btrfs_node_ptr_generation(cur, slot); + if (gen < min_trans) { + slot++; + continue; + } + if (!cache_only) + break; + + tmp = btrfs_find_tree_block(root, blockptr, + btrfs_level_size(root, level - 1)); + + if (tmp && btrfs_buffer_uptodate(tmp, gen)) { + free_extent_buffer(tmp); + break; + } + if (tmp) + free_extent_buffer(tmp); + slot++; + } + /* + * we didn't find a candidate key in this node, walk forward + * and find another one + */ + if (slot >= nritems) { + ret = btrfs_find_next_key(root, path, min_key, level, + cache_only, min_trans); + if (ret == 0) { + btrfs_release_path(root, path); + goto again; + } else { + goto out; + } + } + /* save our key for returning back */ + btrfs_node_key_to_cpu(cur, &found_key, slot); + path->slots[level] = slot; + if (level == path->lowest_level) { + ret = 0; + unlock_up(path, level, 1); + goto out; + } + cur = read_node_slot(root, cur, slot); + + btrfs_tree_lock(cur); + path->locks[level - 1] = 1; + path->nodes[level - 1] = cur; + unlock_up(path, level, 1); + } +out: + if (ret == 0) + memcpy(min_key, &found_key, sizeof(found_key)); + return ret; +} + +/* + * this is similar to btrfs_next_leaf, but does not try to preserve + * and fixup the path. It looks for and returns the next key in the + * tree based on the current path and the cache_only and min_trans + * parameters. + * + * 0 is returned if another key is found, < 0 if there are any errors + * and 1 is returned if there are no higher keys in the tree + * + * path->keep_locks should be set to 1 on the search made before + * calling this function. + */ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *key, int lowest_level) + struct btrfs_key *key, int lowest_level, + int cache_only, u64 min_trans) { int level = lowest_level; int slot; @@ -2982,6 +3107,7 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, slot = path->slots[level] + 1; c = path->nodes[level]; +next: if (slot >= btrfs_header_nritems(c)) { level++; if (level == BTRFS_MAX_LEVEL) { @@ -2991,8 +3117,28 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, } if (level == 0) btrfs_item_key_to_cpu(c, key, slot); - else + else { + u64 blockptr = btrfs_node_blockptr(c, slot); + u64 gen = btrfs_node_ptr_generation(c, slot); + + if (cache_only) { + struct extent_buffer *cur; + cur = btrfs_find_tree_block(root, blockptr, + btrfs_level_size(root, level - 1)); + if (!cur || !btrfs_buffer_uptodate(cur, gen)) { + slot++; + if (cur) + free_extent_buffer(cur); + goto next; + } + free_extent_buffer(cur); + } + if (gen < min_trans) { + slot++; + goto next; + } btrfs_node_key_to_cpu(c, key, slot); + } return 0; } return 1; @@ -3095,6 +3241,12 @@ done: return 0; } +/* + * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps + * searching until it gets past min_objectid or finds an item of 'type' + * + * returns 0 if something is found, 1 if nothing was found and < 0 on error + */ int btrfs_previous_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid, int type) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ca8e6f15859..a28796482b4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -609,6 +609,7 @@ struct btrfs_root { u64 last_inode_alloc; int ref_cows; int track_dirty; + u64 defrag_trans_start; struct btrfs_key defrag_progress; struct btrfs_key defrag_max; int defrag_running; @@ -1412,7 +1413,11 @@ int btrfs_previous_item(struct btrfs_root *root, struct extent_buffer *btrfs_root_node(struct btrfs_root *root); struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *key, int lowest_level); + struct btrfs_key *key, int lowest_level, + int cache_only, u64 min_trans); +int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, + struct btrfs_path *path, int cache_only, + u64 min_trans); int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 31ca9f89388..4cdc0b6a267 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -295,7 +295,6 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, btrfs_header_generation(eb)); BUG_ON(ret); - btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); if (found_start != start) { printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", @@ -355,7 +354,6 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, } eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); - btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); if (found_start != start) { ret = -EIO; @@ -736,6 +734,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); memset(&root->root_kobj, 0, sizeof(root->root_kobj)); + root->defrag_trans_start = fs_info->generation; init_completion(&root->kobj_unregister); root->defrag_running = 0; root->defrag_level = 0; @@ -1168,7 +1167,6 @@ static int transaction_kthread(void *arg) goto sleep; } mutex_unlock(&root->fs_info->trans_mutex); - btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sleep: @@ -1434,12 +1432,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, tree_root, "btrfs-transaction"); if (!fs_info->transaction_kthread) - goto fail_trans_kthread; + goto fail_cleaner; return tree_root; -fail_trans_kthread: +fail_cleaner: kthread_stop(fs_info->cleaner_kthread); fail_extent_root: free_extent_buffer(extent_root->node); @@ -1662,7 +1660,6 @@ int close_ctree(struct btrfs_root *root) kthread_stop(root->fs_info->transaction_kthread); kthread_stop(root->fs_info->cleaner_kthread); - btrfs_defrag_dirty_roots(root->fs_info); btrfs_clean_old_snapshots(root); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); @@ -1794,58 +1791,6 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) return; } -void btrfs_set_buffer_defrag(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, - buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); -} - -void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, - buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, - GFP_NOFS); -} - -int btrfs_buffer_defrag(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - return test_range_bit(&BTRFS_I(btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0); -} - -int btrfs_buffer_defrag_done(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - return test_range_bit(&BTRFS_I(btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, - EXTENT_DEFRAG_DONE, 0); -} - -int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, - EXTENT_DEFRAG_DONE, GFP_NOFS); -} - -int btrfs_clear_buffer_defrag(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, - EXTENT_DEFRAG, GFP_NOFS); -} - int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index deff6b4815a..353c3c50c95 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -61,12 +61,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); int btrfs_set_buffer_uptodate(struct extent_buffer *buf); int wait_on_tree_block_writeback(struct btrfs_root *root, struct extent_buffer *buf); -void btrfs_set_buffer_defrag(struct extent_buffer *buf); -void btrfs_set_buffer_defrag_done(struct extent_buffer *buf); -int btrfs_buffer_defrag(struct extent_buffer *buf); -int btrfs_buffer_defrag_done(struct extent_buffer *buf); -int btrfs_clear_buffer_defrag(struct extent_buffer *buf); -int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf); int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); void btrfs_csum_final(u32 crc, char *result); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index dc3c03c6612..5e0857ffbc3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2095,8 +2095,6 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); - if (!btrfs_test_opt(root, SSD)) - btrfs_set_buffer_defrag(buf); trans->blocks_used++; return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 726d6871fa1..5e28cf5c2e8 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -365,7 +365,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait) return 0; } btrfs_clean_old_snapshots(root); - btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sb->s_dirt = 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8e909cb97c6..98f422d9ab0 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -30,7 +30,6 @@ extern struct kmem_cache *btrfs_trans_handle_cachep; extern struct kmem_cache *btrfs_transaction_cachep; #define BTRFS_ROOT_TRANS_TAG 0 -#define BTRFS_ROOT_DEFRAG_TAG 1 static noinline void put_transaction(struct btrfs_transaction *transaction) { @@ -92,9 +91,6 @@ static noinline int record_root_in_trans(struct btrfs_root *root) radix_tree_tag_set(&root->fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); - radix_tree_tag_set(&root->fs_info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_DEFRAG_TAG); root->commit_root = btrfs_root_node(root); } else { WARN_ON(1); @@ -403,44 +399,15 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) cond_resched(); trans = btrfs_start_transaction(root, 1); - if (ret != -EAGAIN) + if (root->fs_info->closing || ret != -EAGAIN) break; } root->defrag_running = 0; smp_mb(); - radix_tree_tag_clear(&info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_DEFRAG_TAG); btrfs_end_transaction(trans, root); return 0; } -int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) -{ - struct btrfs_root *gang[1]; - struct btrfs_root *root; - int i; - int ret; - int err = 0; - u64 last = 0; - - while(1) { - ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix, - (void **)gang, last, - ARRAY_SIZE(gang), - BTRFS_ROOT_DEFRAG_TAG); - if (ret == 0) - break; - for (i = 0; i < ret; i++) { - root = gang[i]; - last = root->root_key.objectid + 1; - btrfs_defrag_root(root, 1); - } - } - btrfs_defrag_root(info->extent_root, 1); - return err; -} - static noinline int drop_dirty_roots(struct btrfs_root *tree_root, struct list_head *list) { diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index e1e5a06b65f..9ccd5a5b170 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -84,7 +84,6 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, struct list_head *dead_list); -int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); int btrfs_clean_old_snapshots(struct btrfs_root *root); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index b17693f61fb..cc2650b0695 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -32,10 +32,13 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, int wret; int level; int orig_level; - int i; int is_extent = 0; int next_key_ret = 0; u64 last_ret = 0; + u64 min_trans = 0; + + if (cache_only) + goto out; if (root->fs_info->extent_root == root) { /* @@ -43,10 +46,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, * we can't defrag the extent root without deadlock */ goto out; -#if 0 - mutex_lock(&root->fs_info->alloc_mutex); - is_extent = 1; -#endif } if (root->ref_cows == 0 && !is_extent) @@ -84,6 +83,17 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, path->lowest_level = 1; path->keep_locks = 1; + if (cache_only) + min_trans = root->defrag_trans_start; + + ret = btrfs_search_forward(root, &key, path, cache_only, min_trans); + if (ret < 0) + goto out; + if (ret > 0) { + ret = 0; + goto out; + } + btrfs_release_path(root, path); wret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (wret < 0) { @@ -95,7 +105,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, goto out; } path->slots[1] = btrfs_header_nritems(path->nodes[1]); - next_key_ret = btrfs_find_next_key(root, path, &key, 1); + next_key_ret = btrfs_find_next_key(root, path, &key, 1, cache_only, + min_trans); ret = btrfs_realloc_node(trans, root, path->nodes[1], 0, cache_only, &last_ret, @@ -106,19 +117,9 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, ret = -EAGAIN; } - for (i = 1; i < BTRFS_MAX_LEVEL; i++) { - if (path->locks[i]) { - btrfs_tree_unlock(path->nodes[i]); - path->locks[i] = 0; - } - if (path->nodes[i]) { - free_extent_buffer(path->nodes[i]); - path->nodes[i] = NULL; - } - } + btrfs_release_path(root, path); if (is_extent) btrfs_extent_post_op(trans, root); - out: if (is_extent) mutex_unlock(&root->fs_info->alloc_mutex); @@ -138,6 +139,7 @@ done: if (ret != -EAGAIN) { memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); + root->defrag_trans_start = trans->transid; } return ret; } -- cgit v1.2.3-70-g09d2 From 7d9eb12c8739e7dc80c78c6b3596f912ecd8f941 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 8 Jul 2008 14:19:17 -0400 Subject: Btrfs: Add locking around volume management (device add/remove/balance) Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 4 --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 73 +++++++++++++++++++++++++++++++++++++------------- fs/btrfs/ioctl.c | 6 ++--- fs/btrfs/volumes.c | 58 +++++++++++++++++++++++++++++---------- 6 files changed, 103 insertions(+), 40 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c6759fc1004..bbf9bf37406 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1251,10 +1251,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root WARN_ON(p->nodes[0] != NULL); WARN_ON(cow && root == root->fs_info->extent_root && !mutex_is_locked(&root->fs_info->alloc_mutex)); - WARN_ON(root == root->fs_info->chunk_root && - !mutex_is_locked(&root->fs_info->chunk_mutex)); - WARN_ON(root == root->fs_info->dev_root && - !mutex_is_locked(&root->fs_info->chunk_mutex)); if (ins_len < 0) lowest_unlock = 2; again: diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a28796482b4..f3783dbd9b6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -523,6 +523,7 @@ struct btrfs_fs_info { struct mutex alloc_mutex; struct mutex chunk_mutex; struct mutex drop_mutex; + struct mutex volume_mutex; struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4cdc0b6a267..8f4c40033e9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1287,6 +1287,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->chunk_mutex); mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->cleaner_mutex); + mutex_init(&fs_info->volume_mutex); #if 0 ret = add_hasher(fs_info, "crc32c"); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5e0857ffbc3..8ebfa6be079 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -245,6 +245,7 @@ static int noinline find_search_start(struct btrfs_root *root, u64 search_start = *start_ret; int wrapped = 0; + WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); free_space_cache = &root->fs_info->free_space_cache; @@ -1242,6 +1243,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 start; u64 end; + WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); while(total) { cache = btrfs_lookup_block_group(info, bytenr); if (!cache) { @@ -1297,6 +1299,7 @@ static int update_pinned_extents(struct btrfs_root *root, struct btrfs_block_group_cache *cache; struct btrfs_fs_info *fs_info = root->fs_info; + WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); if (pin) { set_extent_dirty(&fs_info->pinned_extents, bytenr, bytenr + num - 1, GFP_NOFS); @@ -1391,6 +1394,7 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, int level; int err = 0; + WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex)); btrfs_set_stack_extent_refs(&extent_item, 1); btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); path = btrfs_alloc_path(); @@ -1437,6 +1441,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, { int err = 0; + WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); if (!pending) { struct extent_buffer *buf; buf = btrfs_find_tree_block(root, bytenr, num_bytes); @@ -1490,6 +1495,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_extent_item *ei; u32 refs; + WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); key.objectid = bytenr; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_bytes; @@ -1619,6 +1625,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct struct extent_io_tree *pending_del; struct extent_io_tree *pinned_extents; + WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex)); pending_del = &extent_root->fs_info->pending_del; pinned_extents = &extent_root->fs_info->pinned_extents; @@ -2428,6 +2435,10 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_node_key(node, &found_key, path->slots[level]); WARN_ON(memcmp(&found_key, &root_item->drop_progress, sizeof(found_key))); + /* + * unlock our path, this is safe because only this + * function is allowed to delete this snapshot + */ for (i = 0; i < BTRFS_MAX_LEVEL; i++) { if (path->nodes[i] && path->locks[i]) { path->locks[i] = 0; @@ -2611,7 +2622,6 @@ static int find_root_for_ref(struct btrfs_root *root, u64 root_search_start = BTRFS_FS_TREE_OBJECTID; u64 found_bytenr; int ret; - int i; root_location.offset = (u64)-1; root_location.type = BTRFS_ROOT_ITEM_KEY; @@ -2635,12 +2645,6 @@ static int find_root_for_ref(struct btrfs_root *root, found_bytenr = path->nodes[level]->start; } - for (i = level; i < BTRFS_MAX_LEVEL; i++) { - if (!path->nodes[i]) - break; - free_extent_buffer(path->nodes[i]); - path->nodes[i] = NULL; - } btrfs_release_path(cur_root, path); if (found_bytenr == bytenr) { @@ -2689,6 +2693,8 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, int ret; int level; + WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex)); + ref = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_extent_ref); ref_root = btrfs_ref_root(path->nodes[0], ref); @@ -2707,6 +2713,7 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, found_root = btrfs_read_fs_root_no_name(extent_root->fs_info, &root_location); BUG_ON(!found_root); + mutex_unlock(&extent_root->fs_info->alloc_mutex); if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) { found_key.objectid = ref_objectid; @@ -2748,9 +2755,9 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, /* this can happen if the reference is not against * the latest version of the tree root */ - if (is_bad_inode(inode)) { + if (is_bad_inode(inode)) goto out; - } + *last_file_objectid = inode->i_ino; *last_file_root = found_root->root_key.objectid; *last_file_offset = ref_offset; @@ -2760,7 +2767,7 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, } else { struct btrfs_trans_handle *trans; struct extent_buffer *eb; - int i; + int needs_lock = 0; eb = read_tree_block(found_root, extent_key->objectid, extent_key->offset, 0); @@ -2782,26 +2789,40 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, if (ret) goto out; + /* + * right here almost anything could happen to our key, + * but that's ok. The cow below will either relocate it + * or someone else will have relocated it. Either way, + * it is in a different spot than it was before and + * we're happy. + */ + trans = btrfs_start_transaction(found_root, 1); + if (found_root == extent_root->fs_info->extent_root || + found_root == extent_root->fs_info->chunk_root || + found_root == extent_root->fs_info->dev_root) { + needs_lock = 1; + mutex_lock(&extent_root->fs_info->alloc_mutex); + } + path->lowest_level = level; path->reada = 2; ret = btrfs_search_slot(trans, found_root, &found_key, path, 0, 1); path->lowest_level = 0; - for (i = level; i < BTRFS_MAX_LEVEL; i++) { - if (!path->nodes[i]) - break; - free_extent_buffer(path->nodes[i]); - path->nodes[i] = NULL; - } btrfs_release_path(found_root, path); + if (found_root == found_root->fs_info->extent_root) btrfs_extent_post_op(trans, found_root); + if (needs_lock) + mutex_unlock(&extent_root->fs_info->alloc_mutex); + btrfs_end_transaction(trans, found_root); - } + } out: + mutex_lock(&extent_root->fs_info->alloc_mutex); return 0; } @@ -2943,7 +2964,10 @@ int __alloc_chunk_for_shrink(struct btrfs_root *root, if (btrfs_block_group_used(&shrink_block_group->item) > 0) { + mutex_unlock(&root->fs_info->alloc_mutex); trans = btrfs_start_transaction(root, 1); + mutex_lock(&root->fs_info->alloc_mutex); + new_alloc_flags = update_block_group_flags(root, shrink_block_group->flags); if (new_alloc_flags != shrink_block_group->flags) { @@ -2954,7 +2978,10 @@ int __alloc_chunk_for_shrink(struct btrfs_root *root, } do_chunk_alloc(trans, root->fs_info->extent_root, calc + 2 * 1024 * 1024, new_alloc_flags, force); + + mutex_unlock(&root->fs_info->alloc_mutex); btrfs_end_transaction(trans, root); + mutex_lock(&root->fs_info->alloc_mutex); } return 0; } @@ -3031,9 +3058,9 @@ again: if (ret < 0) goto out; +next: leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); -next: if (path->slots[0] >= nritems) { ret = btrfs_next_leaf(root, path); if (ret < 0) @@ -3083,6 +3110,7 @@ next: printk("btrfs relocate found %llu last extent was %llu\n", (unsigned long long)total_found, (unsigned long long)found_key.objectid); + mutex_unlock(&root->fs_info->alloc_mutex); trans = btrfs_start_transaction(tree_root, 1); btrfs_commit_transaction(trans, tree_root); @@ -3090,6 +3118,7 @@ next: trans = btrfs_start_transaction(tree_root, 1); btrfs_commit_transaction(trans, tree_root); + mutex_lock(&root->fs_info->alloc_mutex); goto again; } @@ -3097,7 +3126,10 @@ next: * we've freed all the extents, now remove the block * group item from the tree */ + mutex_unlock(&root->fs_info->alloc_mutex); + trans = btrfs_start_transaction(root, 1); + mutex_lock(&root->fs_info->alloc_mutex); memcpy(&key, &shrink_block_group->key, sizeof(key)); ret = btrfs_search_slot(trans, root, &key, path, -1, 1); @@ -3119,8 +3151,12 @@ next: kfree(shrink_block_group); btrfs_del_item(trans, root, path); + btrfs_release_path(root, path); + mutex_unlock(&root->fs_info->alloc_mutex); btrfs_commit_transaction(trans, root); + mutex_lock(&root->fs_info->alloc_mutex); + /* the code to unpin extents might set a few bits in the free * space cache for this range again */ @@ -3263,6 +3299,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *cache; struct extent_io_tree *block_group_cache; + WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); extent_root = root->fs_info->extent_root; block_group_cache = &root->fs_info->block_group_cache; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 026039a2ac5..83f17a5cbd6 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -307,8 +307,7 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) goto out; } - mutex_lock(&root->fs_info->alloc_mutex); - mutex_lock(&root->fs_info->chunk_mutex); + mutex_lock(&root->fs_info->volume_mutex); sizestr = vol_args->name; devstr = strchr(sizestr, ':'); if (devstr) { @@ -378,8 +377,7 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) } out_unlock: - mutex_lock(&root->fs_info->alloc_mutex); - mutex_lock(&root->fs_info->chunk_mutex); + mutex_unlock(&root->fs_info->volume_mutex); out: kfree(vol_args); return ret; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 4e7cee27aab..5e6ee7a6f73 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -56,6 +56,18 @@ void btrfs_unlock_volumes(void) mutex_unlock(&uuid_mutex); } +static void lock_chunks(struct btrfs_root *root) +{ + mutex_lock(&root->fs_info->alloc_mutex); + mutex_lock(&root->fs_info->chunk_mutex); +} + +static void unlock_chunks(struct btrfs_root *root) +{ + mutex_unlock(&root->fs_info->alloc_mutex); + mutex_unlock(&root->fs_info->chunk_mutex); +} + int btrfs_cleanup_fs_uuids(void) { struct btrfs_fs_devices *fs_devices; @@ -822,6 +834,7 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, key.objectid = BTRFS_DEV_ITEMS_OBJECTID; key.type = BTRFS_DEV_ITEM_KEY; key.offset = device->devid; + lock_chunks(root); ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) @@ -856,6 +869,7 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, total_bytes - 1); out: btrfs_free_path(path); + unlock_chunks(root); btrfs_commit_transaction(trans, root); return ret; } @@ -870,9 +884,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) u64 devid; int ret = 0; - mutex_lock(&root->fs_info->alloc_mutex); - mutex_lock(&root->fs_info->chunk_mutex); mutex_lock(&uuid_mutex); + mutex_lock(&root->fs_info->volume_mutex); all_avail = root->fs_info->avail_data_alloc_bits | root->fs_info->avail_system_alloc_bits | @@ -988,9 +1001,8 @@ error_close: if (bdev) close_bdev_excl(bdev); out: + mutex_unlock(&root->fs_info->volume_mutex); mutex_unlock(&uuid_mutex); - mutex_unlock(&root->fs_info->chunk_mutex); - mutex_unlock(&root->fs_info->alloc_mutex); return ret; } @@ -1010,10 +1022,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) return -EIO; } - mutex_lock(&root->fs_info->alloc_mutex); - mutex_lock(&root->fs_info->chunk_mutex); + mutex_lock(&root->fs_info->volume_mutex); trans = btrfs_start_transaction(root, 1); + lock_chunks(root); devices = &root->fs_info->fs_devices->devices; list_for_each(cur, devices) { device = list_entry(cur, struct btrfs_device, dev_list); @@ -1065,9 +1077,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) root->fs_info->fs_devices->num_devices++; root->fs_info->fs_devices->open_devices++; out: + unlock_chunks(root); btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->chunk_mutex); - mutex_unlock(&root->fs_info->alloc_mutex); + mutex_unlock(&root->fs_info->volume_mutex); return ret; @@ -1122,7 +1134,7 @@ out: return ret; } -int btrfs_grow_device(struct btrfs_trans_handle *trans, +static int __btrfs_grow_device(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 new_size) { struct btrfs_super_block *super_copy = @@ -1134,6 +1146,16 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, return btrfs_update_device(trans, device); } +int btrfs_grow_device(struct btrfs_trans_handle *trans, + struct btrfs_device *device, u64 new_size) +{ + int ret; + lock_chunks(device->dev_root); + ret = __btrfs_grow_device(trans, device, new_size); + unlock_chunks(device->dev_root); + return ret; +} + static int btrfs_free_chunk(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 chunk_tree, u64 chunk_objectid, @@ -1234,6 +1256,8 @@ int btrfs_relocate_chunk(struct btrfs_root *root, trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); + lock_chunks(root); + /* * step two, delete the device extents and the * chunk tree entries @@ -1278,6 +1302,7 @@ int btrfs_relocate_chunk(struct btrfs_root *root, /* once for us */ free_extent_map(em); + unlock_chunks(root); btrfs_end_transaction(trans, root); return 0; } @@ -1308,8 +1333,7 @@ int btrfs_balance(struct btrfs_root *dev_root) struct btrfs_key found_key; - BUG(); /* FIXME, needs locking */ - + mutex_lock(&dev_root->fs_info->volume_mutex); dev_root = dev_root->fs_info->dev_root; /* step one make some room on all the devices */ @@ -1355,13 +1379,14 @@ int btrfs_balance(struct btrfs_root *dev_root) ret = btrfs_previous_item(chunk_root, path, 0, BTRFS_CHUNK_ITEM_KEY); - if (ret) { + if (ret) break; - } + btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); if (found_key.objectid != key.objectid) break; + chunk = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_chunk); @@ -1370,16 +1395,17 @@ int btrfs_balance(struct btrfs_root *dev_root) if (key.offset == 0) break; + btrfs_release_path(chunk_root, path); ret = btrfs_relocate_chunk(chunk_root, chunk_root->root_key.objectid, found_key.objectid, found_key.offset); BUG_ON(ret); - btrfs_release_path(chunk_root, path); } ret = 0; error: btrfs_free_path(path); + mutex_unlock(&dev_root->fs_info->volume_mutex); return ret; } @@ -1419,14 +1445,18 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) path->reada = 2; + lock_chunks(root); + device->total_bytes = new_size; ret = btrfs_update_device(trans, device); if (ret) { + unlock_chunks(root); btrfs_end_transaction(trans, root); goto done; } WARN_ON(diff > old_total); btrfs_set_super_total_bytes(super_copy, old_total - diff); + unlock_chunks(root); btrfs_end_transaction(trans, root); key.objectid = device->devid; -- cgit v1.2.3-70-g09d2 From e6dcd2dc9c489108648e2ed543315dd134d50a9a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:53:50 -0400 Subject: Btrfs: New data=ordered implementation The old data=ordered code would force commit to wait until all the data extents from the transaction were fully on disk. This introduced large latencies into the commit and stalled new writers in the transaction for a long time. The new code changes the way data allocations and extents work: * When delayed allocation is filled, data extents are reserved, and the extent bit EXTENT_ORDERED is set on the entire range of the extent. A struct btrfs_ordered_extent is allocated an inserted into a per-inode rbtree to track the pending extents. * As each page is written EXTENT_ORDERED is cleared on the bytes corresponding to that page. * When all of the bytes corresponding to a single struct btrfs_ordered_extent are written, The previously reserved extent is inserted into the FS btree and into the extent allocation trees. The checksums for the file data are also updated. Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 4 +- fs/btrfs/ctree.h | 19 +- fs/btrfs/disk-io.c | 13 +- fs/btrfs/extent-tree.c | 132 +++++++++----- fs/btrfs/extent_io.c | 52 +++++- fs/btrfs/extent_io.h | 14 +- fs/btrfs/extent_map.c | 5 +- fs/btrfs/file-item.c | 62 ++++--- fs/btrfs/file.c | 67 ++++--- fs/btrfs/inode.c | 447 ++++++++++++++++++++++++++++++----------------- fs/btrfs/ordered-data.c | 455 ++++++++++++++++++++++++++++++------------------ fs/btrfs/ordered-data.h | 71 ++++++-- fs/btrfs/transaction.c | 67 +------ fs/btrfs/transaction.h | 4 - 14 files changed, 910 insertions(+), 502 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 40b4e0c9cd0..8d03687510e 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -21,6 +21,7 @@ #include "extent_map.h" #include "extent_io.h" +#include "ordered-data.h" /* in memory btrfs inode */ struct btrfs_inode { @@ -32,9 +33,8 @@ struct btrfs_inode { struct extent_io_tree io_failure_tree; struct mutex csum_mutex; struct inode vfs_inode; - atomic_t ordered_writeback; + struct btrfs_ordered_inode_tree ordered_tree; - u64 ordered_trans; /* * transid of the trans_handle that last modified this inode */ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f3783dbd9b6..ceebc052ddc 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "bit-radix.h" #include "extent_io.h" @@ -37,6 +38,7 @@ extern struct kmem_cache *btrfs_trans_handle_cachep; extern struct kmem_cache *btrfs_transaction_cachep; extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; +struct btrfs_ordered_sum; #define BTRFS_MAGIC "_B5RfS_M" @@ -510,6 +512,7 @@ struct btrfs_fs_info { u64 max_inline; u64 alloc_start; struct btrfs_transaction *running_transaction; + wait_queue_head_t transaction_throttle; struct btrfs_super_block super_copy; struct btrfs_super_block super_for_commit; struct block_device *__bdev; @@ -541,6 +544,7 @@ struct btrfs_fs_info { */ struct btrfs_workers workers; struct btrfs_workers endio_workers; + struct btrfs_workers endio_write_workers; struct btrfs_workers submit_workers; struct task_struct *transaction_kthread; struct task_struct *cleaner_kthread; @@ -1384,6 +1388,17 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, u64 owner, u64 owner_offset, u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, u64 data); +int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + struct btrfs_key *ins); +int btrfs_reserve_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 min_alloc_size, + u64 empty_size, u64 hint_byte, + u64 search_end, struct btrfs_key *ins, + u64 data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root @@ -1556,9 +1571,9 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, u64 bytenr, int mod); int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - struct bio *bio, char *sums); + struct btrfs_ordered_sum *sums); int btrfs_csum_one_bio(struct btrfs_root *root, - struct bio *bio, char **sums_ret); + struct bio *bio, struct btrfs_ordered_sum **sums_ret); struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b01b3f4f92a..4a5ebafb935 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -407,7 +407,11 @@ static int end_workqueue_bio(struct bio *bio, end_io_wq->error = err; end_io_wq->work.func = end_workqueue_fn; end_io_wq->work.flags = 0; - btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work); + if (bio->bi_rw & (1 << BIO_RW)) + btrfs_queue_worker(&fs_info->endio_write_workers, + &end_io_wq->work); + else + btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) return 0; @@ -1286,6 +1290,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->volume_mutex); + init_waitqueue_head(&fs_info->transaction_throttle); #if 0 ret = add_hasher(fs_info, "crc32c"); @@ -1325,9 +1330,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->endio_write_workers, + fs_info->thread_pool_size); btrfs_start_workers(&fs_info->workers, 1); btrfs_start_workers(&fs_info->submit_workers, 1); btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); + btrfs_start_workers(&fs_info->endio_write_workers, + fs_info->thread_pool_size); err = -EINVAL; if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) { @@ -1447,6 +1456,7 @@ fail_sb_buffer: extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); + btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->submit_workers); fail_iput: iput(fs_info->btree_inode); @@ -1702,6 +1712,7 @@ int close_ctree(struct btrfs_root *root) btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); + btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->submit_workers); iput(fs_info->btree_inode); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8ebfa6be079..343d1101c31 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1895,36 +1895,17 @@ error: return ret; } -/* - * finds a free extent and does all the dirty work required for allocation - * returns the key for the extent through ins, and a tree buffer for - * the first block of the extent through buf. - * - * returns 0 if everything worked, non-zero otherwise. - */ -int btrfs_alloc_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 num_bytes, u64 min_alloc_size, - u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset, - u64 empty_size, u64 hint_byte, - u64 search_end, struct btrfs_key *ins, u64 data) +static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 min_alloc_size, + u64 empty_size, u64 hint_byte, + u64 search_end, struct btrfs_key *ins, + u64 data) { int ret; - int pending_ret; - u64 super_used; - u64 root_used; u64 search_start = 0; u64 alloc_profile; - u32 sizes[2]; struct btrfs_fs_info *info = root->fs_info; - struct btrfs_root *extent_root = info->extent_root; - struct btrfs_extent_item *extent_item; - struct btrfs_extent_ref *ref; - struct btrfs_path *path; - struct btrfs_key keys[2]; - - maybe_lock_mutex(root); if (data) { alloc_profile = info->avail_data_alloc_bits & @@ -1974,11 +1955,48 @@ again: } if (ret) { printk("allocation failed flags %Lu\n", data); - } - if (ret) { BUG(); - goto out; } + clear_extent_dirty(&root->fs_info->free_space_cache, + ins->objectid, ins->objectid + ins->offset - 1, + GFP_NOFS); + return 0; +} + +int btrfs_reserve_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 min_alloc_size, + u64 empty_size, u64 hint_byte, + u64 search_end, struct btrfs_key *ins, + u64 data) +{ + int ret; + maybe_lock_mutex(root); + ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, + empty_size, hint_byte, search_end, ins, + data); + maybe_unlock_mutex(root); + return ret; +} + +static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + struct btrfs_key *ins) +{ + int ret; + int pending_ret; + u64 super_used; + u64 root_used; + u64 num_bytes = ins->offset; + u32 sizes[2]; + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_root *extent_root = info->extent_root; + struct btrfs_extent_item *extent_item; + struct btrfs_extent_ref *ref; + struct btrfs_path *path; + struct btrfs_key keys[2]; /* block accounting for super block */ spin_lock_irq(&info->delalloc_lock); @@ -1990,10 +2008,6 @@ again: root_used = btrfs_root_used(&root->root_item); btrfs_set_root_used(&root->root_item, root_used + num_bytes); - clear_extent_dirty(&root->fs_info->free_space_cache, - ins->objectid, ins->objectid + ins->offset - 1, - GFP_NOFS); - if (root == extent_root) { set_extent_bits(&root->fs_info->extent_ins, ins->objectid, ins->objectid + ins->offset - 1, @@ -2001,10 +2015,6 @@ again: goto update_block; } - WARN_ON(trans->alloc_exclude_nr); - trans->alloc_exclude_start = ins->objectid; - trans->alloc_exclude_nr = ins->offset; - memcpy(&keys[0], ins, sizeof(*ins)); keys[1].offset = hash_extent_ref(root_objectid, ref_generation, owner, owner_offset); @@ -2054,6 +2064,51 @@ update_block: BUG(); } out: + return ret; +} + +int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + struct btrfs_key *ins) +{ + int ret; + maybe_lock_mutex(root); + ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, + ref_generation, owner, + owner_offset, ins); + maybe_unlock_mutex(root); + return ret; +} +/* + * finds a free extent and does all the dirty work required for allocation + * returns the key for the extent through ins, and a tree buffer for + * the first block of the extent through buf. + * + * returns 0 if everything worked, non-zero otherwise. + */ +int btrfs_alloc_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 min_alloc_size, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + u64 empty_size, u64 hint_byte, + u64 search_end, struct btrfs_key *ins, u64 data) +{ + int ret; + + maybe_lock_mutex(root); + + ret = __btrfs_reserve_extent(trans, root, num_bytes, + min_alloc_size, empty_size, hint_byte, + search_end, ins, data); + BUG_ON(ret); + ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, + ref_generation, owner, + owner_offset, ins); + BUG_ON(ret); + maybe_unlock_mutex(root); return ret; } @@ -2288,8 +2343,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->alloc_mutex); /* we've dropped the lock, double check */ - ret = drop_snap_lookup_refcount(root, bytenr, - blocksize, &refs); + ret = lookup_extent_ref(NULL, root, bytenr, blocksize, + &refs); BUG_ON(ret); if (refs != 1) { parent = path->nodes[*level]; @@ -2584,7 +2639,6 @@ out_unlock: kfree(ra); trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); if (trans) { - btrfs_add_ordered_inode(inode); btrfs_end_transaction(trans, BTRFS_I(inode)->root); mark_inode_dirty(inode); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 40a5f53cb04..3f82a6e9ca4 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -793,6 +793,13 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, } EXPORT_SYMBOL(set_extent_dirty); +int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask); +} +EXPORT_SYMBOL(set_extent_ordered); + int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask) { @@ -812,8 +819,8 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return set_extent_bit(tree, start, end, - EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL, - mask); + EXTENT_DELALLOC | EXTENT_DIRTY, + 0, NULL, mask); } EXPORT_SYMBOL(set_extent_delalloc); @@ -825,6 +832,13 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, } EXPORT_SYMBOL(clear_extent_dirty); +int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask); +} +EXPORT_SYMBOL(clear_extent_ordered); + int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { @@ -1395,10 +1409,9 @@ static int end_bio_extent_writepage(struct bio *bio, if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); - if (tree->ops && tree->ops->writepage_end_io_hook) { ret = tree->ops->writepage_end_io_hook(page, start, - end, state); + end, state, uptodate); if (ret) uptodate = 0; } @@ -1868,9 +1881,14 @@ static int __extent_read_full_page(struct extent_io_tree *tree, unlock_extent(tree, cur, end, GFP_NOFS); break; } - extent_offset = cur - em->start; + if (extent_map_end(em) <= cur) { +printk("bad mapping em [%Lu %Lu] cur %Lu\n", em->start, extent_map_end(em), cur); + } BUG_ON(extent_map_end(em) <= cur); + if (end < cur) { +printk("2bad mapping end %Lu cur %Lu\n", end, cur); + } BUG_ON(end < cur); iosize = min(extent_map_end(em) - cur, end - cur + 1); @@ -1976,6 +1994,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, u64 last_byte = i_size_read(inode); u64 block_start; u64 iosize; + u64 unlock_start; sector_t sector; struct extent_map *em; struct block_device *bdev; @@ -1988,7 +2007,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, u64 nr_delalloc; u64 delalloc_end; - WARN_ON(!PageLocked(page)); page_offset = i_size & (PAGE_CACHE_SIZE - 1); if (page->index > end_index || @@ -2030,6 +2048,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, delalloc_start = delalloc_end + 1; } lock_extent(tree, start, page_end, GFP_NOFS); + unlock_start = start; end = page_end; if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { @@ -2038,6 +2057,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, if (last_byte <= start) { clear_extent_dirty(tree, start, page_end, GFP_NOFS); + unlock_extent(tree, start, page_end, GFP_NOFS); + if (tree->ops && tree->ops->writepage_end_io_hook) + tree->ops->writepage_end_io_hook(page, start, + page_end, NULL, 1); + unlock_start = page_end + 1; goto done; } @@ -2047,6 +2071,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, while (cur <= end) { if (cur >= last_byte) { clear_extent_dirty(tree, cur, page_end, GFP_NOFS); + unlock_extent(tree, unlock_start, page_end, GFP_NOFS); + if (tree->ops && tree->ops->writepage_end_io_hook) + tree->ops->writepage_end_io_hook(page, cur, + page_end, NULL, 1); + unlock_start = page_end + 1; break; } em = epd->get_extent(inode, page, page_offset, cur, @@ -2071,8 +2100,16 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, block_start == EXTENT_MAP_INLINE) { clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); + + unlock_extent(tree, unlock_start, cur + iosize -1, + GFP_NOFS); + if (tree->ops && tree->ops->writepage_end_io_hook) + tree->ops->writepage_end_io_hook(page, cur, + cur + iosize - 1, + NULL, 1); cur = cur + iosize; page_offset += iosize; + unlock_start = cur; continue; } @@ -2119,7 +2156,8 @@ done: set_page_writeback(page); end_page_writeback(page); } - unlock_extent(tree, start, page_end, GFP_NOFS); + if (unlock_start <= page_end) + unlock_extent(tree, unlock_start, page_end, GFP_NOFS); unlock_page(page); return 0; } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index f1960dafaa1..2268a799589 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -13,6 +13,8 @@ #define EXTENT_DEFRAG (1 << 6) #define EXTENT_DEFRAG_DONE (1 << 7) #define EXTENT_BUFFER_FILLED (1 << 8) +#define EXTENT_ORDERED (1 << 9) +#define EXTENT_ORDERED_METADATA (1 << 10) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) /* @@ -42,7 +44,7 @@ struct extent_io_ops { int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state); int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, - struct extent_state *state); + struct extent_state *state, int uptodate); int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits); int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end, @@ -131,6 +133,8 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, int filled); int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask); +int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int wake, int delete, gfp_t mask); int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask); int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, @@ -141,8 +145,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start, + u64 end, gfp_t mask); int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); int find_first_extent_bit(struct extent_io_tree *tree, u64 start, u64 *start_ret, u64 *end_ret, int bits); struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, @@ -209,6 +219,8 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, unsigned long start, unsigned long len); int wait_on_extent_buffer_writeback(struct extent_io_tree *tree, struct extent_buffer *eb); +int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end); +int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits); int clear_extent_buffer_dirty(struct extent_io_tree *tree, struct extent_buffer *eb); int set_extent_buffer_dirty(struct extent_io_tree *tree, diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index f5a04eb9a2a..81123277c2b 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -206,10 +206,11 @@ int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *merge = NULL; struct rb_node *rb; + BUG_ON(spin_trylock(&tree->lock)); rb = tree_insert(&tree->map, em->start, &em->rb_node); if (rb) { - merge = rb_entry(rb, struct extent_map, rb_node); ret = -EEXIST; + free_extent_map(merge); goto out; } atomic_inc(&em->refs); @@ -268,6 +269,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, struct rb_node *next = NULL; u64 end = range_end(start, len); + BUG_ON(spin_trylock(&tree->lock)); em = tree->last; if (em && end > em->start && start < extent_map_end(em)) goto found; @@ -318,6 +320,7 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) { int ret = 0; + BUG_ON(spin_trylock(&tree->lock)); rb_erase(&em->rb_node, &tree->map); em->in_tree = 0; if (tree->last == em) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index f537eb43c2c..345caf8ff51 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -135,26 +135,37 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, } int btrfs_csum_one_bio(struct btrfs_root *root, - struct bio *bio, char **sums_ret) + struct bio *bio, struct btrfs_ordered_sum **sums_ret) { - u32 *sums; + struct btrfs_ordered_sum *sums; + struct btrfs_sector_sum *sector_sum; char *data; struct bio_vec *bvec = bio->bi_io_vec; int bio_index = 0; - sums = kmalloc(bio->bi_vcnt * BTRFS_CRC32_SIZE, GFP_NOFS); + WARN_ON(bio->bi_vcnt <= 0); + sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS); if (!sums) return -ENOMEM; - *sums_ret = (char *)sums; + *sums_ret = sums; + sector_sum = &sums->sums; + sums->file_offset = page_offset(bvec->bv_page); + sums->len = bio->bi_size; + INIT_LIST_HEAD(&sums->list); while(bio_index < bio->bi_vcnt) { data = kmap_atomic(bvec->bv_page, KM_USER0); - *sums = ~(u32)0; - *sums = btrfs_csum_data(root, data + bvec->bv_offset, - *sums, bvec->bv_len); + sector_sum->sum = ~(u32)0; + sector_sum->sum = btrfs_csum_data(root, + data + bvec->bv_offset, + sector_sum->sum, + bvec->bv_len); kunmap_atomic(data, KM_USER0); - btrfs_csum_final(*sums, (char *)sums); - sums++; + btrfs_csum_final(sector_sum->sum, + (char *)§or_sum->sum); + sector_sum->offset = page_offset(bvec->bv_page) + + bvec->bv_offset; + sector_sum++; bio_index++; bvec++; } @@ -163,7 +174,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - struct bio *bio, char *sums) + struct btrfs_ordered_sum *sums) { u64 objectid = inode->i_ino; u64 offset; @@ -171,17 +182,16 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_key file_key; struct btrfs_key found_key; u64 next_offset; + u64 total_bytes = 0; int found_next; struct btrfs_path *path; struct btrfs_csum_item *item; struct btrfs_csum_item *item_end; struct extent_buffer *leaf = NULL; u64 csum_offset; - u32 *sums32 = (u32 *)sums; + struct btrfs_sector_sum *sector_sum; u32 nritems; u32 ins_size; - int bio_index = 0; - struct bio_vec *bvec = bio->bi_io_vec; char *eb_map; char *eb_token; unsigned long map_len; @@ -189,10 +199,11 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); + sector_sum = &sums->sums; again: next_offset = (u64)-1; found_next = 0; - offset = page_offset(bvec->bv_page) + bvec->bv_offset; + offset = sector_sum->offset; file_key.objectid = objectid; file_key.offset = offset; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); @@ -303,7 +314,7 @@ found: item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + btrfs_item_size_nr(leaf, path->slots[0])); eb_token = NULL; -next_bvec: +next_sector: if (!eb_token || (unsigned long)item + BTRFS_CRC32_SIZE >= map_start + map_len) { @@ -321,21 +332,20 @@ next_bvec: } if (eb_token) { memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)), - sums32, BTRFS_CRC32_SIZE); + §or_sum->sum, BTRFS_CRC32_SIZE); } else { - write_extent_buffer(leaf, sums32, (unsigned long)item, - BTRFS_CRC32_SIZE); + write_extent_buffer(leaf, §or_sum->sum, + (unsigned long)item, BTRFS_CRC32_SIZE); } - bio_index++; - bvec++; - sums32++; - if (bio_index < bio->bi_vcnt) { + total_bytes += root->sectorsize; + sector_sum++; + if (total_bytes < sums->len) { item = (struct btrfs_csum_item *)((char *)item + BTRFS_CRC32_SIZE); if (item < item_end && offset + PAGE_CACHE_SIZE == - page_offset(bvec->bv_page)) { - offset = page_offset(bvec->bv_page); - goto next_bvec; + sector_sum->offset) { + offset = sector_sum->offset; + goto next_sector; } } if (eb_token) { @@ -343,7 +353,7 @@ next_bvec: eb_token = NULL; } btrfs_mark_buffer_dirty(path->nodes[0]); - if (bio_index < bio->bi_vcnt) { + if (total_bytes < sums->len) { btrfs_release_path(root, path); goto again; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8037792f878..12e765f7e0d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -34,7 +34,6 @@ #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -#include "ordered-data.h" #include "ioctl.h" #include "print-tree.h" #include "compat.h" @@ -273,7 +272,9 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 mask = root->sectorsize - 1; last_pos_in_file = (isize + mask) & ~mask; hole_size = (start_pos - last_pos_in_file + mask) & ~mask; - if (last_pos_in_file < start_pos) { + if (hole_size > 0) { + btrfs_wait_ordered_range(inode, last_pos_in_file, + last_pos_in_file + hole_size); err = btrfs_drop_extents(trans, root, inode, last_pos_in_file, last_pos_in_file + hole_size, @@ -303,19 +304,17 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inline_size > root->fs_info->max_inline || (inline_size & (root->sectorsize -1)) == 0 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { - u64 last_end; - + /* check for reserved extents on each page, we don't want + * to reset the delalloc bit on things that already have + * extents reserved. + */ + set_extent_delalloc(io_tree, start_pos, + end_of_last_block, GFP_NOFS); for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; SetPageUptodate(p); set_page_dirty(p); } - last_end = (u64)(pages[num_pages -1]->index) << - PAGE_CACHE_SHIFT; - last_end += PAGE_CACHE_SIZE - 1; - set_extent_delalloc(io_tree, start_pos, end_of_last_block, - GFP_NOFS); - btrfs_add_ordered_inode(inode); } else { u64 aligned_end; /* step one, delete the existing extents in this range */ @@ -350,10 +349,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) struct extent_map *split = NULL; struct extent_map *split2 = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map *tmp; u64 len = end - start + 1; + u64 next_start; int ret; int testend = 1; + WARN_ON(end < start); if (end == (u64)-1) { len = (u64)-1; testend = 0; @@ -370,6 +372,8 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) spin_unlock(&em_tree->lock); break; } + tmp = rb_entry(&em->rb_node, struct extent_map, rb_node); + next_start = tmp->start; remove_extent_mapping(em_tree, em); if (em->block_start < EXTENT_MAP_LAST_BYTE && @@ -778,37 +782,58 @@ static int prepare_pages(struct btrfs_root *root, struct file *file, struct inode *inode = fdentry(file)->d_inode; int err = 0; u64 start_pos; + u64 last_pos; start_pos = pos & ~((u64)root->sectorsize - 1); + last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; memset(pages, 0, num_pages * sizeof(struct page *)); - +again: for (i = 0; i < num_pages; i++) { pages[i] = grab_cache_page(inode->i_mapping, index + i); if (!pages[i]) { err = -ENOMEM; BUG_ON(1); } -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - ClearPageDirty(pages[i]); -#else - cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); -#endif wait_on_page_writeback(pages[i]); - set_page_extent_mapped(pages[i]); - WARN_ON(!PageLocked(pages[i])); } if (start_pos < inode->i_size) { - u64 last_pos; - last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; + struct btrfs_ordered_extent *ordered; lock_extent(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, GFP_NOFS); + ordered = btrfs_lookup_first_ordered_extent(inode, last_pos -1); + if (ordered && + ordered->file_offset + ordered->len > start_pos && + ordered->file_offset < last_pos) { + btrfs_put_ordered_extent(ordered); + unlock_extent(&BTRFS_I(inode)->io_tree, + start_pos, last_pos - 1, GFP_NOFS); + for (i = 0; i < num_pages; i++) { + unlock_page(pages[i]); + page_cache_release(pages[i]); + } + btrfs_wait_ordered_range(inode, start_pos, + last_pos - start_pos); + goto again; + } + if (ordered) + btrfs_put_ordered_extent(ordered); + clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC, GFP_NOFS); unlock_extent(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, GFP_NOFS); } + for (i = 0; i < num_pages; i++) { +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + ClearPageDirty(pages[i]); +#else + cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); +#endif + set_page_extent_mapped(pages[i]); + WARN_ON(!PageLocked(pages[i])); + } return 0; } @@ -969,13 +994,11 @@ out_nolock: (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); } current->backing_dev_info = NULL; - btrfs_ordered_throttle(root, inode); return num_written ? num_written : err; } int btrfs_release_file(struct inode * inode, struct file * filp) { - btrfs_del_ordered_inode(inode, 0); if (filp->private_data) btrfs_ioctl_trans_end(filp); return 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d39433dfb2c..c5a62f0b959 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -43,6 +43,7 @@ #include "ioctl.h" #include "print-tree.h" #include "volumes.h" +#include "ordered-data.h" struct btrfs_iget_args { u64 ino; @@ -109,10 +110,11 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) u64 num_bytes; u64 cur_alloc_size; u64 blocksize = root->sectorsize; - u64 orig_start = start; u64 orig_num_bytes; struct btrfs_key ins; - int ret; + struct extent_map *em; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + int ret = 0; trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); @@ -120,33 +122,44 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) num_bytes = (end - start + blocksize) & ~(blocksize - 1); num_bytes = max(blocksize, num_bytes); - ret = btrfs_drop_extents(trans, root, inode, - start, start + num_bytes, start, &alloc_hint); orig_num_bytes = num_bytes; if (alloc_hint == EXTENT_MAP_INLINE) goto out; BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy)); + btrfs_drop_extent_cache(inode, start, start + num_bytes - 1); while(num_bytes > 0) { cur_alloc_size = min(num_bytes, root->fs_info->max_extent); - ret = btrfs_alloc_extent(trans, root, cur_alloc_size, - root->sectorsize, - root->root_key.objectid, - trans->transid, - inode->i_ino, start, 0, - alloc_hint, (u64)-1, &ins, 1); + ret = btrfs_reserve_extent(trans, root, cur_alloc_size, + root->sectorsize, 0, 0, + (u64)-1, &ins, 1); if (ret) { WARN_ON(1); goto out; } + em = alloc_extent_map(GFP_NOFS); + em->start = start; + em->len = ins.offset; + em->block_start = ins.objectid; + em->bdev = root->fs_info->fs_devices->latest_bdev; + while(1) { + spin_lock(&em_tree->lock); + ret = add_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + if (ret != -EEXIST) { + free_extent_map(em); + break; + } + btrfs_drop_extent_cache(inode, start, + start + ins.offset - 1); + } + cur_alloc_size = ins.offset; - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - start, ins.objectid, ins.offset, - ins.offset, 0); - inode->i_blocks += ins.offset >> 9; - btrfs_check_file(root, inode); + ret = btrfs_add_ordered_extent(inode, start, ins.objectid, + ins.offset); + BUG_ON(ret); if (num_bytes < cur_alloc_size) { printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes, cur_alloc_size); @@ -156,10 +169,6 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) alloc_hint = ins.objectid + ins.offset; start += cur_alloc_size; } - btrfs_drop_extent_cache(inode, orig_start, - orig_start + orig_num_bytes - 1); - btrfs_add_ordered_inode(inode); - btrfs_update_inode(trans, root, inode); out: btrfs_end_transaction(trans, root); return ret; @@ -341,25 +350,15 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, int mirror_num) { struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; int ret = 0; - char *sums = NULL; + struct btrfs_ordered_sum *sums; ret = btrfs_csum_one_bio(root, bio, &sums); BUG_ON(ret); - trans = btrfs_start_transaction(root, 1); - - btrfs_set_trans_block_group(trans, inode); - mutex_lock(&BTRFS_I(inode)->csum_mutex); - btrfs_csum_file_blocks(trans, root, inode, bio, sums); - mutex_unlock(&BTRFS_I(inode)->csum_mutex); - - ret = btrfs_end_transaction(trans, root); + ret = btrfs_add_ordered_sum(inode, sums); BUG_ON(ret); - kfree(sums); - return btrfs_map_bio(root, rw, bio, mirror_num, 1); } @@ -369,14 +368,10 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, struct btrfs_root *root = BTRFS_I(inode)->root; int ret = 0; - if (!(rw & (1 << BIO_RW))) { - ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); - BUG_ON(ret); - goto mapit; - } + ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); + BUG_ON(ret); - if (btrfs_test_opt(root, NODATASUM) || - btrfs_test_flag(inode, NODATASUM)) { + if (!(rw & (1 << BIO_RW))) { goto mapit; } @@ -387,6 +382,96 @@ mapit: return btrfs_map_bio(root, rw, bio, mirror_num, 0); } +static int add_pending_csums(struct btrfs_trans_handle *trans, + struct inode *inode, u64 file_offset, + struct list_head *list) +{ + struct list_head *cur; + struct btrfs_ordered_sum *sum; + + btrfs_set_trans_block_group(trans, inode); + while(!list_empty(list)) { + cur = list->next; + sum = list_entry(cur, struct btrfs_ordered_sum, list); + mutex_lock(&BTRFS_I(inode)->csum_mutex); + btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root, + inode, sum); + mutex_unlock(&BTRFS_I(inode)->csum_mutex); + list_del(&sum->list); + kfree(sum); + } + return 0; +} + +int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, + struct extent_state *state, int uptodate) +{ + struct inode *inode = page->mapping->host; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + struct btrfs_ordered_extent *ordered_extent; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + u64 alloc_hint = 0; + struct list_head list; + struct btrfs_key ins; + int ret; + + ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1); + if (!ret) { + return 0; + } + + trans = btrfs_start_transaction(root, 1); + + ordered_extent = btrfs_lookup_ordered_extent(inode, start); + BUG_ON(!ordered_extent); + + lock_extent(io_tree, ordered_extent->file_offset, + ordered_extent->file_offset + ordered_extent->len - 1, + GFP_NOFS); + + INIT_LIST_HEAD(&list); + + ins.objectid = ordered_extent->start; + ins.offset = ordered_extent->len; + ins.type = BTRFS_EXTENT_ITEM_KEY; + ret = btrfs_alloc_reserved_extent(trans, root, root->root_key.objectid, + trans->transid, inode->i_ino, + ordered_extent->file_offset, &ins); + BUG_ON(ret); + ret = btrfs_drop_extents(trans, root, inode, + ordered_extent->file_offset, + ordered_extent->file_offset + + ordered_extent->len, + ordered_extent->file_offset, &alloc_hint); + BUG_ON(ret); + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + ordered_extent->file_offset, + ordered_extent->start, + ordered_extent->len, + ordered_extent->len, 0); + BUG_ON(ret); + btrfs_drop_extent_cache(inode, ordered_extent->file_offset, + ordered_extent->file_offset + + ordered_extent->len - 1); + inode->i_blocks += ordered_extent->len >> 9; + unlock_extent(io_tree, ordered_extent->file_offset, + ordered_extent->file_offset + ordered_extent->len - 1, + GFP_NOFS); + add_pending_csums(trans, inode, ordered_extent->file_offset, + &ordered_extent->list); + + btrfs_remove_ordered_extent(inode, ordered_extent); + /* once for us */ + btrfs_put_ordered_extent(ordered_extent); + /* once for the tree */ + btrfs_put_ordered_extent(ordered_extent); + + btrfs_update_inode(trans, root, inode); + btrfs_end_transaction(trans, root); + return 0; +} + int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) { int ret = 0; @@ -409,7 +494,8 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) if (ret == -ENOENT || ret == -EFBIG) ret = 0; csum = 0; - printk("no csum found for inode %lu start %Lu\n", inode->i_ino, start); + printk("no csum found for inode %lu start %Lu\n", inode->i_ino, + start); goto out; } read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, @@ -833,7 +919,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) { struct btrfs_root *root; struct btrfs_trans_handle *trans; - struct inode *inode = dentry->d_inode; int ret; unsigned long nr = 0; @@ -849,14 +934,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) ret = btrfs_unlink_trans(trans, root, dir, dentry); nr = trans->blocks_used; - if (inode->i_nlink == 0) { - /* if the inode isn't linked anywhere, - * we don't need to worry about - * data=ordered - */ - btrfs_del_ordered_inode(inode, 1); - } - btrfs_end_transaction_throttle(trans, root); fail: btrfs_btree_balance_dirty(root, nr); @@ -931,6 +1008,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, int extent_type = -1; u64 mask = root->sectorsize - 1; + btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1); path = btrfs_alloc_path(); path->reada = -1; @@ -1117,34 +1195,6 @@ error: return ret; } -static int btrfs_cow_one_page(struct inode *inode, struct page *page, - size_t zero_start) -{ - char *kaddr; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 page_end = page_start + PAGE_CACHE_SIZE - 1; - int ret = 0; - - WARN_ON(!PageLocked(page)); - set_page_extent_mapped(page); - - lock_extent(io_tree, page_start, page_end, GFP_NOFS); - set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, - page_end, GFP_NOFS); - - if (zero_start != PAGE_CACHE_SIZE) { - kaddr = kmap(page); - memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start); - flush_dcache_page(page); - kunmap(page); - } - set_page_dirty(page); - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); - - return ret; -} - /* * taken from block_truncate_page, but does cow as it zeros out * any bytes left in the last page in the file. @@ -1153,12 +1203,16 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) { struct inode *inode = mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct btrfs_ordered_extent *ordered; + char *kaddr; u32 blocksize = root->sectorsize; pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); struct page *page; int ret = 0; u64 page_start; + u64 page_end; if ((offset & (blocksize - 1)) == 0) goto out; @@ -1168,6 +1222,10 @@ again: page = grab_cache_page(mapping, index); if (!page) goto out; + + page_start = page_offset(page); + page_end = page_start + PAGE_CACHE_SIZE - 1; + if (!PageUptodate(page)) { ret = btrfs_readpage(NULL, page); lock_page(page); @@ -1181,10 +1239,32 @@ again: goto out; } } - - page_start = (u64)page->index << PAGE_CACHE_SHIFT; wait_on_page_writeback(page); - ret = btrfs_cow_one_page(inode, page, offset); + + lock_extent(io_tree, page_start, page_end, GFP_NOFS); + set_page_extent_mapped(page); + + ordered = btrfs_lookup_ordered_extent(inode, page_start); + if (ordered) { + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_page(page); + page_cache_release(page); + btrfs_wait_ordered_extent(inode, ordered); + btrfs_put_ordered_extent(ordered); + goto again; + } + + set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, + page_end, GFP_NOFS); + ret = 0; + if (offset != PAGE_CACHE_SIZE) { + kaddr = kmap(page); + memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); + flush_dcache_page(page); + kunmap(page); + } + set_page_dirty(page); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); unlock_page(page); page_cache_release(page); @@ -1222,8 +1302,9 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_truncate_page(inode->i_mapping, inode->i_size); - lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); hole_size = block_end - hole_start; + btrfs_wait_ordered_range(inode, hole_start, hole_size); + lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); @@ -1258,6 +1339,7 @@ void btrfs_delete_inode(struct inode *inode) unsigned long nr; int ret; + btrfs_wait_ordered_range(inode, 0, (u64)-1); truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) { goto no_delete; @@ -1403,7 +1485,6 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); - atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); return 0; } @@ -1705,7 +1786,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); - atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->root = root; @@ -1930,7 +2010,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); BTRFS_I(inode)->delalloc_bytes = 0; - atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; @@ -2066,64 +2145,18 @@ out_unlock: static int merge_extent_mapping(struct extent_map_tree *em_tree, struct extent_map *existing, - struct extent_map *em) + struct extent_map *em, + u64 map_start, u64 map_len) { u64 start_diff; - u64 new_end; - int ret = 0; - int real_blocks = existing->block_start < EXTENT_MAP_LAST_BYTE; - - if (real_blocks && em->block_start >= EXTENT_MAP_LAST_BYTE) - goto invalid; - - if (!real_blocks && em->block_start != existing->block_start) - goto invalid; - - new_end = max(existing->start + existing->len, em->start + em->len); - - if (existing->start >= em->start) { - if (em->start + em->len < existing->start) - goto invalid; - start_diff = existing->start - em->start; - if (real_blocks && em->block_start + start_diff != - existing->block_start) - goto invalid; - - em->len = new_end - em->start; - - remove_extent_mapping(em_tree, existing); - /* free for the tree */ - free_extent_map(existing); - ret = add_extent_mapping(em_tree, em); - - } else if (em->start > existing->start) { - - if (existing->start + existing->len < em->start) - goto invalid; - - start_diff = em->start - existing->start; - if (real_blocks && existing->block_start + start_diff != - em->block_start) - goto invalid; - - remove_extent_mapping(em_tree, existing); - em->block_start = existing->block_start; - em->start = existing->start; - em->len = new_end - existing->start; - free_extent_map(existing); - - ret = add_extent_mapping(em_tree, em); - } else { - goto invalid; - } - return ret; - -invalid: - printk("invalid extent map merge [%Lu %Lu %Lu] [%Lu %Lu %Lu]\n", - existing->start, existing->len, existing->block_start, - em->start, em->len, em->block_start); - return -EIO; + BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); + start_diff = map_start - em->start; + em->start = map_start; + em->len = map_len; + if (em->block_start < EXTENT_MAP_LAST_BYTE) + em->block_start += start_diff; + return add_extent_mapping(em_tree, em); } struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, @@ -2170,10 +2203,9 @@ again: err = -ENOMEM; goto out; } - + em->bdev = root->fs_info->fs_devices->latest_bdev; em->start = EXTENT_MAP_HOLE; em->len = (u64)-1; - em->bdev = root->fs_info->fs_devices->latest_bdev; ret = btrfs_lookup_file_extent(trans, root, path, objectid, start, trans != NULL); if (ret < 0) { @@ -2314,6 +2346,9 @@ insert: */ if (ret == -EEXIST) { struct extent_map *existing; + + ret = 0; + existing = lookup_extent_mapping(em_tree, start, len); if (existing && (existing->start > start || existing->start + existing->len <= start)) { @@ -2325,7 +2360,8 @@ insert: em->len); if (existing) { err = merge_extent_mapping(em_tree, existing, - em); + em, start, + root->sectorsize); free_extent_map(existing); if (err) { free_extent_map(em); @@ -2341,6 +2377,7 @@ insert: } else { free_extent_map(em); em = existing; + err = 0; } } spin_unlock(&em_tree->lock); @@ -2348,8 +2385,9 @@ out: btrfs_free_path(path); if (trans) { ret = btrfs_end_transaction(trans, root); - if (!err) + if (!err) { err = ret; + } } if (err) { free_extent_map(em); @@ -2474,8 +2512,7 @@ btrfs_readpages(struct file *file, struct address_space *mapping, return extent_readpages(tree, mapping, pages, nr_pages, btrfs_get_extent); } - -static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) +static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags) { struct extent_io_tree *tree; struct extent_map_tree *map; @@ -2493,15 +2530,54 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) return ret; } +static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) +{ + struct btrfs_ordered_extent *ordered; + + ordered = btrfs_lookup_ordered_extent(page->mapping->host, + page_offset(page)); + if (ordered) { + btrfs_put_ordered_extent(ordered); + return 0; + } + return __btrfs_releasepage(page, gfp_flags); +} + static void btrfs_invalidatepage(struct page *page, unsigned long offset) { struct extent_io_tree *tree; + struct btrfs_ordered_extent *ordered; + u64 page_start = page_offset(page); + u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + wait_on_page_writeback(page); tree = &BTRFS_I(page->mapping->host)->io_tree; - extent_invalidatepage(tree, page, offset); - btrfs_releasepage(page, GFP_NOFS); + if (offset) { + btrfs_releasepage(page, GFP_NOFS); + return; + } + + lock_extent(tree, page_start, page_end, GFP_NOFS); + ordered = btrfs_lookup_ordered_extent(page->mapping->host, + page_offset(page)); + if (ordered) { + clear_extent_bit(tree, page_start, page_end, + EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_LOCKED, 1, 0, GFP_NOFS); + btrfs_writepage_end_io_hook(page, page_start, + page_end, NULL, 1); + btrfs_put_ordered_extent(ordered); + lock_extent(tree, page_start, page_end, GFP_NOFS); + } + clear_extent_bit(tree, page_start, page_end, + EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_ORDERED, + 1, 1, GFP_NOFS); + __btrfs_releasepage(page, GFP_NOFS); + if (PagePrivate(page)) { - invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); + invalidate_extent_lru(tree, page_offset(page), + PAGE_CACHE_SIZE); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); @@ -2527,35 +2603,63 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) { struct inode *inode = fdentry(vma->vm_file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - unsigned long end; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct btrfs_ordered_extent *ordered; + char *kaddr; + unsigned long zero_start; loff_t size; int ret; u64 page_start; + u64 page_end; ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); if (ret) goto out; ret = -EINVAL; - +again: lock_page(page); - wait_on_page_writeback(page); size = i_size_read(inode); - page_start = (u64)page->index << PAGE_CACHE_SHIFT; + page_start = page_offset(page); + page_end = page_start + PAGE_CACHE_SIZE - 1; if ((page->mapping != inode->i_mapping) || - (page_start > size)) { + (page_start >= size)) { /* page got truncated out from underneath us */ goto out_unlock; } + wait_on_page_writeback(page); + + lock_extent(io_tree, page_start, page_end, GFP_NOFS); + set_page_extent_mapped(page); + + ordered = btrfs_lookup_ordered_extent(inode, page_start); + if (ordered) { + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_page(page); + btrfs_wait_ordered_extent(inode, ordered); + btrfs_put_ordered_extent(ordered); + goto again; + } + + set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, + page_end, GFP_NOFS); + ret = 0; /* page is wholly or partially inside EOF */ if (page_start + PAGE_CACHE_SIZE > size) - end = size & ~PAGE_CACHE_MASK; + zero_start = size & ~PAGE_CACHE_MASK; else - end = PAGE_CACHE_SIZE; + zero_start = PAGE_CACHE_SIZE; - ret = btrfs_cow_one_page(inode, page, end); + if (zero_start != PAGE_CACHE_SIZE) { + kaddr = kmap(page); + memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start); + flush_dcache_page(page); + kunmap(page); + } + set_page_dirty(page); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); out_unlock: unlock_page(page); @@ -2662,15 +2766,28 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) if (!ei) return NULL; ei->last_trans = 0; - ei->ordered_trans = 0; + btrfs_ordered_inode_tree_init(&ei->ordered_tree); return &ei->vfs_inode; } void btrfs_destroy_inode(struct inode *inode) { + struct btrfs_ordered_extent *ordered; WARN_ON(!list_empty(&inode->i_dentry)); WARN_ON(inode->i_data.nrpages); + while(1) { + ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); + if (!ordered) + break; + else { + printk("found ordered extent %Lu %Lu\n", + ordered->file_offset, ordered->len); + btrfs_remove_ordered_extent(inode, ordered); + btrfs_put_ordered_extent(ordered); + btrfs_put_ordered_extent(ordered); + } + } btrfs_drop_extent_cache(inode, 0, (u64)-1); kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); } @@ -2869,7 +2986,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); BTRFS_I(inode)->delalloc_bytes = 0; - atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; @@ -2921,6 +3037,20 @@ out_fail: return err; } +static int btrfs_set_page_dirty(struct page *page) +{ + struct inode *inode = page->mapping->host; + u64 page_start = page_offset(page); + u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + + if (!test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, + EXTENT_DELALLOC, 0)) { +printk("inode %lu page %Lu not delalloc\n", inode->i_ino, page_offset(page)); +WARN_ON(1); + } + return __set_page_dirty_nobuffers(page); +} + static int btrfs_permission(struct inode *inode, int mask, struct nameidata *nd) { @@ -2967,6 +3097,7 @@ static struct extent_io_ops btrfs_extent_io_ops = { .merge_bio_hook = btrfs_merge_bio_hook, .readpage_io_hook = btrfs_readpage_io_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, + .writepage_end_io_hook = btrfs_writepage_end_io_hook, .readpage_io_failed_hook = btrfs_io_failed_hook, .set_bit_hook = btrfs_set_bit_hook, .clear_bit_hook = btrfs_clear_bit_hook, @@ -2982,7 +3113,7 @@ static struct address_space_operations btrfs_aops = { .direct_IO = btrfs_direct_IO, .invalidatepage = btrfs_invalidatepage, .releasepage = btrfs_releasepage, - .set_page_dirty = __set_page_dirty_nobuffers, + .set_page_dirty = btrfs_set_page_dirty, }; static struct address_space_operations btrfs_symlink_aops = { diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 254da822566..6513270f054 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -22,48 +22,30 @@ #include "ctree.h" #include "transaction.h" #include "btrfs_inode.h" +#include "extent_io.h" -struct tree_entry { - u64 root_objectid; - u64 objectid; - struct inode *inode; - struct rb_node rb_node; -}; -/* - * returns > 0 if entry passed (root, objectid) is > entry, - * < 0 if (root, objectid) < entry and zero if they are equal - */ -static int comp_entry(struct tree_entry *entry, u64 root_objectid, - u64 objectid) +static u64 entry_end(struct btrfs_ordered_extent *entry) { - if (root_objectid < entry->root_objectid) - return -1; - if (root_objectid > entry->root_objectid) - return 1; - if (objectid < entry->objectid) - return -1; - if (objectid > entry->objectid) - return 1; - return 0; + if (entry->file_offset + entry->len < entry->file_offset) + return (u64)-1; + return entry->file_offset + entry->len; } -static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid, - u64 objectid, struct rb_node *node) +static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, + struct rb_node *node) { struct rb_node ** p = &root->rb_node; struct rb_node * parent = NULL; - struct tree_entry *entry; - int comp; + struct btrfs_ordered_extent *entry; while(*p) { parent = *p; - entry = rb_entry(parent, struct tree_entry, rb_node); + entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node); - comp = comp_entry(entry, root_objectid, objectid); - if (comp < 0) + if (file_offset < entry->file_offset) p = &(*p)->rb_left; - else if (comp > 0) + else if (file_offset >= entry_end(entry)) p = &(*p)->rb_right; else return parent; @@ -74,24 +56,23 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid, return NULL; } -static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid, - u64 objectid, struct rb_node **prev_ret) +static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, + struct rb_node **prev_ret) { struct rb_node * n = root->rb_node; struct rb_node *prev = NULL; - struct tree_entry *entry; - struct tree_entry *prev_entry = NULL; - int comp; + struct rb_node *test; + struct btrfs_ordered_extent *entry; + struct btrfs_ordered_extent *prev_entry = NULL; while(n) { - entry = rb_entry(n, struct tree_entry, rb_node); + entry = rb_entry(n, struct btrfs_ordered_extent, rb_node); prev = n; prev_entry = entry; - comp = comp_entry(entry, root_objectid, objectid); - if (comp < 0) + if (file_offset < entry->file_offset) n = n->rb_left; - else if (comp > 0) + else if (file_offset >= entry_end(entry)) n = n->rb_right; else return n; @@ -99,195 +80,329 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid, if (!prev_ret) return NULL; - while(prev && comp_entry(prev_entry, root_objectid, objectid) >= 0) { - prev = rb_next(prev); - prev_entry = rb_entry(prev, struct tree_entry, rb_node); + while(prev && file_offset >= entry_end(prev_entry)) { + test = rb_next(prev); + if (!test) + break; + prev_entry = rb_entry(test, struct btrfs_ordered_extent, + rb_node); + if (file_offset < entry_end(prev_entry)) + break; + + prev = test; + } + if (prev) + prev_entry = rb_entry(prev, struct btrfs_ordered_extent, + rb_node); + while(prev && file_offset < entry_end(prev_entry)) { + test = rb_prev(prev); + if (!test) + break; + prev_entry = rb_entry(test, struct btrfs_ordered_extent, + rb_node); + prev = test; } *prev_ret = prev; return NULL; } -static inline struct rb_node *tree_search(struct rb_root *root, - u64 root_objectid, u64 objectid) +static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) +{ + if (file_offset < entry->file_offset || + entry->file_offset + entry->len <= file_offset) + return 0; + return 1; +} + +static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, + u64 file_offset) { + struct rb_root *root = &tree->tree; struct rb_node *prev; struct rb_node *ret; - ret = __tree_search(root, root_objectid, objectid, &prev); + struct btrfs_ordered_extent *entry; + + if (tree->last) { + entry = rb_entry(tree->last, struct btrfs_ordered_extent, + rb_node); + if (offset_in_entry(entry, file_offset)) + return tree->last; + } + ret = __tree_search(root, file_offset, &prev); if (!ret) - return prev; + ret = prev; + if (ret) + tree->last = ret; return ret; } -int btrfs_add_ordered_inode(struct inode *inode) +int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, + u64 start, u64 len) { - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 root_objectid = root->root_key.objectid; - u64 transid = root->fs_info->running_transaction->transid; - struct tree_entry *entry; - struct rb_node *node; struct btrfs_ordered_inode_tree *tree; + struct rb_node *node; + struct btrfs_ordered_extent *entry; - if (transid <= BTRFS_I(inode)->ordered_trans) - return 0; - - tree = &root->fs_info->running_transaction->ordered_inode_tree; - - read_lock(&tree->lock); - node = __tree_search(&tree->tree, root_objectid, inode->i_ino, NULL); - read_unlock(&tree->lock); - if (node) { - return 0; - } - - entry = kmalloc(sizeof(*entry), GFP_NOFS); + tree = &BTRFS_I(inode)->ordered_tree; + entry = kzalloc(sizeof(*entry), GFP_NOFS); if (!entry) return -ENOMEM; - write_lock(&tree->lock); - entry->objectid = inode->i_ino; - entry->root_objectid = root_objectid; + mutex_lock(&tree->mutex); + entry->file_offset = file_offset; + entry->start = start; + entry->len = len; entry->inode = inode; + /* one ref for the tree */ + atomic_set(&entry->refs, 1); + init_waitqueue_head(&entry->wait); + INIT_LIST_HEAD(&entry->list); - node = tree_insert(&tree->tree, root_objectid, - inode->i_ino, &entry->rb_node); - - BTRFS_I(inode)->ordered_trans = transid; - if (!node) - igrab(inode); - - write_unlock(&tree->lock); + node = tree_insert(&tree->tree, file_offset, + &entry->rb_node); + if (node) { + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + atomic_inc(&entry->refs); + } + set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, + entry_end(entry) - 1, GFP_NOFS); - if (node) - kfree(entry); + set_bit(BTRFS_ORDERED_START, &entry->flags); + mutex_unlock(&tree->mutex); + BUG_ON(node); return 0; } -int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, - u64 *root_objectid, u64 *objectid, - struct inode **inode) +int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum) { - struct tree_entry *entry; + struct btrfs_ordered_inode_tree *tree; struct rb_node *node; + struct btrfs_ordered_extent *entry; - write_lock(&tree->lock); - node = tree_search(&tree->tree, *root_objectid, *objectid); + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + node = tree_search(tree, sum->file_offset); if (!node) { - write_unlock(&tree->lock); - return 0; +search_fail: +printk("add ordered sum failed to find a node for inode %lu offset %Lu\n", inode->i_ino, sum->file_offset); + node = rb_first(&tree->tree); + while(node) { + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + printk("entry %Lu %Lu %Lu\n", entry->file_offset, entry->file_offset + entry->len, entry->start); + node = rb_next(node); + } + BUG(); } - entry = rb_entry(node, struct tree_entry, rb_node); + BUG_ON(!node); - while(comp_entry(entry, *root_objectid, *objectid) >= 0) { - node = rb_next(node); - if (!node) - break; - entry = rb_entry(node, struct tree_entry, rb_node); - } - if (!node) { - write_unlock(&tree->lock); - return 0; + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + if (!offset_in_entry(entry, sum->file_offset)) { + goto search_fail; } - *root_objectid = entry->root_objectid; - *inode = entry->inode; - atomic_inc(&entry->inode->i_count); - *objectid = entry->objectid; - write_unlock(&tree->lock); - return 1; + list_add_tail(&sum->list, &entry->list); + mutex_unlock(&tree->mutex); + return 0; } -int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, - u64 *root_objectid, u64 *objectid, - struct inode **inode) +int btrfs_dec_test_ordered_pending(struct inode *inode, + u64 file_offset, u64 io_size) { - struct tree_entry *entry; + struct btrfs_ordered_inode_tree *tree; struct rb_node *node; - - write_lock(&tree->lock); - node = tree_search(&tree->tree, *root_objectid, *objectid); + struct btrfs_ordered_extent *entry; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + int ret; + + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1, + GFP_NOFS); + node = tree_search(tree, file_offset); if (!node) { - write_unlock(&tree->lock); - return 0; + ret = 1; + goto out; } - entry = rb_entry(node, struct tree_entry, rb_node); - while(comp_entry(entry, *root_objectid, *objectid) >= 0) { - node = rb_next(node); - if (!node) - break; - entry = rb_entry(node, struct tree_entry, rb_node); + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + if (!offset_in_entry(entry, file_offset)) { + ret = 1; + goto out; } - if (!node) { - write_unlock(&tree->lock); - return 0; + + ret = test_range_bit(io_tree, entry->file_offset, + entry->file_offset + entry->len - 1, + EXTENT_ORDERED, 0); + if (!test_bit(BTRFS_ORDERED_START, &entry->flags)) { +printk("inode %lu not ready yet for extent %Lu %Lu\n", inode->i_ino, entry->file_offset, entry_end(entry)); } + if (ret == 0) + ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); +out: + mutex_unlock(&tree->mutex); + return ret == 0; +} - *root_objectid = entry->root_objectid; - *objectid = entry->objectid; - *inode = entry->inode; - atomic_inc(&entry->inode->i_count); - rb_erase(node, &tree->tree); - write_unlock(&tree->lock); - kfree(entry); - return 1; +int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) +{ + if (atomic_dec_and_test(&entry->refs)) + kfree(entry); + return 0; } -static void __btrfs_del_ordered_inode(struct btrfs_ordered_inode_tree *tree, - struct inode *inode, - u64 root_objectid, u64 objectid) +int btrfs_remove_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry) { - struct tree_entry *entry; + struct btrfs_ordered_inode_tree *tree; struct rb_node *node; - struct rb_node *prev; - write_lock(&tree->lock); - node = __tree_search(&tree->tree, root_objectid, objectid, &prev); - if (!node) { - write_unlock(&tree->lock); - return; - } + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + node = &entry->rb_node; rb_erase(node, &tree->tree); - BTRFS_I(inode)->ordered_trans = 0; - write_unlock(&tree->lock); - atomic_dec(&inode->i_count); - entry = rb_entry(node, struct tree_entry, rb_node); - kfree(entry); - return; + tree->last = NULL; + set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); + mutex_unlock(&tree->mutex); + wake_up(&entry->wait); + return 0; } -void btrfs_del_ordered_inode(struct inode *inode, int force) +void btrfs_wait_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry) { - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 root_objectid = root->root_key.objectid; + u64 start = entry->file_offset; + u64 end = start + entry->len - 1; +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE); +#else + do_sync_mapping_range(inode->i_mapping, start, end, + SYNC_FILE_RANGE_WRITE); +#endif + wait_event(entry->wait, + test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags)); +} - if (!BTRFS_I(inode)->ordered_trans) { - return; - } +static void btrfs_start_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry, int wait) +{ + u64 start = entry->file_offset; + u64 end = start + entry->len - 1; - if (!force && (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) || - mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))) - return; +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE); +#else + do_sync_mapping_range(inode->i_mapping, start, end, + SYNC_FILE_RANGE_WRITE); +#endif + if (wait) + wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, + &entry->flags)); +} - spin_lock(&root->fs_info->new_trans_lock); - if (root->fs_info->running_transaction) { - struct btrfs_ordered_inode_tree *tree; - tree = &root->fs_info->running_transaction->ordered_inode_tree; - __btrfs_del_ordered_inode(tree, inode, root_objectid, - inode->i_ino); +void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) +{ + u64 end; + struct btrfs_ordered_extent *ordered; + int found; + int should_wait = 0; + +again: + if (start + len < start) + end = (u64)-1; + else + end = start + len - 1; + found = 0; + while(1) { + ordered = btrfs_lookup_first_ordered_extent(inode, end); + if (!ordered) { + break; + } + if (ordered->file_offset >= start + len) { + btrfs_put_ordered_extent(ordered); + break; + } + if (ordered->file_offset + ordered->len < start) { + btrfs_put_ordered_extent(ordered); + break; + } + btrfs_start_ordered_extent(inode, ordered, should_wait); + found++; + end = ordered->file_offset; + btrfs_put_ordered_extent(ordered); + if (end == 0) + break; + end--; + } + if (should_wait && found) { + should_wait = 0; + goto again; } - spin_unlock(&root->fs_info->new_trans_lock); } -int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode) +int btrfs_add_ordered_pending(struct inode *inode, + struct btrfs_ordered_extent *ordered, + u64 start, u64 len) { - struct btrfs_transaction *cur = root->fs_info->running_transaction; - while(cur == root->fs_info->running_transaction && - atomic_read(&BTRFS_I(inode)->ordered_writeback)) { -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) - congestion_wait(WRITE, HZ/20); -#else - blk_congestion_wait(WRITE, HZ/20); -#endif - } + WARN_ON(1); return 0; +#if 0 + int ret; + struct btrfs_ordered_inode_tree *tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) { + ret = -EAGAIN; + goto out; + } + set_extent_ordered(io_tree, start, start + len - 1, GFP_NOFS); + ret = 0; +out: + mutex_unlock(&tree->mutex); + return ret; +#endif +} + +struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, + u64 file_offset) +{ + struct btrfs_ordered_inode_tree *tree; + struct rb_node *node; + struct btrfs_ordered_extent *entry = NULL; + + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + node = tree_search(tree, file_offset); + if (!node) + goto out; + + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + if (!offset_in_entry(entry, file_offset)) + entry = NULL; + if (entry) + atomic_inc(&entry->refs); +out: + mutex_unlock(&tree->mutex); + return entry; +} + +struct btrfs_ordered_extent * +btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset) +{ + struct btrfs_ordered_inode_tree *tree; + struct rb_node *node; + struct btrfs_ordered_extent *entry = NULL; + + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + node = tree_search(tree, file_offset); + if (!node) + goto out; + + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + atomic_inc(&entry->refs); +out: + mutex_unlock(&tree->mutex); + return entry; } diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 4fa78736423..33292c5fe90 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -20,24 +20,73 @@ #define __BTRFS_ORDERED_DATA__ struct btrfs_ordered_inode_tree { - rwlock_t lock; + struct mutex mutex; struct rb_root tree; + struct rb_node *last; }; +struct btrfs_sector_sum { + u64 offset; + u32 sum; +}; + +struct btrfs_ordered_sum { + u64 file_offset; + u64 len; + struct list_head list; + struct btrfs_sector_sum sums; +}; + +/* bits for the flags field */ +#define BTRFS_ORDERED_IO_DONE 0 /* set when all the pages are written */ +#define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */ +#define BTRFS_ORDERED_START 2 /* set when tree setup */ + +struct btrfs_ordered_extent { + u64 file_offset; + u64 start; + u64 len; + unsigned long flags; + atomic_t refs; + struct list_head list; + struct inode *inode; + wait_queue_head_t wait; + struct rb_node rb_node; +}; + + +static inline int btrfs_ordered_sum_size(struct btrfs_root *root, u64 bytes) +{ + unsigned long num_sectors = (bytes + root->sectorsize - 1) / + root->sectorsize; + return sizeof(struct btrfs_ordered_sum) + + num_sectors * sizeof(struct btrfs_sector_sum); +} + static inline void btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t) { - rwlock_init(&t->lock); + mutex_init(&t->mutex); t->tree.rb_node = NULL; + t->last = NULL; } -int btrfs_add_ordered_inode(struct inode *inode); -int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, - u64 *root_objectid, u64 *objectid, - struct inode **inode); -int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, - u64 *root_objectid, u64 *objectid, - struct inode **inode); -void btrfs_del_ordered_inode(struct inode *inode, int force); -int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode); +int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry); +int btrfs_remove_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry); +int btrfs_dec_test_ordered_pending(struct inode *inode, + u64 file_offset, u64 io_size); +int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, + u64 start, u64 len); +int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum); +struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, + u64 file_offset); +void btrfs_wait_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry); +void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); +struct btrfs_ordered_extent * +btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); +int btrfs_add_ordered_pending(struct inode *inode, + struct btrfs_ordered_extent *ordered, + u64 start, u64 len); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a8a3cb03de5..86a5acc19ce 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -67,7 +67,6 @@ static noinline int join_transaction(struct btrfs_root *root) cur_trans->start_time = get_seconds(); INIT_LIST_HEAD(&cur_trans->pending_snapshots); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); - btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree); extent_io_tree_init(&cur_trans->dirty_pages, root->fs_info->btree_inode->i_mapping, GFP_NOFS); @@ -158,10 +157,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, wake_up(&cur_trans->writer_wait); if (cur_trans->in_commit && throttle) { - int ret; + DEFINE_WAIT(wait); mutex_unlock(&root->fs_info->trans_mutex); - ret = wait_for_commit(root, cur_trans); - BUG_ON(ret); + prepare_to_wait(&root->fs_info->transaction_throttle, &wait, + TASK_UNINTERRUPTIBLE); + schedule(); + finish_wait(&root->fs_info->transaction_throttle, &wait); mutex_lock(&root->fs_info->trans_mutex); } @@ -486,58 +487,6 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, return ret; } -int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - struct btrfs_transaction *cur_trans = trans->transaction; - struct inode *inode; - u64 root_objectid = 0; - u64 objectid = 0; - int ret; - - atomic_inc(&root->fs_info->throttles); - while(1) { - ret = btrfs_find_first_ordered_inode( - &cur_trans->ordered_inode_tree, - &root_objectid, &objectid, &inode); - if (!ret) - break; - - mutex_unlock(&root->fs_info->trans_mutex); - - if (S_ISREG(inode->i_mode)) { - atomic_inc(&BTRFS_I(inode)->ordered_writeback); - filemap_fdatawrite(inode->i_mapping); - atomic_dec(&BTRFS_I(inode)->ordered_writeback); - } - iput(inode); - - mutex_lock(&root->fs_info->trans_mutex); - } - while(1) { - root_objectid = 0; - objectid = 0; - ret = btrfs_find_del_first_ordered_inode( - &cur_trans->ordered_inode_tree, - &root_objectid, &objectid, &inode); - if (!ret) - break; - mutex_unlock(&root->fs_info->trans_mutex); - - if (S_ISREG(inode->i_mode)) { - atomic_inc(&BTRFS_I(inode)->ordered_writeback); - filemap_write_and_wait(inode->i_mapping); - atomic_dec(&BTRFS_I(inode)->ordered_writeback); - } - atomic_dec(&inode->i_count); - iput(inode); - - mutex_lock(&root->fs_info->trans_mutex); - } - atomic_dec(&root->fs_info->throttles); - return 0; -} - static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_pending_snapshot *pending) @@ -666,6 +615,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, extent_io_tree_init(pinned_copy, root->fs_info->btree_inode->i_mapping, GFP_NOFS); +printk("commit trans %Lu\n", trans->transid); trans->transaction->in_commit = 1; cur_trans = trans->transaction; if (cur_trans->list.prev != &root->fs_info->trans_list) { @@ -699,8 +649,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->trans_mutex); finish_wait(&cur_trans->writer_wait, &wait); - ret = btrfs_write_ordered_inodes(trans, root); - } while (cur_trans->num_writers > 1 || (cur_trans->num_joined != joined)); @@ -736,6 +684,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_copy_pinned(root, pinned_copy); + wake_up(&root->fs_info->transaction_throttle); + mutex_unlock(&root->fs_info->trans_mutex); ret = btrfs_write_and_wait_transaction(trans, root); BUG_ON(ret); @@ -758,6 +708,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots); mutex_unlock(&root->fs_info->trans_mutex); +printk("done commit trans %Lu\n", trans->transid); kmem_cache_free(btrfs_trans_handle_cachep, trans); if (root->fs_info->closing) { diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 9ccd5a5b170..910350cd4cf 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -19,7 +19,6 @@ #ifndef __BTRFS_TRANSACTION__ #define __BTRFS_TRANSACTION__ #include "btrfs_inode.h" -#include "ordered-data.h" struct btrfs_transaction { u64 transid; @@ -31,7 +30,6 @@ struct btrfs_transaction { struct list_head list; struct extent_io_tree dirty_pages; unsigned long start_time; - struct btrfs_ordered_inode_tree ordered_inode_tree; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; struct list_head pending_snapshots; @@ -88,8 +86,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); int btrfs_clean_old_snapshots(struct btrfs_root *root); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, - struct btrfs_root *root); int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, struct btrfs_root *root); #endif -- cgit v1.2.3-70-g09d2 From ee6e6504e147a59a9f4d582662c105e9d72ae638 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:54:40 -0400 Subject: Add a per-inode lock around btrfs_drop_extents btrfs_drop_extents is always called with a range lock held on the inode. But, it may operate on extents outside that range as it drops and splits them. This patch adds a per-inode mutex that is held while calling btrfs_drop_extents and while inserting new extents into the tree. It prevents races from two procs working against adjacent ranges in the tree. Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 1 + fs/btrfs/extent-tree.c | 2 ++ fs/btrfs/file.c | 8 ++++++++ fs/btrfs/inode.c | 10 ++++++++++ 4 files changed, 21 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 81c0444f37b..3bf40591742 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -32,6 +32,7 @@ struct btrfs_inode { struct extent_io_tree io_tree; struct extent_io_tree io_failure_tree; struct mutex csum_mutex; + struct mutex extent_mutex; struct inode vfs_inode; struct btrfs_ordered_inode_tree ordered_tree; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 343d1101c31..4036c62b667 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1671,6 +1671,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid, ref_generation, owner_objectid, owner_offset, pin, pin == 0); + + finish_current_insert(trans, root->fs_info->extent_root); pending_ret = del_pending_extents(trans, root->fs_info->extent_root); return ret ? ret : pending_ret; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3e4e5c227c0..40ad1b2958c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -242,6 +242,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 end_of_last_block; u64 end_pos = pos + write_bytes; u64 inline_size; + int did_inline = 0; loff_t isize = i_size_read(inode); start_pos = pos & ~((u64)root->sectorsize - 1); @@ -275,6 +276,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, if (hole_size > 0) { btrfs_wait_ordered_range(inode, last_pos_in_file, last_pos_in_file + hole_size); + mutex_lock(&BTRFS_I(inode)->extent_mutex); err = btrfs_drop_extents(trans, root, inode, last_pos_in_file, last_pos_in_file + hole_size, @@ -289,6 +291,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, 0, 0, hole_size, 0); btrfs_drop_extent_cache(inode, last_pos_in_file, last_pos_in_file + hole_size -1); + mutex_unlock(&BTRFS_I(inode)->extent_mutex); btrfs_check_file(root, inode); } if (err) @@ -321,6 +324,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, /* step one, delete the existing extents in this range */ aligned_end = (pos + write_bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); + mutex_lock(&BTRFS_I(inode)->extent_mutex); err = btrfs_drop_extents(trans, root, inode, start_pos, aligned_end, aligned_end, &hint_byte); if (err) @@ -332,9 +336,13 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inline_size, pages, 0, num_pages); btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1); BUG_ON(err); + mutex_unlock(&BTRFS_I(inode)->extent_mutex); + did_inline = 1; } if (end_pos > isize) { i_size_write(inode, end_pos); + if (did_inline) + BTRFS_I(inode)->disk_i_size = end_pos; btrfs_update_inode(trans, root, inode); } failed: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 293355c92a4..0d1e2ba47a9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -516,6 +516,8 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, trans->transid, inode->i_ino, ordered_extent->file_offset, &ins); BUG_ON(ret); + + mutex_lock(&BTRFS_I(inode)->extent_mutex); ret = btrfs_drop_extents(trans, root, inode, ordered_extent->file_offset, ordered_extent->file_offset + @@ -531,6 +533,8 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, btrfs_drop_extent_cache(inode, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1); + mutex_unlock(&BTRFS_I(inode)->extent_mutex); + inode->i_blocks += ordered_extent->len >> 9; unlock_extent(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, @@ -1399,6 +1403,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); + mutex_lock(&BTRFS_I(inode)->extent_mutex); err = btrfs_drop_extents(trans, root, inode, hole_start, block_end, hole_start, &alloc_hint); @@ -1412,6 +1417,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) (u64)-1); btrfs_check_file(root, inode); } + mutex_unlock(&BTRFS_I(inode)->extent_mutex); btrfs_end_transaction(trans, root); unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); if (err) @@ -1578,6 +1584,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) inode->i_mapping, GFP_NOFS); btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); mutex_init(&BTRFS_I(inode)->csum_mutex); + mutex_init(&BTRFS_I(inode)->extent_mutex); return 0; } @@ -1880,6 +1887,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_mapping, GFP_NOFS); btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); mutex_init(&BTRFS_I(inode)->csum_mutex); + mutex_init(&BTRFS_I(inode)->extent_mutex); BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->disk_i_size = 0; BTRFS_I(inode)->root = root; @@ -2105,6 +2113,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); + mutex_init(&BTRFS_I(inode)->extent_mutex); BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->disk_i_size = 0; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; @@ -3079,6 +3088,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); + mutex_init(&BTRFS_I(inode)->extent_mutex); BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->disk_i_size = 0; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; -- cgit v1.2.3-70-g09d2 From 54641bd17db9fbfc13c7b1d4ee0dd2713bf3e076 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:54:48 -0400 Subject: Btrfs: Force caching of metadata block groups on mount to avoid deadlock This is a temporary change to avoid deadlocks until the extent tree locking is fixed up. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4036c62b667..be2aef1cb7a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3333,6 +3333,11 @@ int btrfs_read_block_groups(struct btrfs_root *root) set_state_private(block_group_cache, found_key.objectid, (unsigned long)cache); + /* hack for now */ + if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { + cache_block_group(root->fs_info->extent_root, + cache); + } if (key.objectid >= btrfs_super_total_bytes(&info->super_copy)) break; -- cgit v1.2.3-70-g09d2 From 4a09675279674041862d2210635b0cc1f60be28e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 21 Jul 2008 10:29:44 -0400 Subject: Btrfs: Data ordered fixes * In btrfs_delete_inode, wait for ordered extents after calling truncate_inode_pages. This is much faster, and more correct * Properly clear our the PageChecked bit everywhere we redirty the page. * Change the writepage fixup handler to lock the page range and check to see if an ordered extent had been inserted since the improperly dirtied page was discovered * Wait for ordered extents outside the transaction. This isn't required for locking rules but does improve transaction latencies * Reduce contention on the alloc_mutex by dropping it while incrementing refs on a node/leaf and while dropping refs on a leaf. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 18 +++++++++++++++--- fs/btrfs/file.c | 1 + fs/btrfs/inode.c | 23 ++++++++++++++++++----- fs/btrfs/ordered-data.c | 11 +++++++++-- 4 files changed, 43 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index be2aef1cb7a..ccd49322f79 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -934,7 +934,6 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (!root->ref_cows) return 0; - mutex_lock(&root->fs_info->alloc_mutex); level = btrfs_header_level(buf); nritems = btrfs_header_nritems(buf); for (i = 0; i < nritems; i++) { @@ -951,29 +950,36 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); if (disk_bytenr == 0) continue; + + mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr, btrfs_file_extent_disk_num_bytes(buf, fi), root->root_key.objectid, trans->transid, key.objectid, key.offset); + mutex_unlock(&root->fs_info->alloc_mutex); if (ret) { faili = i; + WARN_ON(1); goto fail; } } else { bytenr = btrfs_node_blockptr(buf, i); btrfs_node_key_to_cpu(buf, &key, i); + + mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_inc_extent_ref(trans, root, bytenr, btrfs_level_size(root, level - 1), root->root_key.objectid, trans->transid, level - 1, key.objectid); + mutex_unlock(&root->fs_info->alloc_mutex); if (ret) { faili = i; + WARN_ON(1); goto fail; } } } - mutex_unlock(&root->fs_info->alloc_mutex); return 0; fail: WARN_ON(1); @@ -1004,7 +1010,6 @@ fail: } } #endif - mutex_unlock(&root->fs_info->alloc_mutex); return ret; } @@ -2180,6 +2185,8 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, leaf_owner = btrfs_header_owner(leaf); leaf_generation = btrfs_header_generation(leaf); + mutex_unlock(&root->fs_info->alloc_mutex); + for (i = 0; i < nritems; i++) { u64 disk_bytenr; @@ -2197,12 +2204,17 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); if (disk_bytenr == 0) continue; + + mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, disk_bytenr, btrfs_file_extent_disk_num_bytes(leaf, fi), leaf_owner, leaf_generation, key.objectid, key.offset, 0); + mutex_unlock(&root->fs_info->alloc_mutex); BUG_ON(ret); } + + mutex_lock(&root->fs_info->alloc_mutex); return 0; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index eccdb9562ba..591a30208ac 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -75,6 +75,7 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) for (i = 0; i < num_pages; i++) { if (!pages[i]) break; + ClearPageChecked(pages[i]); unlock_page(pages[i]); mark_page_accessed(pages[i]); page_cache_release(pages[i]); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 50ee4befac8..8fb6dc25e7a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -418,7 +418,7 @@ void btrfs_writepage_fixup_worker(struct btrfs_work *work) fixup = container_of(work, struct btrfs_writepage_fixup, work); page = fixup->page; - +again: lock_page(page); if (!page->mapping || !PageDirty(page) || !PageChecked(page)) { ClearPageChecked(page); @@ -430,9 +430,21 @@ void btrfs_writepage_fixup_worker(struct btrfs_work *work) page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); - ordered = btrfs_lookup_ordered_extent(inode, page_start); - if (ordered) + + /* already ordered? We're done */ + if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, + EXTENT_ORDERED, 0)) { goto out; + } + + ordered = btrfs_lookup_ordered_extent(inode, page_start); + if (ordered) { + unlock_extent(&BTRFS_I(inode)->io_tree, page_start, + page_end, GFP_NOFS); + unlock_page(page); + btrfs_start_ordered_extent(inode, ordered, 1); + goto again; + } set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); @@ -1465,11 +1477,11 @@ void btrfs_delete_inode(struct inode *inode) unsigned long nr; int ret; - btrfs_wait_ordered_range(inode, 0, (u64)-1); truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) { goto no_delete; } + btrfs_wait_ordered_range(inode, 0, (u64)-1); btrfs_i_size_write(inode, 0); trans = btrfs_start_transaction(root, 1); @@ -2707,6 +2719,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) 1, 1, GFP_NOFS); __btrfs_releasepage(page, GFP_NOFS); + ClearPageChecked(page); if (PagePrivate(page)) { invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); @@ -2818,10 +2831,10 @@ static void btrfs_truncate(struct inode *inode) return; btrfs_truncate_page(inode->i_mapping, inode->i_size); + btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); - btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); btrfs_i_size_write(inode, inode->i_size); /* FIXME, add redo link to tree so we don't leak on crash */ diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index c2b4a9c4ddb..0d87795fdd8 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -336,7 +336,7 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) orig_end = start + len - 1; wait_end = orig_end; } - +again: /* start IO across the range first to instantiate any delalloc * extents */ @@ -369,6 +369,14 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) break; end--; } + if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, + EXTENT_ORDERED | EXTENT_DELALLOC, 0)) { + printk("inode %lu still ordered or delalloc after wait " + "%llu %llu\n", inode->i_ino, + (unsigned long long)start, + (unsigned long long)orig_end); + goto again; + } } /* @@ -545,7 +553,6 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum) sector_sums = &ordered_sum->sums; for (i = 0; i < num_sectors; i++) { if (sector_sums[i].offset == offset) { -printk("find ordered sum inode %lu offset %Lu\n", inode->i_ino, offset); *sum = sector_sums[i].sum; ret = 0; goto out; -- cgit v1.2.3-70-g09d2 From a61e6f29dc7c9d56a776a518eed92bbc61848263 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Jul 2008 11:18:08 -0400 Subject: Btrfs: Use a mutex in the extent buffer for tree block locking This replaces the use of the page cache lock bit for locking, which wasn't suitable for block size < page size and couldn't be used recursively. The mutexes alone don't fix either problem, but they are the first step. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 7 +------ fs/btrfs/extent_io.c | 9 +++++++++ fs/btrfs/extent_io.h | 1 + fs/btrfs/locking.c | 13 ++++++------- 4 files changed, 17 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ccd49322f79..c51cd11de20 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1451,7 +1451,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, struct extent_buffer *buf; buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (buf) { - if (!btrfs_try_tree_lock(buf) && + if (btrfs_try_tree_lock(buf) && btrfs_buffer_uptodate(buf, 0)) { u64 transid = root->fs_info->running_transaction->transid; @@ -3345,11 +3345,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) set_state_private(block_group_cache, found_key.objectid, (unsigned long)cache); - /* hack for now */ - if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { - cache_block_group(root->fs_info->extent_root, - cache); - } if (key.objectid >= btrfs_super_total_bytes(&info->super_copy)) break; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 32bb4ed3723..7380449cb5b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2690,6 +2690,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, eb = kmem_cache_zalloc(extent_buffer_cache, mask); eb->start = start; eb->len = len; + mutex_init(&eb->mutex); spin_lock_irqsave(&leak_lock, flags); list_add(&eb->leak_list, &buffers); spin_unlock_irqrestore(&leak_lock, flags); @@ -2837,6 +2838,7 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); + lock_page(page); if (i == 0) set_page_extent_head(page, eb->len); else @@ -2854,6 +2856,7 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, end = start + PAGE_CACHE_SIZE - 1; if (test_range_bit(tree, start, end, EXTENT_DIRTY, 0)) { + unlock_page(page); continue; } } @@ -2865,6 +2868,7 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, PAGECACHE_TAG_DIRTY); } read_unlock_irq(&page->mapping->tree_lock); + unlock_page(page); } return 0; } @@ -2893,12 +2897,17 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, * on us if the page isn't already dirty. */ if (i == 0) { + lock_page(page); set_page_extent_head(page, eb->len); } else if (PagePrivate(page) && page->private != EXTENT_PAGE_PRIVATE) { + lock_page(page); set_page_extent_mapped(page); + unlock_page(page); } __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); + if (i == 0) + unlock_page(page); } return set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index dd367617d78..6c03e6a1993 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -90,6 +90,7 @@ struct extent_buffer { int flags; struct list_head leak_list; struct rb_node rb_node; + struct mutex mutex; }; struct extent_map_tree; diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 058a506a0dd..01a9ac2be3f 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -29,32 +29,31 @@ int btrfs_tree_lock(struct extent_buffer *eb) { int i; - if (!TestSetPageLocked(eb->first_page)) + if (mutex_trylock(&eb->mutex)) return 0; for (i = 0; i < 512; i++) { cpu_relax(); - if (!TestSetPageLocked(eb->first_page)) + if (mutex_trylock(&eb->mutex)) return 0; } cpu_relax(); - lock_page(eb->first_page); + mutex_lock(&eb->mutex); return 0; } int btrfs_try_tree_lock(struct extent_buffer *eb) { - return TestSetPageLocked(eb->first_page); + return mutex_trylock(&eb->mutex); } int btrfs_tree_unlock(struct extent_buffer *eb) { - WARN_ON(!PageLocked(eb->first_page)); - unlock_page(eb->first_page); + mutex_unlock(&eb->mutex); return 0; } int btrfs_tree_locked(struct extent_buffer *eb) { - return PageLocked(eb->first_page); + return mutex_is_locked(&eb->mutex); } -- cgit v1.2.3-70-g09d2 From e34a5b4f77b8448cf2863ad0cbac35e2c2a86a0a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Jul 2008 12:08:37 -0400 Subject: Btrfs: Add some conditional schedules near the alloc_mutex This helps prevent stalls, especially while the snapshot cleaner is running hard Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c51cd11de20..72fa28236e5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -937,6 +937,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, level = btrfs_header_level(buf); nritems = btrfs_header_nritems(buf); for (i = 0; i < nritems; i++) { + cond_resched(); if (level == 0) { u64 disk_bytenr; btrfs_item_key_to_cpu(buf, &key, i); @@ -2189,6 +2190,7 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, for (i = 0; i < nritems; i++) { u64 disk_bytenr; + cond_resched(); btrfs_item_key_to_cpu(leaf, &key, i); if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) -- cgit v1.2.3-70-g09d2 From c286ac48ed7aaf53586f575af6053ae2a0f8554a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Jul 2008 23:06:41 -0400 Subject: Btrfs: alloc_mutex latency reduction This releases the alloc_mutex in a few places that hold it for over long operations. btrfs_lookup_block_group is changed so that it doesn't need the mutex at all. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 100 +++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 81 insertions(+), 20 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index eeb5afa6e9b..90504ba7f83 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -483,6 +483,7 @@ struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; struct btrfs_space_info *space_info; + spinlock_t lock; u64 pinned; u64 flags; int cached; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 72fa28236e5..febc6295c7a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -319,7 +319,7 @@ no_cache: cache = btrfs_lookup_first_block_group(root->fs_info, last); } cache_miss = 0; - cache = __btrfs_find_block_group(root, cache, last, data, 0); + cache = btrfs_find_block_group(root, cache, last, data, 0); if (!cache) goto no_cache; *cache_ret = cache; @@ -379,19 +379,25 @@ __btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *shint; shint = btrfs_lookup_first_block_group(info, search_start); if (shint && block_group_bits(shint, data) && !shint->ro) { + spin_lock(&shint->lock); used = btrfs_block_group_used(&shint->item); if (used + shint->pinned < div_factor(shint->key.offset, factor)) { + spin_unlock(&shint->lock); return shint; } + spin_unlock(&shint->lock); } } if (hint && !hint->ro && block_group_bits(hint, data)) { + spin_lock(&hint->lock); used = btrfs_block_group_used(&hint->item); if (used + hint->pinned < div_factor(hint->key.offset, factor)) { + spin_unlock(&hint->lock); return hint; } + spin_unlock(&hint->lock); last = hint->key.objectid + hint->key.offset; } else { if (hint) @@ -413,6 +419,7 @@ again: } cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; + spin_lock(&cache->lock); last = cache->key.objectid + cache->key.offset; used = btrfs_block_group_used(&cache->item); @@ -420,9 +427,11 @@ again: free_check = div_factor(cache->key.offset, factor); if (used + cache->pinned < free_check) { found_group = cache; + spin_unlock(&cache->lock); goto found; } } + spin_unlock(&cache->lock); cond_resched(); } if (!wrapped) { @@ -447,9 +456,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, { struct btrfs_block_group_cache *ret; - mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_find_block_group(root, hint, search_start, data, owner); - mutex_unlock(&root->fs_info->alloc_mutex); return ret; } static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation, @@ -1262,21 +1269,25 @@ static int update_block_group(struct btrfs_trans_handle *trans, set_extent_bits(&info->block_group_cache, start, end, BLOCK_GROUP_DIRTY, GFP_NOFS); + spin_lock(&cache->lock); old_val = btrfs_block_group_used(&cache->item); num_bytes = min(total, cache->key.offset - byte_in_group); if (alloc) { old_val += num_bytes; cache->space_info->bytes_used += num_bytes; + btrfs_set_block_group_used(&cache->item, old_val); + spin_unlock(&cache->lock); } else { old_val -= num_bytes; cache->space_info->bytes_used -= num_bytes; + btrfs_set_block_group_used(&cache->item, old_val); + spin_unlock(&cache->lock); if (mark_free) { set_extent_dirty(&info->free_space_cache, bytenr, bytenr + num_bytes - 1, GFP_NOFS); } } - btrfs_set_block_group_used(&cache->item, old_val); total -= num_bytes; bytenr += num_bytes; } @@ -1325,14 +1336,18 @@ static int update_pinned_extents(struct btrfs_root *root, } if (pin) { if (cache) { + spin_lock(&cache->lock); cache->pinned += len; cache->space_info->bytes_pinned += len; + spin_unlock(&cache->lock); } fs_info->total_pinned += len; } else { if (cache) { + spin_lock(&cache->lock); cache->pinned -= len; cache->space_info->bytes_pinned -= len; + spin_unlock(&cache->lock); } fs_info->total_pinned -= len; } @@ -1380,6 +1395,11 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, update_pinned_extents(root, start, end + 1 - start, 0); clear_extent_dirty(unpin, start, end, GFP_NOFS); set_extent_dirty(free_space_cache, start, end, GFP_NOFS); + if (need_resched()) { + mutex_unlock(&root->fs_info->alloc_mutex); + cond_resched(); + mutex_lock(&root->fs_info->alloc_mutex); + } } mutex_unlock(&root->fs_info->alloc_mutex); return 0; @@ -1417,8 +1437,16 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, &extent_item, sizeof(extent_item)); clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, GFP_NOFS); - eb = read_tree_block(extent_root, ins.objectid, ins.offset, - trans->transid); + + eb = btrfs_find_tree_block(extent_root, ins.objectid, + ins.offset); + + if (!btrfs_buffer_uptodate(eb, trans->transid)) { + mutex_unlock(&extent_root->fs_info->alloc_mutex); + btrfs_read_buffer(eb, trans->transid); + mutex_lock(&extent_root->fs_info->alloc_mutex); + } + btrfs_tree_lock(eb); level = btrfs_header_level(eb); if (level == 0) { @@ -1437,6 +1465,11 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, 0, level, btrfs_disk_key_objectid(&first)); BUG_ON(err); + if (need_resched()) { + mutex_unlock(&extent_root->fs_info->alloc_mutex); + cond_resched(); + mutex_lock(&extent_root->fs_info->alloc_mutex); + } } btrfs_free_path(path); return 0; @@ -1640,15 +1673,28 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct EXTENT_LOCKED); if (ret) break; - update_pinned_extents(extent_root, start, end + 1 - start, 1); clear_extent_bits(pending_del, start, end, EXTENT_LOCKED, GFP_NOFS); - ret = __free_extent(trans, extent_root, - start, end + 1 - start, - extent_root->root_key.objectid, - 0, 0, 0, 0, 0); + if (!test_range_bit(&extent_root->fs_info->extent_ins, + start, end, EXTENT_LOCKED, 0)) { + update_pinned_extents(extent_root, start, + end + 1 - start, 1); + ret = __free_extent(trans, extent_root, + start, end + 1 - start, + extent_root->root_key.objectid, + 0, 0, 0, 0, 0); + } else { + clear_extent_bits(&extent_root->fs_info->extent_ins, + start, end, EXTENT_LOCKED, GFP_NOFS); + } if (ret) err = ret; + + if (need_resched()) { + mutex_unlock(&extent_root->fs_info->alloc_mutex); + cond_resched(); + mutex_lock(&extent_root->fs_info->alloc_mutex); + } } return err; } @@ -1768,12 +1814,12 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, block_group = btrfs_lookup_first_block_group(info, hint_byte); if (!block_group) hint_byte = search_start; - block_group = __btrfs_find_block_group(root, block_group, + block_group = btrfs_find_block_group(root, block_group, hint_byte, data, 1); if (last_ptr && *last_ptr == 0 && block_group) hint_byte = block_group->key.objectid; } else { - block_group = __btrfs_find_block_group(root, + block_group = btrfs_find_block_group(root, trans->block_group, search_start, data, 1); } @@ -1895,7 +1941,7 @@ enospc: } block_group = btrfs_lookup_first_block_group(info, search_start); cond_resched(); - block_group = __btrfs_find_block_group(root, block_group, + block_group = btrfs_find_block_group(root, block_group, search_start, data, 0); goto check_failed; @@ -3032,11 +3078,14 @@ int __alloc_chunk_for_shrink(struct btrfs_root *root, u64 new_alloc_flags; u64 calc; + spin_lock(&shrink_block_group->lock); if (btrfs_block_group_used(&shrink_block_group->item) > 0) { - + spin_unlock(&shrink_block_group->lock); mutex_unlock(&root->fs_info->alloc_mutex); + trans = btrfs_start_transaction(root, 1); mutex_lock(&root->fs_info->alloc_mutex); + spin_lock(&shrink_block_group->lock); new_alloc_flags = update_block_group_flags(root, shrink_block_group->flags); @@ -3046,13 +3095,16 @@ int __alloc_chunk_for_shrink(struct btrfs_root *root, } else { calc = shrink_block_group->key.offset; } + spin_unlock(&shrink_block_group->lock); + do_chunk_alloc(trans, root->fs_info->extent_root, calc + 2 * 1024 * 1024, new_alloc_flags, force); mutex_unlock(&root->fs_info->alloc_mutex); btrfs_end_transaction(trans, root); mutex_lock(&root->fs_info->alloc_mutex); - } + } else + spin_unlock(&shrink_block_group->lock); return 0; } @@ -3199,6 +3251,7 @@ next: mutex_unlock(&root->fs_info->alloc_mutex); trans = btrfs_start_transaction(root, 1); + mutex_lock(&root->fs_info->alloc_mutex); memcpy(&key, &shrink_block_group->key, sizeof(key)); @@ -3316,6 +3369,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) break; } + spin_lock_init(&cache->lock); read_extent_buffer(leaf, &cache->item, btrfs_item_ptr_offset(leaf, path->slots[0]), sizeof(cache->item)); @@ -3343,10 +3397,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) /* use EXTENT_LOCKED to prevent merging */ set_extent_bits(block_group_cache, found_key.objectid, found_key.objectid + found_key.offset - 1, - bit | EXTENT_LOCKED, GFP_NOFS); + EXTENT_LOCKED, GFP_NOFS); set_state_private(block_group_cache, found_key.objectid, (unsigned long)cache); - + set_extent_bits(block_group_cache, found_key.objectid, + found_key.objectid + found_key.offset - 1, + bit | EXTENT_LOCKED, GFP_NOFS); if (key.objectid >= btrfs_super_total_bytes(&info->super_copy)) break; @@ -3377,6 +3433,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, BUG_ON(!cache); cache->key.objectid = chunk_offset; cache->key.offset = size; + spin_lock_init(&cache->lock); btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); btrfs_set_block_group_used(&cache->item, bytes_used); @@ -3391,10 +3448,13 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, bit = block_group_state_bits(type); set_extent_bits(block_group_cache, chunk_offset, chunk_offset + size - 1, - bit | EXTENT_LOCKED, GFP_NOFS); - + EXTENT_LOCKED, GFP_NOFS); set_state_private(block_group_cache, chunk_offset, (unsigned long)cache); + set_extent_bits(block_group_cache, chunk_offset, + chunk_offset + size - 1, + bit | EXTENT_LOCKED, GFP_NOFS); + ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, sizeof(cache->item)); BUG_ON(ret); -- cgit v1.2.3-70-g09d2 From 3eaa2885276fd6dac7b076a793932428b7168e74 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 Jul 2008 11:57:52 -0400 Subject: Btrfs: Fix the defragmention code and the block relocation code for data=ordered Before setting an extent to delalloc, the code needs to wait for pending ordered extents. Also, the relocation code needs to wait for ordered IO before scanning the block group again. This is because the extents are not removed until the IO for the new extents is finished Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 7 +++++++ fs/btrfs/disk-io.c | 3 +++ fs/btrfs/extent-tree.c | 39 ++++++++++++++++++++++++---------- fs/btrfs/ioctl.c | 21 ++++++++++++------- fs/btrfs/ordered-data.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/ordered-data.h | 7 +++++++ 6 files changed, 113 insertions(+), 20 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8ecac2e77a4..6675e916ebc 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -538,6 +538,13 @@ struct btrfs_fs_info { struct list_head dead_roots; atomic_t nr_async_submits; + /* + * this is used by the balancing code to wait for all the pending + * ordered extents + */ + spinlock_t ordered_extent_lock; + struct list_head ordered_extents; + /* * there is a pool of worker threads for checksumming during writes * and a pool for checksumming after reads. This is because readers diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7ce3f83c5dd..ec01062eb41 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1252,6 +1252,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_nlink = 1; fs_info->thread_pool_size = min(num_online_cpus() + 2, 8); + INIT_LIST_HEAD(&fs_info->ordered_extents); + spin_lock_init(&fs_info->ordered_extent_lock); + sb->s_blocksize = 4096; sb->s_blocksize_bits = blksize_bits(4096); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index febc6295c7a..f92b297e7da 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2640,6 +2640,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, struct file_ra_state *ra; unsigned long total_read = 0; unsigned long ra_pages; + struct btrfs_ordered_extent *ordered; struct btrfs_trans_handle *trans; ra = kzalloc(sizeof(*ra), GFP_NOFS); @@ -2658,9 +2659,9 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, calc_ra(i, last_index, ra_pages)); } total_read++; - if (((u64)i << PAGE_CACHE_SHIFT) > inode->i_size) +again: + if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) goto truncate_racing; - page = grab_cache_page(inode->i_mapping, i); if (!page) { goto out_unlock; @@ -2674,18 +2675,24 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, goto out_unlock; } } -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - ClearPageDirty(page); -#else - cancel_dirty_page(page, PAGE_CACHE_SIZE); -#endif wait_on_page_writeback(page); - set_page_extent_mapped(page); + page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; - lock_extent(io_tree, page_start, page_end, GFP_NOFS); + ordered = btrfs_lookup_ordered_extent(inode, page_start); + if (ordered) { + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_page(page); + page_cache_release(page); + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + goto again; + } + set_page_extent_mapped(page); + + set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); set_page_dirty(page); @@ -2694,10 +2701,18 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, unlock_page(page); page_cache_release(page); } - balance_dirty_pages_ratelimited_nr(inode->i_mapping, - total_read); out_unlock: + /* we have to start the IO in order to get the ordered extents + * instantiated. This allows the relocation to code to wait + * for all the ordered extents to hit the disk. + * + * Otherwise, it would constantly loop over the same extents + * because the old ones don't get deleted until the IO is + * started + */ + btrfs_fdatawrite_range(inode->i_mapping, start, start + len - 1, + WB_SYNC_NONE); kfree(ra); trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); if (trans) { @@ -3238,6 +3253,8 @@ next: btrfs_clean_old_snapshots(tree_root); + btrfs_wait_ordered_extents(tree_root); + trans = btrfs_start_transaction(tree_root, 1); btrfs_commit_transaction(trans, tree_root); mutex_lock(&root->fs_info->alloc_mutex); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 83f17a5cbd6..a61f2e7e2db 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -213,6 +213,7 @@ int btrfs_defrag_file(struct file *file) struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct btrfs_ordered_extent *ordered; struct page *page; unsigned long last_index; unsigned long ra_pages = root->fs_info->bdi.ra_pages; @@ -234,6 +235,7 @@ int btrfs_defrag_file(struct file *file) min(last_index, i + ra_pages - 1)); } total_read++; +again: page = grab_cache_page(inode->i_mapping, i); if (!page) goto out_unlock; @@ -247,18 +249,23 @@ int btrfs_defrag_file(struct file *file) } } -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - ClearPageDirty(page); -#else - cancel_dirty_page(page, PAGE_CACHE_SIZE); -#endif wait_on_page_writeback(page); - set_page_extent_mapped(page); page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; - lock_extent(io_tree, page_start, page_end, GFP_NOFS); + + ordered = btrfs_lookup_ordered_extent(inode, page_start); + if (ordered) { + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_page(page); + page_cache_release(page); + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + goto again; + } + set_page_extent_mapped(page); + set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index e42fd233e04..676e4bd65c5 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -167,20 +167,28 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, entry->file_offset = file_offset; entry->start = start; entry->len = len; + entry->inode = inode; + /* one ref for the tree */ atomic_set(&entry->refs, 1); init_waitqueue_head(&entry->wait); INIT_LIST_HEAD(&entry->list); + INIT_LIST_HEAD(&entry->root_extent_list); node = tree_insert(&tree->tree, file_offset, &entry->rb_node); if (node) { - entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); - atomic_inc(&entry->refs); + printk("warning dup entry from add_ordered_extent\n"); + BUG(); } set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, entry_end(entry) - 1, GFP_NOFS); + spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); + list_add_tail(&entry->root_extent_list, + &BTRFS_I(inode)->root->fs_info->ordered_extents); + spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); + mutex_unlock(&tree->mutex); BUG_ON(node); return 0; @@ -285,11 +293,55 @@ int btrfs_remove_ordered_extent(struct inode *inode, rb_erase(node, &tree->tree); tree->last = NULL; set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); + + spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); + list_del_init(&entry->root_extent_list); + spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); + mutex_unlock(&tree->mutex); wake_up(&entry->wait); return 0; } +int btrfs_wait_ordered_extents(struct btrfs_root *root) +{ + struct list_head splice; + struct list_head *cur; + struct btrfs_ordered_extent *ordered; + struct inode *inode; + + INIT_LIST_HEAD(&splice); + + spin_lock(&root->fs_info->ordered_extent_lock); + list_splice_init(&root->fs_info->ordered_extents, &splice); + while(!list_empty(&splice)) { + cur = splice.next; + ordered = list_entry(cur, struct btrfs_ordered_extent, + root_extent_list); + list_del_init(&ordered->root_extent_list); + atomic_inc(&ordered->refs); + inode = ordered->inode; + + /* + * the inode can't go away until all the pages are gone + * and the pages won't go away while there is still + * an ordered extent and the ordered extent won't go + * away until it is off this list. So, we can safely + * increment i_count here and call iput later + */ + atomic_inc(&inode->i_count); + spin_unlock(&root->fs_info->ordered_extent_lock); + + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + iput(inode); + + spin_lock(&root->fs_info->ordered_extent_lock); + } + spin_unlock(&root->fs_info->ordered_extent_lock); + return 0; +} + /* * Used to start IO or wait for a given ordered extent to finish. * diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 199cb0b4f1d..5efe6b63c74 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -80,6 +80,9 @@ struct btrfs_ordered_extent { /* reference count */ atomic_t refs; + /* the inode we belong to */ + struct inode *inode; + /* list of checksums for insertion when the extent io is done */ struct list_head list; @@ -88,6 +91,9 @@ struct btrfs_ordered_extent { /* our friendly rbtree entry */ struct rb_node rb_node; + + /* a per root list of all the pending ordered extents */ + struct list_head root_extent_list; }; @@ -137,4 +143,5 @@ int btrfs_wait_on_page_writeback_range(struct address_space *mapping, pgoff_t start, pgoff_t end); int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end, int sync_mode); +int btrfs_wait_ordered_extents(struct btrfs_root *root); #endif -- cgit v1.2.3-70-g09d2 From 8e8a1e31f2780b7865d40a8c5142a04f2bcdcb86 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 24 Jul 2008 12:17:14 -0400 Subject: Btrfs: Fix a few functions that exit without stopping their transaction Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 +++- fs/btrfs/ioctl.c | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f92b297e7da..e6a3ba9dd10 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3275,8 +3275,10 @@ next: ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret > 0) ret = -EIO; - if (ret < 0) + if (ret < 0) { + btrfs_end_transaction(trans, root); goto out; + } clear_extent_bits(&info->block_group_cache, key.objectid, key.objectid + key.offset - 1, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index faf081302d0..7d40778a90e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -78,8 +78,10 @@ static noinline int create_subvol(struct btrfs_root *root, char *name, leaf = btrfs_alloc_free_block(trans, root, root->leafsize, objectid, trans->transid, 0, 0, 0, 0); - if (IS_ERR(leaf)) - return PTR_ERR(leaf); + if (IS_ERR(leaf)) { + ret = PTR_ERR(leaf); + goto fail; + } btrfs_set_header_nritems(leaf, 0); btrfs_set_header_level(leaf, 0); -- cgit v1.2.3-70-g09d2 From 974e35a82d022434efa50fe5f805ae14c542ba8c Mon Sep 17 00:00:00 2001 From: Yan Date: Thu, 24 Jul 2008 12:18:16 -0400 Subject: Btrfs: Properly release lock in pin_down_bytes When buffer isn't uptodate, pin_down_bytes may leave the tree locked after it returns. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e6a3ba9dd10..cdfb4ff4b45 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1485,8 +1485,8 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, struct extent_buffer *buf; buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (buf) { - if (btrfs_try_tree_lock(buf) && - btrfs_buffer_uptodate(buf, 0)) { + if (btrfs_buffer_uptodate(buf, 0) && + btrfs_try_tree_lock(buf)) { u64 transid = root->fs_info->running_transaction->transid; u64 header_transid = -- cgit v1.2.3-70-g09d2 From 31153d81284934601d08110ac7698fd9a535e4c0 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Mon, 28 Jul 2008 15:32:19 -0400 Subject: Btrfs: Add a leaf reference cache Much of the IO done while dropping snapshots is done looking up leaves in the filesystem trees to see if they point to any extents and to drop the references on any extents found. This creates a cache so that IO isn't required. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 3 +- fs/btrfs/ctree.c | 4 +- fs/btrfs/ctree.h | 8 +- fs/btrfs/disk-io.c | 14 +++ fs/btrfs/extent-tree.c | 115 ++++++++++++++++++++++--- fs/btrfs/ref-cache.c | 226 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/ref-cache.h | 72 ++++++++++++++++ fs/btrfs/transaction.c | 67 +++++++++++---- 8 files changed, 476 insertions(+), 33 deletions(-) create mode 100644 fs/btrfs/ref-cache.c create mode 100644 fs/btrfs/ref-cache.h (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 5a0fd7b0e3e..a4b38177abd 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -6,7 +6,8 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ - extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o + extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ + ref-cache.o btrfs-$(CONFIG_FS_POSIX_ACL) += acl.o else diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ab4ac0365c7..245eb00435d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -165,7 +165,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); WARN_ON(btrfs_header_generation(buf) > trans->transid); - ret = btrfs_inc_ref(trans, new_root, buf); + ret = btrfs_inc_ref(trans, new_root, buf, 0); kfree(new_root); if (ret) @@ -232,7 +232,7 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(btrfs_header_generation(buf) > trans->transid); if (btrfs_header_generation(buf) != trans->transid) { different_trans = 1; - ret = btrfs_inc_ref(trans, root, buf); + ret = btrfs_inc_ref(trans, root, buf, 1); if (ret) return ret; } else { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 07d321552db..34ed23d64eb 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -592,6 +592,10 @@ struct btrfs_fs_info { u64 last_alloc; u64 last_data_alloc; + spinlock_t ref_cache_lock; + u64 total_ref_cache_size; + u64 running_ref_cache_size; + u64 avail_data_alloc_bits; u64 avail_metadata_alloc_bits; u64 avail_system_alloc_bits; @@ -613,6 +617,8 @@ struct btrfs_root { spinlock_t node_lock; struct extent_buffer *commit_root; + struct btrfs_leaf_ref_tree *ref_tree; + struct btrfs_root_item root_item; struct btrfs_key root_key; struct btrfs_fs_info *fs_info; @@ -1430,7 +1436,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, u64 search_end, struct btrfs_key *ins, u64 data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf); + struct extent_buffer *buf, int cache_ref); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 root_objectid, u64 ref_generation, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d6092396734..4f0e1d06c38 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -716,6 +716,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->node = NULL; root->inode = NULL; root->commit_root = NULL; + root->ref_tree = NULL; root->sectorsize = sectorsize; root->nodesize = nodesize; root->leafsize = leafsize; @@ -1165,12 +1166,19 @@ static int transaction_kthread(void *arg) vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); mutex_lock(&root->fs_info->transaction_kthread_mutex); + printk("btrfs: total reference cache size %Lu\n", + root->fs_info->total_ref_cache_size); + mutex_lock(&root->fs_info->trans_mutex); cur = root->fs_info->running_transaction; if (!cur) { mutex_unlock(&root->fs_info->trans_mutex); goto sleep; } + + printk("btrfs: running reference cache size %Lu\n", + root->fs_info->running_ref_cache_size); + now = get_seconds(); if (now < cur->start_time || now - cur->start_time < 30) { mutex_unlock(&root->fs_info->trans_mutex); @@ -1233,6 +1241,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, spin_lock_init(&fs_info->hash_lock); spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); + spin_lock_init(&fs_info->ref_cache_lock); init_completion(&fs_info->kobj_unregister); fs_info->tree_root = tree_root; @@ -1699,6 +1708,11 @@ int close_ctree(struct btrfs_root *root) printk("btrfs: at unmount delalloc count %Lu\n", fs_info->delalloc_bytes); } + if (fs_info->total_ref_cache_size) { + printk("btrfs: at umount reference cache size %Lu\n", + fs_info->total_ref_cache_size); + } + if (fs_info->extent_root->node) free_extent_buffer(fs_info->extent_root->node); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cdfb4ff4b45..7b24f151165 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -26,6 +26,7 @@ #include "transaction.h" #include "volumes.h" #include "locking.h" +#include "ref-cache.h" #define BLOCK_GROUP_DATA EXTENT_WRITEBACK #define BLOCK_GROUP_METADATA EXTENT_UPTODATE @@ -927,7 +928,7 @@ out: } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf) + struct extent_buffer *buf, int cache_ref) { u64 bytenr; u32 nritems; @@ -937,6 +938,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, int level; int ret; int faili; + int nr_file_extents = 0; if (!root->ref_cows) return 0; @@ -959,6 +961,9 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (disk_bytenr == 0) continue; + if (buf != root->commit_root) + nr_file_extents++; + mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr, btrfs_file_extent_disk_num_bytes(buf, fi), @@ -988,6 +993,53 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, } } } + /* cache orignal leaf block's references */ + if (level == 0 && cache_ref && buf != root->commit_root) { + struct btrfs_leaf_ref *ref; + struct btrfs_extent_info *info; + + ref = btrfs_alloc_leaf_ref(nr_file_extents); + if (!ref) { + WARN_ON(1); + goto out; + } + + btrfs_item_key_to_cpu(buf, &ref->key, 0); + + ref->bytenr = buf->start; + ref->owner = btrfs_header_owner(buf); + ref->generation = btrfs_header_generation(buf); + ref->nritems = nr_file_extents; + info = ref->extents; + + for (i = 0; nr_file_extents > 0 && i < nritems; i++) { + u64 disk_bytenr; + btrfs_item_key_to_cpu(buf, &key, i); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(buf, i, + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(buf, fi) == + BTRFS_FILE_EXTENT_INLINE) + continue; + disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); + if (disk_bytenr == 0) + continue; + + info->bytenr = disk_bytenr; + info->num_bytes = + btrfs_file_extent_disk_num_bytes(buf, fi); + info->objectid = key.objectid; + info->offset = key.offset; + info++; + } + + BUG_ON(!root->ref_tree); + ret = btrfs_add_leaf_ref(root, ref); + WARN_ON(ret); + btrfs_free_leaf_ref(ref); + } +out: return 0; fail: WARN_ON(1); @@ -2215,9 +2267,9 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return buf; } -static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_buffer *leaf) +static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *leaf) { u64 leaf_owner; u64 leaf_generation; @@ -2266,6 +2318,30 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, return 0; } +static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_leaf_ref *ref) +{ + int i; + int ret; + struct btrfs_extent_info *info = ref->extents; + + mutex_unlock(&root->fs_info->alloc_mutex); + for (i = 0; i < ref->nritems; i++) { + mutex_lock(&root->fs_info->alloc_mutex); + ret = __btrfs_free_extent(trans, root, + info->bytenr, info->num_bytes, + ref->owner, ref->generation, + info->objectid, info->offset, 0); + mutex_unlock(&root->fs_info->alloc_mutex); + BUG_ON(ret); + info++; + } + mutex_lock(&root->fs_info->alloc_mutex); + + return 0; +} + static void noinline reada_walk_down(struct btrfs_root *root, struct extent_buffer *node, int slot) @@ -2341,6 +2417,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, struct extent_buffer *next; struct extent_buffer *cur; struct extent_buffer *parent; + struct btrfs_leaf_ref *ref; u32 blocksize; int ret; u32 refs; @@ -2370,7 +2447,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, btrfs_header_nritems(cur)) break; if (*level == 0) { - ret = drop_leaf_ref(trans, root, cur); + ret = drop_leaf_ref_no_cache(trans, root, cur); BUG_ON(ret); break; } @@ -2391,6 +2468,21 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, BUG_ON(ret); continue; } + + if (*level == 1) { + struct btrfs_key key; + btrfs_node_key_to_cpu(cur, &key, path->slots[*level]); + ref = btrfs_lookup_leaf_ref(root, &key); + if (ref) { + ret = drop_leaf_ref(trans, root, ref); + BUG_ON(ret); + btrfs_remove_leaf_ref(root, ref); + btrfs_free_leaf_ref(ref); + *level = 0; + break; + } + } + next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { free_extent_buffer(next); @@ -2398,7 +2490,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, if (path->slots[*level] == 0) reada_walk_down(root, cur, path->slots[*level]); - next = read_tree_block(root, bytenr, blocksize, ptr_gen); cond_resched(); @@ -2435,17 +2526,19 @@ out: WARN_ON(*level >= BTRFS_MAX_LEVEL); if (path->nodes[*level] == root->node) { - root_owner = root->root_key.objectid; parent = path->nodes[*level]; + bytenr = path->nodes[*level]->start; } else { parent = path->nodes[*level + 1]; - root_owner = btrfs_header_owner(parent); + bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]); } + blocksize = btrfs_level_size(root, *level); + root_owner = btrfs_header_owner(parent); root_gen = btrfs_header_generation(parent); - ret = __btrfs_free_extent(trans, root, path->nodes[*level]->start, - path->nodes[*level]->len, - root_owner, root_gen, 0, 0, 1); + + ret = __btrfs_free_extent(trans, root, bytenr, blocksize, + root_owner, root_gen, 0, 0, 1); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c new file mode 100644 index 00000000000..95a9faeb9dc --- /dev/null +++ b/fs/btrfs/ref-cache.c @@ -0,0 +1,226 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include "ctree.h" +#include "ref-cache.h" +#include "transaction.h" + +struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(int nr_extents) +{ + struct btrfs_leaf_ref *ref; + + ref = kmalloc(btrfs_leaf_ref_size(nr_extents), GFP_NOFS); + if (ref) { + memset(ref, 0, sizeof(*ref)); + atomic_set(&ref->usage, 1); + } + return ref; +} + +void btrfs_free_leaf_ref(struct btrfs_leaf_ref *ref) +{ + if (!ref) + return; + WARN_ON(atomic_read(&ref->usage) == 0); + if (atomic_dec_and_test(&ref->usage)) { + BUG_ON(ref->in_tree); + kfree(ref); + } +} + +static int comp_keys(struct btrfs_key *k1, struct btrfs_key *k2) +{ + if (k1->objectid > k2->objectid) + return 1; + if (k1->objectid < k2->objectid) + return -1; + if (k1->type > k2->type) + return 1; + if (k1->type < k2->type) + return -1; + if (k1->offset > k2->offset) + return 1; + if (k1->offset < k2->offset) + return -1; + return 0; +} + +static struct rb_node *tree_insert(struct rb_root *root, struct btrfs_key *key, + struct rb_node *node) +{ + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; + struct btrfs_leaf_ref *entry; + int ret; + + while(*p) { + parent = *p; + entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node); + WARN_ON(!entry->in_tree); + + ret = comp_keys(key, &entry->key); + if (ret < 0) + p = &(*p)->rb_left; + else if (ret > 0) + p = &(*p)->rb_right; + else + return parent; + } + + entry = rb_entry(node, struct btrfs_leaf_ref, rb_node); + entry->in_tree = 1; + rb_link_node(node, parent, p); + rb_insert_color(node, root); + return NULL; +} + +static struct rb_node *tree_search(struct rb_root *root, struct btrfs_key *key) +{ + struct rb_node * n = root->rb_node; + struct btrfs_leaf_ref *entry; + int ret; + + while(n) { + entry = rb_entry(n, struct btrfs_leaf_ref, rb_node); + WARN_ON(!entry->in_tree); + + ret = comp_keys(key, &entry->key); + if (ret < 0) + n = n->rb_left; + else if (ret > 0) + n = n->rb_right; + else + return n; + } + return NULL; +} + +int btrfs_remove_leaf_refs(struct btrfs_root *root) +{ + struct rb_node *rb; + struct btrfs_leaf_ref *ref = NULL; + struct btrfs_leaf_ref_tree *tree = root->ref_tree; + + if (!tree) + return 0; + + spin_lock(&tree->lock); + while(!btrfs_leaf_ref_tree_empty(tree)) { + tree->last = NULL; + rb = rb_first(&tree->root); + ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node); + rb_erase(&ref->rb_node, &tree->root); + ref->in_tree = 0; + + spin_unlock(&tree->lock); + + btrfs_free_leaf_ref(ref); + + cond_resched(); + spin_lock(&tree->lock); + } + spin_unlock(&tree->lock); + return 0; +} + +struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, + struct btrfs_key *key) +{ + struct rb_node *rb; + struct btrfs_leaf_ref *ref = NULL; + struct btrfs_leaf_ref_tree *tree = root->ref_tree; + + if (!tree) + return NULL; + + spin_lock(&tree->lock); + if (tree->last && comp_keys(key, &tree->last->key) == 0) { + ref = tree->last; + } else { + rb = tree_search(&tree->root, key); + if (rb) { + ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node); + tree->last = ref; + } + } + if (ref) + atomic_inc(&ref->usage); + spin_unlock(&tree->lock); + return ref; +} + +int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) +{ + int ret = 0; + struct rb_node *rb; + size_t size = btrfs_leaf_ref_size(ref->nritems); + struct btrfs_leaf_ref_tree *tree = root->ref_tree; + struct btrfs_transaction *trans = root->fs_info->running_transaction; + + spin_lock(&tree->lock); + rb = tree_insert(&tree->root, &ref->key, &ref->rb_node); + if (rb) { + ret = -EEXIST; + } else { + spin_lock(&root->fs_info->ref_cache_lock); + root->fs_info->total_ref_cache_size += size; + if (trans && tree->generation == trans->transid) + root->fs_info->running_ref_cache_size += size; + spin_unlock(&root->fs_info->ref_cache_lock); + + tree->last = ref; + atomic_inc(&ref->usage); + } + spin_unlock(&tree->lock); + return ret; +} + +int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) +{ + size_t size = btrfs_leaf_ref_size(ref->nritems); + struct btrfs_leaf_ref_tree *tree = root->ref_tree; + struct btrfs_transaction *trans = root->fs_info->running_transaction; + + BUG_ON(!ref->in_tree); + spin_lock(&tree->lock); + + spin_lock(&root->fs_info->ref_cache_lock); + root->fs_info->total_ref_cache_size -= size; + if (trans && tree->generation == trans->transid) + root->fs_info->running_ref_cache_size -= size; + spin_unlock(&root->fs_info->ref_cache_lock); + + if (tree->last == ref) { + struct rb_node *next = rb_next(&ref->rb_node); + if (next) { + tree->last = rb_entry(next, struct btrfs_leaf_ref, + rb_node); + } else + tree->last = NULL; + } + + rb_erase(&ref->rb_node, &tree->root); + ref->in_tree = 0; + + spin_unlock(&tree->lock); + + btrfs_free_leaf_ref(ref); + return 0; +} + diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h new file mode 100644 index 00000000000..79ecc47110f --- /dev/null +++ b/fs/btrfs/ref-cache.h @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +struct btrfs_extent_info { + u64 bytenr; + u64 num_bytes; + u64 objectid; + u64 offset; +}; + +struct btrfs_leaf_ref { + struct rb_node rb_node; + struct btrfs_key key; + int in_tree; + atomic_t usage; + + u64 bytenr; + u64 owner; + u64 generation; + int nritems; + struct btrfs_extent_info extents[]; +}; + +struct btrfs_leaf_ref_tree { + struct rb_root root; + struct btrfs_leaf_ref *last; + u64 generation; + spinlock_t lock; +}; + +static inline size_t btrfs_leaf_ref_size(int nr_extents) +{ + return sizeof(struct btrfs_leaf_ref) + + sizeof(struct btrfs_extent_info) * nr_extents; +} + +static inline void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree) +{ + tree->root.rb_node = NULL; + tree->last = NULL; + tree->generation = 0; + spin_lock_init(&tree->lock); +} + +static inline int btrfs_leaf_ref_tree_empty(struct btrfs_leaf_ref_tree *tree) +{ + return RB_EMPTY_ROOT(&tree->root); +} + +void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree); +struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(int nr_extents); +void btrfs_free_leaf_ref(struct btrfs_leaf_ref *ref); +struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, + struct btrfs_key *key); +int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); +int btrfs_remove_leaf_refs(struct btrfs_root *root); +int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 768b0d223e6..543e5ee4033 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -24,6 +24,7 @@ #include "disk-io.h" #include "transaction.h" #include "locking.h" +#include "ref-cache.h" static int total_trans = 0; extern struct kmem_cache *btrfs_trans_handle_cachep; @@ -31,6 +32,13 @@ extern struct kmem_cache *btrfs_transaction_cachep; #define BTRFS_ROOT_TRANS_TAG 0 +struct dirty_root { + struct list_head list; + struct btrfs_root *root; + struct btrfs_root *latest_root; + struct btrfs_leaf_ref_tree ref_tree; +}; + static noinline void put_transaction(struct btrfs_transaction *transaction) { WARN_ON(transaction->use_count == 0); @@ -84,6 +92,7 @@ static noinline int join_transaction(struct btrfs_root *root) static noinline int record_root_in_trans(struct btrfs_root *root) { + struct dirty_root *dirty; u64 running_trans_id = root->fs_info->running_transaction->transid; if (root->ref_cows && root->last_trans < running_trans_id) { WARN_ON(root == root->fs_info->extent_root); @@ -91,7 +100,25 @@ static noinline int record_root_in_trans(struct btrfs_root *root) radix_tree_tag_set(&root->fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); + + dirty = kmalloc(sizeof(*dirty), GFP_NOFS); + BUG_ON(!dirty); + dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS); + BUG_ON(!dirty->root); + + dirty->latest_root = root; + INIT_LIST_HEAD(&dirty->list); + btrfs_leaf_ref_tree_init(&dirty->ref_tree); + dirty->ref_tree.generation = running_trans_id; + root->commit_root = btrfs_root_node(root); + root->ref_tree = &dirty->ref_tree; + + memcpy(dirty->root, root, sizeof(*root)); + spin_lock_init(&dirty->root->node_lock); + mutex_init(&dirty->root->objectid_mutex); + dirty->root->node = root->commit_root; + dirty->root->commit_root = NULL; } else { WARN_ON(1); } @@ -310,12 +337,6 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, return 0; } -struct dirty_root { - struct list_head list; - struct btrfs_root *root; - struct btrfs_root *latest_root; -}; - int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, struct list_head *dead_list) @@ -325,8 +346,10 @@ int btrfs_add_dead_root(struct btrfs_root *root, dirty = kmalloc(sizeof(*dirty), GFP_NOFS); if (!dirty) return -ENOMEM; + btrfs_leaf_ref_tree_init(&dirty->ref_tree); dirty->root = root; dirty->latest_root = latest; + root->ref_tree = NULL; list_add(&dirty->list, dead_list); return 0; } @@ -354,11 +377,23 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, radix_tree_tag_clear(radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); + + BUG_ON(!root->ref_tree); + dirty = container_of(root->ref_tree, struct dirty_root, + ref_tree); + if (root->commit_root == root->node) { WARN_ON(root->node->start != btrfs_root_bytenr(&root->root_item)); + + BUG_ON(!btrfs_leaf_ref_tree_empty( + root->ref_tree)); free_extent_buffer(root->commit_root); root->commit_root = NULL; + root->ref_tree = NULL; + + kfree(dirty->root); + kfree(dirty); /* make sure to update the root on disk * so we get any updates to the block used @@ -370,23 +405,12 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, &root->root_item); continue; } - dirty = kmalloc(sizeof(*dirty), GFP_NOFS); - BUG_ON(!dirty); - dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS); - BUG_ON(!dirty->root); memset(&root->root_item.drop_progress, 0, sizeof(struct btrfs_disk_key)); root->root_item.drop_level = 0; - - memcpy(dirty->root, root, sizeof(*root)); - dirty->root->node = root->commit_root; - dirty->latest_root = root; - spin_lock_init(&dirty->root->node_lock); - mutex_init(&dirty->root->objectid_mutex); - root->commit_root = NULL; - + root->ref_tree = NULL; root->root_key.offset = root->fs_info->generation; btrfs_set_root_bytenr(&root->root_item, root->node->start); @@ -409,6 +433,7 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, list_add(&dirty->list, list); } else { WARN_ON(1); + free_extent_buffer(dirty->root->node); kfree(dirty->root); kfree(dirty); } @@ -514,6 +539,8 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); + btrfs_remove_leaf_refs(dirty->root); + free_extent_buffer(dirty->root->node); kfree(dirty->root); kfree(dirty); @@ -698,6 +725,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, &dirty_fs_roots); BUG_ON(ret); + spin_lock(&root->fs_info->ref_cache_lock); + root->fs_info->running_ref_cache_size = 0; + spin_unlock(&root->fs_info->ref_cache_lock); + ret = btrfs_commit_tree_roots(trans, root); BUG_ON(ret); -- cgit v1.2.3-70-g09d2 From 017e5369eb353559d68a11d4a718faa634533821 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 28 Jul 2008 15:32:51 -0400 Subject: Btrfs: Leaf reference cache update This changes the reference cache to make a single cache per root instead of one cache per transaction, and to key by the byte number of the disk block instead of the keys inside. This makes it much less likely to have cache misses if a snapshot or something has an extra reference on a higher node or a leaf while the first transaction that added the leaf into the cache is dropping. Some throttling is added to functions that free blocks heavily so they wait for old transactions to drop. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 11 +++++++- fs/btrfs/disk-io.c | 8 +++--- fs/btrfs/extent-tree.c | 18 +++++-------- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 8 +++--- fs/btrfs/ref-cache.c | 71 +++++++++++--------------------------------------- fs/btrfs/ref-cache.h | 18 ++++++------- fs/btrfs/transaction.c | 34 +++++++++--------------- 8 files changed, 61 insertions(+), 109 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 34ed23d64eb..4eca0aa1ce7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -594,7 +594,6 @@ struct btrfs_fs_info { spinlock_t ref_cache_lock; u64 total_ref_cache_size; - u64 running_ref_cache_size; u64 avail_data_alloc_bits; u64 avail_metadata_alloc_bits; @@ -606,10 +605,18 @@ struct btrfs_fs_info { void *bdev_holder; }; +struct btrfs_leaf_ref_tree { + struct rb_root root; + struct btrfs_leaf_ref *last; + struct list_head list; + spinlock_t lock; +}; + /* * in ram representation of the tree. extent_root is used for all allocations * and for the extent tree extent_root root. */ +struct dirty_root; struct btrfs_root { struct extent_buffer *node; @@ -618,6 +625,8 @@ struct btrfs_root { struct extent_buffer *commit_root; struct btrfs_leaf_ref_tree *ref_tree; + struct btrfs_leaf_ref_tree ref_tree_struct; + struct dirty_root *dirty_root; struct btrfs_root_item root_item; struct btrfs_key root_key; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4f0e1d06c38..eccdf13a95a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -40,6 +40,7 @@ #include "print-tree.h" #include "async-thread.h" #include "locking.h" +#include "ref-cache.h" #if 0 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) @@ -737,6 +738,10 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, spin_lock_init(&root->node_lock); spin_lock_init(&root->orphan_lock); mutex_init(&root->objectid_mutex); + + btrfs_leaf_ref_tree_init(&root->ref_tree_struct); + root->ref_tree = &root->ref_tree_struct; + memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); @@ -1176,9 +1181,6 @@ static int transaction_kthread(void *arg) goto sleep; } - printk("btrfs: running reference cache size %Lu\n", - root->fs_info->running_ref_cache_size); - now = get_seconds(); if (now < cur->start_time || now - cur->start_time < 30) { mutex_unlock(&root->fs_info->trans_mutex); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7b24f151165..0e294cfaa60 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1004,8 +1004,6 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, goto out; } - btrfs_item_key_to_cpu(buf, &ref->key, 0); - ref->bytenr = buf->start; ref->owner = btrfs_header_owner(buf); ref->generation = btrfs_header_generation(buf); @@ -2387,19 +2385,15 @@ static void noinline reada_walk_down(struct btrfs_root *root, } } -/* - * we want to avoid as much random IO as we can with the alloc mutex - * held, so drop the lock and do the lookup, then do it again with the - * lock held. - */ int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len, u32 *refs) { + int ret; mutex_unlock(&root->fs_info->alloc_mutex); - lookup_extent_ref(NULL, root, start, len, refs); + ret = lookup_extent_ref(NULL, root, start, len, refs); cond_resched(); mutex_lock(&root->fs_info->alloc_mutex); - return lookup_extent_ref(NULL, root, start, len, refs); + return ret; } /* @@ -2468,11 +2462,11 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, BUG_ON(ret); continue; } - + if (*level == 1) { struct btrfs_key key; btrfs_node_key_to_cpu(cur, &key, path->slots[*level]); - ref = btrfs_lookup_leaf_ref(root, &key); + ref = btrfs_lookup_leaf_ref(root, bytenr); if (ref) { ret = drop_leaf_ref(trans, root, ref); BUG_ON(ret); @@ -2482,7 +2476,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, break; } } - next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { free_extent_buffer(next); @@ -2672,6 +2665,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root ret = -EAGAIN; break; } + wake_up(&root->fs_info->transaction_throttle); } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e5ffb66ad32..3efec25e34b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -347,7 +347,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, btrfs_update_inode(trans, root, inode); } failed: - err = btrfs_end_transaction_throttle(trans, root); + err = btrfs_end_transaction(trans, root); out_unlock: unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); return err; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index cf9534b79ab..4f977ea5497 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2482,7 +2482,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, btrfs_update_inode_block_group(trans, dir); out_unlock: nr = trans->blocks_used; - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); fail: if (drop_inode) { inode_dec_link_count(inode); @@ -2535,7 +2535,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, drop_inode = 1; nr = trans->blocks_used; - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); fail: if (drop_inode) { inode_dec_link_count(inode); @@ -2609,7 +2609,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) out_fail: nr = trans->blocks_used; - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); out_unlock: if (drop_on_err) @@ -3548,7 +3548,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, out_unlock: nr = trans->blocks_used; - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); out_fail: if (drop_inode) { inode_dec_link_count(inode); diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c index 95a9faeb9dc..ec9587784a3 100644 --- a/fs/btrfs/ref-cache.c +++ b/fs/btrfs/ref-cache.c @@ -29,6 +29,7 @@ struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(int nr_extents) if (ref) { memset(ref, 0, sizeof(*ref)); atomic_set(&ref->usage, 1); + INIT_LIST_HEAD(&ref->list); } return ref; } @@ -44,40 +45,21 @@ void btrfs_free_leaf_ref(struct btrfs_leaf_ref *ref) } } -static int comp_keys(struct btrfs_key *k1, struct btrfs_key *k2) -{ - if (k1->objectid > k2->objectid) - return 1; - if (k1->objectid < k2->objectid) - return -1; - if (k1->type > k2->type) - return 1; - if (k1->type < k2->type) - return -1; - if (k1->offset > k2->offset) - return 1; - if (k1->offset < k2->offset) - return -1; - return 0; -} - -static struct rb_node *tree_insert(struct rb_root *root, struct btrfs_key *key, +static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, struct rb_node *node) { struct rb_node ** p = &root->rb_node; struct rb_node * parent = NULL; struct btrfs_leaf_ref *entry; - int ret; while(*p) { parent = *p; entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node); WARN_ON(!entry->in_tree); - ret = comp_keys(key, &entry->key); - if (ret < 0) + if (bytenr < entry->bytenr) p = &(*p)->rb_left; - else if (ret > 0) + else if (bytenr > entry->bytenr) p = &(*p)->rb_right; else return parent; @@ -90,20 +72,18 @@ static struct rb_node *tree_insert(struct rb_root *root, struct btrfs_key *key, return NULL; } -static struct rb_node *tree_search(struct rb_root *root, struct btrfs_key *key) +static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) { struct rb_node * n = root->rb_node; struct btrfs_leaf_ref *entry; - int ret; while(n) { entry = rb_entry(n, struct btrfs_leaf_ref, rb_node); WARN_ON(!entry->in_tree); - ret = comp_keys(key, &entry->key); - if (ret < 0) + if (bytenr < entry->bytenr) n = n->rb_left; - else if (ret > 0) + else if (bytenr > entry->bytenr) n = n->rb_right; else return n; @@ -122,11 +102,11 @@ int btrfs_remove_leaf_refs(struct btrfs_root *root) spin_lock(&tree->lock); while(!btrfs_leaf_ref_tree_empty(tree)) { - tree->last = NULL; rb = rb_first(&tree->root); ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node); rb_erase(&ref->rb_node, &tree->root); ref->in_tree = 0; + list_del_init(&ref->list); spin_unlock(&tree->lock); @@ -140,7 +120,7 @@ int btrfs_remove_leaf_refs(struct btrfs_root *root) } struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, - struct btrfs_key *key) + u64 bytenr) { struct rb_node *rb; struct btrfs_leaf_ref *ref = NULL; @@ -150,15 +130,9 @@ struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, return NULL; spin_lock(&tree->lock); - if (tree->last && comp_keys(key, &tree->last->key) == 0) { - ref = tree->last; - } else { - rb = tree_search(&tree->root, key); - if (rb) { - ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node); - tree->last = ref; - } - } + rb = tree_search(&tree->root, bytenr); + if (rb) + ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node); if (ref) atomic_inc(&ref->usage); spin_unlock(&tree->lock); @@ -171,21 +145,17 @@ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) struct rb_node *rb; size_t size = btrfs_leaf_ref_size(ref->nritems); struct btrfs_leaf_ref_tree *tree = root->ref_tree; - struct btrfs_transaction *trans = root->fs_info->running_transaction; spin_lock(&tree->lock); - rb = tree_insert(&tree->root, &ref->key, &ref->rb_node); + rb = tree_insert(&tree->root, ref->bytenr, &ref->rb_node); if (rb) { ret = -EEXIST; } else { spin_lock(&root->fs_info->ref_cache_lock); root->fs_info->total_ref_cache_size += size; - if (trans && tree->generation == trans->transid) - root->fs_info->running_ref_cache_size += size; spin_unlock(&root->fs_info->ref_cache_lock); - - tree->last = ref; atomic_inc(&ref->usage); + list_add_tail(&ref->list, &tree->list); } spin_unlock(&tree->lock); return ret; @@ -195,28 +165,17 @@ int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) { size_t size = btrfs_leaf_ref_size(ref->nritems); struct btrfs_leaf_ref_tree *tree = root->ref_tree; - struct btrfs_transaction *trans = root->fs_info->running_transaction; BUG_ON(!ref->in_tree); spin_lock(&tree->lock); spin_lock(&root->fs_info->ref_cache_lock); root->fs_info->total_ref_cache_size -= size; - if (trans && tree->generation == trans->transid) - root->fs_info->running_ref_cache_size -= size; spin_unlock(&root->fs_info->ref_cache_lock); - if (tree->last == ref) { - struct rb_node *next = rb_next(&ref->rb_node); - if (next) { - tree->last = rb_entry(next, struct btrfs_leaf_ref, - rb_node); - } else - tree->last = NULL; - } - rb_erase(&ref->rb_node, &tree->root); ref->in_tree = 0; + list_del_init(&ref->list); spin_unlock(&tree->lock); diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h index 79ecc47110f..823c049f72f 100644 --- a/fs/btrfs/ref-cache.h +++ b/fs/btrfs/ref-cache.h @@ -15,6 +15,8 @@ * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 021110-1307, USA. */ +#ifndef __REFCACHE__ +#define __REFCACHE__ struct btrfs_extent_info { u64 bytenr; @@ -25,7 +27,6 @@ struct btrfs_extent_info { struct btrfs_leaf_ref { struct rb_node rb_node; - struct btrfs_key key; int in_tree; atomic_t usage; @@ -33,14 +34,9 @@ struct btrfs_leaf_ref { u64 owner; u64 generation; int nritems; - struct btrfs_extent_info extents[]; -}; -struct btrfs_leaf_ref_tree { - struct rb_root root; - struct btrfs_leaf_ref *last; - u64 generation; - spinlock_t lock; + struct list_head list; + struct btrfs_extent_info extents[]; }; static inline size_t btrfs_leaf_ref_size(int nr_extents) @@ -53,7 +49,7 @@ static inline void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree) { tree->root.rb_node = NULL; tree->last = NULL; - tree->generation = 0; + INIT_LIST_HEAD(&tree->list); spin_lock_init(&tree->lock); } @@ -66,7 +62,9 @@ void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree); struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(int nr_extents); void btrfs_free_leaf_ref(struct btrfs_leaf_ref *ref); struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, - struct btrfs_key *key); + u64 bytenr); int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); int btrfs_remove_leaf_refs(struct btrfs_root *root); int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); + +#endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 543e5ee4033..fcef3cae0c9 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -36,7 +36,6 @@ struct dirty_root { struct list_head list; struct btrfs_root *root; struct btrfs_root *latest_root; - struct btrfs_leaf_ref_tree ref_tree; }; static noinline void put_transaction(struct btrfs_transaction *transaction) @@ -108,13 +107,13 @@ static noinline int record_root_in_trans(struct btrfs_root *root) dirty->latest_root = root; INIT_LIST_HEAD(&dirty->list); - btrfs_leaf_ref_tree_init(&dirty->ref_tree); - dirty->ref_tree.generation = running_trans_id; root->commit_root = btrfs_root_node(root); - root->ref_tree = &dirty->ref_tree; + root->dirty_root = dirty; memcpy(dirty->root, root, sizeof(*root)); + dirty->root->ref_tree = &root->ref_tree_struct; + spin_lock_init(&dirty->root->node_lock); mutex_init(&dirty->root->objectid_mutex); dirty->root->node = root->commit_root; @@ -217,12 +216,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, if (waitqueue_active(&cur_trans->writer_wait)) wake_up(&cur_trans->writer_wait); - if (0 && cur_trans->in_commit && throttle) { + if (throttle && atomic_read(&root->fs_info->throttles)) { DEFINE_WAIT(wait); mutex_unlock(&root->fs_info->trans_mutex); prepare_to_wait(&root->fs_info->transaction_throttle, &wait, TASK_UNINTERRUPTIBLE); - schedule(); + if (atomic_read(&root->fs_info->throttles)) + schedule(); finish_wait(&root->fs_info->transaction_throttle, &wait); mutex_lock(&root->fs_info->trans_mutex); } @@ -333,6 +333,8 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, list_del_init(next); root = list_entry(next, struct btrfs_root, dirty_list); update_cowonly_root(trans, root); + if (root->fs_info->closing) + btrfs_remove_leaf_refs(root); } return 0; } @@ -346,10 +348,8 @@ int btrfs_add_dead_root(struct btrfs_root *root, dirty = kmalloc(sizeof(*dirty), GFP_NOFS); if (!dirty) return -ENOMEM; - btrfs_leaf_ref_tree_init(&dirty->ref_tree); dirty->root = root; dirty->latest_root = latest; - root->ref_tree = NULL; list_add(&dirty->list, dead_list); return 0; } @@ -379,18 +379,14 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, BTRFS_ROOT_TRANS_TAG); BUG_ON(!root->ref_tree); - dirty = container_of(root->ref_tree, struct dirty_root, - ref_tree); + dirty = root->dirty_root; if (root->commit_root == root->node) { WARN_ON(root->node->start != btrfs_root_bytenr(&root->root_item)); - BUG_ON(!btrfs_leaf_ref_tree_empty( - root->ref_tree)); free_extent_buffer(root->commit_root); root->commit_root = NULL; - root->ref_tree = NULL; kfree(dirty->root); kfree(dirty); @@ -410,7 +406,6 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, sizeof(struct btrfs_disk_key)); root->root_item.drop_level = 0; root->commit_root = NULL; - root->ref_tree = NULL; root->root_key.offset = root->fs_info->generation; btrfs_set_root_bytenr(&root->root_item, root->node->start); @@ -485,7 +480,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, while(!list_empty(list)) { struct btrfs_root *root; - dirty = list_entry(list->next, struct dirty_root, list); + dirty = list_entry(list->prev, struct dirty_root, list); list_del_init(&dirty->list); num_bytes = btrfs_root_used(&dirty->root->root_item); @@ -507,7 +502,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, if (err) ret = err; nr = trans->blocks_used; - ret = btrfs_end_transaction_throttle(trans, tree_root); + ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); mutex_unlock(&root->fs_info->drop_mutex); @@ -517,6 +512,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, } BUG_ON(ret); atomic_dec(&root->fs_info->throttles); + wake_up(&root->fs_info->transaction_throttle); mutex_lock(&root->fs_info->alloc_mutex); num_bytes -= btrfs_root_used(&dirty->root->root_item); @@ -539,8 +535,6 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); - btrfs_remove_leaf_refs(dirty->root); - free_extent_buffer(dirty->root->node); kfree(dirty->root); kfree(dirty); @@ -725,10 +719,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, &dirty_fs_roots); BUG_ON(ret); - spin_lock(&root->fs_info->ref_cache_lock); - root->fs_info->running_ref_cache_size = 0; - spin_unlock(&root->fs_info->ref_cache_lock); - ret = btrfs_commit_tree_roots(trans, root); BUG_ON(ret); -- cgit v1.2.3-70-g09d2 From ab78c84de1ce4db1b2a2cef361625ad80abbab3f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Jul 2008 16:15:18 -0400 Subject: Btrfs: Throttle operations if the reference cache gets too large A large reference cache is directly related to a lot of work pending for the cleaner thread. This throttles back new operations based on the size of the reference cache so the cleaner thread will be able to keep up. Overall, this actually makes the FS faster because the cleaner thread will be more likely to find things in cache. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 7 ++++-- fs/btrfs/extent-tree.c | 1 + fs/btrfs/file.c | 1 + fs/btrfs/inode.c | 10 ++++----- fs/btrfs/transaction.c | 59 +++++++++++++++++++++++++++++++++++++------------- fs/btrfs/transaction.h | 1 + 7 files changed, 58 insertions(+), 22 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4eca0aa1ce7..5517dfc6f71 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -580,6 +580,7 @@ struct btrfs_fs_info { int do_barriers; int closing; atomic_t throttles; + atomic_t throttle_gen; u64 total_pinned; struct list_head dirty_cowonly_roots; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index eccdf13a95a..27ffa9b7ddc 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1171,8 +1171,10 @@ static int transaction_kthread(void *arg) vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); mutex_lock(&root->fs_info->transaction_kthread_mutex); - printk("btrfs: total reference cache size %Lu\n", - root->fs_info->total_ref_cache_size); + if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) { + printk("btrfs: total reference cache size %Lu\n", + root->fs_info->total_ref_cache_size); + } mutex_lock(&root->fs_info->trans_mutex); cur = root->fs_info->running_transaction; @@ -1256,6 +1258,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_mapping_init(&fs_info->mapping_tree); atomic_set(&fs_info->nr_async_submits, 0); atomic_set(&fs_info->throttles, 0); + atomic_set(&fs_info->throttle_gen, 0); fs_info->sb = sb; fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0e294cfaa60..6290cf41d64 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2650,6 +2650,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root } } while(1) { + atomic_inc(&root->fs_info->throttle_gen); wret = walk_down_tree(trans, root, path, &level); if (wret > 0) break; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3efec25e34b..ded5281f846 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -974,6 +974,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) btrfs_btree_balance_dirty(root, 1); + btrfs_throttle(root); cond_resched(); } out: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4f977ea5497..7c87f863d6f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2482,7 +2482,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, btrfs_update_inode_block_group(trans, dir); out_unlock: nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); fail: if (drop_inode) { inode_dec_link_count(inode); @@ -2535,7 +2535,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, drop_inode = 1; nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); fail: if (drop_inode) { inode_dec_link_count(inode); @@ -2609,7 +2609,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) out_fail: nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); out_unlock: if (drop_on_err) @@ -3434,7 +3434,7 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, goto out_fail; out_fail: - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); out_unlock: return ret; } @@ -3548,7 +3548,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, out_unlock: nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); out_fail: if (drop_inode) { inode_dec_link_count(inode); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index fcef3cae0c9..b8be6703189 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -202,35 +202,64 @@ static noinline int wait_for_commit(struct btrfs_root *root, return 0; } +void btrfs_throttle(struct btrfs_root *root) +{ + struct btrfs_fs_info *info = root->fs_info; + +harder: + if (atomic_read(&info->throttles)) { + DEFINE_WAIT(wait); + int thr; + int harder_count = 0; + thr = atomic_read(&info->throttle_gen); + + do { + prepare_to_wait(&info->transaction_throttle, + &wait, TASK_UNINTERRUPTIBLE); + if (!atomic_read(&info->throttles)) { + finish_wait(&info->transaction_throttle, &wait); + break; + } + schedule(); + finish_wait(&info->transaction_throttle, &wait); + } while (thr == atomic_read(&info->throttle_gen)); + + if (harder_count < 5 && + info->total_ref_cache_size > 5 * 1024 * 1024) { + harder_count++; + goto harder; + } + + if (harder_count < 10 && + info->total_ref_cache_size > 10 * 1024 * 1024) { + harder_count++; + goto harder; + } + } +} + static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root, int throttle) { struct btrfs_transaction *cur_trans; + struct btrfs_fs_info *info = root->fs_info; - mutex_lock(&root->fs_info->trans_mutex); - cur_trans = root->fs_info->running_transaction; + mutex_lock(&info->trans_mutex); + cur_trans = info->running_transaction; WARN_ON(cur_trans != trans->transaction); WARN_ON(cur_trans->num_writers < 1); cur_trans->num_writers--; if (waitqueue_active(&cur_trans->writer_wait)) wake_up(&cur_trans->writer_wait); - - if (throttle && atomic_read(&root->fs_info->throttles)) { - DEFINE_WAIT(wait); - mutex_unlock(&root->fs_info->trans_mutex); - prepare_to_wait(&root->fs_info->transaction_throttle, &wait, - TASK_UNINTERRUPTIBLE); - if (atomic_read(&root->fs_info->throttles)) - schedule(); - finish_wait(&root->fs_info->transaction_throttle, &wait); - mutex_lock(&root->fs_info->trans_mutex); - } - put_transaction(cur_trans); - mutex_unlock(&root->fs_info->trans_mutex); + mutex_unlock(&info->trans_mutex); memset(trans, 0, sizeof(*trans)); kmem_cache_free(btrfs_trans_handle_cachep, trans); + + if (throttle) + btrfs_throttle(root); + return 0; } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 11fbdeceb26..df2ca2aad1c 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -91,4 +91,5 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, struct btrfs_root *root); +void btrfs_throttle(struct btrfs_root *root); #endif -- cgit v1.2.3-70-g09d2 From f321e4910398cf7922265d269fb17fd26f312571 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Wed, 30 Jul 2008 09:26:11 -0400 Subject: Btrfs: Update and fix mount -o nodatacow To check whether a given file extent is referenced by multiple snapshots, the checker walks down the fs tree through dead root and checks all tree blocks in the path. We can easily detect whether a given tree block is directly referenced by other snapshot. We can also detect any indirect reference from other snapshot by checking reference's generation. The checker can always detect multiple references, but can't reliably detect cases of single reference. So btrfs may do file data cow even there is only one reference. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 9 +-- fs/btrfs/extent-tree.c | 202 ++++++++++++++++++++++++++++++------------------- fs/btrfs/inode.c | 6 +- fs/btrfs/transaction.c | 16 ++-- fs/btrfs/transaction.h | 5 ++ 5 files changed, 142 insertions(+), 96 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5517dfc6f71..83422088c62 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -617,7 +617,7 @@ struct btrfs_leaf_ref_tree { * in ram representation of the tree. extent_root is used for all allocations * and for the extent tree extent_root root. */ -struct dirty_root; +struct btrfs_dirty_root; struct btrfs_root { struct extent_buffer *node; @@ -627,7 +627,7 @@ struct btrfs_root { struct extent_buffer *commit_root; struct btrfs_leaf_ref_tree *ref_tree; struct btrfs_leaf_ref_tree ref_tree_struct; - struct dirty_root *dirty_root; + struct btrfs_dirty_root *dirty_root; struct btrfs_root_item root_item; struct btrfs_key root_key; @@ -1399,9 +1399,8 @@ static inline struct dentry *fdentry(struct file *file) { } /* extent-tree.c */ -u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, - struct btrfs_path *count_path, - u64 expected_owner, u64 first_extent); +int btrfs_cross_ref_exists(struct btrfs_root *root, + struct btrfs_key *key, u64 bytenr); int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6290cf41d64..fe1ddbd2bfd 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -802,70 +802,57 @@ out: return 0; } -u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, - struct btrfs_path *count_path, - u64 expected_owner, - u64 first_extent) + +static int get_reference_status(struct btrfs_root *root, u64 bytenr, + u64 parent_gen, u64 ref_objectid, + u64 *min_generation, u32 *ref_count) { struct btrfs_root *extent_root = root->fs_info->extent_root; struct btrfs_path *path; - u64 bytenr; - u64 found_objectid; - u64 found_owner; + struct extent_buffer *leaf; + struct btrfs_extent_ref *ref_item; + struct btrfs_key key; + struct btrfs_key found_key; u64 root_objectid = root->root_key.objectid; - u32 total_count = 0; - u32 extent_refs; - u32 cur_count; + u64 ref_generation; u32 nritems; int ret; - struct btrfs_key key; - struct btrfs_key found_key; - struct extent_buffer *l; - struct btrfs_extent_item *item; - struct btrfs_extent_ref *ref_item; - int level = -1; - /* FIXME, needs locking */ - BUG(); - - mutex_lock(&root->fs_info->alloc_mutex); - path = btrfs_alloc_path(); -again: - if (level == -1) - bytenr = first_extent; - else - bytenr = count_path->nodes[level]->start; - - cur_count = 0; key.objectid = bytenr; key.offset = 0; + key.type = BTRFS_EXTENT_ITEM_KEY; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + path = btrfs_alloc_path(); + mutex_lock(&root->fs_info->alloc_mutex); ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); if (ret < 0) goto out; BUG_ON(ret == 0); - l = path->nodes[0]; - btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.objectid != bytenr || found_key.type != BTRFS_EXTENT_ITEM_KEY) { + ret = 1; goto out; } - item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); - extent_refs = btrfs_extent_refs(l, item); + *ref_count = 0; + *min_generation = (u64)-1; + while (1) { - l = path->nodes[0]; - nritems = btrfs_header_nritems(l); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); if (path->slots[0] >= nritems) { ret = btrfs_next_leaf(extent_root, path); + if (ret < 0) + goto out; if (ret == 0) continue; break; } - btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.objectid != bytenr) break; @@ -874,57 +861,120 @@ again: continue; } - cur_count++; - ref_item = btrfs_item_ptr(l, path->slots[0], + ref_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); - found_objectid = btrfs_ref_root(l, ref_item); - - if (found_objectid != root_objectid) { - total_count = 2; - goto out; - } - if (level == -1) { - found_owner = btrfs_ref_objectid(l, ref_item); - if (found_owner != expected_owner) { - total_count = 2; - goto out; - } - /* - * nasty. we don't count a reference held by - * the running transaction. This allows nodatacow - * to avoid cow most of the time - */ - if (found_owner >= BTRFS_FIRST_FREE_OBJECTID && - btrfs_ref_generation(l, ref_item) == - root->fs_info->generation) { - extent_refs--; - } + ref_generation = btrfs_ref_generation(leaf, ref_item); + /* + * For (parent_gen > 0 && parent_gen > ref_gen): + * + * we reach here through the oldest root, therefore + * all other reference from same snapshot should have + * a larger generation. + */ + if ((root_objectid != btrfs_ref_root(leaf, ref_item)) || + (parent_gen > 0 && parent_gen > ref_generation) || + (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID && + ref_objectid != btrfs_ref_objectid(leaf, ref_item))) { + if (ref_count) + *ref_count = 2; + break; } - total_count = 1; + + *ref_count = 1; + if (*min_generation > ref_generation) + *min_generation = ref_generation; + path->slots[0]++; } - /* - * if there is more than one reference against a data extent, - * we have to assume the other ref is another snapshot - */ - if (level == -1 && extent_refs > 1) { - total_count = 2; + ret = 0; +out: + mutex_unlock(&root->fs_info->alloc_mutex); + btrfs_free_path(path); + return ret; +} + +int btrfs_cross_ref_exists(struct btrfs_root *root, + struct btrfs_key *key, u64 bytenr) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *old_root; + struct btrfs_path *path = NULL; + struct extent_buffer *eb; + struct btrfs_file_extent_item *item; + u64 ref_generation; + u64 min_generation; + u64 extent_start; + u32 ref_count; + int level; + int ret; + + BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY); + ret = get_reference_status(root, bytenr, 0, key->objectid, + &min_generation, &ref_count); + if (ret) + return ret; + + if (ref_count != 1) + return 1; + + trans = btrfs_start_transaction(root, 0); + old_root = root->dirty_root->root; + ref_generation = old_root->root_key.offset; + + /* all references are created in running transaction */ + if (min_generation > ref_generation) { + ret = 0; goto out; } - if (cur_count == 0) { - total_count = 0; + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; goto out; } - if (level >= 0 && root->node == count_path->nodes[level]) + + path->skip_locking = 1; + /* if no item found, the extent is referenced by other snapshot */ + ret = btrfs_search_slot(NULL, old_root, key, path, 0, 0); + if (ret) goto out; - level++; - btrfs_release_path(root, path); - goto again; + eb = path->nodes[0]; + item = btrfs_item_ptr(eb, path->slots[0], + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(eb, item) != BTRFS_FILE_EXTENT_REG || + btrfs_file_extent_disk_bytenr(eb, item) != bytenr) { + ret = 1; + goto out; + } + + for (level = BTRFS_MAX_LEVEL - 1; level >= -1; level--) { + if (level >= 0) { + eb = path->nodes[level]; + if (!eb) + continue; + extent_start = eb->start; + } else + extent_start = bytenr; + + ret = get_reference_status(root, extent_start, ref_generation, + 0, &min_generation, &ref_count); + if (ret) + goto out; + + if (ref_count != 1) { + ret = 1; + goto out; + } + if (level >= 0) + ref_generation = btrfs_header_generation(eb); + } + ret = 0; out: - btrfs_free_path(path); - mutex_unlock(&root->fs_info->alloc_mutex); - return total_count; + if (path) + btrfs_free_path(path); + btrfs_end_transaction(trans, root); + return ret; } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7c87f863d6f..3aa82cec6bf 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -249,11 +249,8 @@ again: if (bytenr == 0) goto not_found; - if (btrfs_count_snapshots_in_path(root, path, inode->i_ino, - bytenr) != 1) { + if (btrfs_cross_ref_exists(root, &found_key, bytenr)) goto not_found; - } - /* * we may be called by the resizer, make sure we're inside * the limits of the FS @@ -277,6 +274,7 @@ loop: goto again; not_found: + btrfs_release_path(root, path); cow_file_range(inode, start, end); start = end + 1; goto loop; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b8be6703189..216f3157162 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -32,12 +32,6 @@ extern struct kmem_cache *btrfs_transaction_cachep; #define BTRFS_ROOT_TRANS_TAG 0 -struct dirty_root { - struct list_head list; - struct btrfs_root *root; - struct btrfs_root *latest_root; -}; - static noinline void put_transaction(struct btrfs_transaction *transaction) { WARN_ON(transaction->use_count == 0); @@ -91,7 +85,7 @@ static noinline int join_transaction(struct btrfs_root *root) static noinline int record_root_in_trans(struct btrfs_root *root) { - struct dirty_root *dirty; + struct btrfs_dirty_root *dirty; u64 running_trans_id = root->fs_info->running_transaction->transid; if (root->ref_cows && root->last_trans < running_trans_id) { WARN_ON(root == root->fs_info->extent_root); @@ -372,7 +366,7 @@ int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, struct list_head *dead_list) { - struct dirty_root *dirty; + struct btrfs_dirty_root *dirty; dirty = kmalloc(sizeof(*dirty), GFP_NOFS); if (!dirty) @@ -387,7 +381,7 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, struct radix_tree_root *radix, struct list_head *list) { - struct dirty_root *dirty; + struct btrfs_dirty_root *dirty; struct btrfs_root *gang[8]; struct btrfs_root *root; int i; @@ -498,7 +492,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) static noinline int drop_dirty_roots(struct btrfs_root *tree_root, struct list_head *list) { - struct dirty_root *dirty; + struct btrfs_dirty_root *dirty; struct btrfs_trans_handle *trans; unsigned long nr; u64 num_bytes; @@ -509,7 +503,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, while(!list_empty(list)) { struct btrfs_root *root; - dirty = list_entry(list->prev, struct dirty_root, list); + dirty = list_entry(list->prev, struct btrfs_dirty_root, list); list_del_init(&dirty->list); num_bytes = btrfs_root_used(&dirty->root->root_item); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index df2ca2aad1c..f5adb23151f 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -52,6 +52,11 @@ struct btrfs_pending_snapshot { struct list_head list; }; +struct btrfs_dirty_root { + struct list_head list; + struct btrfs_root *root; + struct btrfs_root *latest_root; +}; static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, struct inode *inode) -- cgit v1.2.3-70-g09d2 From bcc63abbf3e9bf948a1b0129b3e6120ec7d7f698 Mon Sep 17 00:00:00 2001 From: Yan Date: Wed, 30 Jul 2008 16:29:20 -0400 Subject: Btrfs: implement memory reclaim for leaf reference cache The memory reclaiming issue happens when snapshot exists. In that case, some cache entries may not be used during old snapshot dropping, so they will remain in the cache until umount. The patch adds a field to struct btrfs_leaf_ref to record create time. Besides, the patch makes all dead roots of a given snapshot linked together in order of create time. After a old snapshot was completely dropped, we check the dead root list and remove all cache entries created before the oldest dead root in the list. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 1 - fs/btrfs/ctree.h | 3 ++- fs/btrfs/dir-item.c | 1 - fs/btrfs/disk-io.c | 5 +++-- fs/btrfs/extent-tree.c | 18 +++++++++--------- fs/btrfs/extent_io.c | 1 - fs/btrfs/file-item.c | 1 - fs/btrfs/file.c | 1 - fs/btrfs/inode.c | 22 +++++++++++----------- fs/btrfs/locking.c | 1 - fs/btrfs/print-tree.c | 1 - fs/btrfs/ref-cache.c | 48 +++++++++++++++++++++++++----------------------- fs/btrfs/ref-cache.h | 11 ++++++----- fs/btrfs/transaction.c | 40 ++++++++++++++++++++++++++++++---------- fs/btrfs/volumes.c | 1 - 15 files changed, 86 insertions(+), 69 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 245eb00435d..c4792062dd5 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -3275,4 +3275,3 @@ int btrfs_previous_item(struct btrfs_root *root, } return 1; } - diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 83422088c62..be16cd49ef6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -666,7 +666,8 @@ struct btrfs_root { /* the dirty list is only used by non-reference counted roots */ struct list_head dirty_list; - spinlock_t orphan_lock; + spinlock_t list_lock; + struct list_head dead_list; struct list_head orphan_list; }; diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index eb4dd3d75cf..125094617fe 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -340,4 +340,3 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, } return 0; } - diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ec1ba8ddb35..e826730d750 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -735,8 +735,9 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, INIT_LIST_HEAD(&root->dirty_list); INIT_LIST_HEAD(&root->orphan_list); + INIT_LIST_HEAD(&root->dead_list); spin_lock_init(&root->node_lock); - spin_lock_init(&root->orphan_lock); + spin_lock_init(&root->list_lock); mutex_init(&root->objectid_mutex); btrfs_leaf_ref_tree_init(&root->ref_tree_struct); @@ -1717,7 +1718,7 @@ int close_ctree(struct btrfs_root *root) printk("btrfs: at umount reference cache size %Lu\n", fs_info->total_ref_cache_size); } - + if (fs_info->extent_root->node) free_extent_buffer(fs_info->extent_root->node); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fe1ddbd2bfd..37ca8df30c3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -867,8 +867,8 @@ static int get_reference_status(struct btrfs_root *root, u64 bytenr, /* * For (parent_gen > 0 && parent_gen > ref_gen): * - * we reach here through the oldest root, therefore - * all other reference from same snapshot should have + * we reach here through the oldest root, therefore + * all other reference from same snapshot should have * a larger generation. */ if ((root_objectid != btrfs_ref_root(leaf, ref_item)) || @@ -954,7 +954,7 @@ int btrfs_cross_ref_exists(struct btrfs_root *root, if (!eb) continue; extent_start = eb->start; - } else + } else extent_start = bytenr; ret = get_reference_status(root, extent_start, ref_generation, @@ -1048,7 +1048,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_leaf_ref *ref; struct btrfs_extent_info *info; - ref = btrfs_alloc_leaf_ref(nr_file_extents); + ref = btrfs_alloc_leaf_ref(root, nr_file_extents); if (!ref) { WARN_ON(1); goto out; @@ -1059,7 +1059,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, ref->generation = btrfs_header_generation(buf); ref->nritems = nr_file_extents; info = ref->extents; - + for (i = 0; nr_file_extents > 0 && i < nritems; i++) { u64 disk_bytenr; btrfs_item_key_to_cpu(buf, &key, i); @@ -1085,7 +1085,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, BUG_ON(!root->ref_tree); ret = btrfs_add_leaf_ref(root, ref); WARN_ON(ret); - btrfs_free_leaf_ref(ref); + btrfs_free_leaf_ref(root, ref); } out: return 0; @@ -2316,7 +2316,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, } static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_root *root, struct extent_buffer *leaf) { u64 leaf_owner; @@ -2367,7 +2367,7 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, } static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_root *root, struct btrfs_leaf_ref *ref) { int i; @@ -2521,7 +2521,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, ret = drop_leaf_ref(trans, root, ref); BUG_ON(ret); btrfs_remove_leaf_ref(root, ref); - btrfs_free_leaf_ref(ref); + btrfs_free_leaf_ref(root, ref); *level = 0; break; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 964ec1622d6..5368e3b6eb9 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3497,4 +3497,3 @@ out: return ret; } EXPORT_SYMBOL(try_release_extent_buffer); - diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index afe42d00b5a..2311061f070 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -422,4 +422,3 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, BUG_ON(ret); return ret; } - diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ded5281f846..412ab4a2638 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1095,4 +1095,3 @@ struct file_operations btrfs_file_operations = { .compat_ioctl = btrfs_ioctl, #endif }; - diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3aa82cec6bf..7af8be076ee 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -835,17 +835,17 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; int ret = 0; - spin_lock(&root->orphan_lock); + spin_lock(&root->list_lock); /* already on the orphan list, we're good */ if (!list_empty(&BTRFS_I(inode)->i_orphan)) { - spin_unlock(&root->orphan_lock); + spin_unlock(&root->list_lock); return 0; } list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); - spin_unlock(&root->orphan_lock); + spin_unlock(&root->list_lock); /* * insert an orphan item to track this unlinked/truncated file @@ -864,20 +864,20 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; int ret = 0; - spin_lock(&root->orphan_lock); + spin_lock(&root->list_lock); if (list_empty(&BTRFS_I(inode)->i_orphan)) { - spin_unlock(&root->orphan_lock); + spin_unlock(&root->list_lock); return 0; } list_del_init(&BTRFS_I(inode)->i_orphan); if (!trans) { - spin_unlock(&root->orphan_lock); + spin_unlock(&root->list_lock); return 0; } - spin_unlock(&root->orphan_lock); + spin_unlock(&root->list_lock); ret = btrfs_del_orphan_item(trans, root, inode->i_ino); @@ -973,9 +973,9 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) * add this inode to the orphan list so btrfs_orphan_del does * the proper thing when we hit it */ - spin_lock(&root->orphan_lock); + spin_lock(&root->list_lock); list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); - spin_unlock(&root->orphan_lock); + spin_unlock(&root->list_lock); /* * if this is a bad inode, means we actually succeeded in @@ -3269,13 +3269,13 @@ void btrfs_destroy_inode(struct inode *inode) BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED) posix_acl_release(BTRFS_I(inode)->i_default_acl); - spin_lock(&BTRFS_I(inode)->root->orphan_lock); + spin_lock(&BTRFS_I(inode)->root->list_lock); if (!list_empty(&BTRFS_I(inode)->i_orphan)) { printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" " list\n", inode->i_ino); dump_stack(); } - spin_unlock(&BTRFS_I(inode)->root->orphan_lock); + spin_unlock(&BTRFS_I(inode)->root->list_lock); while(1) { ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index d617c29787f..d43e14c7471 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -56,4 +56,3 @@ int btrfs_tree_locked(struct extent_buffer *eb) { return mutex_is_locked(&eb->mutex); } - diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 14d86372030..f1374d597a1 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -198,4 +198,3 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) free_extent_buffer(next); } } - diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c index ec9587784a3..272b9890c98 100644 --- a/fs/btrfs/ref-cache.c +++ b/fs/btrfs/ref-cache.c @@ -21,12 +21,18 @@ #include "ref-cache.h" #include "transaction.h" -struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(int nr_extents) +struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root, + int nr_extents) { struct btrfs_leaf_ref *ref; + size_t size = btrfs_leaf_ref_size(nr_extents); - ref = kmalloc(btrfs_leaf_ref_size(nr_extents), GFP_NOFS); + ref = kmalloc(size, GFP_NOFS); if (ref) { + spin_lock(&root->fs_info->ref_cache_lock); + root->fs_info->total_ref_cache_size += size; + spin_unlock(&root->fs_info->ref_cache_lock); + memset(ref, 0, sizeof(*ref)); atomic_set(&ref->usage, 1); INIT_LIST_HEAD(&ref->list); @@ -34,14 +40,20 @@ struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(int nr_extents) return ref; } -void btrfs_free_leaf_ref(struct btrfs_leaf_ref *ref) +void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) { if (!ref) return; WARN_ON(atomic_read(&ref->usage) == 0); if (atomic_dec_and_test(&ref->usage)) { + size_t size = btrfs_leaf_ref_size(ref->nritems); + BUG_ON(ref->in_tree); kfree(ref); + + spin_lock(&root->fs_info->ref_cache_lock); + root->fs_info->total_ref_cache_size -= size; + spin_unlock(&root->fs_info->ref_cache_lock); } } @@ -64,7 +76,7 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, else return parent; } - + entry = rb_entry(node, struct btrfs_leaf_ref, rb_node); entry->in_tree = 1; rb_link_node(node, parent, p); @@ -91,9 +103,8 @@ static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) return NULL; } -int btrfs_remove_leaf_refs(struct btrfs_root *root) +int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen) { - struct rb_node *rb; struct btrfs_leaf_ref *ref = NULL; struct btrfs_leaf_ref_tree *tree = root->ref_tree; @@ -101,17 +112,18 @@ int btrfs_remove_leaf_refs(struct btrfs_root *root) return 0; spin_lock(&tree->lock); - while(!btrfs_leaf_ref_tree_empty(tree)) { - rb = rb_first(&tree->root); - ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node); + while(!list_empty(&tree->list)) { + ref = list_entry(tree->list.next, struct btrfs_leaf_ref, list); + BUG_ON(!ref->in_tree); + if (ref->root_gen > max_root_gen) + break; + rb_erase(&ref->rb_node, &tree->root); ref->in_tree = 0; list_del_init(&ref->list); spin_unlock(&tree->lock); - - btrfs_free_leaf_ref(ref); - + btrfs_free_leaf_ref(root, ref); cond_resched(); spin_lock(&tree->lock); } @@ -143,7 +155,6 @@ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) { int ret = 0; struct rb_node *rb; - size_t size = btrfs_leaf_ref_size(ref->nritems); struct btrfs_leaf_ref_tree *tree = root->ref_tree; spin_lock(&tree->lock); @@ -151,9 +162,6 @@ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) if (rb) { ret = -EEXIST; } else { - spin_lock(&root->fs_info->ref_cache_lock); - root->fs_info->total_ref_cache_size += size; - spin_unlock(&root->fs_info->ref_cache_lock); atomic_inc(&ref->usage); list_add_tail(&ref->list, &tree->list); } @@ -163,15 +171,10 @@ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) { - size_t size = btrfs_leaf_ref_size(ref->nritems); struct btrfs_leaf_ref_tree *tree = root->ref_tree; BUG_ON(!ref->in_tree); spin_lock(&tree->lock); - - spin_lock(&root->fs_info->ref_cache_lock); - root->fs_info->total_ref_cache_size -= size; - spin_unlock(&root->fs_info->ref_cache_lock); rb_erase(&ref->rb_node, &tree->root); ref->in_tree = 0; @@ -179,7 +182,6 @@ int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) spin_unlock(&tree->lock); - btrfs_free_leaf_ref(ref); + btrfs_free_leaf_ref(root, ref); return 0; } - diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h index 823c049f72f..c361b321c0c 100644 --- a/fs/btrfs/ref-cache.h +++ b/fs/btrfs/ref-cache.h @@ -30,6 +30,7 @@ struct btrfs_leaf_ref { int in_tree; atomic_t usage; + u64 root_gen; u64 bytenr; u64 owner; u64 generation; @@ -41,14 +42,13 @@ struct btrfs_leaf_ref { static inline size_t btrfs_leaf_ref_size(int nr_extents) { - return sizeof(struct btrfs_leaf_ref) + + return sizeof(struct btrfs_leaf_ref) + sizeof(struct btrfs_extent_info) * nr_extents; } static inline void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree) { tree->root.rb_node = NULL; - tree->last = NULL; INIT_LIST_HEAD(&tree->list); spin_lock_init(&tree->lock); } @@ -59,12 +59,13 @@ static inline int btrfs_leaf_ref_tree_empty(struct btrfs_leaf_ref_tree *tree) } void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree); -struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(int nr_extents); -void btrfs_free_leaf_ref(struct btrfs_leaf_ref *ref); +struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root, + int nr_extents); +void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, u64 bytenr); int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); -int btrfs_remove_leaf_refs(struct btrfs_root *root); +int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen); int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 216f3157162..52c5524896a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -98,20 +98,24 @@ static noinline int record_root_in_trans(struct btrfs_root *root) BUG_ON(!dirty); dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS); BUG_ON(!dirty->root); - dirty->latest_root = root; INIT_LIST_HEAD(&dirty->list); root->commit_root = btrfs_root_node(root); - root->dirty_root = dirty; memcpy(dirty->root, root, sizeof(*root)); - dirty->root->ref_tree = &root->ref_tree_struct; - spin_lock_init(&dirty->root->node_lock); + spin_lock_init(&dirty->root->list_lock); mutex_init(&dirty->root->objectid_mutex); + INIT_LIST_HEAD(&dirty->root->dead_list); dirty->root->node = root->commit_root; dirty->root->commit_root = NULL; + + spin_lock(&root->list_lock); + list_add(&dirty->root->dead_list, &root->dead_list); + spin_unlock(&root->list_lock); + + root->dirty_root = dirty; } else { WARN_ON(1); } @@ -356,8 +360,6 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, list_del_init(next); root = list_entry(next, struct btrfs_root, dirty_list); update_cowonly_root(trans, root); - if (root->fs_info->closing) - btrfs_remove_leaf_refs(root); } return 0; } @@ -410,7 +412,11 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, free_extent_buffer(root->commit_root); root->commit_root = NULL; - + + spin_lock(&root->list_lock); + list_del_init(&dirty->root->dead_list); + spin_unlock(&root->list_lock); + kfree(dirty->root); kfree(dirty); @@ -497,6 +503,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, unsigned long nr; u64 num_bytes; u64 bytes_used; + u64 max_useless; int ret = 0; int err; @@ -554,10 +561,25 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, } mutex_unlock(&root->fs_info->drop_mutex); + spin_lock(&root->list_lock); + list_del_init(&dirty->root->dead_list); + if (!list_empty(&root->dead_list)) { + struct btrfs_root *oldest; + oldest = list_entry(root->dead_list.prev, + struct btrfs_root, dead_list); + max_useless = oldest->root_key.offset - 1; + } else { + max_useless = root->root_key.offset - 1; + } + spin_unlock(&root->list_lock); + nr = trans->blocks_used; ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); + ret = btrfs_remove_leaf_refs(root, max_useless); + BUG_ON(ret); + free_extent_buffer(dirty->root->node); kfree(dirty->root); kfree(dirty); @@ -785,10 +807,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, put_transaction(cur_trans); put_transaction(cur_trans); + list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots); if (root->fs_info->closing) list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots); - else - list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots); mutex_unlock(&root->fs_info->trans_mutex); kmem_cache_free(btrfs_trans_handle_cachep, trans); @@ -814,4 +835,3 @@ again: } return 0; } - diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5e6ee7a6f73..18db4cbe279 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2527,4 +2527,3 @@ again: error: return ret; } - -- cgit v1.2.3-70-g09d2 From 47ac14fa0f5306c6b97203f4f086bf1fa21dfddb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 31 Jul 2008 09:46:18 -0400 Subject: Btrfs: Add missing hunk from Yan Zheng's cache reclaim patch Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 37ca8df30c3..4765248000f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1054,6 +1054,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, goto out; } + ref->root_gen = root->root_key.offset; ref->bytenr = buf->start; ref->owner = btrfs_header_owner(buf); ref->generation = btrfs_header_generation(buf); -- cgit v1.2.3-70-g09d2 From 37d1aeee3990385e9bb436c50c2f7e120a668df6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 31 Jul 2008 10:48:37 -0400 Subject: Btrfs: Throttle tuning This avoids waiting for transactions with pages locked by breaking out the code to wait for the current transaction to close into a function called by btrfs_throttle. It also lowers the limits for where we start throttling. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 3 +++ fs/btrfs/file.c | 2 +- fs/btrfs/transaction.c | 38 +++++++++++++++++++++++++++----------- 3 files changed, 31 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4765248000f..b003b4364dd 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2526,6 +2526,9 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, *level = 0; break; } + if (printk_ratelimit()) + printk("leaf ref miss for bytenr %llu\n", + (unsigned long long)bytenr); } next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 388ac397c2c..d3f2fe0b7c6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -253,7 +253,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, end_of_last_block = start_pos + num_bytes - 1; lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); - trans = btrfs_start_transaction(root, 1); + trans = btrfs_join_transaction(root, 1); if (!trans) { err = -ENOMEM; goto out_unlock; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 52c5524896a..66af5140c8c 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -124,17 +124,12 @@ static noinline int record_root_in_trans(struct btrfs_root *root) return 0; } -struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, - int num_blocks, int join) +static void wait_current_trans(struct btrfs_root *root) { - struct btrfs_trans_handle *h = - kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); struct btrfs_transaction *cur_trans; - int ret; - mutex_lock(&root->fs_info->trans_mutex); cur_trans = root->fs_info->running_transaction; - if (cur_trans && cur_trans->blocked && !join) { + if (cur_trans && cur_trans->blocked) { DEFINE_WAIT(wait); cur_trans->use_count++; while(1) { @@ -154,6 +149,18 @@ struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, } put_transaction(cur_trans); } +} + +struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, + int num_blocks, int join) +{ + struct btrfs_trans_handle *h = + kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); + int ret; + + mutex_lock(&root->fs_info->trans_mutex); + if (!join) + wait_current_trans(root); ret = join_transaction(root); BUG_ON(ret); @@ -200,7 +207,7 @@ static noinline int wait_for_commit(struct btrfs_root *root, return 0; } -void btrfs_throttle(struct btrfs_root *root) +static void throttle_on_drops(struct btrfs_root *root) { struct btrfs_fs_info *info = root->fs_info; @@ -223,19 +230,28 @@ harder: } while (thr == atomic_read(&info->throttle_gen)); if (harder_count < 5 && - info->total_ref_cache_size > 5 * 1024 * 1024) { + info->total_ref_cache_size > 1 * 1024 * 1024) { harder_count++; goto harder; } if (harder_count < 10 && - info->total_ref_cache_size > 10 * 1024 * 1024) { + info->total_ref_cache_size > 5 * 1024 * 1024) { harder_count++; goto harder; } } } +void btrfs_throttle(struct btrfs_root *root) +{ + mutex_lock(&root->fs_info->trans_mutex); + wait_current_trans(root); + mutex_unlock(&root->fs_info->trans_mutex); + + throttle_on_drops(root); +} + static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root, int throttle) { @@ -256,7 +272,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_trans_handle_cachep, trans); if (throttle) - btrfs_throttle(root); + throttle_on_drops(root); return 0; } -- cgit v1.2.3-70-g09d2 From f87f057b49ee52cf5c627ab27a706e3252767c9f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 1 Aug 2008 11:27:23 -0400 Subject: Btrfs: Improve and cleanup locking done by walk_down_tree While dropping snapshots, walk_down_tree does most of the work of checking reference counts and limiting tree traversal to just the blocks that we are freeing. It dropped and held the allocation mutex in strange and confusing ways, this commit changes it to only hold the mutex while actually freeing a block. The rest of the checks around reference counts should be safe without the lock because we only allow one process in btrfs_drop_snapshot at a time. Other processes dropping reference counts should not drop it to 1 because their tree roots already have an extra ref on the block. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 85 +++++++++++++++++++++++++++++++++----------------- fs/btrfs/file.c | 13 +++++--- fs/btrfs/ioctl.c | 6 ++++ 3 files changed, 70 insertions(+), 34 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b003b4364dd..58bceeeda9c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2333,8 +2333,6 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, leaf_owner = btrfs_header_owner(leaf); leaf_generation = btrfs_header_generation(leaf); - mutex_unlock(&root->fs_info->alloc_mutex); - for (i = 0; i < nritems; i++) { u64 disk_bytenr; cond_resched(); @@ -2362,8 +2360,6 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->alloc_mutex); BUG_ON(ret); } - - mutex_lock(&root->fs_info->alloc_mutex); return 0; } @@ -2375,7 +2371,6 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, int ret; struct btrfs_extent_info *info = ref->extents; - mutex_unlock(&root->fs_info->alloc_mutex); for (i = 0; i < ref->nritems; i++) { mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, @@ -2386,7 +2381,6 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, BUG_ON(ret); info++; } - mutex_lock(&root->fs_info->alloc_mutex); return 0; } @@ -2440,10 +2434,39 @@ int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len, u32 *refs) { int ret; - mutex_unlock(&root->fs_info->alloc_mutex); + ret = lookup_extent_ref(NULL, root, start, len, refs); + BUG_ON(ret); + +#if 0 // some debugging code in case we see problems here + /* if the refs count is one, it won't get increased again. But + * if the ref count is > 1, someone may be decreasing it at + * the same time we are. + */ + if (*refs != 1) { + struct extent_buffer *eb = NULL; + eb = btrfs_find_create_tree_block(root, start, len); + if (eb) + btrfs_tree_lock(eb); + + mutex_lock(&root->fs_info->alloc_mutex); + ret = lookup_extent_ref(NULL, root, start, len, refs); + BUG_ON(ret); + mutex_unlock(&root->fs_info->alloc_mutex); + + if (eb) { + btrfs_tree_unlock(eb); + free_extent_buffer(eb); + } + if (*refs == 1) { + printk("block %llu went down to one during drop_snap\n", + (unsigned long long)start); + } + + } +#endif + cond_resched(); - mutex_lock(&root->fs_info->alloc_mutex); return ret; } @@ -2467,8 +2490,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, int ret; u32 refs; - mutex_lock(&root->fs_info->alloc_mutex); - WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start, @@ -2507,13 +2528,21 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, root_owner = btrfs_header_owner(parent); root_gen = btrfs_header_generation(parent); path->slots[*level]++; + + mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, bytenr, blocksize, root_owner, root_gen, 0, 0, 1); BUG_ON(ret); + mutex_unlock(&root->fs_info->alloc_mutex); continue; } - + /* + * at this point, we have a single ref, and since the + * only place referencing this extent is a dead root + * the reference count should never go higher. + * So, we don't need to check it again + */ if (*level == 1) { struct btrfs_key key; btrfs_node_key_to_cpu(cur, &key, path->slots[*level]); @@ -2533,33 +2562,23 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { free_extent_buffer(next); - mutex_unlock(&root->fs_info->alloc_mutex); if (path->slots[*level] == 0) reada_walk_down(root, cur, path->slots[*level]); next = read_tree_block(root, bytenr, blocksize, ptr_gen); cond_resched(); - mutex_lock(&root->fs_info->alloc_mutex); - - /* we've dropped the lock, double check */ +#if 0 + /* + * this is a debugging check and can go away + * the ref should never go all the way down to 1 + * at this point + */ ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs); BUG_ON(ret); - if (refs != 1) { - parent = path->nodes[*level]; - root_owner = btrfs_header_owner(parent); - root_gen = btrfs_header_generation(parent); - - path->slots[*level]++; - free_extent_buffer(next); - ret = __btrfs_free_extent(trans, root, bytenr, - blocksize, - root_owner, - root_gen, 0, 0, 1); - BUG_ON(ret); - continue; - } + WARN_ON(refs != 1); +#endif } WARN_ON(*level <= 0); if (path->nodes[*level-1]) @@ -2584,6 +2603,8 @@ out: root_owner = btrfs_header_owner(parent); root_gen = btrfs_header_generation(parent); + + mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, bytenr, blocksize, root_owner, root_gen, 0, 0, 1); free_extent_buffer(path->nodes[*level]); @@ -2591,6 +2612,7 @@ out: *level += 1; BUG_ON(ret); mutex_unlock(&root->fs_info->alloc_mutex); + cond_resched(); return 0; } @@ -2834,6 +2856,11 @@ again: } set_page_extent_mapped(page); + /* + * make sure page_mkwrite is called for this page if userland + * wants to change it from mmap + */ + clear_page_dirty_for_io(page); set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c78f184ee5c..8915f2dc1bc 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -338,6 +338,13 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1); BUG_ON(err); mutex_unlock(&BTRFS_I(inode)->extent_mutex); + + /* + * an ugly way to do all the prop accounting around + * the page bits and mapping tags + */ + set_page_writeback(pages[0]); + end_page_writeback(pages[0]); did_inline = 1; } if (end_pos > isize) { @@ -833,11 +840,7 @@ again: start_pos, last_pos - 1, GFP_NOFS); } for (i = 0; i < num_pages; i++) { -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - ClearPageDirty(pages[i]); -#else - cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); -#endif + clear_page_dirty_for_io(pages[i]); set_page_extent_mapped(pages[i]); WARN_ON(!PageLocked(pages[i])); } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5e627746c4e..224da287b3e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -268,6 +268,12 @@ again: } set_page_extent_mapped(page); + /* + * this makes sure page_mkwrite is called on the + * page if it is dirtied again later + */ + clear_page_dirty_for_io(page); + set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); -- cgit v1.2.3-70-g09d2 From 18e35e0ab337ec99c7e03e9ae917745a352c0bb1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 1 Aug 2008 13:11:41 -0400 Subject: Btrfs: Throttle less often waiting for snapshots to delete Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 7 +++++-- fs/btrfs/transaction.c | 14 -------------- 2 files changed, 5 insertions(+), 16 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 58bceeeda9c..74bcd48a9c4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2535,6 +2535,10 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, root_gen, 0, 0, 1); BUG_ON(ret); mutex_unlock(&root->fs_info->alloc_mutex); + + atomic_inc(&root->fs_info->throttle_gen); + wake_up(&root->fs_info->transaction_throttle); + continue; } /* @@ -2603,7 +2607,6 @@ out: root_owner = btrfs_header_owner(parent); root_gen = btrfs_header_generation(parent); - mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, bytenr, blocksize, root_owner, root_gen, 0, 0, 1); @@ -2726,7 +2729,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root } } while(1) { - atomic_inc(&root->fs_info->throttle_gen); wret = walk_down_tree(trans, root, path, &level); if (wret > 0) break; @@ -2742,6 +2744,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root ret = -EAGAIN; break; } + atomic_inc(&root->fs_info->throttle_gen); wake_up(&root->fs_info->transaction_throttle); } for (i = 0; i <= orig_level; i++) { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 66af5140c8c..a6877949930 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -211,11 +211,9 @@ static void throttle_on_drops(struct btrfs_root *root) { struct btrfs_fs_info *info = root->fs_info; -harder: if (atomic_read(&info->throttles)) { DEFINE_WAIT(wait); int thr; - int harder_count = 0; thr = atomic_read(&info->throttle_gen); do { @@ -228,18 +226,6 @@ harder: schedule(); finish_wait(&info->transaction_throttle, &wait); } while (thr == atomic_read(&info->throttle_gen)); - - if (harder_count < 5 && - info->total_ref_cache_size > 1 * 1024 * 1024) { - harder_count++; - goto harder; - } - - if (harder_count < 10 && - info->total_ref_cache_size > 5 * 1024 * 1024) { - harder_count++; - goto harder; - } } } -- cgit v1.2.3-70-g09d2 From 65b51a009e29e64c0951f21ea17fdc66bbb0fbd7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 1 Aug 2008 15:11:20 -0400 Subject: btrfs_search_slot: reduce lock contention by cowing in two stages A btree block cow has two parts, the first is to allocate a destination block and the second is to copy the old bock over. The first part needs locks in the extent allocation tree, and may need to do IO. This changeset splits that into a separate function that can be called without any tree locks held. btrfs_search_slot is changed to drop its path and start over if it has to COW a contended block. This often means that many writers will pre-alloc a new destination for a the same contended block, but they cache their prealloc for later use on lower levels in the tree. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 147 ++++++++++++++++++++++++++++++++++++++++--------- fs/btrfs/ctree.h | 6 +- fs/btrfs/extent-tree.c | 49 ++++++++++------- fs/btrfs/locking.c | 16 ++++++ fs/btrfs/locking.h | 1 + fs/btrfs/transaction.c | 2 +- 6 files changed, 173 insertions(+), 48 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c4792062dd5..7114faafa9d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -181,7 +181,8 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, struct extent_buffer **cow_ret, - u64 search_start, u64 empty_size) + u64 search_start, u64 empty_size, + u64 prealloc_dest) { u64 root_gen; struct extent_buffer *cow; @@ -216,10 +217,27 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, } else { first_key.objectid = 0; } - cow = btrfs_alloc_free_block(trans, root, buf->len, - root->root_key.objectid, - root_gen, first_key.objectid, level, - search_start, empty_size); + if (prealloc_dest) { + struct btrfs_key ins; + + ins.objectid = prealloc_dest; + ins.offset = buf->len; + ins.type = BTRFS_EXTENT_ITEM_KEY; + + ret = btrfs_alloc_reserved_extent(trans, root, + root->root_key.objectid, + root_gen, level, + first_key.objectid, + &ins); + BUG_ON(ret); + cow = btrfs_init_new_buffer(trans, root, prealloc_dest, + buf->len); + } else { + cow = btrfs_alloc_free_block(trans, root, buf->len, + root->root_key.objectid, + root_gen, first_key.objectid, + level, search_start, empty_size); + } if (IS_ERR(cow)) return PTR_ERR(cow); @@ -279,7 +297,7 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, - struct extent_buffer **cow_ret) + struct extent_buffer **cow_ret, u64 prealloc_dest) { u64 search_start; u64 header_trans; @@ -302,12 +320,14 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { *cow_ret = buf; spin_unlock(&root->fs_info->hash_lock); + WARN_ON(prealloc_dest); return 0; } spin_unlock(&root->fs_info->hash_lock); search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); ret = __btrfs_cow_block(trans, root, buf, parent, - parent_slot, cow_ret, search_start, 0); + parent_slot, cow_ret, search_start, 0, + prealloc_dest); return ret; } @@ -451,7 +471,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, err = __btrfs_cow_block(trans, root, cur, parent, i, &cur, search_start, min(16 * blocksize, - (end_slot - i) * blocksize)); + (end_slot - i) * blocksize), 0); if (err) { btrfs_tree_unlock(cur); free_extent_buffer(cur); @@ -803,7 +823,7 @@ static int balance_level(struct btrfs_trans_handle *trans, child = read_node_slot(root, mid, 0); btrfs_tree_lock(child); BUG_ON(!child); - ret = btrfs_cow_block(trans, root, child, mid, 0, &child); + ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0); BUG_ON(ret); spin_lock(&root->node_lock); @@ -836,7 +856,7 @@ static int balance_level(struct btrfs_trans_handle *trans, if (left) { btrfs_tree_lock(left); wret = btrfs_cow_block(trans, root, left, - parent, pslot - 1, &left); + parent, pslot - 1, &left, 0); if (wret) { ret = wret; goto enospc; @@ -846,7 +866,7 @@ static int balance_level(struct btrfs_trans_handle *trans, if (right) { btrfs_tree_lock(right); wret = btrfs_cow_block(trans, root, right, - parent, pslot + 1, &right); + parent, pslot + 1, &right, 0); if (wret) { ret = wret; goto enospc; @@ -1021,7 +1041,7 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, wret = 1; } else { ret = btrfs_cow_block(trans, root, left, parent, - pslot - 1, &left); + pslot - 1, &left, 0); if (ret) wret = 1; else { @@ -1069,7 +1089,7 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, } else { ret = btrfs_cow_block(trans, root, right, parent, pslot + 1, - &right); + &right, 0); if (ret) wret = 1; else { @@ -1245,6 +1265,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root u8 lowest_level = 0; u64 blocknr; u64 gen; + struct btrfs_key prealloc_block; lowest_level = p->lowest_level; WARN_ON(lowest_level && ins_len); @@ -1253,6 +1274,9 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root !mutex_is_locked(&root->fs_info->alloc_mutex)); if (ins_len < 0) lowest_unlock = 2; + + prealloc_block.objectid = 0; + again: if (p->skip_locking) b = btrfs_root_node(root); @@ -1261,27 +1285,82 @@ again: while (b) { level = btrfs_header_level(b); + + /* + * setup the path here so we can release it under lock + * contention with the cow code + */ + p->nodes[level] = b; + if (!p->skip_locking) + p->locks[level] = 1; + if (cow) { int wret; + + /* is a cow on this block not required */ + spin_lock(&root->fs_info->hash_lock); + if (btrfs_header_generation(b) == trans->transid && + !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { + spin_unlock(&root->fs_info->hash_lock); + goto cow_done; + } + spin_unlock(&root->fs_info->hash_lock); + + /* ok, we have to cow, is our old prealloc the right + * size? + */ + if (prealloc_block.objectid && + prealloc_block.offset != b->len) { + btrfs_free_reserved_extent(root, + prealloc_block.objectid, + prealloc_block.offset); + prealloc_block.objectid = 0; + } + + /* + * for higher level blocks, try not to allocate blocks + * with the block and the parent locks held. + */ + if (level > 1 && !prealloc_block.objectid && + btrfs_path_lock_waiting(p, level)) { + u32 size = b->len; + u64 hint = b->start; + + btrfs_release_path(root, p); + ret = btrfs_reserve_extent(trans, root, + size, size, 0, + hint, (u64)-1, + &prealloc_block, 0); + BUG_ON(ret); + goto again; + } + wret = btrfs_cow_block(trans, root, b, p->nodes[level + 1], p->slots[level + 1], - &b); + &b, prealloc_block.objectid); + prealloc_block.objectid = 0; if (wret) { free_extent_buffer(b); - return wret; + ret = wret; + goto done; } } +cow_done: BUG_ON(!cow && ins_len); if (level != btrfs_header_level(b)) WARN_ON(1); level = btrfs_header_level(b); + p->nodes[level] = b; if (!p->skip_locking) p->locks[level] = 1; + ret = check_block(root, p, level); - if (ret) - return -1; + if (ret) { + ret = -1; + goto done; + } ret = bin_search(b, key, level, &slot); if (level != 0) { @@ -1292,15 +1371,19 @@ again: BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { int sret = split_node(trans, root, p, level); BUG_ON(sret > 0); - if (sret) - return sret; + if (sret) { + ret = sret; + goto done; + } b = p->nodes[level]; slot = p->slots[level]; } else if (ins_len < 0) { int sret = balance_level(trans, root, p, level); - if (sret) - return sret; + if (sret) { + ret = sret; + goto done; + } b = p->nodes[level]; if (!b) { btrfs_release_path(NULL, p); @@ -1362,14 +1445,24 @@ again: int sret = split_leaf(trans, root, key, p, ins_len, ret == 0); BUG_ON(sret > 0); - if (sret) - return sret; + if (sret) { + ret = sret; + goto done; + } } unlock_up(p, level, lowest_unlock); - return ret; + goto done; } } - return 1; + ret = 1; +done: + if (prealloc_block.objectid) { + btrfs_free_reserved_extent(root, + prealloc_block.objectid, + prealloc_block.offset); + } + + return ret; } /* @@ -1840,7 +1933,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root /* cow and double check */ ret = btrfs_cow_block(trans, root, right, upper, - slot + 1, &right); + slot + 1, &right, 0); if (ret) goto out_unlock; @@ -2021,7 +2114,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root /* cow and double check */ ret = btrfs_cow_block(trans, root, left, - path->nodes[1], slot - 1, &left); + path->nodes[1], slot - 1, &left, 0); if (ret) { /* we hit -ENOSPC, but it isn't fatal here */ ret = 1; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d788ab0dcd9..9b025960bbd 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1421,6 +1421,9 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, int level, u64 hint, u64 empty_size); +struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u32 blocksize); int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size); int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -1451,6 +1454,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 root_objectid, u64 ref_generation, u64 owner_objectid, u64 owner_offset, int pin); +int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_io_tree *unpin); @@ -1484,7 +1488,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, - struct extent_buffer **cow_ret); + struct extent_buffer **cow_ret, u64 prealloc_dest); int btrfs_copy_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 74bcd48a9c4..98a1c0faeda 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2118,6 +2118,15 @@ again: return 0; } +int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) +{ + maybe_lock_mutex(root); + set_extent_dirty(&root->fs_info->free_space_cache, + start, start + len - 1, GFP_NOFS); + maybe_unlock_mutex(root); + return 0; +} + int btrfs_reserve_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_bytes, u64 min_alloc_size, @@ -2267,6 +2276,26 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, maybe_unlock_mutex(root); return ret; } + +struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u32 blocksize) +{ + struct extent_buffer *buf; + + buf = btrfs_find_create_tree_block(root, bytenr, blocksize); + if (!buf) + return ERR_PTR(-ENOMEM); + btrfs_set_header_generation(buf, trans->transid); + btrfs_tree_lock(buf); + clean_tree_block(trans, root, buf); + btrfs_set_buffer_uptodate(buf); + set_extent_dirty(&trans->transaction->dirty_pages, buf->start, + buf->start + buf->len - 1, GFP_NOFS); + trans->blocks_used++; + return buf; +} + /* * helper function to allocate a block for a given tree * returns the tree buffer or NULL. @@ -2293,26 +2322,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, BUG_ON(ret > 0); return ERR_PTR(ret); } - buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize); - if (!buf) { - btrfs_free_extent(trans, root, ins.objectid, blocksize, - root->root_key.objectid, ref_generation, - 0, 0, 0); - return ERR_PTR(-ENOMEM); - } - btrfs_set_header_generation(buf, trans->transid); - btrfs_tree_lock(buf); - clean_tree_block(trans, root, buf); - btrfs_set_buffer_uptodate(buf); - - if (PageDirty(buf->first_page)) { - printk("page %lu dirty\n", buf->first_page->index); - WARN_ON(1); - } - set_extent_dirty(&trans->transaction->dirty_pages, buf->start, - buf->start + buf->len - 1, GFP_NOFS); - trans->blocks_used++; + buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize); return buf; } diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index d43e14c7471..0cc314c10d6 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -56,3 +56,19 @@ int btrfs_tree_locked(struct extent_buffer *eb) { return mutex_is_locked(&eb->mutex); } + +int btrfs_path_lock_waiting(struct btrfs_path *path, int level) +{ + int i; + struct extent_buffer *eb; + for (i = level; i <= level + 1 && i < BTRFS_MAX_LEVEL; i++) { + eb = path->nodes[i]; + if (!eb) + break; + smp_mb(); + if (!list_empty(&eb->mutex.wait_list)) + return 1; + } + return 0; +} + diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index 2dab96d8280..bc1faef1251 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -23,4 +23,5 @@ int btrfs_tree_lock(struct extent_buffer *eb); int btrfs_tree_unlock(struct extent_buffer *eb); int btrfs_tree_locked(struct extent_buffer *eb); int btrfs_try_tree_lock(struct extent_buffer *eb); +int btrfs_path_lock_waiting(struct btrfs_path *path, int level); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a6877949930..9d84daf1000 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -622,7 +622,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); old = btrfs_lock_root_node(root); - btrfs_cow_block(trans, root, old, NULL, 0, &old); + btrfs_cow_block(trans, root, old, NULL, 0, &old, 0); btrfs_copy_root(trans, root, old, &tmp, objectid); btrfs_tree_unlock(old); -- cgit v1.2.3-70-g09d2 From 2dd3e67b1eaec8504da7e12b8afee77323a49f38 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 4 Aug 2008 08:20:15 -0400 Subject: Btrfs: More throttle tuning * Make walk_down_tree wake up throttled tasks more often * Make walk_down_tree call cond_resched during long loops * As the size of the ref cache grows, wait longer in throttle * Get rid of the reada code in walk_down_tree, the leaves don't get read anymore, thanks to the ref cache. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 13 ++--------- fs/btrfs/extent-tree.c | 59 ++++++++++---------------------------------------- fs/btrfs/transaction.c | 15 +++++++++++++ 3 files changed, 29 insertions(+), 58 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d2d1cc87e8a..da9dda4338a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -188,13 +188,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, btrfs_csum_final(crc, result); if (verify) { - int from_this_trans = 0; - - if (root->fs_info->running_transaction && - btrfs_header_generation(buf) == - root->fs_info->running_transaction->transid) - from_this_trans = 1; - /* FIXME, this is not good */ if (memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) { u32 val; @@ -203,11 +196,9 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, read_extent_buffer(buf, &val, 0, BTRFS_CRC32_SIZE); printk("btrfs: %s checksum verify failed on %llu " - "wanted %X found %X from_this_trans %d " - "level %d\n", + "wanted %X found %X level %d\n", root->fs_info->sb->s_id, - buf->start, val, found, from_this_trans, - btrfs_header_level(buf)); + buf->start, val, found, btrfs_header_level(buf)); return 1; } } else { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 98a1c0faeda..1aeb695078b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2369,6 +2369,11 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, leaf_owner, leaf_generation, key.objectid, key.offset, 0); mutex_unlock(&root->fs_info->alloc_mutex); + + atomic_inc(&root->fs_info->throttle_gen); + wake_up(&root->fs_info->transaction_throttle); + cond_resched(); + BUG_ON(ret); } return 0; @@ -2389,6 +2394,11 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, ref->owner, ref->generation, info->objectid, info->offset, 0); mutex_unlock(&root->fs_info->alloc_mutex); + + atomic_inc(&root->fs_info->throttle_gen); + wake_up(&root->fs_info->transaction_throttle); + cond_resched(); + BUG_ON(ret); info++; } @@ -2396,51 +2406,6 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, return 0; } -static void noinline reada_walk_down(struct btrfs_root *root, - struct extent_buffer *node, - int slot) -{ - u64 bytenr; - u64 last = 0; - u32 nritems; - u32 refs; - u32 blocksize; - int ret; - int i; - int level; - int skipped = 0; - - nritems = btrfs_header_nritems(node); - level = btrfs_header_level(node); - if (level) - return; - - for (i = slot; i < nritems && skipped < 32; i++) { - bytenr = btrfs_node_blockptr(node, i); - if (last && ((bytenr > last && bytenr - last > 32 * 1024) || - (last > bytenr && last - bytenr > 32 * 1024))) { - skipped++; - continue; - } - blocksize = btrfs_level_size(root, level - 1); - if (i != slot) { - ret = lookup_extent_ref(NULL, root, bytenr, - blocksize, &refs); - BUG_ON(ret); - if (refs != 1) { - skipped++; - continue; - } - } - ret = readahead_tree_block(root, bytenr, blocksize, - btrfs_node_ptr_generation(node, i)); - last = bytenr + blocksize; - cond_resched(); - if (ret) - break; - } -} - int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len, u32 *refs) { @@ -2549,6 +2514,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, atomic_inc(&root->fs_info->throttle_gen); wake_up(&root->fs_info->transaction_throttle); + cond_resched(); continue; } @@ -2578,8 +2544,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { free_extent_buffer(next); - if (path->slots[*level] == 0) - reada_walk_down(root, cur, path->slots[*level]); next = read_tree_block(root, bytenr, blocksize, ptr_gen); cond_resched(); @@ -2601,6 +2565,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, path->nodes[*level-1] = next; *level = btrfs_header_level(next); path->slots[*level] = 0; + cond_resched(); } out: WARN_ON(*level < 0); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 9d84daf1000..cf73342e821 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -210,7 +210,9 @@ static noinline int wait_for_commit(struct btrfs_root *root, static void throttle_on_drops(struct btrfs_root *root) { struct btrfs_fs_info *info = root->fs_info; + int harder_count = 0; +harder: if (atomic_read(&info->throttles)) { DEFINE_WAIT(wait); int thr; @@ -226,6 +228,19 @@ static void throttle_on_drops(struct btrfs_root *root) schedule(); finish_wait(&info->transaction_throttle, &wait); } while (thr == atomic_read(&info->throttle_gen)); + harder_count++; + + if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 && + harder_count < 2) + goto harder; + + if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 && + harder_count < 10) + goto harder; + + if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 && + harder_count < 20) + goto harder; } } -- cgit v1.2.3-70-g09d2 From d7a029a89ef370e74b63f18b81498d90d1ee3cc1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 4 Aug 2008 23:17:26 -0400 Subject: Btrfs: Don't corrupt ram in shrink_extent_tree, leak it instead Far from the perfect fix, but these structs are small. TODO for the next release. The block group cache structs are referenced in many different places, and it isn't safe to just free them while resizing. A real fix will be a larger change to the allocator so that it doesn't have to carry about the block group cache structs to find good places to search for free blocks. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1aeb695078b..dbde1288124 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3437,8 +3437,10 @@ next: key.objectid, key.objectid + key.offset - 1, (unsigned int)-1, GFP_NOFS); + /* memset(shrink_block_group, 0, sizeof(*shrink_block_group)); kfree(shrink_block_group); + */ btrfs_del_item(trans, root, path); btrfs_release_path(root, path); -- cgit v1.2.3-70-g09d2 From ea8c281947950fac5f78818b767821d696c9512a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 4 Aug 2008 23:17:27 -0400 Subject: Btrfs: Maintain a list of inodes that are delalloc and a way to wait on them Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 2 ++ fs/btrfs/ctree.h | 4 ++++ fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 12 +++++++++--- fs/btrfs/file.c | 3 +-- fs/btrfs/inode.c | 47 +++++++++++++++++++++++++++++++++++++++++------ fs/btrfs/ioctl.c | 4 ++-- 7 files changed, 60 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index df624fd735c..534ac4153dd 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -42,6 +42,8 @@ struct btrfs_inode { /* for keeping track of orphaned inodes */ struct list_head i_orphan; + struct list_head delalloc_inodes; + /* * transid of the trans_handle that last modified this inode */ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 62499dd761b..116aee21bf7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -551,6 +551,7 @@ struct btrfs_fs_info { */ spinlock_t ordered_extent_lock; struct list_head ordered_extents; + struct list_head delalloc_inodes; /* * there is a pool of worker threads for checksumming during writes @@ -637,6 +638,7 @@ struct btrfs_root { struct kobject root_kobj; struct completion kobj_unregister; struct mutex objectid_mutex; + u64 objectid; u64 last_trans; @@ -1651,6 +1653,8 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, #define PageChecked PageFsMisc #endif +int btrfs_start_delalloc_inodes(struct btrfs_root *root); +int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc); int btrfs_create_subvol_root(struct btrfs_root *new_root, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index da9dda4338a..76543683f3b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1234,6 +1234,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->hashers); + INIT_LIST_HEAD(&fs_info->delalloc_inodes); spin_lock_init(&fs_info->hash_lock); spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index dbde1288124..33cb2ac4cb2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1230,7 +1230,6 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->total_bytes += total_bytes; found->bytes_used += bytes_used; found->full = 0; - WARN_ON(found->total_bytes < found->bytes_used); *space_info = found; return 0; } @@ -2841,8 +2840,7 @@ again: */ clear_page_dirty_for_io(page); - set_extent_delalloc(io_tree, page_start, - page_end, GFP_NOFS); + btrfs_set_extent_delalloc(inode, page_start, page_end); set_page_dirty(page); unlock_extent(io_tree, page_start, page_end, GFP_NOFS); @@ -3319,6 +3317,13 @@ again: key.type = 0; cur_byte = key.objectid; + mutex_unlock(&root->fs_info->alloc_mutex); + + btrfs_start_delalloc_inodes(root); + btrfs_wait_ordered_extents(tree_root); + + mutex_lock(&root->fs_info->alloc_mutex); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; @@ -3401,6 +3406,7 @@ next: btrfs_clean_old_snapshots(tree_root); + btrfs_start_delalloc_inodes(root); btrfs_wait_ordered_extents(tree_root); trans = btrfs_start_transaction(tree_root, 1); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8915f2dc1bc..eb8e4556fa7 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -312,8 +312,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, * to reset the delalloc bit on things that already have * extents reserved. */ - set_extent_delalloc(io_tree, start_pos, - end_of_last_block, GFP_NOFS); + btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; SetPageUptodate(p); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 640648c66b2..8a405a5fa6a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -303,6 +303,10 @@ int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); BTRFS_I(inode)->delalloc_bytes += end - start + 1; root->fs_info->delalloc_bytes += end - start + 1; + if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { + list_add_tail(&BTRFS_I(inode)->delalloc_inodes, + &root->fs_info->delalloc_inodes); + } spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); } return 0; @@ -325,6 +329,10 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, root->fs_info->delalloc_bytes -= end - start + 1; BTRFS_I(inode)->delalloc_bytes -= end - start + 1; } + if (BTRFS_I(inode)->delalloc_bytes == 0 && + !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { + list_del_init(&BTRFS_I(inode)->delalloc_inodes); + } spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); } return 0; @@ -408,6 +416,12 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, return 0; } +int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end) +{ + return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, + GFP_NOFS); +} + struct btrfs_writepage_fixup { struct page *page; struct btrfs_work work; @@ -453,8 +467,7 @@ again: goto again; } - set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end, - GFP_NOFS); + btrfs_set_extent_delalloc(inode, page_start, page_end); ClearPageChecked(page); out: unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); @@ -1530,8 +1543,7 @@ again: goto again; } - set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, - page_end, GFP_NOFS); + btrfs_set_extent_delalloc(inode, page_start, page_end); ret = 0; if (offset != PAGE_CACHE_SIZE) { kaddr = kmap(page); @@ -1766,6 +1778,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) inode->i_mapping, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); + INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); mutex_init(&BTRFS_I(inode)->csum_mutex); mutex_init(&BTRFS_I(inode)->extent_mutex); @@ -2158,6 +2171,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); + INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); mutex_init(&BTRFS_I(inode)->csum_mutex); mutex_init(&BTRFS_I(inode)->extent_mutex); BTRFS_I(inode)->delalloc_bytes = 0; @@ -2400,6 +2414,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_mapping, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); + INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); mutex_init(&BTRFS_I(inode)->csum_mutex); mutex_init(&BTRFS_I(inode)->extent_mutex); BTRFS_I(inode)->delalloc_bytes = 0; @@ -3049,8 +3064,7 @@ again: goto again; } - set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, - page_end, GFP_NOFS); + btrfs_set_extent_delalloc(inode, page_start, page_end); ret = 0; /* page is wholly or partially inside EOF */ @@ -3373,6 +3387,26 @@ out_unlock: return ret; } +int btrfs_start_delalloc_inodes(struct btrfs_root *root) +{ + struct list_head *head = &root->fs_info->delalloc_inodes; + struct btrfs_inode *binode; + unsigned long flags; + + spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); + while(!list_empty(head)) { + binode = list_entry(head->next, struct btrfs_inode, + delalloc_inodes); + atomic_inc(&binode->vfs_inode.i_count); + spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); + filemap_write_and_wait(binode->vfs_inode.i_mapping); + iput(&binode->vfs_inode); + spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); + } + spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); + return 0; +} + static int btrfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { @@ -3436,6 +3470,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_mapping, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); + INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); mutex_init(&BTRFS_I(inode)->csum_mutex); mutex_init(&BTRFS_I(inode)->extent_mutex); BTRFS_I(inode)->delalloc_bytes = 0; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0b63c3c77cf..e1046a54b1c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -274,8 +274,7 @@ again: */ clear_page_dirty_for_io(page); - set_extent_delalloc(io_tree, page_start, - page_end, GFP_NOFS); + btrfs_set_extent_delalloc(inode, page_start, page_end); unlock_extent(io_tree, page_start, page_end, GFP_NOFS); set_page_dirty(page); @@ -784,6 +783,7 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_TRANS_END: return btrfs_ioctl_trans_end(file); case BTRFS_IOC_SYNC: + btrfs_start_delalloc_inodes(root); btrfs_sync_fs(file->f_dentry->d_sb, 1); return 0; } -- cgit v1.2.3-70-g09d2 From 7ea394f1192bee1af67ea4762c88ef4b7b0487a8 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Tue, 5 Aug 2008 13:05:02 -0400 Subject: Btrfs: Fix nodatacow for the new data=ordered mode Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 ++- fs/btrfs/extent-tree.c | 11 +++++---- fs/btrfs/inode.c | 60 +++++++++++++++++++++++++++++++------------------ fs/btrfs/ioctl.c | 1 + fs/btrfs/ordered-data.c | 16 ++++++++++--- fs/btrfs/ordered-data.h | 6 +++-- fs/btrfs/transaction.c | 11 +++++++++ 7 files changed, 74 insertions(+), 34 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 116aee21bf7..f90e5a7ac16 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1403,7 +1403,8 @@ static inline struct dentry *fdentry(struct file *file) { } /* extent-tree.c */ -int btrfs_cross_ref_exists(struct btrfs_root *root, +int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_key *key, u64 bytenr); int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 33cb2ac4cb2..fff219ed61d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -893,10 +893,10 @@ out: return ret; } -int btrfs_cross_ref_exists(struct btrfs_root *root, +int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_key *key, u64 bytenr) { - struct btrfs_trans_handle *trans; struct btrfs_root *old_root; struct btrfs_path *path = NULL; struct extent_buffer *eb; @@ -908,6 +908,7 @@ int btrfs_cross_ref_exists(struct btrfs_root *root, int level; int ret; + BUG_ON(trans == NULL); BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY); ret = get_reference_status(root, bytenr, 0, key->objectid, &min_generation, &ref_count); @@ -917,7 +918,6 @@ int btrfs_cross_ref_exists(struct btrfs_root *root, if (ref_count != 1) return 1; - trans = btrfs_start_transaction(root, 0); old_root = root->dirty_root->root; ref_generation = old_root->root_key.offset; @@ -973,7 +973,6 @@ int btrfs_cross_ref_exists(struct btrfs_root *root, out: if (path) btrfs_free_path(path); - btrfs_end_transaction(trans, root); return ret; } @@ -3320,7 +3319,7 @@ again: mutex_unlock(&root->fs_info->alloc_mutex); btrfs_start_delalloc_inodes(root); - btrfs_wait_ordered_extents(tree_root); + btrfs_wait_ordered_extents(tree_root, 0); mutex_lock(&root->fs_info->alloc_mutex); @@ -3407,7 +3406,7 @@ next: btrfs_clean_old_snapshots(tree_root); btrfs_start_delalloc_inodes(root); - btrfs_wait_ordered_extents(tree_root); + btrfs_wait_ordered_extents(tree_root, 0); trans = btrfs_start_transaction(tree_root, 1); btrfs_commit_transaction(trans, tree_root); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4d8ffc01931..c33053ba381 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -166,7 +166,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) cur_alloc_size = ins.offset; ret = btrfs_add_ordered_extent(inode, start, ins.objectid, - ins.offset); + ins.offset, 0); BUG_ON(ret); if (num_bytes < cur_alloc_size) { printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes, @@ -187,31 +187,32 @@ static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end) u64 extent_start; u64 extent_end; u64 bytenr; - u64 cow_end; u64 loops = 0; u64 total_fs_bytes; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_block_group_cache *block_group; + struct btrfs_trans_handle *trans; struct extent_buffer *leaf; int found_type; struct btrfs_path *path; struct btrfs_file_extent_item *item; int ret; - int err; + int err = 0; struct btrfs_key found_key; total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); path = btrfs_alloc_path(); BUG_ON(!path); + trans = btrfs_join_transaction(root, 1); + BUG_ON(!trans); again: ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino, start, 0); if (ret < 0) { - btrfs_free_path(path); - return ret; + err = ret; + goto out; } - cow_end = end; if (ret != 0) { if (path->slots[0] == 0) goto not_found; @@ -244,12 +245,11 @@ again: if (start < extent_start || start >= extent_end) goto not_found; - cow_end = min(end, extent_end - 1); bytenr = btrfs_file_extent_disk_bytenr(leaf, item); if (bytenr == 0) goto not_found; - if (btrfs_cross_ref_exists(root, &found_key, bytenr)) + if (btrfs_cross_ref_exists(trans, root, &found_key, bytenr)) goto not_found; /* * we may be called by the resizer, make sure we're inside @@ -260,24 +260,32 @@ again: if (!block_group || block_group->ro) goto not_found; + bytenr += btrfs_file_extent_offset(leaf, item); + extent_num_bytes = min(end + 1, extent_end) - start; + ret = btrfs_add_ordered_extent(inode, start, bytenr, + extent_num_bytes, 1); + if (ret) { + err = ret; + goto out; + } + + btrfs_release_path(root, path); start = extent_end; + if (start <= end) { + loops++; + goto again; + } } else { - goto not_found; - } -loop: - if (start > end) { +not_found: + btrfs_end_transaction(trans, root); btrfs_free_path(path); - return 0; + return cow_file_range(inode, start, end); } - btrfs_release_path(root, path); - loops++; - goto again; - -not_found: - btrfs_release_path(root, path); - cow_file_range(inode, start, end); - start = end + 1; - goto loop; +out: + WARN_ON(err); + btrfs_end_transaction(trans, root); + btrfs_free_path(path); + return err; } static int run_delalloc_range(struct inode *inode, u64 start, u64 end) @@ -385,6 +393,11 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, goto mapit; } + if (btrfs_test_opt(root, NODATASUM) || + btrfs_test_flag(inode, NODATASUM)) { + goto mapit; + } + return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, inode, rw, bio, mirror_num, __btrfs_submit_bio_hook); @@ -527,6 +540,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ordered_extent = btrfs_lookup_ordered_extent(inode, start); BUG_ON(!ordered_extent); + if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) + goto nocow; lock_extent(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, @@ -567,6 +582,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) unlock_extent(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, GFP_NOFS); +nocow: add_pending_csums(trans, inode, ordered_extent->file_offset, &ordered_extent->list); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 3932c7cd0fa..59b64c738fd 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index db200e6baf7..da6d43eb41d 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -152,7 +152,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, * inserted. */ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, - u64 start, u64 len) + u64 start, u64 len, int nocow) { struct btrfs_ordered_inode_tree *tree; struct rb_node *node; @@ -168,6 +168,8 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, entry->start = start; entry->len = len; entry->inode = inode; + if (nocow) + set_bit(BTRFS_ORDERED_NOCOW, &entry->flags); /* one ref for the tree */ atomic_set(&entry->refs, 1); @@ -303,10 +305,11 @@ int btrfs_remove_ordered_extent(struct inode *inode, return 0; } -int btrfs_wait_ordered_extents(struct btrfs_root *root) +int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) { struct list_head splice; struct list_head *cur; + struct list_head *tmp; struct btrfs_ordered_extent *ordered; struct inode *inode; @@ -314,10 +317,16 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root) spin_lock(&root->fs_info->ordered_extent_lock); list_splice_init(&root->fs_info->ordered_extents, &splice); - while(!list_empty(&splice)) { + list_for_each_safe(cur, tmp, &splice) { cur = splice.next; ordered = list_entry(cur, struct btrfs_ordered_extent, root_extent_list); + if (nocow_only && + !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { + cond_resched_lock(&root->fs_info->ordered_extent_lock); + continue; + } + list_del_init(&ordered->root_extent_list); atomic_inc(&ordered->refs); inode = ordered->inode; @@ -338,6 +347,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root) spin_lock(&root->fs_info->ordered_extent_lock); } + list_splice_init(&splice, &root->fs_info->ordered_extents); spin_unlock(&root->fs_info->ordered_extent_lock); return 0; } diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 5efe6b63c74..fd45519f30a 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -64,6 +64,8 @@ struct btrfs_ordered_sum { #define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */ +#define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ + struct btrfs_ordered_extent { /* logical offset in the file */ u64 file_offset; @@ -125,7 +127,7 @@ int btrfs_remove_ordered_extent(struct inode *inode, int btrfs_dec_test_ordered_pending(struct inode *inode, u64 file_offset, u64 io_size); int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, - u64 start, u64 len); + u64 start, u64 len, int nocow); int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_extent *entry, struct btrfs_ordered_sum *sum); @@ -143,5 +145,5 @@ int btrfs_wait_on_page_writeback_range(struct address_space *mapping, pgoff_t start, pgoff_t end); int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end, int sync_mode); -int btrfs_wait_ordered_extents(struct btrfs_root *root); +int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index ebf5362da1d..9d3d08e9f8d 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -438,6 +438,7 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, free_extent_buffer(root->commit_root); root->commit_root = NULL; + root->dirty_root = NULL; spin_lock(&root->list_lock); list_del_init(&dirty->root->dead_list); @@ -461,6 +462,7 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, sizeof(struct btrfs_disk_key)); root->root_item.drop_level = 0; root->commit_root = NULL; + root->dirty_root = NULL; root->root_key.offset = root->fs_info->generation; btrfs_set_root_bytenr(&root->root_item, root->node->start); @@ -762,7 +764,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, } do { + int snap_pending = 0; joined = cur_trans->num_joined; + if (!list_empty(&trans->transaction->pending_snapshots)) + snap_pending = 1; + WARN_ON(cur_trans != trans->transaction); prepare_to_wait(&cur_trans->writer_wait, &wait, TASK_UNINTERRUPTIBLE); @@ -774,6 +780,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->trans_mutex); + if (snap_pending) { + ret = btrfs_wait_ordered_extents(root, 1); + BUG_ON(ret); + } + schedule_timeout(timeout); mutex_lock(&root->fs_info->trans_mutex); -- cgit v1.2.3-70-g09d2 From 21af804c07141c035085f99798efaabbc7836a97 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 12 Aug 2008 14:13:26 +0100 Subject: Btrfs: Discard sector data in __free_extent() Date: Tue, 12 Aug 2008 14:13:26 +0100 Signed-off-by: David Woodhouse Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fff219ed61d..e63b3b4bed7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "hash.h" #include "crc32c.h" #include "ctree.h" @@ -1716,6 +1717,10 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (refs == 0) { u64 super_used; u64 root_used; +#ifdef BIO_RW_DISCARD + u64 map_length = num_bytes; + struct btrfs_multi_bio *multi = NULL; +#endif if (pin) { ret = pin_down_bytes(root, bytenr, num_bytes, 0); @@ -1743,6 +1748,26 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ret = update_block_group(trans, root, bytenr, num_bytes, 0, mark_free); BUG_ON(ret); + +#ifdef BIO_RW_DISCARD + /* Tell the block device(s) that the sectors can be discarded */ + ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, + bytenr, &map_length, &multi, 0); + if (!ret) { + struct btrfs_bio_stripe *stripe = multi->stripes; + int i; + + if (map_length > num_bytes) + map_length = num_bytes; + + for (i = 0; i < multi->num_stripes; i++, stripe++) { + blkdev_issue_discard(stripe->dev->bdev, + stripe->physical >> 9, + map_length >> 9); + } + kfree(multi); + } +#endif } btrfs_free_path(path); finish_current_insert(trans, extent_root); -- cgit v1.2.3-70-g09d2 From e02119d5a7b4396c5a872582fddc8bd6d305a70a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 5 Sep 2008 16:13:11 -0400 Subject: Btrfs: Add a write ahead tree log to optimize synchronous operations File syncs and directory syncs are optimized by copying their items into a special (copy-on-write) log tree. There is one log tree per subvolume and the btrfs super block points to a tree of log tree roots. After a crash, items are copied out of the log tree and back into the subvolume. See tree-log.c for all the details. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 3 +- fs/btrfs/btrfs_inode.h | 8 + fs/btrfs/compat.h | 15 + fs/btrfs/ctree.c | 73 +- fs/btrfs/ctree.h | 72 +- fs/btrfs/dir-item.c | 3 + fs/btrfs/disk-io.c | 138 ++- fs/btrfs/disk-io.h | 8 +- fs/btrfs/extent-tree.c | 93 +- fs/btrfs/file.c | 39 +- fs/btrfs/inode.c | 261 ++--- fs/btrfs/root-tree.c | 5 +- fs/btrfs/transaction.c | 45 +- fs/btrfs/transaction.h | 1 + fs/btrfs/tree-defrag.c | 4 +- fs/btrfs/tree-log.c | 2804 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/tree-log.h | 41 + 17 files changed, 3408 insertions(+), 205 deletions(-) create mode 100644 fs/btrfs/tree-log.c create mode 100644 fs/btrfs/tree-log.h (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 3a01065d4ef..b7addbfd8c2 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -7,8 +7,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ - ref-cache.o acl.o export.o - + ref-cache.o export.o tree-log.o acl.o else # Normal Makefile diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 111f90524ae..fcc8cf27e90 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -33,6 +33,7 @@ struct btrfs_inode { struct extent_io_tree io_failure_tree; struct mutex csum_mutex; struct mutex extent_mutex; + struct mutex log_mutex; struct inode vfs_inode; struct btrfs_ordered_inode_tree ordered_tree; @@ -44,10 +45,17 @@ struct btrfs_inode { struct list_head delalloc_inodes; + /* full 64 bit generation number */ + u64 generation; + /* * transid of the trans_handle that last modified this inode */ u64 last_trans; + /* + * transid that last logged this inode + */ + u64 logged_trans; u64 delalloc_bytes; u64 disk_i_size; u32 flags; diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h index d45fb37887b..b0ed1887d9b 100644 --- a/fs/btrfs/compat.h +++ b/fs/btrfs/compat.h @@ -22,6 +22,21 @@ static inline struct dentry *d_obtain_alias(struct inode *inode) } #endif +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) +static inline void btrfs_drop_nlink(struct inode *inode) +{ + inode->i_nlink--; +} + +static inline void btrfs_inc_nlink(struct inode *inode) +{ + inode->i_nlink++; +} +#else +# define btrfs_drop_nlink(inode) drop_nlink(inode) +# define btrfs_inc_nlink(inode) inc_nlink(inode) +#endif + /* * Even if AppArmor isn't enabled, it still has different prototypes. * Add more distro/version pairs here to declare which has AppArmor applied. diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7114faafa9d..579124043d9 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -60,7 +60,7 @@ void btrfs_free_path(struct btrfs_path *p) kmem_cache_free(btrfs_path_cachep, p); } -void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) +void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) { int i; @@ -176,7 +176,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, return 0; } -int __btrfs_cow_block(struct btrfs_trans_handle *trans, +int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, @@ -294,7 +294,7 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, return 0; } -int btrfs_cow_block(struct btrfs_trans_handle *trans, +int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, struct extent_buffer **cow_ret, u64 prealloc_dest) @@ -677,9 +677,10 @@ static int noinline check_block(struct btrfs_root *root, * * slot may point to max if the key is bigger than all of the keys */ -static int generic_bin_search(struct extent_buffer *eb, unsigned long p, - int item_size, struct btrfs_key *key, - int max, int *slot) +static noinline int generic_bin_search(struct extent_buffer *eb, + unsigned long p, + int item_size, struct btrfs_key *key, + int max, int *slot) { int low = 0; int high = max; @@ -765,7 +766,7 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, return -1; } -static struct extent_buffer *read_node_slot(struct btrfs_root *root, +static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, struct extent_buffer *parent, int slot) { int level = btrfs_header_level(parent); @@ -781,7 +782,7 @@ static struct extent_buffer *read_node_slot(struct btrfs_root *root, btrfs_node_ptr_generation(parent, slot)); } -static int balance_level(struct btrfs_trans_handle *trans, +static noinline int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { @@ -1128,8 +1129,9 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, /* * readahead one full node of leaves */ -static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, - int level, int slot, u64 objectid) +static noinline void reada_for_search(struct btrfs_root *root, + struct btrfs_path *path, + int level, int slot, u64 objectid) { struct extent_buffer *node; struct btrfs_disk_key disk_key; @@ -1201,7 +1203,8 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, } } -static void unlock_up(struct btrfs_path *path, int level, int lowest_unlock) +static noinline void unlock_up(struct btrfs_path *path, int level, + int lowest_unlock) { int i; int skip_level = level; @@ -1759,8 +1762,9 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root * * returns 0 on success and < 0 on failure */ -static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int level) +static noinline int split_node(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int level) { u64 root_gen; struct extent_buffer *c; @@ -1874,7 +1878,8 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr) * the start of the leaf data. IOW, how much room * the leaf has left for both items and data */ -int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf) +int noinline btrfs_leaf_free_space(struct btrfs_root *root, + struct extent_buffer *leaf) { int nritems = btrfs_header_nritems(leaf); int ret; @@ -2283,9 +2288,11 @@ out: * * returns 0 if all went well and < 0 on failure. */ -static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_key *ins_key, - struct btrfs_path *path, int data_size, int extend) +static noinline int split_leaf(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_key *ins_key, + struct btrfs_path *path, int data_size, + int extend) { u64 root_gen; struct extent_buffer *l; @@ -3079,6 +3086,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) * was nothing in the tree that matched the search criteria. */ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, + struct btrfs_key *max_key, struct btrfs_path *path, int cache_only, u64 min_trans) { @@ -3093,6 +3101,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, again: cur = btrfs_lock_root_node(root); level = btrfs_header_level(cur); + WARN_ON(path->nodes[level]); path->nodes[level] = cur; path->locks[level] = 1; @@ -3107,6 +3116,8 @@ again: /* at level = 0, we're done, setup the path and exit */ if (level == 0) { + if (slot >= nritems) + goto find_next_key; ret = 0; path->slots[level] = slot; btrfs_item_key_to_cpu(cur, &found_key, slot); @@ -3123,6 +3134,8 @@ again: u64 blockptr; u64 gen; struct extent_buffer *tmp; + struct btrfs_disk_key disk_key; + blockptr = btrfs_node_blockptr(cur, slot); gen = btrfs_node_ptr_generation(cur, slot); if (gen < min_trans) { @@ -3132,6 +3145,14 @@ again: if (!cache_only) break; + if (max_key) { + btrfs_node_key(cur, &disk_key, slot); + if (comp_keys(&disk_key, max_key) >= 0) { + ret = 1; + goto out; + } + } + tmp = btrfs_find_tree_block(root, blockptr, btrfs_level_size(root, level - 1)); @@ -3143,14 +3164,16 @@ again: free_extent_buffer(tmp); slot++; } +find_next_key: /* * we didn't find a candidate key in this node, walk forward * and find another one */ if (slot >= nritems) { - ret = btrfs_find_next_key(root, path, min_key, level, + path->slots[level] = slot; + sret = btrfs_find_next_key(root, path, min_key, level, cache_only, min_trans); - if (ret == 0) { + if (sret == 0) { btrfs_release_path(root, path); goto again; } else { @@ -3351,6 +3374,7 @@ int btrfs_previous_item(struct btrfs_root *root, { struct btrfs_key found_key; struct extent_buffer *leaf; + u32 nritems; int ret; while(1) { @@ -3362,9 +3386,20 @@ int btrfs_previous_item(struct btrfs_root *root, path->slots[0]--; } leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + if (nritems == 0) + return 1; + if (path->slots[0] == nritems) + path->slots[0]--; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.type == type) return 0; + if (found_key.objectid < min_objectid) + break; + if (found_key.objectid == min_objectid && + found_key.type < type) + break; } return 1; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b305ae7e10b..6532b60683e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -77,6 +77,10 @@ struct btrfs_ordered_sum; /* orhpan objectid for tracking unlinked/truncated files */ #define BTRFS_ORPHAN_OBJECTID -5ULL +/* does write ahead logging to speed up fsyncs */ +#define BTRFS_TREE_LOG_OBJECTID -6ULL +#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL + /* * All files have objectids higher than this. */ @@ -276,6 +280,7 @@ struct btrfs_super_block { __le64 generation; __le64 root; __le64 chunk_root; + __le64 log_root; __le64 total_bytes; __le64 bytes_used; __le64 root_dir_objectid; @@ -287,6 +292,7 @@ struct btrfs_super_block { __le32 sys_chunk_array_size; u8 root_level; u8 chunk_root_level; + u8 log_root_level; struct btrfs_dev_item dev_item; char label[BTRFS_LABEL_SIZE]; u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; @@ -392,7 +398,10 @@ struct btrfs_timespec { * make a new item type */ struct btrfs_inode_item { + /* nfs style generation number */ __le64 generation; + /* transid that last touched this inode */ + __le64 transid; __le64 size; __le64 nblocks; __le64 block_group; @@ -409,8 +418,13 @@ struct btrfs_inode_item { struct btrfs_timespec otime; } __attribute__ ((__packed__)); +struct btrfs_dir_log_item { + __le64 end; +} __attribute__ ((__packed__)); + struct btrfs_dir_item { struct btrfs_disk_key location; + __le64 transid; __le16 data_len; __le16 name_len; u8 type; @@ -505,6 +519,9 @@ struct btrfs_fs_info { struct btrfs_root *tree_root; struct btrfs_root *chunk_root; struct btrfs_root *dev_root; + + /* the log root tree is a directory of all the other log roots */ + struct btrfs_root *log_root_tree; struct radix_tree_root fs_roots_radix; struct extent_io_tree free_space_cache; @@ -518,6 +535,7 @@ struct btrfs_fs_info { u64 generation; u64 last_trans_committed; + u64 last_trans_new_blockgroup; u64 open_ioctl_trans; unsigned long mount_opt; u64 max_extent; @@ -527,6 +545,9 @@ struct btrfs_fs_info { wait_queue_head_t transaction_throttle; wait_queue_head_t transaction_wait; wait_queue_head_t async_submit_wait; + + wait_queue_head_t tree_log_wait; + struct btrfs_super_block super_copy; struct btrfs_super_block super_for_commit; struct block_device *__bdev; @@ -535,6 +556,7 @@ struct btrfs_fs_info { struct backing_dev_info bdi; spinlock_t hash_lock; struct mutex trans_mutex; + struct mutex tree_log_mutex; struct mutex transaction_kthread_mutex; struct mutex cleaner_mutex; struct mutex alloc_mutex; @@ -544,8 +566,13 @@ struct btrfs_fs_info { struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; + atomic_t nr_async_submits; atomic_t nr_async_bios; + atomic_t tree_log_writers; + atomic_t tree_log_commit; + unsigned long tree_log_batch; + u64 tree_log_transid; /* * this is used by the balancing code to wait for all the pending @@ -583,6 +610,7 @@ struct btrfs_fs_info { struct completion kobj_unregister; int do_barriers; int closing; + int log_root_recovering; atomic_t throttles; atomic_t throttle_gen; @@ -596,6 +624,7 @@ struct btrfs_fs_info { u64 delalloc_bytes; u64 last_alloc; u64 last_data_alloc; + u64 last_log_alloc; spinlock_t ref_cache_lock; u64 total_ref_cache_size; @@ -632,6 +661,7 @@ struct btrfs_root { struct btrfs_leaf_ref_tree *ref_tree; struct btrfs_leaf_ref_tree ref_tree_struct; struct btrfs_dirty_root *dirty_root; + struct btrfs_root *log_root; struct btrfs_root_item root_item; struct btrfs_key root_key; @@ -640,6 +670,7 @@ struct btrfs_root { struct kobject root_kobj; struct completion kobj_unregister; struct mutex objectid_mutex; + struct mutex log_mutex; u64 objectid; u64 last_trans; @@ -692,6 +723,8 @@ struct btrfs_root { * dir items are the name -> inode pointers in a directory. There is one * for every name in a directory. */ +#define BTRFS_DIR_LOG_ITEM_KEY 14 +#define BTRFS_DIR_LOG_INDEX_KEY 15 #define BTRFS_DIR_ITEM_KEY 16 #define BTRFS_DIR_INDEX_KEY 17 /* @@ -703,7 +736,8 @@ struct btrfs_root { */ #define BTRFS_CSUM_ITEM_KEY 19 -/* reserve 20-31 for other file stuff */ + +/* reserve 21-31 for other file/dir stuff */ /* * root items point to tree roots. There are typically in the root @@ -938,6 +972,7 @@ BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64); /* struct btrfs_inode_item */ BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); +BTRFS_SETGET_FUNCS(inode_transid, struct btrfs_inode_item, transid, 64); BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64); BTRFS_SETGET_FUNCS(inode_nblocks, struct btrfs_inode_item, nblocks, 64); BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64); @@ -1126,10 +1161,13 @@ static inline void btrfs_set_item_key(struct extent_buffer *eb, write_eb_member(eb, item, struct btrfs_item, key, disk_key); } +BTRFS_SETGET_FUNCS(dir_log_end, struct btrfs_dir_log_item, end, 64); + /* struct btrfs_dir_item */ BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16); BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8); BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16); +BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64); static inline void btrfs_dir_item_key(struct extent_buffer *eb, struct btrfs_dir_item *item, @@ -1301,7 +1339,11 @@ BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block, BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block, chunk_root, 64); BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block, - chunk_root_level, 64); + chunk_root_level, 8); +BTRFS_SETGET_STACK_FUNCS(super_log_root, struct btrfs_super_block, + log_root, 64); +BTRFS_SETGET_STACK_FUNCS(super_log_root_level, struct btrfs_super_block, + log_root_level, 8); BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block, total_bytes, 64); BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block, @@ -1405,6 +1447,12 @@ static inline struct dentry *fdentry(struct file *file) { } /* extent-tree.c */ +int btrfs_lookup_extent(struct btrfs_root *root, struct btrfs_path *path, + u64 start, u64 len); +int btrfs_update_pinned_extents(struct btrfs_root *root, + u64 bytenr, u64 num, int pin); +int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *leaf); int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, u64 bytenr); @@ -1448,6 +1496,11 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, struct btrfs_key *ins); +int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + struct btrfs_key *ins); int btrfs_reserve_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_bytes, u64 min_alloc_size, @@ -1488,9 +1541,9 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key, int lowest_level, int cache_only, u64 min_trans); int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, + struct btrfs_key *max_key, struct btrfs_path *path, int cache_only, u64 min_trans); - int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, @@ -1656,6 +1709,18 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, #define PageChecked PageFsMisc #endif +int btrfs_unlink_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *dir, struct inode *inode, + const char *name, int name_len); +int btrfs_add_link(struct btrfs_trans_handle *trans, + struct inode *parent_inode, struct inode *inode, + const char *name, int name_len, int add_backref, u64 index); +int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, u64 new_size, + u32 min_type); + int btrfs_start_delalloc_inodes(struct btrfs_root *root); int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); int btrfs_writepages(struct address_space *mapping, @@ -1715,6 +1780,7 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); /* file.c */ +int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync); int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end); int btrfs_check_file(struct btrfs_root *root, struct inode *inode); extern struct file_operations btrfs_file_operations; diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 125094617fe..e4f30090d64 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -96,6 +96,7 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, btrfs_set_dir_item_key(leaf, dir_item, &disk_key); btrfs_set_dir_type(leaf, dir_item, BTRFS_FT_XATTR); btrfs_set_dir_name_len(leaf, dir_item, name_len); + btrfs_set_dir_transid(leaf, dir_item, trans->transid); btrfs_set_dir_data_len(leaf, dir_item, data_len); name_ptr = (unsigned long)(dir_item + 1); data_ptr = (unsigned long)((char *)name_ptr + name_len); @@ -142,6 +143,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_dir_type(leaf, dir_item, type); btrfs_set_dir_data_len(leaf, dir_item, 0); btrfs_set_dir_name_len(leaf, dir_item, name_len); + btrfs_set_dir_transid(leaf, dir_item, trans->transid); name_ptr = (unsigned long)(dir_item + 1); write_extent_buffer(leaf, name, name_ptr, name_len); @@ -169,6 +171,7 @@ second_insert: btrfs_set_dir_type(leaf, dir_item, type); btrfs_set_dir_data_len(leaf, dir_item, 0); btrfs_set_dir_name_len(leaf, dir_item, name_len); + btrfs_set_dir_transid(leaf, dir_item, trans->transid); name_ptr = (unsigned long)(dir_item + 1); write_extent_buffer(leaf, name, name_ptr, name_len); btrfs_mark_buffer_dirty(leaf); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8e7a938bfbc..a4373db5967 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -41,6 +41,7 @@ #include "async-thread.h" #include "locking.h" #include "ref-cache.h" +#include "tree-log.h" #if 0 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) @@ -694,6 +695,18 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, } +int btrfs_write_tree_block(struct extent_buffer *buf) +{ + return btrfs_fdatawrite_range(buf->first_page->mapping, buf->start, + buf->start + buf->len - 1, WB_SYNC_NONE); +} + +int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) +{ + return btrfs_wait_on_page_writeback_range(buf->first_page->mapping, + buf->start, buf->start + buf->len -1); +} + struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, u64 parent_transid) { @@ -732,15 +745,6 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, return 0; } -int wait_on_tree_block_writeback(struct btrfs_root *root, - struct extent_buffer *buf) -{ - struct inode *btree_inode = root->fs_info->btree_inode; - wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->io_tree, - buf); - return 0; -} - static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, u32 stripesize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, @@ -771,6 +775,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, spin_lock_init(&root->node_lock); spin_lock_init(&root->list_lock); mutex_init(&root->objectid_mutex); + mutex_init(&root->log_mutex); btrfs_leaf_ref_tree_init(&root->ref_tree_struct); root->ref_tree = &root->ref_tree_struct; @@ -809,11 +814,74 @@ static int find_and_setup_root(struct btrfs_root *tree_root, return 0; } -struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, - struct btrfs_key *location) +int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) +{ + struct extent_buffer *eb; + int ret; + + if (!fs_info->log_root_tree) + return 0; + + eb = fs_info->log_root_tree->node; + + WARN_ON(btrfs_header_level(eb) != 0); + WARN_ON(btrfs_header_nritems(eb) != 0); + + ret = btrfs_free_extent(trans, fs_info->tree_root, + eb->start, eb->len, + BTRFS_TREE_LOG_OBJECTID, 0, 0, 0, 1); + BUG_ON(ret); + + free_extent_buffer(eb); + kfree(fs_info->log_root_tree); + fs_info->log_root_tree = NULL; + return 0; +} + +int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) { struct btrfs_root *root; struct btrfs_root *tree_root = fs_info->tree_root; + + root = kzalloc(sizeof(*root), GFP_NOFS); + if (!root) + return -ENOMEM; + + __setup_root(tree_root->nodesize, tree_root->leafsize, + tree_root->sectorsize, tree_root->stripesize, + root, fs_info, BTRFS_TREE_LOG_OBJECTID); + + root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; + root->root_key.type = BTRFS_ROOT_ITEM_KEY; + root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; + root->ref_cows = 0; + + root->node = btrfs_alloc_free_block(trans, root, root->leafsize, + BTRFS_TREE_LOG_OBJECTID, + 0, 0, 0, 0, 0); + + btrfs_set_header_nritems(root->node, 0); + btrfs_set_header_level(root->node, 0); + btrfs_set_header_bytenr(root->node, root->node->start); + btrfs_set_header_generation(root->node, trans->transid); + btrfs_set_header_owner(root->node, BTRFS_TREE_LOG_OBJECTID); + + write_extent_buffer(root->node, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(root->node), + BTRFS_FSID_SIZE); + btrfs_mark_buffer_dirty(root->node); + btrfs_tree_unlock(root->node); + fs_info->log_root_tree = root; + return 0; +} + +struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, + struct btrfs_key *location) +{ + struct btrfs_root *root; + struct btrfs_fs_info *fs_info = tree_root->fs_info; struct btrfs_path *path; struct extent_buffer *l; u64 highest_inode; @@ -863,11 +931,13 @@ out: blocksize, 0); BUG_ON(!root->node); insert: - root->ref_cows = 1; - ret = btrfs_find_highest_inode(root, &highest_inode); - if (ret == 0) { - root->highest_inode = highest_inode; - root->last_inode_alloc = highest_inode; + if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { + root->ref_cows = 1; + ret = btrfs_find_highest_inode(root, &highest_inode); + if (ret == 0) { + root->highest_inode = highest_inode; + root->last_inode_alloc = highest_inode; + } } return root; } @@ -907,7 +977,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, if (root) return root; - root = btrfs_read_fs_root_no_radix(fs_info, location); + root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); if (IS_ERR(root)) return root; ret = radix_tree_insert(&fs_info->fs_roots_radix, @@ -1250,16 +1320,18 @@ struct btrfs_root *open_ctree(struct super_block *sb, u32 blocksize; u32 stripesize; struct buffer_head *bh; - struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), + struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); - struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), + struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info), GFP_NOFS); - struct btrfs_root *chunk_root = kmalloc(sizeof(struct btrfs_root), + struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); - struct btrfs_root *dev_root = kmalloc(sizeof(struct btrfs_root), + struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); + struct btrfs_root *log_tree_root; + int ret; int err = -EINVAL; @@ -1343,6 +1415,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); mutex_init(&fs_info->trans_mutex); + mutex_init(&fs_info->tree_log_mutex); mutex_init(&fs_info->drop_mutex); mutex_init(&fs_info->alloc_mutex); mutex_init(&fs_info->chunk_mutex); @@ -1352,6 +1425,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, init_waitqueue_head(&fs_info->transaction_throttle); init_waitqueue_head(&fs_info->transaction_wait); init_waitqueue_head(&fs_info->async_submit_wait); + init_waitqueue_head(&fs_info->tree_log_wait); + atomic_set(&fs_info->tree_log_commit, 0); + atomic_set(&fs_info->tree_log_writers, 0); + fs_info->tree_log_transid = 0; #if 0 ret = add_hasher(fs_info, "crc32c"); @@ -1532,7 +1609,26 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!fs_info->transaction_kthread) goto fail_cleaner; + if (btrfs_super_log_root(disk_super) != 0) { + u32 blocksize; + u64 bytenr = btrfs_super_log_root(disk_super); + + blocksize = + btrfs_level_size(tree_root, + btrfs_super_log_root_level(disk_super)); + log_tree_root = kzalloc(sizeof(struct btrfs_root), + GFP_NOFS); + + __setup_root(nodesize, leafsize, sectorsize, stripesize, + log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); + + log_tree_root->node = read_tree_block(tree_root, bytenr, + blocksize, 0); + ret = btrfs_recover_log_trees(log_tree_root); + BUG_ON(ret); + } + fs_info->last_trans_committed = btrfs_super_generation(disk_super); return tree_root; fail_cleaner: diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 2562a273ae1..6b6fdc697f3 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -45,7 +45,7 @@ struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location, const char *name, int namelen); -struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, +struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, struct btrfs_key *location); struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, struct btrfs_key *location); @@ -74,4 +74,10 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, extent_submit_bio_hook_t *submit_bio_hook); int btrfs_congested_async(struct btrfs_fs_info *info, int iodone); unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); +int btrfs_write_tree_block(struct extent_buffer *buf); +int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); +int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info); +int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e63b3b4bed7..646b9148ca2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -496,6 +496,23 @@ static int match_extent_ref(struct extent_buffer *leaf, return ret == 0; } +/* simple helper to search for an existing extent at a given offset */ +int btrfs_lookup_extent(struct btrfs_root *root, struct btrfs_path *path, + u64 start, u64 len) +{ + int ret; + struct btrfs_key key; + + maybe_lock_mutex(root); + key.objectid = start; + key.offset = len; + btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, + 0, 0); + maybe_unlock_mutex(root); + return ret; +} + static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 bytenr, @@ -1409,7 +1426,7 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) } -static int update_pinned_extents(struct btrfs_root *root, +int btrfs_update_pinned_extents(struct btrfs_root *root, u64 bytenr, u64 num, int pin) { u64 len; @@ -1492,7 +1509,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, EXTENT_DIRTY); if (ret) break; - update_pinned_extents(root, start, end + 1 - start, 0); + btrfs_update_pinned_extents(root, start, end + 1 - start, 0); clear_extent_dirty(unpin, start, end, GFP_NOFS); set_extent_dirty(free_space_cache, start, end, GFP_NOFS); if (need_resched()) { @@ -1538,14 +1555,11 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, GFP_NOFS); - eb = btrfs_find_tree_block(extent_root, ins.objectid, + eb = btrfs_find_create_tree_block(extent_root, ins.objectid, ins.offset); - if (!btrfs_buffer_uptodate(eb, trans->transid)) { - mutex_unlock(&extent_root->fs_info->alloc_mutex); + if (!btrfs_buffer_uptodate(eb, trans->transid)) btrfs_read_buffer(eb, trans->transid); - mutex_lock(&extent_root->fs_info->alloc_mutex); - } btrfs_tree_lock(eb); level = btrfs_header_level(eb); @@ -1585,13 +1599,20 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, struct extent_buffer *buf; buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (buf) { + /* we can reuse a block if it hasn't been written + * and it is from this transaction. We can't + * reuse anything from the tree log root because + * it has tiny sub-transactions. + */ if (btrfs_buffer_uptodate(buf, 0) && btrfs_try_tree_lock(buf)) { u64 transid = root->fs_info->running_transaction->transid; u64 header_transid = btrfs_header_generation(buf); - if (header_transid == transid && + if (btrfs_header_owner(buf) != + BTRFS_TREE_LOG_OBJECTID && + header_transid == transid && !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { clean_tree_block(NULL, root, buf); @@ -1603,7 +1624,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, } free_extent_buffer(buf); } - update_pinned_extents(root, bytenr, num_bytes, 1); + btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); } else { set_extent_bits(&root->fs_info->pending_del, bytenr, bytenr + num_bytes - 1, @@ -1801,7 +1822,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct GFP_NOFS); if (!test_range_bit(&extent_root->fs_info->extent_ins, start, end, EXTENT_LOCKED, 0)) { - update_pinned_extents(extent_root, start, + btrfs_update_pinned_extents(extent_root, start, end + 1 - start, 1); ret = __free_extent(trans, extent_root, start, end + 1 - start, @@ -1919,6 +1940,12 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { last_ptr = &root->fs_info->last_data_alloc; } + if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { + last_ptr = &root->fs_info->last_log_alloc; + if (!last_ptr == 0 && root->fs_info->last_alloc) { + *last_ptr = root->fs_info->last_alloc + empty_cluster; + } + } if (last_ptr) { if (*last_ptr) @@ -2268,6 +2295,35 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, maybe_unlock_mutex(root); return ret; } + +/* + * this is used by the tree logging recovery code. It records that + * an extent has been allocated and makes sure to clear the free + * space cache bits as well + */ +int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + struct btrfs_key *ins) +{ + int ret; + struct btrfs_block_group_cache *block_group; + + maybe_lock_mutex(root); + block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); + cache_block_group(root, block_group); + + clear_extent_dirty(&root->fs_info->free_space_cache, + ins->objectid, ins->objectid + ins->offset - 1, + GFP_NOFS); + ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, + ref_generation, owner, + owner_offset, ins); + maybe_unlock_mutex(root); + return ret; +} + /* * finds a free extent and does all the dirty work required for allocation * returns the key for the extent through ins, and a tree buffer for @@ -2350,9 +2406,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return buf; } -static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_buffer *leaf) +int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *leaf) { u64 leaf_owner; u64 leaf_generation; @@ -2402,9 +2457,9 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, return 0; } -static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_leaf_ref *ref) +static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_leaf_ref *ref) { int i; int ret; @@ -2512,7 +2567,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, btrfs_header_nritems(cur)) break; if (*level == 0) { - ret = drop_leaf_ref_no_cache(trans, root, cur); + ret = btrfs_drop_leaf_ref(trans, root, cur); BUG_ON(ret); break; } @@ -2552,7 +2607,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, btrfs_node_key_to_cpu(cur, &key, path->slots[*level]); ref = btrfs_lookup_leaf_ref(root, bytenr); if (ref) { - ret = drop_leaf_ref(trans, root, ref); + ret = cache_drop_leaf_ref(trans, root, ref); BUG_ON(ret); btrfs_remove_leaf_ref(root, ref); btrfs_free_leaf_ref(root, ref); @@ -3628,6 +3683,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, extent_root = root->fs_info->extent_root; block_group_cache = &root->fs_info->block_group_cache; + root->fs_info->last_trans_new_blockgroup = trans->transid; + cache = kzalloc(sizeof(*cache), GFP_NOFS); BUG_ON(!cache); cache->key.objectid = chunk_offset; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e9e86fbaa24..84ecf3ab851 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -36,6 +36,8 @@ #include "btrfs_inode.h" #include "ioctl.h" #include "print-tree.h" +#include "tree-log.h" +#include "locking.h" #include "compat.h" @@ -988,10 +990,27 @@ out_nolock: *ppos = pos; if (num_written > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { - err = sync_page_range(inode, inode->i_mapping, - start_pos, num_written); + struct btrfs_trans_handle *trans; + + err = btrfs_fdatawrite_range(inode->i_mapping, start_pos, + start_pos + num_written -1, + WB_SYNC_NONE); + if (err < 0) + num_written = err; + + err = btrfs_wait_on_page_writeback_range(inode->i_mapping, + start_pos, start_pos + num_written - 1); if (err < 0) num_written = err; + + trans = btrfs_start_transaction(root, 1); + ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); + if (ret == 0) { + btrfs_sync_log(trans, root); + btrfs_end_transaction(trans, root); + } else { + btrfs_commit_transaction(trans, root); + } } else if (num_written > 0 && (file->f_flags & O_DIRECT)) { #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) do_sync_file_range(file, start_pos, @@ -1019,8 +1038,7 @@ int btrfs_release_file(struct inode * inode, struct file * filp) return 0; } -static int btrfs_sync_file(struct file *file, - struct dentry *dentry, int datasync) +int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -1043,6 +1061,8 @@ static int btrfs_sync_file(struct file *file, } mutex_unlock(&root->fs_info->trans_mutex); + filemap_fdatawait(inode->i_mapping); + /* * ok we haven't committed the transaction yet, lets do a commit */ @@ -1054,7 +1074,16 @@ static int btrfs_sync_file(struct file *file, ret = -ENOMEM; goto out; } - ret = btrfs_commit_transaction(trans, root); + + ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); + if (ret < 0) + goto out; + if (ret > 0) { + ret = btrfs_commit_transaction(trans, root); + } else { + btrfs_sync_log(trans, root); + ret = btrfs_end_transaction(trans, root); + } out: return ret > 0 ? EIO : ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 43d3f2649ca..65df9d83023 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -46,6 +46,8 @@ #include "volumes.h" #include "ordered-data.h" #include "xattr.h" +#include "compat.h" +#include "tree-log.h" struct btrfs_iget_args { u64 ino; @@ -586,6 +588,7 @@ nocow: &ordered_extent->list); btrfs_ordered_update_i_size(inode, ordered_extent); + btrfs_update_inode(trans, root, inode); btrfs_remove_ordered_extent(inode, ordered_extent); /* once for us */ @@ -593,7 +596,6 @@ nocow: /* once for the tree */ btrfs_put_ordered_extent(ordered_extent); - btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); return 0; } @@ -1007,7 +1009,8 @@ void btrfs_read_locked_inode(struct inode *inode) inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item); - inode->i_generation = btrfs_inode_generation(leaf, inode_item); + BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); + inode->i_generation = BTRFS_I(inode)->generation; inode->i_rdev = 0; rdev = btrfs_inode_rdev(leaf, inode_item); @@ -1056,7 +1059,8 @@ make_bad: make_bad_inode(inode); } -static void fill_inode_item(struct extent_buffer *leaf, +static void fill_inode_item(struct btrfs_trans_handle *trans, + struct extent_buffer *leaf, struct btrfs_inode_item *item, struct inode *inode) { @@ -1082,7 +1086,8 @@ static void fill_inode_item(struct extent_buffer *leaf, inode->i_ctime.tv_nsec); btrfs_set_inode_nblocks(leaf, item, inode->i_blocks); - btrfs_set_inode_generation(leaf, item, inode->i_generation); + btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); + btrfs_set_inode_transid(leaf, item, trans->transid); btrfs_set_inode_rdev(leaf, item, inode->i_rdev); btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); btrfs_set_inode_block_group(leaf, item, @@ -1112,7 +1117,7 @@ int noinline btrfs_update_inode(struct btrfs_trans_handle *trans, inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); - fill_inode_item(leaf, inode_item, inode); + fill_inode_item(trans, leaf, inode_item, inode); btrfs_mark_buffer_dirty(leaf); btrfs_set_inode_last_trans(trans, inode); ret = 0; @@ -1122,14 +1127,12 @@ failed: } -static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *dir, - struct dentry *dentry) +int btrfs_unlink_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *dir, struct inode *inode, + const char *name, int name_len) { struct btrfs_path *path; - const char *name = dentry->d_name.name; - int name_len = dentry->d_name.len; int ret = 0; struct extent_buffer *leaf; struct btrfs_dir_item *di; @@ -1160,13 +1163,12 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, btrfs_release_path(root, path); ret = btrfs_del_inode_ref(trans, root, name, name_len, - dentry->d_inode->i_ino, - dentry->d_parent->d_inode->i_ino, &index); + inode->i_ino, + dir->i_ino, &index); if (ret) { printk("failed to delete reference to %.*s, " "inode %lu parent %lu\n", name_len, name, - dentry->d_inode->i_ino, - dentry->d_parent->d_inode->i_ino); + inode->i_ino, dir->i_ino); goto err; } @@ -1183,21 +1185,25 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, ret = btrfs_delete_one_dir_name(trans, root, path, di); btrfs_release_path(root, path); - dentry->d_inode->i_ctime = dir->i_ctime; + ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, + inode, dir->i_ino); + BUG_ON(ret); + + ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, + dir, index); + BUG_ON(ret); err: btrfs_free_path(path); - if (!ret) { - btrfs_i_size_write(dir, dir->i_size - name_len * 2); - dir->i_mtime = dir->i_ctime = CURRENT_TIME; - btrfs_update_inode(trans, root, dir); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - dentry->d_inode->i_nlink--; -#else - drop_nlink(dentry->d_inode); -#endif - ret = btrfs_update_inode(trans, root, dentry->d_inode); - dir->i_sb->s_dirt = 1; - } + if (ret) + goto out; + + btrfs_i_size_write(dir, dir->i_size - name_len * 2); + inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; + btrfs_update_inode(trans, root, dir); + btrfs_drop_nlink(inode); + ret = btrfs_update_inode(trans, root, inode); + dir->i_sb->s_dirt = 1; +out: return ret; } @@ -1218,7 +1224,8 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); - ret = btrfs_unlink_trans(trans, root, dir, dentry); + ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, + dentry->d_name.name, dentry->d_name.len); if (inode->i_nlink == 0) ret = btrfs_orphan_add(trans, inode); @@ -1256,7 +1263,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) goto fail_trans; /* now the directory is empty */ - err = btrfs_unlink_trans(trans, root, dir, dentry); + err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, + dentry->d_name.name, dentry->d_name.len); if (!err) { btrfs_i_size_write(inode, 0); } @@ -1283,10 +1291,10 @@ fail: * min_type is the minimum key type to truncate down to. If set to 0, this * will kill all the items on this inode, including the INODE_ITEM_KEY. */ -static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode, - u32 min_type) +noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + u64 new_size, u32 min_type) { int ret; struct btrfs_path *path; @@ -1307,7 +1315,9 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, int extent_type = -1; u64 mask = root->sectorsize - 1; - btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1); + if (root->ref_cows) + btrfs_drop_extent_cache(inode, + new_size & (~mask), (u64)-1); path = btrfs_alloc_path(); path->reada = -1; BUG_ON(!path); @@ -1324,7 +1334,13 @@ search_again: goto error; } if (ret > 0) { - BUG_ON(path->slots[0] == 0); + /* there are no items in the tree for us to truncate, we're + * done + */ + if (path->slots[0] == 0) { + ret = 0; + goto error; + } path->slots[0]--; } @@ -1358,10 +1374,10 @@ search_again: } if (found_type == BTRFS_CSUM_ITEM_KEY) { ret = btrfs_csum_truncate(trans, root, path, - inode->i_size); + new_size); BUG_ON(ret); } - if (item_end < inode->i_size) { + if (item_end < new_size) { if (found_type == BTRFS_DIR_ITEM_KEY) { found_type = BTRFS_INODE_ITEM_KEY; } else if (found_type == BTRFS_EXTENT_ITEM_KEY) { @@ -1378,7 +1394,7 @@ search_again: btrfs_set_key_type(&key, found_type); goto next; } - if (found_key.offset >= inode->i_size) + if (found_key.offset >= new_size) del_item = 1; else del_item = 0; @@ -1394,7 +1410,7 @@ search_again: if (!del_item) { u64 orig_num_bytes = btrfs_file_extent_num_bytes(leaf, fi); - extent_num_bytes = inode->i_size - + extent_num_bytes = new_size - found_key.offset + root->sectorsize - 1; extent_num_bytes = extent_num_bytes & ~((u64)root->sectorsize - 1); @@ -1402,7 +1418,7 @@ search_again: extent_num_bytes); num_dec = (orig_num_bytes - extent_num_bytes); - if (extent_start != 0) + if (root->ref_cows && extent_start != 0) dec_i_blocks(inode, num_dec); btrfs_mark_buffer_dirty(leaf); } else { @@ -1413,22 +1429,29 @@ search_again: num_dec = btrfs_file_extent_num_bytes(leaf, fi); if (extent_start != 0) { found_extent = 1; - dec_i_blocks(inode, num_dec); + if (root->ref_cows) + dec_i_blocks(inode, num_dec); + } + if (root->ref_cows) { + root_gen = + btrfs_header_generation(leaf); } - root_gen = btrfs_header_generation(leaf); root_owner = btrfs_header_owner(leaf); } } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { if (!del_item) { - u32 newsize = inode->i_size - found_key.offset; - dec_i_blocks(inode, item_end + 1 - - found_key.offset - newsize); - newsize = - btrfs_file_extent_calc_inline_size(newsize); + u32 size = new_size - found_key.offset; + + if (root->ref_cows) { + dec_i_blocks(inode, item_end + 1 - + found_key.offset - size); + } + size = + btrfs_file_extent_calc_inline_size(size); ret = btrfs_truncate_item(trans, root, path, - newsize, 1); + size, 1); BUG_ON(ret); - } else { + } else if (root->ref_cows) { dec_i_blocks(inode, item_end + 1 - found_key.offset); } @@ -1666,7 +1689,7 @@ void btrfs_delete_inode(struct inode *inode) trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); - ret = btrfs_truncate_in_trans(trans, root, inode, 0); + ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0); if (ret) { btrfs_orphan_del(NULL, inode); goto no_delete_lock; @@ -1753,15 +1776,20 @@ static int fixup_tree_root_location(struct btrfs_root *root, return 0; } -static int btrfs_init_locked_inode(struct inode *inode, void *p) +static noinline void init_btrfs_i(struct inode *inode) { - struct btrfs_iget_args *args = p; - inode->i_ino = args->ino; - BTRFS_I(inode)->root = args->root; - BTRFS_I(inode)->delalloc_bytes = 0; - inode->i_mapping->writeback_index = 0; - BTRFS_I(inode)->disk_i_size = 0; - BTRFS_I(inode)->index_cnt = (u64)-1; + struct btrfs_inode *bi = BTRFS_I(inode); + + bi->i_acl = NULL; + bi->i_default_acl = NULL; + + bi->generation = 0; + bi->last_trans = 0; + bi->logged_trans = 0; + bi->delalloc_bytes = 0; + bi->disk_i_size = 0; + bi->flags = 0; + bi->index_cnt = (u64)-1; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); @@ -1771,6 +1799,15 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); mutex_init(&BTRFS_I(inode)->csum_mutex); mutex_init(&BTRFS_I(inode)->extent_mutex); + mutex_init(&BTRFS_I(inode)->log_mutex); +} + +static int btrfs_init_locked_inode(struct inode *inode, void *p) +{ + struct btrfs_iget_args *args = p; + inode->i_ino = args->ino; + init_btrfs_i(inode); + BTRFS_I(inode)->root = args->root; return 0; } @@ -2263,21 +2300,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, * btrfs_get_inode_index_count has an explanation for the magic * number */ + init_btrfs_i(inode); BTRFS_I(inode)->index_cnt = 2; - - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); - extent_io_tree_init(&BTRFS_I(inode)->io_tree, - inode->i_mapping, GFP_NOFS); - extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, - inode->i_mapping, GFP_NOFS); - btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); - INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); - mutex_init(&BTRFS_I(inode)->csum_mutex); - mutex_init(&BTRFS_I(inode)->extent_mutex); - BTRFS_I(inode)->delalloc_bytes = 0; - inode->i_mapping->writeback_index = 0; - BTRFS_I(inode)->disk_i_size = 0; BTRFS_I(inode)->root = root; + BTRFS_I(inode)->generation = trans->transid; if (mode & S_IFDIR) owner = 0; @@ -2290,7 +2316,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, new_inode_group = group; } BTRFS_I(inode)->block_group = new_inode_group; - BTRFS_I(inode)->flags = 0; key[0].objectid = objectid; btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); @@ -2318,7 +2343,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_item); - fill_inode_item(path->nodes[0], inode_item, inode); + fill_inode_item(trans, path->nodes[0], inode_item, inode); ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, struct btrfs_inode_ref); @@ -2349,38 +2374,34 @@ static inline u8 btrfs_inode_type(struct inode *inode) return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT]; } -static int btrfs_add_link(struct btrfs_trans_handle *trans, - struct dentry *dentry, struct inode *inode, - int add_backref, u64 index) +int btrfs_add_link(struct btrfs_trans_handle *trans, + struct inode *parent_inode, struct inode *inode, + const char *name, int name_len, int add_backref, u64 index) { int ret; struct btrfs_key key; - struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root; - struct inode *parent_inode = dentry->d_parent->d_inode; + struct btrfs_root *root = BTRFS_I(parent_inode)->root; key.objectid = inode->i_ino; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; - ret = btrfs_insert_dir_item(trans, root, - dentry->d_name.name, dentry->d_name.len, - dentry->d_parent->d_inode->i_ino, + ret = btrfs_insert_dir_item(trans, root, name, name_len, + parent_inode->i_ino, &key, btrfs_inode_type(inode), index); if (ret == 0) { if (add_backref) { ret = btrfs_insert_inode_ref(trans, root, - dentry->d_name.name, - dentry->d_name.len, - inode->i_ino, - parent_inode->i_ino, - index); + name, name_len, + inode->i_ino, + parent_inode->i_ino, + index); } btrfs_i_size_write(parent_inode, parent_inode->i_size + - dentry->d_name.len * 2); + name_len * 2); parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; - ret = btrfs_update_inode(trans, root, - dentry->d_parent->d_inode); + ret = btrfs_update_inode(trans, root, parent_inode); } return ret; } @@ -2389,7 +2410,9 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans, struct dentry *dentry, struct inode *inode, int backref, u64 index) { - int err = btrfs_add_link(trans, dentry, inode, backref, index); + int err = btrfs_add_link(trans, dentry->d_parent->d_inode, + inode, dentry->d_name.name, + dentry->d_name.len, backref, index); if (!err) { d_instantiate(dentry, inode); return 0; @@ -2513,19 +2536,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_mapping->backing_dev_info = &root->fs_info->bdi; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); - extent_io_tree_init(&BTRFS_I(inode)->io_tree, - inode->i_mapping, GFP_NOFS); - extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, - inode->i_mapping, GFP_NOFS); - INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); - mutex_init(&BTRFS_I(inode)->csum_mutex); - mutex_init(&BTRFS_I(inode)->extent_mutex); - BTRFS_I(inode)->delalloc_bytes = 0; - BTRFS_I(inode)->disk_i_size = 0; - inode->i_mapping->writeback_index = 0; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; - btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -2556,11 +2567,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink == 0) return -ENOENT; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - inode->i_nlink++; -#else - inc_nlink(inode); -#endif + btrfs_inc_nlink(inode); err = btrfs_check_free_space(root, 1, 0); if (err) goto fail; @@ -2650,7 +2657,9 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (err) goto out_fail; - err = btrfs_add_link(trans, dentry, inode, 0, index); + err = btrfs_add_link(trans, dentry->d_parent->d_inode, + inode, dentry->d_name.name, + dentry->d_name.len, 0, index); if (err) goto out_fail; @@ -3221,7 +3230,7 @@ static void btrfs_truncate(struct inode *inode) if (ret) goto out; /* FIXME, add redo link to tree so we don't leak on crash */ - ret = btrfs_truncate_in_trans(trans, root, inode, + ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, BTRFS_EXTENT_DATA_KEY); btrfs_update_inode(trans, root, inode); @@ -3304,6 +3313,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) if (!ei) return NULL; ei->last_trans = 0; + ei->logged_trans = 0; btrfs_ordered_inode_tree_init(&ei->ordered_tree); ei->i_acl = BTRFS_ACL_NOT_CACHED; ei->i_default_acl = BTRFS_ACL_NOT_CACHED; @@ -3463,31 +3473,39 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, btrfs_set_trans_block_group(trans, new_dir); - old_dentry->d_inode->i_nlink++; + btrfs_inc_nlink(old_dentry->d_inode); old_dir->i_ctime = old_dir->i_mtime = ctime; new_dir->i_ctime = new_dir->i_mtime = ctime; old_inode->i_ctime = ctime; - ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry); + ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode, + old_dentry->d_name.name, + old_dentry->d_name.len); if (ret) goto out_fail; if (new_inode) { new_inode->i_ctime = CURRENT_TIME; - ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry); + ret = btrfs_unlink_inode(trans, root, new_dir, + new_dentry->d_inode, + new_dentry->d_name.name, + new_dentry->d_name.len); if (ret) goto out_fail; if (new_inode->i_nlink == 0) { - ret = btrfs_orphan_add(trans, new_inode); + ret = btrfs_orphan_add(trans, new_dentry->d_inode); if (ret) goto out_fail; } + } ret = btrfs_set_inode_index(new_dir, old_inode, &index); if (ret) goto out_fail; - ret = btrfs_add_link(trans, new_dentry, old_inode, 1, index); + ret = btrfs_add_link(trans, new_dentry->d_parent->d_inode, + old_inode, new_dentry->d_name.name, + new_dentry->d_name.len, 1, index); if (ret) goto out_fail; @@ -3577,19 +3595,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_mapping->backing_dev_info = &root->fs_info->bdi; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); - extent_io_tree_init(&BTRFS_I(inode)->io_tree, - inode->i_mapping, GFP_NOFS); - extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, - inode->i_mapping, GFP_NOFS); - INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); - mutex_init(&BTRFS_I(inode)->csum_mutex); - mutex_init(&BTRFS_I(inode)->extent_mutex); - BTRFS_I(inode)->delalloc_bytes = 0; - BTRFS_I(inode)->disk_i_size = 0; - inode->i_mapping->writeback_index = 0; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; - btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -3691,6 +3697,7 @@ static struct file_operations btrfs_dir_file_operations = { .compat_ioctl = btrfs_ioctl, #endif .release = btrfs_release_file, + .fsync = btrfs_sync_file, }; static struct extent_io_ops btrfs_extent_io_ops = { diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 36726696e58..e3984f902e7 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -202,8 +202,9 @@ again: memcpy(&found_key, &key, sizeof(key)); key.offset++; btrfs_release_path(root, path); - dead_root = btrfs_read_fs_root_no_radix(root->fs_info, - &found_key); + dead_root = + btrfs_read_fs_root_no_radix(root->fs_info->tree_root, + &found_key); if (IS_ERR(dead_root)) { ret = PTR_ERR(dead_root); goto err; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index eff3ad72991..49c4f5b40ed 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -25,6 +25,7 @@ #include "transaction.h" #include "locking.h" #include "ref-cache.h" +#include "tree-log.h" static int total_trans = 0; extern struct kmem_cache *btrfs_trans_handle_cachep; @@ -57,6 +58,7 @@ static noinline int join_transaction(struct btrfs_root *root) root->fs_info->generation++; root->fs_info->last_alloc = 0; root->fs_info->last_data_alloc = 0; + root->fs_info->last_log_alloc = 0; cur_trans->num_writers = 1; cur_trans->num_joined = 0; cur_trans->transid = root->fs_info->generation; @@ -83,7 +85,7 @@ static noinline int join_transaction(struct btrfs_root *root) return 0; } -static noinline int record_root_in_trans(struct btrfs_root *root) +noinline int btrfs_record_root_in_trans(struct btrfs_root *root) { struct btrfs_dirty_root *dirty; u64 running_trans_id = root->fs_info->running_transaction->transid; @@ -151,7 +153,7 @@ static void wait_current_trans(struct btrfs_root *root) } } -struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, +static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, int num_blocks, int wait) { struct btrfs_trans_handle *h = @@ -164,7 +166,7 @@ struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, ret = join_transaction(root); BUG_ON(ret); - record_root_in_trans(root); + btrfs_record_root_in_trans(root); h->transid = root->fs_info->running_transaction->transid; h->transaction = root->fs_info->running_transaction; h->blocks_reserved = num_blocks; @@ -456,6 +458,8 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, BUG_ON(!root->ref_tree); dirty = root->dirty_root; + btrfs_free_log(trans, root); + if (root->commit_root == root->node) { WARN_ON(root->node->start != btrfs_root_bytenr(&root->root_item)); @@ -600,7 +604,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, num_bytes -= btrfs_root_used(&dirty->root->root_item); bytes_used = btrfs_root_used(&root->root_item); if (num_bytes) { - record_root_in_trans(root); + btrfs_record_root_in_trans(root); btrfs_set_root_used(&root->root_item, bytes_used - num_bytes); } @@ -745,7 +749,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, int ret; INIT_LIST_HEAD(&dirty_fs_roots); - mutex_lock(&root->fs_info->trans_mutex); if (trans->transaction->in_commit) { cur_trans = trans->transaction; @@ -821,10 +824,30 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, WARN_ON(cur_trans != trans->transaction); + /* btrfs_commit_tree_roots is responsible for getting the + * various roots consistent with each other. Every pointer + * in the tree of tree roots has to point to the most up to date + * root for every subvolume and other tree. So, we have to keep + * the tree logging code from jumping in and changing any + * of the trees. + * + * At this point in the commit, there can't be any tree-log + * writers, but a little lower down we drop the trans mutex + * and let new people in. By holding the tree_log_mutex + * from now until after the super is written, we avoid races + * with the tree-log code. + */ + mutex_lock(&root->fs_info->tree_log_mutex); + ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, &dirty_fs_roots); BUG_ON(ret); + /* add_dirty_roots gets rid of all the tree log roots, it is now + * safe to free the root of tree log roots + */ + btrfs_free_log_root_tree(trans, root->fs_info); + ret = btrfs_commit_tree_roots(trans, root); BUG_ON(ret); @@ -843,6 +866,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, chunk_root->node->start); btrfs_set_super_chunk_root_level(&root->fs_info->super_copy, btrfs_header_level(chunk_root->node)); + + if (!root->fs_info->log_root_recovering) { + btrfs_set_super_log_root(&root->fs_info->super_copy, 0); + btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0); + } + memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, sizeof(root->fs_info->super_copy)); @@ -857,6 +886,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, BUG_ON(ret); write_ctree_super(trans, root); + /* + * the super is written, we can safely allow the tree-loggers + * to go about their business + */ + mutex_unlock(&root->fs_info->tree_log_mutex); + btrfs_finish_extent_commit(trans, root, pinned_copy); mutex_lock(&root->fs_info->trans_mutex); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 598baa31241..cc63650d60d 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -98,4 +98,5 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, struct btrfs_root *root); void btrfs_throttle(struct btrfs_root *root); +int btrfs_record_root_in_trans(struct btrfs_root *root); #endif diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index cc2650b0695..b3bb5bbad76 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -81,12 +81,12 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, memcpy(&key, &root->defrag_progress, sizeof(key)); } - path->lowest_level = 1; path->keep_locks = 1; if (cache_only) min_trans = root->defrag_trans_start; - ret = btrfs_search_forward(root, &key, path, cache_only, min_trans); + ret = btrfs_search_forward(root, &key, NULL, path, + cache_only, min_trans); if (ret < 0) goto out; if (ret > 0) { diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c new file mode 100644 index 00000000000..d1ce8314b94 --- /dev/null +++ b/fs/btrfs/tree-log.c @@ -0,0 +1,2804 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include "ctree.h" +#include "transaction.h" +#include "disk-io.h" +#include "locking.h" +#include "print-tree.h" +#include "compat.h" + +/* magic values for the inode_only field in btrfs_log_inode: + * + * LOG_INODE_ALL means to log everything + * LOG_INODE_EXISTS means to log just enough to recreate the inode + * during log replay + */ +#define LOG_INODE_ALL 0 +#define LOG_INODE_EXISTS 1 + +/* + * stages for the tree walking. The first + * stage (0) is to only pin down the blocks we find + * the second stage (1) is to make sure that all the inodes + * we find in the log are created in the subvolume. + * + * The last stage is to deal with directories and links and extents + * and all the other fun semantics + */ +#define LOG_WALK_PIN_ONLY 0 +#define LOG_WALK_REPLAY_INODES 1 +#define LOG_WALK_REPLAY_ALL 2 + +static int __btrfs_log_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + int inode_only); + +/* + * tree logging is a special write ahead log used to make sure that + * fsyncs and O_SYNCs can happen without doing full tree commits. + * + * Full tree commits are expensive because they require commonly + * modified blocks to be recowed, creating many dirty pages in the + * extent tree an 4x-6x higher write load than ext3. + * + * Instead of doing a tree commit on every fsync, we use the + * key ranges and transaction ids to find items for a given file or directory + * that have changed in this transaction. Those items are copied into + * a special tree (one per subvolume root), that tree is written to disk + * and then the fsync is considered complete. + * + * After a crash, items are copied out of the log-tree back into the + * subvolume tree. Any file data extents found are recorded in the extent + * allocation tree, and the log-tree freed. + * + * The log tree is read three times, once to pin down all the extents it is + * using in ram and once, once to create all the inodes logged in the tree + * and once to do all the other items. + */ + +/* + * btrfs_add_log_tree adds a new per-subvolume log tree into the + * tree of log tree roots. This must be called with a tree log transaction + * running (see start_log_trans). + */ +int btrfs_add_log_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_key key; + struct btrfs_root_item root_item; + struct btrfs_inode_item *inode_item; + struct extent_buffer *leaf; + struct btrfs_root *new_root = root; + int ret; + u64 objectid = root->root_key.objectid; + + leaf = btrfs_alloc_free_block(trans, root, root->leafsize, + BTRFS_TREE_LOG_OBJECTID, + 0, 0, 0, 0, 0); + if (IS_ERR(leaf)) { + ret = PTR_ERR(leaf); + return ret; + } + + btrfs_set_header_nritems(leaf, 0); + btrfs_set_header_level(leaf, 0); + btrfs_set_header_bytenr(leaf, leaf->start); + btrfs_set_header_generation(leaf, trans->transid); + btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID); + + write_extent_buffer(leaf, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(leaf), + BTRFS_FSID_SIZE); + btrfs_mark_buffer_dirty(leaf); + + inode_item = &root_item.inode; + memset(inode_item, 0, sizeof(*inode_item)); + inode_item->generation = cpu_to_le64(1); + inode_item->size = cpu_to_le64(3); + inode_item->nlink = cpu_to_le32(1); + inode_item->nblocks = cpu_to_le64(1); + inode_item->mode = cpu_to_le32(S_IFDIR | 0755); + + btrfs_set_root_bytenr(&root_item, leaf->start); + btrfs_set_root_level(&root_item, 0); + btrfs_set_root_refs(&root_item, 0); + btrfs_set_root_used(&root_item, 0); + + memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); + root_item.drop_level = 0; + + btrfs_tree_unlock(leaf); + free_extent_buffer(leaf); + leaf = NULL; + + btrfs_set_root_dirid(&root_item, 0); + + key.objectid = BTRFS_TREE_LOG_OBJECTID; + key.offset = objectid; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + ret = btrfs_insert_root(trans, root->fs_info->log_root_tree, &key, + &root_item); + if (ret) + goto fail; + + new_root = btrfs_read_fs_root_no_radix(root->fs_info->log_root_tree, + &key); + BUG_ON(!new_root); + + WARN_ON(root->log_root); + root->log_root = new_root; + + /* + * log trees do not get reference counted because they go away + * before a real commit is actually done. They do store pointers + * to file data extents, and those reference counts still get + * updated (along with back refs to the log tree). + */ + new_root->ref_cows = 0; + new_root->last_trans = trans->transid; +fail: + return ret; +} + +/* + * start a sub transaction and setup the log tree + * this increments the log tree writer count to make the people + * syncing the tree wait for us to finish + */ +static int start_log_trans(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + int ret; + mutex_lock(&root->fs_info->tree_log_mutex); + if (!root->fs_info->log_root_tree) { + ret = btrfs_init_log_root_tree(trans, root->fs_info); + BUG_ON(ret); + } + if (!root->log_root) { + ret = btrfs_add_log_tree(trans, root); + BUG_ON(ret); + } + atomic_inc(&root->fs_info->tree_log_writers); + root->fs_info->tree_log_batch++; + mutex_unlock(&root->fs_info->tree_log_mutex); + return 0; +} + +/* + * returns 0 if there was a log transaction running and we were able + * to join, or returns -ENOENT if there were not transactions + * in progress + */ +static int join_running_log_trans(struct btrfs_root *root) +{ + int ret = -ENOENT; + + smp_mb(); + if (!root->log_root) + return -ENOENT; + + mutex_lock(&root->fs_info->tree_log_mutex); + if (root->log_root) { + ret = 0; + atomic_inc(&root->fs_info->tree_log_writers); + root->fs_info->tree_log_batch++; + } + mutex_unlock(&root->fs_info->tree_log_mutex); + return ret; +} + +/* + * indicate we're done making changes to the log tree + * and wake up anyone waiting to do a sync + */ +static int end_log_trans(struct btrfs_root *root) +{ + atomic_dec(&root->fs_info->tree_log_writers); + smp_mb(); + if (waitqueue_active(&root->fs_info->tree_log_wait)) + wake_up(&root->fs_info->tree_log_wait); + return 0; +} + + +/* + * the walk control struct is used to pass state down the chain when + * processing the log tree. The stage field tells us which part + * of the log tree processing we are currently doing. The others + * are state fields used for that specific part + */ +struct walk_control { + /* should we free the extent on disk when done? This is used + * at transaction commit time while freeing a log tree + */ + int free; + + /* should we write out the extent buffer? This is used + * while flushing the log tree to disk during a sync + */ + int write; + + /* should we wait for the extent buffer io to finish? Also used + * while flushing the log tree to disk for a sync + */ + int wait; + + /* pin only walk, we record which extents on disk belong to the + * log trees + */ + int pin; + + /* what stage of the replay code we're currently in */ + int stage; + + /* the root we are currently replaying */ + struct btrfs_root *replay_dest; + + /* the trans handle for the current replay */ + struct btrfs_trans_handle *trans; + + /* the function that gets used to process blocks we find in the + * tree. Note the extent_buffer might not be up to date when it is + * passed in, and it must be checked or read if you need the data + * inside it + */ + int (*process_func)(struct btrfs_root *log, struct extent_buffer *eb, + struct walk_control *wc, u64 gen); +}; + +/* + * process_func used to pin down extents, write them or wait on them + */ +static int process_one_buffer(struct btrfs_root *log, + struct extent_buffer *eb, + struct walk_control *wc, u64 gen) +{ + if (wc->pin) { + mutex_lock(&log->fs_info->alloc_mutex); + btrfs_update_pinned_extents(log->fs_info->extent_root, + eb->start, eb->len, 1); + mutex_unlock(&log->fs_info->alloc_mutex); + } + + if (btrfs_buffer_uptodate(eb, gen)) { + if (wc->write) + btrfs_write_tree_block(eb); + if (wc->wait) + btrfs_wait_tree_block_writeback(eb); + } + return 0; +} + +/* + * Item overwrite used by replay and tree logging. eb, slot and key all refer + * to the src data we are copying out. + * + * root is the tree we are copying into, and path is a scratch + * path for use in this function (it should be released on entry and + * will be released on exit). + * + * If the key is already in the destination tree the existing item is + * overwritten. If the existing item isn't big enough, it is extended. + * If it is too large, it is truncated. + * + * If the key isn't in the destination yet, a new item is inserted. + */ +static noinline int overwrite_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *eb, int slot, + struct btrfs_key *key) +{ + int ret; + u32 item_size; + u64 saved_i_size = 0; + int save_old_i_size = 0; + unsigned long src_ptr; + unsigned long dst_ptr; + int overwrite_root = 0; + + if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) + overwrite_root = 1; + + item_size = btrfs_item_size_nr(eb, slot); + src_ptr = btrfs_item_ptr_offset(eb, slot); + + /* look for the key in the destination tree */ + ret = btrfs_search_slot(NULL, root, key, path, 0, 0); + if (ret == 0) { + char *src_copy; + char *dst_copy; + u32 dst_size = btrfs_item_size_nr(path->nodes[0], + path->slots[0]); + if (dst_size != item_size) + goto insert; + + if (item_size == 0) { + btrfs_release_path(root, path); + return 0; + } + dst_copy = kmalloc(item_size, GFP_NOFS); + src_copy = kmalloc(item_size, GFP_NOFS); + + read_extent_buffer(eb, src_copy, src_ptr, item_size); + + dst_ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); + read_extent_buffer(path->nodes[0], dst_copy, dst_ptr, + item_size); + ret = memcmp(dst_copy, src_copy, item_size); + + kfree(dst_copy); + kfree(src_copy); + /* + * they have the same contents, just return, this saves + * us from cowing blocks in the destination tree and doing + * extra writes that may not have been done by a previous + * sync + */ + if (ret == 0) { + btrfs_release_path(root, path); + return 0; + } + + } +insert: + btrfs_release_path(root, path); + /* try to insert the key into the destination tree */ + ret = btrfs_insert_empty_item(trans, root, path, + key, item_size); + + /* make sure any existing item is the correct size */ + if (ret == -EEXIST) { + u32 found_size; + found_size = btrfs_item_size_nr(path->nodes[0], + path->slots[0]); + if (found_size > item_size) { + btrfs_truncate_item(trans, root, path, item_size, 1); + } else if (found_size < item_size) { + ret = btrfs_del_item(trans, root, + path); + BUG_ON(ret); + + btrfs_release_path(root, path); + ret = btrfs_insert_empty_item(trans, + root, path, key, item_size); + BUG_ON(ret); + } + } else if (ret) { + BUG(); + } + dst_ptr = btrfs_item_ptr_offset(path->nodes[0], + path->slots[0]); + + /* don't overwrite an existing inode if the generation number + * was logged as zero. This is done when the tree logging code + * is just logging an inode to make sure it exists after recovery. + * + * Also, don't overwrite i_size on directories during replay. + * log replay inserts and removes directory items based on the + * state of the tree found in the subvolume, and i_size is modified + * as it goes + */ + if (key->type == BTRFS_INODE_ITEM_KEY && ret == -EEXIST) { + struct btrfs_inode_item *src_item; + struct btrfs_inode_item *dst_item; + + src_item = (struct btrfs_inode_item *)src_ptr; + dst_item = (struct btrfs_inode_item *)dst_ptr; + + if (btrfs_inode_generation(eb, src_item) == 0) + goto no_copy; + + if (overwrite_root && + S_ISDIR(btrfs_inode_mode(eb, src_item)) && + S_ISDIR(btrfs_inode_mode(path->nodes[0], dst_item))) { + save_old_i_size = 1; + saved_i_size = btrfs_inode_size(path->nodes[0], + dst_item); + } + } + + copy_extent_buffer(path->nodes[0], eb, dst_ptr, + src_ptr, item_size); + + if (save_old_i_size) { + struct btrfs_inode_item *dst_item; + dst_item = (struct btrfs_inode_item *)dst_ptr; + btrfs_set_inode_size(path->nodes[0], dst_item, saved_i_size); + } + + /* make sure the generation is filled in */ + if (key->type == BTRFS_INODE_ITEM_KEY) { + struct btrfs_inode_item *dst_item; + dst_item = (struct btrfs_inode_item *)dst_ptr; + if (btrfs_inode_generation(path->nodes[0], dst_item) == 0) { + btrfs_set_inode_generation(path->nodes[0], dst_item, + trans->transid); + } + } +no_copy: + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(root, path); + return 0; +} + +/* + * simple helper to read an inode off the disk from a given root + * This can only be called for subvolume roots and not for the log + */ +static noinline struct inode *read_one_inode(struct btrfs_root *root, + u64 objectid) +{ + struct inode *inode; + inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); + if (inode->i_state & I_NEW) { + BTRFS_I(inode)->root = root; + BTRFS_I(inode)->location.objectid = objectid; + BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; + BTRFS_I(inode)->location.offset = 0; + btrfs_read_locked_inode(inode); + unlock_new_inode(inode); + + } + if (is_bad_inode(inode)) { + iput(inode); + inode = NULL; + } + return inode; +} + +/* replays a single extent in 'eb' at 'slot' with 'key' into the + * subvolume 'root'. path is released on entry and should be released + * on exit. + * + * extents in the log tree have not been allocated out of the extent + * tree yet. So, this completes the allocation, taking a reference + * as required if the extent already exists or creating a new extent + * if it isn't in the extent allocation tree yet. + * + * The extent is inserted into the file, dropping any existing extents + * from the file that overlap the new one. + */ +static noinline int replay_one_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *eb, int slot, + struct btrfs_key *key) +{ + int found_type; + u64 mask = root->sectorsize - 1; + u64 extent_end; + u64 alloc_hint; + u64 start = key->offset; + struct btrfs_file_extent_item *item; + struct inode *inode = NULL; + unsigned long size; + int ret = 0; + + item = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); + found_type = btrfs_file_extent_type(eb, item); + + if (found_type == BTRFS_FILE_EXTENT_REG) + extent_end = start + btrfs_file_extent_num_bytes(eb, item); + else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + size = btrfs_file_extent_inline_len(eb, + btrfs_item_nr(eb, slot)); + extent_end = (start + size + mask) & ~mask; + } else { + ret = 0; + goto out; + } + + inode = read_one_inode(root, key->objectid); + if (!inode) { + ret = -EIO; + goto out; + } + + /* + * first check to see if we already have this extent in the + * file. This must be done before the btrfs_drop_extents run + * so we don't try to drop this extent. + */ + ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, + start, 0); + + if (ret == 0 && found_type == BTRFS_FILE_EXTENT_REG) { + struct btrfs_file_extent_item cmp1; + struct btrfs_file_extent_item cmp2; + struct btrfs_file_extent_item *existing; + struct extent_buffer *leaf; + + leaf = path->nodes[0]; + existing = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + + read_extent_buffer(eb, &cmp1, (unsigned long)item, + sizeof(cmp1)); + read_extent_buffer(leaf, &cmp2, (unsigned long)existing, + sizeof(cmp2)); + + /* + * we already have a pointer to this exact extent, + * we don't have to do anything + */ + if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) { + btrfs_release_path(root, path); + goto out; + } + } + btrfs_release_path(root, path); + + /* drop any overlapping extents */ + ret = btrfs_drop_extents(trans, root, inode, + start, extent_end, start, &alloc_hint); + BUG_ON(ret); + + BUG_ON(ret); + if (found_type == BTRFS_FILE_EXTENT_REG) { + struct btrfs_key ins; + + ins.objectid = btrfs_file_extent_disk_bytenr(eb, item); + ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); + ins.type = BTRFS_EXTENT_ITEM_KEY; + + /* insert the extent pointer in the file */ + ret = overwrite_item(trans, root, path, eb, slot, key); + BUG_ON(ret); + + /* + * is this extent already allocated in the extent + * allocation tree? If so, just add a reference + */ + ret = btrfs_lookup_extent(root, path, ins.objectid, ins.offset); + btrfs_release_path(root, path); + if (ret == 0) { + ret = btrfs_inc_extent_ref(trans, root, + ins.objectid, ins.offset, + root->root_key.objectid, + trans->transid, key->objectid, start); + } else { + /* + * insert the extent pointer in the extent + * allocation tree + */ + ret = btrfs_alloc_logged_extent(trans, root, + root->root_key.objectid, + trans->transid, key->objectid, + start, &ins); + BUG_ON(ret); + } + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + /* inline extents are easy, we just overwrite them */ + ret = overwrite_item(trans, root, path, eb, slot, key); + BUG_ON(ret); + } + /* btrfs_drop_extents changes i_blocks, update it here */ + inode->i_blocks += (extent_end - start) >> 9; + btrfs_update_inode(trans, root, inode); +out: + if (inode) + iput(inode); + return ret; +} + +/* + * when cleaning up conflicts between the directory names in the + * subvolume, directory names in the log and directory names in the + * inode back references, we may have to unlink inodes from directories. + * + * This is a helper function to do the unlink of a specific directory + * item + */ +static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct inode *dir, + struct btrfs_dir_item *di) +{ + struct inode *inode; + char *name; + int name_len; + struct extent_buffer *leaf; + struct btrfs_key location; + int ret; + + leaf = path->nodes[0]; + + btrfs_dir_item_key_to_cpu(leaf, di, &location); + name_len = btrfs_dir_name_len(leaf, di); + name = kmalloc(name_len, GFP_NOFS); + read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); + btrfs_release_path(root, path); + + inode = read_one_inode(root, location.objectid); + BUG_ON(!inode); + + btrfs_inc_nlink(inode); + ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); + kfree(name); + + iput(inode); + return ret; +} + +/* + * helper function to see if a given name and sequence number found + * in an inode back reference are already in a directory and correctly + * point to this inode + */ +static noinline int inode_in_dir(struct btrfs_root *root, + struct btrfs_path *path, + u64 dirid, u64 objectid, u64 index, + const char *name, int name_len) +{ + struct btrfs_dir_item *di; + struct btrfs_key location; + int match = 0; + + di = btrfs_lookup_dir_index_item(NULL, root, path, dirid, + index, name, name_len, 0); + if (di && !IS_ERR(di)) { + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); + if (location.objectid != objectid) + goto out; + } else + goto out; + btrfs_release_path(root, path); + + di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0); + if (di && !IS_ERR(di)) { + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); + if (location.objectid != objectid) + goto out; + } else + goto out; + match = 1; +out: + btrfs_release_path(root, path); + return match; +} + +/* + * helper function to check a log tree for a named back reference in + * an inode. This is used to decide if a back reference that is + * found in the subvolume conflicts with what we find in the log. + * + * inode backreferences may have multiple refs in a single item, + * during replay we process one reference at a time, and we don't + * want to delete valid links to a file from the subvolume if that + * link is also in the log. + */ +static noinline int backref_in_log(struct btrfs_root *log, + struct btrfs_key *key, + char *name, int namelen) +{ + struct btrfs_path *path; + struct btrfs_inode_ref *ref; + unsigned long ptr; + unsigned long ptr_end; + unsigned long name_ptr; + int found_name_len; + int item_size; + int ret; + int match = 0; + + path = btrfs_alloc_path(); + ret = btrfs_search_slot(NULL, log, key, path, 0, 0); + if (ret != 0) + goto out; + + item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); + ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); + ptr_end = ptr + item_size; + while (ptr < ptr_end) { + ref = (struct btrfs_inode_ref *)ptr; + found_name_len = btrfs_inode_ref_name_len(path->nodes[0], ref); + if (found_name_len == namelen) { + name_ptr = (unsigned long)(ref + 1); + ret = memcmp_extent_buffer(path->nodes[0], name, + name_ptr, namelen); + if (ret == 0) { + match = 1; + goto out; + } + } + ptr = (unsigned long)(ref + 1) + found_name_len; + } +out: + btrfs_free_path(path); + return match; +} + + +/* + * replay one inode back reference item found in the log tree. + * eb, slot and key refer to the buffer and key found in the log tree. + * root is the destination we are replaying into, and path is for temp + * use by this function. (it should be released on return). + */ +static noinline int add_inode_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_root *log, + struct btrfs_path *path, + struct extent_buffer *eb, int slot, + struct btrfs_key *key) +{ + struct inode *dir; + int ret; + struct btrfs_key location; + struct btrfs_inode_ref *ref; + struct btrfs_dir_item *di; + struct inode *inode; + char *name; + int namelen; + unsigned long ref_ptr; + unsigned long ref_end; + + location.objectid = key->objectid; + location.type = BTRFS_INODE_ITEM_KEY; + location.offset = 0; + + /* + * it is possible that we didn't log all the parent directories + * for a given inode. If we don't find the dir, just don't + * copy the back ref in. The link count fixup code will take + * care of the rest + */ + dir = read_one_inode(root, key->offset); + if (!dir) + return -ENOENT; + + inode = read_one_inode(root, key->objectid); + BUG_ON(!dir); + + ref_ptr = btrfs_item_ptr_offset(eb, slot); + ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); + +again: + ref = (struct btrfs_inode_ref *)ref_ptr; + + namelen = btrfs_inode_ref_name_len(eb, ref); + name = kmalloc(namelen, GFP_NOFS); + BUG_ON(!name); + + read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen); + + /* if we already have a perfect match, we're done */ + if (inode_in_dir(root, path, dir->i_ino, inode->i_ino, + btrfs_inode_ref_index(eb, ref), + name, namelen)) { + goto out; + } + + /* + * look for a conflicting back reference in the metadata. + * if we find one we have to unlink that name of the file + * before we add our new link. Later on, we overwrite any + * existing back reference, and we don't want to create + * dangling pointers in the directory. + */ +conflict_again: + ret = btrfs_search_slot(NULL, root, key, path, 0, 0); + if (ret == 0) { + char *victim_name; + int victim_name_len; + struct btrfs_inode_ref *victim_ref; + unsigned long ptr; + unsigned long ptr_end; + struct extent_buffer *leaf = path->nodes[0]; + + /* are we trying to overwrite a back ref for the root directory + * if so, just jump out, we're done + */ + if (key->objectid == key->offset) + goto out_nowrite; + + /* check all the names in this back reference to see + * if they are in the log. if so, we allow them to stay + * otherwise they must be unlinked as a conflict + */ + ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + ptr_end = ptr + btrfs_item_size_nr(leaf, path->slots[0]); + while(ptr < ptr_end) { + victim_ref = (struct btrfs_inode_ref *)ptr; + victim_name_len = btrfs_inode_ref_name_len(leaf, + victim_ref); + victim_name = kmalloc(victim_name_len, GFP_NOFS); + BUG_ON(!victim_name); + + read_extent_buffer(leaf, victim_name, + (unsigned long)(victim_ref + 1), + victim_name_len); + + if (!backref_in_log(log, key, victim_name, + victim_name_len)) { + btrfs_inc_nlink(inode); + btrfs_release_path(root, path); + ret = btrfs_unlink_inode(trans, root, dir, + inode, victim_name, + victim_name_len); + kfree(victim_name); + btrfs_release_path(root, path); + goto conflict_again; + } + kfree(victim_name); + ptr = (unsigned long)(victim_ref + 1) + victim_name_len; + } + BUG_ON(ret); + } + btrfs_release_path(root, path); + + /* look for a conflicting sequence number */ + di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, + btrfs_inode_ref_index(eb, ref), + name, namelen, 0); + if (di && !IS_ERR(di)) { + ret = drop_one_dir_item(trans, root, path, dir, di); + BUG_ON(ret); + } + btrfs_release_path(root, path); + + + /* look for a conflicting name */ + di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, + name, namelen, 0); + if (di && !IS_ERR(di)) { + ret = drop_one_dir_item(trans, root, path, dir, di); + BUG_ON(ret); + } + btrfs_release_path(root, path); + + /* insert our name */ + ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, + btrfs_inode_ref_index(eb, ref)); + BUG_ON(ret); + + btrfs_update_inode(trans, root, inode); + +out: + ref_ptr = (unsigned long)(ref + 1) + namelen; + kfree(name); + if (ref_ptr < ref_end) + goto again; + + /* finally write the back reference in the inode */ + ret = overwrite_item(trans, root, path, eb, slot, key); + BUG_ON(ret); + +out_nowrite: + btrfs_release_path(root, path); + iput(dir); + iput(inode); + return 0; +} + +/* + * replay one csum item from the log tree into the subvolume 'root' + * eb, slot and key all refer to the log tree + * path is for temp use by this function and should be released on return + * + * This copies the checksums out of the log tree and inserts them into + * the subvolume. Any existing checksums for this range in the file + * are overwritten, and new items are added where required. + * + * We keep this simple by reusing the btrfs_ordered_sum code from + * the data=ordered mode. This basically means making a copy + * of all the checksums in ram, which we have to do anyway for kmap + * rules. + * + * The copy is then sent down to btrfs_csum_file_blocks, which + * does all the hard work of finding existing items in the file + * or adding new ones. + */ +static noinline int replay_one_csum(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *eb, int slot, + struct btrfs_key *key) +{ + int ret; + u32 item_size = btrfs_item_size_nr(eb, slot); + u64 cur_offset; + unsigned long file_bytes; + struct btrfs_ordered_sum *sums; + struct btrfs_sector_sum *sector_sum; + struct inode *inode; + unsigned long ptr; + + file_bytes = (item_size / BTRFS_CRC32_SIZE) * root->sectorsize; + inode = read_one_inode(root, key->objectid); + if (!inode) { + return -EIO; + } + + sums = kzalloc(btrfs_ordered_sum_size(root, file_bytes), GFP_NOFS); + if (!sums) { + iput(inode); + return -ENOMEM; + } + + INIT_LIST_HEAD(&sums->list); + sums->len = file_bytes; + sums->file_offset = key->offset; + + /* + * copy all the sums into the ordered sum struct + */ + sector_sum = sums->sums; + cur_offset = key->offset; + ptr = btrfs_item_ptr_offset(eb, slot); + while(item_size > 0) { + sector_sum->offset = cur_offset; + read_extent_buffer(eb, §or_sum->sum, ptr, BTRFS_CRC32_SIZE); + sector_sum++; + item_size -= BTRFS_CRC32_SIZE; + ptr += BTRFS_CRC32_SIZE; + cur_offset += root->sectorsize; + } + + /* let btrfs_csum_file_blocks add them into the file */ + ret = btrfs_csum_file_blocks(trans, root, inode, sums); + BUG_ON(ret); + kfree(sums); + iput(inode); + + return 0; +} +/* + * There are a few corners where the link count of the file can't + * be properly maintained during replay. So, instead of adding + * lots of complexity to the log code, we just scan the backrefs + * for any file that has been through replay. + * + * The scan will update the link count on the inode to reflect the + * number of back refs found. If it goes down to zero, the iput + * will free the inode. + */ +static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode) +{ + struct btrfs_path *path; + int ret; + struct btrfs_key key; + u64 nlink = 0; + unsigned long ptr; + unsigned long ptr_end; + int name_len; + + key.objectid = inode->i_ino; + key.type = BTRFS_INODE_REF_KEY; + key.offset = (u64)-1; + + path = btrfs_alloc_path(); + + while(1) { + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + break; + if (ret > 0) { + if (path->slots[0] == 0) + break; + path->slots[0]--; + } + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid != inode->i_ino || + key.type != BTRFS_INODE_REF_KEY) + break; + ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); + ptr_end = ptr + btrfs_item_size_nr(path->nodes[0], + path->slots[0]); + while(ptr < ptr_end) { + struct btrfs_inode_ref *ref; + + ref = (struct btrfs_inode_ref *)ptr; + name_len = btrfs_inode_ref_name_len(path->nodes[0], + ref); + ptr = (unsigned long)(ref + 1) + name_len; + nlink++; + } + + if (key.offset == 0) + break; + key.offset--; + btrfs_release_path(root, path); + } + btrfs_free_path(path); + if (nlink != inode->i_nlink) { + inode->i_nlink = nlink; + btrfs_update_inode(trans, root, inode); + } + + return 0; +} + +static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path) +{ + int ret; + struct btrfs_key key; + struct inode *inode; + + key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; + key.type = BTRFS_ORPHAN_ITEM_KEY; + key.offset = (u64)-1; + while(1) { + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) + break; + + if (ret == 1) { + if (path->slots[0] == 0) + break; + path->slots[0]--; + } + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (key.objectid != BTRFS_TREE_LOG_FIXUP_OBJECTID || + key.type != BTRFS_ORPHAN_ITEM_KEY) + break; + + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + + btrfs_release_path(root, path); + inode = read_one_inode(root, key.offset); + BUG_ON(!inode); + + ret = fixup_inode_link_count(trans, root, inode); + BUG_ON(ret); + + iput(inode); + + if (key.offset == 0) + break; + key.offset--; + } + btrfs_release_path(root, path); + return 0; +} + + +/* + * record a given inode in the fixup dir so we can check its link + * count when replay is done. The link count is incremented here + * so the inode won't go away until we check it + */ +static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 objectid) +{ + struct btrfs_key key; + int ret = 0; + struct inode *inode; + + inode = read_one_inode(root, objectid); + BUG_ON(!inode); + + key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; + btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); + key.offset = objectid; + + ret = btrfs_insert_empty_item(trans, root, path, &key, 0); + + btrfs_release_path(root, path); + if (ret == 0) { + btrfs_inc_nlink(inode); + btrfs_update_inode(trans, root, inode); + } else if (ret == -EEXIST) { + ret = 0; + } else { + BUG(); + } + iput(inode); + + return ret; +} + +/* + * when replaying the log for a directory, we only insert names + * for inodes that actually exist. This means an fsync on a directory + * does not implicitly fsync all the new files in it + */ +static noinline int insert_one_name(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 dirid, u64 index, + char *name, int name_len, u8 type, + struct btrfs_key *location) +{ + struct inode *inode; + struct inode *dir; + int ret; + + inode = read_one_inode(root, location->objectid); + if (!inode) + return -ENOENT; + + dir = read_one_inode(root, dirid); + if (!dir) { + iput(inode); + return -EIO; + } + ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index); + + /* FIXME, put inode into FIXUP list */ + + iput(inode); + iput(dir); + return ret; +} + +/* + * take a single entry in a log directory item and replay it into + * the subvolume. + * + * if a conflicting item exists in the subdirectory already, + * the inode it points to is unlinked and put into the link count + * fix up tree. + * + * If a name from the log points to a file or directory that does + * not exist in the FS, it is skipped. fsyncs on directories + * do not force down inodes inside that directory, just changes to the + * names or unlinks in a directory. + */ +static noinline int replay_one_name(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *eb, + struct btrfs_dir_item *di, + struct btrfs_key *key) +{ + char *name; + int name_len; + struct btrfs_dir_item *dst_di; + struct btrfs_key found_key; + struct btrfs_key log_key; + struct inode *dir; + struct inode *inode; + u8 log_type; + int ret; + + dir = read_one_inode(root, key->objectid); + BUG_ON(!dir); + + name_len = btrfs_dir_name_len(eb, di); + name = kmalloc(name_len, GFP_NOFS); + log_type = btrfs_dir_type(eb, di); + read_extent_buffer(eb, name, (unsigned long)(di + 1), + name_len); + + btrfs_dir_item_key_to_cpu(eb, di, &log_key); + if (key->type == BTRFS_DIR_ITEM_KEY) { + dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, + name, name_len, 1); + } + else if (key->type == BTRFS_DIR_INDEX_KEY) { + dst_di = btrfs_lookup_dir_index_item(trans, root, path, + key->objectid, + key->offset, name, + name_len, 1); + } else { + BUG(); + } + if (!dst_di || IS_ERR(dst_di)) { + /* we need a sequence number to insert, so we only + * do inserts for the BTRFS_DIR_INDEX_KEY types + */ + if (key->type != BTRFS_DIR_INDEX_KEY) + goto out; + goto insert; + } + + btrfs_dir_item_key_to_cpu(path->nodes[0], dst_di, &found_key); + /* the existing item matches the logged item */ + if (found_key.objectid == log_key.objectid && + found_key.type == log_key.type && + found_key.offset == log_key.offset && + btrfs_dir_type(path->nodes[0], dst_di) == log_type) { + goto out; + } + + /* + * don't drop the conflicting directory entry if the inode + * for the new entry doesn't exist + */ + inode = read_one_inode(root, log_key.objectid); + if (!inode) + goto out; + + iput(inode); + ret = drop_one_dir_item(trans, root, path, dir, dst_di); + BUG_ON(ret); + + if (key->type == BTRFS_DIR_INDEX_KEY) + goto insert; +out: + btrfs_release_path(root, path); + kfree(name); + iput(dir); + return 0; + +insert: + btrfs_release_path(root, path); + ret = insert_one_name(trans, root, path, key->objectid, key->offset, + name, name_len, log_type, &log_key); + + if (ret && ret != -ENOENT) + BUG(); + goto out; +} + +/* + * find all the names in a directory item and reconcile them into + * the subvolume. Only BTRFS_DIR_ITEM_KEY types will have more than + * one name in a directory item, but the same code gets used for + * both directory index types + */ +static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *eb, int slot, + struct btrfs_key *key) +{ + int ret; + u32 item_size = btrfs_item_size_nr(eb, slot); + struct btrfs_dir_item *di; + int name_len; + unsigned long ptr; + unsigned long ptr_end; + + ptr = btrfs_item_ptr_offset(eb, slot); + ptr_end = ptr + item_size; + while(ptr < ptr_end) { + di = (struct btrfs_dir_item *)ptr; + name_len = btrfs_dir_name_len(eb, di); + ret = replay_one_name(trans, root, path, eb, di, key); + BUG_ON(ret); + ptr = (unsigned long)(di + 1); + ptr += name_len; + } + return 0; +} + +/* + * directory replay has two parts. There are the standard directory + * items in the log copied from the subvolume, and range items + * created in the log while the subvolume was logged. + * + * The range items tell us which parts of the key space the log + * is authoritative for. During replay, if a key in the subvolume + * directory is in a logged range item, but not actually in the log + * that means it was deleted from the directory before the fsync + * and should be removed. + */ +static noinline int find_dir_range(struct btrfs_root *root, + struct btrfs_path *path, + u64 dirid, int key_type, + u64 *start_ret, u64 *end_ret) +{ + struct btrfs_key key; + u64 found_end; + struct btrfs_dir_log_item *item; + int ret; + int nritems; + + if (*start_ret == (u64)-1) + return 1; + + key.objectid = dirid; + key.type = key_type; + key.offset = *start_ret; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + if (ret > 0) { + if (path->slots[0] == 0) + goto out; + path->slots[0]--; + } + if (ret != 0) + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + + if (key.type != key_type || key.objectid != dirid) { + ret = 1; + goto next; + } + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_dir_log_item); + found_end = btrfs_dir_log_end(path->nodes[0], item); + + if (*start_ret >= key.offset && *start_ret <= found_end) { + ret = 0; + *start_ret = key.offset; + *end_ret = found_end; + goto out; + } + ret = 1; +next: + /* check the next slot in the tree to see if it is a valid item */ + nritems = btrfs_header_nritems(path->nodes[0]); + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret) + goto out; + } else { + path->slots[0]++; + } + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + + if (key.type != key_type || key.objectid != dirid) { + ret = 1; + goto out; + } + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_dir_log_item); + found_end = btrfs_dir_log_end(path->nodes[0], item); + *start_ret = key.offset; + *end_ret = found_end; + ret = 0; +out: + btrfs_release_path(root, path); + return ret; +} + +/* + * this looks for a given directory item in the log. If the directory + * item is not in the log, the item is removed and the inode it points + * to is unlinked + */ +static noinline int check_item_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_root *log, + struct btrfs_path *path, + struct btrfs_path *log_path, + struct inode *dir, + struct btrfs_key *dir_key) +{ + int ret; + struct extent_buffer *eb; + int slot; + u32 item_size; + struct btrfs_dir_item *di; + struct btrfs_dir_item *log_di; + int name_len; + unsigned long ptr; + unsigned long ptr_end; + char *name; + struct inode *inode; + struct btrfs_key location; + +again: + eb = path->nodes[0]; + slot = path->slots[0]; + item_size = btrfs_item_size_nr(eb, slot); + ptr = btrfs_item_ptr_offset(eb, slot); + ptr_end = ptr + item_size; + while(ptr < ptr_end) { + di = (struct btrfs_dir_item *)ptr; + name_len = btrfs_dir_name_len(eb, di); + name = kmalloc(name_len, GFP_NOFS); + if (!name) { + ret = -ENOMEM; + goto out; + } + read_extent_buffer(eb, name, (unsigned long)(di + 1), + name_len); + log_di = NULL; + if (dir_key->type == BTRFS_DIR_ITEM_KEY) { + log_di = btrfs_lookup_dir_item(trans, log, log_path, + dir_key->objectid, + name, name_len, 0); + } else if (dir_key->type == BTRFS_DIR_INDEX_KEY) { + log_di = btrfs_lookup_dir_index_item(trans, log, + log_path, + dir_key->objectid, + dir_key->offset, + name, name_len, 0); + } + if (!log_di || IS_ERR(log_di)) { + btrfs_dir_item_key_to_cpu(eb, di, &location); + btrfs_release_path(root, path); + btrfs_release_path(log, log_path); + inode = read_one_inode(root, location.objectid); + BUG_ON(!inode); + + ret = link_to_fixup_dir(trans, root, + path, location.objectid); + BUG_ON(ret); + btrfs_inc_nlink(inode); + ret = btrfs_unlink_inode(trans, root, dir, inode, + name, name_len); + BUG_ON(ret); + kfree(name); + iput(inode); + + /* there might still be more names under this key + * check and repeat if required + */ + ret = btrfs_search_slot(NULL, root, dir_key, path, + 0, 0); + if (ret == 0) + goto again; + ret = 0; + goto out; + } + btrfs_release_path(log, log_path); + kfree(name); + + ptr = (unsigned long)(di + 1); + ptr += name_len; + } + ret = 0; +out: + btrfs_release_path(root, path); + btrfs_release_path(log, log_path); + return ret; +} + +/* + * deletion replay happens before we copy any new directory items + * out of the log or out of backreferences from inodes. It + * scans the log to find ranges of keys that log is authoritative for, + * and then scans the directory to find items in those ranges that are + * not present in the log. + * + * Anything we don't find in the log is unlinked and removed from the + * directory. + */ +static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_root *log, + struct btrfs_path *path, + u64 dirid) +{ + u64 range_start; + u64 range_end; + int key_type = BTRFS_DIR_LOG_ITEM_KEY; + int ret = 0; + struct btrfs_key dir_key; + struct btrfs_key found_key; + struct btrfs_path *log_path; + struct inode *dir; + + dir_key.objectid = dirid; + dir_key.type = BTRFS_DIR_ITEM_KEY; + log_path = btrfs_alloc_path(); + if (!log_path) + return -ENOMEM; + + dir = read_one_inode(root, dirid); + /* it isn't an error if the inode isn't there, that can happen + * because we replay the deletes before we copy in the inode item + * from the log + */ + if (!dir) { + btrfs_free_path(log_path); + return 0; + } +again: + range_start = 0; + range_end = 0; + while(1) { + ret = find_dir_range(log, path, dirid, key_type, + &range_start, &range_end); + if (ret != 0) + break; + + dir_key.offset = range_start; + while(1) { + int nritems; + ret = btrfs_search_slot(NULL, root, &dir_key, path, + 0, 0); + if (ret < 0) + goto out; + + nritems = btrfs_header_nritems(path->nodes[0]); + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret) + break; + } + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + if (found_key.objectid != dirid || + found_key.type != dir_key.type) + goto next_type; + + if (found_key.offset > range_end) + break; + + ret = check_item_in_log(trans, root, log, path, + log_path, dir, &found_key); + BUG_ON(ret); + if (found_key.offset == (u64)-1) + break; + dir_key.offset = found_key.offset + 1; + } + btrfs_release_path(root, path); + if (range_end == (u64)-1) + break; + range_start = range_end + 1; + } + +next_type: + ret = 0; + if (key_type == BTRFS_DIR_LOG_ITEM_KEY) { + key_type = BTRFS_DIR_LOG_INDEX_KEY; + dir_key.type = BTRFS_DIR_INDEX_KEY; + btrfs_release_path(root, path); + goto again; + } +out: + btrfs_release_path(root, path); + btrfs_free_path(log_path); + iput(dir); + return ret; +} + +/* + * the process_func used to replay items from the log tree. This + * gets called in two different stages. The first stage just looks + * for inodes and makes sure they are all copied into the subvolume. + * + * The second stage copies all the other item types from the log into + * the subvolume. The two stage approach is slower, but gets rid of + * lots of complexity around inodes referencing other inodes that exist + * only in the log (references come from either directory items or inode + * back refs). + */ +static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, + struct walk_control *wc, u64 gen) +{ + int nritems; + struct btrfs_path *path; + struct btrfs_root *root = wc->replay_dest; + struct btrfs_key key; + u32 item_size; + int level; + int i; + int ret; + + btrfs_read_buffer(eb, gen); + + level = btrfs_header_level(eb); + + if (level != 0) + return 0; + + path = btrfs_alloc_path(); + BUG_ON(!path); + + nritems = btrfs_header_nritems(eb); + for (i = 0; i < nritems; i++) { + btrfs_item_key_to_cpu(eb, &key, i); + item_size = btrfs_item_size_nr(eb, i); + + /* inode keys are done during the first stage */ + if (key.type == BTRFS_INODE_ITEM_KEY && + wc->stage == LOG_WALK_REPLAY_INODES) { + struct inode *inode; + struct btrfs_inode_item *inode_item; + u32 mode; + + inode_item = btrfs_item_ptr(eb, i, + struct btrfs_inode_item); + mode = btrfs_inode_mode(eb, inode_item); + if (S_ISDIR(mode)) { + ret = replay_dir_deletes(wc->trans, + root, log, path, key.objectid); + BUG_ON(ret); + } + ret = overwrite_item(wc->trans, root, path, + eb, i, &key); + BUG_ON(ret); + + /* for regular files, truncate away + * extents past the new EOF + */ + if (S_ISREG(mode)) { + inode = read_one_inode(root, + key.objectid); + BUG_ON(!inode); + + ret = btrfs_truncate_inode_items(wc->trans, + root, inode, inode->i_size, + BTRFS_EXTENT_DATA_KEY); + BUG_ON(ret); + iput(inode); + } + ret = link_to_fixup_dir(wc->trans, root, + path, key.objectid); + BUG_ON(ret); + } + if (wc->stage < LOG_WALK_REPLAY_ALL) + continue; + + /* these keys are simply copied */ + if (key.type == BTRFS_XATTR_ITEM_KEY) { + ret = overwrite_item(wc->trans, root, path, + eb, i, &key); + BUG_ON(ret); + } else if (key.type == BTRFS_INODE_REF_KEY) { + ret = add_inode_ref(wc->trans, root, log, path, + eb, i, &key); + BUG_ON(ret && ret != -ENOENT); + } else if (key.type == BTRFS_EXTENT_DATA_KEY) { + ret = replay_one_extent(wc->trans, root, path, + eb, i, &key); + BUG_ON(ret); + } else if (key.type == BTRFS_CSUM_ITEM_KEY) { + ret = replay_one_csum(wc->trans, root, path, + eb, i, &key); + BUG_ON(ret); + } else if (key.type == BTRFS_DIR_ITEM_KEY || + key.type == BTRFS_DIR_INDEX_KEY) { + ret = replay_one_dir_item(wc->trans, root, path, + eb, i, &key); + BUG_ON(ret); + } + } + btrfs_free_path(path); + return 0; +} + +static int noinline walk_down_log_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level, + struct walk_control *wc) +{ + u64 root_owner; + u64 root_gen; + u64 bytenr; + u64 ptr_gen; + struct extent_buffer *next; + struct extent_buffer *cur; + struct extent_buffer *parent; + u32 blocksize; + int ret = 0; + + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + + while(*level > 0) { + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + cur = path->nodes[*level]; + + if (btrfs_header_level(cur) != *level) + WARN_ON(1); + + if (path->slots[*level] >= + btrfs_header_nritems(cur)) + break; + + bytenr = btrfs_node_blockptr(cur, path->slots[*level]); + ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); + blocksize = btrfs_level_size(root, *level - 1); + + parent = path->nodes[*level]; + root_owner = btrfs_header_owner(parent); + root_gen = btrfs_header_generation(parent); + + next = btrfs_find_create_tree_block(root, bytenr, blocksize); + + wc->process_func(root, next, wc, ptr_gen); + + if (*level == 1) { + path->slots[*level]++; + if (wc->free) { + btrfs_read_buffer(next, ptr_gen); + + btrfs_tree_lock(next); + clean_tree_block(trans, root, next); + btrfs_wait_tree_block_writeback(next); + btrfs_tree_unlock(next); + + ret = btrfs_drop_leaf_ref(trans, root, next); + BUG_ON(ret); + + WARN_ON(root_owner != + BTRFS_TREE_LOG_OBJECTID); + ret = btrfs_free_extent(trans, root, bytenr, + blocksize, root_owner, + root_gen, 0, 0, 1); + BUG_ON(ret); + } + free_extent_buffer(next); + continue; + } + btrfs_read_buffer(next, ptr_gen); + + WARN_ON(*level <= 0); + if (path->nodes[*level-1]) + free_extent_buffer(path->nodes[*level-1]); + path->nodes[*level-1] = next; + *level = btrfs_header_level(next); + path->slots[*level] = 0; + cond_resched(); + } + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + + if (path->nodes[*level] == root->node) { + parent = path->nodes[*level]; + } else { + parent = path->nodes[*level + 1]; + } + bytenr = path->nodes[*level]->start; + + blocksize = btrfs_level_size(root, *level); + root_owner = btrfs_header_owner(parent); + root_gen = btrfs_header_generation(parent); + + wc->process_func(root, path->nodes[*level], wc, + btrfs_header_generation(path->nodes[*level])); + + if (wc->free) { + next = path->nodes[*level]; + btrfs_tree_lock(next); + clean_tree_block(trans, root, next); + btrfs_wait_tree_block_writeback(next); + btrfs_tree_unlock(next); + + if (*level == 0) { + ret = btrfs_drop_leaf_ref(trans, root, next); + BUG_ON(ret); + } + WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); + ret = btrfs_free_extent(trans, root, bytenr, blocksize, + root_owner, root_gen, 0, 0, 1); + BUG_ON(ret); + } + free_extent_buffer(path->nodes[*level]); + path->nodes[*level] = NULL; + *level += 1; + + cond_resched(); + return 0; +} + +static int noinline walk_up_log_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level, + struct walk_control *wc) +{ + u64 root_owner; + u64 root_gen; + int i; + int slot; + int ret; + + for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { + slot = path->slots[i]; + if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { + struct extent_buffer *node; + node = path->nodes[i]; + path->slots[i]++; + *level = i; + WARN_ON(*level == 0); + return 0; + } else { + if (path->nodes[*level] == root->node) { + root_owner = root->root_key.objectid; + root_gen = + btrfs_header_generation(path->nodes[*level]); + } else { + struct extent_buffer *node; + node = path->nodes[*level + 1]; + root_owner = btrfs_header_owner(node); + root_gen = btrfs_header_generation(node); + } + wc->process_func(root, path->nodes[*level], wc, + btrfs_header_generation(path->nodes[*level])); + if (wc->free) { + struct extent_buffer *next; + + next = path->nodes[*level]; + + btrfs_tree_lock(next); + clean_tree_block(trans, root, next); + btrfs_wait_tree_block_writeback(next); + btrfs_tree_unlock(next); + + if (*level == 0) { + ret = btrfs_drop_leaf_ref(trans, root, + next); + BUG_ON(ret); + } + + WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); + ret = btrfs_free_extent(trans, root, + path->nodes[*level]->start, + path->nodes[*level]->len, + root_owner, root_gen, 0, 0, 1); + BUG_ON(ret); + } + free_extent_buffer(path->nodes[*level]); + path->nodes[*level] = NULL; + *level = i + 1; + } + } + return 1; +} + +/* + * drop the reference count on the tree rooted at 'snap'. This traverses + * the tree freeing any blocks that have a ref count of zero after being + * decremented. + */ +static int walk_log_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *log, struct walk_control *wc) +{ + int ret = 0; + int wret; + int level; + struct btrfs_path *path; + int i; + int orig_level; + + path = btrfs_alloc_path(); + BUG_ON(!path); + + level = btrfs_header_level(log->node); + orig_level = level; + path->nodes[level] = log->node; + extent_buffer_get(log->node); + path->slots[level] = 0; + + while(1) { + wret = walk_down_log_tree(trans, log, path, &level, wc); + if (wret > 0) + break; + if (wret < 0) + ret = wret; + + wret = walk_up_log_tree(trans, log, path, &level, wc); + if (wret > 0) + break; + if (wret < 0) + ret = wret; + } + + /* was the root node processed? if not, catch it here */ + if (path->nodes[orig_level]) { + wc->process_func(log, path->nodes[orig_level], wc, + btrfs_header_generation(path->nodes[orig_level])); + if (wc->free) { + struct extent_buffer *next; + + next = path->nodes[orig_level]; + + btrfs_tree_lock(next); + clean_tree_block(trans, log, next); + btrfs_wait_tree_block_writeback(next); + btrfs_tree_unlock(next); + + if (orig_level == 0) { + ret = btrfs_drop_leaf_ref(trans, log, + next); + BUG_ON(ret); + } + WARN_ON(log->root_key.objectid != + BTRFS_TREE_LOG_OBJECTID); + ret = btrfs_free_extent(trans, log, + next->start, next->len, + log->root_key.objectid, + btrfs_header_generation(next), + 0, 0, 1); + BUG_ON(ret); + } + } + + for (i = 0; i <= orig_level; i++) { + if (path->nodes[i]) { + free_extent_buffer(path->nodes[i]); + path->nodes[i] = NULL; + } + } + btrfs_free_path(path); + if (wc->free) + free_extent_buffer(log->node); + return ret; +} + +int wait_log_commit(struct btrfs_root *log) +{ + DEFINE_WAIT(wait); + u64 transid = log->fs_info->tree_log_transid; + + do { + prepare_to_wait(&log->fs_info->tree_log_wait, &wait, + TASK_UNINTERRUPTIBLE); + mutex_unlock(&log->fs_info->tree_log_mutex); + if (atomic_read(&log->fs_info->tree_log_commit)) + schedule(); + finish_wait(&log->fs_info->tree_log_wait, &wait); + mutex_lock(&log->fs_info->tree_log_mutex); + } while(transid == log->fs_info->tree_log_transid && + atomic_read(&log->fs_info->tree_log_commit)); + return 0; +} + +/* + * btrfs_sync_log does sends a given tree log down to the disk and + * updates the super blocks to record it. When this call is done, + * you know that any inodes previously logged are safely on disk + */ +int btrfs_sync_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + int ret; + unsigned long batch; + struct btrfs_root *log = root->log_root; + struct walk_control wc = { + .write = 1, + .process_func = process_one_buffer + }; + + mutex_lock(&log->fs_info->tree_log_mutex); + if (atomic_read(&log->fs_info->tree_log_commit)) { + wait_log_commit(log); + goto out; + } + atomic_set(&log->fs_info->tree_log_commit, 1); + + while(1) { + mutex_unlock(&log->fs_info->tree_log_mutex); + schedule_timeout_uninterruptible(1); + mutex_lock(&log->fs_info->tree_log_mutex); + batch = log->fs_info->tree_log_batch; + + while(atomic_read(&log->fs_info->tree_log_writers)) { + DEFINE_WAIT(wait); + prepare_to_wait(&log->fs_info->tree_log_wait, &wait, + TASK_UNINTERRUPTIBLE); + batch = log->fs_info->tree_log_batch; + mutex_unlock(&log->fs_info->tree_log_mutex); + if (atomic_read(&log->fs_info->tree_log_writers)) + schedule(); + mutex_lock(&log->fs_info->tree_log_mutex); + finish_wait(&log->fs_info->tree_log_wait, &wait); + } + if (batch == log->fs_info->tree_log_batch) + break; + } + ret = walk_log_tree(trans, log, &wc); + BUG_ON(ret); + + ret = walk_log_tree(trans, log->fs_info->log_root_tree, &wc); + BUG_ON(ret); + + wc.wait = 1; + + ret = walk_log_tree(trans, log, &wc); + BUG_ON(ret); + + ret = walk_log_tree(trans, log->fs_info->log_root_tree, &wc); + BUG_ON(ret); + + btrfs_set_super_log_root(&root->fs_info->super_for_commit, + log->fs_info->log_root_tree->node->start); + btrfs_set_super_log_root_level(&root->fs_info->super_for_commit, + btrfs_header_level(log->fs_info->log_root_tree->node)); + + write_ctree_super(trans, log->fs_info->tree_root); + log->fs_info->tree_log_transid++; + log->fs_info->tree_log_batch = 0; + atomic_set(&log->fs_info->tree_log_commit, 0); + smp_mb(); + if (waitqueue_active(&log->fs_info->tree_log_wait)) + wake_up(&log->fs_info->tree_log_wait); +out: + mutex_unlock(&log->fs_info->tree_log_mutex); + return 0; + +} + +/* + * free all the extents used by the tree log. This should be called + * at commit time of the full transaction + */ +int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) +{ + int ret; + struct btrfs_root *log; + struct key; + struct walk_control wc = { + .free = 1, + .process_func = process_one_buffer + }; + + if (!root->log_root) + return 0; + + log = root->log_root; + ret = walk_log_tree(trans, log, &wc); + BUG_ON(ret); + + log = root->log_root; + ret = btrfs_del_root(trans, root->fs_info->log_root_tree, + &log->root_key); + BUG_ON(ret); + root->log_root = NULL; + kfree(root->log_root); + return 0; +} + +/* + * helper function to update the item for a given subvolumes log root + * in the tree of log roots + */ +static int update_log_root(struct btrfs_trans_handle *trans, + struct btrfs_root *log) +{ + u64 bytenr = btrfs_root_bytenr(&log->root_item); + int ret; + + if (log->node->start == bytenr) + return 0; + + btrfs_set_root_bytenr(&log->root_item, log->node->start); + btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node)); + ret = btrfs_update_root(trans, log->fs_info->log_root_tree, + &log->root_key, &log->root_item); + BUG_ON(ret); + return ret; +} + +/* + * If both a file and directory are logged, and unlinks or renames are + * mixed in, we have a few interesting corners: + * + * create file X in dir Y + * link file X to X.link in dir Y + * fsync file X + * unlink file X but leave X.link + * fsync dir Y + * + * After a crash we would expect only X.link to exist. But file X + * didn't get fsync'd again so the log has back refs for X and X.link. + * + * We solve this by removing directory entries and inode backrefs from the + * log when a file that was logged in the current transaction is + * unlinked. Any later fsync will include the updated log entries, and + * we'll be able to reconstruct the proper directory items from backrefs. + * + * This optimizations allows us to avoid relogging the entire inode + * or the entire directory. + */ +int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + struct inode *dir, u64 index) +{ + struct btrfs_root *log; + struct btrfs_dir_item *di; + struct btrfs_path *path; + int ret; + int bytes_del = 0; + + ret = join_running_log_trans(root); + if (ret) + return 0; + + mutex_lock(&BTRFS_I(dir)->log_mutex); + + log = root->log_root; + path = btrfs_alloc_path(); + di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, + name, name_len, -1); + if (di && !IS_ERR(di)) { + ret = btrfs_delete_one_dir_name(trans, log, path, di); + bytes_del += name_len; + BUG_ON(ret); + } + btrfs_release_path(log, path); + di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, + index, name, name_len, -1); + if (di && !IS_ERR(di)) { + ret = btrfs_delete_one_dir_name(trans, log, path, di); + bytes_del += name_len; + BUG_ON(ret); + } + + /* update the directory size in the log to reflect the names + * we have removed + */ + if (bytes_del) { + struct btrfs_key key; + + key.objectid = dir->i_ino; + key.offset = 0; + key.type = BTRFS_INODE_ITEM_KEY; + btrfs_release_path(log, path); + + ret = btrfs_search_slot(trans, log, &key, path, 0, 1); + if (ret == 0) { + struct btrfs_inode_item *item; + u64 i_size; + + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + i_size = btrfs_inode_size(path->nodes[0], item); + if (i_size > bytes_del) + i_size -= bytes_del; + else + i_size = 0; + btrfs_set_inode_size(path->nodes[0], item, i_size); + btrfs_mark_buffer_dirty(path->nodes[0]); + } else + ret = 0; + btrfs_release_path(log, path); + } + + btrfs_free_path(path); + mutex_unlock(&BTRFS_I(dir)->log_mutex); + end_log_trans(root); + + return 0; +} + +/* see comments for btrfs_del_dir_entries_in_log */ +int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + struct inode *inode, u64 dirid) +{ + struct btrfs_root *log; + u64 index; + int ret; + + ret = join_running_log_trans(root); + if (ret) + return 0; + log = root->log_root; + mutex_lock(&BTRFS_I(inode)->log_mutex); + + ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, + dirid, &index); + mutex_unlock(&BTRFS_I(inode)->log_mutex); + end_log_trans(root); + + if (ret == 0 || ret == -ENOENT) + return 0; + return ret; +} + +/* + * creates a range item in the log for 'dirid'. first_offset and + * last_offset tell us which parts of the key space the log should + * be considered authoritative for. + */ +static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, + struct btrfs_root *log, + struct btrfs_path *path, + int key_type, u64 dirid, + u64 first_offset, u64 last_offset) +{ + int ret; + struct btrfs_key key; + struct btrfs_dir_log_item *item; + + key.objectid = dirid; + key.offset = first_offset; + if (key_type == BTRFS_DIR_ITEM_KEY) + key.type = BTRFS_DIR_LOG_ITEM_KEY; + else + key.type = BTRFS_DIR_LOG_INDEX_KEY; + ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); + BUG_ON(ret); + + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_dir_log_item); + btrfs_set_dir_log_end(path->nodes[0], item, last_offset); + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(log, path); + return 0; +} + +/* + * log all the items included in the current transaction for a given + * directory. This also creates the range items in the log tree required + * to replay anything deleted before the fsync + */ +static noinline int log_dir_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + struct btrfs_path *path, + struct btrfs_path *dst_path, int key_type, + u64 min_offset, u64 *last_offset_ret) +{ + struct btrfs_key min_key; + struct btrfs_key max_key; + struct btrfs_root *log = root->log_root; + struct extent_buffer *src; + int ret; + int i; + int nritems; + u64 first_offset = min_offset; + u64 last_offset = (u64)-1; + + log = root->log_root; + max_key.objectid = inode->i_ino; + max_key.offset = (u64)-1; + max_key.type = key_type; + + min_key.objectid = inode->i_ino; + min_key.type = key_type; + min_key.offset = min_offset; + + path->keep_locks = 1; + + ret = btrfs_search_forward(root, &min_key, &max_key, + path, 0, trans->transid); + + /* + * we didn't find anything from this transaction, see if there + * is anything at all + */ + if (ret != 0 || min_key.objectid != inode->i_ino || + min_key.type != key_type) { + min_key.objectid = inode->i_ino; + min_key.type = key_type; + min_key.offset = (u64)-1; + btrfs_release_path(root, path); + ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); + if (ret < 0) { + btrfs_release_path(root, path); + return ret; + } + ret = btrfs_previous_item(root, path, inode->i_ino, key_type); + + /* if ret == 0 there are items for this type, + * create a range to tell us the last key of this type. + * otherwise, there are no items in this directory after + * *min_offset, and we create a range to indicate that. + */ + if (ret == 0) { + struct btrfs_key tmp; + btrfs_item_key_to_cpu(path->nodes[0], &tmp, + path->slots[0]); + if (key_type == tmp.type) { + first_offset = max(min_offset, tmp.offset) + 1; + } + } + goto done; + } + + /* go backward to find any previous key */ + ret = btrfs_previous_item(root, path, inode->i_ino, key_type); + if (ret == 0) { + struct btrfs_key tmp; + btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); + if (key_type == tmp.type) { + first_offset = tmp.offset; + ret = overwrite_item(trans, log, dst_path, + path->nodes[0], path->slots[0], + &tmp); + } + } + btrfs_release_path(root, path); + + /* find the first key from this transaction again */ + ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); + if (ret != 0) { + WARN_ON(1); + goto done; + } + + /* + * we have a block from this transaction, log every item in it + * from our directory + */ + while(1) { + struct btrfs_key tmp; + src = path->nodes[0]; + nritems = btrfs_header_nritems(src); + for (i = path->slots[0]; i < nritems; i++) { + btrfs_item_key_to_cpu(src, &min_key, i); + + if (min_key.objectid != inode->i_ino || + min_key.type != key_type) + goto done; + ret = overwrite_item(trans, log, dst_path, src, i, + &min_key); + BUG_ON(ret); + } + path->slots[0] = nritems; + + /* + * look ahead to the next item and see if it is also + * from this directory and from this transaction + */ + ret = btrfs_next_leaf(root, path); + if (ret == 1) { + last_offset = (u64)-1; + goto done; + } + btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); + if (tmp.objectid != inode->i_ino || tmp.type != key_type) { + last_offset = (u64)-1; + goto done; + } + if (btrfs_header_generation(path->nodes[0]) != trans->transid) { + ret = overwrite_item(trans, log, dst_path, + path->nodes[0], path->slots[0], + &tmp); + + BUG_ON(ret); + last_offset = tmp.offset; + goto done; + } + } +done: + *last_offset_ret = last_offset; + btrfs_release_path(root, path); + btrfs_release_path(log, dst_path); + + /* insert the log range keys to indicate where the log is valid */ + ret = insert_dir_log_key(trans, log, path, key_type, inode->i_ino, + first_offset, last_offset); + BUG_ON(ret); + return 0; +} + +/* + * logging directories is very similar to logging inodes, We find all the items + * from the current transaction and write them to the log. + * + * The recovery code scans the directory in the subvolume, and if it finds a + * key in the range logged that is not present in the log tree, then it means + * that dir entry was unlinked during the transaction. + * + * In order for that scan to work, we must include one key smaller than + * the smallest logged by this transaction and one key larger than the largest + * key logged by this transaction. + */ +static noinline int log_directory_changes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + struct btrfs_path *path, + struct btrfs_path *dst_path) +{ + u64 min_key; + u64 max_key; + int ret; + int key_type = BTRFS_DIR_ITEM_KEY; + +again: + min_key = 0; + max_key = 0; + while(1) { + ret = log_dir_items(trans, root, inode, path, + dst_path, key_type, min_key, + &max_key); + BUG_ON(ret); + if (max_key == (u64)-1) + break; + min_key = max_key + 1; + } + + if (key_type == BTRFS_DIR_ITEM_KEY) { + key_type = BTRFS_DIR_INDEX_KEY; + goto again; + } + return 0; +} + +/* + * a helper function to drop items from the log before we relog an + * inode. max_key_type indicates the highest item type to remove. + * This cannot be run for file data extents because it does not + * free the extents they point to. + */ +static int drop_objectid_items(struct btrfs_trans_handle *trans, + struct btrfs_root *log, + struct btrfs_path *path, + u64 objectid, int max_key_type) +{ + int ret; + struct btrfs_key key; + struct btrfs_key found_key; + + key.objectid = objectid; + key.type = max_key_type; + key.offset = (u64)-1; + + while(1) { + ret = btrfs_search_slot(trans, log, &key, path, -1, 1); + + if (ret != 1) + break; + + if (path->slots[0] == 0) + break; + + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + + if (found_key.objectid != objectid) + break; + + ret = btrfs_del_item(trans, log, path); + BUG_ON(ret); + btrfs_release_path(log, path); + } + btrfs_release_path(log, path); + return 0; +} + +/* log a single inode in the tree log. + * At least one parent directory for this inode must exist in the tree + * or be logged already. + * + * Any items from this inode changed by the current transaction are copied + * to the log tree. An extra reference is taken on any extents in this + * file, allowing us to avoid a whole pile of corner cases around logging + * blocks that have been removed from the tree. + * + * See LOG_INODE_ALL and related defines for a description of what inode_only + * does. + * + * This handles both files and directories. + */ +static int __btrfs_log_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + int inode_only) +{ + struct btrfs_path *path; + struct btrfs_path *dst_path; + struct btrfs_key min_key; + struct btrfs_key max_key; + struct btrfs_root *log = root->log_root; + unsigned long src_offset; + unsigned long dst_offset; + struct extent_buffer *src; + struct btrfs_file_extent_item *extent; + struct btrfs_inode_item *inode_item; + u32 size; + int ret; + + log = root->log_root; + + path = btrfs_alloc_path(); + dst_path = btrfs_alloc_path(); + + min_key.objectid = inode->i_ino; + min_key.type = BTRFS_INODE_ITEM_KEY; + min_key.offset = 0; + + max_key.objectid = inode->i_ino; + if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) + max_key.type = BTRFS_XATTR_ITEM_KEY; + else + max_key.type = (u8)-1; + max_key.offset = (u64)-1; + + /* + * if this inode has already been logged and we're in inode_only + * mode, we don't want to delete the things that have already + * been written to the log. + * + * But, if the inode has been through an inode_only log, + * the logged_trans field is not set. This allows us to catch + * any new names for this inode in the backrefs by logging it + * again + */ + if (inode_only == LOG_INODE_EXISTS && + BTRFS_I(inode)->logged_trans == trans->transid) { + btrfs_free_path(path); + btrfs_free_path(dst_path); + goto out; + } + mutex_lock(&BTRFS_I(inode)->log_mutex); + + /* + * a brute force approach to making sure we get the most uptodate + * copies of everything. + */ + if (S_ISDIR(inode->i_mode)) { + int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; + + if (inode_only == LOG_INODE_EXISTS) + max_key_type = BTRFS_XATTR_ITEM_KEY; + ret = drop_objectid_items(trans, log, path, + inode->i_ino, max_key_type); + } else { + ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); + } + BUG_ON(ret); + path->keep_locks = 1; + + while(1) { + ret = btrfs_search_forward(root, &min_key, &max_key, + path, 0, trans->transid); + if (ret != 0) + break; + + if (min_key.objectid != inode->i_ino) + break; + if (min_key.type > max_key.type) + break; + + src = path->nodes[0]; + size = btrfs_item_size_nr(src, path->slots[0]); + ret = btrfs_insert_empty_item(trans, log, dst_path, &min_key, + size); + if (ret) + BUG(); + + dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], + dst_path->slots[0]); + + src_offset = btrfs_item_ptr_offset(src, path->slots[0]); + + copy_extent_buffer(dst_path->nodes[0], src, dst_offset, + src_offset, size); + + if (inode_only == LOG_INODE_EXISTS && + min_key.type == BTRFS_INODE_ITEM_KEY) { + inode_item = btrfs_item_ptr(dst_path->nodes[0], + dst_path->slots[0], + struct btrfs_inode_item); + btrfs_set_inode_size(dst_path->nodes[0], inode_item, 0); + + /* set the generation to zero so the recover code + * can tell the difference between an logging + * just to say 'this inode exists' and a logging + * to say 'update this inode with these values' + */ + btrfs_set_inode_generation(dst_path->nodes[0], + inode_item, 0); + } + /* take a reference on file data extents so that truncates + * or deletes of this inode don't have to relog the inode + * again + */ + if (btrfs_key_type(&min_key) == BTRFS_EXTENT_DATA_KEY) { + int found_type; + extent = btrfs_item_ptr(src, path->slots[0], + struct btrfs_file_extent_item); + + found_type = btrfs_file_extent_type(src, extent); + if (found_type == BTRFS_FILE_EXTENT_REG) { + u64 ds = btrfs_file_extent_disk_bytenr(src, + extent); + u64 dl = btrfs_file_extent_disk_num_bytes(src, + extent); + /* ds == 0 is a hole */ + if (ds != 0) { + ret = btrfs_inc_extent_ref(trans, log, + ds, dl, + log->root_key.objectid, + 0, + inode->i_ino, + min_key.offset); + BUG_ON(ret); + } + } + } + + btrfs_mark_buffer_dirty(dst_path->nodes[0]); + btrfs_release_path(root, path); + btrfs_release_path(log, dst_path); + + if (min_key.offset < (u64)-1) + min_key.offset++; + else if (min_key.type < (u8)-1) + min_key.type++; + else if (min_key.objectid < (u64)-1) + min_key.objectid++; + else + break; + } + if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { + btrfs_release_path(root, path); + btrfs_release_path(log, dst_path); + ret = log_directory_changes(trans, root, inode, path, dst_path); + BUG_ON(ret); + } + mutex_unlock(&BTRFS_I(inode)->log_mutex); + + btrfs_free_path(path); + btrfs_free_path(dst_path); + + mutex_lock(&root->fs_info->tree_log_mutex); + ret = update_log_root(trans, log); + BUG_ON(ret); + mutex_unlock(&root->fs_info->tree_log_mutex); +out: + return 0; +} + +int btrfs_log_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + int inode_only) +{ + int ret; + + start_log_trans(trans, root); + ret = __btrfs_log_inode(trans, root, inode, inode_only); + end_log_trans(root); + return ret; +} + +/* + * helper function around btrfs_log_inode to make sure newly created + * parent directories also end up in the log. A minimal inode and backref + * only logging is done of any parent directories that are older than + * the last committed transaction + */ +int btrfs_log_dentry(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct dentry *dentry) +{ + int inode_only = LOG_INODE_ALL; + struct super_block *sb; + int ret; + + start_log_trans(trans, root); + sb = dentry->d_inode->i_sb; + while(1) { + ret = __btrfs_log_inode(trans, root, dentry->d_inode, + inode_only); + BUG_ON(ret); + inode_only = LOG_INODE_EXISTS; + + dentry = dentry->d_parent; + if (!dentry || !dentry->d_inode || sb != dentry->d_inode->i_sb) + break; + + if (BTRFS_I(dentry->d_inode)->generation <= + root->fs_info->last_trans_committed) + break; + } + end_log_trans(root); + return 0; +} + +/* + * it is not safe to log dentry if the chunk root has added new + * chunks. This returns 0 if the dentry was logged, and 1 otherwise. + * If this returns 1, you must commit the transaction to safely get your + * data on disk. + */ +int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct dentry *dentry) +{ + u64 gen; + gen = root->fs_info->last_trans_new_blockgroup; + if (gen > root->fs_info->last_trans_committed) + return 1; + else + return btrfs_log_dentry(trans, root, dentry); +} + +/* + * should be called during mount to recover any replay any log trees + * from the FS + */ +int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) +{ + int ret; + struct btrfs_path *path; + struct btrfs_trans_handle *trans; + struct btrfs_key key; + struct btrfs_key found_key; + struct btrfs_key tmp_key; + struct btrfs_root *log; + struct btrfs_fs_info *fs_info = log_root_tree->fs_info; + struct walk_control wc = { + .process_func = process_one_buffer, + .stage = 0, + }; + + fs_info->log_root_recovering = 1; + path = btrfs_alloc_path(); + BUG_ON(!path); + + trans = btrfs_start_transaction(fs_info->tree_root, 1); + + wc.trans = trans; + wc.pin = 1; + + walk_log_tree(trans, log_root_tree, &wc); + +again: + key.objectid = BTRFS_TREE_LOG_OBJECTID; + key.offset = (u64)-1; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + + while(1) { + ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); + if (ret < 0) + break; + if (ret > 0) { + if (path->slots[0] == 0) + break; + path->slots[0]--; + } + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + btrfs_release_path(log_root_tree, path); + if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID) + break; + + log = btrfs_read_fs_root_no_radix(log_root_tree, + &found_key); + BUG_ON(!log); + + + tmp_key.objectid = found_key.offset; + tmp_key.type = BTRFS_ROOT_ITEM_KEY; + tmp_key.offset = (u64)-1; + + wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); + + BUG_ON(!wc.replay_dest); + + btrfs_record_root_in_trans(wc.replay_dest); + ret = walk_log_tree(trans, log, &wc); + BUG_ON(ret); + + if (wc.stage == LOG_WALK_REPLAY_ALL) { + ret = fixup_inode_link_counts(trans, wc.replay_dest, + path); + BUG_ON(ret); + } + + key.offset = found_key.offset - 1; + free_extent_buffer(log->node); + kfree(log); + + if (found_key.offset == 0) + break; + } + btrfs_release_path(log_root_tree, path); + + /* step one is to pin it all, step two is to replay just inodes */ + if (wc.pin) { + wc.pin = 0; + wc.process_func = replay_one_buffer; + wc.stage = LOG_WALK_REPLAY_INODES; + goto again; + } + /* step three is to replay everything */ + if (wc.stage < LOG_WALK_REPLAY_ALL) { + wc.stage++; + goto again; + } + + btrfs_free_path(path); + + free_extent_buffer(log_root_tree->node); + log_root_tree->log_root = NULL; + fs_info->log_root_recovering = 0; + + /* step 4: commit the transaction, which also unpins the blocks */ + btrfs_commit_transaction(trans, fs_info->tree_root); + + kfree(log_root_tree); + return 0; +} diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h new file mode 100644 index 00000000000..b9409b32ed0 --- /dev/null +++ b/fs/btrfs/tree-log.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __TREE_LOG_ +#define __TREE_LOG_ + +int btrfs_sync_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_log_dentry(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct dentry *dentry); +int btrfs_recover_log_trees(struct btrfs_root *tree_root); +int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct dentry *dentry); +int btrfs_log_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + int inode_only); +int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + struct inode *dir, u64 index); +int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + struct inode *inode, u64 dirid); +#endif -- cgit v1.2.3-70-g09d2 From 4bef084857ab8fe71cf49eae349c25e440a49150 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 8 Sep 2008 11:18:08 -0400 Subject: Btrfs: Tree logging fixes * Pin down data blocks to prevent them from being reallocated like so: trans 1: allocate file extent trans 2: free file extent trans 3: free file extent during old snapshot deletion trans 3: allocate file extent to new file trans 3: fsync new file Before the tree logging code, this was legal because the fsync would commit the transation that did the final data extent free and the transaction that allocated the extent to the new file at the same time. With the tree logging code, the tree log subtransaction can commit before the transaction that freed the extent. If we crash, we're left with two different files using the extent. * Don't wait in start_transaction if log replay is going on. This avoids deadlocks from iput while we're cleaning up link counts in the replay code. * Don't deadlock in replay_one_name by trying to read an inode off the disk while holding paths for the directory * Hold the buffer lock while we mark a buffer as written. This closes a race where someone is changing a buffer while we write it. They are supposed to mark it dirty again after they change it, but this violates the cow rules. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 33 ++++++++++++++++++++++++++++++--- fs/btrfs/disk-io.h | 1 + fs/btrfs/extent-tree.c | 20 +++++++++++++++++--- fs/btrfs/extent_io.c | 47 ++++++++++++++++++++++++++++++++++++----------- fs/btrfs/extent_io.h | 1 + fs/btrfs/transaction.c | 16 +++++++++++++--- fs/btrfs/tree-log.c | 13 +++++++++---- 7 files changed, 107 insertions(+), 24 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a4373db5967..42bf9916805 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -307,9 +307,7 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) goto err; } found_level = btrfs_header_level(eb); - spin_lock(&root->fs_info->hash_lock); - btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); - spin_unlock(&root->fs_info->hash_lock); + csum_tree_block(root, eb, 0); err: free_extent_buffer(eb); @@ -1998,7 +1996,36 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) return ret; } +int btree_lock_page_hook(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct extent_buffer *eb; + unsigned long len; + u64 bytenr = page_offset(page); + + if (page->private == EXTENT_PAGE_PRIVATE) + goto out; + + len = page->private >> 2; + eb = find_extent_buffer(io_tree, bytenr, len, GFP_NOFS); + if (!eb) + goto out; + + btrfs_tree_lock(eb); + spin_lock(&root->fs_info->hash_lock); + btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); + spin_unlock(&root->fs_info->hash_lock); + btrfs_tree_unlock(eb); + free_extent_buffer(eb); +out: + lock_page(page); + return 0; +} + static struct extent_io_ops btree_extent_io_ops = { + .write_cache_pages_lock_hook = btree_lock_page_hook, .writepage_io_hook = btree_writepage_io_hook, .readpage_end_io_hook = btree_readpage_end_io_hook, .submit_bio_hook = btree_submit_bio_hook, diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 6b6fdc697f3..f84f5058dbb 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -80,4 +80,5 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); +int btree_lock_page_hook(struct page *page); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 646b9148ca2..3181759da1c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1590,13 +1590,17 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, } static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, - int pending) + int is_data, int pending) { int err = 0; WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); if (!pending) { struct extent_buffer *buf; + + if (is_data) + goto pinit; + buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (buf) { /* we can reuse a block if it hasn't been written @@ -1624,6 +1628,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, } free_extent_buffer(buf); } +pinit: btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); } else { set_extent_bits(&root->fs_info->pending_del, @@ -1744,7 +1749,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root #endif if (pin) { - ret = pin_down_bytes(root, bytenr, num_bytes, 0); + ret = pin_down_bytes(root, bytenr, num_bytes, + owner_objectid >= BTRFS_FIRST_FREE_OBJECTID, 0); if (ret > 0) mark_free = 1; BUG_ON(ret < 0); @@ -1862,9 +1868,17 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, ref_generation = 0; if (root == extent_root) { - pin_down_bytes(root, bytenr, num_bytes, 1); + pin_down_bytes(root, bytenr, num_bytes, 0, 1); return 0; } + /* if metadata always pin */ + if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) + pin = 1; + + /* if data pin when any transaction has committed this */ + if (ref_generation != trans->transid) + pin = 1; + ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid, ref_generation, owner_objectid, owner_offset, pin, pin == 0); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 239e7c908ab..319a0c7a4a5 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -29,7 +29,10 @@ static struct kmem_cache *extent_buffer_cache; static LIST_HEAD(buffers); static LIST_HEAD(states); + +#ifdef LEAK_DEBUG static spinlock_t leak_lock = SPIN_LOCK_UNLOCKED; +#endif #define BUFFER_LRU_MAX 64 @@ -106,7 +109,9 @@ EXPORT_SYMBOL(extent_io_tree_init); struct extent_state *alloc_extent_state(gfp_t mask) { struct extent_state *state; +#ifdef LEAK_DEBUG unsigned long flags; +#endif state = kmem_cache_alloc(extent_state_cache, mask); if (!state) @@ -114,10 +119,11 @@ struct extent_state *alloc_extent_state(gfp_t mask) state->state = 0; state->private = 0; state->tree = NULL; +#ifdef LEAK_DEBUG spin_lock_irqsave(&leak_lock, flags); list_add(&state->leak_list, &states); spin_unlock_irqrestore(&leak_lock, flags); - +#endif atomic_set(&state->refs, 1); init_waitqueue_head(&state->wq); return state; @@ -129,11 +135,15 @@ void free_extent_state(struct extent_state *state) if (!state) return; if (atomic_dec_and_test(&state->refs)) { +#ifdef LEAK_DEBUG unsigned long flags; +#endif WARN_ON(state->tree); +#ifdef LEAK_DEBUG spin_lock_irqsave(&leak_lock, flags); list_del(&state->leak_list); spin_unlock_irqrestore(&leak_lock, flags); +#endif kmem_cache_free(extent_state_cache, state); } } @@ -2070,13 +2080,13 @@ done: } #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) -/* Taken directly from 2.6.23 for 2.6.18 back port */ +/* Taken directly from 2.6.23 with a mod for a lockpage hook */ typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, void *data); +#endif /** - * write_cache_pages - walk the list of dirty pages of the given address space - * and write all of them. + * write_cache_pages - walk the list of dirty pages of the given address space and write all of them. * @mapping: address space structure to write * @wbc: subtract the number of written pages from *@wbc->nr_to_write * @writepage: function called for each page @@ -2090,9 +2100,10 @@ typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, * WB_SYNC_ALL then we were called for data integrity and we must wait for * existing IO to complete. */ -static int write_cache_pages(struct address_space *mapping, - struct writeback_control *wbc, writepage_t writepage, - void *data) +int extent_write_cache_pages(struct extent_io_tree *tree, + struct address_space *mapping, + struct writeback_control *wbc, + writepage_t writepage, void *data) { struct backing_dev_info *bdi = mapping->backing_dev_info; int ret = 0; @@ -2138,7 +2149,10 @@ retry: * swizzled back from swapper_space to tmpfs file * mapping */ - lock_page(page); + if (tree->ops && tree->ops->write_cache_pages_lock_hook) + tree->ops->write_cache_pages_lock_hook(page); + else + lock_page(page); if (unlikely(page->mapping != mapping)) { unlock_page(page); @@ -2187,9 +2201,12 @@ retry: } if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = index; + + if (wbc->range_cont) + wbc->range_start = index << PAGE_CACHE_SHIFT; return ret; } -#endif +EXPORT_SYMBOL(extent_write_cache_pages); int extent_write_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, @@ -2214,7 +2231,8 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, ret = __extent_writepage(page, wbc, &epd); - write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd); + extent_write_cache_pages(tree, mapping, &wbc_writepages, + __extent_writepage, &epd); if (epd.bio) { submit_one_bio(WRITE, epd.bio, 0); } @@ -2235,7 +2253,8 @@ int extent_writepages(struct extent_io_tree *tree, .get_extent = get_extent, }; - ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd); + ret = extent_write_cache_pages(tree, mapping, wbc, + __extent_writepage, &epd); if (epd.bio) { submit_one_bio(WRITE, epd.bio, 0); } @@ -2567,15 +2586,19 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, gfp_t mask) { struct extent_buffer *eb = NULL; +#ifdef LEAK_DEBUG unsigned long flags; +#endif eb = kmem_cache_zalloc(extent_buffer_cache, mask); eb->start = start; eb->len = len; mutex_init(&eb->mutex); +#ifdef LEAK_DEBUG spin_lock_irqsave(&leak_lock, flags); list_add(&eb->leak_list, &buffers); spin_unlock_irqrestore(&leak_lock, flags); +#endif atomic_set(&eb->refs, 1); return eb; @@ -2583,10 +2606,12 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, static void __free_extent_buffer(struct extent_buffer *eb) { +#ifdef LEAK_DEBUG unsigned long flags; spin_lock_irqsave(&leak_lock, flags); list_del(&eb->leak_list); spin_unlock_irqrestore(&leak_lock, flags); +#endif kmem_cache_free(extent_buffer_cache, eb); } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 315cfceae31..3cb411a5f4d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -50,6 +50,7 @@ struct extent_io_ops { unsigned long old, unsigned long bits); int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits); + int (*write_cache_pages_lock_hook)(struct page *page); }; struct extent_io_tree { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 49c4f5b40ed..61a377bcb2f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -161,7 +161,8 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, int ret; mutex_lock(&root->fs_info->trans_mutex); - if ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2) + if (!root->fs_info->log_root_recovering && + ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2)) wait_current_trans(root); ret = join_transaction(root); BUG_ON(ret); @@ -328,9 +329,17 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, index = start >> PAGE_CACHE_SHIFT; start = (u64)(index + 1) << PAGE_CACHE_SHIFT; - page = find_lock_page(btree_inode->i_mapping, index); + page = find_get_page(btree_inode->i_mapping, index); if (!page) continue; + + btree_lock_page_hook(page); + if (!page->mapping) { + unlock_page(page); + page_cache_release(page); + continue; + } + if (PageWriteback(page)) { if (PageDirty(page)) wait_on_page_writeback(page); @@ -360,7 +369,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, if (!page) continue; if (PageDirty(page)) { - lock_page(page); + btree_lock_page_hook(page); + wait_on_page_writeback(page); err = write_one_page(page, 0); if (err) werr = err; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d1ce8314b94..13d7ee8e0c5 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1176,8 +1176,8 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, struct btrfs_key found_key; struct btrfs_key log_key; struct inode *dir; - struct inode *inode; u8 log_type; + int exists; int ret; dir = read_one_inode(root, key->objectid); @@ -1190,6 +1190,13 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, name_len); btrfs_dir_item_key_to_cpu(eb, di, &log_key); + exists = btrfs_lookup_inode(trans, root, path, &log_key, 0); + if (exists == 0) + exists = 1; + else + exists = 0; + btrfs_release_path(root, path); + if (key->type == BTRFS_DIR_ITEM_KEY) { dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, name, name_len, 1); @@ -1224,11 +1231,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, * don't drop the conflicting directory entry if the inode * for the new entry doesn't exist */ - inode = read_one_inode(root, log_key.objectid); - if (!inode) + if (!exists) goto out; - iput(inode); ret = drop_one_dir_item(trans, root, path, dir, dst_di); BUG_ON(ret); -- cgit v1.2.3-70-g09d2 From d00aff00139b40f2e9c60299d76aac29d72e48ba Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 11 Sep 2008 15:54:42 -0400 Subject: Btrfs: Optimize tree log block allocations Since tree log blocks get freed every transaction, they never really need to be written to disk. This skips the step where we update metadata to record they were allocated. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 ++--- fs/btrfs/extent-tree.c | 19 ++++++++++++++----- fs/btrfs/tree-log.c | 20 +++++++------------- 3 files changed, 23 insertions(+), 21 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7c06eb4ecfd..5edb7f88579 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -829,9 +829,8 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, WARN_ON(btrfs_header_level(eb) != 0); WARN_ON(btrfs_header_nritems(eb) != 0); - ret = btrfs_free_extent(trans, fs_info->tree_root, - eb->start, eb->len, - BTRFS_TREE_LOG_OBJECTID, 0, 0, 0, 1); + ret = btrfs_free_reserved_extent(fs_info->tree_root, + eb->start, eb->len); BUG_ON(ret); free_extent_buffer(eb); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3181759da1c..c479d71e286 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1872,8 +1872,15 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, return 0; } /* if metadata always pin */ - if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) + if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { + if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { + /* btrfs_free_reserved_extent */ + set_extent_dirty(&root->fs_info->free_space_cache, + bytenr, bytenr + num_bytes - 1, GFP_NOFS); + return 0; + } pin = 1; + } /* if data pin when any transaction has committed this */ if (ref_generation != trans->transid) @@ -2361,11 +2368,13 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, min_alloc_size, empty_size, hint_byte, search_end, ins, data); BUG_ON(ret); - ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, - ref_generation, owner, - owner_offset, ins); - BUG_ON(ret); + if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { + ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, + ref_generation, owner, + owner_offset, ins); + BUG_ON(ret); + } maybe_unlock_mutex(root); return ret; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f43ee33ec2d..5f77bee0f84 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1728,9 +1728,8 @@ static int noinline walk_down_log_tree(struct btrfs_trans_handle *trans, WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); - ret = btrfs_free_extent(trans, root, bytenr, - blocksize, root_owner, - root_gen, 0, 0, 1); + ret = btrfs_free_reserved_extent(root, + bytenr, blocksize); BUG_ON(ret); } free_extent_buffer(next); @@ -1775,8 +1774,7 @@ static int noinline walk_down_log_tree(struct btrfs_trans_handle *trans, BUG_ON(ret); } WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); - ret = btrfs_free_extent(trans, root, bytenr, blocksize, - root_owner, root_gen, 0, 0, 1); + ret = btrfs_free_reserved_extent(root, bytenr, blocksize); BUG_ON(ret); } free_extent_buffer(path->nodes[*level]); @@ -1837,10 +1835,9 @@ static int noinline walk_up_log_tree(struct btrfs_trans_handle *trans, } WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); - ret = btrfs_free_extent(trans, root, + ret = btrfs_free_reserved_extent(root, path->nodes[*level]->start, - path->nodes[*level]->len, - root_owner, root_gen, 0, 0, 1); + path->nodes[*level]->len); BUG_ON(ret); } free_extent_buffer(path->nodes[*level]); @@ -1910,11 +1907,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, } WARN_ON(log->root_key.objectid != BTRFS_TREE_LOG_OBJECTID); - ret = btrfs_free_extent(trans, log, - next->start, next->len, - log->root_key.objectid, - btrfs_header_generation(next), - 0, 0, 1); + ret = btrfs_free_reserved_extent(log, next->start, + next->len); BUG_ON(ret); } } -- cgit v1.2.3-70-g09d2 From d0c803c4049c5ca322d4795d8b74f28768603e0e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 11 Sep 2008 16:17:57 -0400 Subject: Btrfs: Record dirty pages tree-log pages in an extent_io tree This is the same way the transaction code makes sure that all the other tree blocks are safely on disk. There's an extent_io tree for each root, and any blocks allocated to the tree logs are recorded in that tree. At tree-log sync, the extent_io tree is walked to flush down the dirty pages and wait for them. The main benefit is less time spent walking the tree log and skipping clean pages, and getting sequential IO down to the drive. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/disk-io.c | 17 +++++++++++++++-- fs/btrfs/extent-tree.c | 7 ++++++- fs/btrfs/transaction.c | 21 ++++++++++++++------- fs/btrfs/transaction.h | 2 ++ fs/btrfs/tree-log.c | 29 +++++++++++++++-------------- 6 files changed, 54 insertions(+), 24 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2ed6918f32e..eb65fd80888 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -668,6 +668,8 @@ struct btrfs_root { struct btrfs_key root_key; struct btrfs_fs_info *fs_info; struct inode *inode; + struct extent_io_tree dirty_log_pages; + struct kobject root_kobj; struct completion kobj_unregister; struct mutex objectid_mutex; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5edb7f88579..57fbf107e59 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -777,6 +777,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, spin_lock_init(&root->list_lock); mutex_init(&root->objectid_mutex); mutex_init(&root->log_mutex); + extent_io_tree_init(&root->dirty_log_pages, + fs_info->btree_inode->i_mapping, GFP_NOFS); btrfs_leaf_ref_tree_init(&root->ref_tree_struct); root->ref_tree = &root->ref_tree_struct; @@ -819,11 +821,23 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) { struct extent_buffer *eb; + struct btrfs_root *log_root_tree = fs_info->log_root_tree; + u64 start = 0; + u64 end = 0; int ret; - if (!fs_info->log_root_tree) + if (!log_root_tree) return 0; + while(1) { + ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, + 0, &start, &end, EXTENT_DIRTY); + if (ret) + break; + + clear_extent_dirty(&log_root_tree->dirty_log_pages, + start, end, GFP_NOFS); + } eb = fs_info->log_root_tree->node; WARN_ON(btrfs_header_level(eb) != 0); @@ -1412,7 +1426,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); insert_inode_hash(fs_info->btree_inode); - mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->tree_log_mutex); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c479d71e286..c0bb6b9ac4c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2392,8 +2392,13 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, btrfs_tree_lock(buf); clean_tree_block(trans, root, buf); btrfs_set_buffer_uptodate(buf); - set_extent_dirty(&trans->transaction->dirty_pages, buf->start, + if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { + set_extent_dirty(&root->dirty_log_pages, buf->start, + buf->start + buf->len - 1, GFP_NOFS); + } else { + set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); + } trans->blocks_used++; return buf; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 61a377bcb2f..151b00d5259 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -302,23 +302,18 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, } -int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, - struct btrfs_root *root) +int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, + struct extent_io_tree *dirty_pages) { int ret; int err = 0; int werr = 0; - struct extent_io_tree *dirty_pages; struct page *page; struct inode *btree_inode = root->fs_info->btree_inode; u64 start = 0; u64 end; unsigned long index; - if (!trans || !trans->transaction) { - return filemap_write_and_wait(btree_inode->i_mapping); - } - dirty_pages = &trans->transaction->dirty_pages; while(1) { ret = find_first_extent_bit(dirty_pages, start, &start, &end, EXTENT_DIRTY); @@ -385,6 +380,18 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, return werr; } +int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + if (!trans || !trans->transaction) { + struct inode *btree_inode; + btree_inode = root->fs_info->btree_inode; + return filemap_write_and_wait(btree_inode->i_mapping); + } + return btrfs_write_and_wait_marked_extents(root, + &trans->transaction->dirty_pages); +} + static int update_cowonly_root(struct btrfs_trans_handle *trans, struct btrfs_root *root) { diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index cc63650d60d..eef2cb7d7e7 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -99,4 +99,6 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, struct btrfs_root *root); void btrfs_throttle(struct btrfs_root *root); int btrfs_record_root_in_trans(struct btrfs_root *root); +int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, + struct extent_io_tree *dirty_pages); #endif diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ae96451bc22..bfa71080096 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1954,10 +1954,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, int ret; unsigned long batch; struct btrfs_root *log = root->log_root; - struct walk_control wc = { - .write = 1, - .process_func = process_one_buffer - }; mutex_lock(&log->fs_info->tree_log_mutex); if (atomic_read(&log->fs_info->tree_log_commit)) { @@ -1985,18 +1981,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, if (batch == log->fs_info->tree_log_batch) break; } - ret = walk_log_tree(trans, log, &wc); - BUG_ON(ret); - - ret = walk_log_tree(trans, log->fs_info->log_root_tree, &wc); - BUG_ON(ret); - - wc.wait = 1; - ret = walk_log_tree(trans, log, &wc); + ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); BUG_ON(ret); - - ret = walk_log_tree(trans, log->fs_info->log_root_tree, &wc); + ret = btrfs_write_and_wait_marked_extents(root->fs_info->log_root_tree, + &root->fs_info->log_root_tree->dirty_log_pages); BUG_ON(ret); btrfs_set_super_log_root(&root->fs_info->super_for_commit, @@ -2025,6 +2014,8 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) int ret; struct btrfs_root *log; struct key; + u64 start; + u64 end; struct walk_control wc = { .free = 1, .process_func = process_one_buffer @@ -2037,6 +2028,16 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) ret = walk_log_tree(trans, log, &wc); BUG_ON(ret); + while(1) { + ret = find_first_extent_bit(&log->dirty_log_pages, + 0, &start, &end, EXTENT_DIRTY); + if (ret) + break; + + clear_extent_dirty(&log->dirty_log_pages, + start, end, GFP_NOFS); + } + log = root->log_root; ret = btrfs_del_root(trans, root->fs_info->log_root_tree, &log->root_key); -- cgit v1.2.3-70-g09d2 From ef8bbdfe7e12dc9b4e80756f6d606c4639c65851 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 23 Sep 2008 13:14:11 -0400 Subject: Btrfs: fix cache_block_group error handling cache block group had a few bugs in the error handling code, this makes sure paths get properly released and the correct return value goes out. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c0bb6b9ac4c..1c10ffc837c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -66,7 +66,7 @@ static int cache_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *block_group) { struct btrfs_path *path; - int ret; + int ret = 0; struct btrfs_key key; struct extent_buffer *leaf; struct extent_io_tree *free_space_cache; @@ -102,10 +102,10 @@ static int cache_block_group(struct btrfs_root *root, btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) - return ret; + goto err; ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY); if (ret < 0) - return ret; + goto err; if (ret == 0) { leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); @@ -161,9 +161,10 @@ next: last + hole_size - 1, GFP_NOFS); } block_group->cached = 1; + ret = 0; err: btrfs_free_path(path); - return 0; + return ret; } struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct -- cgit v1.2.3-70-g09d2 From 0f9dd46cda36b8de3b9f48bc42bd09d20b9c3b52 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 23 Sep 2008 13:14:11 -0400 Subject: Btrfs: free space accounting redo 1) replace the per fs_info extent_io_tree that tracked free space with two rb-trees per block group to track free space areas via offset and size. The reason to do this is because most allocations come with a hint byte where to start, so we can usually find a chunk of free space at that hint byte to satisfy the allocation and get good space packing. If we cannot find free space at or after the given offset we fall back on looking for a chunk of the given size as close to that given offset as possible. When we fall back on the size search we also try to find a slot as close to the size we want as possible, to avoid breaking small chunks off of huge areas if possible. 2) remove the extent_io_tree that tracked the block group cache from fs_info and replaced it with an rb-tree thats tracks block group cache via offset. also added a per space_info list that tracks the block group cache for the particular space so we can lookup related block groups easily. 3) cleaned up the allocation code to make it a little easier to read and a little less complicated. Basically there are 3 steps, first look from our provided hint. If we couldn't find from that given hint, start back at our original search start and look for space from there. If that fails try to allocate space if we can and start looking again. If not we're screwed and need to start over again. 4) small fixes. there were some issues in volumes.c where we wouldn't allocate the rest of the disk. fixed cow_file_range to actually pass the alloc_hint, which has helped a good bit in making the fs_mark test I run have semi-normal results as we run out of space. Generally with data allocations we don't track where we last allocated from, so everytime we did a data allocation we'd search through every block group that we have looking for free space. Now searching a block group with no free space isn't terribly time consuming, it was causing a slight degradation as we got more data block groups. The alloc_hint has fixed this slight degredation and made things semi-normal. There is still one nagging problem I'm working on where we will get ENOSPC when there is definitely plenty of space. This only happens with metadata allocations, and only when we are almost full. So you generally hit the 85% mark first, but sometimes you'll hit the BUG before you hit the 85% wall. I'm still tracking it down, but until then this seems to be pretty stable and make a significant performance gain. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.c | 3 +- fs/btrfs/ctree.h | 46 ++- fs/btrfs/disk-io.c | 7 +- fs/btrfs/extent-tree.c | 869 +++++++++++++++++++++++--------------------- fs/btrfs/extent_io.c | 4 + fs/btrfs/free-space-cache.c | 415 +++++++++++++++++++++ fs/btrfs/inode.c | 3 +- fs/btrfs/volumes.c | 11 +- 9 files changed, 925 insertions(+), 435 deletions(-) create mode 100644 fs/btrfs/free-space-cache.c (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index b7addbfd8c2..eb36ae981bd 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -7,7 +7,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ - ref-cache.o export.o tree-log.o acl.o + ref-cache.o export.o tree-log.o acl.o free-space-cache.o else # Normal Makefile diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 18e84472abb..6f467901246 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2725,9 +2725,8 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, total_size = total_data + (nr * sizeof(struct btrfs_item)); ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); - if (ret == 0) { + if (ret == 0) return -EEXIST; - } if (ret < 0) goto out; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index eb65fd80888..730aae3bc18 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -483,7 +483,6 @@ struct btrfs_csum_item { #define BTRFS_BLOCK_GROUP_DUP (1 << 5) #define BTRFS_BLOCK_GROUP_RAID10 (1 << 6) - struct btrfs_block_group_item { __le64 used; __le64 chunk_objectid; @@ -498,17 +497,40 @@ struct btrfs_space_info { int full; int force_alloc; struct list_head list; + + /* for block groups in our same type */ + struct list_head block_groups; + spinlock_t lock; +}; + +struct btrfs_free_space { + struct rb_node bytes_index; + struct rb_node offset_index; + u64 offset; + u64 bytes; }; struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; - struct btrfs_space_info *space_info; spinlock_t lock; u64 pinned; u64 flags; int cached; int ro; + int dirty; + + struct btrfs_space_info *space_info; + + /* free space cache stuff */ + struct rb_root free_space_bytes; + struct rb_root free_space_offset; + + /* block group cache stuff */ + struct rb_node cache_node; + + /* for block groups in the same raid type */ + struct list_head list; }; struct btrfs_device; @@ -525,8 +547,10 @@ struct btrfs_fs_info { struct btrfs_root *log_root_tree; struct radix_tree_root fs_roots_radix; - struct extent_io_tree free_space_cache; - struct extent_io_tree block_group_cache; + /* block group cache stuff */ + spinlock_t block_group_cache_lock; + struct rb_root block_group_cache_tree; + struct extent_io_tree pinned_extents; struct extent_io_tree pending_del; struct extent_io_tree extent_ins; @@ -1814,4 +1838,18 @@ int btrfs_sync_fs(struct super_block *sb, int wait); int btrfs_check_acl(struct inode *inode, int mask); int btrfs_init_acl(struct inode *inode, struct inode *dir); int btrfs_acl_chmod(struct inode *inode); + +/* free-space-cache.c */ +int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, + u64 bytenr, u64 size); +int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, + u64 bytenr, u64 size); +void btrfs_remove_free_space_cache(struct btrfs_block_group_cache + *block_group); +struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache + *block_group, u64 offset, + u64 bytes); +void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, + u64 bytes); +u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f6f7821d43a..535bd0fe1a7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1410,10 +1410,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; - extent_io_tree_init(&fs_info->free_space_cache, - fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_io_tree_init(&fs_info->block_group_cache, - fs_info->btree_inode->i_mapping, GFP_NOFS); + spin_lock_init(&fs_info->block_group_cache_lock); + fs_info->block_group_cache_tree.rb_node = NULL; + extent_io_tree_init(&fs_info->pinned_extents, fs_info->btree_inode->i_mapping, GFP_NOFS); extent_io_tree_init(&fs_info->pending_del, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1c10ffc837c..813566acc5d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -29,12 +29,6 @@ #include "locking.h" #include "ref-cache.h" -#define BLOCK_GROUP_DATA EXTENT_WRITEBACK -#define BLOCK_GROUP_METADATA EXTENT_UPTODATE -#define BLOCK_GROUP_SYSTEM EXTENT_NEW - -#define BLOCK_GROUP_DIRTY EXTENT_DIRTY - static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct @@ -62,6 +56,127 @@ void maybe_unlock_mutex(struct btrfs_root *root) } } +static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) +{ + return (cache->flags & bits) == bits; +} + +/* + * this adds the block group to the fs_info rb tree for the block group + * cache + */ +int btrfs_add_block_group_cache(struct btrfs_fs_info *info, + struct btrfs_block_group_cache *block_group) +{ + struct rb_node **p; + struct rb_node *parent = NULL; + struct btrfs_block_group_cache *cache; + + spin_lock(&info->block_group_cache_lock); + p = &info->block_group_cache_tree.rb_node; + + while (*p) { + parent = *p; + cache = rb_entry(parent, struct btrfs_block_group_cache, + cache_node); + if (block_group->key.objectid < cache->key.objectid) { + p = &(*p)->rb_left; + } else if (block_group->key.objectid > cache->key.objectid) { + p = &(*p)->rb_right; + } else { + spin_unlock(&info->block_group_cache_lock); + return -EEXIST; + } + } + + rb_link_node(&block_group->cache_node, parent, p); + rb_insert_color(&block_group->cache_node, + &info->block_group_cache_tree); + spin_unlock(&info->block_group_cache_lock); + + return 0; +} + +/* + * This will return the block group at or after bytenr if contains is 0, else + * it will return the block group that contains the bytenr + */ +static struct btrfs_block_group_cache * +block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, + int contains) +{ + struct btrfs_block_group_cache *cache, *ret = NULL; + struct rb_node *n; + u64 end, start; + + spin_lock(&info->block_group_cache_lock); + n = info->block_group_cache_tree.rb_node; + + while (n) { + cache = rb_entry(n, struct btrfs_block_group_cache, + cache_node); + end = cache->key.objectid + cache->key.offset - 1; + start = cache->key.objectid; + + if (bytenr < start) { + if (!contains && (!ret || start < ret->key.objectid)) + ret = cache; + n = n->rb_left; + } else if (bytenr > start) { + if (contains && bytenr <= end) { + ret = cache; + break; + } + n = n->rb_right; + } else { + ret = cache; + break; + } + } + spin_unlock(&info->block_group_cache_lock); + + return ret; +} + +/* + * this is only called by cache_block_group, since we could have freed extents + * we need to check the pinned_extents for any extents that can't be used yet + * since their free space will be released as soon as the transaction commits. + */ +static int add_new_free_space(struct btrfs_block_group_cache *block_group, + struct btrfs_fs_info *info, u64 start, u64 end) +{ + u64 extent_start, extent_end, size; + int ret; + + while (start < end) { + ret = find_first_extent_bit(&info->pinned_extents, start, + &extent_start, &extent_end, + EXTENT_DIRTY); + if (ret) + break; + + if (extent_start == start) { + start = extent_end + 1; + } else if (extent_start > start && extent_start < end) { + size = extent_start - start; + ret = btrfs_add_free_space(block_group, start, size); + BUG_ON(ret); + start = extent_end + 1; + } else { + break; + } + } + + if (start < end) { + size = end - start; + ret = btrfs_add_free_space(block_group, start, size); + BUG_ON(ret); + } + + return 0; +} + static int cache_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *block_group) { @@ -69,10 +184,8 @@ static int cache_block_group(struct btrfs_root *root, int ret = 0; struct btrfs_key key; struct extent_buffer *leaf; - struct extent_io_tree *free_space_cache; int slot; u64 last = 0; - u64 hole_size; u64 first_free; int found = 0; @@ -80,7 +193,6 @@ static int cache_block_group(struct btrfs_root *root, return 0; root = root->fs_info->extent_root; - free_space_cache = &root->fs_info->free_space_cache; if (block_group->cached) return 0; @@ -96,7 +208,8 @@ static int cache_block_group(struct btrfs_root *root, * skip the locking here */ path->skip_locking = 1; - first_free = block_group->key.objectid; + first_free = max_t(u64, block_group->key.objectid, + BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE); key.objectid = block_group->key.objectid; key.offset = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -119,32 +232,28 @@ static int cache_block_group(struct btrfs_root *root, ret = btrfs_next_leaf(root, path); if (ret < 0) goto err; - if (ret == 0) { + if (ret == 0) continue; - } else { + else break; - } } btrfs_item_key_to_cpu(leaf, &key, slot); - if (key.objectid < block_group->key.objectid) { + if (key.objectid < block_group->key.objectid) goto next; - } + if (key.objectid >= block_group->key.objectid + - block_group->key.offset) { + block_group->key.offset) break; - } if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { if (!found) { last = first_free; found = 1; } - if (key.objectid > last) { - hole_size = key.objectid - last; - set_extent_dirty(free_space_cache, last, - last + hole_size - 1, - GFP_NOFS); - } + + add_new_free_space(block_group, root->fs_info, last, + key.objectid); + last = key.objectid + key.offset; } next: @@ -153,13 +262,11 @@ next: if (!found) last = first_free; - if (block_group->key.objectid + - block_group->key.offset > last) { - hole_size = block_group->key.objectid + - block_group->key.offset - last; - set_extent_dirty(free_space_cache, last, - last + hole_size - 1, GFP_NOFS); - } + + add_new_free_space(block_group, root->fs_info, last, + block_group->key.objectid + + block_group->key.offset); + block_group->cached = 1; ret = 0; err: @@ -167,166 +274,79 @@ err: return ret; } +/* + * return the block group that starts at or after bytenr + */ struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr) { - struct extent_io_tree *block_group_cache; - struct btrfs_block_group_cache *block_group = NULL; - u64 ptr; - u64 start; - u64 end; - int ret; + struct btrfs_block_group_cache *cache; - bytenr = max_t(u64, bytenr, - BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE); - block_group_cache = &info->block_group_cache; - ret = find_first_extent_bit(block_group_cache, - bytenr, &start, &end, - BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | - BLOCK_GROUP_SYSTEM); - if (ret) { - return NULL; - } - ret = get_state_private(block_group_cache, start, &ptr); - if (ret) - return NULL; + cache = block_group_cache_tree_search(info, bytenr, 0); - block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr; - return block_group; + return cache; } +/* + * return the block group that contains teh given bytenr + */ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 bytenr) { - struct extent_io_tree *block_group_cache; - struct btrfs_block_group_cache *block_group = NULL; - u64 ptr; - u64 start; - u64 end; - int ret; + struct btrfs_block_group_cache *cache; - bytenr = max_t(u64, bytenr, - BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE); - block_group_cache = &info->block_group_cache; - ret = find_first_extent_bit(block_group_cache, - bytenr, &start, &end, - BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | - BLOCK_GROUP_SYSTEM); - if (ret) { - return NULL; - } - ret = get_state_private(block_group_cache, start, &ptr); - if (ret) - return NULL; + cache = block_group_cache_tree_search(info, bytenr, 1); - block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr; - if (block_group->key.objectid <= bytenr && bytenr < - block_group->key.objectid + block_group->key.offset) - return block_group; - return NULL; + return cache; } -static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) -{ - return (cache->flags & bits) == bits; -} - -static int noinline find_search_start(struct btrfs_root *root, - struct btrfs_block_group_cache **cache_ret, - u64 *start_ret, u64 num, int data) +static int noinline find_free_space(struct btrfs_root *root, + struct btrfs_block_group_cache **cache_ret, + u64 *start_ret, u64 num, int data) { int ret; struct btrfs_block_group_cache *cache = *cache_ret; - struct extent_io_tree *free_space_cache; - struct extent_state *state; + struct btrfs_free_space *info = NULL; u64 last; - u64 start = 0; - u64 cache_miss = 0; u64 total_fs_bytes; u64 search_start = *start_ret; - int wrapped = 0; WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); - free_space_cache = &root->fs_info->free_space_cache; if (!cache) goto out; + last = max(search_start, cache->key.objectid); + again: ret = cache_block_group(root, cache); - if (ret) { + if (ret) goto out; - } - last = max(search_start, cache->key.objectid); - if (!block_group_bits(cache, data) || cache->ro) + if (cache->ro || !block_group_bits(cache, data)) goto new_group; - spin_lock_irq(&free_space_cache->lock); - state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY); - while(1) { - if (!state) { - if (!cache_miss) - cache_miss = last; - spin_unlock_irq(&free_space_cache->lock); - goto new_group; - } - - start = max(last, state->start); - last = state->end + 1; - if (last - start < num) { - do { - state = extent_state_next(state); - } while(state && !(state->state & EXTENT_DIRTY)); - continue; - } - spin_unlock_irq(&free_space_cache->lock); - if (cache->ro) { - goto new_group; - } - if (start + num > cache->key.objectid + cache->key.offset) - goto new_group; - if (!block_group_bits(cache, data)) { - printk("block group bits don't match %Lu %d\n", cache->flags, data); - } - *start_ret = start; + info = btrfs_find_free_space(cache, last, num); + if (info) { + *start_ret = info->offset; return 0; } -out: - cache = btrfs_lookup_block_group(root->fs_info, search_start); - if (!cache) { - printk("Unable to find block group for %Lu\n", search_start); - WARN_ON(1); - } - return -ENOSPC; new_group: last = cache->key.objectid + cache->key.offset; -wrapped: + cache = btrfs_lookup_first_block_group(root->fs_info, last); - if (!cache || cache->key.objectid >= total_fs_bytes) { -no_cache: - if (!wrapped) { - wrapped = 1; - last = search_start; - goto wrapped; - } + if (!cache || cache->key.objectid >= total_fs_bytes) goto out; - } - if (cache_miss && !cache->cached) { - cache_block_group(root, cache); - last = cache_miss; - cache = btrfs_lookup_first_block_group(root->fs_info, last); - } - cache_miss = 0; - cache = btrfs_find_block_group(root, cache, last, data, 0); - if (!cache) - goto no_cache; + *cache_ret = cache; goto again; + +out: + return -ENOSPC; } static u64 div_factor(u64 num, int factor) @@ -338,16 +358,19 @@ static u64 div_factor(u64 num, int factor) return num; } -static int block_group_state_bits(u64 flags) +static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, + u64 flags) { - int bits = 0; - if (flags & BTRFS_BLOCK_GROUP_DATA) - bits |= BLOCK_GROUP_DATA; - if (flags & BTRFS_BLOCK_GROUP_METADATA) - bits |= BLOCK_GROUP_METADATA; - if (flags & BTRFS_BLOCK_GROUP_SYSTEM) - bits |= BLOCK_GROUP_SYSTEM; - return bits; + struct list_head *head = &info->space_info; + struct list_head *cur; + struct btrfs_space_info *found; + list_for_each(cur, head) { + found = list_entry(cur, struct btrfs_space_info, list); + if (found->flags == flags) + return found; + } + return NULL; + } static struct btrfs_block_group_cache * @@ -356,28 +379,19 @@ __btrfs_find_block_group(struct btrfs_root *root, u64 search_start, int data, int owner) { struct btrfs_block_group_cache *cache; - struct extent_io_tree *block_group_cache; struct btrfs_block_group_cache *found_group = NULL; struct btrfs_fs_info *info = root->fs_info; + struct btrfs_space_info *sinfo; u64 used; u64 last = 0; - u64 start; - u64 end; u64 free_check; - u64 ptr; - int bit; - int ret; int full_search = 0; int factor = 10; int wrapped = 0; - block_group_cache = &info->block_group_cache; - if (data & BTRFS_BLOCK_GROUP_METADATA) factor = 9; - bit = block_group_state_bits(data); - if (search_start) { struct btrfs_block_group_cache *shint; shint = btrfs_lookup_first_block_group(info, search_start); @@ -408,20 +422,30 @@ __btrfs_find_block_group(struct btrfs_root *root, else last = search_start; } + sinfo = __find_space_info(root->fs_info, data); + if (!sinfo) + goto found; again: while(1) { - ret = find_first_extent_bit(block_group_cache, last, - &start, &end, bit); - if (ret) - break; + struct list_head *l; - ret = get_state_private(block_group_cache, start, &ptr); - if (ret) { - last = end + 1; - continue; + cache = NULL; + + spin_lock(&sinfo->lock); + list_for_each(l, &sinfo->block_groups) { + struct btrfs_block_group_cache *entry; + entry = list_entry(l, struct btrfs_block_group_cache, + list); + if ((entry->key.objectid >= last) && + (!cache || (entry->key.objectid < + cache->key.objectid))) + cache = entry; } + spin_unlock(&sinfo->lock); + + if (!cache) + break; - cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; spin_lock(&cache->lock); last = cache->key.objectid + cache->key.offset; used = btrfs_block_group_used(&cache->item); @@ -462,6 +486,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, ret = __btrfs_find_block_group(root, hint, search_start, data, owner); return ret; } + static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset) { @@ -1175,34 +1200,37 @@ fail: int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct extent_io_tree *block_group_cache; - struct btrfs_block_group_cache *cache; - int ret; + struct btrfs_block_group_cache *cache, *entry; + struct rb_node *n; int err = 0; int werr = 0; struct btrfs_path *path; u64 last = 0; - u64 start; - u64 end; - u64 ptr; - block_group_cache = &root->fs_info->block_group_cache; path = btrfs_alloc_path(); if (!path) return -ENOMEM; mutex_lock(&root->fs_info->alloc_mutex); while(1) { - ret = find_first_extent_bit(block_group_cache, last, - &start, &end, BLOCK_GROUP_DIRTY); - if (ret) - break; + cache = NULL; + spin_lock(&root->fs_info->block_group_cache_lock); + for (n = rb_first(&root->fs_info->block_group_cache_tree); + n; n = rb_next(n)) { + entry = rb_entry(n, struct btrfs_block_group_cache, + cache_node); + if (entry->dirty) { + cache = entry; + break; + } + } + spin_unlock(&root->fs_info->block_group_cache_lock); - last = end + 1; - ret = get_state_private(block_group_cache, start, &ptr); - if (ret) + if (!cache) break; - cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; + + last += cache->key.offset; + err = write_one_cache_group(trans, root, path, cache); /* @@ -1214,29 +1242,14 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, werr = err; continue; } - clear_extent_bits(block_group_cache, start, end, - BLOCK_GROUP_DIRTY, GFP_NOFS); + + cache->dirty = 0; } btrfs_free_path(path); mutex_unlock(&root->fs_info->alloc_mutex); return werr; } -static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, - u64 flags) -{ - struct list_head *head = &info->space_info; - struct list_head *cur; - struct btrfs_space_info *found; - list_for_each(cur, head) { - found = list_entry(cur, struct btrfs_space_info, list); - if (found->flags == flags) - return found; - } - return NULL; - -} - static int update_space_info(struct btrfs_fs_info *info, u64 flags, u64 total_bytes, u64 bytes_used, struct btrfs_space_info **space_info) @@ -1256,6 +1269,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, return -ENOMEM; list_add(&found->list, &info->space_info); + INIT_LIST_HEAD(&found->block_groups); + spin_lock_init(&found->lock); found->flags = flags; found->total_bytes = total_bytes; found->bytes_used = bytes_used; @@ -1318,7 +1333,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 thresh; u64 start; u64 num_bytes; - int ret; + int ret = 0; flags = reduce_alloc_profile(extent_root, flags); @@ -1355,10 +1370,11 @@ printk("space info full %Lu\n", flags); ret = btrfs_make_block_group(trans, extent_root, 0, flags, BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); BUG_ON(ret); + out_unlock: mutex_unlock(&extent_root->fs_info->chunk_mutex); out: - return 0; + return ret; } static int update_block_group(struct btrfs_trans_handle *trans, @@ -1371,8 +1387,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 total = num_bytes; u64 old_val; u64 byte_in_group; - u64 start; - u64 end; WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); while(total) { @@ -1382,12 +1396,9 @@ static int update_block_group(struct btrfs_trans_handle *trans, } byte_in_group = bytenr - cache->key.objectid; WARN_ON(byte_in_group > cache->key.offset); - start = cache->key.objectid; - end = start + cache->key.offset - 1; - set_extent_bits(&info->block_group_cache, start, end, - BLOCK_GROUP_DIRTY, GFP_NOFS); spin_lock(&cache->lock); + cache->dirty = 1; old_val = btrfs_block_group_used(&cache->item); num_bytes = min(total, cache->key.offset - byte_in_group); if (alloc) { @@ -1401,9 +1412,11 @@ static int update_block_group(struct btrfs_trans_handle *trans, btrfs_set_block_group_used(&cache->item, old_val); spin_unlock(&cache->lock); if (mark_free) { - set_extent_dirty(&info->free_space_cache, - bytenr, bytenr + num_bytes - 1, - GFP_NOFS); + int ret; + ret = btrfs_add_free_space(cache, bytenr, + num_bytes); + if (ret) + return -1; } } total -= num_bytes; @@ -1414,16 +1427,13 @@ static int update_block_group(struct btrfs_trans_handle *trans, static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) { - u64 start; - u64 end; - int ret; - ret = find_first_extent_bit(&root->fs_info->block_group_cache, - search_start, &start, &end, - BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | - BLOCK_GROUP_SYSTEM); - if (ret) + struct btrfs_block_group_cache *cache; + + cache = btrfs_lookup_first_block_group(root->fs_info, search_start); + if (!cache) return 0; - return start; + + return cache->key.objectid; } @@ -1501,8 +1511,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, u64 start; u64 end; int ret; - struct extent_io_tree *free_space_cache; - free_space_cache = &root->fs_info->free_space_cache; + struct btrfs_block_group_cache *cache; mutex_lock(&root->fs_info->alloc_mutex); while(1) { @@ -1512,7 +1521,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, break; btrfs_update_pinned_extents(root, start, end + 1 - start, 0); clear_extent_dirty(unpin, start, end, GFP_NOFS); - set_extent_dirty(free_space_cache, start, end, GFP_NOFS); + cache = btrfs_lookup_block_group(root->fs_info, start); + if (cache->cached) + btrfs_add_free_space(cache, start, end - start + 1); if (need_resched()) { mutex_unlock(&root->fs_info->alloc_mutex); cond_resched(); @@ -1875,9 +1886,12 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, /* if metadata always pin */ if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { + struct btrfs_block_group_cache *cache; + /* btrfs_free_reserved_extent */ - set_extent_dirty(&root->fs_info->free_space_cache, - bytenr, bytenr + num_bytes - 1, GFP_NOFS); + cache = btrfs_lookup_block_group(root->fs_info, bytenr); + BUG_ON(!cache); + btrfs_add_free_space(cache, bytenr, num_bytes); return 0; } pin = 1; @@ -1942,8 +1956,6 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, u64 total_needed = num_bytes; u64 *last_ptr = NULL; struct btrfs_block_group_cache *block_group; - int full_scan = 0; - int wrapped = 0; int chunk_alloc_done = 0; int empty_cluster = 2 * 1024 * 1024; int allowed_chunk_alloc = 0; @@ -1959,9 +1971,9 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, empty_cluster = 256 * 1024; } - if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { + if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) last_ptr = &root->fs_info->last_data_alloc; - } + if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { last_ptr = &root->fs_info->last_log_alloc; if (!last_ptr == 0 && root->fs_info->last_alloc) { @@ -1972,9 +1984,8 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, if (last_ptr) { if (*last_ptr) hint_byte = *last_ptr; - else { + else empty_size += empty_cluster; - } } search_start = max(search_start, first_logical_byte(root, 0)); @@ -1983,145 +1994,172 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, if (search_end == (u64)-1) search_end = btrfs_super_total_bytes(&info->super_copy); - if (hint_byte) { - block_group = btrfs_lookup_first_block_group(info, hint_byte); - if (!block_group) - hint_byte = search_start; - block_group = btrfs_find_block_group(root, block_group, - hint_byte, data, 1); - if (last_ptr && *last_ptr == 0 && block_group) - hint_byte = block_group->key.objectid; - } else { - block_group = btrfs_find_block_group(root, - trans->block_group, - search_start, data, 1); - } search_start = max(search_start, hint_byte); - total_needed += empty_size; -check_failed: - if (!block_group) { - block_group = btrfs_lookup_first_block_group(info, - search_start); - if (!block_group) - block_group = btrfs_lookup_first_block_group(info, - orig_search_start); - } - if (full_scan && !chunk_alloc_done) { - if (allowed_chunk_alloc) { - do_chunk_alloc(trans, root, - num_bytes + 2 * 1024 * 1024, data, 1); - allowed_chunk_alloc = 0; - } else if (block_group && block_group_bits(block_group, data)) { - block_group->space_info->force_alloc = 1; +new_group: + block_group = btrfs_lookup_block_group(info, search_start); + + /* + * Ok this looks a little tricky, buts its really simple. First if we + * didn't find a block group obviously we want to start over. + * Secondly, if the block group we found does not match the type we + * need, and we have a last_ptr and its not 0, chances are the last + * allocation we made was at the end of the block group, so lets go + * ahead and skip the looking through the rest of the block groups and + * start at the beginning. This helps with metadata allocations, + * since you are likely to have a bunch of data block groups to search + * through first before you realize that you need to start over, so go + * ahead and start over and save the time. + */ + if (!block_group || (!block_group_bits(block_group, data) && + last_ptr && *last_ptr)) { + if (search_start != orig_search_start) { + if (last_ptr && *last_ptr) + *last_ptr = 0; + search_start = orig_search_start; + goto new_group; + } else if (!chunk_alloc_done && allowed_chunk_alloc) { + ret = do_chunk_alloc(trans, root, + num_bytes + 2 * 1024 * 1024, + data, 1); + if (ret < 0) { + struct btrfs_space_info *info; + + info = __find_space_info(root->fs_info, data); + goto error; + } + BUG_ON(ret); + chunk_alloc_done = 1; + search_start = orig_search_start; + goto new_group; + } else { + ret = -ENOSPC; + goto error; } - chunk_alloc_done = 1; - } - ret = find_search_start(root, &block_group, &search_start, - total_needed, data); - if (ret == -ENOSPC && last_ptr && *last_ptr) { - *last_ptr = 0; - block_group = btrfs_lookup_first_block_group(info, - orig_search_start); - search_start = orig_search_start; - ret = find_search_start(root, &block_group, &search_start, - total_needed, data); } - if (ret == -ENOSPC) - goto enospc; - if (ret) - goto error; - if (last_ptr && *last_ptr && search_start != *last_ptr) { - *last_ptr = 0; - if (!empty_size) { - empty_size += empty_cluster; - total_needed += empty_size; + /* + * this is going to seach through all of the existing block groups it + * can find, so if we don't find something we need to see if we can + * allocate what we need. + */ + ret = find_free_space(root, &block_group, &search_start, + total_needed, data); + if (ret == -ENOSPC) { + /* + * instead of allocating, start at the original search start + * and see if there is something to be found, if not then we + * allocate + */ + if (search_start != orig_search_start) { + if (last_ptr && *last_ptr) { + *last_ptr = 0; + total_needed += empty_cluster; + } + search_start = orig_search_start; + goto new_group; } - block_group = btrfs_lookup_first_block_group(info, - orig_search_start); - search_start = orig_search_start; - ret = find_search_start(root, &block_group, - &search_start, total_needed, data); - if (ret == -ENOSPC) - goto enospc; - if (ret) + + /* + * we've already allocated, we're pretty screwed + */ + if (chunk_alloc_done) { goto error; + } else if (!allowed_chunk_alloc && block_group && + block_group_bits(block_group, data)) { + block_group->space_info->force_alloc = 1; + goto error; + } else if (!allowed_chunk_alloc) { + goto error; + } + + ret = do_chunk_alloc(trans, root, num_bytes + 2 * 1024 * 1024, + data, 1); + if (ret < 0) + goto error; + + BUG_ON(ret); + chunk_alloc_done = 1; + if (block_group) + search_start = block_group->key.objectid + + block_group->key.offset; + else + search_start = orig_search_start; + goto new_group; } + if (ret) + goto error; + search_start = stripe_align(root, search_start); ins->objectid = search_start; ins->offset = num_bytes; - if (ins->objectid + num_bytes >= search_end) - goto enospc; + if (ins->objectid + num_bytes >= search_end) { + search_start = orig_search_start; + if (chunk_alloc_done) { + ret = -ENOSPC; + goto error; + } + goto new_group; + } if (ins->objectid + num_bytes > block_group->key.objectid + block_group->key.offset) { + if (search_start == orig_search_start && chunk_alloc_done) { + ret = -ENOSPC; + goto error; + } search_start = block_group->key.objectid + block_group->key.offset; goto new_group; } - if (test_range_bit(&info->extent_ins, ins->objectid, - ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) { - search_start = ins->objectid + num_bytes; - goto new_group; - } - - if (test_range_bit(&info->pinned_extents, ins->objectid, - ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) { - search_start = ins->objectid + num_bytes; - goto new_group; - } - if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start && ins->objectid < exclude_start + exclude_nr)) { search_start = exclude_start + exclude_nr; goto new_group; } - if (!(data & BTRFS_BLOCK_GROUP_DATA)) { - block_group = btrfs_lookup_block_group(info, ins->objectid); - if (block_group) - trans->block_group = block_group; - } + if (!(data & BTRFS_BLOCK_GROUP_DATA)) + trans->block_group = block_group; + ins->offset = num_bytes; if (last_ptr) { *last_ptr = ins->objectid + ins->offset; if (*last_ptr == - btrfs_super_total_bytes(&root->fs_info->super_copy)) { + btrfs_super_total_bytes(&root->fs_info->super_copy)) *last_ptr = 0; - } - } - return 0; - -new_group: - if (search_start + num_bytes >= search_end) { -enospc: - search_start = orig_search_start; - if (full_scan) { - ret = -ENOSPC; - goto error; - } - if (wrapped) { - if (!full_scan) - total_needed -= empty_size; - full_scan = 1; - } else - wrapped = 1; } - block_group = btrfs_lookup_first_block_group(info, search_start); - cond_resched(); - block_group = btrfs_find_block_group(root, block_group, - search_start, data, 0); - goto check_failed; + ret = 0; error: return ret; } +static void dump_space_info(struct btrfs_space_info *info, u64 bytes) +{ + struct btrfs_block_group_cache *cache; + struct list_head *l; + + printk(KERN_INFO "space_info has %Lu free, is %sfull\n", + info->total_bytes - info->bytes_used - info->bytes_pinned, + (info->full) ? "" : "not "); + + spin_lock(&info->lock); + list_for_each(l, &info->block_groups) { + cache = list_entry(l, struct btrfs_block_group_cache, list); + spin_lock(&cache->lock); + printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used " + "%Lu pinned\n", + cache->key.objectid, cache->key.offset, + btrfs_block_group_used(&cache->item), cache->pinned); + btrfs_dump_free_space(cache, bytes); + spin_unlock(&cache->lock); + } + spin_unlock(&info->lock); +} static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_bytes, u64 min_alloc_size, @@ -2133,6 +2171,7 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, u64 search_start = 0; u64 alloc_profile; struct btrfs_fs_info *info = root->fs_info; + struct btrfs_block_group_cache *cache; if (data) { alloc_profile = info->avail_data_alloc_bits & @@ -2160,11 +2199,9 @@ again: BTRFS_BLOCK_GROUP_METADATA | (info->metadata_alloc_profile & info->avail_metadata_alloc_bits), 0); - BUG_ON(ret); } ret = do_chunk_alloc(trans, root->fs_info->extent_root, num_bytes + 2 * 1024 * 1024, data, 0); - BUG_ON(ret); } WARN_ON(num_bytes < root->sectorsize); @@ -2175,26 +2212,44 @@ again: if (ret == -ENOSPC && num_bytes > min_alloc_size) { num_bytes = num_bytes >> 1; + num_bytes = num_bytes & ~(root->sectorsize - 1); num_bytes = max(num_bytes, min_alloc_size); do_chunk_alloc(trans, root->fs_info->extent_root, num_bytes, data, 1); goto again; } if (ret) { - printk("allocation failed flags %Lu\n", data); + struct btrfs_space_info *sinfo; + + sinfo = __find_space_info(root->fs_info, data); + printk("allocation failed flags %Lu, wanted %Lu\n", + data, num_bytes); + dump_space_info(sinfo, num_bytes); BUG(); } - clear_extent_dirty(&root->fs_info->free_space_cache, - ins->objectid, ins->objectid + ins->offset - 1, - GFP_NOFS); - return 0; + cache = btrfs_lookup_block_group(root->fs_info, ins->objectid); + if (!cache) { + printk(KERN_ERR "Unable to find block group for %Lu\n", ins->objectid); + return -ENOSPC; + } + + ret = btrfs_remove_free_space(cache, ins->objectid, ins->offset); + + return ret; } int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) { + struct btrfs_block_group_cache *cache; + maybe_lock_mutex(root); - set_extent_dirty(&root->fs_info->free_space_cache, - start, start + len - 1, GFP_NOFS); + cache = btrfs_lookup_block_group(root->fs_info, start); + if (!cache) { + printk(KERN_ERR "Unable to find block group for %Lu\n", start); + maybe_unlock_mutex(root); + return -ENOSPC; + } + btrfs_add_free_space(cache, start, len); maybe_unlock_mutex(root); return 0; } @@ -2264,8 +2319,8 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_items(trans, extent_root, path, keys, sizes, 2); - BUG_ON(ret); + extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_extent_item); btrfs_set_extent_refs(path->nodes[0], extent_item, 1); @@ -2336,9 +2391,9 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); cache_block_group(root, block_group); - clear_extent_dirty(&root->fs_info->free_space_cache, - ins->objectid, ins->objectid + ins->offset - 1, - GFP_NOFS); + ret = btrfs_remove_free_space(block_group, ins->objectid, ins->offset); + BUG_ON(ret); + ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, ref_generation, owner, owner_offset, ins); @@ -2843,31 +2898,24 @@ out: int btrfs_free_block_groups(struct btrfs_fs_info *info) { - u64 start; - u64 end; - u64 ptr; - int ret; + struct btrfs_block_group_cache *block_group; + struct rb_node *n; mutex_lock(&info->alloc_mutex); - while(1) { - ret = find_first_extent_bit(&info->block_group_cache, 0, - &start, &end, (unsigned int)-1); - if (ret) - break; - ret = get_state_private(&info->block_group_cache, start, &ptr); - if (!ret) - kfree((void *)(unsigned long)ptr); - clear_extent_bits(&info->block_group_cache, start, - end, (unsigned int)-1, GFP_NOFS); - } - while(1) { - ret = find_first_extent_bit(&info->free_space_cache, 0, - &start, &end, EXTENT_DIRTY); - if (ret) - break; - clear_extent_dirty(&info->free_space_cache, start, - end, GFP_NOFS); - } + spin_lock(&info->block_group_cache_lock); + while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { + block_group = rb_entry(n, struct btrfs_block_group_cache, + cache_node); + + btrfs_remove_free_space_cache(block_group); + rb_erase(&block_group->cache_node, + &info->block_group_cache_tree); + spin_lock(&block_group->space_info->lock); + list_del(&block_group->list); + spin_unlock(&block_group->space_info->lock); + kfree(block_group); + } + spin_unlock(&info->block_group_cache_lock); mutex_unlock(&info->alloc_mutex); return 0; } @@ -3386,7 +3434,6 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) u64 total_found; u64 shrink_last_byte; struct btrfs_block_group_cache *shrink_block_group; - struct btrfs_fs_info *info = root->fs_info; struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; @@ -3542,15 +3589,17 @@ next: goto out; } - clear_extent_bits(&info->block_group_cache, key.objectid, - key.objectid + key.offset - 1, - (unsigned int)-1, GFP_NOFS); - - - clear_extent_bits(&info->free_space_cache, - key.objectid, key.objectid + key.offset - 1, - (unsigned int)-1, GFP_NOFS); + spin_lock(&root->fs_info->block_group_cache_lock); + rb_erase(&shrink_block_group->cache_node, + &root->fs_info->block_group_cache_tree); + spin_unlock(&root->fs_info->block_group_cache_lock); + ret = btrfs_remove_free_space(shrink_block_group, key.objectid, + key.offset); + if (ret) { + btrfs_end_transaction(trans, root); + goto out; + } /* memset(shrink_block_group, 0, sizeof(*shrink_block_group)); kfree(shrink_block_group); @@ -3566,9 +3615,9 @@ next: /* the code to unpin extents might set a few bits in the free * space cache for this range again */ - clear_extent_bits(&info->free_space_cache, - key.objectid, key.objectid + key.offset - 1, - (unsigned int)-1, GFP_NOFS); + /* XXX? */ + ret = btrfs_remove_free_space(shrink_block_group, key.objectid, + key.offset); out: btrfs_free_path(path); mutex_unlock(&root->fs_info->alloc_mutex); @@ -3616,16 +3665,13 @@ int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; int ret; - int bit; struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; struct btrfs_space_info *space_info; - struct extent_io_tree *block_group_cache; struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; - block_group_cache = &info->block_group_cache; root = info->extent_root; key.objectid = 0; key.offset = 0; @@ -3653,6 +3699,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) } spin_lock_init(&cache->lock); + INIT_LIST_HEAD(&cache->list); read_extent_buffer(leaf, &cache->item, btrfs_item_ptr_offset(leaf, path->slots[0]), sizeof(cache->item)); @@ -3661,31 +3708,19 @@ int btrfs_read_block_groups(struct btrfs_root *root) key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); cache->flags = btrfs_block_group_flags(&cache->item); - bit = 0; - if (cache->flags & BTRFS_BLOCK_GROUP_DATA) { - bit = BLOCK_GROUP_DATA; - } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { - bit = BLOCK_GROUP_SYSTEM; - } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { - bit = BLOCK_GROUP_METADATA; - } - set_avail_alloc_bits(info, cache->flags); ret = update_space_info(info, cache->flags, found_key.offset, btrfs_block_group_used(&cache->item), &space_info); BUG_ON(ret); cache->space_info = space_info; + spin_lock(&space_info->lock); + list_add(&cache->list, &space_info->block_groups); + spin_unlock(&space_info->lock); + + ret = btrfs_add_block_group_cache(root->fs_info, cache); + BUG_ON(ret); - /* use EXTENT_LOCKED to prevent merging */ - set_extent_bits(block_group_cache, found_key.objectid, - found_key.objectid + found_key.offset - 1, - EXTENT_LOCKED, GFP_NOFS); - set_state_private(block_group_cache, found_key.objectid, - (unsigned long)cache); - set_extent_bits(block_group_cache, found_key.objectid, - found_key.objectid + found_key.offset - 1, - bit | EXTENT_LOCKED, GFP_NOFS); if (key.objectid >= btrfs_super_total_bytes(&info->super_copy)) break; @@ -3703,22 +3738,22 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 size) { int ret; - int bit = 0; struct btrfs_root *extent_root; struct btrfs_block_group_cache *cache; - struct extent_io_tree *block_group_cache; WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); extent_root = root->fs_info->extent_root; - block_group_cache = &root->fs_info->block_group_cache; root->fs_info->last_trans_new_blockgroup = trans->transid; cache = kzalloc(sizeof(*cache), GFP_NOFS); - BUG_ON(!cache); + if (!cache) + return -ENOMEM; + cache->key.objectid = chunk_offset; cache->key.offset = size; spin_lock_init(&cache->lock); + INIT_LIST_HEAD(&cache->list); btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); btrfs_set_block_group_used(&cache->item, bytes_used); @@ -3729,16 +3764,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, &cache->space_info); BUG_ON(ret); + spin_lock(&cache->space_info->lock); + list_add(&cache->list, &cache->space_info->block_groups); + spin_unlock(&cache->space_info->lock); - bit = block_group_state_bits(type); - set_extent_bits(block_group_cache, chunk_offset, - chunk_offset + size - 1, - EXTENT_LOCKED, GFP_NOFS); - set_state_private(block_group_cache, chunk_offset, - (unsigned long)cache); - set_extent_bits(block_group_cache, chunk_offset, - chunk_offset + size - 1, - bit | EXTENT_LOCKED, GFP_NOFS); + ret = btrfs_add_block_group_cache(root->fs_info, cache); + BUG_ON(ret); ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, sizeof(cache->item)); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 319a0c7a4a5..8624f3e8803 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2634,6 +2634,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, if (eb) { atomic_inc(&eb->refs); spin_unlock(&tree->buffer_lock); + mark_page_accessed(eb->first_page); return eb; } spin_unlock(&tree->buffer_lock); @@ -2713,6 +2714,9 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, atomic_inc(&eb->refs); spin_unlock(&tree->buffer_lock); + if (eb) + mark_page_accessed(eb->first_page); + return eb; } EXPORT_SYMBOL(find_extent_buffer); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c new file mode 100644 index 00000000000..01c26e8ae55 --- /dev/null +++ b/fs/btrfs/free-space-cache.c @@ -0,0 +1,415 @@ +/* + * Copyright (C) 2008 Red Hat. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include "ctree.h" + +static int tree_insert_offset(struct rb_root *root, u64 offset, + struct rb_node *node) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct btrfs_free_space *info; + + while (*p) { + parent = *p; + info = rb_entry(parent, struct btrfs_free_space, offset_index); + + if (offset < info->offset) + p = &(*p)->rb_left; + else if (offset > info->offset) + p = &(*p)->rb_right; + else + return -EEXIST; + } + + rb_link_node(node, parent, p); + rb_insert_color(node, root); + + return 0; +} + +static int tree_insert_bytes(struct rb_root *root, u64 bytes, + struct rb_node *node) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct btrfs_free_space *info; + + while (*p) { + parent = *p; + info = rb_entry(parent, struct btrfs_free_space, bytes_index); + + if (bytes < info->bytes) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(node, parent, p); + rb_insert_color(node, root); + + return 0; +} + +/* + * searches the tree for the given offset. If contains is set we will return + * the free space that contains the given offset. If contains is not set we + * will return the free space that starts at or after the given offset and is + * at least bytes long. + */ +static struct btrfs_free_space *tree_search_offset(struct rb_root *root, + u64 offset, u64 bytes, + int contains) +{ + struct rb_node *n = root->rb_node; + struct btrfs_free_space *entry, *ret = NULL; + + while (n) { + entry = rb_entry(n, struct btrfs_free_space, offset_index); + + if (offset < entry->offset) { + if (!contains && + (!ret || entry->offset < ret->offset) && + (bytes <= entry->bytes)) + ret = entry; + n = n->rb_left; + } else if (offset > entry->offset) { + if (contains && + (entry->offset + entry->bytes - 1) >= offset) { + ret = entry; + break; + } + n = n->rb_right; + } else { + if (bytes > entry->bytes) { + n = n->rb_right; + continue; + } + ret = entry; + break; + } + } + + return ret; +} + +/* + * return a chunk at least bytes size, as close to offset that we can get. + */ +static struct btrfs_free_space *tree_search_bytes(struct rb_root *root, + u64 offset, u64 bytes) +{ + struct rb_node *n = root->rb_node; + struct btrfs_free_space *entry, *ret = NULL; + + while (n) { + entry = rb_entry(n, struct btrfs_free_space, bytes_index); + + if (bytes < entry->bytes) { + /* + * We prefer to get a hole size as close to the size we + * are asking for so we don't take small slivers out of + * huge holes, but we also want to get as close to the + * offset as possible so we don't have a whole lot of + * fragmentation. + */ + if (offset <= entry->offset) { + if (!ret) + ret = entry; + else if (entry->bytes < ret->bytes) + ret = entry; + else if (entry->offset < ret->offset) + ret = entry; + } + n = n->rb_left; + } else if (bytes > entry->bytes) { + n = n->rb_right; + } else { + /* + * Ok we may have multiple chunks of the wanted size, + * so we don't want to take the first one we find, we + * want to take the one closest to our given offset, so + * keep searching just in case theres a better match. + */ + n = n->rb_right; + if (offset > entry->offset) + continue; + else if (!ret || entry->offset < ret->offset) + ret = entry; + } + } + + return ret; +} + +static void unlink_free_space(struct btrfs_block_group_cache *block_group, + struct btrfs_free_space *info) +{ + rb_erase(&info->offset_index, &block_group->free_space_offset); + rb_erase(&info->bytes_index, &block_group->free_space_bytes); +} + +static int link_free_space(struct btrfs_block_group_cache *block_group, + struct btrfs_free_space *info) +{ + int ret = 0; + + + ret = tree_insert_offset(&block_group->free_space_offset, info->offset, + &info->offset_index); + if (ret) + return ret; + + ret = tree_insert_bytes(&block_group->free_space_bytes, info->bytes, + &info->bytes_index); + if (ret) + return ret; + + return ret; +} + +int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, + u64 offset, u64 bytes) +{ + struct btrfs_free_space *right_info; + struct btrfs_free_space *left_info; + struct btrfs_free_space *info = NULL; + struct btrfs_free_space *alloc_info; + int ret = 0; + + alloc_info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); + if (!alloc_info) + return -ENOMEM; + + /* + * first we want to see if there is free space adjacent to the range we + * are adding, if there is remove that struct and add a new one to + * cover the entire range + */ + spin_lock(&block_group->lock); + + right_info = tree_search_offset(&block_group->free_space_offset, + offset+bytes, 0, 1); + left_info = tree_search_offset(&block_group->free_space_offset, + offset-1, 0, 1); + + if (right_info && right_info->offset == offset+bytes) { + unlink_free_space(block_group, right_info); + info = right_info; + info->offset = offset; + info->bytes += bytes; + } else if (right_info && right_info->offset != offset+bytes) { + printk(KERN_ERR "adding space in the middle of an existing " + "free space area. existing: offset=%Lu, bytes=%Lu. " + "new: offset=%Lu, bytes=%Lu\n", right_info->offset, + right_info->bytes, offset, bytes); + BUG(); + } + + if (left_info) { + unlink_free_space(block_group, left_info); + + if (unlikely((left_info->offset + left_info->bytes) != + offset)) { + printk(KERN_ERR "free space to the left of new free " + "space isn't quite right. existing: offset=%Lu," + " bytes=%Lu. new: offset=%Lu, bytes=%Lu\n", + left_info->offset, left_info->bytes, offset, + bytes); + BUG(); + } + + if (info) { + info->offset = left_info->offset; + info->bytes += left_info->bytes; + kfree(left_info); + } else { + info = left_info; + info->bytes += bytes; + } + } + + if (info) { + ret = link_free_space(block_group, info); + if (!ret) + info = NULL; + goto out; + } + + info = alloc_info; + alloc_info = NULL; + info->offset = offset; + info->bytes = bytes; + + ret = link_free_space(block_group, info); + if (ret) + kfree(info); +out: + spin_unlock(&block_group->lock); + if (ret) { + printk(KERN_ERR "btrfs: unable to add free space :%d\n", ret); + if (ret == -EEXIST) + BUG(); + } + + if (alloc_info) + kfree(alloc_info); + + return ret; +} + +int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, + u64 offset, u64 bytes) +{ + struct btrfs_free_space *info; + int ret = 0; + + spin_lock(&block_group->lock); + info = tree_search_offset(&block_group->free_space_offset, offset, 0, + 1); + + if (info && info->offset == offset) { + if (info->bytes < bytes) { + printk(KERN_ERR "Found free space at %Lu, size %Lu," + "trying to use %Lu\n", + info->offset, info->bytes, bytes); + WARN_ON(1); + ret = -EINVAL; + goto out; + } + + unlink_free_space(block_group, info); + + if (info->bytes == bytes) { + kfree(info); + goto out; + } + + info->offset += bytes; + info->bytes -= bytes; + + ret = link_free_space(block_group, info); + BUG_ON(ret); + } else { + WARN_ON(1); + } +out: + spin_unlock(&block_group->lock); + return ret; +} + +void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, + u64 bytes) +{ + struct btrfs_free_space *info; + struct rb_node *n; + int count = 0; + + for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) { + info = rb_entry(n, struct btrfs_free_space, offset_index); + if (info->bytes >= bytes) + count++; + //printk(KERN_INFO "offset=%Lu, bytes=%Lu\n", info->offset, + // info->bytes); + } + printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" + "\n", count); +} + +u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group) +{ + struct btrfs_free_space *info; + struct rb_node *n; + u64 ret = 0; + + for (n = rb_first(&block_group->free_space_offset); n; + n = rb_next(n)) { + info = rb_entry(n, struct btrfs_free_space, offset_index); + ret += info->bytes; + } + + return ret; +} + +void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) +{ + struct btrfs_free_space *info; + struct rb_node *node; + + spin_lock(&block_group->lock); + while ((node = rb_last(&block_group->free_space_bytes)) != NULL) { + info = rb_entry(node, struct btrfs_free_space, bytes_index); + unlink_free_space(block_group, info); + kfree(info); + if (need_resched()) { + spin_unlock(&block_group->lock); + cond_resched(); + spin_lock(&block_group->lock); + } + } + spin_unlock(&block_group->lock); +} + +struct btrfs_free_space *btrfs_find_free_space_offset(struct + btrfs_block_group_cache + *block_group, u64 offset, + u64 bytes) +{ + struct btrfs_free_space *ret; + + spin_lock(&block_group->lock); + ret = tree_search_offset(&block_group->free_space_offset, offset, + bytes, 0); + spin_unlock(&block_group->lock); + + return ret; +} + +struct btrfs_free_space *btrfs_find_free_space_bytes(struct + btrfs_block_group_cache + *block_group, u64 offset, + u64 bytes) +{ + struct btrfs_free_space *ret; + + spin_lock(&block_group->lock); + + ret = tree_search_bytes(&block_group->free_space_bytes, offset, bytes); + spin_unlock(&block_group->lock); + + return ret; +} + +struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache + *block_group, u64 offset, + u64 bytes) +{ + struct btrfs_free_space *ret; + + spin_lock(&block_group->lock); + ret = tree_search_offset(&block_group->free_space_offset, offset, + bytes, 0); + if (!ret) + ret = tree_search_bytes(&block_group->free_space_bytes, + offset, bytes); + + spin_unlock(&block_group->lock); + + return ret; +} diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 12c1c0530f3..65b4f864b0d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -141,7 +141,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) while(num_bytes > 0) { cur_alloc_size = min(num_bytes, root->fs_info->max_extent); ret = btrfs_reserve_extent(trans, root, cur_alloc_size, - root->sectorsize, 0, 0, + root->sectorsize, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { WARN_ON(1); @@ -558,7 +558,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) trans->transid, inode->i_ino, ordered_extent->file_offset, &ins); BUG_ON(ret); - mutex_lock(&BTRFS_I(inode)->extent_mutex); ret = btrfs_drop_extents(trans, root, inode, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1546fa6f4f7..b9e5c2d82dd 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -64,8 +64,8 @@ static void lock_chunks(struct btrfs_root *root) static void unlock_chunks(struct btrfs_root *root) { - mutex_unlock(&root->fs_info->alloc_mutex); mutex_unlock(&root->fs_info->chunk_mutex); + mutex_unlock(&root->fs_info->alloc_mutex); } int btrfs_cleanup_fs_uuids(void) @@ -1668,8 +1668,13 @@ again: else min_free = calc_size; - /* we add 1MB because we never use the first 1MB of the device */ - min_free += 1024 * 1024; + /* + * we add 1MB because we never use the first 1MB of the device, unless + * we've looped, then we are likely allocating the maximum amount of + * space left already + */ + if (!looped) + min_free += 1024 * 1024; /* build a private list of devices we will allocate from */ while(index < num_stripes) { -- cgit v1.2.3-70-g09d2 From 1c2308f8e7d8491467e0095af2b01500f1b70819 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 23 Sep 2008 13:14:13 -0400 Subject: Add check for tree-log roots in btrfs_alloc_reserved_extents Tree log blocks are only reserved, and should not ever get fully allocated on disk. This check makes sure they stay out of the extent tree. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 813566acc5d..558fbe40736 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2365,6 +2365,9 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, struct btrfs_key *ins) { int ret; + + if (root_objectid == BTRFS_TREE_LOG_OBJECTID) + return 0; maybe_lock_mutex(root); ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, ref_generation, owner, -- cgit v1.2.3-70-g09d2 From 31840ae1a6b433ca0e6a8d341756ff478bbf959e Mon Sep 17 00:00:00 2001 From: Zheng Yan Date: Tue, 23 Sep 2008 13:14:14 -0400 Subject: Btrfs: Full back reference support This patch makes the back reference system to explicit record the location of parent node for all types of extents. The location of parent node is placed into the offset field of backref key. Every time a tree block is balanced, the back references for the affected lower level extents are updated. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 228 ++++++---- fs/btrfs/ctree.h | 62 ++- fs/btrfs/disk-io.c | 4 +- fs/btrfs/extent-tree.c | 1143 +++++++++++++++++++++++++++++------------------- fs/btrfs/extent_io.c | 3 +- fs/btrfs/file.c | 121 +++-- fs/btrfs/inode.c | 57 ++- fs/btrfs/ioctl.c | 57 +-- fs/btrfs/print-tree.c | 5 +- fs/btrfs/tree-log.c | 108 ++--- 10 files changed, 1066 insertions(+), 722 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6f467901246..50aea8cb653 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -125,7 +125,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, u32 nritems; int ret = 0; int level; - struct btrfs_key first_key; struct btrfs_root *new_root; new_root = kmalloc(sizeof(*new_root), GFP_NOFS); @@ -141,18 +140,10 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, level = btrfs_header_level(buf); nritems = btrfs_header_nritems(buf); - if (nritems) { - if (level == 0) - btrfs_item_key_to_cpu(buf, &first_key, 0); - else - btrfs_node_key_to_cpu(buf, &first_key, 0); - } else { - first_key.objectid = 0; - } - cow = btrfs_alloc_free_block(trans, new_root, buf->len, - new_root_objectid, - trans->transid, first_key.objectid, - level, buf->start, 0); + + cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0, + new_root_objectid, trans->transid, + level, buf->start, 0); if (IS_ERR(cow)) { kfree(new_root); return PTR_ERR(cow); @@ -165,7 +156,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); WARN_ON(btrfs_header_generation(buf) > trans->transid); - ret = btrfs_inc_ref(trans, new_root, buf, 0); + ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL); kfree(new_root); if (ret) @@ -184,39 +175,31 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, u64 search_start, u64 empty_size, u64 prealloc_dest) { - u64 root_gen; + u64 parent_start; struct extent_buffer *cow; u32 nritems; int ret = 0; int different_trans = 0; int level; int unlock_orig = 0; - struct btrfs_key first_key; if (*cow_ret == buf) unlock_orig = 1; WARN_ON(!btrfs_tree_locked(buf)); - if (root->ref_cows) { - root_gen = trans->transid; - } else { - root_gen = 0; - } + if (parent) + parent_start = parent->start; + else + parent_start = 0; + WARN_ON(root->ref_cows && trans->transid != root->fs_info->running_transaction->transid); WARN_ON(root->ref_cows && trans->transid != root->last_trans); level = btrfs_header_level(buf); nritems = btrfs_header_nritems(buf); - if (nritems) { - if (level == 0) - btrfs_item_key_to_cpu(buf, &first_key, 0); - else - btrfs_node_key_to_cpu(buf, &first_key, 0); - } else { - first_key.objectid = 0; - } + if (prealloc_dest) { struct btrfs_key ins; @@ -224,19 +207,19 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, ins.offset = buf->len; ins.type = BTRFS_EXTENT_ITEM_KEY; - ret = btrfs_alloc_reserved_extent(trans, root, + ret = btrfs_alloc_reserved_extent(trans, root, parent_start, root->root_key.objectid, - root_gen, level, - first_key.objectid, + trans->transid, level, 0, &ins); BUG_ON(ret); cow = btrfs_init_new_buffer(trans, root, prealloc_dest, buf->len); } else { cow = btrfs_alloc_free_block(trans, root, buf->len, + parent_start, root->root_key.objectid, - root_gen, first_key.objectid, - level, search_start, empty_size); + trans->transid, level, + search_start, empty_size); } if (IS_ERR(cow)) return PTR_ERR(cow); @@ -249,17 +232,23 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(btrfs_header_generation(buf) > trans->transid); if (btrfs_header_generation(buf) != trans->transid) { + u32 nr_extents; different_trans = 1; - ret = btrfs_inc_ref(trans, root, buf, 1); + ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents); if (ret) return ret; + + ret = btrfs_cache_ref(trans, root, buf, nr_extents); + WARN_ON(ret); } else { + ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems); + if (ret) + return ret; clean_tree_block(trans, root, buf); } if (buf == root->node) { WARN_ON(parent && parent != buf); - root_gen = btrfs_header_generation(buf); spin_lock(&root->node_lock); root->node = cow; @@ -268,13 +257,14 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, if (buf != root->commit_root) { btrfs_free_extent(trans, root, buf->start, - buf->len, root->root_key.objectid, - root_gen, 0, 0, 1); + buf->len, buf->start, + root->root_key.objectid, + btrfs_header_generation(buf), + 0, 0, 1); } free_extent_buffer(buf); add_root_to_dirty_list(root); } else { - root_gen = btrfs_header_generation(parent); btrfs_set_node_blockptr(parent, parent_slot, cow->start); WARN_ON(trans->transid == 0); @@ -283,8 +273,8 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(parent); WARN_ON(btrfs_header_generation(parent) != trans->transid); btrfs_free_extent(trans, root, buf->start, buf->len, - btrfs_header_owner(parent), root_gen, - 0, 0, 1); + parent_start, btrfs_header_owner(parent), + btrfs_header_generation(parent), 0, 0, 1); } if (unlock_orig) btrfs_tree_unlock(buf); @@ -831,6 +821,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, root->node = child; spin_unlock(&root->node_lock); + ret = btrfs_update_extent_ref(trans, root, child->start, + mid->start, child->start, + root->root_key.objectid, + trans->transid, level - 1, 0); + BUG_ON(ret); + add_root_to_dirty_list(root); btrfs_tree_unlock(child); path->locks[level] = 0; @@ -840,7 +836,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, /* once for the path */ free_extent_buffer(mid); ret = btrfs_free_extent(trans, root, mid->start, mid->len, - root->root_key.objectid, + mid->start, root->root_key.objectid, btrfs_header_generation(mid), 0, 0, 1); /* once for the root ptr */ free_extent_buffer(mid); @@ -905,7 +901,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, if (wret) ret = wret; wret = btrfs_free_extent(trans, root, bytenr, - blocksize, + blocksize, parent->start, btrfs_header_owner(parent), generation, 0, 0, 1); if (wret) @@ -954,6 +950,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, if (wret) ret = wret; wret = btrfs_free_extent(trans, root, bytenr, blocksize, + parent->start, btrfs_header_owner(parent), root_gen, 0, 0, 1); if (wret) @@ -1499,6 +1496,41 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, return ret; } +/* + * update item key. + * + * This function isn't completely safe. It's the caller's responsibility + * that the new key won't break the order + */ +int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *new_key) +{ + struct btrfs_disk_key disk_key; + struct extent_buffer *eb; + int slot; + + eb = path->nodes[0]; + slot = path->slots[0]; + if (slot > 0) { + btrfs_item_key(eb, &disk_key, slot - 1); + if (comp_keys(&disk_key, new_key) >= 0) + return -1; + } + if (slot < btrfs_header_nritems(eb) - 1) { + btrfs_item_key(eb, &disk_key, slot + 1); + if (comp_keys(&disk_key, new_key) <= 0) + return -1; + } + + btrfs_cpu_key_to_disk(&disk_key, new_key); + btrfs_set_item_key(eb, &disk_key, slot); + btrfs_mark_buffer_dirty(eb); + if (slot == 0) + fixup_low_keys(trans, root, path, &disk_key, 1); + return 0; +} + /* * try to push data from one node into the next node left in the * tree. @@ -1558,6 +1590,10 @@ static int push_node_left(struct btrfs_trans_handle *trans, btrfs_set_header_nritems(dst, dst_nritems + push_items); btrfs_mark_buffer_dirty(src); btrfs_mark_buffer_dirty(dst); + + ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items); + BUG_ON(ret); + return ret; } @@ -1619,6 +1655,10 @@ static int balance_node_right(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(src); btrfs_mark_buffer_dirty(dst); + + ret = btrfs_update_ref(trans, root, src, dst, 0, push_items); + BUG_ON(ret); + return ret; } @@ -1633,30 +1673,24 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - u64 root_gen; u64 lower_gen; struct extent_buffer *lower; struct extent_buffer *c; struct extent_buffer *old; struct btrfs_disk_key lower_key; + int ret; BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - if (root->ref_cows) - root_gen = trans->transid; - else - root_gen = 0; - lower = path->nodes[level-1]; if (level == 1) btrfs_item_key(lower, &lower_key, 0); else btrfs_node_key(lower, &lower_key, 0); - c = btrfs_alloc_free_block(trans, root, root->nodesize, - root->root_key.objectid, - root_gen, le64_to_cpu(lower_key.objectid), + c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, + root->root_key.objectid, trans->transid, level, root->node->start, 0); if (IS_ERR(c)) return PTR_ERR(c); @@ -1679,7 +1713,7 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, btrfs_set_node_key(c, &lower_key, 0); btrfs_set_node_blockptr(c, 0, lower->start); lower_gen = btrfs_header_generation(lower); - WARN_ON(lower_gen == 0); + WARN_ON(lower_gen != trans->transid); btrfs_set_node_ptr_generation(c, 0, lower_gen); @@ -1690,6 +1724,12 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, root->node = c; spin_unlock(&root->node_lock); + ret = btrfs_update_extent_ref(trans, root, lower->start, + lower->start, c->start, + root->root_key.objectid, + trans->transid, level - 1, 0); + BUG_ON(ret); + /* the super has an extra ref to root->node */ free_extent_buffer(old); @@ -1698,20 +1738,6 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, path->nodes[level] = c; path->locks[level] = 1; path->slots[level] = 0; - - if (root->ref_cows && lower_gen != trans->transid) { - struct btrfs_path *back_path = btrfs_alloc_path(); - int ret; - mutex_lock(&root->fs_info->alloc_mutex); - ret = btrfs_insert_extent_backref(trans, - root->fs_info->extent_root, - path, lower->start, - root->root_key.objectid, - trans->transid, 0, 0); - BUG_ON(ret); - mutex_unlock(&root->fs_info->alloc_mutex); - btrfs_free_path(back_path); - } return 0; } @@ -1766,7 +1792,6 @@ static noinline int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - u64 root_gen; struct extent_buffer *c; struct extent_buffer *split; struct btrfs_disk_key disk_key; @@ -1793,17 +1818,11 @@ static noinline int split_node(struct btrfs_trans_handle *trans, } c_nritems = btrfs_header_nritems(c); - if (root->ref_cows) - root_gen = trans->transid; - else - root_gen = 0; - btrfs_node_key(c, &disk_key, 0); split = btrfs_alloc_free_block(trans, root, root->nodesize, - root->root_key.objectid, - root_gen, - btrfs_disk_key_objectid(&disk_key), - level, c->start, 0); + path->nodes[level + 1]->start, + root->root_key.objectid, + trans->transid, level, c->start, 0); if (IS_ERR(split)) return PTR_ERR(split); @@ -1840,6 +1859,9 @@ static noinline int split_node(struct btrfs_trans_handle *trans, if (wret) ret = wret; + ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid); + BUG_ON(ret); + if (path->slots[level] >= mid) { path->slots[level] -= mid; btrfs_tree_unlock(c); @@ -1955,10 +1977,23 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root else nr = 1; + if (path->slots[0] >= left_nritems) + push_space += data_size + sizeof(*item); + i = left_nritems - 1; while (i >= nr) { item = btrfs_item_nr(left, i); + if (!empty && push_items > 0) { + if (path->slots[0] > i) + break; + if (path->slots[0] == i) { + int space = btrfs_leaf_free_space(root, left); + if (space + push_space * 2 > free_space) + break; + } + } + if (path->slots[0] == i) push_space += data_size + sizeof(*item); @@ -1973,6 +2008,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root this_item_size = btrfs_item_size(left, item); if (this_item_size + sizeof(*item) + push_space > free_space) break; + push_items++; push_space += this_item_size + sizeof(*item); if (i == 0) @@ -2046,6 +2082,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(left); btrfs_mark_buffer_dirty(right); + ret = btrfs_update_ref(trans, root, left, right, 0, push_items); + BUG_ON(ret); + btrfs_item_key(right, &disk_key, 0); btrfs_set_node_key(upper, &disk_key, slot + 1); btrfs_mark_buffer_dirty(upper); @@ -2147,6 +2186,16 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root KM_USER1); } + if (!empty && push_items > 0) { + if (path->slots[0] < i) + break; + if (path->slots[0] == i) { + int space = btrfs_leaf_free_space(root, right); + if (space + push_space * 2 > free_space) + break; + } + } + if (path->slots[0] == i) push_space += data_size + sizeof(*item); @@ -2255,6 +2304,10 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root if (right_nritems) btrfs_mark_buffer_dirty(right); + ret = btrfs_update_ref(trans, root, right, left, + old_left_nritems, push_items); + BUG_ON(ret); + btrfs_item_key(right, &disk_key, 0); wret = fixup_low_keys(trans, root, path, &disk_key, 1); if (wret) @@ -2294,7 +2347,6 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_path *path, int data_size, int extend) { - u64 root_gen; struct extent_buffer *l; u32 nritems; int mid; @@ -2313,11 +2365,6 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, if (extend) space_needed = data_size; - if (root->ref_cows) - root_gen = trans->transid; - else - root_gen = 0; - /* first try to make some room by pushing left and right */ if (ins_key->type != BTRFS_DIR_ITEM_KEY) { wret = push_leaf_right(trans, root, path, data_size, 0); @@ -2348,13 +2395,10 @@ again: nritems = btrfs_header_nritems(l); mid = (nritems + 1)/ 2; - btrfs_item_key(l, &disk_key, 0); - right = btrfs_alloc_free_block(trans, root, root->leafsize, - root->root_key.objectid, - root_gen, - le64_to_cpu(disk_key.objectid), - 0, l->start, 0); + path->nodes[1]->start, + root->root_key.objectid, + trans->transid, 0, l->start, 0); if (IS_ERR(right)) { BUG_ON(1); return PTR_ERR(right); @@ -2485,6 +2529,9 @@ again: btrfs_mark_buffer_dirty(l); BUG_ON(path->slots[0] != slot); + ret = btrfs_update_ref(trans, root, l, right, 0, nritems); + BUG_ON(ret); + if (mid <= slot) { btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); @@ -2956,6 +3003,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = wret; wret = btrfs_free_extent(trans, root, leaf->start, leaf->len, + path->nodes[1]->start, btrfs_header_owner(path->nodes[1]), root_gen, 0, 0, 1); if (wret) @@ -3007,7 +3055,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, free_extent_buffer(leaf); wret = btrfs_free_extent(trans, root, bytenr, - blocksize, + blocksize, path->nodes[1]->start, btrfs_header_owner(path->nodes[1]), root_gen, 0, 0, 1); if (wret) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 730aae3bc18..138c157bbc4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -40,7 +40,7 @@ extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; struct btrfs_ordered_sum; -#define BTRFS_MAGIC "_B8RfS_M" +#define BTRFS_MAGIC "_B9RfS_M" #define BTRFS_ACL_NOT_CACHED ((void *)-1) @@ -81,6 +81,9 @@ struct btrfs_ordered_sum; #define BTRFS_TREE_LOG_OBJECTID -6ULL #define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL +/* dummy objectid represents multiple objectids */ +#define BTRFS_MULTIPLE_OBJECTIDS -255ULL + /* * All files have objectids in this range. */ @@ -369,6 +372,7 @@ struct btrfs_extent_ref { __le64 generation; __le64 objectid; __le64 offset; + __le32 num_refs; } __attribute__ ((__packed__)); /* dev extents record free space on individual devices. The owner @@ -1047,9 +1051,6 @@ btrfs_inode_otime(struct btrfs_inode_item *inode_item) BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); -/* struct btrfs_extent_item */ -BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); - /* struct btrfs_dev_extent */ BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent, chunk_tree, 64); @@ -1070,14 +1071,20 @@ BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64); BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64); BTRFS_SETGET_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64); BTRFS_SETGET_FUNCS(ref_offset, struct btrfs_extent_ref, offset, 64); +BTRFS_SETGET_FUNCS(ref_num_refs, struct btrfs_extent_ref, num_refs, 32); BTRFS_SETGET_STACK_FUNCS(stack_ref_root, struct btrfs_extent_ref, root, 64); BTRFS_SETGET_STACK_FUNCS(stack_ref_generation, struct btrfs_extent_ref, generation, 64); BTRFS_SETGET_STACK_FUNCS(stack_ref_objectid, struct btrfs_extent_ref, objectid, 64); -BTRFS_SETGET_STACK_FUNCS(stack_ref_offset, struct btrfs_extent_ref, offset, 64); +BTRFS_SETGET_STACK_FUNCS(stack_ref_offset, struct btrfs_extent_ref, + offset, 64); +BTRFS_SETGET_STACK_FUNCS(stack_ref_num_refs, struct btrfs_extent_ref, + num_refs, 32); +/* struct btrfs_extent_item */ +BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, refs, 32); @@ -1474,8 +1481,7 @@ static inline struct dentry *fdentry(struct file *file) { } /* extent-tree.c */ -int btrfs_lookup_extent(struct btrfs_root *root, struct btrfs_path *path, - u64 start, u64 len); +int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); int btrfs_update_pinned_extents(struct btrfs_root *root, u64 bytenr, u64 num, int pin); int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, @@ -1495,10 +1501,9 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int data, int owner); struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u32 blocksize, + u32 blocksize, u64 parent, u64 root_objectid, u64 ref_generation, - u64 first_objectid, int level, u64 hint, u64 empty_size); @@ -1508,23 +1513,24 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size); int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_path *path, u64 bytenr, + struct btrfs_path *path, + u64 bytenr, u64 parent, u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 num_bytes, u64 min_bytes, + u64 num_bytes, u64 parent, u64 min_bytes, u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, u64 data); int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_root *root, u64 parent, u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, struct btrfs_key *ins); int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_root *root, u64 parent, u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, struct btrfs_key *ins); @@ -1535,9 +1541,16 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, u64 search_end, struct btrfs_key *ins, u64 data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf, int cache_ref); -int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 bytenr, u64 num_bytes, + struct extent_buffer *orig_buf, struct extent_buffer *buf, + u32 *nr_extents); +int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct extent_buffer *buf, u32 nr_extents); +int btrfs_update_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *orig_buf, + struct extent_buffer *buf, int start_slot, int nr); +int btrfs_free_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 ref_generation, u64 owner_objectid, u64 owner_offset, int pin); int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); @@ -1545,10 +1558,15 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_io_tree *unpin); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u64 num_bytes, - u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset); + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset); +int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 orig_parent, u64 parent, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset); int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_free_block_groups(struct btrfs_fs_info *info); @@ -1561,7 +1579,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, int btrfs_previous_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid, int type); - +int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *new_key); struct extent_buffer *btrfs_root_node(struct btrfs_root *root); struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 25be96946a2..d35ca6a3f51 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -882,8 +882,8 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, root->ref_cows = 0; root->node = btrfs_alloc_free_block(trans, root, root->leafsize, - BTRFS_TREE_LOG_OBJECTID, - 0, 0, 0, 0, 0); + 0, BTRFS_TREE_LOG_OBJECTID, + trans->transid, 0, 0, 0); btrfs_set_header_nritems(root->node, 0); btrfs_set_header_level(root->node, 0); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 558fbe40736..5258923d621 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -29,6 +29,21 @@ #include "locking.h" #include "ref-cache.h" +#define PENDING_EXTENT_INSERT 0 +#define PENDING_EXTENT_DELETE 1 +#define PENDING_BACKREF_UPDATE 2 + +struct pending_extent_op { + int type; + u64 bytenr; + u64 num_bytes; + u64 parent; + u64 orig_parent; + u64 generation; + u64 orig_generation; + int level; +}; + static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct @@ -487,48 +502,15 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, return ret; } -static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset) -{ - u32 high_crc = ~(u32)0; - u32 low_crc = ~(u32)0; - __le64 lenum; - lenum = cpu_to_le64(root_objectid); - high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum)); - lenum = cpu_to_le64(ref_generation); - low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); - if (owner >= BTRFS_FIRST_FREE_OBJECTID) { - lenum = cpu_to_le64(owner); - low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); - lenum = cpu_to_le64(owner_offset); - low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); - } - return ((u64)high_crc << 32) | (u64)low_crc; -} - -static int match_extent_ref(struct extent_buffer *leaf, - struct btrfs_extent_ref *disk_ref, - struct btrfs_extent_ref *cpu_ref) -{ - int ret; - int len; - - if (cpu_ref->objectid) - len = sizeof(*cpu_ref); - else - len = 2 * sizeof(u64); - ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref, - len); - return ret == 0; -} - /* simple helper to search for an existing extent at a given offset */ -int btrfs_lookup_extent(struct btrfs_root *root, struct btrfs_path *path, - u64 start, u64 len) +int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) { int ret; struct btrfs_key key; + struct btrfs_path *path; + path = btrfs_alloc_path(); + BUG_ON(!path); maybe_lock_mutex(root); key.objectid = start; key.offset = len; @@ -536,72 +518,7 @@ int btrfs_lookup_extent(struct btrfs_root *root, struct btrfs_path *path, ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, 0, 0); maybe_unlock_mutex(root); - return ret; -} - -static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, u64 bytenr, - u64 root_objectid, - u64 ref_generation, u64 owner, - u64 owner_offset, int del) -{ - u64 hash; - struct btrfs_key key; - struct btrfs_key found_key; - struct btrfs_extent_ref ref; - struct extent_buffer *leaf; - struct btrfs_extent_ref *disk_ref; - int ret; - int ret2; - - btrfs_set_stack_ref_root(&ref, root_objectid); - btrfs_set_stack_ref_generation(&ref, ref_generation); - btrfs_set_stack_ref_objectid(&ref, owner); - btrfs_set_stack_ref_offset(&ref, owner_offset); - - hash = hash_extent_ref(root_objectid, ref_generation, owner, - owner_offset); - key.offset = hash; - key.objectid = bytenr; - key.type = BTRFS_EXTENT_REF_KEY; - - while (1) { - ret = btrfs_search_slot(trans, root, &key, path, - del ? -1 : 0, del); - if (ret < 0) - goto out; - leaf = path->nodes[0]; - if (ret != 0) { - u32 nritems = btrfs_header_nritems(leaf); - if (path->slots[0] >= nritems) { - ret2 = btrfs_next_leaf(root, path); - if (ret2) - goto out; - leaf = path->nodes[0]; - } - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid != bytenr || - found_key.type != BTRFS_EXTENT_REF_KEY) - goto out; - key.offset = found_key.offset; - if (del) { - btrfs_release_path(root, path); - continue; - } - } - disk_ref = btrfs_item_ptr(path->nodes[0], - path->slots[0], - struct btrfs_extent_ref); - if (match_extent_ref(path->nodes[0], disk_ref, &ref)) { - ret = 0; - goto out; - } - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - key.offset = found_key.offset + 1; - btrfs_release_path(root, path); - } -out: + btrfs_free_path(path); return ret; } @@ -622,7 +539,7 @@ out: * File extents can be referenced by: * * - multiple snapshots, subvolumes, or different generations in one subvol - * - different files inside a single subvolume (in theory, not implemented yet) + * - different files inside a single subvolume * - different offsets inside a file (bookend extents in file.c) * * The extent ref structure has fields for: @@ -631,119 +548,284 @@ out: * - Generation number of the tree holding the reference * - objectid of the file holding the reference * - offset in the file corresponding to the key holding the reference + * - number of references holding by parent node (alway 1 for tree blocks) + * + * Btree leaf may hold multiple references to a file extent. In most cases, + * these references are from same file and the corresponding offsets inside + * the file are close together. So inode objectid and offset in file are + * just hints, they provide hints about where in the btree the references + * can be found and when we can stop searching. * * When a file extent is allocated the fields are filled in: - * (root_key.objectid, trans->transid, inode objectid, offset in file) + * (root_key.objectid, trans->transid, inode objectid, offset in file, 1) * * When a leaf is cow'd new references are added for every file extent found - * in the leaf. It looks the same as the create case, but trans->transid - * will be different when the block is cow'd. + * in the leaf. It looks similar to the create case, but trans->transid will + * be different when the block is cow'd. * - * (root_key.objectid, trans->transid, inode objectid, offset in file) + * (root_key.objectid, trans->transid, inode objectid, offset in file, + * number of references in the leaf) * - * When a file extent is removed either during snapshot deletion or file - * truncation, the corresponding back reference is found - * by searching for: + * Because inode objectid and offset in file are just hints, they are not + * used when backrefs are deleted. When a file extent is removed either + * during snapshot deletion or file truncation, we find the corresponding + * back back reference and check the following fields. * - * (btrfs_header_owner(leaf), btrfs_header_generation(leaf), - * inode objectid, offset in file) + * (btrfs_header_owner(leaf), btrfs_header_generation(leaf)) * * Btree extents can be referenced by: * * - Different subvolumes * - Different generations of the same subvolume * - * Storing sufficient information for a full reverse mapping of a btree - * block would require storing the lowest key of the block in the backref, - * and it would require updating that lowest key either before write out or - * every time it changed. Instead, the objectid of the lowest key is stored - * along with the level of the tree block. This provides a hint - * about where in the btree the block can be found. Searches through the - * btree only need to look for a pointer to that block, so they stop one - * level higher than the level recorded in the backref. - * - * Some btrees do not do reference counting on their extents. These - * include the extent tree and the tree of tree roots. Backrefs for these - * trees always have a generation of zero. - * * When a tree block is created, back references are inserted: * - * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid) + * (root->root_key.objectid, trans->transid, level, 0, 1) * - * When a tree block is cow'd in a reference counted root, - * new back references are added for all the blocks it points to. - * These are of the form (trans->transid will have increased since creation): + * When a tree block is cow'd, new back references are added for all the + * blocks it points to. If the tree block isn't in reference counted root, + * the old back references are removed. These new back references are of + * the form (trans->transid will have increased since creation): * - * (root->root_key.objectid, trans->transid, level, lowest_key_objectid) + * (root->root_key.objectid, trans->transid, level, 0, 1) * - * Because the lowest_key_objectid and the level are just hints - * they are not used when backrefs are deleted. When a backref is deleted: + * When a backref is in deleting, the following fields are checked: * * if backref was for a tree root: - * root_objectid = root->root_key.objectid + * (btrfs_header_owner(itself), btrfs_header_generation(itself)) * else - * root_objectid = btrfs_header_owner(parent) + * (btrfs_header_owner(parent), btrfs_header_generation(parent)) * - * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0) + * Back Reference Key composing: * - * Back Reference Key hashing: - * - * Back references have four fields, each 64 bits long. Unfortunately, - * This is hashed into a single 64 bit number and placed into the key offset. - * The key objectid corresponds to the first byte in the extent, and the - * key type is set to BTRFS_EXTENT_REF_KEY + * The key objectid corresponds to the first byte in the extent, the key + * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first + * byte of parent extent. If a extent is tree root, the key offset is set + * to the key objectid. */ -int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, u64 bytenr, - u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset) + +static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 bytenr, + u64 parent, u64 ref_root, + u64 ref_generation, int del) { - u64 hash; struct btrfs_key key; - struct btrfs_extent_ref ref; - struct btrfs_extent_ref *disk_ref; + struct btrfs_extent_ref *ref; + struct extent_buffer *leaf; int ret; - btrfs_set_stack_ref_root(&ref, root_objectid); - btrfs_set_stack_ref_generation(&ref, ref_generation); - btrfs_set_stack_ref_objectid(&ref, owner); - btrfs_set_stack_ref_offset(&ref, owner_offset); + key.objectid = bytenr; + key.type = BTRFS_EXTENT_REF_KEY; + key.offset = parent; + + ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1); + if (ret < 0) + goto out; + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + leaf = path->nodes[0]; + ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); + if (btrfs_ref_root(leaf, ref) != ref_root || + btrfs_ref_generation(leaf, ref) != ref_generation) { + ret = -EIO; + WARN_ON(1); + goto out; + } + ret = 0; +out: + return ret; +} + +static int noinline insert_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, u64 parent, + u64 ref_root, u64 ref_generation, + u64 owner_objectid, u64 owner_offset) +{ + struct btrfs_key key; + struct extent_buffer *leaf; + struct btrfs_extent_ref *ref; + u32 num_refs; + int ret; - hash = hash_extent_ref(root_objectid, ref_generation, owner, - owner_offset); - key.offset = hash; key.objectid = bytenr; key.type = BTRFS_EXTENT_REF_KEY; + key.offset = parent; - ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref)); - while (ret == -EEXIST) { - disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_extent_ref); - if (match_extent_ref(path->nodes[0], disk_ref, &ref)) + ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref)); + if (ret == 0) { + leaf = path->nodes[0]; + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_ref); + btrfs_set_ref_root(leaf, ref, ref_root); + btrfs_set_ref_generation(leaf, ref, ref_generation); + btrfs_set_ref_objectid(leaf, ref, owner_objectid); + btrfs_set_ref_offset(leaf, ref, owner_offset); + btrfs_set_ref_num_refs(leaf, ref, 1); + } else if (ret == -EEXIST) { + u64 existing_owner; + BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID); + leaf = path->nodes[0]; + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_ref); + if (btrfs_ref_root(leaf, ref) != ref_root || + btrfs_ref_generation(leaf, ref) != ref_generation) { + ret = -EIO; + WARN_ON(1); goto out; - key.offset++; - btrfs_release_path(root, path); - ret = btrfs_insert_empty_item(trans, root, path, &key, - sizeof(ref)); - } - if (ret) + } + + num_refs = btrfs_ref_num_refs(leaf, ref); + BUG_ON(num_refs == 0); + btrfs_set_ref_num_refs(leaf, ref, num_refs + 1); + + existing_owner = btrfs_ref_objectid(leaf, ref); + if (existing_owner == owner_objectid && + btrfs_ref_offset(leaf, ref) > owner_offset) { + btrfs_set_ref_offset(leaf, ref, owner_offset); + } else if (existing_owner != owner_objectid && + existing_owner != BTRFS_MULTIPLE_OBJECTIDS) { + btrfs_set_ref_objectid(leaf, ref, + BTRFS_MULTIPLE_OBJECTIDS); + btrfs_set_ref_offset(leaf, ref, 0); + } + ret = 0; + } else { goto out; - disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_extent_ref); - write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref, - sizeof(ref)); + } btrfs_mark_buffer_dirty(path->nodes[0]); out: btrfs_release_path(root, path); return ret; } +static int noinline remove_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path) +{ + struct extent_buffer *leaf; + struct btrfs_extent_ref *ref; + u32 num_refs; + int ret = 0; + + leaf = path->nodes[0]; + ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); + num_refs = btrfs_ref_num_refs(leaf, ref); + BUG_ON(num_refs == 0); + num_refs -= 1; + if (num_refs == 0) { + ret = btrfs_del_item(trans, root, path); + } else { + btrfs_set_ref_num_refs(leaf, ref, num_refs); + btrfs_mark_buffer_dirty(leaf); + } + btrfs_release_path(root, path); + return ret; +} + +static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 orig_parent, u64 parent, + u64 orig_root, u64 ref_root, + u64 orig_generation, u64 ref_generation, + u64 owner_objectid, u64 owner_offset) +{ + int ret; + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_path *path; + + if (root == root->fs_info->extent_root) { + struct pending_extent_op *extent_op; + u64 num_bytes; + + BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL); + num_bytes = btrfs_level_size(root, (int)owner_objectid); + if (test_range_bit(&root->fs_info->extent_ins, bytenr, + bytenr + num_bytes - 1, EXTENT_LOCKED, 0)) { + u64 priv; + ret = get_state_private(&root->fs_info->extent_ins, + bytenr, &priv); + BUG_ON(ret); + extent_op = (struct pending_extent_op *) + (unsigned long)priv; + BUG_ON(extent_op->parent != orig_parent); + BUG_ON(extent_op->generation != orig_generation); + extent_op->parent = parent; + extent_op->generation = ref_generation; + } else { + extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); + BUG_ON(!extent_op); + + extent_op->type = PENDING_BACKREF_UPDATE; + extent_op->bytenr = bytenr; + extent_op->num_bytes = num_bytes; + extent_op->parent = parent; + extent_op->orig_parent = orig_parent; + extent_op->generation = ref_generation; + extent_op->orig_generation = orig_generation; + extent_op->level = (int)owner_objectid; + + set_extent_bits(&root->fs_info->extent_ins, + bytenr, bytenr + num_bytes - 1, + EXTENT_LOCKED, GFP_NOFS); + set_state_private(&root->fs_info->extent_ins, + bytenr, (unsigned long)extent_op); + } + return 0; + } + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + ret = lookup_extent_backref(trans, extent_root, path, + bytenr, orig_parent, orig_root, + orig_generation, 1); + if (ret) + goto out; + ret = remove_extent_backref(trans, extent_root, path); + if (ret) + goto out; + ret = insert_extent_backref(trans, extent_root, path, bytenr, + parent, ref_root, ref_generation, + owner_objectid, owner_offset); + BUG_ON(ret); + finish_current_insert(trans, extent_root); + del_pending_extents(trans, extent_root); +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 orig_parent, u64 parent, + u64 ref_root, u64 ref_generation, + u64 owner_objectid, u64 owner_offset) +{ + int ret; + if (ref_root == BTRFS_TREE_LOG_OBJECTID && + owner_objectid < BTRFS_FIRST_FREE_OBJECTID) + return 0; + maybe_lock_mutex(root); + ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent, + parent, ref_root, ref_root, + ref_generation, ref_generation, + owner_objectid, owner_offset); + maybe_unlock_mutex(root); + return ret; +} + static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u64 num_bytes, - u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset) + struct btrfs_root *root, u64 bytenr, + u64 orig_parent, u64 parent, + u64 orig_root, u64 ref_root, + u64 orig_generation, u64 ref_generation, + u64 owner_objectid, u64 owner_offset) { struct btrfs_path *path; int ret; @@ -752,24 +834,28 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_extent_item *item; u32 refs; - WARN_ON(num_bytes < root->sectorsize); path = btrfs_alloc_path(); if (!path) return -ENOMEM; path->reada = 1; key.objectid = bytenr; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - key.offset = num_bytes; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = (u64)-1; + ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 1); if (ret < 0) return ret; - if (ret != 0) { - BUG(); - } - BUG_ON(ret != 0); + BUG_ON(ret == 0 || path->slots[0] == 0); + + path->slots[0]--; l = path->nodes[0]; + + btrfs_item_key_to_cpu(l, &key, path->slots[0]); + BUG_ON(key.objectid != bytenr); + BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY); + item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(l, item); btrfs_set_extent_refs(l, item, refs + 1); @@ -778,9 +864,10 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_release_path(root->fs_info->extent_root, path); path->reada = 1; - ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root, - path, bytenr, root_objectid, - ref_generation, owner, owner_offset); + ret = insert_extent_backref(trans, root->fs_info->extent_root, + path, bytenr, parent, + ref_root, ref_generation, + owner_objectid, owner_offset); BUG_ON(ret); finish_current_insert(trans, root->fs_info->extent_root); del_pending_extents(trans, root->fs_info->extent_root); @@ -790,18 +877,20 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, } int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u64 num_bytes, - u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset) + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 ref_root, u64 ref_generation, + u64 owner_objectid, u64 owner_offset) { int ret; - - mutex_lock(&root->fs_info->alloc_mutex); - ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, - root_objectid, ref_generation, - owner, owner_offset); - mutex_unlock(&root->fs_info->alloc_mutex); + if (ref_root == BTRFS_TREE_LOG_OBJECTID && + owner_objectid < BTRFS_FIRST_FREE_OBJECTID) + return 0; + maybe_lock_mutex(root); + ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent, + 0, ref_root, 0, ref_generation, + owner_objectid, owner_offset); + maybe_unlock_mutex(root); return ret; } @@ -813,9 +902,9 @@ int btrfs_extent_post_op(struct btrfs_trans_handle *trans, return 0; } -static int lookup_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytenr, - u64 num_bytes, u32 *refs) +int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 num_bytes, u32 *refs) { struct btrfs_path *path; int ret; @@ -846,7 +935,6 @@ out: return 0; } - static int get_reference_status(struct btrfs_root *root, u64 bytenr, u64 parent_gen, u64 ref_objectid, u64 *min_generation, u32 *ref_count) @@ -863,7 +951,7 @@ static int get_reference_status(struct btrfs_root *root, u64 bytenr, int ret; key.objectid = bytenr; - key.offset = 0; + key.offset = (u64)-1; key.type = BTRFS_EXTENT_ITEM_KEY; path = btrfs_alloc_path(); @@ -872,7 +960,10 @@ static int get_reference_status(struct btrfs_root *root, u64 bytenr, if (ret < 0) goto out; BUG_ON(ret == 0); + if (ret < 0 || path->slots[0] == 0) + goto out; + path->slots[0]--; leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); @@ -909,7 +1000,7 @@ static int get_reference_status(struct btrfs_root *root, u64 bytenr, struct btrfs_extent_ref); ref_generation = btrfs_ref_generation(leaf, ref_item); /* - * For (parent_gen > 0 && parent_gen > ref_gen): + * For (parent_gen > 0 && parent_gen > ref_generation): * * we reach here through the oldest root, therefore * all other reference from same snapshot should have @@ -919,8 +1010,7 @@ static int get_reference_status(struct btrfs_root *root, u64 bytenr, (parent_gen > 0 && parent_gen > ref_generation) || (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID && ref_objectid != btrfs_ref_objectid(leaf, ref_item))) { - if (ref_count) - *ref_count = 2; + *ref_count = 2; break; } @@ -1020,80 +1110,29 @@ out: return ret; } -int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf, int cache_ref) +int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct extent_buffer *buf, u32 nr_extents) { - u64 bytenr; u32 nritems; struct btrfs_key key; struct btrfs_file_extent_item *fi; int i; int level; - int ret; - int faili; - int nr_file_extents = 0; + int ret = 0; if (!root->ref_cows) return 0; level = btrfs_header_level(buf); nritems = btrfs_header_nritems(buf); - for (i = 0; i < nritems; i++) { - cond_resched(); - if (level == 0) { - u64 disk_bytenr; - btrfs_item_key_to_cpu(buf, &key, i); - if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) - continue; - fi = btrfs_item_ptr(buf, i, - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(buf, fi) == - BTRFS_FILE_EXTENT_INLINE) - continue; - disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); - if (disk_bytenr == 0) - continue; - - if (buf != root->commit_root) - nr_file_extents++; - - mutex_lock(&root->fs_info->alloc_mutex); - ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr, - btrfs_file_extent_disk_num_bytes(buf, fi), - root->root_key.objectid, trans->transid, - key.objectid, key.offset); - mutex_unlock(&root->fs_info->alloc_mutex); - if (ret) { - faili = i; - WARN_ON(1); - goto fail; - } - } else { - bytenr = btrfs_node_blockptr(buf, i); - btrfs_node_key_to_cpu(buf, &key, i); - mutex_lock(&root->fs_info->alloc_mutex); - ret = __btrfs_inc_extent_ref(trans, root, bytenr, - btrfs_level_size(root, level - 1), - root->root_key.objectid, - trans->transid, - level - 1, key.objectid); - mutex_unlock(&root->fs_info->alloc_mutex); - if (ret) { - faili = i; - WARN_ON(1); - goto fail; - } - } - } - /* cache orignal leaf block's references */ - if (level == 0 && cache_ref && buf != root->commit_root) { + if (level == 0) { struct btrfs_leaf_ref *ref; struct btrfs_extent_info *info; - ref = btrfs_alloc_leaf_ref(root, nr_file_extents); + ref = btrfs_alloc_leaf_ref(root, nr_extents); if (!ref) { - WARN_ON(1); + ret = -ENOMEM; goto out; } @@ -1101,10 +1140,10 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, ref->bytenr = buf->start; ref->owner = btrfs_header_owner(buf); ref->generation = btrfs_header_generation(buf); - ref->nritems = nr_file_extents; + ref->nritems = nr_extents; info = ref->extents; - for (i = 0; nr_file_extents > 0 && i < nritems; i++) { + for (i = 0; nr_extents > 0 && i < nritems; i++) { u64 disk_bytenr; btrfs_item_key_to_cpu(buf, &key, i); if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) @@ -1132,13 +1171,52 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, btrfs_free_leaf_ref(root, ref); } out: - return 0; -fail: - WARN_ON(1); -#if 0 - for (i =0; i < faili; i++) { + return ret; +} + +int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct extent_buffer *orig_buf, struct extent_buffer *buf, + u32 *nr_extents) +{ + u64 bytenr; + u64 ref_root; + u64 orig_root; + u64 ref_generation; + u64 orig_generation; + u32 nritems; + u32 nr_file_extents = 0; + struct btrfs_key key; + struct btrfs_file_extent_item *fi; + int i; + int level; + int ret = 0; + int faili = 0; + int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, + u64, u64, u64, u64, u64, u64, u64, u64, u64); + + ref_root = btrfs_header_owner(buf); + ref_generation = btrfs_header_generation(buf); + orig_root = btrfs_header_owner(orig_buf); + orig_generation = btrfs_header_generation(orig_buf); + + nritems = btrfs_header_nritems(buf); + level = btrfs_header_level(buf); + + if (root->ref_cows) { + process_func = __btrfs_inc_extent_ref; + } else { + if (level == 0 && + root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) + goto out; + if (level != 0 && + root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) + goto out; + process_func = __btrfs_update_extent_ref; + } + + for (i = 0; i < nritems; i++) { + cond_resched(); if (level == 0) { - u64 disk_bytenr; btrfs_item_key_to_cpu(buf, &key, i); if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; @@ -1147,24 +1225,131 @@ fail: if (btrfs_file_extent_type(buf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); - if (disk_bytenr == 0) + bytenr = btrfs_file_extent_disk_bytenr(buf, fi); + if (bytenr == 0) continue; - err = btrfs_free_extent(trans, root, disk_bytenr, - btrfs_file_extent_disk_num_bytes(buf, - fi), 0); - BUG_ON(err); + + nr_file_extents++; + + maybe_lock_mutex(root); + ret = process_func(trans, root, bytenr, + orig_buf->start, buf->start, + orig_root, ref_root, + orig_generation, ref_generation, + key.objectid, key.offset); + maybe_unlock_mutex(root); + + if (ret) { + faili = i; + WARN_ON(1); + goto fail; + } } else { bytenr = btrfs_node_blockptr(buf, i); - err = btrfs_free_extent(trans, root, bytenr, - btrfs_level_size(root, level - 1), 0); - BUG_ON(err); + maybe_lock_mutex(root); + ret = process_func(trans, root, bytenr, + orig_buf->start, buf->start, + orig_root, ref_root, + orig_generation, ref_generation, + level - 1, 0); + maybe_unlock_mutex(root); + if (ret) { + faili = i; + WARN_ON(1); + goto fail; + } } } -#endif +out: + if (nr_extents) { + if (level == 0) + *nr_extents = nr_file_extents; + else + *nr_extents = nritems; + } + return 0; +fail: + WARN_ON(1); return ret; } +int btrfs_update_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *orig_buf, + struct extent_buffer *buf, int start_slot, int nr) + +{ + u64 bytenr; + u64 ref_root; + u64 orig_root; + u64 ref_generation; + u64 orig_generation; + struct btrfs_key key; + struct btrfs_file_extent_item *fi; + int i; + int ret; + int slot; + int level; + + BUG_ON(start_slot < 0); + BUG_ON(start_slot + nr > btrfs_header_nritems(buf)); + + ref_root = btrfs_header_owner(buf); + ref_generation = btrfs_header_generation(buf); + orig_root = btrfs_header_owner(orig_buf); + orig_generation = btrfs_header_generation(orig_buf); + level = btrfs_header_level(buf); + + if (!root->ref_cows) { + if (level == 0 && + root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) + return 0; + if (level != 0 && + root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) + return 0; + } + + for (i = 0, slot = start_slot; i < nr; i++, slot++) { + cond_resched(); + if (level == 0) { + btrfs_item_key_to_cpu(buf, &key, slot); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(buf, slot, + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(buf, fi) == + BTRFS_FILE_EXTENT_INLINE) + continue; + bytenr = btrfs_file_extent_disk_bytenr(buf, fi); + if (bytenr == 0) + continue; + maybe_lock_mutex(root); + ret = __btrfs_update_extent_ref(trans, root, bytenr, + orig_buf->start, buf->start, + orig_root, ref_root, + orig_generation, ref_generation, + key.objectid, key.offset); + maybe_unlock_mutex(root); + if (ret) + goto fail; + } else { + bytenr = btrfs_node_blockptr(buf, slot); + maybe_lock_mutex(root); + ret = __btrfs_update_extent_ref(trans, root, bytenr, + orig_buf->start, buf->start, + orig_root, ref_root, + orig_generation, ref_generation, + level - 1, 0); + maybe_unlock_mutex(root); + if (ret) + goto fail; + } + } + return 0; +fail: + WARN_ON(1); + return -1; +} + static int write_one_cache_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, @@ -1539,19 +1724,18 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, { u64 start; u64 end; + u64 priv; struct btrfs_fs_info *info = extent_root->fs_info; - struct extent_buffer *eb; struct btrfs_path *path; - struct btrfs_key ins; - struct btrfs_disk_key first; + struct btrfs_extent_ref *ref; + struct pending_extent_op *extent_op; + struct btrfs_key key; struct btrfs_extent_item extent_item; int ret; - int level; int err = 0; WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex)); btrfs_set_stack_extent_refs(&extent_item, 1); - btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); path = btrfs_alloc_path(); while(1) { @@ -1560,37 +1744,54 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, if (ret) break; - ins.objectid = start; - ins.offset = end + 1 - start; - err = btrfs_insert_item(trans, extent_root, &ins, + ret = get_state_private(&info->extent_ins, start, &priv); + BUG_ON(ret); + extent_op = (struct pending_extent_op *)(unsigned long)priv; + + if (extent_op->type == PENDING_EXTENT_INSERT) { + key.objectid = start; + key.offset = end + 1 - start; + key.type = BTRFS_EXTENT_ITEM_KEY; + err = btrfs_insert_item(trans, extent_root, &key, &extent_item, sizeof(extent_item)); - clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, - GFP_NOFS); + BUG_ON(err); - eb = btrfs_find_create_tree_block(extent_root, ins.objectid, - ins.offset); + clear_extent_bits(&info->extent_ins, start, end, + EXTENT_LOCKED, GFP_NOFS); - if (!btrfs_buffer_uptodate(eb, trans->transid)) - btrfs_read_buffer(eb, trans->transid); + err = insert_extent_backref(trans, extent_root, path, + start, extent_op->parent, + extent_root->root_key.objectid, + extent_op->generation, + extent_op->level, 0); + BUG_ON(err); + } else if (extent_op->type == PENDING_BACKREF_UPDATE) { + err = lookup_extent_backref(trans, extent_root, path, + start, extent_op->orig_parent, + extent_root->root_key.objectid, + extent_op->orig_generation, 0); + BUG_ON(err); - btrfs_tree_lock(eb); - level = btrfs_header_level(eb); - if (level == 0) { - btrfs_item_key(eb, &first, 0); + clear_extent_bits(&info->extent_ins, start, end, + EXTENT_LOCKED, GFP_NOFS); + + key.objectid = start; + key.offset = extent_op->parent; + key.type = BTRFS_EXTENT_REF_KEY; + err = btrfs_set_item_key_safe(trans, extent_root, path, + &key); + BUG_ON(err); + ref = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_extent_ref); + btrfs_set_ref_generation(path->nodes[0], ref, + extent_op->generation); + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(extent_root, path); } else { - btrfs_node_key(eb, &first, 0); + BUG_ON(1); } - btrfs_tree_unlock(eb); - free_extent_buffer(eb); - /* - * the first key is just a hint, so the race we've created - * against reading it is fine - */ - err = btrfs_insert_extent_backref(trans, extent_root, path, - start, extent_root->root_key.objectid, - 0, level, - btrfs_disk_key_objectid(&first)); - BUG_ON(err); + kfree(extent_op); + if (need_resched()) { mutex_unlock(&extent_root->fs_info->alloc_mutex); cond_resched(); @@ -1601,52 +1802,44 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, return 0; } -static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, - int is_data, int pending) +static int pin_down_bytes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, int is_data) { int err = 0; + struct extent_buffer *buf; WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); - if (!pending) { - struct extent_buffer *buf; - - if (is_data) - goto pinit; - - buf = btrfs_find_tree_block(root, bytenr, num_bytes); - if (buf) { - /* we can reuse a block if it hasn't been written - * and it is from this transaction. We can't - * reuse anything from the tree log root because - * it has tiny sub-transactions. - */ - if (btrfs_buffer_uptodate(buf, 0) && - btrfs_try_tree_lock(buf)) { - u64 transid = - root->fs_info->running_transaction->transid; - u64 header_transid = - btrfs_header_generation(buf); - if (btrfs_header_owner(buf) != - BTRFS_TREE_LOG_OBJECTID && - header_transid == transid && - !btrfs_header_flag(buf, - BTRFS_HEADER_FLAG_WRITTEN)) { - clean_tree_block(NULL, root, buf); - btrfs_tree_unlock(buf); - free_extent_buffer(buf); - return 1; - } - btrfs_tree_unlock(buf); - } + if (is_data) + goto pinit; + + buf = btrfs_find_tree_block(root, bytenr, num_bytes); + if (!buf) + goto pinit; + + /* we can reuse a block if it hasn't been written + * and it is from this transaction. We can't + * reuse anything from the tree log root because + * it has tiny sub-transactions. + */ + if (btrfs_buffer_uptodate(buf, 0) && + btrfs_try_tree_lock(buf)) { + u64 header_owner = btrfs_header_owner(buf); + u64 header_transid = btrfs_header_generation(buf); + if (header_owner != BTRFS_TREE_LOG_OBJECTID && + header_transid == trans->transid && + !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { + clean_tree_block(NULL, root, buf); + btrfs_tree_unlock(buf); free_extent_buffer(buf); + return 1; } -pinit: - btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); - } else { - set_extent_bits(&root->fs_info->pending_del, - bytenr, bytenr + num_bytes - 1, - EXTENT_LOCKED, GFP_NOFS); + btrfs_tree_unlock(buf); } + free_extent_buffer(buf); +pinit: + btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); + BUG_ON(err < 0); return 0; } @@ -1654,11 +1847,12 @@ pinit: /* * remove an extent from the root, returns 0 on success */ -static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 bytenr, u64 num_bytes, +static int __free_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner_objectid, u64 owner_offset, int pin, - int mark_free) + u64 owner_objectid, u64 owner_offset, + int pin, int mark_free) { struct btrfs_path *path; struct btrfs_key key; @@ -1681,10 +1875,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root return -ENOMEM; path->reada = 1; - ret = lookup_extent_backref(trans, extent_root, path, - bytenr, root_objectid, - ref_generation, - owner_objectid, owner_offset, 1); + ret = lookup_extent_backref(trans, extent_root, path, bytenr, parent, + root_objectid, ref_generation, 1); if (ret == 0) { struct btrfs_key found_key; extent_slot = path->slots[0]; @@ -1702,8 +1894,15 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (path->slots[0] - extent_slot > 5) break; } - if (!found_extent) - ret = btrfs_del_item(trans, extent_root, path); + if (!found_extent) { + ret = remove_extent_backref(trans, extent_root, path); + BUG_ON(ret); + btrfs_release_path(extent_root, path); + ret = btrfs_search_slot(trans, extent_root, + &key, path, -1, 1); + BUG_ON(ret); + extent_slot = path->slots[0]; + } } else { btrfs_print_leaf(extent_root, path->nodes[0]); WARN_ON(1); @@ -1712,14 +1911,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root root_objectid, ref_generation, owner_objectid, owner_offset); } - if (!found_extent) { - btrfs_release_path(extent_root, path); - ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); - if (ret < 0) - return ret; - BUG_ON(ret); - extent_slot = path->slots[0]; - } leaf = path->nodes[0]; ei = btrfs_item_ptr(leaf, extent_slot, @@ -1732,6 +1923,10 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(leaf); if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) { + struct btrfs_extent_ref *ref; + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_ref); + BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1); /* if the back ref and the extent are next to each other * they get deleted below in one shot */ @@ -1739,15 +1934,13 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root num_to_del = 2; } else if (found_extent) { /* otherwise delete the extent back ref */ - ret = btrfs_del_item(trans, extent_root, path); + ret = remove_extent_backref(trans, extent_root, path); BUG_ON(ret); /* if refs are 0, we need to setup the path for deletion */ if (refs == 0) { btrfs_release_path(extent_root, path); ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); - if (ret < 0) - return ret; BUG_ON(ret); } } @@ -1761,8 +1954,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root #endif if (pin) { - ret = pin_down_bytes(root, bytenr, num_bytes, - owner_objectid >= BTRFS_FIRST_FREE_OBJECTID, 0); + ret = pin_down_bytes(trans, root, bytenr, num_bytes, + owner_objectid >= BTRFS_FIRST_FREE_OBJECTID); if (ret > 0) mark_free = 1; BUG_ON(ret < 0); @@ -1781,9 +1974,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root root_used - num_bytes); ret = btrfs_del_items(trans, extent_root, path, path->slots[0], num_to_del); - if (ret) { - return ret; - } + BUG_ON(ret); ret = update_block_group(trans, root, bytenr, num_bytes, 0, mark_free); BUG_ON(ret); @@ -1822,33 +2013,61 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct { int ret; int err = 0; + int mark_free = 0; u64 start; u64 end; + u64 priv; struct extent_io_tree *pending_del; - struct extent_io_tree *pinned_extents; + struct extent_io_tree *extent_ins; + struct pending_extent_op *extent_op; WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex)); + extent_ins = &extent_root->fs_info->extent_ins; pending_del = &extent_root->fs_info->pending_del; - pinned_extents = &extent_root->fs_info->pinned_extents; while(1) { ret = find_first_extent_bit(pending_del, 0, &start, &end, EXTENT_LOCKED); if (ret) break; + + ret = get_state_private(pending_del, start, &priv); + BUG_ON(ret); + extent_op = (struct pending_extent_op *)(unsigned long)priv; + clear_extent_bits(pending_del, start, end, EXTENT_LOCKED, GFP_NOFS); - if (!test_range_bit(&extent_root->fs_info->extent_ins, - start, end, EXTENT_LOCKED, 0)) { - btrfs_update_pinned_extents(extent_root, start, - end + 1 - start, 1); + + ret = pin_down_bytes(trans, extent_root, start, + end + 1 - start, 0); + mark_free = ret > 0; + if (!test_range_bit(extent_ins, start, end, + EXTENT_LOCKED, 0)) { +free_extent: ret = __free_extent(trans, extent_root, - start, end + 1 - start, - extent_root->root_key.objectid, - 0, 0, 0, 0, 0); + start, end + 1 - start, + extent_op->orig_parent, + extent_root->root_key.objectid, + extent_op->orig_generation, + extent_op->level, 0, 0, mark_free); + kfree(extent_op); } else { - clear_extent_bits(&extent_root->fs_info->extent_ins, - start, end, EXTENT_LOCKED, GFP_NOFS); + kfree(extent_op); + ret = get_state_private(extent_ins, start, &priv); + BUG_ON(ret); + extent_op = (struct pending_extent_op *) + (unsigned long)priv; + + clear_extent_bits(extent_ins, start, end, + EXTENT_LOCKED, GFP_NOFS); + + if (extent_op->type == PENDING_BACKREF_UPDATE) + goto free_extent; + + ret = update_block_group(trans, extent_root, start, + end + 1 - start, 0, mark_free); + BUG_ON(ret); + kfree(extent_op); } if (ret) err = ret; @@ -1866,21 +2085,36 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct * remove an extent from the root, returns 0 on success */ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytenr, - u64 num_bytes, u64 root_objectid, - u64 ref_generation, u64 owner_objectid, - u64 owner_offset, int pin) + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 ref_generation, + u64 owner_objectid, u64 owner_offset, int pin) { struct btrfs_root *extent_root = root->fs_info->extent_root; int pending_ret; int ret; WARN_ON(num_bytes < root->sectorsize); - if (!root->ref_cows) - ref_generation = 0; - if (root == extent_root) { - pin_down_bytes(root, bytenr, num_bytes, 0, 1); + struct pending_extent_op *extent_op; + + extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); + BUG_ON(!extent_op); + + extent_op->type = PENDING_EXTENT_DELETE; + extent_op->bytenr = bytenr; + extent_op->num_bytes = num_bytes; + extent_op->parent = parent; + extent_op->orig_parent = parent; + extent_op->generation = ref_generation; + extent_op->orig_generation = ref_generation; + extent_op->level = (int)owner_objectid; + + set_extent_bits(&root->fs_info->pending_del, + bytenr, bytenr + num_bytes - 1, + EXTENT_LOCKED, GFP_NOFS); + set_state_private(&root->fs_info->pending_del, + bytenr, (unsigned long)extent_op); return 0; } /* if metadata always pin */ @@ -1901,9 +2135,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, if (ref_generation != trans->transid) pin = 1; - ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid, - ref_generation, owner_objectid, owner_offset, - pin, pin == 0); + ret = __free_extent(trans, root, bytenr, num_bytes, parent, + root_objectid, ref_generation, owner_objectid, + owner_offset, pin, pin == 0); finish_current_insert(trans, root->fs_info->extent_root); pending_ret = del_pending_extents(trans, root->fs_info->extent_root); @@ -1911,15 +2145,15 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, } int btrfs_free_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytenr, - u64 num_bytes, u64 root_objectid, - u64 ref_generation, u64 owner_objectid, - u64 owner_offset, int pin) + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 ref_generation, + u64 owner_objectid, u64 owner_offset, int pin) { int ret; maybe_lock_mutex(root); - ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, + ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent, root_objectid, ref_generation, owner_objectid, owner_offset, pin); maybe_unlock_mutex(root); @@ -2271,7 +2505,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, } static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_root *root, u64 parent, u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, struct btrfs_key *ins) @@ -2289,6 +2523,9 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_key keys[2]; + if (parent == 0) + parent = ins->objectid; + /* block accounting for super block */ spin_lock_irq(&info->delalloc_lock); super_used = btrfs_super_bytes_used(&info->super_copy); @@ -2300,17 +2537,32 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, btrfs_set_root_used(&root->root_item, root_used + num_bytes); if (root == extent_root) { + struct pending_extent_op *extent_op; + + extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); + BUG_ON(!extent_op); + + extent_op->type = PENDING_EXTENT_INSERT; + extent_op->bytenr = ins->objectid; + extent_op->num_bytes = ins->offset; + extent_op->parent = parent; + extent_op->orig_parent = 0; + extent_op->generation = ref_generation; + extent_op->orig_generation = 0; + extent_op->level = (int)owner; + set_extent_bits(&root->fs_info->extent_ins, ins->objectid, ins->objectid + ins->offset - 1, EXTENT_LOCKED, GFP_NOFS); + set_state_private(&root->fs_info->extent_ins, + ins->objectid, (unsigned long)extent_op); goto update_block; } memcpy(&keys[0], ins, sizeof(*ins)); - keys[1].offset = hash_extent_ref(root_objectid, ref_generation, - owner, owner_offset); keys[1].objectid = ins->objectid; keys[1].type = BTRFS_EXTENT_REF_KEY; + keys[1].offset = parent; sizes[0] = sizeof(*extent_item); sizes[1] = sizeof(*ref); @@ -2331,6 +2583,7 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); btrfs_set_ref_objectid(path->nodes[0], ref, owner); btrfs_set_ref_offset(path->nodes[0], ref, owner_offset); + btrfs_set_ref_num_refs(path->nodes[0], ref, 1); btrfs_mark_buffer_dirty(path->nodes[0]); @@ -2359,7 +2612,7 @@ out: } int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_root *root, u64 parent, u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, struct btrfs_key *ins) @@ -2369,9 +2622,9 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, if (root_objectid == BTRFS_TREE_LOG_OBJECTID) return 0; maybe_lock_mutex(root); - ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, - ref_generation, owner, - owner_offset, ins); + ret = __btrfs_alloc_reserved_extent(trans, root, parent, + root_objectid, ref_generation, + owner, owner_offset, ins); maybe_unlock_mutex(root); return ret; } @@ -2382,7 +2635,7 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, * space cache bits as well */ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_root *root, u64 parent, u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, struct btrfs_key *ins) @@ -2396,10 +2649,9 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, ret = btrfs_remove_free_space(block_group, ins->objectid, ins->offset); BUG_ON(ret); - - ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, - ref_generation, owner, - owner_offset, ins); + ret = __btrfs_alloc_reserved_extent(trans, root, parent, + root_objectid, ref_generation, + owner, owner_offset, ins); maybe_unlock_mutex(root); return ret; } @@ -2413,9 +2665,9 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 num_bytes, u64 min_alloc_size, + u64 num_bytes, u64 parent, u64 min_alloc_size, u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset, + u64 owner_objectid, u64 owner_offset, u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, u64 data) { @@ -2428,9 +2680,9 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, search_end, ins, data); BUG_ON(ret); if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { - ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, - ref_generation, owner, - owner_offset, ins); + ret = __btrfs_alloc_reserved_extent(trans, root, parent, + root_objectid, ref_generation, + owner_objectid, owner_offset, ins); BUG_ON(ret); } @@ -2468,10 +2720,9 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, */ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u32 blocksize, + u32 blocksize, u64 parent, u64 root_objectid, u64 ref_generation, - u64 first_objectid, int level, u64 hint, u64 empty_size) @@ -2480,10 +2731,9 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, int ret; struct extent_buffer *buf; - ret = btrfs_alloc_extent(trans, root, blocksize, blocksize, - root_objectid, ref_generation, - level, first_objectid, empty_size, hint, - (u64)-1, &ins, 0); + ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize, + root_objectid, ref_generation, level, 0, + empty_size, hint, (u64)-1, &ins, 0); if (ret) { BUG_ON(ret > 0); return ERR_PTR(ret); @@ -2531,15 +2781,14 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, disk_bytenr, btrfs_file_extent_disk_num_bytes(leaf, fi), - leaf_owner, leaf_generation, + leaf->start, leaf_owner, leaf_generation, key.objectid, key.offset, 0); mutex_unlock(&root->fs_info->alloc_mutex); + BUG_ON(ret); atomic_inc(&root->fs_info->throttle_gen); wake_up(&root->fs_info->transaction_throttle); cond_resched(); - - BUG_ON(ret); } return 0; } @@ -2554,10 +2803,10 @@ static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans, for (i = 0; i < ref->nritems; i++) { mutex_lock(&root->fs_info->alloc_mutex); - ret = __btrfs_free_extent(trans, root, - info->bytenr, info->num_bytes, - ref->owner, ref->generation, - info->objectid, info->offset, 0); + ret = __btrfs_free_extent(trans, root, info->bytenr, + info->num_bytes, ref->bytenr, + ref->owner, ref->generation, + info->objectid, info->offset, 0); mutex_unlock(&root->fs_info->alloc_mutex); atomic_inc(&root->fs_info->throttle_gen); @@ -2576,7 +2825,7 @@ int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len, { int ret; - ret = lookup_extent_ref(NULL, root, start, len, refs); + ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs); BUG_ON(ret); #if 0 // some debugging code in case we see problems here @@ -2672,8 +2921,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, bytenr, - blocksize, root_owner, - root_gen, 0, 0, 1); + blocksize, parent->start, + root_owner, root_gen, 0, 0, 1); BUG_ON(ret); mutex_unlock(&root->fs_info->alloc_mutex); @@ -2690,8 +2939,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, * So, we don't need to check it again */ if (*level == 1) { - struct btrfs_key key; - btrfs_node_key_to_cpu(cur, &key, path->slots[*level]); ref = btrfs_lookup_leaf_ref(root, bytenr); if (ref) { ret = cache_drop_leaf_ref(trans, root, ref); @@ -2750,12 +2997,13 @@ out: mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, bytenr, blocksize, - root_owner, root_gen, 0, 0, 1); + parent->start, root_owner, root_gen, + 0, 0, 1); + mutex_unlock(&root->fs_info->alloc_mutex); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; BUG_ON(ret); - mutex_unlock(&root->fs_info->alloc_mutex); cond_resched(); return 0; @@ -2792,19 +3040,18 @@ static int noinline walk_up_tree(struct btrfs_trans_handle *trans, root_item->drop_level = i; return 0; } else { - if (path->nodes[*level] == root->node) { - root_owner = root->root_key.objectid; - root_gen = - btrfs_header_generation(path->nodes[*level]); - } else { - struct extent_buffer *node; - node = path->nodes[*level + 1]; - root_owner = btrfs_header_owner(node); - root_gen = btrfs_header_generation(node); - } + struct extent_buffer *parent; + if (path->nodes[*level] == root->node) + parent = path->nodes[*level]; + else + parent = path->nodes[*level + 1]; + + root_owner = btrfs_header_owner(parent); + root_gen = btrfs_header_generation(parent); ret = btrfs_free_extent(trans, root, path->nodes[*level]->start, path->nodes[*level]->len, + parent->start, root_owner, root_gen, 0, 0, 1); BUG_ON(ret); free_extent_buffer(path->nodes[*level]); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8624f3e8803..58ad25838a4 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2201,9 +2201,10 @@ retry: } if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = index; - +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) if (wbc->range_cont) wbc->range_start = index << PAGE_CACHE_SHIFT; +#endif return ret; } EXPORT_SYMBOL(extent_write_cache_pages); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 58b329ddb42..48a702d41c8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -524,6 +524,9 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, { u64 extent_end = 0; u64 search_start = start; + u64 leaf_start; + u64 root_gen; + u64 root_owner; struct extent_buffer *leaf; struct btrfs_file_extent_item *extent; struct btrfs_path *path; @@ -562,6 +565,9 @@ next_slot: bookend = 0; found_extent = 0; found_inline = 0; + leaf_start = 0; + root_gen = 0; + root_owner = 0; extent = NULL; leaf = path->nodes[0]; slot = path->slots[0]; @@ -628,27 +634,18 @@ next_slot: search_start = extent_end; if (end <= extent_end && start >= key.offset && found_inline) { *hint_byte = EXTENT_MAP_INLINE; - continue; + goto out; + } + + if (found_extent) { + read_extent_buffer(leaf, &old, (unsigned long)extent, + sizeof(old)); + root_gen = btrfs_header_generation(leaf); + root_owner = btrfs_header_owner(leaf); + leaf_start = leaf->start; } + if (end < extent_end && end >= key.offset) { - if (found_extent) { - u64 disk_bytenr = - btrfs_file_extent_disk_bytenr(leaf, extent); - u64 disk_num_bytes = - btrfs_file_extent_disk_num_bytes(leaf, - extent); - read_extent_buffer(leaf, &old, - (unsigned long)extent, - sizeof(old)); - if (disk_bytenr != 0) { - ret = btrfs_inc_extent_ref(trans, root, - disk_bytenr, disk_num_bytes, - root->root_key.objectid, - trans->transid, - key.objectid, end); - BUG_ON(ret); - } - } bookend = 1; if (found_inline && start <= key.offset) keep = 1; @@ -687,49 +684,12 @@ next_slot: } /* delete the entire extent */ if (!keep) { - u64 disk_bytenr = 0; - u64 disk_num_bytes = 0; - u64 extent_num_bytes = 0; - u64 root_gen; - u64 root_owner; - - root_gen = btrfs_header_generation(leaf); - root_owner = btrfs_header_owner(leaf); - if (found_extent) { - disk_bytenr = - btrfs_file_extent_disk_bytenr(leaf, - extent); - disk_num_bytes = - btrfs_file_extent_disk_num_bytes(leaf, - extent); - extent_num_bytes = - btrfs_file_extent_num_bytes(leaf, extent); - *hint_byte = - btrfs_file_extent_disk_bytenr(leaf, - extent); - } ret = btrfs_del_item(trans, root, path); /* TODO update progress marker and return */ BUG_ON(ret); - btrfs_release_path(root, path); extent = NULL; - if (found_extent && disk_bytenr != 0) { - dec_i_blocks(inode, extent_num_bytes); - ret = btrfs_free_extent(trans, root, - disk_bytenr, - disk_num_bytes, - root_owner, - root_gen, inode->i_ino, - key.offset, 0); - } - - BUG_ON(ret); - if (!bookend && search_start >= end) { - ret = 0; - goto out; - } - if (!bookend) - continue; + btrfs_release_path(root, path); + /* the extent will be freed later */ } if (bookend && found_inline && start <= key.offset) { u32 new_size; @@ -737,10 +697,13 @@ next_slot: extent_end - end); dec_i_blocks(inode, (extent_end - key.offset) - (extent_end - end)); - btrfs_truncate_item(trans, root, path, new_size, 0); + ret = btrfs_truncate_item(trans, root, path, + new_size, 0); + BUG_ON(ret); } /* create bookend, splitting the extent in two */ if (bookend && found_extent) { + u64 disk_bytenr; struct btrfs_key ins; ins.objectid = inode->i_ino; ins.offset = end; @@ -748,13 +711,9 @@ next_slot: btrfs_release_path(root, path); ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*extent)); + BUG_ON(ret); leaf = path->nodes[0]; - if (ret) { - btrfs_print_leaf(root, leaf); - printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu keep was %d\n", ret , ins.objectid, ins.type, ins.offset, start, end, key.offset, extent_end, keep); - } - BUG_ON(ret); extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); write_extent_buffer(leaf, &old, @@ -770,11 +729,43 @@ next_slot: BTRFS_FILE_EXTENT_REG); btrfs_mark_buffer_dirty(path->nodes[0]); - if (le64_to_cpu(old.disk_bytenr) != 0) { + + disk_bytenr = le64_to_cpu(old.disk_bytenr); + if (disk_bytenr != 0) { + ret = btrfs_inc_extent_ref(trans, root, + disk_bytenr, + le64_to_cpu(old.disk_num_bytes), + leaf->start, + root->root_key.objectid, + trans->transid, + ins.objectid, ins.offset); + BUG_ON(ret); + } + btrfs_release_path(root, path); + if (disk_bytenr != 0) { inode->i_blocks += btrfs_file_extent_num_bytes(leaf, extent) >> 9; } + } + + if (found_extent && !keep) { + u64 disk_bytenr = le64_to_cpu(old.disk_bytenr); + + if (disk_bytenr != 0) { + dec_i_blocks(inode, le64_to_cpu(old.num_bytes)); + ret = btrfs_free_extent(trans, root, + disk_bytenr, + le64_to_cpu(old.disk_num_bytes), + leaf_start, root_owner, + root_gen, key.objectid, + key.offset, 0); + BUG_ON(ret); + *hint_byte = disk_bytenr; + } + } + + if (search_start >= end) { ret = 0; goto out; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 65b4f864b0d..2e7d82ec5d1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -528,6 +528,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) struct btrfs_trans_handle *trans; struct btrfs_ordered_extent *ordered_extent; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct btrfs_file_extent_item *extent_item; + struct btrfs_path *path = NULL; + struct extent_buffer *leaf; u64 alloc_hint = 0; struct list_head list; struct btrfs_key ins; @@ -544,20 +547,15 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) goto nocow; + path = btrfs_alloc_path(); + BUG_ON(!path); + lock_extent(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, GFP_NOFS); INIT_LIST_HEAD(&list); - ins.objectid = ordered_extent->start; - ins.offset = ordered_extent->len; - ins.type = BTRFS_EXTENT_ITEM_KEY; - - ret = btrfs_alloc_reserved_extent(trans, root, root->root_key.objectid, - trans->transid, inode->i_ino, - ordered_extent->file_offset, &ins); - BUG_ON(ret); mutex_lock(&BTRFS_I(inode)->extent_mutex); ret = btrfs_drop_extents(trans, root, inode, @@ -566,18 +564,42 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ordered_extent->len, ordered_extent->file_offset, &alloc_hint); BUG_ON(ret); - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - ordered_extent->file_offset, - ordered_extent->start, - ordered_extent->len, - ordered_extent->len, 0); + + ins.objectid = inode->i_ino; + ins.offset = ordered_extent->file_offset; + ins.type = BTRFS_EXTENT_DATA_KEY; + ret = btrfs_insert_empty_item(trans, root, path, &ins, + sizeof(*extent_item)); BUG_ON(ret); + leaf = path->nodes[0]; + extent_item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, extent_item, trans->transid); + btrfs_set_file_extent_type(leaf, extent_item, BTRFS_FILE_EXTENT_REG); + btrfs_set_file_extent_disk_bytenr(leaf, extent_item, + ordered_extent->start); + btrfs_set_file_extent_disk_num_bytes(leaf, extent_item, + ordered_extent->len); + btrfs_set_file_extent_offset(leaf, extent_item, 0); + btrfs_set_file_extent_num_bytes(leaf, extent_item, + ordered_extent->len); + btrfs_mark_buffer_dirty(leaf); btrfs_drop_extent_cache(inode, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1); mutex_unlock(&BTRFS_I(inode)->extent_mutex); + ins.objectid = ordered_extent->start; + ins.offset = ordered_extent->len; + ins.type = BTRFS_EXTENT_ITEM_KEY; + ret = btrfs_alloc_reserved_extent(trans, root, leaf->start, + root->root_key.objectid, + trans->transid, inode->i_ino, + ordered_extent->file_offset, &ins); + BUG_ON(ret); + btrfs_release_path(root, path); + inode->i_blocks += ordered_extent->len >> 9; unlock_extent(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, @@ -596,6 +618,8 @@ nocow: btrfs_put_ordered_extent(ordered_extent); btrfs_end_transaction(trans, root); + if (path) + btrfs_free_path(path); return 0; } @@ -1433,10 +1457,7 @@ search_again: if (root->ref_cows) dec_i_blocks(inode, num_dec); } - if (root->ref_cows) { - root_gen = - btrfs_header_generation(leaf); - } + root_gen = btrfs_header_generation(leaf); root_owner = btrfs_header_owner(leaf); } } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { @@ -1477,7 +1498,7 @@ delete: if (found_extent) { ret = btrfs_free_extent(trans, root, extent_start, extent_num_bytes, - root_owner, + leaf->start, root_owner, root_gen, inode->i_ino, found_key.offset, 0); BUG_ON(ret); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f84b5f6991c..4c6e0c15754 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -76,9 +76,8 @@ static noinline int create_subvol(struct btrfs_root *root, char *name, if (ret) goto fail; - leaf = btrfs_alloc_free_block(trans, root, root->leafsize, - objectid, trans->transid, 0, 0, - 0, 0); + leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, + objectid, trans->transid, 0, 0, 0); if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); goto fail; @@ -525,13 +524,10 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) struct file *src_file; struct inode *src; struct btrfs_trans_handle *trans; - struct btrfs_ordered_extent *ordered; struct btrfs_path *path; struct extent_buffer *leaf; char *buf; struct btrfs_key key; - struct btrfs_key new_key; - u32 size; u32 nritems; int slot; int ret; @@ -576,6 +572,7 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) /* do any pending delalloc/csum calc on src, one way or another, and lock file content */ while (1) { + struct btrfs_ordered_extent *ordered; lock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered) @@ -619,6 +616,32 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) key.objectid != src->i_ino) break; + if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY || + btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) { + u32 size; + struct btrfs_key new_key; + + size = btrfs_item_size_nr(leaf, slot); + read_extent_buffer(leaf, buf, + btrfs_item_ptr_offset(leaf, slot), + size); + btrfs_release_path(root, path); + + memcpy(&new_key, &key, sizeof(new_key)); + new_key.objectid = inode->i_ino; + ret = btrfs_insert_empty_item(trans, root, path, + &new_key, size); + if (ret) + goto out; + + leaf = path->nodes[0]; + slot = path->slots[0]; + write_extent_buffer(leaf, buf, + btrfs_item_ptr_offset(leaf, slot), + size); + btrfs_mark_buffer_dirty(leaf); + } + if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { struct btrfs_file_extent_item *extent; int found_type; @@ -634,31 +657,15 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) /* ds == 0 means there's a hole */ if (ds != 0) { ret = btrfs_inc_extent_ref(trans, root, - ds, dl, + ds, dl, leaf->start, root->root_key.objectid, trans->transid, inode->i_ino, key.offset); - if (ret) - goto out; + BUG_ON(ret); } } } - - if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY || - btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) { - size = btrfs_item_size_nr(leaf, slot); - read_extent_buffer(leaf, buf, - btrfs_item_ptr_offset(leaf, slot), - size); - btrfs_release_path(root, path); - memcpy(&new_key, &key, sizeof(new_key)); - new_key.objectid = inode->i_ino; - ret = btrfs_insert_item(trans, root, &new_key, - buf, size); - BUG_ON(ret); - } else { - btrfs_release_path(root, path); - } + btrfs_release_path(root, path); key.offset++; } ret = 0; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index f1374d597a1..3577badfa5b 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -102,11 +102,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) case BTRFS_EXTENT_REF_KEY: ref = btrfs_item_ptr(l, i, struct btrfs_extent_ref); printk("\t\textent back ref root %llu gen %llu " - "owner %llu offset %llu\n", + "owner %llu offset %llu num_refs %lu\n", (unsigned long long)btrfs_ref_root(l, ref), (unsigned long long)btrfs_ref_generation(l, ref), (unsigned long long)btrfs_ref_objectid(l, ref), - (unsigned long long)btrfs_ref_offset(l, ref)); + (unsigned long long)btrfs_ref_offset(l, ref), + (unsigned long)btrfs_ref_num_refs(l, ref)); break; case BTRFS_EXTENT_DATA_KEY: diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 56de3fb2d8d..88bbfd959f1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -89,9 +89,9 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, int ret; u64 objectid = root->root_key.objectid; - leaf = btrfs_alloc_free_block(trans, root, root->leafsize, + leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, BTRFS_TREE_LOG_OBJECTID, - 0, 0, 0, 0, 0); + trans->transid, 0, 0, 0); if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); return ret; @@ -433,6 +433,49 @@ insert: trans->transid); } } + + if (overwrite_root && + key->type == BTRFS_EXTENT_DATA_KEY) { + int extent_type; + struct btrfs_file_extent_item *fi; + + fi = (struct btrfs_file_extent_item *)dst_ptr; + extent_type = btrfs_file_extent_type(path->nodes[0], fi); + if (extent_type == BTRFS_FILE_EXTENT_REG) { + struct btrfs_key ins; + ins.objectid = btrfs_file_extent_disk_bytenr( + path->nodes[0], fi); + ins.offset = btrfs_file_extent_disk_num_bytes( + path->nodes[0], fi); + ins.type = BTRFS_EXTENT_ITEM_KEY; + + /* + * is this extent already allocated in the extent + * allocation tree? If so, just add a reference + */ + ret = btrfs_lookup_extent(root, ins.objectid, + ins.offset); + if (ret == 0) { + ret = btrfs_inc_extent_ref(trans, root, + ins.objectid, ins.offset, + path->nodes[0]->start, + root->root_key.objectid, + trans->transid, + key->objectid, key->offset); + } else { + /* + * insert the extent pointer in the extent + * allocation tree + */ + ret = btrfs_alloc_logged_extent(trans, root, + path->nodes[0]->start, + root->root_key.objectid, + trans->transid, key->objectid, + key->offset, &ins); + BUG_ON(ret); + } + } + } no_copy: btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(root, path); @@ -551,45 +594,10 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, start, extent_end, start, &alloc_hint); BUG_ON(ret); + /* insert the extent */ + ret = overwrite_item(trans, root, path, eb, slot, key); BUG_ON(ret); - if (found_type == BTRFS_FILE_EXTENT_REG) { - struct btrfs_key ins; - - ins.objectid = btrfs_file_extent_disk_bytenr(eb, item); - ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); - ins.type = BTRFS_EXTENT_ITEM_KEY; - - /* insert the extent pointer in the file */ - ret = overwrite_item(trans, root, path, eb, slot, key); - BUG_ON(ret); - /* - * is this extent already allocated in the extent - * allocation tree? If so, just add a reference - */ - ret = btrfs_lookup_extent(root, path, ins.objectid, ins.offset); - btrfs_release_path(root, path); - if (ret == 0) { - ret = btrfs_inc_extent_ref(trans, root, - ins.objectid, ins.offset, - root->root_key.objectid, - trans->transid, key->objectid, start); - } else { - /* - * insert the extent pointer in the extent - * allocation tree - */ - ret = btrfs_alloc_logged_extent(trans, root, - root->root_key.objectid, - trans->transid, key->objectid, - start, &ins); - BUG_ON(ret); - } - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - /* inline extents are easy, we just overwrite them */ - ret = overwrite_item(trans, root, path, eb, slot, key); - BUG_ON(ret); - } /* btrfs_drop_extents changes i_blocks, update it here */ inode->i_blocks += (extent_end - start) >> 9; btrfs_update_inode(trans, root, inode); @@ -1806,16 +1814,14 @@ static int noinline walk_up_log_tree(struct btrfs_trans_handle *trans, WARN_ON(*level == 0); return 0; } else { - if (path->nodes[*level] == root->node) { - root_owner = root->root_key.objectid; - root_gen = - btrfs_header_generation(path->nodes[*level]); - } else { - struct extent_buffer *node; - node = path->nodes[*level + 1]; - root_owner = btrfs_header_owner(node); - root_gen = btrfs_header_generation(node); - } + struct extent_buffer *parent; + if (path->nodes[*level] == root->node) + parent = path->nodes[*level]; + else + parent = path->nodes[*level + 1]; + + root_owner = btrfs_header_owner(parent); + root_gen = btrfs_header_generation(parent); wc->process_func(root, path->nodes[*level], wc, btrfs_header_generation(path->nodes[*level])); if (wc->free) { @@ -2525,8 +2531,10 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, if (ds != 0) { ret = btrfs_inc_extent_ref(trans, log, ds, dl, + dst_path->nodes[0]->start, BTRFS_TREE_LOG_OBJECTID, - 0, ins_keys[i].objectid, + trans->transid, + ins_keys[i].objectid, ins_keys[i].offset); BUG_ON(ret); } -- cgit v1.2.3-70-g09d2 From 4434c33c7f233a7ee5dcaf357fbf795818ea8861 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 24 Sep 2008 12:50:10 -0400 Subject: Btrfs: fix sleep with spinlock held during unmount The code to free block groups needs to drop the space info spin lock before calling btrfs_remove_free_space_cache (which can schedule). This is safe because at unmount time, nobody else is going to play with the block groups. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5258923d621..fe4e11b31a4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3157,9 +3157,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) block_group = rb_entry(n, struct btrfs_block_group_cache, cache_node); + spin_unlock(&info->block_group_cache_lock); btrfs_remove_free_space_cache(block_group); + spin_lock(&info->block_group_cache_lock); + rb_erase(&block_group->cache_node, &info->block_group_cache_tree); + spin_lock(&block_group->space_info->lock); list_del(&block_group->list); spin_unlock(&block_group->space_info->lock); -- cgit v1.2.3-70-g09d2 From e8569813849b5da394a195c7e76b4faa452b12d1 Mon Sep 17 00:00:00 2001 From: Zheng Yan Date: Fri, 26 Sep 2008 10:05:48 -0400 Subject: Btrfs: allocator fixes for space balancing update * Reserved extent accounting: reserved extents have been allocated in the rbtrees that track free space but have not been allocated on disk. They were never properly accounted for in the past, making it hard to know how much space was really free. * btrfs_find_block_group used to return NULL for block groups that had been removed by the space balancing code. This made it hard to account for space during the final stages of a balance run. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 + fs/btrfs/extent-tree.c | 136 +++++++++++++++++++++++-------------------------- 2 files changed, 67 insertions(+), 71 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3b3c1ca50c5..c683aaa925f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -498,6 +498,7 @@ struct btrfs_space_info { u64 total_bytes; u64 bytes_used; u64 bytes_pinned; + u64 bytes_reserved; int full; int force_alloc; struct list_head list; @@ -519,6 +520,7 @@ struct btrfs_block_group_cache { struct btrfs_block_group_item item; spinlock_t lock; u64 pinned; + u64 reserved; u64 flags; int cached; int ro; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fe4e11b31a4..3e2f969de42 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -325,12 +325,9 @@ static int noinline find_free_space(struct btrfs_root *root, struct btrfs_block_group_cache *cache = *cache_ret; struct btrfs_free_space *info = NULL; u64 last; - u64 total_fs_bytes; u64 search_start = *start_ret; WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); - total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); - if (!cache) goto out; @@ -354,7 +351,7 @@ new_group: last = cache->key.objectid + cache->key.offset; cache = btrfs_lookup_first_block_group(root->fs_info, last); - if (!cache || cache->key.objectid >= total_fs_bytes) + if (!cache) goto out; *cache_ret = cache; @@ -385,7 +382,6 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, return found; } return NULL; - } static struct btrfs_block_group_cache * @@ -396,7 +392,6 @@ __btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *cache; struct btrfs_block_group_cache *found_group = NULL; struct btrfs_fs_info *info = root->fs_info; - struct btrfs_space_info *sinfo; u64 used; u64 last = 0; u64 free_check; @@ -413,7 +408,7 @@ __btrfs_find_block_group(struct btrfs_root *root, if (shint && block_group_bits(shint, data) && !shint->ro) { spin_lock(&shint->lock); used = btrfs_block_group_used(&shint->item); - if (used + shint->pinned < + if (used + shint->pinned + shint->reserved < div_factor(shint->key.offset, factor)) { spin_unlock(&shint->lock); return shint; @@ -424,7 +419,7 @@ __btrfs_find_block_group(struct btrfs_root *root, if (hint && !hint->ro && block_group_bits(hint, data)) { spin_lock(&hint->lock); used = btrfs_block_group_used(&hint->item); - if (used + hint->pinned < + if (used + hint->pinned + hint->reserved < div_factor(hint->key.offset, factor)) { spin_unlock(&hint->lock); return hint; @@ -437,27 +432,9 @@ __btrfs_find_block_group(struct btrfs_root *root, else last = search_start; } - sinfo = __find_space_info(root->fs_info, data); - if (!sinfo) - goto found; again: - while(1) { - struct list_head *l; - - cache = NULL; - - spin_lock(&sinfo->lock); - list_for_each(l, &sinfo->block_groups) { - struct btrfs_block_group_cache *entry; - entry = list_entry(l, struct btrfs_block_group_cache, - list); - if ((entry->key.objectid >= last) && - (!cache || (entry->key.objectid < - cache->key.objectid))) - cache = entry; - } - spin_unlock(&sinfo->lock); - + while (1) { + cache = btrfs_lookup_first_block_group(root->fs_info, last); if (!cache) break; @@ -467,7 +444,8 @@ again: if (!cache->ro && block_group_bits(cache, data)) { free_check = div_factor(cache->key.offset, factor); - if (used + cache->pinned < free_check) { + if (used + cache->pinned + cache->reserved < + free_check) { found_group = cache; spin_unlock(&cache->lock); goto found; @@ -1414,6 +1392,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, if (!cache) break; + cache->dirty = 0; last += cache->key.offset; err = write_one_cache_group(trans, root, @@ -1427,8 +1406,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, werr = err; continue; } - - cache->dirty = 0; } btrfs_free_path(path); mutex_unlock(&root->fs_info->alloc_mutex); @@ -1460,6 +1437,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->total_bytes = total_bytes; found->bytes_used = bytes_used; found->bytes_pinned = 0; + found->bytes_reserved = 0; found->full = 0; found->force_alloc = 0; *space_info = found; @@ -1539,8 +1517,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, thresh = div_factor(space_info->total_bytes, 6); if (!force && - (space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < - thresh) + (space_info->bytes_used + space_info->bytes_pinned + + space_info->bytes_reserved + alloc_bytes) < thresh) goto out; mutex_lock(&extent_root->fs_info->chunk_mutex); @@ -1621,7 +1599,6 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) return cache->key.objectid; } - int btrfs_update_pinned_extents(struct btrfs_root *root, u64 bytenr, u64 num, int pin) { @@ -1639,29 +1616,20 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, } while (num > 0) { cache = btrfs_lookup_block_group(fs_info, bytenr); - if (!cache) { - u64 first = first_logical_byte(root, bytenr); - WARN_ON(first < bytenr); - len = min(first - bytenr, num); - } else { - len = min(num, cache->key.offset - - (bytenr - cache->key.objectid)); - } + BUG_ON(!cache); + len = min(num, cache->key.offset - + (bytenr - cache->key.objectid)); if (pin) { - if (cache) { - spin_lock(&cache->lock); - cache->pinned += len; - cache->space_info->bytes_pinned += len; - spin_unlock(&cache->lock); - } + spin_lock(&cache->lock); + cache->pinned += len; + cache->space_info->bytes_pinned += len; + spin_unlock(&cache->lock); fs_info->total_pinned += len; } else { - if (cache) { - spin_lock(&cache->lock); - cache->pinned -= len; - cache->space_info->bytes_pinned -= len; - spin_unlock(&cache->lock); - } + spin_lock(&cache->lock); + cache->pinned -= len; + cache->space_info->bytes_pinned -= len; + spin_unlock(&cache->lock); fs_info->total_pinned -= len; } bytenr += len; @@ -1670,6 +1638,36 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, return 0; } +static int update_reserved_extents(struct btrfs_root *root, + u64 bytenr, u64 num, int reserve) +{ + u64 len; + struct btrfs_block_group_cache *cache; + struct btrfs_fs_info *fs_info = root->fs_info; + + WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); + while (num > 0) { + cache = btrfs_lookup_block_group(fs_info, bytenr); + BUG_ON(!cache); + len = min(num, cache->key.offset - + (bytenr - cache->key.objectid)); + if (reserve) { + spin_lock(&cache->lock); + cache->reserved += len; + cache->space_info->bytes_reserved += len; + spin_unlock(&cache->lock); + } else { + spin_lock(&cache->lock); + cache->reserved -= len; + cache->space_info->bytes_reserved -= len; + spin_unlock(&cache->lock); + } + bytenr += len; + num -= len; + } + return 0; +} + int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) { u64 last = 0; @@ -2126,6 +2124,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, cache = btrfs_lookup_block_group(root->fs_info, bytenr); BUG_ON(!cache); btrfs_add_free_space(cache, bytenr, num_bytes); + update_reserved_extents(root, bytenr, num_bytes, 0); return 0; } pin = 1; @@ -2225,14 +2224,11 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, search_start = max(search_start, first_logical_byte(root, 0)); orig_search_start = search_start; - if (search_end == (u64)-1) - search_end = btrfs_super_total_bytes(&info->super_copy); - search_start = max(search_start, hint_byte); total_needed += empty_size; new_group: - block_group = btrfs_lookup_block_group(info, search_start); + block_group = btrfs_lookup_first_block_group(info, search_start); /* * Ok this looks a little tricky, buts its really simple. First if we @@ -2257,12 +2253,8 @@ new_group: ret = do_chunk_alloc(trans, root, num_bytes + 2 * 1024 * 1024, data, 1); - if (ret < 0) { - struct btrfs_space_info *info; - - info = __find_space_info(root->fs_info, data); + if (ret < 0) goto error; - } BUG_ON(ret); chunk_alloc_done = 1; search_start = orig_search_start; @@ -2378,22 +2370,24 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) struct list_head *l; printk(KERN_INFO "space_info has %Lu free, is %sfull\n", - info->total_bytes - info->bytes_used - info->bytes_pinned, - (info->full) ? "" : "not "); + info->total_bytes - info->bytes_used - info->bytes_pinned - + info->bytes_reserved, (info->full) ? "" : "not "); spin_lock(&info->lock); list_for_each(l, &info->block_groups) { cache = list_entry(l, struct btrfs_block_group_cache, list); spin_lock(&cache->lock); printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used " - "%Lu pinned\n", + "%Lu pinned %Lu reserved\n", cache->key.objectid, cache->key.offset, - btrfs_block_group_used(&cache->item), cache->pinned); + btrfs_block_group_used(&cache->item), + cache->pinned, cache->reserved); btrfs_dump_free_space(cache, bytes); spin_unlock(&cache->lock); } spin_unlock(&info->lock); } + static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_bytes, u64 min_alloc_size, @@ -2500,6 +2494,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, empty_size, hint_byte, search_end, ins, data); + update_reserved_extents(root, ins->objectid, ins->offset, 1); maybe_unlock_mutex(root); return ret; } @@ -2625,6 +2620,7 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, ref_generation, owner, owner_offset, ins); + update_reserved_extents(root, ins->objectid, ins->offset, 0); maybe_unlock_mutex(root); return ret; } @@ -2685,6 +2681,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, owner_objectid, owner_offset, ins); BUG_ON(ret); + } else { + update_reserved_extents(root, ins->objectid, ins->offset, 1); } maybe_unlock_mutex(root); return ret; @@ -3974,10 +3972,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) ret = btrfs_add_block_group_cache(root->fs_info, cache); BUG_ON(ret); - - if (key.objectid >= - btrfs_super_total_bytes(&info->super_copy)) - break; } ret = 0; error: -- cgit v1.2.3-70-g09d2 From e465768938f95388723b0fd3c50a0ae48173edb9 Mon Sep 17 00:00:00 2001 From: Zheng Yan Date: Fri, 26 Sep 2008 10:04:53 -0400 Subject: Btrfs: Add shared reference cache Btrfs has a cache of reference counts in leaves, allowing it to avoid reading tree leaves while deleting snapshots. To reduce contention with multiple subvolumes, this cache is private to each subvolume. This patch adds shared reference cache support. The new space balancing code plays with multiple subvols at the same time, So the old per-subvol reference cache is not well suited. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 19 +++++++++++------ fs/btrfs/disk-io.c | 2 ++ fs/btrfs/extent-tree.c | 17 +++++++++++---- fs/btrfs/ref-cache.c | 58 +++++++++++++++++++++++++++++++++----------------- fs/btrfs/ref-cache.h | 7 ++++-- fs/btrfs/transaction.c | 2 +- 6 files changed, 71 insertions(+), 34 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c683aaa925f..b9f9f815ed0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -81,6 +81,10 @@ struct btrfs_ordered_sum; #define BTRFS_TREE_LOG_OBJECTID -6ULL #define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL +/* for space balancing */ +#define BTRFS_TREE_RELOC_OBJECTID -8ULL +#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL + /* dummy objectid represents multiple objectids */ #define BTRFS_MULTIPLE_OBJECTIDS -255ULL @@ -539,6 +543,12 @@ struct btrfs_block_group_cache { struct list_head list; }; +struct btrfs_leaf_ref_tree { + struct rb_root root; + struct list_head list; + spinlock_t lock; +}; + struct btrfs_device; struct btrfs_fs_devices; struct btrfs_fs_info { @@ -637,6 +647,8 @@ struct btrfs_fs_info { struct task_struct *cleaner_kthread; int thread_pool_size; + struct btrfs_leaf_ref_tree shared_ref_tree; + struct kobject super_kobj; struct completion kobj_unregister; int do_barriers; @@ -670,13 +682,6 @@ struct btrfs_fs_info { void *bdev_holder; }; -struct btrfs_leaf_ref_tree { - struct rb_root root; - struct btrfs_leaf_ref *last; - struct list_head list; - spinlock_t lock; -}; - /* * in ram representation of the tree. extent_root is used for all allocations * and for the extent tree extent_root root. diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 71e81f3a765..8969fee2331 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1406,6 +1406,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; + btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree); + BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3e2f969de42..9ab099bc01a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1091,16 +1091,26 @@ out: int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, u32 nr_extents) { - u32 nritems; struct btrfs_key key; struct btrfs_file_extent_item *fi; + u64 root_gen; + u32 nritems; int i; int level; int ret = 0; + int shared = 0; if (!root->ref_cows) return 0; + if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { + shared = 0; + root_gen = root->root_key.offset; + } else { + shared = 1; + root_gen = trans->transid - 1; + } + level = btrfs_header_level(buf); nritems = btrfs_header_nritems(buf); @@ -1114,7 +1124,7 @@ int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, goto out; } - ref->root_gen = root->root_key.offset; + ref->root_gen = root_gen; ref->bytenr = buf->start; ref->owner = btrfs_header_owner(buf); ref->generation = btrfs_header_generation(buf); @@ -1143,8 +1153,7 @@ int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, info++; } - BUG_ON(!root->ref_tree); - ret = btrfs_add_leaf_ref(root, ref); + ret = btrfs_add_leaf_ref(root, ref, shared); WARN_ON(ret); btrfs_free_leaf_ref(root, ref); } diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c index 272b9890c98..c5809988c87 100644 --- a/fs/btrfs/ref-cache.c +++ b/fs/btrfs/ref-cache.c @@ -78,7 +78,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, } entry = rb_entry(node, struct btrfs_leaf_ref, rb_node); - entry->in_tree = 1; rb_link_node(node, parent, p); rb_insert_color(node, root); return NULL; @@ -103,23 +102,29 @@ static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) return NULL; } -int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen) +int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen, + int shared) { struct btrfs_leaf_ref *ref = NULL; struct btrfs_leaf_ref_tree *tree = root->ref_tree; + if (shared) + tree = &root->fs_info->shared_ref_tree; if (!tree) return 0; spin_lock(&tree->lock); while(!list_empty(&tree->list)) { ref = list_entry(tree->list.next, struct btrfs_leaf_ref, list); - BUG_ON(!ref->in_tree); + BUG_ON(ref->tree != tree); if (ref->root_gen > max_root_gen) break; + if (!xchg(&ref->in_tree, 0)) { + cond_resched_lock(&tree->lock); + continue; + } rb_erase(&ref->rb_node, &tree->root); - ref->in_tree = 0; list_del_init(&ref->list); spin_unlock(&tree->lock); @@ -137,32 +142,43 @@ struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, struct rb_node *rb; struct btrfs_leaf_ref *ref = NULL; struct btrfs_leaf_ref_tree *tree = root->ref_tree; - - if (!tree) - return NULL; - - spin_lock(&tree->lock); - rb = tree_search(&tree->root, bytenr); - if (rb) - ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node); - if (ref) - atomic_inc(&ref->usage); - spin_unlock(&tree->lock); - return ref; +again: + if (tree) { + spin_lock(&tree->lock); + rb = tree_search(&tree->root, bytenr); + if (rb) + ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node); + if (ref) + atomic_inc(&ref->usage); + spin_unlock(&tree->lock); + if (ref) + return ref; + } + if (tree != &root->fs_info->shared_ref_tree) { + tree = &root->fs_info->shared_ref_tree; + goto again; + } + return NULL; } -int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) +int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref, + int shared) { int ret = 0; struct rb_node *rb; struct btrfs_leaf_ref_tree *tree = root->ref_tree; + if (shared) + tree = &root->fs_info->shared_ref_tree; + spin_lock(&tree->lock); rb = tree_insert(&tree->root, ref->bytenr, &ref->rb_node); if (rb) { ret = -EEXIST; } else { atomic_inc(&ref->usage); + ref->tree = tree; + ref->in_tree = 1; list_add_tail(&ref->list, &tree->list); } spin_unlock(&tree->lock); @@ -171,13 +187,15 @@ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) { - struct btrfs_leaf_ref_tree *tree = root->ref_tree; + struct btrfs_leaf_ref_tree *tree; + + if (!xchg(&ref->in_tree, 0)) + return 0; - BUG_ON(!ref->in_tree); + tree = ref->tree; spin_lock(&tree->lock); rb_erase(&ref->rb_node, &tree->root); - ref->in_tree = 0; list_del_init(&ref->list); spin_unlock(&tree->lock); diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h index c361b321c0c..617564787f5 100644 --- a/fs/btrfs/ref-cache.h +++ b/fs/btrfs/ref-cache.h @@ -27,6 +27,7 @@ struct btrfs_extent_info { struct btrfs_leaf_ref { struct rb_node rb_node; + struct btrfs_leaf_ref_tree *tree; int in_tree; atomic_t usage; @@ -64,8 +65,10 @@ struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root, void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, u64 bytenr); -int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); -int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen); +int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref, + int shared); +int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen, + int shared); int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 151b00d5259..656baefa525 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -650,7 +650,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); - ret = btrfs_remove_leaf_refs(root, max_useless); + ret = btrfs_remove_leaf_refs(root, max_useless, 0); BUG_ON(ret); free_extent_buffer(dirty->root->node); -- cgit v1.2.3-70-g09d2 From 1a40e23b95da45051ee4d74374c58ae87a14051c Mon Sep 17 00:00:00 2001 From: Zheng Yan Date: Fri, 26 Sep 2008 10:09:34 -0400 Subject: Btrfs: update space balancing code This patch updates the space balancing code to utilize the new backref format. Before, btrfs-vol -b would break any COW links on data blocks or metadata. This was slow and caused the amount of space used to explode if a large number of snapshots were present. The new code can keeps the sharing of all data extents and most of the tree blocks. To maintain the sharing of data extents, the space balance code uses a seperate inode hold data extent pointers, then updates the references to point to the new location. To maintain the sharing of tree blocks, the space balance code uses reloc trees to relocate tree blocks in reference counted roots. There is one reloc tree for each subvol, and all reloc trees share same root key objectid. Reloc trees are snapshots of the latest committed roots of subvols (root->commit_root). To relocate a tree block referenced by a subvol, there are two steps. COW the block through subvol's reloc tree, then update block pointer in the subvol to point to the new block. Since all reloc trees share same root key objectid, doing special handing for tree blocks owned by them is easy. Once a tree block has been COWed in one reloc tree, we can use the resulting new block directly when the same block is required to COW again through other reloc trees. In this way, relocated tree blocks are shared between reloc trees, so they are also shared between subvols. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 155 +++- fs/btrfs/ctree.h | 26 +- fs/btrfs/disk-io.c | 9 + fs/btrfs/extent-tree.c | 2074 ++++++++++++++++++++++++++++++++++++++---------- fs/btrfs/root-tree.c | 5 +- fs/btrfs/transaction.c | 15 +- fs/btrfs/volumes.c | 9 +- 7 files changed, 1848 insertions(+), 445 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index f9cd40967d0..50e81f43e6d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -179,7 +179,6 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, struct extent_buffer *cow; u32 nritems; int ret = 0; - int different_trans = 0; int level; int unlock_orig = 0; @@ -233,13 +232,33 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(btrfs_header_generation(buf) > trans->transid); if (btrfs_header_generation(buf) != trans->transid) { u32 nr_extents; - different_trans = 1; ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents); if (ret) return ret; ret = btrfs_cache_ref(trans, root, buf, nr_extents); WARN_ON(ret); + } else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) { + /* + * There are only two places that can drop reference to + * tree blocks owned by living reloc trees, one is here, + * the other place is btrfs_merge_path. In both places, + * we check reference count while tree block is locked. + * Furthermore, if reference count is one, it won't get + * increased by someone else. + */ + u32 refs; + ret = btrfs_lookup_extent_ref(trans, root, buf->start, + buf->len, &refs); + BUG_ON(ret); + if (refs == 1) { + ret = btrfs_update_ref(trans, root, buf, cow, + 0, nritems); + clean_tree_block(trans, root, buf); + } else { + ret = btrfs_inc_ref(trans, root, buf, cow, NULL); + } + BUG_ON(ret); } else { ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems); if (ret) @@ -247,6 +266,14 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, clean_tree_block(trans, root, buf); } + if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { + ret = btrfs_add_reloc_mapping(root, buf->start, + buf->len, cow->start); + BUG_ON(ret); + ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start); + WARN_ON(ret); + } + if (buf == root->node) { WARN_ON(parent && parent != buf); @@ -1466,6 +1493,130 @@ done: return ret; } +int btrfs_merge_path(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_key *node_keys, + u64 *nodes, int lowest_level) +{ + struct extent_buffer *eb; + struct extent_buffer *parent; + struct btrfs_key key; + u64 bytenr; + u64 generation; + u32 blocksize; + int level; + int slot; + int key_match; + int ret; + + eb = btrfs_lock_root_node(root); + ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0); + BUG_ON(ret); + + parent = eb; + while (1) { + level = btrfs_header_level(parent); + if (level == 0 || level <= lowest_level) + break; + + ret = bin_search(parent, &node_keys[lowest_level], level, + &slot); + if (ret && slot > 0) + slot--; + + bytenr = btrfs_node_blockptr(parent, slot); + if (nodes[level - 1] == bytenr) + break; + + blocksize = btrfs_level_size(root, level - 1); + generation = btrfs_node_ptr_generation(parent, slot); + btrfs_node_key_to_cpu(eb, &key, slot); + key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key)); + + /* + * if node keys match and node pointer hasn't been modified + * in the running transaction, we can merge the path. for + * reloc trees, the node pointer check is skipped, this is + * because the reloc trees are fully controlled by the space + * balance code, no one else can modify them. + */ + if (!nodes[level - 1] || !key_match || + (generation == trans->transid && + root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)) { +next_level: + if (level == 1 || level == lowest_level + 1) + break; + + eb = read_tree_block(root, bytenr, blocksize, + generation); + btrfs_tree_lock(eb); + + ret = btrfs_cow_block(trans, root, eb, parent, slot, + &eb, 0); + BUG_ON(ret); + + btrfs_tree_unlock(parent); + free_extent_buffer(parent); + parent = eb; + continue; + } + + if (generation == trans->transid) { + u32 refs; + BUG_ON(btrfs_header_owner(eb) != + BTRFS_TREE_RELOC_OBJECTID); + /* + * lock the block to keep __btrfs_cow_block from + * changing the reference count. + */ + eb = read_tree_block(root, bytenr, blocksize, + generation); + btrfs_tree_lock(eb); + + ret = btrfs_lookup_extent_ref(trans, root, bytenr, + blocksize, &refs); + BUG_ON(ret); + /* + * if replace block whose reference count is one, + * we have to "drop the subtree". so skip it for + * simplicity + */ + if (refs == 1) { + btrfs_tree_unlock(eb); + free_extent_buffer(eb); + goto next_level; + } + } + + btrfs_set_node_blockptr(parent, slot, nodes[level - 1]); + btrfs_set_node_ptr_generation(parent, slot, trans->transid); + btrfs_mark_buffer_dirty(parent); + + ret = btrfs_inc_extent_ref(trans, root, + nodes[level - 1], + blocksize, parent->start, + btrfs_header_owner(parent), + btrfs_header_generation(parent), + level - 1, 0); + BUG_ON(ret); + ret = btrfs_free_extent(trans, root, bytenr, + blocksize, parent->start, + btrfs_header_owner(parent), + btrfs_header_generation(parent), + level - 1, 0, 1); + BUG_ON(ret); + + if (generation == trans->transid) { + btrfs_tree_unlock(eb); + free_extent_buffer(eb); + } + break; + } + btrfs_tree_unlock(parent); + free_extent_buffer(parent); + return 0; +} + /* * adjust the pointers going up the tree, starting at level * making sure the right key of each node is points to 'key'. diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3e62a1b0a1f..2775e270881 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -604,6 +604,7 @@ struct btrfs_fs_info { struct mutex chunk_mutex; struct mutex drop_mutex; struct mutex volume_mutex; + struct mutex tree_reloc_mutex; struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; @@ -647,6 +648,10 @@ struct btrfs_fs_info { struct task_struct *cleaner_kthread; int thread_pool_size; + /* tree relocation relocated fields */ + struct extent_io_tree reloc_mapping_tree; + struct list_head dead_reloc_roots; + struct btrfs_leaf_ref_tree reloc_ref_tree; struct btrfs_leaf_ref_tree shared_ref_tree; struct kobject super_kobj; @@ -698,6 +703,7 @@ struct btrfs_root { struct btrfs_leaf_ref_tree ref_tree_struct; struct btrfs_dirty_root *dirty_root; struct btrfs_root *log_root; + struct btrfs_root *reloc_root; struct btrfs_root_item root_item; struct btrfs_key root_key; @@ -1517,7 +1523,6 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u32 blocksize); -int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size); int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, @@ -1582,10 +1587,29 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytes_used, u64 type, u64 chunk_objectid, u64 chunk_offset, u64 size); +int btrfs_remove_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 group_start); +int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); +int btrfs_free_reloc_root(struct btrfs_root *root); +int btrfs_drop_dead_reloc_roots(struct btrfs_root *root); +int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, + u64 num_bytes, u64 new_bytenr); +int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, + u64 num_bytes, u64 *new_bytenr); +void btrfs_free_reloc_mappings(struct btrfs_root *root); +int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, u64 orig_start); +int btrfs_add_dead_reloc_root(struct btrfs_root *root); +int btrfs_cleanup_reloc_trees(struct btrfs_root *root); /* ctree.c */ int btrfs_previous_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid, int type); +int btrfs_merge_path(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_key *node_keys, + u64 *nodes, int lowest_level); int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *new_key); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8969fee2331..45bc3132b05 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1406,6 +1406,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; + extent_io_tree_init(&fs_info->reloc_mapping_tree, + fs_info->btree_inode->i_mapping, GFP_NOFS); + INIT_LIST_HEAD(&fs_info->dead_reloc_roots); + btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree); btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree); BTRFS_I(fs_info->btree_inode)->root = tree_root; @@ -1421,6 +1425,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->volume_mutex); + mutex_init(&fs_info->tree_reloc_mutex); init_waitqueue_head(&fs_info->transaction_throttle); init_waitqueue_head(&fs_info->transaction_wait); init_waitqueue_head(&fs_info->async_submit_wait); @@ -1627,6 +1632,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, ret = btrfs_recover_log_trees(log_tree_root); BUG_ON(ret); } + + ret = btrfs_cleanup_reloc_trees(tree_root); + BUG_ON(ret); + fs_info->last_trans_committed = btrfs_super_generation(disk_super); return tree_root; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9ab099bc01a..8043b9d584a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1834,6 +1834,7 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, u64 header_owner = btrfs_header_owner(buf); u64 header_transid = btrfs_header_generation(buf); if (header_owner != BTRFS_TREE_LOG_OBJECTID && + header_owner != BTRFS_TREE_RELOC_OBJECTID && header_transid == trans->transid && !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { clean_tree_block(NULL, root, buf); @@ -2487,6 +2488,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) return -ENOSPC; } btrfs_add_free_space(cache, start, len); + update_reserved_extents(root, start, len, 0); maybe_unlock_mutex(root); return 0; } @@ -2947,6 +2949,10 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, */ if (*level == 1) { ref = btrfs_lookup_leaf_ref(root, bytenr); + if (ref && ref->generation != ptr_gen) { + btrfs_free_leaf_ref(root, ref); + ref = NULL; + } if (ref) { ret = cache_drop_leaf_ref(trans, root, ref); BUG_ON(ret); @@ -3153,34 +3159,6 @@ out: return ret; } -int btrfs_free_block_groups(struct btrfs_fs_info *info) -{ - struct btrfs_block_group_cache *block_group; - struct rb_node *n; - - mutex_lock(&info->alloc_mutex); - spin_lock(&info->block_group_cache_lock); - while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { - block_group = rb_entry(n, struct btrfs_block_group_cache, - cache_node); - - spin_unlock(&info->block_group_cache_lock); - btrfs_remove_free_space_cache(block_group); - spin_lock(&info->block_group_cache_lock); - - rb_erase(&block_group->cache_node, - &info->block_group_cache_tree); - - spin_lock(&block_group->space_info->lock); - list_del(&block_group->list); - spin_unlock(&block_group->space_info->lock); - kfree(block_group); - } - spin_unlock(&info->block_group_cache_lock); - mutex_unlock(&info->alloc_mutex); - return 0; -} - static unsigned long calc_ra(unsigned long start, unsigned long last, unsigned long nr) { @@ -3192,37 +3170,43 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, { u64 page_start; u64 page_end; + unsigned long first_index; unsigned long last_index; unsigned long i; struct page *page; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct file_ra_state *ra; - unsigned long total_read = 0; - unsigned long ra_pages; struct btrfs_ordered_extent *ordered; - struct btrfs_trans_handle *trans; + unsigned int total_read = 0; + unsigned int total_dirty = 0; + int ret = 0; ra = kzalloc(sizeof(*ra), GFP_NOFS); mutex_lock(&inode->i_mutex); - i = start >> PAGE_CACHE_SHIFT; + first_index = start >> PAGE_CACHE_SHIFT; last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; - ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages; + /* make sure the dirty trick played by the caller work */ + ret = invalidate_inode_pages2_range(inode->i_mapping, + first_index, last_index); + if (ret) + goto out_unlock; file_ra_state_init(ra, inode->i_mapping); - for (; i <= last_index; i++) { - if (total_read % ra_pages == 0) { + for (i = first_index ; i <= last_index; i++) { + if (total_read % ra->ra_pages == 0) { btrfs_force_ra(inode->i_mapping, ra, NULL, i, - calc_ra(i, last_index, ra_pages)); + calc_ra(i, last_index, ra->ra_pages)); } total_read++; again: if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) - goto truncate_racing; + BUG_ON(1); page = grab_cache_page(inode->i_mapping, i); if (!page) { + ret = -ENOMEM; goto out_unlock; } if (!PageUptodate(page)) { @@ -3231,6 +3215,7 @@ again: if (!PageUptodate(page)) { unlock_page(page); page_cache_release(page); + ret = -EIO; goto out_unlock; } } @@ -3251,14 +3236,13 @@ again: } set_page_extent_mapped(page); - /* - * make sure page_mkwrite is called for this page if userland - * wants to change it from mmap - */ - clear_page_dirty_for_io(page); - btrfs_set_extent_delalloc(inode, page_start, page_end); + if (i == first_index) + set_extent_bits(io_tree, page_start, page_end, + EXTENT_BOUNDARY, GFP_NOFS); + set_page_dirty(page); + total_dirty++; unlock_extent(io_tree, page_start, page_end, GFP_NOFS); unlock_page(page); @@ -3266,350 +3250,1460 @@ again: } out_unlock: - /* we have to start the IO in order to get the ordered extents - * instantiated. This allows the relocation to code to wait - * for all the ordered extents to hit the disk. - * - * Otherwise, it would constantly loop over the same extents - * because the old ones don't get deleted until the IO is - * started - */ - btrfs_fdatawrite_range(inode->i_mapping, start, start + len - 1, - WB_SYNC_NONE); kfree(ra); - trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); - if (trans) { - btrfs_end_transaction(trans, BTRFS_I(inode)->root); - mark_inode_dirty(inode); - } mutex_unlock(&inode->i_mutex); - return 0; - -truncate_racing: - vmtruncate(inode, inode->i_size); - balance_dirty_pages_ratelimited_nr(inode->i_mapping, - total_read); - goto out_unlock; + balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); + return ret; } -/* - * The back references tell us which tree holds a ref on a block, - * but it is possible for the tree root field in the reference to - * reflect the original root before a snapshot was made. In this - * case we should search through all the children of a given root - * to find potential holders of references on a block. - * - * Instead, we do something a little less fancy and just search - * all the roots for a given key/block combination. - */ -static int find_root_for_ref(struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_key *key0, - int level, - int file_key, - struct btrfs_root **found_root, - u64 bytenr) -{ - struct btrfs_key root_location; - struct btrfs_root *cur_root = *found_root; - struct btrfs_file_extent_item *file_extent; - u64 root_search_start = BTRFS_FS_TREE_OBJECTID; - u64 found_bytenr; - int ret; +static int noinline relocate_data_extent(struct inode *reloc_inode, + struct btrfs_key *extent_key, + u64 offset) +{ + struct btrfs_root *root = BTRFS_I(reloc_inode)->root; + struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree; + struct extent_map *em; - root_location.offset = (u64)-1; - root_location.type = BTRFS_ROOT_ITEM_KEY; - path->lowest_level = level; - path->reada = 0; - while(1) { - ret = btrfs_search_slot(NULL, cur_root, key0, path, 0, 0); - found_bytenr = 0; - if (ret == 0 && file_key) { - struct extent_buffer *leaf = path->nodes[0]; - file_extent = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(leaf, file_extent) == - BTRFS_FILE_EXTENT_REG) { - found_bytenr = - btrfs_file_extent_disk_bytenr(leaf, - file_extent); - } - } else if (!file_key) { - if (path->nodes[level]) - found_bytenr = path->nodes[level]->start; - } - - btrfs_release_path(cur_root, path); - - if (found_bytenr == bytenr) { - *found_root = cur_root; - ret = 0; - goto out; - } - ret = btrfs_search_root(root->fs_info->tree_root, - root_search_start, &root_search_start); - if (ret) - break; + em = alloc_extent_map(GFP_NOFS); + BUG_ON(!em || IS_ERR(em)); - root_location.objectid = root_search_start; - cur_root = btrfs_read_fs_root_no_name(root->fs_info, - &root_location); - if (!cur_root) { - ret = 1; + em->start = extent_key->objectid - offset; + em->len = extent_key->offset; + em->block_start = extent_key->objectid; + em->bdev = root->fs_info->fs_devices->latest_bdev; + set_bit(EXTENT_FLAG_PINNED, &em->flags); + + /* setup extent map to cheat btrfs_readpage */ + mutex_lock(&BTRFS_I(reloc_inode)->extent_mutex); + while (1) { + int ret; + spin_lock(&em_tree->lock); + ret = add_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + if (ret != -EEXIST) { + free_extent_map(em); break; } + btrfs_drop_extent_cache(reloc_inode, em->start, + em->start + em->len - 1, 0); } -out: - path->lowest_level = 0; - return ret; -} + mutex_unlock(&BTRFS_I(reloc_inode)->extent_mutex); -/* - * note, this releases the path - */ -static int noinline relocate_one_reference(struct btrfs_root *extent_root, - struct btrfs_path *path, - struct btrfs_key *extent_key, - u64 *last_file_objectid, - u64 *last_file_offset, - u64 *last_file_root, - u64 last_extent) -{ - struct inode *inode; - struct btrfs_root *found_root; - struct btrfs_key root_location; - struct btrfs_key found_key; - struct btrfs_extent_ref *ref; - u64 ref_root; - u64 ref_gen; - u64 ref_objectid; - u64 ref_offset; - int ret; - int level; + return relocate_inode_pages(reloc_inode, extent_key->objectid - offset, + extent_key->offset); +} - WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex)); +struct btrfs_ref_path { + u64 extent_start; + u64 nodes[BTRFS_MAX_LEVEL]; + u64 root_objectid; + u64 root_generation; + u64 owner_objectid; + u64 owner_offset; + u32 num_refs; + int lowest_level; + int current_level; +}; - ref = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_extent_ref); - ref_root = btrfs_ref_root(path->nodes[0], ref); - ref_gen = btrfs_ref_generation(path->nodes[0], ref); - ref_objectid = btrfs_ref_objectid(path->nodes[0], ref); - ref_offset = btrfs_ref_offset(path->nodes[0], ref); - btrfs_release_path(extent_root, path); +struct disk_extent { + u64 disk_bytenr; + u64 disk_num_bytes; + u64 offset; + u64 num_bytes; +}; - root_location.objectid = ref_root; - if (ref_gen == 0) - root_location.offset = 0; - else - root_location.offset = (u64)-1; - root_location.type = BTRFS_ROOT_ITEM_KEY; +static int is_cowonly_root(u64 root_objectid) +{ + if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || + root_objectid == BTRFS_EXTENT_TREE_OBJECTID || + root_objectid == BTRFS_CHUNK_TREE_OBJECTID || + root_objectid == BTRFS_DEV_TREE_OBJECTID || + root_objectid == BTRFS_TREE_LOG_OBJECTID) + return 1; + return 0; +} - found_root = btrfs_read_fs_root_no_name(extent_root->fs_info, - &root_location); - BUG_ON(!found_root); - mutex_unlock(&extent_root->fs_info->alloc_mutex); +static int noinline __next_ref_path(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, + struct btrfs_ref_path *ref_path, + int first_time) +{ + struct extent_buffer *leaf; + struct btrfs_path *path; + struct btrfs_extent_ref *ref; + struct btrfs_key key; + struct btrfs_key found_key; + u64 bytenr; + u32 nritems; + int level; + int ret = 1; - if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) { - found_key.objectid = ref_objectid; - found_key.type = BTRFS_EXTENT_DATA_KEY; - found_key.offset = ref_offset; - level = 0; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; - if (last_extent == extent_key->objectid && - *last_file_objectid == ref_objectid && - *last_file_offset == ref_offset && - *last_file_root == ref_root) - goto out; + mutex_lock(&extent_root->fs_info->alloc_mutex); - ret = find_root_for_ref(extent_root, path, &found_key, - level, 1, &found_root, - extent_key->objectid); + if (first_time) { + ref_path->lowest_level = -1; + ref_path->current_level = -1; + goto walk_up; + } +walk_down: + level = ref_path->current_level - 1; + while (level >= -1) { + u64 parent; + if (level < ref_path->lowest_level) + break; - if (ret) - goto out; + if (level >= 0) { + bytenr = ref_path->nodes[level]; + } else { + bytenr = ref_path->extent_start; + } + BUG_ON(bytenr == 0); - if (last_extent == extent_key->objectid && - *last_file_objectid == ref_objectid && - *last_file_offset == ref_offset && - *last_file_root == ref_root) - goto out; + parent = ref_path->nodes[level + 1]; + ref_path->nodes[level + 1] = 0; + ref_path->current_level = level; + BUG_ON(parent == 0); - inode = btrfs_iget_locked(extent_root->fs_info->sb, - ref_objectid, found_root); - if (inode->i_state & I_NEW) { - /* the inode and parent dir are two different roots */ - BTRFS_I(inode)->root = found_root; - BTRFS_I(inode)->location.objectid = ref_objectid; - BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; - BTRFS_I(inode)->location.offset = 0; - btrfs_read_locked_inode(inode); - unlock_new_inode(inode); + key.objectid = bytenr; + key.offset = parent + 1; + key.type = BTRFS_EXTENT_REF_KEY; - } - /* this can happen if the reference is not against - * the latest version of the tree root - */ - if (is_bad_inode(inode)) + ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); + if (ret < 0) goto out; + BUG_ON(ret == 0); - *last_file_objectid = inode->i_ino; - *last_file_root = found_root->root_key.objectid; - *last_file_offset = ref_offset; + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(extent_root, path); + if (ret < 0) + goto out; + if (ret > 0) + goto next; + leaf = path->nodes[0]; + } - relocate_inode_pages(inode, ref_offset, extent_key->offset); - iput(inode); - } else { - struct btrfs_trans_handle *trans; - struct extent_buffer *eb; - int needs_lock = 0; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.objectid == bytenr && + found_key.type == BTRFS_EXTENT_REF_KEY) + goto found; +next: + level--; + btrfs_release_path(extent_root, path); + if (need_resched()) { + mutex_unlock(&extent_root->fs_info->alloc_mutex); + cond_resched(); + mutex_lock(&extent_root->fs_info->alloc_mutex); + } + } + /* reached lowest level */ + ret = 1; + goto out; +walk_up: + level = ref_path->current_level; + while (level < BTRFS_MAX_LEVEL - 1) { + u64 ref_objectid; + if (level >= 0) { + bytenr = ref_path->nodes[level]; + } else { + bytenr = ref_path->extent_start; + } + BUG_ON(bytenr == 0); - eb = read_tree_block(found_root, extent_key->objectid, - extent_key->offset, 0); - btrfs_tree_lock(eb); - level = btrfs_header_level(eb); + key.objectid = bytenr; + key.offset = 0; + key.type = BTRFS_EXTENT_REF_KEY; - if (level == 0) - btrfs_item_key_to_cpu(eb, &found_key, 0); - else - btrfs_node_key_to_cpu(eb, &found_key, 0); + ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); + if (ret < 0) + goto out; - btrfs_tree_unlock(eb); - free_extent_buffer(eb); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(extent_root, path); + if (ret < 0) + goto out; + if (ret > 0) { + /* the extent was freed by someone */ + if (ref_path->lowest_level == level) + goto out; + btrfs_release_path(extent_root, path); + goto walk_down; + } + leaf = path->nodes[0]; + } - ret = find_root_for_ref(extent_root, path, &found_key, - level, 0, &found_root, - extent_key->objectid); + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.objectid != bytenr || + found_key.type != BTRFS_EXTENT_REF_KEY) { + /* the extent was freed by someone */ + if (ref_path->lowest_level == level) { + ret = 1; + goto out; + } + btrfs_release_path(extent_root, path); + goto walk_down; + } +found: + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_ref); + ref_objectid = btrfs_ref_objectid(leaf, ref); + if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) { + if (first_time) { + level = (int)ref_objectid; + BUG_ON(level >= BTRFS_MAX_LEVEL); + ref_path->lowest_level = level; + ref_path->current_level = level; + ref_path->nodes[level] = bytenr; + } else { + WARN_ON(ref_objectid != level); + } + } else { + WARN_ON(level != -1); + } + first_time = 0; - if (ret) - goto out; + if (ref_path->lowest_level == level) { + ref_path->owner_objectid = ref_objectid; + ref_path->owner_offset = btrfs_ref_offset(leaf, ref); + ref_path->num_refs = btrfs_ref_num_refs(leaf, ref); + } /* - * right here almost anything could happen to our key, - * but that's ok. The cow below will either relocate it - * or someone else will have relocated it. Either way, - * it is in a different spot than it was before and - * we're happy. + * the block is tree root or the block isn't in reference + * counted tree. */ + if (found_key.objectid == found_key.offset || + is_cowonly_root(btrfs_ref_root(leaf, ref))) { + ref_path->root_objectid = btrfs_ref_root(leaf, ref); + ref_path->root_generation = + btrfs_ref_generation(leaf, ref); + if (level < 0) { + /* special reference from the tree log */ + ref_path->nodes[0] = found_key.offset; + ref_path->current_level = 0; + } + ret = 0; + goto out; + } - trans = btrfs_start_transaction(found_root, 1); + level++; + BUG_ON(ref_path->nodes[level] != 0); + ref_path->nodes[level] = found_key.offset; + ref_path->current_level = level; - if (found_root == extent_root->fs_info->extent_root || - found_root == extent_root->fs_info->chunk_root || - found_root == extent_root->fs_info->dev_root) { - needs_lock = 1; - mutex_lock(&extent_root->fs_info->alloc_mutex); + /* + * the reference was created in the running transaction, + * no need to continue walking up. + */ + if (btrfs_ref_generation(leaf, ref) == trans->transid) { + ref_path->root_objectid = btrfs_ref_root(leaf, ref); + ref_path->root_generation = + btrfs_ref_generation(leaf, ref); + ret = 0; + goto out; } - path->lowest_level = level; - path->reada = 2; - ret = btrfs_search_slot(trans, found_root, &found_key, path, - 0, 1); - path->lowest_level = 0; - btrfs_release_path(found_root, path); - - if (found_root == found_root->fs_info->extent_root) - btrfs_extent_post_op(trans, found_root); - if (needs_lock) + btrfs_release_path(extent_root, path); + if (need_resched()) { mutex_unlock(&extent_root->fs_info->alloc_mutex); - - btrfs_end_transaction(trans, found_root); - + cond_resched(); + mutex_lock(&extent_root->fs_info->alloc_mutex); + } } + /* reached max tree level, but no tree root found. */ + BUG(); out: - mutex_lock(&extent_root->fs_info->alloc_mutex); - return 0; + mutex_unlock(&extent_root->fs_info->alloc_mutex); + btrfs_free_path(path); + return ret; } -static int noinline del_extent_zero(struct btrfs_root *extent_root, - struct btrfs_path *path, - struct btrfs_key *extent_key) +static int btrfs_first_ref_path(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, + struct btrfs_ref_path *ref_path, + u64 extent_start) { - int ret; - struct btrfs_trans_handle *trans; + memset(ref_path, 0, sizeof(*ref_path)); + ref_path->extent_start = extent_start; - trans = btrfs_start_transaction(extent_root, 1); - ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); - if (ret > 0) { - ret = -EIO; - goto out; - } - if (ret < 0) - goto out; - ret = btrfs_del_item(trans, extent_root, path); -out: - btrfs_end_transaction(trans, extent_root); - return ret; + return __next_ref_path(trans, extent_root, ref_path, 1); } -static int noinline relocate_one_extent(struct btrfs_root *extent_root, - struct btrfs_path *path, - struct btrfs_key *extent_key) +static int btrfs_next_ref_path(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, + struct btrfs_ref_path *ref_path) { - struct btrfs_key key; - struct btrfs_key found_key; + return __next_ref_path(trans, extent_root, ref_path, 0); +} + +static int noinline get_new_locations(struct inode *reloc_inode, + struct btrfs_key *extent_key, + u64 offset, int no_fragment, + struct disk_extent **extents, + int *nr_extents) +{ + struct btrfs_root *root = BTRFS_I(reloc_inode)->root; + struct btrfs_path *path; + struct btrfs_file_extent_item *fi; struct extent_buffer *leaf; - u64 last_file_objectid = 0; - u64 last_file_root = 0; - u64 last_file_offset = (u64)-1; - u64 last_extent = 0; + struct disk_extent *exts = *extents; + struct btrfs_key found_key; + u64 cur_pos; + u64 last_byte; u32 nritems; - u32 item_size; - int ret = 0; + int nr = 0; + int max = *nr_extents; + int ret; - if (extent_key->objectid == 0) { - ret = del_extent_zero(extent_root, path, extent_key); - goto out; + WARN_ON(!no_fragment && *extents); + if (!exts) { + max = 1; + exts = kmalloc(sizeof(*exts) * max, GFP_NOFS); + if (!exts) + return -ENOMEM; } - key.objectid = extent_key->objectid; - key.type = BTRFS_EXTENT_REF_KEY; - key.offset = 0; - while(1) { - ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); + path = btrfs_alloc_path(); + BUG_ON(!path); - if (ret < 0) - goto out; + cur_pos = extent_key->objectid - offset; + last_byte = extent_key->objectid + extent_key->offset; + ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, + cur_pos, 0); + if (ret < 0) + goto out; + if (ret > 0) { + ret = -ENOENT; + goto out; + } - ret = 0; + while (1) { leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); - if (path->slots[0] == nritems) { - ret = btrfs_next_leaf(extent_root, path); - if (ret > 0) { - ret = 0; - goto out; - } + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); if (ret < 0) goto out; + if (ret > 0) + break; leaf = path->nodes[0]; } btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid != extent_key->objectid) { + if (found_key.offset != cur_pos || + found_key.type != BTRFS_EXTENT_DATA_KEY || + found_key.objectid != reloc_inode->i_ino) break; - } - if (found_key.type != BTRFS_EXTENT_REF_KEY) { + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(leaf, fi) != + BTRFS_FILE_EXTENT_REG || + btrfs_file_extent_disk_bytenr(leaf, fi) == 0) break; + + if (nr == max) { + struct disk_extent *old = exts; + max *= 2; + exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); + memcpy(exts, old, sizeof(*exts) * nr); + if (old != *extents) + kfree(old); } - key.offset = found_key.offset + 1; - item_size = btrfs_item_size_nr(leaf, path->slots[0]); + exts[nr].disk_bytenr = + btrfs_file_extent_disk_bytenr(leaf, fi); + exts[nr].disk_num_bytes = + btrfs_file_extent_disk_num_bytes(leaf, fi); + exts[nr].offset = btrfs_file_extent_offset(leaf, fi); + exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi); + WARN_ON(exts[nr].offset > 0); + WARN_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes); - ret = relocate_one_reference(extent_root, path, extent_key, - &last_file_objectid, - &last_file_offset, - &last_file_root, last_extent); - if (ret) + cur_pos += exts[nr].num_bytes; + nr++; + + if (cur_pos + offset >= last_byte) + break; + + if (no_fragment) { + ret = 1; goto out; - last_extent = extent_key->objectid; + } + path->slots[0]++; + } + + WARN_ON(cur_pos + offset > last_byte); + if (cur_pos + offset < last_byte) { + ret = -ENOENT; + goto out; } ret = 0; out: - btrfs_release_path(extent_root, path); - return ret; -} - + btrfs_free_path(path); + if (ret) { + if (exts != *extents) + kfree(exts); + } else { + *extents = exts; + *nr_extents = nr; + } + return ret; +} + +static int noinline replace_one_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *extent_key, + struct btrfs_key *leaf_key, + struct btrfs_ref_path *ref_path, + struct disk_extent *new_extents, + int nr_extents) +{ + struct extent_buffer *leaf; + struct btrfs_file_extent_item *fi; + struct inode *inode = NULL; + struct btrfs_key key; + u64 lock_start = 0; + u64 lock_end = 0; + u64 num_bytes; + u64 ext_offset; + u64 first_pos; + u32 nritems; + int extent_locked = 0; + int ret; + + first_pos = ref_path->owner_offset; + if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { + key.objectid = ref_path->owner_objectid; + key.offset = ref_path->owner_offset; + key.type = BTRFS_EXTENT_DATA_KEY; + } else { + memcpy(&key, leaf_key, sizeof(key)); + } + + while (1) { + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) + goto out; + + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); +next: + if (extent_locked && ret > 0) { + /* + * the file extent item was modified by someone + * before the extent got locked. + */ + mutex_unlock(&BTRFS_I(inode)->extent_mutex); + unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, + lock_end, GFP_NOFS); + extent_locked = 0; + } + + if (path->slots[0] >= nritems) { + if (ref_path->owner_objectid == + BTRFS_MULTIPLE_OBJECTIDS) + break; + + BUG_ON(extent_locked); + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + if (ret > 0) + break; + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + + if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { + if ((key.objectid > ref_path->owner_objectid) || + (key.objectid == ref_path->owner_objectid && + key.type > BTRFS_EXTENT_DATA_KEY) || + (key.offset >= first_pos + extent_key->offset)) + break; + } + + if (inode && key.objectid != inode->i_ino) { + BUG_ON(extent_locked); + btrfs_release_path(root, path); + mutex_unlock(&inode->i_mutex); + iput(inode); + inode = NULL; + continue; + } + + if (key.type != BTRFS_EXTENT_DATA_KEY) { + path->slots[0]++; + ret = 1; + goto next; + } + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + if ((btrfs_file_extent_type(leaf, fi) != + BTRFS_FILE_EXTENT_REG) || + (btrfs_file_extent_disk_bytenr(leaf, fi) != + extent_key->objectid)) { + path->slots[0]++; + ret = 1; + goto next; + } + + num_bytes = btrfs_file_extent_num_bytes(leaf, fi); + ext_offset = btrfs_file_extent_offset(leaf, fi); + + if (first_pos > key.offset - ext_offset) + first_pos = key.offset - ext_offset; + + if (!extent_locked) { + lock_start = key.offset; + lock_end = lock_start + num_bytes - 1; + } else { + BUG_ON(lock_start != key.offset); + BUG_ON(lock_end - lock_start + 1 < num_bytes); + } + + if (!inode) { + btrfs_release_path(root, path); + + inode = btrfs_iget_locked(root->fs_info->sb, + key.objectid, root); + if (inode->i_state & I_NEW) { + BTRFS_I(inode)->root = root; + BTRFS_I(inode)->location.objectid = + key.objectid; + BTRFS_I(inode)->location.type = + BTRFS_INODE_ITEM_KEY; + BTRFS_I(inode)->location.offset = 0; + btrfs_read_locked_inode(inode); + unlock_new_inode(inode); + } + /* + * some code call btrfs_commit_transaction while + * holding the i_mutex, so we can't use mutex_lock + * here. + */ + if (is_bad_inode(inode) || + !mutex_trylock(&inode->i_mutex)) { + iput(inode); + inode = NULL; + key.offset = (u64)-1; + goto skip; + } + } + + if (!extent_locked) { + struct btrfs_ordered_extent *ordered; + + btrfs_release_path(root, path); + + lock_extent(&BTRFS_I(inode)->io_tree, lock_start, + lock_end, GFP_NOFS); + ordered = btrfs_lookup_first_ordered_extent(inode, + lock_end); + if (ordered && + ordered->file_offset <= lock_end && + ordered->file_offset + ordered->len > lock_start) { + unlock_extent(&BTRFS_I(inode)->io_tree, + lock_start, lock_end, GFP_NOFS); + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + key.offset += num_bytes; + goto skip; + } + if (ordered) + btrfs_put_ordered_extent(ordered); + + mutex_lock(&BTRFS_I(inode)->extent_mutex); + extent_locked = 1; + continue; + } + + if (nr_extents == 1) { + /* update extent pointer in place */ + btrfs_set_file_extent_generation(leaf, fi, + trans->transid); + btrfs_set_file_extent_disk_bytenr(leaf, fi, + new_extents[0].disk_bytenr); + btrfs_set_file_extent_disk_num_bytes(leaf, fi, + new_extents[0].disk_num_bytes); + ext_offset += new_extents[0].offset; + btrfs_set_file_extent_offset(leaf, fi, ext_offset); + btrfs_mark_buffer_dirty(leaf); + + btrfs_drop_extent_cache(inode, key.offset, + key.offset + num_bytes - 1, 0); + + ret = btrfs_inc_extent_ref(trans, root, + new_extents[0].disk_bytenr, + new_extents[0].disk_num_bytes, + leaf->start, + root->root_key.objectid, + trans->transid, + key.objectid, key.offset); + BUG_ON(ret); + + ret = btrfs_free_extent(trans, root, + extent_key->objectid, + extent_key->offset, + leaf->start, + btrfs_header_owner(leaf), + btrfs_header_generation(leaf), + key.objectid, key.offset, 0); + BUG_ON(ret); + + btrfs_release_path(root, path); + key.offset += num_bytes; + } else { + u64 alloc_hint; + u64 extent_len; + int i; + /* + * drop old extent pointer at first, then insert the + * new pointers one bye one + */ + btrfs_release_path(root, path); + ret = btrfs_drop_extents(trans, root, inode, key.offset, + key.offset + num_bytes, + key.offset, &alloc_hint); + BUG_ON(ret); + + for (i = 0; i < nr_extents; i++) { + if (ext_offset >= new_extents[i].num_bytes) { + ext_offset -= new_extents[i].num_bytes; + continue; + } + extent_len = min(new_extents[i].num_bytes - + ext_offset, num_bytes); + + ret = btrfs_insert_empty_item(trans, root, + path, &key, + sizeof(*fi)); + BUG_ON(ret); + + leaf = path->nodes[0]; + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, fi, + trans->transid); + btrfs_set_file_extent_type(leaf, fi, + BTRFS_FILE_EXTENT_REG); + btrfs_set_file_extent_disk_bytenr(leaf, fi, + new_extents[i].disk_bytenr); + btrfs_set_file_extent_disk_num_bytes(leaf, fi, + new_extents[i].disk_num_bytes); + btrfs_set_file_extent_num_bytes(leaf, fi, + extent_len); + ext_offset += new_extents[i].offset; + btrfs_set_file_extent_offset(leaf, fi, + ext_offset); + btrfs_mark_buffer_dirty(leaf); + + btrfs_drop_extent_cache(inode, key.offset, + key.offset + extent_len - 1, 0); + + ret = btrfs_inc_extent_ref(trans, root, + new_extents[i].disk_bytenr, + new_extents[i].disk_num_bytes, + leaf->start, + root->root_key.objectid, + trans->transid, + key.objectid, key.offset); + BUG_ON(ret); + btrfs_release_path(root, path); + + inode->i_blocks += extent_len >> 9; + + ext_offset = 0; + num_bytes -= extent_len; + key.offset += extent_len; + + if (num_bytes == 0) + break; + } + BUG_ON(i >= nr_extents); + } + + if (extent_locked) { + mutex_unlock(&BTRFS_I(inode)->extent_mutex); + unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, + lock_end, GFP_NOFS); + extent_locked = 0; + } +skip: + if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && + key.offset >= first_pos + extent_key->offset) + break; + + cond_resched(); + } + ret = 0; +out: + btrfs_release_path(root, path); + if (inode) { + mutex_unlock(&inode->i_mutex); + if (extent_locked) { + mutex_unlock(&BTRFS_I(inode)->extent_mutex); + unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, + lock_end, GFP_NOFS); + } + iput(inode); + } + return ret; +} + +int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, + u64 num_bytes, u64 new_bytenr) +{ + set_extent_bits(&root->fs_info->reloc_mapping_tree, + orig_bytenr, orig_bytenr + num_bytes - 1, + EXTENT_LOCKED, GFP_NOFS); + set_state_private(&root->fs_info->reloc_mapping_tree, + orig_bytenr, new_bytenr); + return 0; +} + +int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, + u64 num_bytes, u64 *new_bytenr) +{ + u64 bytenr; + u64 cur_bytenr = orig_bytenr; + u64 prev_bytenr = orig_bytenr; + int ret; + + while (1) { + ret = get_state_private(&root->fs_info->reloc_mapping_tree, + cur_bytenr, &bytenr); + if (ret) + break; + prev_bytenr = cur_bytenr; + cur_bytenr = bytenr; + } + + if (orig_bytenr == cur_bytenr) + return -ENOENT; + + if (prev_bytenr != orig_bytenr) { + set_state_private(&root->fs_info->reloc_mapping_tree, + orig_bytenr, cur_bytenr); + } + *new_bytenr = cur_bytenr; + return 0; +} + +void btrfs_free_reloc_mappings(struct btrfs_root *root) +{ + clear_extent_bits(&root->fs_info->reloc_mapping_tree, + 0, (u64)-1, -1, GFP_NOFS); +} + +int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, u64 orig_start) +{ + int level; + int ret; + + BUG_ON(btrfs_header_generation(buf) != trans->transid); + BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); + + level = btrfs_header_level(buf); + if (level == 0) { + struct btrfs_leaf_ref *ref; + struct btrfs_leaf_ref *orig_ref; + + orig_ref = btrfs_lookup_leaf_ref(root, orig_start); + if (!orig_ref) + return -ENOENT; + + ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems); + if (!ref) { + btrfs_free_leaf_ref(root, orig_ref); + return -ENOMEM; + } + + ref->nritems = orig_ref->nritems; + memcpy(ref->extents, orig_ref->extents, + sizeof(ref->extents[0]) * ref->nritems); + + btrfs_free_leaf_ref(root, orig_ref); + + ref->root_gen = trans->transid; + ref->bytenr = buf->start; + ref->owner = btrfs_header_owner(buf); + ref->generation = btrfs_header_generation(buf); + ret = btrfs_add_leaf_ref(root, ref, 0); + WARN_ON(ret); + btrfs_free_leaf_ref(root, ref); + } + return 0; +} + +static int noinline invalidate_extent_cache(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_block_group_cache *group, + struct btrfs_root *target_root) +{ + struct btrfs_key key; + struct inode *inode = NULL; + struct btrfs_file_extent_item *fi; + u64 num_bytes; + u64 skip_objectid = 0; + u32 nritems; + u32 i; + + nritems = btrfs_header_nritems(leaf); + for (i = 0; i < nritems; i++) { + btrfs_item_key_to_cpu(leaf, &key, i); + if (key.objectid == skip_objectid || + key.type != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(leaf, fi) == + BTRFS_FILE_EXTENT_INLINE) + continue; + if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) + continue; + if (!inode || inode->i_ino != key.objectid) { + iput(inode); + inode = btrfs_ilookup(target_root->fs_info->sb, + key.objectid, target_root, 1); + } + if (!inode) { + skip_objectid = key.objectid; + continue; + } + num_bytes = btrfs_file_extent_num_bytes(leaf, fi); + + lock_extent(&BTRFS_I(inode)->io_tree, key.offset, + key.offset + num_bytes - 1, GFP_NOFS); + mutex_lock(&BTRFS_I(inode)->extent_mutex); + btrfs_drop_extent_cache(inode, key.offset, + key.offset + num_bytes - 1, 1); + mutex_unlock(&BTRFS_I(inode)->extent_mutex); + unlock_extent(&BTRFS_I(inode)->io_tree, key.offset, + key.offset + num_bytes - 1, GFP_NOFS); + cond_resched(); + } + iput(inode); + return 0; +} + +static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_block_group_cache *group, + struct inode *reloc_inode) +{ + struct btrfs_key key; + struct btrfs_key extent_key; + struct btrfs_file_extent_item *fi; + struct btrfs_leaf_ref *ref; + struct disk_extent *new_extent; + u64 bytenr; + u64 num_bytes; + u32 nritems; + u32 i; + int ext_index; + int nr_extent; + int ret; + + new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); + BUG_ON(!new_extent); + + ref = btrfs_lookup_leaf_ref(root, leaf->start); + BUG_ON(!ref); + + ext_index = -1; + nritems = btrfs_header_nritems(leaf); + for (i = 0; i < nritems; i++) { + btrfs_item_key_to_cpu(leaf, &key, i); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(leaf, fi) == + BTRFS_FILE_EXTENT_INLINE) + continue; + bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); + if (bytenr == 0) + continue; + + ext_index++; + if (bytenr >= group->key.objectid + group->key.offset || + bytenr + num_bytes <= group->key.objectid) + continue; + + extent_key.objectid = bytenr; + extent_key.offset = num_bytes; + extent_key.type = BTRFS_EXTENT_ITEM_KEY; + nr_extent = 1; + ret = get_new_locations(reloc_inode, &extent_key, + group->key.objectid, 1, + &new_extent, &nr_extent); + if (ret > 0) + continue; + BUG_ON(ret < 0); + + BUG_ON(ref->extents[ext_index].bytenr != bytenr); + BUG_ON(ref->extents[ext_index].num_bytes != num_bytes); + ref->extents[ext_index].bytenr = new_extent->disk_bytenr; + ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes; + + btrfs_set_file_extent_generation(leaf, fi, trans->transid); + btrfs_set_file_extent_disk_bytenr(leaf, fi, + new_extent->disk_bytenr); + btrfs_set_file_extent_disk_num_bytes(leaf, fi, + new_extent->disk_num_bytes); + new_extent->offset += btrfs_file_extent_offset(leaf, fi); + btrfs_set_file_extent_offset(leaf, fi, new_extent->offset); + btrfs_mark_buffer_dirty(leaf); + + ret = btrfs_inc_extent_ref(trans, root, + new_extent->disk_bytenr, + new_extent->disk_num_bytes, + leaf->start, + root->root_key.objectid, + trans->transid, + key.objectid, key.offset); + BUG_ON(ret); + ret = btrfs_free_extent(trans, root, + bytenr, num_bytes, leaf->start, + btrfs_header_owner(leaf), + btrfs_header_generation(leaf), + key.objectid, key.offset, 0); + BUG_ON(ret); + cond_resched(); + } + kfree(new_extent); + BUG_ON(ext_index + 1 != ref->nritems); + btrfs_free_leaf_ref(root, ref); + return 0; +} + +int btrfs_free_reloc_root(struct btrfs_root *root) +{ + struct btrfs_root *reloc_root; + + if (root->reloc_root) { + reloc_root = root->reloc_root; + root->reloc_root = NULL; + list_add(&reloc_root->dead_list, + &root->fs_info->dead_reloc_roots); + } + return 0; +} + +int btrfs_drop_dead_reloc_roots(struct btrfs_root *root) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *reloc_root; + struct btrfs_root *prev_root = NULL; + struct list_head dead_roots; + int ret; + unsigned long nr; + + INIT_LIST_HEAD(&dead_roots); + list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots); + + while (!list_empty(&dead_roots)) { + reloc_root = list_entry(dead_roots.prev, + struct btrfs_root, dead_list); + list_del_init(&reloc_root->dead_list); + + BUG_ON(reloc_root->commit_root != NULL); + while (1) { + trans = btrfs_join_transaction(root, 1); + BUG_ON(!trans); + + mutex_lock(&root->fs_info->drop_mutex); + ret = btrfs_drop_snapshot(trans, reloc_root); + if (ret != -EAGAIN) + break; + mutex_unlock(&root->fs_info->drop_mutex); + + nr = trans->blocks_used; + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + btrfs_btree_balance_dirty(root, nr); + } + + free_extent_buffer(reloc_root->node); + + ret = btrfs_del_root(trans, root->fs_info->tree_root, + &reloc_root->root_key); + BUG_ON(ret); + mutex_unlock(&root->fs_info->drop_mutex); + + nr = trans->blocks_used; + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + btrfs_btree_balance_dirty(root, nr); + + kfree(prev_root); + prev_root = reloc_root; + } + if (prev_root) { + btrfs_remove_leaf_refs(prev_root, (u64)-1, 0); + kfree(prev_root); + } + return 0; +} + +int btrfs_add_dead_reloc_root(struct btrfs_root *root) +{ + list_add(&root->dead_list, &root->fs_info->dead_reloc_roots); + return 0; +} + +int btrfs_cleanup_reloc_trees(struct btrfs_root *root) +{ + struct btrfs_root *reloc_root; + struct btrfs_trans_handle *trans; + struct btrfs_key location; + int found; + int ret; + + mutex_lock(&root->fs_info->tree_reloc_mutex); + ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL); + BUG_ON(ret); + found = !list_empty(&root->fs_info->dead_reloc_roots); + mutex_unlock(&root->fs_info->tree_reloc_mutex); + + if (found) { + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); + } + + location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; + location.offset = (u64)-1; + location.type = BTRFS_ROOT_ITEM_KEY; + + reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location); + BUG_ON(!reloc_root); + btrfs_orphan_cleanup(reloc_root); + return 0; +} + +static int noinline init_reloc_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_root *reloc_root; + struct extent_buffer *eb; + struct btrfs_root_item *root_item; + struct btrfs_key root_key; + int ret; + + BUG_ON(!root->ref_cows); + if (root->reloc_root) + return 0; + + root_item = kmalloc(sizeof(*root_item), GFP_NOFS); + BUG_ON(!root_item); + + ret = btrfs_copy_root(trans, root, root->commit_root, + &eb, BTRFS_TREE_RELOC_OBJECTID); + BUG_ON(ret); + + root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; + root_key.offset = root->root_key.objectid; + root_key.type = BTRFS_ROOT_ITEM_KEY; + + memcpy(root_item, &root->root_item, sizeof(root_item)); + btrfs_set_root_refs(root_item, 0); + btrfs_set_root_bytenr(root_item, eb->start); + btrfs_set_root_level(root_item, btrfs_header_level(eb)); + memset(&root_item->drop_progress, 0, sizeof(root_item->drop_progress)); + root_item->drop_level = 0; + + btrfs_tree_unlock(eb); + free_extent_buffer(eb); + + ret = btrfs_insert_root(trans, root->fs_info->tree_root, + &root_key, root_item); + BUG_ON(ret); + kfree(root_item); + + reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, + &root_key); + BUG_ON(!reloc_root); + reloc_root->last_trans = trans->transid; + reloc_root->commit_root = NULL; + reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; + + root->reloc_root = reloc_root; + return 0; +} + +/* + * Core function of space balance. + * + * The idea is using reloc trees to relocate tree blocks in reference + * counted roots. There is one reloc tree for each subvol, all reloc + * trees share same key objectid. Reloc trees are snapshots of the + * latest committed roots (subvol root->commit_root). To relocate a tree + * block referenced by a subvol, the code COW the block through the reloc + * tree, then update pointer in the subvol to point to the new block. + * Since all reloc trees share same key objectid, we can easily do special + * handing to share tree blocks between reloc trees. Once a tree block has + * been COWed in one reloc tree, we can use the result when the same block + * is COWed again through other reloc trees. + */ +static int noinline relocate_one_path(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *first_key, + struct btrfs_ref_path *ref_path, + struct btrfs_block_group_cache *group, + struct inode *reloc_inode) +{ + struct btrfs_root *reloc_root; + struct extent_buffer *eb = NULL; + struct btrfs_key *keys; + u64 *nodes; + int level; + int lowest_merge; + int lowest_level = 0; + int update_refs; + int ret; + + if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) + lowest_level = ref_path->owner_objectid; + + if (is_cowonly_root(ref_path->root_objectid)) { + path->lowest_level = lowest_level; + ret = btrfs_search_slot(trans, root, first_key, path, 0, 1); + BUG_ON(ret < 0); + path->lowest_level = 0; + btrfs_release_path(root, path); + return 0; + } + + keys = kzalloc(sizeof(*keys) * BTRFS_MAX_LEVEL, GFP_NOFS); + BUG_ON(!keys); + nodes = kzalloc(sizeof(*nodes) * BTRFS_MAX_LEVEL, GFP_NOFS); + BUG_ON(!nodes); + + mutex_lock(&root->fs_info->tree_reloc_mutex); + ret = init_reloc_tree(trans, root); + BUG_ON(ret); + reloc_root = root->reloc_root; + + path->lowest_level = lowest_level; + ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 0); + BUG_ON(ret); + /* + * get relocation mapping for tree blocks in the path + */ + lowest_merge = BTRFS_MAX_LEVEL; + for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) { + u64 new_bytenr; + eb = path->nodes[level]; + if (!eb || eb == reloc_root->node) + continue; + ret = btrfs_get_reloc_mapping(reloc_root, eb->start, eb->len, + &new_bytenr); + if (ret) + continue; + if (level == 0) + btrfs_item_key_to_cpu(eb, &keys[level], 0); + else + btrfs_node_key_to_cpu(eb, &keys[level], 0); + nodes[level] = new_bytenr; + lowest_merge = level; + } + + update_refs = 0; + if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { + eb = path->nodes[0]; + if (btrfs_header_generation(eb) < trans->transid) + update_refs = 1; + } + + btrfs_release_path(reloc_root, path); + /* + * merge tree blocks that already relocated in other reloc trees + */ + if (lowest_merge != BTRFS_MAX_LEVEL) { + ret = btrfs_merge_path(trans, reloc_root, keys, nodes, + lowest_merge); + BUG_ON(ret < 0); + } + /* + * cow any tree blocks that still haven't been relocated + */ + ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 1); + BUG_ON(ret); + /* + * if we are relocating data block group, update extent pointers + * in the newly created tree leaf. + */ + eb = path->nodes[0]; + if (update_refs && nodes[0] != eb->start) { + ret = replace_extents_in_leaf(trans, reloc_root, eb, group, + reloc_inode); + BUG_ON(ret); + } + + memset(keys, 0, sizeof(*keys) * BTRFS_MAX_LEVEL); + memset(nodes, 0, sizeof(*nodes) * BTRFS_MAX_LEVEL); + for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) { + eb = path->nodes[level]; + if (!eb || eb == reloc_root->node) + continue; + BUG_ON(btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID); + nodes[level] = eb->start; + if (level == 0) + btrfs_item_key_to_cpu(eb, &keys[level], 0); + else + btrfs_node_key_to_cpu(eb, &keys[level], 0); + } + + if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { + eb = path->nodes[0]; + extent_buffer_get(eb); + } + btrfs_release_path(reloc_root, path); + /* + * replace tree blocks in the fs tree with tree blocks in + * the reloc tree. + */ + ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level); + BUG_ON(ret < 0); + + if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { + ret = invalidate_extent_cache(reloc_root, eb, group, root); + BUG_ON(ret); + free_extent_buffer(eb); + } + mutex_unlock(&root->fs_info->tree_reloc_mutex); + + path->lowest_level = 0; + kfree(nodes); + kfree(keys); + return 0; +} + +static int noinline relocate_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *first_key, + struct btrfs_ref_path *ref_path) +{ + int ret; + int needs_lock = 0; + + if (root == root->fs_info->extent_root || + root == root->fs_info->chunk_root || + root == root->fs_info->dev_root) { + needs_lock = 1; + mutex_lock(&root->fs_info->alloc_mutex); + } + + ret = relocate_one_path(trans, root, path, first_key, + ref_path, NULL, NULL); + BUG_ON(ret); + + if (root == root->fs_info->extent_root) + btrfs_extent_post_op(trans, root); + if (needs_lock) + mutex_unlock(&root->fs_info->alloc_mutex); + + return 0; +} + +static int noinline del_extent_zero(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, + struct btrfs_path *path, + struct btrfs_key *extent_key) +{ + int ret; + + mutex_lock(&extent_root->fs_info->alloc_mutex); + ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); + if (ret) + goto out; + ret = btrfs_del_item(trans, extent_root, path); +out: + btrfs_release_path(extent_root, path); + mutex_unlock(&extent_root->fs_info->alloc_mutex); + return ret; +} + +static struct btrfs_root noinline *read_ref_root(struct btrfs_fs_info *fs_info, + struct btrfs_ref_path *ref_path) +{ + struct btrfs_key root_key; + + root_key.objectid = ref_path->root_objectid; + root_key.type = BTRFS_ROOT_ITEM_KEY; + if (is_cowonly_root(ref_path->root_objectid)) + root_key.offset = 0; + else + root_key.offset = (u64)-1; + + return btrfs_read_fs_root_no_name(fs_info, &root_key); +} + +static int noinline relocate_one_extent(struct btrfs_root *extent_root, + struct btrfs_path *path, + struct btrfs_key *extent_key, + struct btrfs_block_group_cache *group, + struct inode *reloc_inode, int pass) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *found_root; + struct btrfs_ref_path *ref_path = NULL; + struct disk_extent *new_extents = NULL; + int nr_extents = 0; + int loops; + int ret; + int level; + struct btrfs_key first_key; + u64 prev_block = 0; + + mutex_unlock(&extent_root->fs_info->alloc_mutex); + + trans = btrfs_start_transaction(extent_root, 1); + BUG_ON(!trans); + + if (extent_key->objectid == 0) { + ret = del_extent_zero(trans, extent_root, path, extent_key); + goto out; + } + + ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS); + if (!ref_path) { + ret = -ENOMEM; + goto out; + } + + for (loops = 0; ; loops++) { + if (loops == 0) { + ret = btrfs_first_ref_path(trans, extent_root, ref_path, + extent_key->objectid); + } else { + ret = btrfs_next_ref_path(trans, extent_root, ref_path); + } + if (ret < 0) + goto out; + if (ret > 0) + break; + + if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID || + ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID) + continue; + + found_root = read_ref_root(extent_root->fs_info, ref_path); + BUG_ON(!found_root); + /* + * for reference counted tree, only process reference paths + * rooted at the latest committed root. + */ + if (found_root->ref_cows && + ref_path->root_generation != found_root->root_key.offset) + continue; + + if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { + if (pass == 0) { + /* + * copy data extents to new locations + */ + u64 group_start = group->key.objectid; + ret = relocate_data_extent(reloc_inode, + extent_key, + group_start); + if (ret < 0) + goto out; + break; + } + level = 0; + } else { + level = ref_path->owner_objectid; + } + + if (prev_block != ref_path->nodes[level]) { + struct extent_buffer *eb; + u64 block_start = ref_path->nodes[level]; + u64 block_size = btrfs_level_size(found_root, level); + + eb = read_tree_block(found_root, block_start, + block_size, 0); + btrfs_tree_lock(eb); + BUG_ON(level != btrfs_header_level(eb)); + + if (level == 0) + btrfs_item_key_to_cpu(eb, &first_key, 0); + else + btrfs_node_key_to_cpu(eb, &first_key, 0); + + btrfs_tree_unlock(eb); + free_extent_buffer(eb); + prev_block = block_start; + } + + if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID && + pass >= 2) { + /* + * use fallback method to process the remaining + * references. + */ + if (!new_extents) { + u64 group_start = group->key.objectid; + ret = get_new_locations(reloc_inode, + extent_key, + group_start, 0, + &new_extents, + &nr_extents); + if (ret < 0) + goto out; + } + btrfs_record_root_in_trans(found_root); + ret = replace_one_extent(trans, found_root, + path, extent_key, + &first_key, ref_path, + new_extents, nr_extents); + if (ret < 0) + goto out; + continue; + } + + btrfs_record_root_in_trans(found_root); + if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { + ret = relocate_tree_block(trans, found_root, path, + &first_key, ref_path); + } else { + /* + * try to update data extent references while + * keeping metadata shared between snapshots. + */ + ret = relocate_one_path(trans, found_root, path, + &first_key, ref_path, + group, reloc_inode); + } + if (ret < 0) + goto out; + } + ret = 0; +out: + btrfs_end_transaction(trans, extent_root); + kfree(new_extents); + kfree(ref_path); + mutex_lock(&extent_root->fs_info->alloc_mutex); + return ret; +} + static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) { u64 num_devices; @@ -3686,84 +4780,155 @@ int __alloc_chunk_for_shrink(struct btrfs_root *root, return 0; } -int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) +static int __insert_orphan_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 objectid, u64 size) +{ + struct btrfs_path *path; + struct btrfs_inode_item *item; + struct extent_buffer *leaf; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_insert_empty_inode(trans, root, path, objectid); + if (ret) + goto out; + + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); + memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); + btrfs_set_inode_generation(leaf, item, 1); + btrfs_set_inode_size(leaf, item, size); + btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); + btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NODATASUM); + btrfs_mark_buffer_dirty(leaf); + btrfs_release_path(root, path); +out: + btrfs_free_path(path); + return ret; +} + +static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *group) +{ + struct inode *inode = NULL; + struct btrfs_trans_handle *trans; + struct btrfs_root *root; + struct btrfs_key root_key; + u64 objectid = BTRFS_FIRST_FREE_OBJECTID; + int err = 0; + + root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; + root_key.type = BTRFS_ROOT_ITEM_KEY; + root_key.offset = (u64)-1; + root = btrfs_read_fs_root_no_name(fs_info, &root_key); + if (IS_ERR(root)) + return ERR_CAST(root); + + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + + err = btrfs_find_free_objectid(trans, root, objectid, &objectid); + if (err) + goto out; + + err = __insert_orphan_inode(trans, root, objectid, group->key.offset); + BUG_ON(err); + + err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, + group->key.offset, 0); + BUG_ON(err); + + inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); + if (inode->i_state & I_NEW) { + BTRFS_I(inode)->root = root; + BTRFS_I(inode)->location.objectid = objectid; + BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; + BTRFS_I(inode)->location.offset = 0; + btrfs_read_locked_inode(inode); + unlock_new_inode(inode); + BUG_ON(is_bad_inode(inode)); + } else { + BUG_ON(1); + } + + err = btrfs_orphan_add(trans, inode); +out: + btrfs_end_transaction(trans, root); + if (err) { + if (inode) + iput(inode); + inode = ERR_PTR(err); + } + return inode; +} + +int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) { struct btrfs_trans_handle *trans; - struct btrfs_root *tree_root = root->fs_info->tree_root; struct btrfs_path *path; + struct btrfs_fs_info *info = root->fs_info; + struct extent_buffer *leaf; + struct inode *reloc_inode; + struct btrfs_block_group_cache *block_group; + struct btrfs_key key; u64 cur_byte; u64 total_found; - u64 shrink_last_byte; - struct btrfs_block_group_cache *shrink_block_group; - struct btrfs_key key; - struct btrfs_key found_key; - struct extent_buffer *leaf; u32 nritems; int ret; int progress; + int pass = 0; - mutex_lock(&root->fs_info->alloc_mutex); - shrink_block_group = btrfs_lookup_block_group(root->fs_info, - shrink_start); - BUG_ON(!shrink_block_group); + root = root->fs_info->extent_root; + + block_group = btrfs_lookup_block_group(info, group_start); + BUG_ON(!block_group); - shrink_last_byte = shrink_block_group->key.objectid + - shrink_block_group->key.offset; + printk("btrfs relocating block group %llu flags %llu\n", + (unsigned long long)block_group->key.objectid, + (unsigned long long)block_group->flags); - shrink_block_group->space_info->total_bytes -= - shrink_block_group->key.offset; path = btrfs_alloc_path(); - root = root->fs_info->extent_root; - path->reada = 2; + BUG_ON(!path); - printk("btrfs relocating block group %llu flags %llu\n", - (unsigned long long)shrink_start, - (unsigned long long)shrink_block_group->flags); + reloc_inode = create_reloc_inode(info, block_group); + BUG_ON(IS_ERR(reloc_inode)); - __alloc_chunk_for_shrink(root, shrink_block_group, 1); + mutex_lock(&root->fs_info->alloc_mutex); -again: + __alloc_chunk_for_shrink(root, block_group, 1); + block_group->ro = 1; + block_group->space_info->total_bytes -= block_group->key.offset; - shrink_block_group->ro = 1; + mutex_unlock(&root->fs_info->alloc_mutex); + btrfs_start_delalloc_inodes(info->tree_root); + btrfs_wait_ordered_extents(info->tree_root, 0); +again: total_found = 0; progress = 0; - key.objectid = shrink_start; + key.objectid = block_group->key.objectid; key.offset = 0; key.type = 0; cur_byte = key.objectid; - mutex_unlock(&root->fs_info->alloc_mutex); + trans = btrfs_start_transaction(info->tree_root, 1); + btrfs_commit_transaction(trans, info->tree_root); - btrfs_start_delalloc_inodes(root); - btrfs_wait_ordered_extents(tree_root, 0); + mutex_lock(&root->fs_info->cleaner_mutex); + btrfs_clean_old_snapshots(info->tree_root); + btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); + mutex_unlock(&root->fs_info->cleaner_mutex); mutex_lock(&root->fs_info->alloc_mutex); - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) - goto out; - - ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY); - if (ret < 0) - goto out; - - if (ret == 0) { - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid + found_key.offset > shrink_start && - found_key.objectid < shrink_last_byte) { - cur_byte = found_key.objectid; - key.objectid = cur_byte; - } - } - btrfs_release_path(root, path); - while(1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; - next: leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); @@ -3779,109 +4944,76 @@ next: nritems = btrfs_header_nritems(leaf); } - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (found_key.objectid >= shrink_last_byte) + if (key.objectid >= block_group->key.objectid + + block_group->key.offset) break; if (progress && need_resched()) { - memcpy(&key, &found_key, sizeof(key)); - cond_resched(); btrfs_release_path(root, path); - btrfs_search_slot(NULL, root, &key, path, 0, 0); + mutex_unlock(&root->fs_info->alloc_mutex); + cond_resched(); + mutex_lock(&root->fs_info->alloc_mutex); progress = 0; - goto next; + continue; } progress = 1; - if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY || - found_key.objectid + found_key.offset <= cur_byte) { - memcpy(&key, &found_key, sizeof(key)); - key.offset++; + if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY || + key.objectid + key.offset <= cur_byte) { path->slots[0]++; goto next; } total_found++; - cur_byte = found_key.objectid + found_key.offset; - key.objectid = cur_byte; + cur_byte = key.objectid + key.offset; btrfs_release_path(root, path); - ret = relocate_one_extent(root, path, &found_key); - __alloc_chunk_for_shrink(root, shrink_block_group, 0); - } - - btrfs_release_path(root, path); - - if (total_found > 0) { - printk("btrfs relocate found %llu last extent was %llu\n", - (unsigned long long)total_found, - (unsigned long long)found_key.objectid); - mutex_unlock(&root->fs_info->alloc_mutex); - trans = btrfs_start_transaction(tree_root, 1); - btrfs_commit_transaction(trans, tree_root); - btrfs_clean_old_snapshots(tree_root); + __alloc_chunk_for_shrink(root, block_group, 0); + ret = relocate_one_extent(root, path, &key, block_group, + reloc_inode, pass); + BUG_ON(ret < 0); - btrfs_start_delalloc_inodes(root); - btrfs_wait_ordered_extents(tree_root, 0); - - trans = btrfs_start_transaction(tree_root, 1); - btrfs_commit_transaction(trans, tree_root); - mutex_lock(&root->fs_info->alloc_mutex); - goto again; + key.objectid = cur_byte; + key.type = 0; + key.offset = 0; } - /* - * we've freed all the extents, now remove the block - * group item from the tree - */ + btrfs_release_path(root, path); mutex_unlock(&root->fs_info->alloc_mutex); - trans = btrfs_start_transaction(root, 1); - - mutex_lock(&root->fs_info->alloc_mutex); - memcpy(&key, &shrink_block_group->key, sizeof(key)); - - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret > 0) - ret = -EIO; - if (ret < 0) { - btrfs_end_transaction(trans, root); - goto out; + if (pass == 0) { + btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1); + invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1); + WARN_ON(reloc_inode->i_mapping->nrpages); } - spin_lock(&root->fs_info->block_group_cache_lock); - rb_erase(&shrink_block_group->cache_node, - &root->fs_info->block_group_cache_tree); - spin_unlock(&root->fs_info->block_group_cache_lock); - - ret = btrfs_remove_free_space(shrink_block_group, key.objectid, - key.offset); - if (ret) { - btrfs_end_transaction(trans, root); - goto out; + if (total_found > 0) { + printk("btrfs found %llu extents in pass %d\n", + (unsigned long long)total_found, pass); + pass++; + goto again; } - /* - memset(shrink_block_group, 0, sizeof(*shrink_block_group)); - kfree(shrink_block_group); - */ - btrfs_del_item(trans, root, path); - btrfs_release_path(root, path); - mutex_unlock(&root->fs_info->alloc_mutex); - btrfs_commit_transaction(trans, root); + /* delete reloc_inode */ + iput(reloc_inode); + + /* unpin extents in this range */ + trans = btrfs_start_transaction(info->tree_root, 1); + btrfs_commit_transaction(trans, info->tree_root); mutex_lock(&root->fs_info->alloc_mutex); - /* the code to unpin extents might set a few bits in the free - * space cache for this range again - */ - /* XXX? */ - ret = btrfs_remove_free_space(shrink_block_group, key.objectid, - key.offset); + spin_lock(&block_group->lock); + WARN_ON(block_group->pinned > 0); + WARN_ON(block_group->reserved > 0); + WARN_ON(btrfs_block_group_used(&block_group->item) > 0); + spin_unlock(&block_group->lock); + ret = 0; out: - btrfs_free_path(path); mutex_unlock(&root->fs_info->alloc_mutex); + btrfs_free_path(path); return ret; } @@ -3922,6 +5054,33 @@ out: return ret; } +int btrfs_free_block_groups(struct btrfs_fs_info *info) +{ + struct btrfs_block_group_cache *block_group; + struct rb_node *n; + + mutex_lock(&info->alloc_mutex); + spin_lock(&info->block_group_cache_lock); + while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { + block_group = rb_entry(n, struct btrfs_block_group_cache, + cache_node); + + spin_unlock(&info->block_group_cache_lock); + btrfs_remove_free_space_cache(block_group); + spin_lock(&info->block_group_cache_lock); + + rb_erase(&block_group->cache_node, + &info->block_group_cache_tree); + spin_lock(&block_group->space_info->lock); + list_del(&block_group->list); + spin_unlock(&block_group->space_info->lock); + kfree(block_group); + } + spin_unlock(&info->block_group_cache_lock); + mutex_unlock(&info->alloc_mutex); + return 0; +} + int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; @@ -4039,3 +5198,46 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, return 0; } + +int btrfs_remove_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 group_start) +{ + struct btrfs_path *path; + struct btrfs_block_group_cache *block_group; + struct btrfs_key key; + int ret; + + BUG_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); + root = root->fs_info->extent_root; + + block_group = btrfs_lookup_block_group(root->fs_info, group_start); + BUG_ON(!block_group); + + memcpy(&key, &block_group->key, sizeof(key)); + + path = btrfs_alloc_path(); + BUG_ON(!path); + + btrfs_remove_free_space_cache(block_group); + rb_erase(&block_group->cache_node, + &root->fs_info->block_group_cache_tree); + spin_lock(&block_group->space_info->lock); + list_del(&block_group->list); + spin_unlock(&block_group->space_info->lock); + + /* + memset(shrink_block_group, 0, sizeof(*shrink_block_group)); + kfree(shrink_block_group); + */ + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) + ret = -EIO; + if (ret < 0) + goto out; + + ret = btrfs_del_item(trans, root, path); +out: + btrfs_free_path(path); + return ret; +} diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index e3984f902e7..0091c01abb0 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -210,7 +210,10 @@ again: goto err; } - ret = btrfs_add_dead_root(dead_root, latest); + if (objectid == BTRFS_TREE_RELOC_OBJECTID) + ret = btrfs_add_dead_reloc_root(dead_root); + else + ret = btrfs_add_dead_root(dead_root, latest); if (ret) goto err; goto again; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8c83cf464c8..444abe0796a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -477,6 +477,7 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, dirty = root->dirty_root; btrfs_free_log(trans, root); + btrfs_free_reloc_root(root); if (root->commit_root == root->node) { WARN_ON(root->node->start != @@ -855,6 +856,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, * with the tree-log code. */ mutex_lock(&root->fs_info->tree_log_mutex); + /* + * keep tree reloc code from adding new reloc trees + */ + mutex_lock(&root->fs_info->tree_reloc_mutex); + ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, &dirty_fs_roots); @@ -865,6 +871,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, */ btrfs_free_log_root_tree(trans, root->fs_info); + btrfs_free_reloc_mappings(root); + ret = btrfs_commit_tree_roots(trans, root); BUG_ON(ret); @@ -910,10 +918,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->tree_log_mutex); btrfs_finish_extent_commit(trans, root, pinned_copy); - mutex_lock(&root->fs_info->trans_mutex); - kfree(pinned_copy); + btrfs_drop_dead_reloc_roots(root); + mutex_unlock(&root->fs_info->tree_reloc_mutex); + + mutex_lock(&root->fs_info->trans_mutex); + cur_trans->commit_done = 1; root->fs_info->last_trans_committed = cur_trans->transid; wake_up(&cur_trans->commit_wait); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ddf89626498..51f113119b2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1268,7 +1268,7 @@ int btrfs_relocate_chunk(struct btrfs_root *root, em_tree = &root->fs_info->mapping_tree.map_tree; /* step one, relocate all the extents inside this chunk */ - ret = btrfs_shrink_extent_tree(extent_root, chunk_offset); + ret = btrfs_relocate_block_group(extent_root, chunk_offset); BUG_ON(ret); trans = btrfs_start_transaction(root, 1); @@ -1308,15 +1308,18 @@ int btrfs_relocate_chunk(struct btrfs_root *root, BUG_ON(ret); } + ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); + BUG_ON(ret); + spin_lock(&em_tree->lock); remove_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + kfree(map); em->bdev = NULL; /* once for the tree */ free_extent_map(em); - spin_unlock(&em_tree->lock); - /* once for us */ free_extent_map(em); -- cgit v1.2.3-70-g09d2 From 45b8c9a8b1e15bf79c2c17ec217adf96785f8011 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 30 Sep 2008 14:40:06 -0400 Subject: Btrfs: fix seekiness due to finding the wrong block group This patch fixes a problem where we end up seeking too much when *last_ptr is valid. This happens because btrfs_lookup_first_block_group only returns a block group that starts on or after the given search start, so if the search_start is in the middle of a block group it will return the block group after the given search_start, which is suboptimal. This patch fixes that by doing a btrfs_lookup_block_group, which will return the block group that contains the given search start. If we fail to find a block group, we fall back on btrfs_lookup_first_block_group so we can find the next block group, not sure if this is absolutely needed, but better safe than sorry. Also if we can't find the block group that we need, or it happens to not be of the right type, we need to add empty_cluster since *last_ptr could point to a mismatched block group, which means we need to start over with empty_cluster added to total needed. Thank you, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8043b9d584a..64e14ddf623 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2238,7 +2238,10 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, total_needed += empty_size; new_group: - block_group = btrfs_lookup_first_block_group(info, search_start); + block_group = btrfs_lookup_block_group(info, search_start); + if (!block_group) + block_group = btrfs_lookup_first_block_group(info, + search_start); /* * Ok this looks a little tricky, buts its really simple. First if we @@ -2255,8 +2258,10 @@ new_group: if (!block_group || (!block_group_bits(block_group, data) && last_ptr && *last_ptr)) { if (search_start != orig_search_start) { - if (last_ptr && *last_ptr) + if (last_ptr && *last_ptr) { + total_needed += empty_cluster; *last_ptr = 0; + } search_start = orig_search_start; goto new_group; } else if (!chunk_alloc_done && allowed_chunk_alloc) { -- cgit v1.2.3-70-g09d2 From 75ccf47d13bfb66de7faf596bfe497b9af7aaa40 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 30 Sep 2008 19:24:06 -0400 Subject: Btrfs: fix multi-device code to use raid policies set by mkfs When reading in block groups, a global mask of the available raid policies should be adjusted based on the types of block groups found on disk. This global mask is then used to decide which raid policy to use for new block groups. The recent allocator changes dropped the call that updated the global mask, making all the block groups allocated at run time single striped onto a single drive. This also fixes the async worker threads to set any thread that uses the requeue mechanism as busy. This allows us to avoid blocking on get_request_wait for the async bio submission threads. Signed-off-by: Chris Mason --- fs/btrfs/async-thread.c | 14 +++++++++++++- fs/btrfs/extent-tree.c | 2 ++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 04fb9702d14..d82efd722a4 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -302,8 +302,20 @@ int btrfs_requeue_work(struct btrfs_work *work) spin_lock_irqsave(&worker->lock, flags); atomic_inc(&worker->num_pending); list_add_tail(&work->list, &worker->pending); - check_busy_worker(worker); + + /* by definition we're busy, take ourselves off the idle + * list + */ + if (worker->idle) { + spin_lock_irqsave(&worker->workers->lock, flags); + worker->idle = 0; + list_move_tail(&worker->worker_list, + &worker->workers->worker_list); + spin_unlock_irqrestore(&worker->workers->lock, flags); + } + spin_unlock_irqrestore(&worker->lock, flags); + out: return 0; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 64e14ddf623..677d5e774fa 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5145,6 +5145,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) ret = btrfs_add_block_group_cache(root->fs_info, cache); BUG_ON(ret); + + set_avail_alloc_bits(root->fs_info, cache->flags); } ret = 0; error: -- cgit v1.2.3-70-g09d2 From cf749823857230017c86504bfdc70524f929ba96 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 1 Oct 2008 19:11:18 -0400 Subject: Btrfs: fix deadlock between alloc_mutex/chunk_mutex This fixes a deadlock that happens between the alloc_mutex and chunk_mutex. Process A comes in, decides to do a do_chunk_alloc, which takes the chunk_mutex, and is holding the alloc_mutex because the only way you get to do_chunk_alloc is by holding the alloc_mutex. btrfs_alloc_chunk does its thing and goes to insert a new item, which results in a cow of the block. We get into del_pending_extents from there, where if we need to be rescheduled we drop the alloc_mutex and schedule. At this point process B comes in to do an allocation and gets the alloc_mutex, and because process A did not do the chunk allocation completely it thinks its a good time to do a chunk allocation as well, and hangs on the chunk_mutex. Process A wakes up and tries to take the alloc_mutex and cannot. The way to fix this is do a mutex_trylock() on chunk_mutex. If we return 0 we didn't get the lock, and if this is just a "hey it may be a good time to allocate a chunk" then we just exit. If we are trying to force an allocation then we reschedule and keep trying to acquire the chunk_mutex. If once we acquire it the space is already full then we can just exit, otherwise we can continue with the chunk allocation. Thank you, Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 677d5e774fa..db37b867e4f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1505,7 +1505,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 thresh; u64 start; u64 num_bytes; - int ret = 0; + int ret = 0, waited = 0; flags = reduce_alloc_profile(extent_root, flags); @@ -1530,7 +1530,18 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, space_info->bytes_reserved + alloc_bytes) < thresh) goto out; - mutex_lock(&extent_root->fs_info->chunk_mutex); + while (!mutex_trylock(&extent_root->fs_info->chunk_mutex)) { + if (!force) + goto out; + mutex_unlock(&extent_root->fs_info->alloc_mutex); + cond_resched(); + mutex_lock(&extent_root->fs_info->alloc_mutex); + waited = 1; + } + + if (waited && space_info->full) + goto out_unlock; + ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags); if (ret == -ENOSPC) { printk("space info full %Lu\n", flags); -- cgit v1.2.3-70-g09d2 From 30c43e2444c16afe3b2130f40ad273541bf3dc36 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 3 Oct 2008 12:24:01 -0400 Subject: Btrfs: remove last_log_alloc allocator optimization The tree logging code was trying to separate tree log allocations from normal metadata allocations to improve writeback patterns during an fsync. But, the code was not effective and ended up just mixing tree log blocks with regular metadata. That seems to be working fairly well, so the last_log_alloc code can be removed. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 - fs/btrfs/extent-tree.c | 7 ------- fs/btrfs/transaction.c | 1 - 3 files changed, 9 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 94e0cdfddc0..8566eb30f56 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -672,7 +672,6 @@ struct btrfs_fs_info { u64 delalloc_bytes; u64 last_alloc; u64 last_data_alloc; - u64 last_log_alloc; spinlock_t ref_cache_lock; u64 total_ref_cache_size; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index db37b867e4f..29380467b67 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2228,13 +2228,6 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) last_ptr = &root->fs_info->last_data_alloc; - if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { - last_ptr = &root->fs_info->last_log_alloc; - if (!last_ptr == 0 && root->fs_info->last_alloc) { - *last_ptr = root->fs_info->last_alloc + empty_cluster; - } - } - if (last_ptr) { if (*last_ptr) hint_byte = *last_ptr; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 11266d68a6c..5ecc24d634a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -61,7 +61,6 @@ static noinline int join_transaction(struct btrfs_root *root) root->fs_info->generation++; root->fs_info->last_alloc = 0; root->fs_info->last_data_alloc = 0; - root->fs_info->last_log_alloc = 0; cur_trans->num_writers = 1; cur_trans->num_joined = 0; cur_trans->transid = root->fs_info->generation; -- cgit v1.2.3-70-g09d2 From a76a3cd40c1127ca199d4f7f37bf0d541bf44eb2 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Thu, 9 Oct 2008 11:46:29 -0400 Subject: Btrfs: Count space allocated to file in bytes This patch makes btrfs count space allocated to file in bytes instead of 512 byte sectors. Everything else in btrfs uses a byte count instead of sector sizes or blocks sizes, so this fits better. Signed-off-by: Yan Zheng --- fs/btrfs/ctree.h | 13 ++----------- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/file.c | 24 +++++++++++++----------- fs/btrfs/inode.c | 23 ++++++++++++----------- fs/btrfs/ioctl.c | 4 ++-- fs/btrfs/tree-log.c | 6 +++--- 6 files changed, 33 insertions(+), 39 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8566eb30f56..50fbcc9ec45 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -411,7 +411,7 @@ struct btrfs_inode_item { /* transid that last touched this inode */ __le64 transid; __le64 size; - __le64 nblocks; + __le64 nbytes; __le64 block_group; __le32 nlink; __le32 uid; @@ -1017,7 +1017,7 @@ BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64); BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); BTRFS_SETGET_FUNCS(inode_transid, struct btrfs_inode_item, transid, 64); BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64); -BTRFS_SETGET_FUNCS(inode_nblocks, struct btrfs_inode_item, nblocks, 64); +BTRFS_SETGET_FUNCS(inode_nbytes, struct btrfs_inode_item, nbytes, 64); BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64); BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32); BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32); @@ -1814,15 +1814,6 @@ void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name, int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio); -static inline void dec_i_blocks(struct inode *inode, u64 dec) -{ - dec = dec >> 9; - if (dec <= inode->i_blocks) - inode->i_blocks -= dec; - else - inode->i_blocks = 0; -} - unsigned long btrfs_force_ra(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t offset, pgoff_t last_index); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 29380467b67..69db54e09fb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3930,7 +3930,7 @@ next: BUG_ON(ret); btrfs_release_path(root, path); - inode->i_blocks += extent_len >> 9; + inode_add_bytes(inode, extent_len); ext_offset = 0; num_bytes -= extent_len; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a03d1bbb19a..18dfdf5f91d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -193,7 +193,7 @@ static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - inode->i_blocks += (offset + size - found_end) >> 9; + inode_add_bytes(inode, offset + size - found_end); } if (found_end < offset) { ptr = btrfs_file_extent_inline_start(ei) + found_size; @@ -203,7 +203,7 @@ static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, insert: btrfs_release_path(root, path); datasize = offset + size - key.offset; - inode->i_blocks += datasize >> 9; + inode_add_bytes(inode, datasize); datasize = btrfs_file_extent_calc_inline_size(datasize); ret = btrfs_insert_empty_item(trans, root, path, &key, datasize); @@ -713,7 +713,8 @@ next_slot: extent); if (btrfs_file_extent_disk_bytenr(leaf, extent)) { - dec_i_blocks(inode, old_num - new_num); + inode_sub_bytes(inode, old_num - + new_num); } btrfs_set_file_extent_num_bytes(leaf, extent, new_num); @@ -724,14 +725,17 @@ next_slot: u32 new_size; new_size = btrfs_file_extent_calc_inline_size( inline_limit - key.offset); - dec_i_blocks(inode, (extent_end - key.offset) - - (inline_limit - key.offset)); + inode_sub_bytes(inode, extent_end - + inline_limit); btrfs_truncate_item(trans, root, path, new_size, 1); } } /* delete the entire extent */ if (!keep) { + if (found_inline) + inode_sub_bytes(inode, extent_end - + key.offset); ret = btrfs_del_item(trans, root, path); /* TODO update progress marker and return */ BUG_ON(ret); @@ -743,8 +747,7 @@ next_slot: u32 new_size; new_size = btrfs_file_extent_calc_inline_size( extent_end - end); - dec_i_blocks(inode, (extent_end - key.offset) - - (extent_end - end)); + inode_sub_bytes(inode, end - key.offset); ret = btrfs_truncate_item(trans, root, path, new_size, 0); BUG_ON(ret); @@ -791,9 +794,7 @@ next_slot: } btrfs_release_path(root, path); if (disk_bytenr != 0) { - inode->i_blocks += - btrfs_file_extent_num_bytes(leaf, - extent) >> 9; + inode_add_bytes(inode, extent_end - end); } } @@ -801,7 +802,8 @@ next_slot: u64 disk_bytenr = le64_to_cpu(old.disk_bytenr); if (disk_bytenr != 0) { - dec_i_blocks(inode, le64_to_cpu(old.num_bytes)); + inode_sub_bytes(inode, + le64_to_cpu(old.num_bytes)); ret = btrfs_free_extent(trans, root, disk_bytenr, le64_to_cpu(old.disk_num_bytes), diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ff0c3597665..f9df89c5fdf 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -652,7 +652,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) BUG_ON(ret); btrfs_release_path(root, path); - inode->i_blocks += ordered_extent->len >> 9; + inode_add_bytes(inode, ordered_extent->len); unlock_extent(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, GFP_NOFS); @@ -1104,7 +1104,7 @@ void btrfs_read_locked_inode(struct inode *inode) inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec); inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); - inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item); + inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); inode->i_generation = BTRFS_I(inode)->generation; inode->i_rdev = 0; @@ -1184,7 +1184,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item), inode->i_ctime.tv_nsec); - btrfs_set_inode_nblocks(leaf, item, inode->i_blocks); + btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode)); btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); btrfs_set_inode_transid(leaf, item, trans->transid); btrfs_set_inode_rdev(leaf, item, inode->i_rdev); @@ -1679,7 +1679,7 @@ search_again: num_dec = (orig_num_bytes - extent_num_bytes); if (root->ref_cows && extent_start != 0) - dec_i_blocks(inode, num_dec); + inode_sub_bytes(inode, num_dec); btrfs_mark_buffer_dirty(leaf); } else { extent_num_bytes = @@ -1690,7 +1690,7 @@ search_again: if (extent_start != 0) { found_extent = 1; if (root->ref_cows) - dec_i_blocks(inode, num_dec); + inode_sub_bytes(inode, num_dec); } root_gen = btrfs_header_generation(leaf); root_owner = btrfs_header_owner(leaf); @@ -1700,8 +1700,8 @@ search_again: u32 size = new_size - found_key.offset; if (root->ref_cows) { - dec_i_blocks(inode, item_end + 1 - - found_key.offset - size); + inode_sub_bytes(inode, item_end + 1 - + new_size); } size = btrfs_file_extent_calc_inline_size(size); @@ -1709,8 +1709,8 @@ search_again: size, 1); BUG_ON(ret); } else if (root->ref_cows) { - dec_i_blocks(inode, item_end + 1 - - found_key.offset); + inode_sub_bytes(inode, item_end + 1 - + found_key.offset); } } delete: @@ -2514,7 +2514,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_gid = current->fsgid; inode->i_mode = mode; inode->i_ino = objectid; - inode->i_blocks = 0; + inode_set_bytes(inode, 0); inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_item); @@ -3557,7 +3557,8 @@ static int btrfs_getattr(struct vfsmount *mnt, struct inode *inode = dentry->d_inode; generic_fillattr(inode, stat); stat->blksize = PAGE_CACHE_SIZE; - stat->blocks = inode->i_blocks + (BTRFS_I(inode)->delalloc_bytes >> 9); + stat->blocks = (inode_get_bytes(inode) + + BTRFS_I(inode)->delalloc_bytes) >> 9; return 0; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 04de767a8db..ab7a0f61ded 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -99,7 +99,7 @@ static noinline int create_subvol(struct btrfs_root *root, char *name, inode_item->generation = cpu_to_le64(1); inode_item->size = cpu_to_le64(3); inode_item->nlink = cpu_to_le32(1); - inode_item->nblocks = cpu_to_le64(1); + inode_item->nbytes = cpu_to_le64(root->leafsize); inode_item->mode = cpu_to_le32(S_IFDIR | 0755); btrfs_set_root_bytenr(&root_item, leaf->start); @@ -671,7 +671,7 @@ out: btrfs_release_path(root, path); if (ret == 0) { inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->i_blocks = src->i_blocks; + inode_set_bytes(inode, inode_get_bytes(src)); btrfs_i_size_write(inode, src->i_size); BTRFS_I(inode)->flags = BTRFS_I(src)->flags; ret = btrfs_update_inode(trans, root, inode); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 88bbfd959f1..8df719a73d8 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -113,7 +113,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, inode_item->generation = cpu_to_le64(1); inode_item->size = cpu_to_le64(3); inode_item->nlink = cpu_to_le32(1); - inode_item->nblocks = cpu_to_le64(1); + inode_item->nbytes = cpu_to_le64(root->leafsize); inode_item->mode = cpu_to_le32(S_IFDIR | 0755); btrfs_set_root_bytenr(&root_item, leaf->start); @@ -598,8 +598,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, ret = overwrite_item(trans, root, path, eb, slot, key); BUG_ON(ret); - /* btrfs_drop_extents changes i_blocks, update it here */ - inode->i_blocks += (extent_end - start) >> 9; + /* btrfs_drop_extents changes i_bytes & i_blocks, update it here */ + inode_add_bytes(inode, extent_end - start); btrfs_update_inode(trans, root, inode); out: if (inode) -- cgit v1.2.3-70-g09d2 From 3bb1a1bc42f2ae9582c28adf620484efcd4da38d Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Thu, 9 Oct 2008 11:46:24 -0400 Subject: Btrfs: Remove offset field from struct btrfs_extent_ref The offset field in struct btrfs_extent_ref records the position inside file that file extent is referenced by. In the new back reference system, tree leaves holding references to file extent are recorded explicitly. We can scan these tree leaves very quickly, so the offset field is not required. This patch also makes the back reference system check the objectid when extents are in deleting. Signed-off-by: Yan Zheng --- fs/btrfs/ctree.c | 24 +++---- fs/btrfs/ctree.h | 25 ++----- fs/btrfs/extent-tree.c | 185 +++++++++++++++++++++++-------------------------- fs/btrfs/file.c | 6 +- fs/btrfs/inode.c | 6 +- fs/btrfs/ioctl.c | 2 +- fs/btrfs/print-tree.c | 3 +- fs/btrfs/tree-log.c | 8 +-- 8 files changed, 115 insertions(+), 144 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 2eab4643dcb..9caeb377de6 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -254,8 +254,7 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, ret = btrfs_alloc_reserved_extent(trans, root, parent_start, root->root_key.objectid, - trans->transid, level, 0, - &ins); + trans->transid, level, &ins); BUG_ON(ret); cow = btrfs_init_new_buffer(trans, root, prealloc_dest, buf->len); @@ -333,7 +332,7 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, buf->len, buf->start, root->root_key.objectid, btrfs_header_generation(buf), - 0, 0, 1); + level, 1); } free_extent_buffer(buf); add_root_to_dirty_list(root); @@ -347,7 +346,7 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(btrfs_header_generation(parent) != trans->transid); btrfs_free_extent(trans, root, buf->start, buf->len, parent_start, btrfs_header_owner(parent), - btrfs_header_generation(parent), 0, 0, 1); + btrfs_header_generation(parent), level, 1); } if (unlock_orig) btrfs_tree_unlock(buf); @@ -927,7 +926,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, ret = btrfs_update_extent_ref(trans, root, child->start, mid->start, child->start, root->root_key.objectid, - trans->transid, level - 1, 0); + trans->transid, level - 1); BUG_ON(ret); add_root_to_dirty_list(root); @@ -940,7 +939,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, free_extent_buffer(mid); ret = btrfs_free_extent(trans, root, mid->start, mid->len, mid->start, root->root_key.objectid, - btrfs_header_generation(mid), 0, 0, 1); + btrfs_header_generation(mid), + level, 1); /* once for the root ptr */ free_extent_buffer(mid); return ret; @@ -1006,7 +1006,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, wret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, btrfs_header_owner(parent), - generation, 0, 0, 1); + generation, level, 1); if (wret) ret = wret; } else { @@ -1055,7 +1055,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, wret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, btrfs_header_owner(parent), - root_gen, 0, 0, 1); + root_gen, level, 1); if (wret) ret = wret; } else { @@ -1691,13 +1691,13 @@ next_level: blocksize, parent->start, btrfs_header_owner(parent), btrfs_header_generation(parent), - level - 1, 0); + level - 1); BUG_ON(ret); ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, btrfs_header_owner(parent), btrfs_header_generation(parent), - level - 1, 0, 1); + level - 1, 1); BUG_ON(ret); if (generation == trans->transid) { @@ -1973,7 +1973,7 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, ret = btrfs_update_extent_ref(trans, root, lower->start, lower->start, c->start, root->root_key.objectid, - trans->transid, level - 1, 0); + trans->transid, level - 1); BUG_ON(ret); /* the super has an extra ref to root->node */ @@ -3213,7 +3213,7 @@ noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, btrfs_level_size(root, 0), path->nodes[1]->start, btrfs_header_owner(path->nodes[1]), - root_gen, 0, 0, 1); + root_gen, 0, 1); return ret; } /* diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 50fbcc9ec45..a37fd783407 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -374,7 +374,6 @@ struct btrfs_extent_ref { __le64 root; __le64 generation; __le64 objectid; - __le64 offset; __le32 num_refs; } __attribute__ ((__packed__)); @@ -1082,7 +1081,6 @@ static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev) BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64); BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64); BTRFS_SETGET_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64); -BTRFS_SETGET_FUNCS(ref_offset, struct btrfs_extent_ref, offset, 64); BTRFS_SETGET_FUNCS(ref_num_refs, struct btrfs_extent_ref, num_refs, 32); BTRFS_SETGET_STACK_FUNCS(stack_ref_root, struct btrfs_extent_ref, root, 64); @@ -1090,8 +1088,6 @@ BTRFS_SETGET_STACK_FUNCS(stack_ref_generation, struct btrfs_extent_ref, generation, 64); BTRFS_SETGET_STACK_FUNCS(stack_ref_objectid, struct btrfs_extent_ref, objectid, 64); -BTRFS_SETGET_STACK_FUNCS(stack_ref_offset, struct btrfs_extent_ref, - offset, 64); BTRFS_SETGET_STACK_FUNCS(stack_ref_num_refs, struct btrfs_extent_ref, num_refs, 32); @@ -1522,29 +1518,20 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u32 blocksize); -int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - u64 bytenr, u64 parent, - u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_bytes, u64 parent, u64 min_bytes, u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset, - u64 empty_size, u64 hint_byte, + u64 owner, u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, u64 data); int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset, - struct btrfs_key *ins); + u64 owner, struct btrfs_key *ins); int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset, - struct btrfs_key *ins); + u64 owner, struct btrfs_key *ins); int btrfs_reserve_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_bytes, u64 min_alloc_size, @@ -1563,7 +1550,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner_objectid, u64 owner_offset, int pin); + u64 owner_objectid, int pin); int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -1572,12 +1559,12 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset); + u64 owner_objectid); int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 orig_parent, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset); + u64 owner_objectid); int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_free_block_groups(struct btrfs_fs_info *info); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 69db54e09fb..ab36769c356 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -525,31 +525,28 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) * - Objectid of the subvolume root * - Generation number of the tree holding the reference * - objectid of the file holding the reference - * - offset in the file corresponding to the key holding the reference * - number of references holding by parent node (alway 1 for tree blocks) * * Btree leaf may hold multiple references to a file extent. In most cases, * these references are from same file and the corresponding offsets inside - * the file are close together. So inode objectid and offset in file are - * just hints, they provide hints about where in the btree the references - * can be found and when we can stop searching. + * the file are close together. * * When a file extent is allocated the fields are filled in: - * (root_key.objectid, trans->transid, inode objectid, offset in file, 1) + * (root_key.objectid, trans->transid, inode objectid, 1) * * When a leaf is cow'd new references are added for every file extent found * in the leaf. It looks similar to the create case, but trans->transid will * be different when the block is cow'd. * - * (root_key.objectid, trans->transid, inode objectid, offset in file, + * (root_key.objectid, trans->transid, inode objectid, * number of references in the leaf) * - * Because inode objectid and offset in file are just hints, they are not - * used when backrefs are deleted. When a file extent is removed either - * during snapshot deletion or file truncation, we find the corresponding - * back back reference and check the following fields. + * When a file extent is removed either during snapshot deletion or + * file truncation, we find the corresponding back reference and check + * the following fields: * - * (btrfs_header_owner(leaf), btrfs_header_generation(leaf)) + * (btrfs_header_owner(leaf), btrfs_header_generation(leaf), + * inode objectid) * * Btree extents can be referenced by: * @@ -558,21 +555,21 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) * * When a tree block is created, back references are inserted: * - * (root->root_key.objectid, trans->transid, level, 0, 1) + * (root->root_key.objectid, trans->transid, level, 1) * * When a tree block is cow'd, new back references are added for all the * blocks it points to. If the tree block isn't in reference counted root, * the old back references are removed. These new back references are of * the form (trans->transid will have increased since creation): * - * (root->root_key.objectid, trans->transid, level, 0, 1) + * (root->root_key.objectid, trans->transid, level, 1) * * When a backref is in deleting, the following fields are checked: * * if backref was for a tree root: - * (btrfs_header_owner(itself), btrfs_header_generation(itself)) + * (btrfs_header_owner(itself), btrfs_header_generation(itself), level) * else - * (btrfs_header_owner(parent), btrfs_header_generation(parent)) + * (btrfs_header_owner(parent), btrfs_header_generation(parent), level) * * Back Reference Key composing: * @@ -584,13 +581,15 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_path *path, u64 bytenr, - u64 parent, u64 ref_root, - u64 ref_generation, int del) + struct btrfs_path *path, + u64 bytenr, u64 parent, + u64 ref_root, u64 ref_generation, + u64 owner_objectid, int del) { struct btrfs_key key; struct btrfs_extent_ref *ref; struct extent_buffer *leaf; + u64 ref_objectid; int ret; key.objectid = bytenr; @@ -607,8 +606,11 @@ static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); + ref_objectid = btrfs_ref_objectid(leaf, ref); if (btrfs_ref_root(leaf, ref) != ref_root || - btrfs_ref_generation(leaf, ref) != ref_generation) { + btrfs_ref_generation(leaf, ref) != ref_generation || + (ref_objectid != owner_objectid && + ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) { ret = -EIO; WARN_ON(1); goto out; @@ -623,7 +625,7 @@ static int noinline insert_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_path *path, u64 bytenr, u64 parent, u64 ref_root, u64 ref_generation, - u64 owner_objectid, u64 owner_offset) + u64 owner_objectid) { struct btrfs_key key; struct extent_buffer *leaf; @@ -643,7 +645,6 @@ static int noinline insert_extent_backref(struct btrfs_trans_handle *trans, btrfs_set_ref_root(leaf, ref, ref_root); btrfs_set_ref_generation(leaf, ref, ref_generation); btrfs_set_ref_objectid(leaf, ref, owner_objectid); - btrfs_set_ref_offset(leaf, ref, owner_offset); btrfs_set_ref_num_refs(leaf, ref, 1); } else if (ret == -EEXIST) { u64 existing_owner; @@ -663,14 +664,10 @@ static int noinline insert_extent_backref(struct btrfs_trans_handle *trans, btrfs_set_ref_num_refs(leaf, ref, num_refs + 1); existing_owner = btrfs_ref_objectid(leaf, ref); - if (existing_owner == owner_objectid && - btrfs_ref_offset(leaf, ref) > owner_offset) { - btrfs_set_ref_offset(leaf, ref, owner_offset); - } else if (existing_owner != owner_objectid && - existing_owner != BTRFS_MULTIPLE_OBJECTIDS) { + if (existing_owner != owner_objectid && + existing_owner != BTRFS_MULTIPLE_OBJECTIDS) { btrfs_set_ref_objectid(leaf, ref, BTRFS_MULTIPLE_OBJECTIDS); - btrfs_set_ref_offset(leaf, ref, 0); } ret = 0; } else { @@ -711,7 +708,7 @@ static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, u64 orig_parent, u64 parent, u64 orig_root, u64 ref_root, u64 orig_generation, u64 ref_generation, - u64 owner_objectid, u64 owner_offset) + u64 owner_objectid) { int ret; struct btrfs_root *extent_root = root->fs_info->extent_root; @@ -762,7 +759,7 @@ static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, return -ENOMEM; ret = lookup_extent_backref(trans, extent_root, path, bytenr, orig_parent, orig_root, - orig_generation, 1); + orig_generation, owner_objectid, 1); if (ret) goto out; ret = remove_extent_backref(trans, extent_root, path); @@ -770,7 +767,7 @@ static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, goto out; ret = insert_extent_backref(trans, extent_root, path, bytenr, parent, ref_root, ref_generation, - owner_objectid, owner_offset); + owner_objectid); BUG_ON(ret); finish_current_insert(trans, extent_root); del_pending_extents(trans, extent_root); @@ -783,7 +780,7 @@ int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 orig_parent, u64 parent, u64 ref_root, u64 ref_generation, - u64 owner_objectid, u64 owner_offset) + u64 owner_objectid) { int ret; if (ref_root == BTRFS_TREE_LOG_OBJECTID && @@ -793,7 +790,7 @@ int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent, parent, ref_root, ref_root, ref_generation, ref_generation, - owner_objectid, owner_offset); + owner_objectid); maybe_unlock_mutex(root); return ret; } @@ -803,7 +800,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, u64 orig_parent, u64 parent, u64 orig_root, u64 ref_root, u64 orig_generation, u64 ref_generation, - u64 owner_objectid, u64 owner_offset) + u64 owner_objectid) { struct btrfs_path *path; int ret; @@ -845,7 +842,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, ret = insert_extent_backref(trans, root->fs_info->extent_root, path, bytenr, parent, ref_root, ref_generation, - owner_objectid, owner_offset); + owner_objectid); BUG_ON(ret); finish_current_insert(trans, root->fs_info->extent_root); del_pending_extents(trans, root->fs_info->extent_root); @@ -858,7 +855,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, u64 ref_generation, - u64 owner_objectid, u64 owner_offset) + u64 owner_objectid) { int ret; if (ref_root == BTRFS_TREE_LOG_OBJECTID && @@ -867,7 +864,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, maybe_lock_mutex(root); ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent, 0, ref_root, 0, ref_generation, - owner_objectid, owner_offset); + owner_objectid); maybe_unlock_mutex(root); return ret; } @@ -1179,7 +1176,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, int ret = 0; int faili = 0; int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, - u64, u64, u64, u64, u64, u64, u64, u64, u64); + u64, u64, u64, u64, u64, u64, u64, u64); ref_root = btrfs_header_owner(buf); ref_generation = btrfs_header_generation(buf); @@ -1223,7 +1220,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, orig_buf->start, buf->start, orig_root, ref_root, orig_generation, ref_generation, - key.objectid, key.offset); + key.objectid); maybe_unlock_mutex(root); if (ret) { @@ -1238,7 +1235,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, orig_buf->start, buf->start, orig_root, ref_root, orig_generation, ref_generation, - level - 1, 0); + level - 1); maybe_unlock_mutex(root); if (ret) { faili = i; @@ -1314,7 +1311,7 @@ int btrfs_update_ref(struct btrfs_trans_handle *trans, orig_buf->start, buf->start, orig_root, ref_root, orig_generation, ref_generation, - key.objectid, key.offset); + key.objectid); maybe_unlock_mutex(root); if (ret) goto fail; @@ -1325,7 +1322,7 @@ int btrfs_update_ref(struct btrfs_trans_handle *trans, orig_buf->start, buf->start, orig_root, ref_root, orig_generation, ref_generation, - level - 1, 0); + level - 1); maybe_unlock_mutex(root); if (ret) goto fail; @@ -1781,13 +1778,14 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, start, extent_op->parent, extent_root->root_key.objectid, extent_op->generation, - extent_op->level, 0); + extent_op->level); BUG_ON(err); } else if (extent_op->type == PENDING_BACKREF_UPDATE) { err = lookup_extent_backref(trans, extent_root, path, start, extent_op->orig_parent, extent_root->root_key.objectid, - extent_op->orig_generation, 0); + extent_op->orig_generation, + extent_op->level, 0); BUG_ON(err); clear_extent_bits(&info->extent_ins, start, end, @@ -1870,8 +1868,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner_objectid, u64 owner_offset, - int pin, int mark_free) + u64 owner_objectid, int pin, int mark_free) { struct btrfs_path *path; struct btrfs_key key; @@ -1894,8 +1891,9 @@ static int __free_extent(struct btrfs_trans_handle *trans, return -ENOMEM; path->reada = 1; - ret = lookup_extent_backref(trans, extent_root, path, bytenr, parent, - root_objectid, ref_generation, 1); + ret = lookup_extent_backref(trans, extent_root, path, + bytenr, parent, root_objectid, + ref_generation, owner_objectid, 1); if (ret == 0) { struct btrfs_key found_key; extent_slot = path->slots[0]; @@ -1926,9 +1924,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, btrfs_print_leaf(extent_root, path->nodes[0]); WARN_ON(1); printk("Unable to find ref byte nr %Lu root %Lu " - " gen %Lu owner %Lu offset %Lu\n", bytenr, - root_objectid, ref_generation, owner_objectid, - owner_offset); + "gen %Lu owner %Lu\n", bytenr, + root_objectid, ref_generation, owner_objectid); } leaf = path->nodes[0]; @@ -2068,7 +2065,7 @@ free_extent: extent_op->orig_parent, extent_root->root_key.objectid, extent_op->orig_generation, - extent_op->level, 0, 0, mark_free); + extent_op->level, 0, mark_free); kfree(extent_op); } else { kfree(extent_op); @@ -2107,7 +2104,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner_objectid, u64 owner_offset, int pin) + u64 owner_objectid, int pin) { struct btrfs_root *extent_root = root->fs_info->extent_root; int pending_ret; @@ -2156,8 +2153,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, pin = 1; ret = __free_extent(trans, root, bytenr, num_bytes, parent, - root_objectid, ref_generation, owner_objectid, - owner_offset, pin, pin == 0); + root_objectid, ref_generation, + owner_objectid, pin, pin == 0); finish_current_insert(trans, root->fs_info->extent_root); pending_ret = del_pending_extents(trans, root->fs_info->extent_root); @@ -2168,14 +2165,14 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner_objectid, u64 owner_offset, int pin) + u64 owner_objectid, int pin) { int ret; maybe_lock_mutex(root); ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent, root_objectid, ref_generation, - owner_objectid, owner_offset, pin); + owner_objectid, pin); maybe_unlock_mutex(root); return ret; } @@ -2522,8 +2519,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset, - struct btrfs_key *ins) + u64 owner, struct btrfs_key *ins) { int ret; int pending_ret; @@ -2597,7 +2593,6 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, btrfs_set_ref_root(path->nodes[0], ref, root_objectid); btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); btrfs_set_ref_objectid(path->nodes[0], ref, owner); - btrfs_set_ref_offset(path->nodes[0], ref, owner_offset); btrfs_set_ref_num_refs(path->nodes[0], ref, 1); btrfs_mark_buffer_dirty(path->nodes[0]); @@ -2629,17 +2624,15 @@ out: int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset, - struct btrfs_key *ins) + u64 owner, struct btrfs_key *ins) { int ret; if (root_objectid == BTRFS_TREE_LOG_OBJECTID) return 0; maybe_lock_mutex(root); - ret = __btrfs_alloc_reserved_extent(trans, root, parent, - root_objectid, ref_generation, - owner, owner_offset, ins); + ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, + ref_generation, owner, ins); update_reserved_extents(root, ins->objectid, ins->offset, 0); maybe_unlock_mutex(root); return ret; @@ -2653,8 +2646,7 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset, - struct btrfs_key *ins) + u64 owner, struct btrfs_key *ins) { int ret; struct btrfs_block_group_cache *block_group; @@ -2665,9 +2657,8 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, ret = btrfs_remove_free_space(block_group, ins->objectid, ins->offset); BUG_ON(ret); - ret = __btrfs_alloc_reserved_extent(trans, root, parent, - root_objectid, ref_generation, - owner, owner_offset, ins); + ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, + ref_generation, owner, ins); maybe_unlock_mutex(root); return ret; } @@ -2683,8 +2674,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_bytes, u64 parent, u64 min_alloc_size, u64 root_objectid, u64 ref_generation, - u64 owner_objectid, u64 owner_offset, - u64 empty_size, u64 hint_byte, + u64 owner_objectid, u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, u64 data) { int ret; @@ -2698,7 +2688,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, ref_generation, - owner_objectid, owner_offset, ins); + owner_objectid, ins); BUG_ON(ret); } else { @@ -2750,7 +2740,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct extent_buffer *buf; ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize, - root_objectid, ref_generation, level, 0, + root_objectid, ref_generation, level, empty_size, hint, (u64)-1, &ins, 0); if (ret) { BUG_ON(ret > 0); @@ -2800,7 +2790,7 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, ret = __btrfs_free_extent(trans, root, disk_bytenr, btrfs_file_extent_disk_num_bytes(leaf, fi), leaf->start, leaf_owner, leaf_generation, - key.objectid, key.offset, 0); + key.objectid, 0); mutex_unlock(&root->fs_info->alloc_mutex); BUG_ON(ret); @@ -2824,7 +2814,7 @@ static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans, ret = __btrfs_free_extent(trans, root, info->bytenr, info->num_bytes, ref->bytenr, ref->owner, ref->generation, - info->objectid, info->offset, 0); + info->objectid, 0); mutex_unlock(&root->fs_info->alloc_mutex); atomic_inc(&root->fs_info->throttle_gen); @@ -2940,7 +2930,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, - root_owner, root_gen, 0, 0, 1); + root_owner, root_gen, + *level - 1, 1); BUG_ON(ret); mutex_unlock(&root->fs_info->alloc_mutex); @@ -2970,9 +2961,10 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, *level = 0; break; } - if (printk_ratelimit()) + if (printk_ratelimit()) { printk("leaf ref miss for bytenr %llu\n", (unsigned long long)bytenr); + } } next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { @@ -3020,7 +3012,7 @@ out: mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, root_owner, root_gen, - 0, 0, 1); + *level, 1); mutex_unlock(&root->fs_info->alloc_mutex); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; @@ -3073,8 +3065,8 @@ static int noinline walk_up_tree(struct btrfs_trans_handle *trans, ret = btrfs_free_extent(trans, root, path->nodes[*level]->start, path->nodes[*level]->len, - parent->start, - root_owner, root_gen, 0, 0, 1); + parent->start, root_owner, + root_gen, *level, 1); BUG_ON(ret); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; @@ -3308,7 +3300,6 @@ struct btrfs_ref_path { u64 root_objectid; u64 root_generation; u64 owner_objectid; - u64 owner_offset; u32 num_refs; int lowest_level; int current_level; @@ -3480,7 +3471,6 @@ found: if (ref_path->lowest_level == level) { ref_path->owner_objectid = ref_objectid; - ref_path->owner_offset = btrfs_ref_offset(leaf, ref); ref_path->num_refs = btrfs_ref_num_refs(leaf, ref); } @@ -3686,16 +3676,20 @@ static int noinline replace_one_extent(struct btrfs_trans_handle *trans, u64 ext_offset; u64 first_pos; u32 nritems; + int nr_scaned = 0; int extent_locked = 0; int ret; - first_pos = ref_path->owner_offset; + memcpy(&key, leaf_key, sizeof(key)); + first_pos = INT_LIMIT(loff_t) - extent_key->offset; if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { - key.objectid = ref_path->owner_objectid; - key.offset = ref_path->owner_offset; - key.type = BTRFS_EXTENT_DATA_KEY; - } else { - memcpy(&key, leaf_key, sizeof(key)); + if (key.objectid < ref_path->owner_objectid || + (key.objectid == ref_path->owner_objectid && + key.type < BTRFS_EXTENT_DATA_KEY)) { + key.objectid = ref_path->owner_objectid; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = 0; + } } while (1) { @@ -3718,8 +3712,7 @@ next: } if (path->slots[0] >= nritems) { - if (ref_path->owner_objectid == - BTRFS_MULTIPLE_OBJECTIDS) + if (++nr_scaned > 2) break; BUG_ON(extent_locked); @@ -3858,7 +3851,7 @@ next: leaf->start, root->root_key.objectid, trans->transid, - key.objectid, key.offset); + key.objectid); BUG_ON(ret); ret = btrfs_free_extent(trans, root, @@ -3867,7 +3860,7 @@ next: leaf->start, btrfs_header_owner(leaf), btrfs_header_generation(leaf), - key.objectid, key.offset, 0); + key.objectid, 0); BUG_ON(ret); btrfs_release_path(root, path); @@ -3925,8 +3918,7 @@ next: new_extents[i].disk_num_bytes, leaf->start, root->root_key.objectid, - trans->transid, - key.objectid, key.offset); + trans->transid, key.objectid); BUG_ON(ret); btrfs_release_path(root, path); @@ -4182,14 +4174,13 @@ static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans, new_extent->disk_num_bytes, leaf->start, root->root_key.objectid, - trans->transid, - key.objectid, key.offset); + trans->transid, key.objectid); BUG_ON(ret); ret = btrfs_free_extent(trans, root, bytenr, num_bytes, leaf->start, btrfs_header_owner(leaf), btrfs_header_generation(leaf), - key.objectid, key.offset, 0); + key.objectid, 0); BUG_ON(ret); cond_resched(); } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 18dfdf5f91d..69abbe19add 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -788,8 +788,7 @@ next_slot: le64_to_cpu(old.disk_num_bytes), leaf->start, root->root_key.objectid, - trans->transid, - ins.objectid, ins.offset); + trans->transid, ins.objectid); BUG_ON(ret); } btrfs_release_path(root, path); @@ -808,8 +807,7 @@ next_slot: disk_bytenr, le64_to_cpu(old.disk_num_bytes), leaf_start, root_owner, - root_gen, key.objectid, - key.offset, 0); + root_gen, key.objectid, 0); BUG_ON(ret); *hint_byte = disk_bytenr; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f9df89c5fdf..3ab147dc3c0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -647,8 +647,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ins.type = BTRFS_EXTENT_ITEM_KEY; ret = btrfs_alloc_reserved_extent(trans, root, leaf->start, root->root_key.objectid, - trans->transid, inode->i_ino, - ordered_extent->file_offset, &ins); + trans->transid, inode->i_ino, &ins); BUG_ON(ret); btrfs_release_path(root, path); @@ -1734,8 +1733,7 @@ delete: ret = btrfs_free_extent(trans, root, extent_start, extent_num_bytes, leaf->start, root_owner, - root_gen, inode->i_ino, - found_key.offset, 0); + root_gen, inode->i_ino, 0); BUG_ON(ret); } next: diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ab7a0f61ded..50c8a066d1f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -658,7 +658,7 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) ds, dl, leaf->start, root->root_key.objectid, trans->transid, - inode->i_ino, key.offset); + inode->i_ino); BUG_ON(ret); } } diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 3577badfa5b..bd9ab3e9a7f 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -102,11 +102,10 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) case BTRFS_EXTENT_REF_KEY: ref = btrfs_item_ptr(l, i, struct btrfs_extent_ref); printk("\t\textent back ref root %llu gen %llu " - "owner %llu offset %llu num_refs %lu\n", + "owner %llu num_refs %lu\n", (unsigned long long)btrfs_ref_root(l, ref), (unsigned long long)btrfs_ref_generation(l, ref), (unsigned long long)btrfs_ref_objectid(l, ref), - (unsigned long long)btrfs_ref_offset(l, ref), (unsigned long)btrfs_ref_num_refs(l, ref)); break; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 8df719a73d8..cf618cc8b34 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -460,8 +460,7 @@ insert: ins.objectid, ins.offset, path->nodes[0]->start, root->root_key.objectid, - trans->transid, - key->objectid, key->offset); + trans->transid, key->objectid); } else { /* * insert the extent pointer in the extent @@ -471,7 +470,7 @@ insert: path->nodes[0]->start, root->root_key.objectid, trans->transid, key->objectid, - key->offset, &ins); + &ins); BUG_ON(ret); } } @@ -2534,8 +2533,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, dst_path->nodes[0]->start, BTRFS_TREE_LOG_OBJECTID, trans->transid, - ins_keys[i].objectid, - ins_keys[i].offset); + ins_keys[i].objectid); BUG_ON(ret); } } -- cgit v1.2.3-70-g09d2 From 5b84e8d6eef82fcf7b1b16d92e29375b85b6549a Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Thu, 9 Oct 2008 11:46:19 -0400 Subject: Btrfs: Fix leaf reference cache miss Due to the optimization for truncate, tree leaves only containing checksum items can be deleted without being COW'ed first. This causes reference cache misses. The way to fix the miss is create cache entries for tree leaves only contain checksum. This patch also fixes a -EEXIST issue in shared reference cache. Signed-off-by: Yan Zheng --- fs/btrfs/extent-tree.c | 8 ++++++++ fs/btrfs/inode.c | 26 +++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ab36769c356..280ac1aa9b6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1151,6 +1151,14 @@ int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, } ret = btrfs_add_leaf_ref(root, ref, shared); + if (ret == -EEXIST && shared) { + struct btrfs_leaf_ref *old; + old = btrfs_lookup_leaf_ref(root, ref->bytenr); + BUG_ON(!old); + btrfs_remove_leaf_ref(root, old); + btrfs_free_leaf_ref(root, old); + ret = btrfs_add_leaf_ref(root, ref, shared); + } WARN_ON(ret); btrfs_free_leaf_ref(root, ref); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3ab147dc3c0..11bfe131fde 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -48,6 +48,7 @@ #include "xattr.h" #include "compat.h" #include "tree-log.h" +#include "ref-cache.h" struct btrfs_iget_args { u64 ino; @@ -1416,6 +1417,9 @@ static noinline int drop_csum_leaves(struct btrfs_trans_handle *trans, int nritems; struct btrfs_key found_key; struct btrfs_key other_key; + struct btrfs_leaf_ref *ref; + u64 leaf_gen; + u64 leaf_start; path->lowest_level = 1; key.objectid = inode->i_ino; @@ -1509,15 +1513,31 @@ next_node: if (other_key.objectid != inode->i_ino || other_key.type != key.type) goto out; + leaf_start = btrfs_node_blockptr(path->nodes[1], path->slots[1]); + leaf_gen = btrfs_node_ptr_generation(path->nodes[1], path->slots[1]); /* * it is safe to delete this leaf, it contains only * csum items from this inode at an offset >= new_size */ - ret = btrfs_del_leaf(trans, root, path, - btrfs_node_blockptr(path->nodes[1], - path->slots[1])); + ret = btrfs_del_leaf(trans, root, path, leaf_start); BUG_ON(ret); + if (root->ref_cows && leaf_gen < trans->transid) { + ref = btrfs_alloc_leaf_ref(root, 0); + if (ref) { + ref->root_gen = root->root_key.offset; + ref->bytenr = leaf_start; + ref->owner = 0; + ref->generation = leaf_gen; + ref->nritems = 0; + + ret = btrfs_add_leaf_ref(root, ref, 0); + WARN_ON(ret); + btrfs_free_leaf_ref(root, ref); + } else { + WARN_ON(1); + } + } next_key: btrfs_release_path(root, path); -- cgit v1.2.3-70-g09d2 From c8b978188c9a0fd3d535c13debd19d522b726f1f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 29 Oct 2008 14:49:59 -0400 Subject: Btrfs: Add zlib compression support This is a large change for adding compression on reading and writing, both for inline and regular extents. It does some fairly large surgery to the writeback paths. Compression is off by default and enabled by mount -o compress. Even when the -o compress mount option is not used, it is possible to read compressed extents off the disk. If compression for a given set of pages fails to make them smaller, the file is flagged to avoid future compression attempts later. * While finding delalloc extents, the pages are locked before being sent down to the delalloc handler. This allows the delalloc handler to do complex things such as cleaning the pages, marking them writeback and starting IO on their behalf. * Inline extents are inserted at delalloc time now. This allows us to compress the data before inserting the inline extent, and it allows us to insert an inline extent that spans multiple pages. * All of the in-memory extent representations (extent_map.c, ordered-data.c etc) are changed to record both an in-memory size and an on disk size, as well as a flag for compression. From a disk format point of view, the extent pointers in the file are changed to record the on disk size of a given extent and some encoding flags. Space in the disk format is allocated for compression encoding, as well as encryption and a generic 'other' field. Neither the encryption or the 'other' field are currently used. In order to limit the amount of data read for a single random read in the file, the size of a compressed extent is limited to 128k. This is a software only limit, the disk format supports u64 sized compressed extents. In order to limit the ram consumed while processing extents, the uncompressed size of a compressed extent is limited to 256k. This is a software only limit and will be subject to tuning later. Checksumming is still done on compressed extents, and it is done on the uncompressed version of the data. This way additional encodings can be layered on without having to figure out which encoding to checksum. Compression happens at delalloc time, which is basically singled threaded because it is usually done by a single pdflush thread. This makes it tricky to spread the compression load across all the cpus on the box. We'll have to look at parallel pdflush walks of dirty inodes at a later time. Decompression is hooked into readpages and it does spread across CPUs nicely. Signed-off-by: Chris Mason --- fs/Kconfig | 2 + fs/btrfs/Makefile | 3 +- fs/btrfs/compression.c | 454 ++++++++++++++++++++++++++++++++++ fs/btrfs/compression.h | 47 ++++ fs/btrfs/ctree.h | 99 ++++++-- fs/btrfs/disk-io.c | 18 +- fs/btrfs/disk-io.h | 1 + fs/btrfs/extent-tree.c | 27 +- fs/btrfs/extent_io.c | 411 ++++++++++++++++++++++++------- fs/btrfs/extent_io.h | 17 +- fs/btrfs/extent_map.c | 9 + fs/btrfs/extent_map.h | 6 +- fs/btrfs/file-item.c | 75 +++++- fs/btrfs/file.c | 263 +++++--------------- fs/btrfs/inode.c | 584 ++++++++++++++++++++++++++++++++++++++++---- fs/btrfs/ordered-data.c | 9 +- fs/btrfs/ordered-data.h | 10 +- fs/btrfs/print-tree.c | 7 +- fs/btrfs/super.c | 10 +- fs/btrfs/tree-log.c | 3 +- fs/btrfs/volumes.c | 2 + fs/btrfs/zlib.c | 637 ++++++++++++++++++++++++++++++++++++++++++++++++ 22 files changed, 2315 insertions(+), 379 deletions(-) create mode 100644 fs/btrfs/compression.c create mode 100644 fs/btrfs/compression.h create mode 100644 fs/btrfs/zlib.c (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/Kconfig b/fs/Kconfig index 18f5a85b47c..31cce5d88b1 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -501,6 +501,8 @@ config BTRFS_FS tristate "Btrfs filesystem (EXPERIMENTAL) Unstable disk format" depends on EXPERIMENTAL select LIBCRC32C + select ZLIB_INFLATE + select ZLIB_DEFLATE help Btrfs is a new filesystem with extents, writable snapshotting, support for multiple devices and many more features. diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 7125716e142..d2cf5a54a4b 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -7,7 +7,8 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ transaction.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ - ref-cache.o export.o tree-log.o acl.o free-space-cache.o + ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \ + compression.o else # Normal Makefile diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c new file mode 100644 index 00000000000..c5470367ca5 --- /dev/null +++ b/fs/btrfs/compression.c @@ -0,0 +1,454 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ctree.h" +#include "disk-io.h" +#include "transaction.h" +#include "btrfs_inode.h" +#include "volumes.h" +#include "ordered-data.h" +#include "compat.h" +#include "compression.h" +#include "extent_io.h" +#include "extent_map.h" + +struct compressed_bio { + /* number of bios pending for this compressed extent */ + atomic_t pending_bios; + + /* the pages with the compressed data on them */ + struct page **compressed_pages; + + /* inode that owns this data */ + struct inode *inode; + + /* starting offset in the inode for our pages */ + u64 start; + + /* number of bytes in the inode we're working on */ + unsigned long len; + + /* number of bytes on disk */ + unsigned long compressed_len; + + /* number of compressed pages in the array */ + unsigned long nr_pages; + + /* IO errors */ + int errors; + + /* for reads, this is the bio we are copying the data into */ + struct bio *orig_bio; +}; + +static struct bio *compressed_bio_alloc(struct block_device *bdev, + u64 first_byte, gfp_t gfp_flags) +{ + struct bio *bio; + int nr_vecs; + + nr_vecs = bio_get_nr_vecs(bdev); + bio = bio_alloc(gfp_flags, nr_vecs); + + if (bio == NULL && (current->flags & PF_MEMALLOC)) { + while (!bio && (nr_vecs /= 2)) + bio = bio_alloc(gfp_flags, nr_vecs); + } + + if (bio) { + bio->bi_size = 0; + bio->bi_bdev = bdev; + bio->bi_sector = first_byte >> 9; + } + return bio; +} + +/* when we finish reading compressed pages from the disk, we + * decompress them and then run the bio end_io routines on the + * decompressed pages (in the inode address space). + * + * This allows the checksumming and other IO error handling routines + * to work normally + * + * The compressed pages are freed here, and it must be run + * in process context + */ +static void end_compressed_bio_read(struct bio *bio, int err) +{ + struct extent_io_tree *tree; + struct compressed_bio *cb = bio->bi_private; + struct inode *inode; + struct page *page; + unsigned long index; + int ret; + + if (err) + cb->errors = 1; + + /* if there are more bios still pending for this compressed + * extent, just exit + */ + if (!atomic_dec_and_test(&cb->pending_bios)) + goto out; + + /* ok, we're the last bio for this extent, lets start + * the decompression. + */ + inode = cb->inode; + tree = &BTRFS_I(inode)->io_tree; + ret = btrfs_zlib_decompress_biovec(cb->compressed_pages, + cb->start, + cb->orig_bio->bi_io_vec, + cb->orig_bio->bi_vcnt, + cb->compressed_len); + if (ret) + cb->errors = 1; + + /* release the compressed pages */ + index = 0; + for (index = 0; index < cb->nr_pages; index++) { + page = cb->compressed_pages[index]; + page->mapping = NULL; + page_cache_release(page); + } + + /* do io completion on the original bio */ + if (cb->errors) + bio_io_error(cb->orig_bio); + else + bio_endio(cb->orig_bio, 0); + + /* finally free the cb struct */ + kfree(cb->compressed_pages); + kfree(cb); +out: + bio_put(bio); +} + +/* + * Clear the writeback bits on all of the file + * pages for a compressed write + */ +static noinline int end_compressed_writeback(struct inode *inode, u64 start, + unsigned long ram_size) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = (start + ram_size - 1) >> PAGE_CACHE_SHIFT; + struct page *pages[16]; + unsigned long nr_pages = end_index - index + 1; + int i; + int ret; + + while(nr_pages > 0) { + ret = find_get_pages_contig(inode->i_mapping, index, + min(nr_pages, ARRAY_SIZE(pages)), pages); + if (ret == 0) { + nr_pages -= 1; + index += 1; + continue; + } + for (i = 0; i < ret; i++) { + end_page_writeback(pages[i]); + page_cache_release(pages[i]); + } + nr_pages -= ret; + index += ret; + } + /* the inode may be gone now */ + return 0; +} + +/* + * do the cleanup once all the compressed pages hit the disk. + * This will clear writeback on the file pages and free the compressed + * pages. + * + * This also calls the writeback end hooks for the file pages so that + * metadata and checksums can be updated in the file. + */ +static void end_compressed_bio_write(struct bio *bio, int err) +{ + struct extent_io_tree *tree; + struct compressed_bio *cb = bio->bi_private; + struct inode *inode; + struct page *page; + unsigned long index; + + if (err) + cb->errors = 1; + + /* if there are more bios still pending for this compressed + * extent, just exit + */ + if (!atomic_dec_and_test(&cb->pending_bios)) + goto out; + + /* ok, we're the last bio for this extent, step one is to + * call back into the FS and do all the end_io operations + */ + inode = cb->inode; + tree = &BTRFS_I(inode)->io_tree; + tree->ops->writepage_end_io_hook(cb->compressed_pages[0], + cb->start, + cb->start + cb->len - 1, + NULL, 1); + + end_compressed_writeback(inode, cb->start, cb->len); + /* note, our inode could be gone now */ + + /* + * release the compressed pages, these came from alloc_page and + * are not attached to the inode at all + */ + index = 0; + for (index = 0; index < cb->nr_pages; index++) { + page = cb->compressed_pages[index]; + page->mapping = NULL; + page_cache_release(page); + } + + /* finally free the cb struct */ + kfree(cb->compressed_pages); + kfree(cb); +out: + bio_put(bio); +} + +/* + * worker function to build and submit bios for previously compressed pages. + * The corresponding pages in the inode should be marked for writeback + * and the compressed pages should have a reference on them for dropping + * when the IO is complete. + * + * This also checksums the file bytes and gets things ready for + * the end io hooks. + */ +int btrfs_submit_compressed_write(struct inode *inode, u64 start, + unsigned long len, u64 disk_start, + unsigned long compressed_len, + struct page **compressed_pages, + unsigned long nr_pages) +{ + struct bio *bio = NULL; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct compressed_bio *cb; + unsigned long bytes_left; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + int page_index = 0; + struct page *page; + u64 first_byte = disk_start; + struct block_device *bdev; + int ret; + + WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1)); + cb = kmalloc(sizeof(*cb), GFP_NOFS); + atomic_set(&cb->pending_bios, 0); + cb->errors = 0; + cb->inode = inode; + cb->start = start; + cb->len = len; + cb->compressed_pages = compressed_pages; + cb->compressed_len = compressed_len; + cb->orig_bio = NULL; + cb->nr_pages = nr_pages; + + bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; + + ret = btrfs_csum_file_bytes(root, inode, start, len); + BUG_ON(ret); + + bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); + bio->bi_private = cb; + bio->bi_end_io = end_compressed_bio_write; + atomic_inc(&cb->pending_bios); + + /* create and submit bios for the compressed pages */ + bytes_left = compressed_len; + while(bytes_left > 0) { + page = compressed_pages[page_index]; + page->mapping = inode->i_mapping; + if (bio->bi_size) + ret = io_tree->ops->merge_bio_hook(page, 0, + PAGE_CACHE_SIZE, + bio, 0); + else + ret = 0; + + if (ret || bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < + PAGE_CACHE_SIZE) { + bio_get(bio); + + ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); + BUG_ON(ret); + + ret = btrfs_map_bio(root, WRITE, bio, 0, 1); + BUG_ON(ret); + + bio_put(bio); + + bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); + atomic_inc(&cb->pending_bios); + bio->bi_private = cb; + bio->bi_end_io = end_compressed_bio_write; + bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); + } + page_index++; + bytes_left -= PAGE_CACHE_SIZE; + first_byte += PAGE_CACHE_SIZE; + } + bio_get(bio); + + ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); + BUG_ON(ret); + + ret = btrfs_map_bio(root, WRITE, bio, 0, 1); + BUG_ON(ret); + + bio_put(bio); + return 0; +} + +/* + * for a compressed read, the bio we get passed has all the inode pages + * in it. We don't actually do IO on those pages but allocate new ones + * to hold the compressed pages on disk. + * + * bio->bi_sector points to the compressed extent on disk + * bio->bi_io_vec points to all of the inode pages + * bio->bi_vcnt is a count of pages + * + * After the compressed pages are read, we copy the bytes into the + * bio we were passed and then call the bio end_io calls + */ +int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, + int mirror_num, unsigned long bio_flags) +{ + struct extent_io_tree *tree; + struct extent_map_tree *em_tree; + struct compressed_bio *cb; + struct btrfs_root *root = BTRFS_I(inode)->root; + unsigned long uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE; + unsigned long compressed_len; + unsigned long nr_pages; + unsigned long page_index; + struct page *page; + struct block_device *bdev; + struct bio *comp_bio; + u64 cur_disk_byte = (u64)bio->bi_sector << 9; + struct extent_map *em; + int ret; + + tree = &BTRFS_I(inode)->io_tree; + em_tree = &BTRFS_I(inode)->extent_tree; + + /* we need the actual starting offset of this extent in the file */ + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, + page_offset(bio->bi_io_vec->bv_page), + PAGE_CACHE_SIZE); + spin_unlock(&em_tree->lock); + + cb = kmalloc(sizeof(*cb), GFP_NOFS); + atomic_set(&cb->pending_bios, 0); + cb->errors = 0; + cb->inode = inode; + + cb->start = em->start; + compressed_len = em->block_len; + free_extent_map(em); + + cb->len = uncompressed_len; + cb->compressed_len = compressed_len; + cb->orig_bio = bio; + + nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / + PAGE_CACHE_SIZE; + cb->compressed_pages = kmalloc(sizeof(struct page *) * nr_pages, + GFP_NOFS); + bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; + + for (page_index = 0; page_index < nr_pages; page_index++) { + cb->compressed_pages[page_index] = alloc_page(GFP_NOFS | + __GFP_HIGHMEM); + } + cb->nr_pages = nr_pages; + + comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS); + comp_bio->bi_private = cb; + comp_bio->bi_end_io = end_compressed_bio_read; + atomic_inc(&cb->pending_bios); + + for (page_index = 0; page_index < nr_pages; page_index++) { + page = cb->compressed_pages[page_index]; + page->mapping = inode->i_mapping; + if (comp_bio->bi_size) + ret = tree->ops->merge_bio_hook(page, 0, + PAGE_CACHE_SIZE, + comp_bio, 0); + else + ret = 0; + + if (ret || bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0) < + PAGE_CACHE_SIZE) { + bio_get(comp_bio); + + ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); + BUG_ON(ret); + + ret = btrfs_map_bio(root, READ, comp_bio, 0, 0); + BUG_ON(ret); + + bio_put(comp_bio); + + comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, + GFP_NOFS); + atomic_inc(&cb->pending_bios); + bio->bi_private = cb; + bio->bi_end_io = end_compressed_bio_write; + bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); + } + cur_disk_byte += PAGE_CACHE_SIZE; + } + bio_get(comp_bio); + + ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); + BUG_ON(ret); + + ret = btrfs_map_bio(root, READ, comp_bio, 0, 0); + BUG_ON(ret); + + bio_put(comp_bio); + return 0; +} diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h new file mode 100644 index 00000000000..421f5b4aa71 --- /dev/null +++ b/fs/btrfs/compression.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_COMPRESSION_ +#define __BTRFS_COMPRESSION_ + +int btrfs_zlib_decompress(unsigned char *data_in, + struct page *dest_page, + unsigned long start_byte, + size_t srclen, size_t destlen); +int btrfs_zlib_compress_pages(struct address_space *mapping, + u64 start, unsigned long len, + struct page **pages, + unsigned long nr_dest_pages, + unsigned long *out_pages, + unsigned long *total_in, + unsigned long *total_out, + unsigned long max_out); +int btrfs_zlib_decompress_biovec(struct page **pages_in, + u64 disk_start, + struct bio_vec *bvec, + int vcnt, + size_t srclen); +void btrfs_zlib_exit(void); +int btrfs_submit_compressed_write(struct inode *inode, u64 start, + unsigned long len, u64 disk_start, + unsigned long compressed_len, + struct page **compressed_pages, + unsigned long nr_pages); +int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, + int mirror_num, unsigned long bio_flags); +#endif diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8559f39fd47..793d8fdda24 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -400,10 +400,18 @@ struct btrfs_timespec { __le32 nsec; } __attribute__ ((__packed__)); -/* - * there is no padding here on purpose. If you want to extent the inode, - * make a new item type - */ +typedef enum { + BTRFS_COMPRESS_NONE = 0, + BTRFS_COMPRESS_ZLIB = 1, + BTRFS_COMPRESS_LAST = 2, +} btrfs_compression_type; + +/* we don't understand any encryption methods right now */ +typedef enum { + BTRFS_ENCRYPTION_NONE = 0, + BTRFS_ENCRYPTION_LAST = 1, +} btrfs_encryption_type; + struct btrfs_inode_item { /* nfs style generation number */ __le64 generation; @@ -419,6 +427,7 @@ struct btrfs_inode_item { __le64 rdev; __le16 flags; __le16 compat_flags; + struct btrfs_timespec atime; struct btrfs_timespec ctime; struct btrfs_timespec mtime; @@ -454,8 +463,33 @@ struct btrfs_root_item { #define BTRFS_FILE_EXTENT_INLINE 1 struct btrfs_file_extent_item { + /* + * transaction id that created this extent + */ __le64 generation; + /* + * max number of bytes to hold this extent in ram + * when we split a compressed extent we can't know how big + * each of the resulting pieces will be. So, this is + * an upper limit on the size of the extent in ram instead of + * an exact limit. + */ + __le64 ram_bytes; + + /* + * 32 bits for the various ways we might encode the data, + * including compression and encryption. If any of these + * are set to something a given disk format doesn't understand + * it is treated like an incompat flag for reading and writing, + * but not for stat. + */ + u8 compression; + u8 encryption; + __le16 other_encoding; /* spare for later use */ + + /* are we inline data or a real extent? */ u8 type; + /* * disk space consumed by the extent, checksum blocks are included * in these numbers @@ -471,9 +505,11 @@ struct btrfs_file_extent_item { */ __le64 offset; /* - * the logical number of file blocks (no csums included) + * the logical number of file blocks (no csums included). This + * always reflects the size uncompressed and without encoding. */ __le64 num_bytes; + } __attribute__ ((__packed__)); struct btrfs_csum_item { @@ -814,6 +850,7 @@ struct btrfs_root { #define BTRFS_MOUNT_NOBARRIER (1 << 2) #define BTRFS_MOUNT_SSD (1 << 3) #define BTRFS_MOUNT_DEGRADED (1 << 4) +#define BTRFS_MOUNT_COMPRESS (1 << 5) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) @@ -825,6 +862,7 @@ struct btrfs_root { #define BTRFS_INODE_NODATASUM (1 << 0) #define BTRFS_INODE_NODATACOW (1 << 1) #define BTRFS_INODE_READONLY (1 << 2) +#define BTRFS_INODE_NOCOMPRESS (1 << 3) #define btrfs_clear_flag(inode, flag) (BTRFS_I(inode)->flags &= \ ~BTRFS_INODE_##flag) #define btrfs_set_flag(inode, flag) (BTRFS_I(inode)->flags |= \ @@ -1424,14 +1462,6 @@ static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize; } -static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb, - struct btrfs_item *e) -{ - unsigned long offset; - offset = offsetof(struct btrfs_file_extent_item, disk_bytenr); - return btrfs_item_size(eb, e) - offset; -} - BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item, disk_bytenr, 64); BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, @@ -1442,6 +1472,36 @@ BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item, offset, 64); BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item, num_bytes, 64); +BTRFS_SETGET_FUNCS(file_extent_ram_bytes, struct btrfs_file_extent_item, + ram_bytes, 64); +BTRFS_SETGET_FUNCS(file_extent_compression, struct btrfs_file_extent_item, + compression, 8); +BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item, + encryption, 8); +BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item, + other_encoding, 16); + +/* this returns the number of file bytes represented by the inline item. + * If an item is compressed, this is the uncompressed size + */ +static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb, + struct btrfs_file_extent_item *e) +{ + return btrfs_file_extent_ram_bytes(eb, e); +} + +/* + * this returns the number of bytes used by the item on disk, minus the + * size of any extent headers. If a file is compressed on disk, this is + * the compressed size + */ +static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb, + struct btrfs_item *e) +{ + unsigned long offset; + offset = offsetof(struct btrfs_file_extent_item, disk_bytenr); + return btrfs_item_size(eb, e) - offset; +} static inline struct btrfs_root *btrfs_sb(struct super_block *sb) { @@ -1745,10 +1805,11 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, struct bio *bio); int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 objectid, u64 pos, u64 disk_offset, - u64 disk_num_bytes, - u64 num_bytes, u64 offset); + struct btrfs_root *root, + u64 objectid, u64 pos, + u64 disk_offset, u64 disk_num_bytes, + u64 num_bytes, u64 offset, u64 ram_bytes, + u8 compression, u8 encryption, u16 other_encoding); int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, @@ -1758,6 +1819,8 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_ordered_sum *sums); int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, struct bio *bio); +int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode, + u64 start, unsigned long len); struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, @@ -1799,7 +1862,7 @@ void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name, int namelen); int btrfs_merge_bio_hook(struct page *page, unsigned long offset, - size_t size, struct bio *bio); + size_t size, struct bio *bio, unsigned long bio_flags); unsigned long btrfs_force_ra(struct address_space *mapping, struct file_ra_state *ra, struct file *file, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0be044bb619..dc95f636a11 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -83,6 +83,7 @@ struct async_submit_bio { extent_submit_bio_hook_t *submit_bio_hook; int rw; int mirror_num; + unsigned long bio_flags; struct btrfs_work work; }; @@ -115,6 +116,7 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, } em->start = 0; em->len = (u64)-1; + em->block_len = (u64)-1; em->block_start = 0; em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; @@ -469,12 +471,13 @@ static void run_one_async_submit(struct btrfs_work *work) wake_up(&fs_info->async_submit_wait); async->submit_bio_hook(async->inode, async->rw, async->bio, - async->mirror_num); + async->mirror_num, async->bio_flags); kfree(async); } int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, int rw, struct bio *bio, int mirror_num, + unsigned long bio_flags, extent_submit_bio_hook_t *submit_bio_hook) { struct async_submit_bio *async; @@ -491,6 +494,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, async->submit_bio_hook = submit_bio_hook; async->work.func = run_one_async_submit; async->work.flags = 0; + async->bio_flags = bio_flags; while(atomic_read(&fs_info->async_submit_draining) && atomic_read(&fs_info->nr_async_submits)) { @@ -530,7 +534,7 @@ static int btree_csum_one_bio(struct bio *bio) } static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, - int mirror_num) + int mirror_num, unsigned long bio_flags) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret; @@ -556,17 +560,17 @@ static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, } static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, - int mirror_num) + int mirror_num, unsigned long bio_flags) { /* * kthread helpers are used to submit writes so that checksumming * can happen in parallel across all CPUs */ if (!(rw & (1 << BIO_RW))) { - return __btree_submit_bio_hook(inode, rw, bio, mirror_num); + return __btree_submit_bio_hook(inode, rw, bio, mirror_num, 0); } return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, - inode, rw, bio, mirror_num, + inode, rw, bio, mirror_num, 0, __btree_submit_bio_hook); } @@ -1407,6 +1411,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; + fs_info->thread_pool_size = min(num_online_cpus() + 2, 8); INIT_LIST_HEAD(&fs_info->ordered_extents); @@ -1508,6 +1513,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, */ btrfs_init_workers(&fs_info->workers, "worker", fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->submit_workers, "submit", min_t(u64, fs_devices->num_devices, fs_info->thread_pool_size)); @@ -1559,6 +1565,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, } fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); + fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, + 4 * 1024 * 1024 / PAGE_CACHE_SIZE); nodesize = btrfs_super_nodesize(disk_super); leafsize = btrfs_super_leafsize(disk_super); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index f84f5058dbb..4eb1f1408d2 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -71,6 +71,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, int metadata); int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, int rw, struct bio *bio, int mirror_num, + unsigned long bio_flags, extent_submit_bio_hook_t *submit_bio_hook); int btrfs_congested_async(struct btrfs_fs_info *info, int iodone); unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 280ac1aa9b6..bbf04e80a1a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3278,6 +3278,7 @@ static int noinline relocate_data_extent(struct inode *reloc_inode, em->start = extent_key->objectid - offset; em->len = extent_key->offset; + em->block_len = extent_key->offset; em->block_start = extent_key->objectid; em->bdev = root->fs_info->fs_devices->latest_bdev; set_bit(EXTENT_FLAG_PINNED, &em->flags); @@ -3314,10 +3315,14 @@ struct btrfs_ref_path { }; struct disk_extent { + u64 ram_bytes; u64 disk_bytenr; u64 disk_num_bytes; u64 offset; u64 num_bytes; + u8 compression; + u8 encryption; + u16 other_encoding; }; static int is_cowonly_root(u64 root_objectid) @@ -3631,6 +3636,11 @@ static int noinline get_new_locations(struct inode *reloc_inode, btrfs_file_extent_disk_num_bytes(leaf, fi); exts[nr].offset = btrfs_file_extent_offset(leaf, fi); exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi); + exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); + exts[nr].compression = btrfs_file_extent_compression(leaf, fi); + exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi); + exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf, + fi); WARN_ON(exts[nr].offset > 0); WARN_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes); @@ -3846,6 +3856,8 @@ next: new_extents[0].disk_bytenr); btrfs_set_file_extent_disk_num_bytes(leaf, fi, new_extents[0].disk_num_bytes); + btrfs_set_file_extent_ram_bytes(leaf, fi, + new_extents[0].ram_bytes); ext_offset += new_extents[0].offset; btrfs_set_file_extent_offset(leaf, fi, ext_offset); btrfs_mark_buffer_dirty(leaf); @@ -3911,6 +3923,16 @@ next: new_extents[i].disk_bytenr); btrfs_set_file_extent_disk_num_bytes(leaf, fi, new_extents[i].disk_num_bytes); + btrfs_set_file_extent_ram_bytes(leaf, fi, + new_extents[i].ram_bytes); + + btrfs_set_file_extent_compression(leaf, fi, + new_extents[i].compression); + btrfs_set_file_extent_encryption(leaf, fi, + new_extents[i].encryption); + btrfs_set_file_extent_other_encoding(leaf, fi, + new_extents[i].other_encoding); + btrfs_set_file_extent_num_bytes(leaf, fi, extent_len); ext_offset += new_extents[i].offset; @@ -4169,6 +4191,8 @@ static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans, ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes; btrfs_set_file_extent_generation(leaf, fi, trans->transid); + btrfs_set_file_extent_ram_bytes(leaf, fi, + new_extent->ram_bytes); btrfs_set_file_extent_disk_bytenr(leaf, fi, new_extent->disk_bytenr); btrfs_set_file_extent_disk_num_bytes(leaf, fi, @@ -4847,7 +4871,8 @@ static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info, BUG_ON(err); err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, - group->key.offset, 0); + group->key.offset, 0, group->key.offset, + 0, 0, 0); BUG_ON(err); inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 563b2d12f4f..314041fdfa4 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -30,6 +30,7 @@ static struct kmem_cache *extent_buffer_cache; static LIST_HEAD(buffers); static LIST_HEAD(states); +#define LEAK_DEBUG 1 #ifdef LEAK_DEBUG static spinlock_t leak_lock = SPIN_LOCK_UNLOCKED; #endif @@ -1067,8 +1068,8 @@ EXPORT_SYMBOL(find_first_extent_bit_state); * * 1 is returned if we find something, 0 if nothing was in the tree */ -static noinline u64 find_lock_delalloc_range(struct extent_io_tree *tree, - u64 *start, u64 *end, u64 max_bytes) +static noinline u64 find_delalloc_range(struct extent_io_tree *tree, + u64 *start, u64 *end, u64 max_bytes) { struct rb_node *node; struct extent_state *state; @@ -1077,11 +1078,11 @@ static noinline u64 find_lock_delalloc_range(struct extent_io_tree *tree, u64 total_bytes = 0; spin_lock_irq(&tree->lock); + /* * this search will find all the extents that end after * our range starts. */ -search_again: node = tree_search(tree, cur_start); if (!node) { if (!found) @@ -1100,40 +1101,6 @@ search_again: *end = state->end; goto out; } - if (!found && !(state->state & EXTENT_BOUNDARY)) { - struct extent_state *prev_state; - struct rb_node *prev_node = node; - while(1) { - prev_node = rb_prev(prev_node); - if (!prev_node) - break; - prev_state = rb_entry(prev_node, - struct extent_state, - rb_node); - if ((prev_state->end + 1 != state->start) || - !(prev_state->state & EXTENT_DELALLOC)) - break; - if ((cur_start - prev_state->start) * 2 > - max_bytes) - break; - state = prev_state; - node = prev_node; - } - } - if (state->state & EXTENT_LOCKED) { - DEFINE_WAIT(wait); - atomic_inc(&state->refs); - prepare_to_wait(&state->wq, &wait, - TASK_UNINTERRUPTIBLE); - spin_unlock_irq(&tree->lock); - schedule(); - spin_lock_irq(&tree->lock); - finish_wait(&state->wq, &wait); - free_extent_state(state); - goto search_again; - } - set_state_cb(tree, state, EXTENT_LOCKED); - state->state |= EXTENT_LOCKED; if (!found) *start = state->start; found++; @@ -1151,6 +1118,208 @@ out: return found; } +static noinline int __unlock_for_delalloc(struct inode *inode, + struct page *locked_page, + u64 start, u64 end) +{ + int ret; + struct page *pages[16]; + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + unsigned long nr_pages = end_index - index + 1; + int i; + + if (index == locked_page->index && end_index == index) + return 0; + + while(nr_pages > 0) { + ret = find_get_pages_contig(inode->i_mapping, index, + min(nr_pages, ARRAY_SIZE(pages)), pages); + for (i = 0; i < ret; i++) { + if (pages[i] != locked_page) + unlock_page(pages[i]); + page_cache_release(pages[i]); + } + nr_pages -= ret; + index += ret; + cond_resched(); + } + return 0; +} + +static noinline int lock_delalloc_pages(struct inode *inode, + struct page *locked_page, + u64 delalloc_start, + u64 delalloc_end) +{ + unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT; + unsigned long start_index = index; + unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT; + unsigned long pages_locked = 0; + struct page *pages[16]; + unsigned long nrpages; + int ret; + int i; + + /* the caller is responsible for locking the start index */ + if (index == locked_page->index && index == end_index) + return 0; + + /* skip the page at the start index */ + nrpages = end_index - index + 1; + while(nrpages > 0) { + ret = find_get_pages_contig(inode->i_mapping, index, + min(nrpages, ARRAY_SIZE(pages)), pages); + if (ret == 0) { + ret = -EAGAIN; + goto done; + } + /* now we have an array of pages, lock them all */ + for (i = 0; i < ret; i++) { + /* + * the caller is taking responsibility for + * locked_page + */ + if (pages[i] != locked_page) + lock_page(pages[i]); + page_cache_release(pages[i]); + } + pages_locked += ret; + nrpages -= ret; + index += ret; + cond_resched(); + } + ret = 0; +done: + if (ret && pages_locked) { + __unlock_for_delalloc(inode, locked_page, + delalloc_start, + ((u64)(start_index + pages_locked - 1)) << + PAGE_CACHE_SHIFT); + } + return ret; +} + +/* + * find a contiguous range of bytes in the file marked as delalloc, not + * more than 'max_bytes'. start and end are used to return the range, + * + * 1 is returned if we find something, 0 if nothing was in the tree + */ +static noinline u64 find_lock_delalloc_range(struct inode *inode, + struct extent_io_tree *tree, + struct page *locked_page, + u64 *start, u64 *end, + u64 max_bytes) +{ + u64 delalloc_start; + u64 delalloc_end; + u64 found; + int ret; + int loops = 0; + +again: + /* step one, find a bunch of delalloc bytes starting at start */ + delalloc_start = *start; + delalloc_end = 0; + found = find_delalloc_range(tree, &delalloc_start, &delalloc_end, + max_bytes); + if (!found) { + *start = delalloc_start; + *end = delalloc_end; + return found; + } + + /* + * make sure to limit the number of pages we try to lock down + * if we're looping. + */ + if (delalloc_end + 1 - delalloc_start > max_bytes && loops) { + delalloc_end = (delalloc_start + PAGE_CACHE_SIZE - 1) & + ~((u64)PAGE_CACHE_SIZE - 1); + } + /* step two, lock all the pages after the page that has start */ + ret = lock_delalloc_pages(inode, locked_page, + delalloc_start, delalloc_end); + if (ret == -EAGAIN) { + /* some of the pages are gone, lets avoid looping by + * shortening the size of the delalloc range we're searching + */ + if (!loops) { + unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); + max_bytes = PAGE_CACHE_SIZE - offset; + loops = 1; + goto again; + } else { + found = 0; + goto out_failed; + } + } + BUG_ON(ret); + + /* step three, lock the state bits for the whole range */ + lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); + + /* then test to make sure it is all still delalloc */ + ret = test_range_bit(tree, delalloc_start, delalloc_end, + EXTENT_DELALLOC, 1); + if (!ret) { + unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); + __unlock_for_delalloc(inode, locked_page, + delalloc_start, delalloc_end); + cond_resched(); + goto again; + } + *start = delalloc_start; + *end = delalloc_end; +out_failed: + return found; +} + +int extent_clear_unlock_delalloc(struct inode *inode, + struct extent_io_tree *tree, + u64 start, u64 end, struct page *locked_page, + int clear_dirty, int set_writeback, + int end_writeback) +{ + int ret; + struct page *pages[16]; + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + unsigned long nr_pages = end_index - index + 1; + int i; + int clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC; + + if (clear_dirty) + clear_bits |= EXTENT_DIRTY; + + clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS); + + while(nr_pages > 0) { + ret = find_get_pages_contig(inode->i_mapping, index, + min(nr_pages, ARRAY_SIZE(pages)), pages); + for (i = 0; i < ret; i++) { + if (pages[i] == locked_page) { + page_cache_release(pages[i]); + continue; + } + if (clear_dirty) + clear_page_dirty_for_io(pages[i]); + if (set_writeback) + set_page_writeback(pages[i]); + if (end_writeback) + end_page_writeback(pages[i]); + unlock_page(pages[i]); + page_cache_release(pages[i]); + } + nr_pages -= ret; + index += ret; + cond_resched(); + } + return 0; +} +EXPORT_SYMBOL(extent_clear_unlock_delalloc); + /* * count the number of bytes in the tree that have a given bit(s) * set. This can be fairly slow, except for EXTENT_DIRTY which is @@ -1631,38 +1800,26 @@ extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, return bio; } -static int submit_one_bio(int rw, struct bio *bio, int mirror_num) +static int submit_one_bio(int rw, struct bio *bio, int mirror_num, + unsigned long bio_flags) { int ret = 0; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; struct page *page = bvec->bv_page; struct extent_io_tree *tree = bio->bi_private; - struct rb_node *node; - struct extent_state *state; u64 start; u64 end; start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; end = start + bvec->bv_len - 1; - spin_lock_irq(&tree->lock); - node = __etree_search(tree, start, NULL, NULL); - BUG_ON(!node); - state = rb_entry(node, struct extent_state, rb_node); - while(state->end < end) { - node = rb_next(node); - state = rb_entry(node, struct extent_state, rb_node); - } - BUG_ON(state->end != end); - spin_unlock_irq(&tree->lock); - bio->bi_private = NULL; bio_get(bio); if (tree->ops && tree->ops->submit_bio_hook) tree->ops->submit_bio_hook(page->mapping->host, rw, bio, - mirror_num); + mirror_num, bio_flags); else submit_bio(rw, bio); if (bio_flagged(bio, BIO_EOPNOTSUPP)) @@ -1678,39 +1835,56 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, struct bio **bio_ret, unsigned long max_pages, bio_end_io_t end_io_func, - int mirror_num) + int mirror_num, + unsigned long prev_bio_flags, + unsigned long bio_flags) { int ret = 0; struct bio *bio; int nr; + int contig = 0; + int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED; + int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED; + size_t page_size = min(size, PAGE_CACHE_SIZE); if (bio_ret && *bio_ret) { bio = *bio_ret; - if (bio->bi_sector + (bio->bi_size >> 9) != sector || + if (old_compressed) + contig = bio->bi_sector == sector; + else + contig = bio->bi_sector + (bio->bi_size >> 9) == + sector; + + if (prev_bio_flags != bio_flags || !contig || (tree->ops && tree->ops->merge_bio_hook && - tree->ops->merge_bio_hook(page, offset, size, bio)) || - bio_add_page(bio, page, size, offset) < size) { - ret = submit_one_bio(rw, bio, mirror_num); + tree->ops->merge_bio_hook(page, offset, page_size, bio, + bio_flags)) || + bio_add_page(bio, page, page_size, offset) < page_size) { + ret = submit_one_bio(rw, bio, mirror_num, + prev_bio_flags); bio = NULL; } else { return 0; } } - nr = bio_get_nr_vecs(bdev); + if (this_compressed) + nr = BIO_MAX_PAGES; + else + nr = bio_get_nr_vecs(bdev); + bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); if (!bio) { printk("failed to allocate bio nr %d\n", nr); } - - bio_add_page(bio, page, size, offset); + bio_add_page(bio, page, page_size, offset); bio->bi_end_io = end_io_func; bio->bi_private = tree; if (bio_ret) { *bio_ret = bio; } else { - ret = submit_one_bio(rw, bio, mirror_num); + ret = submit_one_bio(rw, bio, mirror_num, bio_flags); } return ret; @@ -1738,7 +1912,8 @@ void set_page_extent_head(struct page *page, unsigned long len) static int __extent_read_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, - struct bio **bio, int mirror_num) + struct bio **bio, int mirror_num, + unsigned long *bio_flags) { struct inode *inode = page->mapping->host; u64 start = (u64)page->index << PAGE_CACHE_SHIFT; @@ -1756,13 +1931,27 @@ static int __extent_read_full_page(struct extent_io_tree *tree, int nr = 0; size_t page_offset = 0; size_t iosize; + size_t disk_io_size; size_t blocksize = inode->i_sb->s_blocksize; + unsigned long this_bio_flag = 0; set_page_extent_mapped(page); end = page_end; lock_extent(tree, start, end, GFP_NOFS); + if (page->index == last_byte >> PAGE_CACHE_SHIFT) { + char *userpage; + size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1); + + if (zero_offset) { + iosize = PAGE_CACHE_SIZE - zero_offset; + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + zero_offset, 0, iosize); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); + } + } while (cur <= end) { if (cur >= last_byte) { char *userpage; @@ -1793,10 +1982,19 @@ printk("2bad mapping end %Lu cur %Lu\n", end, cur); } BUG_ON(end < cur); + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) + this_bio_flag = EXTENT_BIO_COMPRESSED; + iosize = min(extent_map_end(em) - cur, end - cur + 1); cur_end = min(extent_map_end(em) - 1, end); iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); - sector = (em->block_start + extent_offset) >> 9; + if (this_bio_flag & EXTENT_BIO_COMPRESSED) { + disk_io_size = em->block_len; + sector = em->block_start >> 9; + } else { + sector = (em->block_start + extent_offset) >> 9; + disk_io_size = iosize; + } bdev = em->bdev; block_start = em->block_start; free_extent_map(em); @@ -1845,10 +2043,13 @@ printk("2bad mapping end %Lu cur %Lu\n", end, cur); unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; pnr -= page->index; ret = submit_extent_page(READ, tree, page, - sector, iosize, page_offset, + sector, disk_io_size, page_offset, bdev, bio, pnr, - end_bio_extent_readpage, mirror_num); + end_bio_extent_readpage, mirror_num, + *bio_flags, + this_bio_flag); nr++; + *bio_flags = this_bio_flag; } if (ret) SetPageError(page); @@ -1867,11 +2068,13 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent) { struct bio *bio = NULL; + unsigned long bio_flags = 0; int ret; - ret = __extent_read_full_page(tree, page, get_extent, &bio, 0); + ret = __extent_read_full_page(tree, page, get_extent, &bio, 0, + &bio_flags); if (bio) - submit_one_bio(READ, bio, 0); + submit_one_bio(READ, bio, 0, bio_flags); return ret; } EXPORT_SYMBOL(extent_read_full_page); @@ -1909,6 +2112,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; u64 nr_delalloc; u64 delalloc_end; + int page_started; + int compressed; WARN_ON(!PageLocked(page)); pg_offset = i_size & (PAGE_CACHE_SIZE - 1); @@ -1934,27 +2139,33 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, delalloc_start = start; delalloc_end = 0; + page_started = 0; while(delalloc_end < page_end) { - nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start, + nr_delalloc = find_lock_delalloc_range(inode, tree, + page, + &delalloc_start, &delalloc_end, 128 * 1024 * 1024); if (nr_delalloc == 0) { delalloc_start = delalloc_end + 1; continue; } - tree->ops->fill_delalloc(inode, delalloc_start, - delalloc_end); - clear_extent_bit(tree, delalloc_start, - delalloc_end, - EXTENT_LOCKED | EXTENT_DELALLOC, - 1, 0, GFP_NOFS); + tree->ops->fill_delalloc(inode, page, delalloc_start, + delalloc_end, &page_started); delalloc_start = delalloc_end + 1; } + + /* did the fill delalloc function already unlock and start the IO? */ + if (page_started) { + return 0; + } + lock_extent(tree, start, page_end, GFP_NOFS); unlock_start = start; if (tree->ops && tree->ops->writepage_start_hook) { - ret = tree->ops->writepage_start_hook(page, start, page_end); + ret = tree->ops->writepage_start_hook(page, start, + page_end); if (ret == -EAGAIN) { unlock_extent(tree, start, page_end, GFP_NOFS); redirty_page_for_writepage(wbc, page); @@ -2006,10 +2217,15 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, sector = (em->block_start + extent_offset) >> 9; bdev = em->bdev; block_start = em->block_start; + compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); free_extent_map(em); em = NULL; - if (block_start == EXTENT_MAP_HOLE || + /* + * compressed and inline extents are written through other + * paths in the FS + */ + if (compressed || block_start == EXTENT_MAP_HOLE || block_start == EXTENT_MAP_INLINE) { clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); @@ -2017,16 +2233,28 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, unlock_extent(tree, unlock_start, cur + iosize -1, GFP_NOFS); - if (tree->ops && tree->ops->writepage_end_io_hook) + /* + * end_io notification does not happen here for + * compressed extents + */ + if (!compressed && tree->ops && + tree->ops->writepage_end_io_hook) tree->ops->writepage_end_io_hook(page, cur, cur + iosize - 1, NULL, 1); - cur = cur + iosize; + else if (compressed) { + /* we don't want to end_page_writeback on + * a compressed extent. this happens + * elsewhere + */ + nr++; + } + + cur += iosize; pg_offset += iosize; unlock_start = cur; continue; } - /* leave this out until we have a page_mkwrite call */ if (0 && !test_range_bit(tree, cur, cur + iosize - 1, EXTENT_DIRTY, 0)) { @@ -2034,6 +2262,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, pg_offset += iosize; continue; } + clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); if (tree->ops && tree->ops->writepage_io_hook) { ret = tree->ops->writepage_io_hook(page, cur, @@ -2057,7 +2286,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, ret = submit_extent_page(WRITE, tree, page, sector, iosize, pg_offset, bdev, &epd->bio, max_nr, - end_bio_extent_writepage, 0); + end_bio_extent_writepage, + 0, 0, 0); if (ret) SetPageError(page); } @@ -2226,7 +2456,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, extent_write_cache_pages(tree, mapping, &wbc_writepages, __extent_writepage, &epd); if (epd.bio) { - submit_one_bio(WRITE, epd.bio, 0); + submit_one_bio(WRITE, epd.bio, 0, 0); } return ret; } @@ -2248,7 +2478,7 @@ int extent_writepages(struct extent_io_tree *tree, ret = extent_write_cache_pages(tree, mapping, wbc, __extent_writepage, &epd); if (epd.bio) { - submit_one_bio(WRITE, epd.bio, 0); + submit_one_bio(WRITE, epd.bio, 0, 0); } return ret; } @@ -2262,6 +2492,7 @@ int extent_readpages(struct extent_io_tree *tree, struct bio *bio = NULL; unsigned page_idx; struct pagevec pvec; + unsigned long bio_flags = 0; pagevec_init(&pvec, 0); for (page_idx = 0; page_idx < nr_pages; page_idx++) { @@ -2281,7 +2512,7 @@ int extent_readpages(struct extent_io_tree *tree, if (!pagevec_add(&pvec, page)) __pagevec_lru_add(&pvec); __extent_read_full_page(tree, page, get_extent, - &bio, 0); + &bio, 0, &bio_flags); } page_cache_release(page); } @@ -2289,7 +2520,7 @@ int extent_readpages(struct extent_io_tree *tree, __pagevec_lru_add(&pvec); BUG_ON(!list_empty(pages)); if (bio) - submit_one_bio(READ, bio, 0); + submit_one_bio(READ, bio, 0, bio_flags); return 0; } EXPORT_SYMBOL(extent_readpages); @@ -2414,7 +2645,8 @@ int extent_prepare_write(struct extent_io_tree *tree, ret = submit_extent_page(READ, tree, page, sector, iosize, page_offset, em->bdev, NULL, 1, - end_bio_extent_preparewrite, 0); + end_bio_extent_preparewrite, 0, + 0, 0); iocount++; block_start = block_start + iosize; } else { @@ -2495,7 +2727,9 @@ int try_release_extent_mapping(struct extent_map_tree *map, } if (!test_range_bit(tree, em->start, extent_map_end(em) - 1, - EXTENT_LOCKED, 0)) { + EXTENT_LOCKED | EXTENT_WRITEBACK | + EXTENT_ORDERED, + 0)) { remove_extent_mapping(map, em); /* once for the rb tree */ free_extent_map(em); @@ -2923,6 +3157,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, int inc_all_pages = 0; unsigned long num_pages; struct bio *bio = NULL; + unsigned long bio_flags = 0; if (eb->flags & EXTENT_UPTODATE) return 0; @@ -2973,7 +3208,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, ClearPageError(page); err = __extent_read_full_page(tree, page, get_extent, &bio, - mirror_num); + mirror_num, &bio_flags); if (err) { ret = err; printk("err %d from __extent_read_full_page\n", ret); @@ -2984,7 +3219,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, } if (bio) - submit_one_bio(READ, bio, mirror_num); + submit_one_bio(READ, bio, mirror_num, bio_flags); if (ret || !wait) { if (ret) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index c9d1908a1ae..86f859b87a6 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -18,6 +18,9 @@ #define EXTENT_BOUNDARY (1 << 11) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) +/* flags for bio submission */ +#define EXTENT_BIO_COMPRESSED 1 + /* * page->private values. Every page that is controlled by the extent * map has page->private set to one. @@ -28,14 +31,17 @@ struct extent_state; typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, - struct bio *bio, int mirror_num); + struct bio *bio, int mirror_num, + unsigned long bio_flags); struct extent_io_ops { - int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); + int (*fill_delalloc)(struct inode *inode, struct page *locked_page, + u64 start, u64 end, int *page_started); int (*writepage_start_hook)(struct page *page, u64 start, u64 end); int (*writepage_io_hook)(struct page *page, u64 start, u64 end); extent_submit_bio_hook_t *submit_bio_hook; int (*merge_bio_hook)(struct page *page, unsigned long offset, - size_t size, struct bio *bio); + size_t size, struct bio *bio, + unsigned long bio_flags); int (*readpage_io_hook)(struct page *page, u64 start, u64 end); int (*readpage_io_failed_hook)(struct bio *bio, struct page *page, u64 start, u64 end, @@ -245,4 +251,9 @@ void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); int release_extent_buffer_tail_pages(struct extent_buffer *eb); int extent_range_uptodate(struct extent_io_tree *tree, u64 start, u64 end); +int extent_clear_unlock_delalloc(struct inode *inode, + struct extent_io_tree *tree, + u64 start, u64 end, struct page *locked_page, + int clear_dirty, int set_writeback, + int clear_writeback); #endif diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 74b2a29880d..fd3ebfb8c3c 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -184,6 +184,13 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next) if (test_bit(EXTENT_FLAG_PINNED, &prev->flags)) return 0; + /* + * don't merge compressed extents, we need to know their + * actual size + */ + if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags)) + return 0; + if (extent_map_end(prev) == next->start && prev->flags == next->flags && prev->bdev == next->bdev && @@ -239,6 +246,7 @@ int add_extent_mapping(struct extent_map_tree *tree, if (rb && mergable_maps(merge, em)) { em->start = merge->start; em->len += merge->len; + em->block_len += merge->block_len; em->block_start = merge->block_start; merge->in_tree = 0; rb_erase(&merge->rb_node, &tree->map); @@ -250,6 +258,7 @@ int add_extent_mapping(struct extent_map_tree *tree, merge = rb_entry(rb, struct extent_map, rb_node); if (rb && mergable_maps(em, merge)) { em->len += merge->len; + em->block_len += merge->len; rb_erase(&merge->rb_node, &tree->map); merge->in_tree = 0; free_extent_map(merge); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 26ac6fe0b26..abbcbeb28c7 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -10,6 +10,7 @@ /* bits for the flags field */ #define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */ +#define EXTENT_FLAG_COMPRESSED 1 struct extent_map { struct rb_node rb_node; @@ -18,6 +19,7 @@ struct extent_map { u64 start; u64 len; u64 block_start; + u64 block_len; unsigned long flags; struct block_device *bdev; atomic_t refs; @@ -38,9 +40,9 @@ static inline u64 extent_map_end(struct extent_map *em) static inline u64 extent_map_block_end(struct extent_map *em) { - if (em->block_start + em->len < em->block_start) + if (em->block_start + em->block_len < em->block_start) return (u64)-1; - return em->block_start + em->len; + return em->block_start + em->block_len; } void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 6dbe88b9d7d..f4d3fa71bc4 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -31,7 +31,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, u64 disk_offset, u64 disk_num_bytes, - u64 num_bytes, u64 offset) + u64 num_bytes, u64 offset, u64 ram_bytes, + u8 compression, u8 encryption, u16 other_encoding) { int ret = 0; struct btrfs_file_extent_item *item; @@ -57,8 +58,13 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes); btrfs_set_file_extent_offset(leaf, item, offset); btrfs_set_file_extent_num_bytes(leaf, item, num_bytes); + btrfs_set_file_extent_ram_bytes(leaf, item, ram_bytes); btrfs_set_file_extent_generation(leaf, item, trans->transid); btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); + btrfs_set_file_extent_compression(leaf, item, compression); + btrfs_set_file_extent_encryption(leaf, item, encryption); + btrfs_set_file_extent_other_encoding(leaf, item, other_encoding); + btrfs_mark_buffer_dirty(leaf); out: btrfs_free_path(path); @@ -213,6 +219,73 @@ found: return 0; } +int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode, + u64 start, unsigned long len) +{ + struct btrfs_ordered_sum *sums; + struct btrfs_sector_sum *sector_sum; + struct btrfs_ordered_extent *ordered; + char *data; + struct page *page; + unsigned long total_bytes = 0; + unsigned long this_sum_bytes = 0; + + sums = kzalloc(btrfs_ordered_sum_size(root, len), GFP_NOFS); + if (!sums) + return -ENOMEM; + + sector_sum = sums->sums; + sums->file_offset = start; + sums->len = len; + INIT_LIST_HEAD(&sums->list); + ordered = btrfs_lookup_ordered_extent(inode, sums->file_offset); + BUG_ON(!ordered); + + while(len > 0) { + if (start >= ordered->file_offset + ordered->len || + start < ordered->file_offset) { + sums->len = this_sum_bytes; + this_sum_bytes = 0; + btrfs_add_ordered_sum(inode, ordered, sums); + btrfs_put_ordered_extent(ordered); + + sums = kzalloc(btrfs_ordered_sum_size(root, len), + GFP_NOFS); + BUG_ON(!sums); + sector_sum = sums->sums; + sums->len = len; + sums->file_offset = start; + ordered = btrfs_lookup_ordered_extent(inode, + sums->file_offset); + BUG_ON(!ordered); + } + + page = find_get_page(inode->i_mapping, + start >> PAGE_CACHE_SHIFT); + + data = kmap_atomic(page, KM_USER0); + sector_sum->sum = ~(u32)0; + sector_sum->sum = btrfs_csum_data(root, data, sector_sum->sum, + PAGE_CACHE_SIZE); + kunmap_atomic(data, KM_USER0); + btrfs_csum_final(sector_sum->sum, + (char *)§or_sum->sum); + sector_sum->offset = page_offset(page); + page_cache_release(page); + + sector_sum++; + total_bytes += PAGE_CACHE_SIZE; + this_sum_bytes += PAGE_CACHE_SIZE; + start += PAGE_CACHE_SIZE; + + WARN_ON(len < PAGE_CACHE_SIZE); + len -= PAGE_CACHE_SIZE; + } + btrfs_add_ordered_sum(inode, ordered, sums); + btrfs_put_ordered_extent(ordered); + return 0; +} + int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, struct bio *bio) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 69abbe19add..0aa15436590 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -95,153 +95,6 @@ static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) } } -/* this does all the hard work for inserting an inline extent into - * the btree. Any existing inline extent is extended as required to make room, - * otherwise things are inserted as required into the btree - */ -static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, - u64 offset, size_t size, - struct page **pages, size_t page_offset, - int num_pages) -{ - struct btrfs_key key; - struct btrfs_path *path; - struct extent_buffer *leaf; - char *kaddr; - unsigned long ptr; - struct btrfs_file_extent_item *ei; - struct page *page; - u32 datasize; - int err = 0; - int ret; - int i; - ssize_t cur_size; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - btrfs_set_trans_block_group(trans, inode); - - key.objectid = inode->i_ino; - key.offset = offset; - btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); - - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); - if (ret < 0) { - err = ret; - goto fail; - } - if (ret == 1) { - struct btrfs_key found_key; - - if (path->slots[0] == 0) - goto insert; - - path->slots[0]--; - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - - if (found_key.objectid != inode->i_ino) - goto insert; - - if (found_key.type != BTRFS_EXTENT_DATA_KEY) - goto insert; - ei = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - - if (btrfs_file_extent_type(leaf, ei) != - BTRFS_FILE_EXTENT_INLINE) { - goto insert; - } - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - ret = 0; - } - if (ret == 0) { - u32 found_size; - u64 found_end; - - leaf = path->nodes[0]; - ei = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - - if (btrfs_file_extent_type(leaf, ei) != - BTRFS_FILE_EXTENT_INLINE) { - err = ret; - btrfs_print_leaf(root, leaf); - printk("found wasn't inline offset %Lu inode %lu\n", - offset, inode->i_ino); - goto fail; - } - found_size = btrfs_file_extent_inline_len(leaf, - btrfs_item_nr(leaf, path->slots[0])); - found_end = key.offset + found_size; - - if (found_end < offset + size) { - btrfs_release_path(root, path); - ret = btrfs_search_slot(trans, root, &key, path, - offset + size - found_end, 1); - BUG_ON(ret != 0); - - ret = btrfs_extend_item(trans, root, path, - offset + size - found_end); - if (ret) { - err = ret; - goto fail; - } - leaf = path->nodes[0]; - ei = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - inode_add_bytes(inode, offset + size - found_end); - } - if (found_end < offset) { - ptr = btrfs_file_extent_inline_start(ei) + found_size; - memset_extent_buffer(leaf, 0, ptr, offset - found_end); - } - } else { -insert: - btrfs_release_path(root, path); - datasize = offset + size - key.offset; - inode_add_bytes(inode, datasize); - datasize = btrfs_file_extent_calc_inline_size(datasize); - ret = btrfs_insert_empty_item(trans, root, path, &key, - datasize); - if (ret) { - err = ret; - printk("got bad ret %d\n", ret); - goto fail; - } - leaf = path->nodes[0]; - ei = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(leaf, ei, trans->transid); - btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); - } - ptr = btrfs_file_extent_inline_start(ei) + offset - key.offset; - - cur_size = size; - i = 0; - while (size > 0) { - page = pages[i]; - kaddr = kmap_atomic(page, KM_USER0); - cur_size = min_t(size_t, PAGE_CACHE_SIZE - page_offset, size); - write_extent_buffer(leaf, kaddr + page_offset, ptr, cur_size); - kunmap_atomic(kaddr, KM_USER0); - page_offset = 0; - ptr += cur_size; - size -= cur_size; - if (i >= num_pages) { - printk("i %d num_pages %d\n", i, num_pages); - } - i++; - } - btrfs_mark_buffer_dirty(leaf); -fail: - btrfs_free_path(path); - return err; -} - /* * after copy_from_user, pages need to be dirtied and we need to make * sure holes are created between the current EOF and the start of @@ -267,8 +120,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 start_pos; u64 end_of_last_block; u64 end_pos = pos + write_bytes; - u64 inline_size; - int did_inline = 0; loff_t isize = i_size_read(inode); start_pos = pos & ~((u64)root->sectorsize - 1); @@ -314,7 +165,8 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, err = btrfs_insert_file_extent(trans, root, inode->i_ino, last_pos_in_file, - 0, 0, hole_size, 0); + 0, 0, hole_size, 0, + hole_size, 0, 0, 0); btrfs_drop_extent_cache(inode, last_pos_in_file, last_pos_in_file + hole_size - 1, 0); mutex_unlock(&BTRFS_I(inode)->extent_mutex); @@ -324,57 +176,19 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, goto failed; } - /* - * either allocate an extent for the new bytes or setup the key - * to show we are doing inline data in the extent + /* check for reserved extents on each page, we don't want + * to reset the delalloc bit on things that already have + * extents reserved. */ - inline_size = end_pos; - if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || - inline_size > root->fs_info->max_inline || - (inline_size & (root->sectorsize -1)) == 0 || - inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { - /* check for reserved extents on each page, we don't want - * to reset the delalloc bit on things that already have - * extents reserved. - */ - btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); - for (i = 0; i < num_pages; i++) { - struct page *p = pages[i]; - SetPageUptodate(p); - ClearPageChecked(p); - set_page_dirty(p); - } - } else { - u64 aligned_end; - /* step one, delete the existing extents in this range */ - aligned_end = (pos + write_bytes + root->sectorsize - 1) & - ~((u64)root->sectorsize - 1); - mutex_lock(&BTRFS_I(inode)->extent_mutex); - err = btrfs_drop_extents(trans, root, inode, start_pos, - aligned_end, aligned_end, &hint_byte); - if (err) - goto failed; - if (isize > inline_size) - inline_size = min_t(u64, isize, aligned_end); - inline_size -= start_pos; - err = insert_inline_extent(trans, root, inode, start_pos, - inline_size, pages, 0, num_pages); - btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1, 0); - BUG_ON(err); - mutex_unlock(&BTRFS_I(inode)->extent_mutex); - - /* - * an ugly way to do all the prop accounting around - * the page bits and mapping tags - */ - set_page_writeback(pages[0]); - end_page_writeback(pages[0]); - did_inline = 1; + btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); + for (i = 0; i < num_pages; i++) { + struct page *p = pages[i]; + SetPageUptodate(p); + ClearPageChecked(p); + set_page_dirty(p); } if (end_pos > isize) { i_size_write(inode, end_pos); - if (did_inline) - BTRFS_I(inode)->disk_i_size = end_pos; btrfs_update_inode(trans, root, inode); } failed: @@ -399,6 +213,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, int ret; int testend = 1; unsigned long flags; + int compressed = 0; WARN_ON(end < start); if (end == (u64)-1) { @@ -434,6 +249,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, free_extent_map(em); continue; } + compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); clear_bit(EXTENT_FLAG_PINNED, &em->flags); remove_extent_mapping(em_tree, em); @@ -442,6 +258,12 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, split->start = em->start; split->len = start - em->start; split->block_start = em->block_start; + + if (compressed) + split->block_len = em->block_len; + else + split->block_len = split->len; + split->bdev = em->bdev; split->flags = flags; ret = add_extent_mapping(em_tree, split); @@ -459,7 +281,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, split->bdev = em->bdev; split->flags = flags; - split->block_start = em->block_start + diff; + if (compressed) { + split->block_len = em->block_len; + split->block_start = em->block_start; + } else { + split->block_len = split->len; + split->block_start = em->block_start + diff; + } ret = add_extent_mapping(em_tree, split); BUG_ON(ret); @@ -533,7 +361,7 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode) struct btrfs_item *item; item = btrfs_item_nr(leaf, slot); extent_end = found_key.offset + - btrfs_file_extent_inline_len(leaf, item); + btrfs_file_extent_inline_len(leaf, extent); extent_end = (extent_end + root->sectorsize - 1) & ~((u64)root->sectorsize -1 ); } @@ -573,6 +401,10 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, u64 extent_end = 0; u64 search_start = start; u64 leaf_start; + u64 ram_bytes = 0; + u8 compression = 0; + u8 encryption = 0; + u16 other_encoding = 0; u64 root_gen; u64 root_owner; struct extent_buffer *leaf; @@ -589,6 +421,7 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, int recow; int ret; + inline_limit = 0; btrfs_drop_extent_cache(inode, start, end - 1, 0); path = btrfs_alloc_path(); @@ -637,6 +470,12 @@ next_slot: extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); found_type = btrfs_file_extent_type(leaf, extent); + compression = btrfs_file_extent_compression(leaf, + extent); + encryption = btrfs_file_extent_encryption(leaf, + extent); + other_encoding = btrfs_file_extent_other_encoding(leaf, + extent); if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = btrfs_file_extent_disk_bytenr(leaf, @@ -646,13 +485,13 @@ next_slot: extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, extent); + ram_bytes = btrfs_file_extent_ram_bytes(leaf, + extent); found_extent = 1; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - struct btrfs_item *item; - item = btrfs_item_nr(leaf, slot); found_inline = 1; extent_end = key.offset + - btrfs_file_extent_inline_len(leaf, item); + btrfs_file_extent_inline_len(leaf, extent); } } else { extent_end = search_start; @@ -680,10 +519,9 @@ next_slot: search_start = (extent_end + mask) & ~mask; } else search_start = extent_end; - if (end <= extent_end && start >= key.offset && found_inline) { + + if (end <= extent_end && start >= key.offset && found_inline) *hint_byte = EXTENT_MAP_INLINE; - goto out; - } if (found_extent) { read_extent_buffer(leaf, &old, (unsigned long)extent, @@ -770,12 +608,27 @@ next_slot: write_extent_buffer(leaf, &old, (unsigned long)extent, sizeof(old)); + btrfs_set_file_extent_compression(leaf, extent, + compression); + btrfs_set_file_extent_encryption(leaf, extent, + encryption); + btrfs_set_file_extent_other_encoding(leaf, extent, + other_encoding); btrfs_set_file_extent_offset(leaf, extent, le64_to_cpu(old.offset) + end - key.offset); WARN_ON(le64_to_cpu(old.num_bytes) < (extent_end - end)); btrfs_set_file_extent_num_bytes(leaf, extent, extent_end - end); + + /* + * set the ram bytes to the size of the full extent + * before splitting. This is a worst case flag, + * but its the best we can do because we don't know + * how splitting affects compression + */ + btrfs_set_file_extent_ram_bytes(leaf, extent, + ram_bytes); btrfs_set_file_extent_type(leaf, extent, BTRFS_FILE_EXTENT_REG); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bf4bed6ca4d..9797592dc86 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -49,6 +49,7 @@ #include "compat.h" #include "tree-log.h" #include "ref-cache.h" +#include "compression.h" struct btrfs_iget_args { u64 ino; @@ -83,6 +84,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { }; static void btrfs_truncate(struct inode *inode); +static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end); /* * a very lame attempt at stopping writes when the FS is 85% full. There @@ -113,58 +115,375 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, return ret; } +/* + * this does all the hard work for inserting an inline extent into + * the btree. The caller should have done a btrfs_drop_extents so that + * no overlapping inline items exist in the btree + */ +static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + u64 start, size_t size, size_t compressed_size, + struct page **compressed_pages) +{ + struct btrfs_key key; + struct btrfs_path *path; + struct extent_buffer *leaf; + struct page *page = NULL; + char *kaddr; + unsigned long ptr; + struct btrfs_file_extent_item *ei; + int err = 0; + int ret; + size_t cur_size = size; + size_t datasize; + unsigned long offset; + int use_compress = 0; + + if (compressed_size && compressed_pages) { + use_compress = 1; + cur_size = compressed_size; + } + + path = btrfs_alloc_path(); if (!path) + return -ENOMEM; + + btrfs_set_trans_block_group(trans, inode); + + key.objectid = inode->i_ino; + key.offset = start; + btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + inode_add_bytes(inode, size); + datasize = btrfs_file_extent_calc_inline_size(cur_size); + + inode_add_bytes(inode, size); + ret = btrfs_insert_empty_item(trans, root, path, &key, + datasize); + BUG_ON(ret); + if (ret) { + err = ret; + printk("got bad ret %d\n", ret); + goto fail; + } + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, ei, trans->transid); + btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); + btrfs_set_file_extent_encryption(leaf, ei, 0); + btrfs_set_file_extent_other_encoding(leaf, ei, 0); + btrfs_set_file_extent_ram_bytes(leaf, ei, size); + ptr = btrfs_file_extent_inline_start(ei); + + if (use_compress) { + struct page *cpage; + int i = 0; + while(compressed_size > 0) { + cpage = compressed_pages[i]; + cur_size = min(compressed_size, + PAGE_CACHE_SIZE); + + kaddr = kmap(cpage); + write_extent_buffer(leaf, kaddr, ptr, cur_size); + kunmap(cpage); + + i++; + ptr += cur_size; + compressed_size -= cur_size; + } + btrfs_set_file_extent_compression(leaf, ei, + BTRFS_COMPRESS_ZLIB); + } else { + page = find_get_page(inode->i_mapping, + start >> PAGE_CACHE_SHIFT); + btrfs_set_file_extent_compression(leaf, ei, 0); + kaddr = kmap_atomic(page, KM_USER0); + offset = start & (PAGE_CACHE_SIZE - 1); + write_extent_buffer(leaf, kaddr + offset, ptr, size); + kunmap_atomic(kaddr, KM_USER0); + page_cache_release(page); + } + btrfs_mark_buffer_dirty(leaf); + btrfs_free_path(path); + + BTRFS_I(inode)->disk_i_size = inode->i_size; + btrfs_update_inode(trans, root, inode); + return 0; +fail: + btrfs_free_path(path); + return err; +} + + +/* + * conditionally insert an inline extent into the file. This + * does the checks required to make sure the data is small enough + * to fit as an inline extent. + */ +static int cow_file_range_inline(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, u64 start, u64 end, + size_t compressed_size, + struct page **compressed_pages) +{ + u64 isize = i_size_read(inode); + u64 actual_end = min(end + 1, isize); + u64 inline_len = actual_end - start; + u64 aligned_end = (end + root->sectorsize - 1) & + ~((u64)root->sectorsize - 1); + u64 hint_byte; + u64 data_len = inline_len; + int ret; + + if (compressed_size) + data_len = compressed_size; + + if (start > 0 || + data_len >= BTRFS_MAX_INLINE_DATA_SIZE(root) || + (!compressed_size && + (actual_end & (root->sectorsize - 1)) == 0) || + end + 1 < isize || + data_len > root->fs_info->max_inline) { + return 1; + } + + mutex_lock(&BTRFS_I(inode)->extent_mutex); + ret = btrfs_drop_extents(trans, root, inode, start, + aligned_end, aligned_end, &hint_byte); + BUG_ON(ret); + + if (isize > actual_end) + inline_len = min_t(u64, isize, actual_end); + ret = insert_inline_extent(trans, root, inode, start, + inline_len, compressed_size, + compressed_pages); + BUG_ON(ret); + btrfs_drop_extent_cache(inode, start, aligned_end, 0); + mutex_unlock(&BTRFS_I(inode)->extent_mutex); + return 0; +} + /* * when extent_io.c finds a delayed allocation range in the file, * the call backs end up in this code. The basic idea is to * allocate extents on disk for the range, and create ordered data structs * in ram to track those extents. + * + * locked_page is the page that writepage had locked already. We use + * it to make sure we don't do extra locks or unlocks. + * + * *page_started is set to one if we unlock locked_page and do everything + * required to start IO on it. It may be clean and already done with + * IO when we return. */ -static int cow_file_range(struct inode *inode, u64 start, u64 end) +static int cow_file_range(struct inode *inode, struct page *locked_page, + u64 start, u64 end, int *page_started) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; u64 alloc_hint = 0; u64 num_bytes; + unsigned long ram_size; + u64 orig_start; + u64 disk_num_bytes; u64 cur_alloc_size; u64 blocksize = root->sectorsize; - u64 orig_num_bytes; + u64 actual_end; struct btrfs_key ins; struct extent_map *em; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; int ret = 0; + struct page **pages = NULL; + unsigned long nr_pages; + unsigned long nr_pages_ret = 0; + unsigned long total_compressed = 0; + unsigned long total_in = 0; + unsigned long max_compressed = 128 * 1024; + unsigned long max_uncompressed = 256 * 1024; + int i; + int will_compress; trans = btrfs_join_transaction(root, 1); BUG_ON(!trans); btrfs_set_trans_block_group(trans, inode); + orig_start = start; + + /* + * compression made this loop a bit ugly, but the basic idea is to + * compress some pages but keep the total size of the compressed + * extent relatively small. If compression is off, this goto target + * is never used. + */ +again: + will_compress = 0; + nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1; + nr_pages = min(nr_pages, (128 * 1024UL) / PAGE_CACHE_SIZE); + actual_end = min_t(u64, i_size_read(inode), end + 1); + total_compressed = actual_end - start; + + /* we want to make sure that amount of ram required to uncompress + * an extent is reasonable, so we limit the total size in ram + * of a compressed extent to 256k + */ + total_compressed = min(total_compressed, max_uncompressed); num_bytes = (end - start + blocksize) & ~(blocksize - 1); num_bytes = max(blocksize, num_bytes); - orig_num_bytes = num_bytes; + disk_num_bytes = num_bytes; + total_in = 0; + ret = 0; - if (alloc_hint == EXTENT_MAP_INLINE) - goto out; + /* we do compression for mount -o compress and when the + * inode has not been flagged as nocompress + */ + if (!btrfs_test_flag(inode, NOCOMPRESS) && + btrfs_test_opt(root, COMPRESS)) { + WARN_ON(pages); + pages = kmalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); + + /* we want to make sure the amount of IO required to satisfy + * a random read is reasonably small, so we limit the size + * of a compressed extent to 128k + */ + ret = btrfs_zlib_compress_pages(inode->i_mapping, start, + total_compressed, pages, + nr_pages, &nr_pages_ret, + &total_in, + &total_compressed, + max_compressed); + + if (!ret) { + unsigned long offset = total_compressed & + (PAGE_CACHE_SIZE - 1); + struct page *page = pages[nr_pages_ret - 1]; + char *kaddr; + + /* zero the tail end of the last page, we might be + * sending it down to disk + */ + if (offset) { + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + offset, 0, + PAGE_CACHE_SIZE - offset); + kunmap_atomic(kaddr, KM_USER0); + } + will_compress = 1; + } + } + if (start == 0) { + /* lets try to make an inline extent */ + if (ret || total_in < (end - start + 1)) { + /* we didn't compress the entire range, try + * to make an uncompressed inline extent. This + * is almost sure to fail, but maybe inline sizes + * will get bigger later + */ + ret = cow_file_range_inline(trans, root, inode, + start, end, 0, NULL); + } else { + ret = cow_file_range_inline(trans, root, inode, + start, end, + total_compressed, pages); + } + if (ret == 0) { + extent_clear_unlock_delalloc(inode, + &BTRFS_I(inode)->io_tree, + start, end, NULL, + 1, 1, 1); + *page_started = 1; + ret = 0; + goto free_pages_out; + } + } + + if (will_compress) { + /* + * we aren't doing an inline extent round the compressed size + * up to a block size boundary so the allocator does sane + * things + */ + total_compressed = (total_compressed + blocksize - 1) & + ~(blocksize - 1); + + /* + * one last check to make sure the compression is really a + * win, compare the page count read with the blocks on disk + */ + total_in = (total_in + PAGE_CACHE_SIZE - 1) & + ~(PAGE_CACHE_SIZE - 1); + if (total_compressed >= total_in) { + will_compress = 0; + } else { + disk_num_bytes = total_compressed; + num_bytes = total_in; + } + } + if (!will_compress && pages) { + /* + * the compression code ran but failed to make things smaller, + * free any pages it allocated and our page pointer array + */ + for (i = 0; i < nr_pages_ret; i++) { + page_cache_release(pages[i]); + } + kfree(pages); + pages = NULL; + total_compressed = 0; + nr_pages_ret = 0; + + /* flag the file so we don't compress in the future */ + btrfs_set_flag(inode, NOCOMPRESS); + } + + BUG_ON(disk_num_bytes > + btrfs_super_total_bytes(&root->fs_info->super_copy)); - BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy)); mutex_lock(&BTRFS_I(inode)->extent_mutex); btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); mutex_unlock(&BTRFS_I(inode)->extent_mutex); - while(num_bytes > 0) { - cur_alloc_size = min(num_bytes, root->fs_info->max_extent); + while(disk_num_bytes > 0) { + unsigned long min_bytes; + + /* + * the max size of a compressed extent is pretty small, + * make the code a little less complex by forcing + * the allocator to find a whole compressed extent at once + */ + if (will_compress) + min_bytes = disk_num_bytes; + else + min_bytes = root->sectorsize; + + cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); ret = btrfs_reserve_extent(trans, root, cur_alloc_size, - root->sectorsize, 0, alloc_hint, + min_bytes, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { WARN_ON(1); - goto out; + goto free_pages_out_fail; } em = alloc_extent_map(GFP_NOFS); em->start = start; - em->len = ins.offset; + + if (will_compress) { + ram_size = num_bytes; + em->len = num_bytes; + } else { + /* ramsize == disk size */ + ram_size = ins.offset; + em->len = ins.offset; + } + em->block_start = ins.objectid; + em->block_len = ins.offset; em->bdev = root->fs_info->fs_devices->latest_bdev; + mutex_lock(&BTRFS_I(inode)->extent_mutex); set_bit(EXTENT_FLAG_PINNED, &em->flags); + + if (will_compress) + set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); + while(1) { spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); @@ -174,26 +493,95 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) break; } btrfs_drop_extent_cache(inode, start, - start + ins.offset - 1, 0); + start + ram_size - 1, 0); } mutex_unlock(&BTRFS_I(inode)->extent_mutex); cur_alloc_size = ins.offset; ret = btrfs_add_ordered_extent(inode, start, ins.objectid, - ins.offset, 0); + ram_size, cur_alloc_size, 0, + will_compress); BUG_ON(ret); - if (num_bytes < cur_alloc_size) { - printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes, + + if (disk_num_bytes < cur_alloc_size) { + printk("num_bytes %Lu cur_alloc %Lu\n", disk_num_bytes, cur_alloc_size); break; } + + if (will_compress) { + /* + * we're doing compression, we and we need to + * submit the compressed extents down to the device. + * + * We lock down all the file pages, clearing their + * dirty bits and setting them writeback. Everyone + * that wants to modify the page will wait on the + * ordered extent above. + * + * The writeback bits on the file pages are + * cleared when the compressed pages are on disk + */ + btrfs_end_transaction(trans, root); + + if (start <= page_offset(locked_page) && + page_offset(locked_page) < start + ram_size) { + *page_started = 1; + } + + extent_clear_unlock_delalloc(inode, + &BTRFS_I(inode)->io_tree, + start, + start + ram_size - 1, + NULL, 1, 1, 0); + + ret = btrfs_submit_compressed_write(inode, start, + ram_size, ins.objectid, + cur_alloc_size, pages, + nr_pages_ret); + + BUG_ON(ret); + trans = btrfs_join_transaction(root, 1); + if (start + ram_size < end) { + start += ram_size; + alloc_hint = ins.objectid + ins.offset; + /* pages will be freed at end_bio time */ + pages = NULL; + goto again; + } else { + /* we've written everything, time to go */ + break; + } + } + /* we're not doing compressed IO, don't unlock the first + * page (which the caller expects to stay locked), don't + * clear any dirty bits and don't set any writeback bits + */ + extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, + start, start + ram_size - 1, + locked_page, 0, 0, 0); + disk_num_bytes -= cur_alloc_size; num_bytes -= cur_alloc_size; alloc_hint = ins.objectid + ins.offset; start += cur_alloc_size; } + + ret = 0; out: btrfs_end_transaction(trans, root); + return ret; + +free_pages_out_fail: + extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, + start, end, locked_page, 0, 0, 0); +free_pages_out: + for (i = 0; i < nr_pages_ret; i++) + page_cache_release(pages[i]); + if (pages) + kfree(pages); + + goto out; } /* @@ -203,7 +591,8 @@ out: * If no cow copies or snapshots exist, we write directly to the existing * blocks on disk */ -static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end) +static int run_delalloc_nocow(struct inode *inode, struct page *locked_page, + u64 start, u64 end, int *page_started) { u64 extent_start; u64 extent_end; @@ -260,6 +649,11 @@ again: extent_end = extent_start + extent_num_bytes; err = 0; + if (btrfs_file_extent_compression(leaf, item) || + btrfs_file_extent_encryption(leaf,item) || + btrfs_file_extent_other_encoding(leaf, item)) + goto not_found; + if (loops && start != extent_start) goto not_found; @@ -284,7 +678,8 @@ again: bytenr += btrfs_file_extent_offset(leaf, item); extent_num_bytes = min(end + 1, extent_end) - start; ret = btrfs_add_ordered_extent(inode, start, bytenr, - extent_num_bytes, 1); + extent_num_bytes, + extent_num_bytes, 1, 0); if (ret) { err = ret; goto out; @@ -300,7 +695,8 @@ again: not_found: btrfs_end_transaction(trans, root); btrfs_free_path(path); - return cow_file_range(inode, start, end); + return cow_file_range(inode, locked_page, start, end, + page_started); } out: WARN_ON(err); @@ -312,16 +708,19 @@ out: /* * extent_io.c call back to do delayed allocation processing */ -static int run_delalloc_range(struct inode *inode, u64 start, u64 end) +static int run_delalloc_range(struct inode *inode, struct page *locked_page, + u64 start, u64 end, int *page_started) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret; if (btrfs_test_opt(root, NODATACOW) || btrfs_test_flag(inode, NODATACOW)) - ret = run_delalloc_nocow(inode, start, end); + ret = run_delalloc_nocow(inode, locked_page, start, end, + page_started); else - ret = cow_file_range(inode, start, end); + ret = cow_file_range(inode, locked_page, start, end, + page_started); return ret; } @@ -383,7 +782,8 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, * we don't create bios that span stripes or chunks */ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, - size_t size, struct bio *bio) + size_t size, struct bio *bio, + unsigned long bio_flags) { struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; struct btrfs_mapping_tree *map_tree; @@ -413,7 +813,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, * are inserted into the btree */ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, - int mirror_num) + int mirror_num, unsigned long bio_flags) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret = 0; @@ -429,7 +829,7 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, * or reading the csums from the tree before a read */ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, - int mirror_num) + int mirror_num, unsigned long bio_flags) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret = 0; @@ -444,11 +844,17 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, if (!(rw & (1 << BIO_RW))) { btrfs_lookup_bio_sums(root, inode, bio); + + if (bio_flags & EXTENT_BIO_COMPRESSED) { + return btrfs_submit_compressed_read(inode, bio, + mirror_num, bio_flags); + } + goto mapit; } return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, inode, rw, bio, mirror_num, - __btrfs_submit_bio_hook); + bio_flags, __btrfs_submit_bio_hook); mapit: return btrfs_map_bio(root, rw, bio, mirror_num, 0); } @@ -539,7 +945,7 @@ out_page: * good idea. This causes problems because we want to make sure COW * properly happens and the data=ordered rules are followed. * - * In our case any range that doesn't have the EXTENT_ORDERED bit set + * In our case any range that doesn't have the ORDERED bit set * hasn't been properly setup for IO. We kick off an async process * to fix it up. The async helper will wait for ordered extents, set * the delalloc bit and make it safe to write the page. @@ -632,10 +1038,21 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) btrfs_set_file_extent_disk_bytenr(leaf, extent_item, ordered_extent->start); btrfs_set_file_extent_disk_num_bytes(leaf, extent_item, - ordered_extent->len); + ordered_extent->disk_len); btrfs_set_file_extent_offset(leaf, extent_item, 0); + + if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) + btrfs_set_file_extent_compression(leaf, extent_item, 1); + else + btrfs_set_file_extent_compression(leaf, extent_item, 0); + btrfs_set_file_extent_encryption(leaf, extent_item, 0); + btrfs_set_file_extent_other_encoding(leaf, extent_item, 0); + + /* ram bytes = extent_num_bytes for now */ btrfs_set_file_extent_num_bytes(leaf, extent_item, ordered_extent->len); + btrfs_set_file_extent_ram_bytes(leaf, extent_item, + ordered_extent->len); btrfs_mark_buffer_dirty(leaf); btrfs_drop_extent_cache(inode, ordered_extent->file_offset, @@ -644,7 +1061,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) mutex_unlock(&BTRFS_I(inode)->extent_mutex); ins.objectid = ordered_extent->start; - ins.offset = ordered_extent->len; + ins.offset = ordered_extent->disk_len; ins.type = BTRFS_EXTENT_ITEM_KEY; ret = btrfs_alloc_reserved_extent(trans, root, leaf->start, root->root_key.objectid, @@ -714,6 +1131,7 @@ int btrfs_io_failed_hook(struct bio *failed_bio, int ret; int rw; u64 logical; + unsigned long bio_flags = 0; ret = get_state_private(failure_tree, start, &private); if (ret) { @@ -738,6 +1156,8 @@ int btrfs_io_failed_hook(struct bio *failed_bio, } logical = start - em->start; logical = em->block_start + logical; + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) + bio_flags = EXTENT_BIO_COMPRESSED; failrec->logical = logical; free_extent_map(em); set_extent_bits(failure_tree, start, end, EXTENT_LOCKED | @@ -781,7 +1201,8 @@ int btrfs_io_failed_hook(struct bio *failed_bio, rw = READ; BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, - failrec->last_mirror); + failrec->last_mirror, + bio_flags); return 0; } @@ -1644,10 +2065,8 @@ search_again: item_end += btrfs_file_extent_num_bytes(leaf, fi); } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { - struct btrfs_item *item = btrfs_item_nr(leaf, - path->slots[0]); item_end += btrfs_file_extent_inline_len(leaf, - item); + fi); } item_end--; } @@ -1715,7 +2134,14 @@ search_again: root_owner = btrfs_header_owner(leaf); } } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { - if (!del_item) { + /* + * we can't truncate inline items that have had + * special encodings + */ + if (!del_item && + btrfs_file_extent_compression(leaf, fi) == 0 && + btrfs_file_extent_encryption(leaf, fi) == 0 && + btrfs_file_extent_other_encoding(leaf, fi) == 0) { u32 size = new_size - found_key.offset; if (root->ref_cows) { @@ -1926,7 +2352,8 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) err = btrfs_insert_file_extent(trans, root, inode->i_ino, hole_start, 0, 0, - hole_size, 0); + hole_size, 0, hole_size, + 0, 0, 0); btrfs_drop_extent_cache(inode, hole_start, (u64)-1, 0); btrfs_check_file(root, inode); @@ -2894,11 +3321,50 @@ static int merge_extent_mapping(struct extent_map_tree *em_tree, start_diff = map_start - em->start; em->start = map_start; em->len = map_len; - if (em->block_start < EXTENT_MAP_LAST_BYTE) + if (em->block_start < EXTENT_MAP_LAST_BYTE && + !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { em->block_start += start_diff; + em->block_len -= start_diff; + } return add_extent_mapping(em_tree, em); } +static noinline int uncompress_inline(struct btrfs_path *path, + struct inode *inode, struct page *page, + size_t pg_offset, u64 extent_offset, + struct btrfs_file_extent_item *item) +{ + int ret; + struct extent_buffer *leaf = path->nodes[0]; + char *tmp; + size_t max_size; + unsigned long inline_size; + unsigned long ptr; + + WARN_ON(pg_offset != 0); + max_size = btrfs_file_extent_ram_bytes(leaf, item); + inline_size = btrfs_file_extent_inline_item_len(leaf, + btrfs_item_nr(leaf, path->slots[0])); + tmp = kmalloc(inline_size, GFP_NOFS); + ptr = btrfs_file_extent_inline_start(item); + + read_extent_buffer(leaf, tmp, ptr, inline_size); + + max_size = min(PAGE_CACHE_SIZE, max_size); + ret = btrfs_zlib_decompress(tmp, page, extent_offset, + inline_size, max_size); + if (ret) { + char *kaddr = kmap_atomic(page, KM_USER0); + unsigned long copy_size = min_t(u64, + PAGE_CACHE_SIZE - pg_offset, + max_size - extent_offset); + memset(kaddr + pg_offset, 0, copy_size); + kunmap_atomic(kaddr, KM_USER0); + } + kfree(tmp); + return 0; +} + /* * a bit scary, this does extent mapping from logical file offset to the disk. * the ugly parts come from merging extents from the disk with the @@ -2927,6 +3393,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_trans_handle *trans = NULL; + int compressed; again: spin_lock(&em_tree->lock); @@ -2951,6 +3418,7 @@ again: em->bdev = root->fs_info->fs_devices->latest_bdev; em->start = EXTENT_MAP_HOLE; em->len = (u64)-1; + em->block_len = (u64)-1; if (!path) { path = btrfs_alloc_path(); @@ -2983,6 +3451,7 @@ again: found_type = btrfs_file_extent_type(leaf, item); extent_start = found_key.offset; + compressed = btrfs_file_extent_compression(leaf, item); if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = extent_start + btrfs_file_extent_num_bytes(leaf, item); @@ -3005,10 +3474,18 @@ again: em->block_start = EXTENT_MAP_HOLE; goto insert; } - bytenr += btrfs_file_extent_offset(leaf, item); - em->block_start = bytenr; em->start = extent_start; em->len = extent_end - extent_start; + if (compressed) { + set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); + em->block_start = bytenr; + em->block_len = btrfs_file_extent_disk_num_bytes(leaf, + item); + } else { + bytenr += btrfs_file_extent_offset(leaf, item); + em->block_start = bytenr; + em->block_len = em->len; + } goto insert; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { u64 page_start; @@ -3018,8 +3495,7 @@ again: size_t extent_offset; size_t copy_size; - size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, - path->slots[0])); + size = btrfs_file_extent_inline_len(leaf, item); extent_end = (extent_start + size + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); if (start < extent_start || start >= extent_end) { @@ -3035,9 +3511,10 @@ again: } em->block_start = EXTENT_MAP_INLINE; - if (!page) { + if (!page || create) { em->start = extent_start; - em->len = size; + em->len = (size + root->sectorsize - 1) & + ~((u64)root->sectorsize - 1); goto out; } @@ -3048,11 +3525,22 @@ again: em->start = extent_start + extent_offset; em->len = (copy_size + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); - map = kmap(page); + if (compressed) + set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); ptr = btrfs_file_extent_inline_start(item) + extent_offset; if (create == 0 && !PageUptodate(page)) { - read_extent_buffer(leaf, map + pg_offset, ptr, - copy_size); + if (btrfs_file_extent_compression(leaf, item) == + BTRFS_COMPRESS_ZLIB) { + ret = uncompress_inline(path, inode, page, + pg_offset, + extent_offset, item); + BUG_ON(ret); + } else { + map = kmap(page); + read_extent_buffer(leaf, map + pg_offset, ptr, + copy_size); + kunmap(page); + } flush_dcache_page(page); } else if (create && PageUptodate(page)) { if (!trans) { @@ -3063,11 +3551,12 @@ again: trans = btrfs_join_transaction(root, 1); goto again; } + map = kmap(page); write_extent_buffer(leaf, map + pg_offset, ptr, copy_size); + kunmap(page); btrfs_mark_buffer_dirty(leaf); } - kunmap(page); set_extent_uptodate(io_tree, em->start, extent_map_end(em) - 1, GFP_NOFS); goto insert; @@ -3779,6 +4268,11 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, btrfs_set_file_extent_generation(leaf, ei, trans->transid); btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); + btrfs_set_file_extent_encryption(leaf, ei, 0); + btrfs_set_file_extent_compression(leaf, ei, 0); + btrfs_set_file_extent_other_encoding(leaf, ei, 0); + btrfs_set_file_extent_ram_bytes(leaf, ei, name_len); + ptr = btrfs_file_extent_inline_start(ei); write_extent_buffer(leaf, symname, ptr, name_len); btrfs_mark_buffer_dirty(leaf); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 2eb6caba57c..b5745bb96d4 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -165,7 +165,8 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, * inserted. */ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, - u64 start, u64 len, int nocow) + u64 start, u64 len, u64 disk_len, int nocow, + int compressed) { struct btrfs_ordered_inode_tree *tree; struct rb_node *node; @@ -180,9 +181,12 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, entry->file_offset = file_offset; entry->start = start; entry->len = len; + entry->disk_len = disk_len; entry->inode = inode; if (nocow) set_bit(BTRFS_ORDERED_NOCOW, &entry->flags); + if (compressed) + set_bit(BTRFS_ORDERED_COMPRESSED, &entry->flags); /* one ref for the tree */ atomic_set(&entry->refs, 1); @@ -389,9 +393,10 @@ void btrfs_start_ordered_extent(struct inode *inode, * for pdflush to find them */ btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_NONE); - if (wait) + if (wait) { wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags)); + } } /* diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index f50f8870a14..1ef464145d2 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -66,6 +66,8 @@ struct btrfs_ordered_sum { #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ +#define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */ + struct btrfs_ordered_extent { /* logical offset in the file */ u64 file_offset; @@ -73,9 +75,12 @@ struct btrfs_ordered_extent { /* disk byte number */ u64 start; - /* length of the extent in bytes */ + /* ram length of the extent in bytes */ u64 len; + /* extent length on disk */ + u64 disk_len; + /* flags (described above) */ unsigned long flags; @@ -127,7 +132,8 @@ int btrfs_remove_ordered_extent(struct inode *inode, int btrfs_dec_test_ordered_pending(struct inode *inode, u64 file_offset, u64 io_size); int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, - u64 start, u64 len, int nocow); + u64 start, u64 len, u64 disk_len, int nocow, + int compressed); int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_extent *entry, struct btrfs_ordered_sum *sum); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index bd9ab3e9a7f..64725c13aa1 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -115,15 +115,16 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) if (btrfs_file_extent_type(l, fi) == BTRFS_FILE_EXTENT_INLINE) { printk("\t\tinline extent data size %u\n", - btrfs_file_extent_inline_len(l, item)); + btrfs_file_extent_inline_len(l, fi)); break; } printk("\t\textent data disk bytenr %llu nr %llu\n", (unsigned long long)btrfs_file_extent_disk_bytenr(l, fi), (unsigned long long)btrfs_file_extent_disk_num_bytes(l, fi)); - printk("\t\textent data offset %llu nr %llu\n", + printk("\t\textent data offset %llu nr %llu ram %llu\n", (unsigned long long)btrfs_file_extent_offset(l, fi), - (unsigned long long)btrfs_file_extent_num_bytes(l, fi)); + (unsigned long long)btrfs_file_extent_num_bytes(l, fi), + (unsigned long long)btrfs_file_extent_ram_bytes(l, fi)); break; case BTRFS_BLOCK_GROUP_ITEM_KEY: bi = btrfs_item_ptr(l, i, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2e6039825b7..431fdf144b5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -47,6 +47,7 @@ #include "volumes.h" #include "version.h" #include "export.h" +#include "compression.h" #define BTRFS_SUPER_MAGIC 0x9123683E @@ -69,7 +70,7 @@ static void btrfs_put_super (struct super_block * sb) enum { Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, - Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_err, + Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_err, }; static match_table_t tokens = { @@ -83,6 +84,7 @@ static match_table_t tokens = { {Opt_max_inline, "max_inline=%s"}, {Opt_alloc_start, "alloc_start=%s"}, {Opt_thread_pool, "thread_pool=%d"}, + {Opt_compress, "compress"}, {Opt_ssd, "ssd"}, {Opt_noacl, "noacl"}, {Opt_err, NULL}, @@ -163,6 +165,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) btrfs_set_opt(info->mount_opt, NODATACOW); btrfs_set_opt(info->mount_opt, NODATASUM); break; + case Opt_compress: + printk(KERN_INFO "btrfs: use compression\n"); + btrfs_set_opt(info->mount_opt, COMPRESS); + break; case Opt_ssd: printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); btrfs_set_opt(info->mount_opt, SSD); @@ -622,6 +628,7 @@ static int __init init_btrfs_fs(void) err = btrfs_interface_init(); if (err) goto free_extent_map; + err = register_filesystem(&btrfs_fs_type); if (err) goto unregister_ioctl; @@ -651,6 +658,7 @@ static void __exit exit_btrfs_fs(void) unregister_filesystem(&btrfs_fs_type); btrfs_exit_sysfs(); btrfs_cleanup_fs_uuids(); + btrfs_zlib_exit(); } module_init(init_btrfs_fs) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index cf618cc8b34..e6d579053a4 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -540,8 +540,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, if (found_type == BTRFS_FILE_EXTENT_REG) extent_end = start + btrfs_file_extent_num_bytes(eb, item); else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - size = btrfs_file_extent_inline_len(eb, - btrfs_item_nr(eb, slot)); + size = btrfs_file_extent_inline_len(eb, item); extent_end = (start + size + mask) & ~mask; } else { ret = 0; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2eed7f91f51..7db4cfd03a9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1816,6 +1816,7 @@ again: em->start = key.offset; em->len = *num_bytes; em->block_start = 0; + em->block_len = em->len; if (type & BTRFS_BLOCK_GROUP_SYSTEM) { ret = btrfs_add_system_chunk(trans, chunk_root, &key, @@ -2323,6 +2324,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, em->start = logical; em->len = length; em->block_start = 0; + em->block_len = em->len; map->num_stripes = num_stripes; map->io_width = btrfs_chunk_io_width(leaf, chunk); diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c new file mode 100644 index 00000000000..e99309180a1 --- /dev/null +++ b/fs/btrfs/zlib.c @@ -0,0 +1,637 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Based on jffs2 zlib code: + * Copyright © 2001-2007 Red Hat, Inc. + * Created by David Woodhouse + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Plan: call deflate() with avail_in == *sourcelen, + avail_out = *dstlen - 12 and flush == Z_FINISH. + If it doesn't manage to finish, call it again with + avail_in == 0 and avail_out set to the remaining 12 + bytes for it to clean up. + Q: Is 12 bytes sufficient? +*/ +#define STREAM_END_SPACE 12 + +struct workspace { + z_stream inf_strm; + z_stream def_strm; + char *buf; + struct list_head list; +}; + +static LIST_HEAD(idle_workspace); +static DEFINE_SPINLOCK(workspace_lock); +static unsigned long num_workspace; +static atomic_t alloc_workspace = ATOMIC_INIT(0); +static DECLARE_WAIT_QUEUE_HEAD(workspace_wait); + +/* + * this finds an available zlib workspace or allocates a new one + * NULL or an ERR_PTR is returned if things go bad. + */ +static struct workspace *find_zlib_workspace(void) +{ + struct workspace *workspace; + int ret; + int cpus = num_online_cpus(); + +again: + spin_lock(&workspace_lock); + if (!list_empty(&idle_workspace)) { + workspace = list_entry(idle_workspace.next, struct workspace, + list); + list_del(&workspace->list); + num_workspace--; + spin_unlock(&workspace_lock); + return workspace; + + } + spin_unlock(&workspace_lock); + if (atomic_read(&alloc_workspace) > cpus) { + DEFINE_WAIT(wait); + prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE); + if (atomic_read(&alloc_workspace) > cpus) + schedule(); + finish_wait(&workspace_wait, &wait); + goto again; + } + atomic_inc(&alloc_workspace); + workspace = kzalloc(sizeof(*workspace), GFP_NOFS); + if (!workspace) { + ret = -ENOMEM; + goto fail; + } + + workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); + if (!workspace->def_strm.workspace) { + ret = -ENOMEM; + goto fail; + } + workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); + if (!workspace->inf_strm.workspace) { + ret = -ENOMEM; + goto fail_inflate; + } + workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); + if (!workspace->buf) { + ret = -ENOMEM; + goto fail_kmalloc; + } + return workspace; + +fail_kmalloc: + vfree(workspace->inf_strm.workspace); +fail_inflate: + vfree(workspace->def_strm.workspace); +fail: + kfree(workspace); + atomic_dec(&alloc_workspace); + wake_up(&workspace_wait); + return ERR_PTR(ret); +} + +/* + * put a workspace struct back on the list or free it if we have enough + * idle ones sitting around + */ +static int free_workspace(struct workspace *workspace) +{ + spin_lock(&workspace_lock); + if (num_workspace < num_online_cpus()) { + list_add_tail(&workspace->list, &idle_workspace); + num_workspace++; + spin_unlock(&workspace_lock); + if (waitqueue_active(&workspace_wait)) + wake_up(&workspace_wait); + return 0; + } + spin_unlock(&workspace_lock); + vfree(workspace->def_strm.workspace); + vfree(workspace->inf_strm.workspace); + kfree(workspace->buf); + kfree(workspace); + + atomic_dec(&alloc_workspace); + if (waitqueue_active(&workspace_wait)) + wake_up(&workspace_wait); + return 0; +} + +/* + * cleanup function for module exit + */ +static void free_workspaces(void) +{ + struct workspace *workspace; + while(!list_empty(&idle_workspace)) { + workspace = list_entry(idle_workspace.next, struct workspace, + list); + list_del(&workspace->list); + vfree(workspace->def_strm.workspace); + vfree(workspace->inf_strm.workspace); + kfree(workspace->buf); + kfree(workspace); + atomic_dec(&alloc_workspace); + } +} + +/* + * given an address space and start/len, compress the bytes. + * + * pages are allocated to hold the compressed result and stored + * in 'pages' + * + * out_pages is used to return the number of pages allocated. There + * may be pages allocated even if we return an error + * + * total_in is used to return the number of bytes actually read. It + * may be smaller then len if we had to exit early because we + * ran out of room in the pages array or because we cross the + * max_out threshold. + * + * total_out is used to return the total number of compressed bytes + * + * max_out tells us the max number of bytes that we're allowed to + * stuff into pages + */ +int btrfs_zlib_compress_pages(struct address_space *mapping, + u64 start, unsigned long len, + struct page **pages, + unsigned long nr_dest_pages, + unsigned long *out_pages, + unsigned long *total_in, + unsigned long *total_out, + unsigned long max_out) +{ + int ret; + struct workspace *workspace; + char *data_in; + char *cpage_out; + int nr_pages = 0; + struct page *in_page = NULL; + struct page *out_page = NULL; + int out_written = 0; + int in_read = 0; + unsigned long bytes_left; + + *out_pages = 0; + *total_out = 0; + *total_in = 0; + + workspace = find_zlib_workspace(); + if (!workspace) + return -1; + + if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { + printk(KERN_WARNING "deflateInit failed\n"); + ret = -1; + goto out; + } + + workspace->def_strm.total_in = 0; + workspace->def_strm.total_out = 0; + + in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); + data_in = kmap(in_page); + + out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + cpage_out = kmap(out_page); + pages[0] = out_page; + nr_pages = 1; + + workspace->def_strm.next_in = data_in; + workspace->def_strm.next_out = cpage_out; + workspace->def_strm.avail_out = PAGE_CACHE_SIZE; + workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE); + + out_written = 0; + in_read = 0; + + while (workspace->def_strm.total_in < len) { + ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH); + if (ret != Z_OK) { + printk(KERN_DEBUG "btrfs deflate in loop returned %d\n", + ret); + zlib_deflateEnd(&workspace->def_strm); + ret = -1; + goto out; + } + + /* we're making it bigger, give up */ + if (workspace->def_strm.total_in > 8192 && + workspace->def_strm.total_in < + workspace->def_strm.total_out) { + ret = -1; + goto out; + } + /* we need another page for writing out. Test this + * before the total_in so we will pull in a new page for + * the stream end if required + */ + if (workspace->def_strm.avail_out == 0) { + kunmap(out_page); + if (nr_pages == nr_dest_pages) { + out_page = NULL; + ret = -1; + goto out; + } + out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + cpage_out = kmap(out_page); + pages[nr_pages] = out_page; + nr_pages++; + workspace->def_strm.avail_out = PAGE_CACHE_SIZE; + workspace->def_strm.next_out = cpage_out; + } + /* we're all done */ + if (workspace->def_strm.total_in >= len) + break; + + /* we've read in a full page, get a new one */ + if (workspace->def_strm.avail_in == 0) { + if (workspace->def_strm.total_out > max_out) + break; + + bytes_left = len - workspace->def_strm.total_in; + kunmap(in_page); + page_cache_release(in_page); + + start += PAGE_CACHE_SIZE; + in_page = find_get_page(mapping, + start >> PAGE_CACHE_SHIFT); + data_in = kmap(in_page); + workspace->def_strm.avail_in = min(bytes_left, + PAGE_CACHE_SIZE); + workspace->def_strm.next_in = data_in; + } + } + workspace->def_strm.avail_in = 0; + ret = zlib_deflate(&workspace->def_strm, Z_FINISH); + zlib_deflateEnd(&workspace->def_strm); + + if (ret != Z_STREAM_END) { + ret = -1; + goto out; + } + + if (workspace->def_strm.total_out >= workspace->def_strm.total_in) { + ret = -1; + goto out; + } + + ret = 0; + *total_out = workspace->def_strm.total_out; + *total_in = workspace->def_strm.total_in; +out: + *out_pages = nr_pages; + if (out_page) + kunmap(out_page); + + if (in_page) { + kunmap(in_page); + page_cache_release(in_page); + } + free_workspace(workspace); + return ret; +} + +/* + * pages_in is an array of pages with compressed data. + * + * disk_start is the starting logical offset of this array in the file + * + * bvec is a bio_vec of pages from the file that we want to decompress into + * + * vcnt is the count of pages in the biovec + * + * srclen is the number of bytes in pages_in + * + * The basic idea is that we have a bio that was created by readpages. + * The pages in the bio are for the uncompressed data, and they may not + * be contiguous. They all correspond to the range of bytes covered by + * the compressed extent. + */ +int btrfs_zlib_decompress_biovec(struct page **pages_in, + u64 disk_start, + struct bio_vec *bvec, + int vcnt, + size_t srclen) +{ + int ret = 0; + int wbits = MAX_WBITS; + struct workspace *workspace; + char *data_in; + size_t total_out = 0; + unsigned long page_bytes_left; + unsigned long page_in_index = 0; + unsigned long page_out_index = 0; + struct page *page_out; + unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / + PAGE_CACHE_SIZE; + unsigned long buf_start; + unsigned long buf_offset; + unsigned long bytes; + unsigned long working_bytes; + unsigned long pg_offset; + unsigned long start_byte; + unsigned long current_buf_start; + char *kaddr; + + workspace = find_zlib_workspace(); + if (!workspace) + return -ENOMEM; + + data_in = kmap(pages_in[page_in_index]); + workspace->inf_strm.next_in = data_in; + workspace->inf_strm.avail_in = min(srclen, PAGE_CACHE_SIZE); + workspace->inf_strm.total_in = 0; + + workspace->inf_strm.total_out = 0; + workspace->inf_strm.next_out = workspace->buf; + workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; + page_out = bvec[page_out_index].bv_page; + page_bytes_left = PAGE_CACHE_SIZE; + pg_offset = 0; + + /* If it's deflate, and it's got no preset dictionary, then + we can tell zlib to skip the adler32 check. */ + if (srclen > 2 && !(data_in[1] & PRESET_DICT) && + ((data_in[0] & 0x0f) == Z_DEFLATED) && + !(((data_in[0]<<8) + data_in[1]) % 31)) { + + wbits = -((data_in[0] >> 4) + 8); + workspace->inf_strm.next_in += 2; + workspace->inf_strm.avail_in -= 2; + } + + if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { + printk(KERN_WARNING "inflateInit failed\n"); + ret = -1; + goto out; + } + while(workspace->inf_strm.total_in < srclen) { + ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); + if (ret != Z_OK && ret != Z_STREAM_END) { + break; + } + + /* + * buf start is the byte offset we're of the start of + * our workspace buffer + */ + buf_start = total_out; + + /* total_out is the last byte of the workspace buffer */ + total_out = workspace->inf_strm.total_out; + + working_bytes = total_out - buf_start; + + /* + * start byte is the first byte of the page we're currently + * copying into relative to the start of the compressed data. + */ + start_byte = page_offset(page_out) - disk_start; + + if (working_bytes == 0) { + /* we didn't make progress in this inflate + * call, we're done + */ + if (ret != Z_STREAM_END) + ret = -1; + break; + } + + /* we haven't yet hit data corresponding to this page */ + if (total_out <= start_byte) { + goto next; + } + + /* + * the start of the data we care about is offset into + * the middle of our working buffer + */ + if (total_out > start_byte && buf_start < start_byte) { + buf_offset = start_byte - buf_start; + working_bytes -= buf_offset; + } else { + buf_offset = 0; + } + current_buf_start = buf_start; + + /* copy bytes from the working buffer into the pages */ + while(working_bytes > 0) { + bytes = min(PAGE_CACHE_SIZE - pg_offset, + PAGE_CACHE_SIZE - buf_offset); + bytes = min(bytes, working_bytes); + kaddr = kmap_atomic(page_out, KM_USER0); + memcpy(kaddr + pg_offset, workspace->buf + buf_offset, + bytes); + kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(page_out); + + pg_offset += bytes; + page_bytes_left -= bytes; + buf_offset += bytes; + working_bytes -= bytes; + current_buf_start += bytes; + + /* check if we need to pick another page */ + if (page_bytes_left == 0) { + page_out_index++; + if (page_out_index >= vcnt) { + ret = 0; + goto done; + } + page_out = bvec[page_out_index].bv_page; + pg_offset = 0; + page_bytes_left = PAGE_CACHE_SIZE; + start_byte = page_offset(page_out) - disk_start; + + /* + * make sure our new page is covered by this + * working buffer + */ + if (total_out <= start_byte) { + goto next; + } + + /* the next page in the biovec might not + * be adjacent to the last page, but it + * might still be found inside this working + * buffer. bump our offset pointer + */ + if (total_out > start_byte && + current_buf_start < start_byte) { + buf_offset = start_byte - buf_start; + working_bytes = total_out - start_byte; + current_buf_start = buf_start + + buf_offset; + } + } + } +next: + workspace->inf_strm.next_out = workspace->buf; + workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; + + if (workspace->inf_strm.avail_in == 0) { + unsigned long tmp; + kunmap(pages_in[page_in_index]); + page_in_index++; + if (page_in_index >= total_pages_in) { + data_in = NULL; + break; + } + data_in = kmap(pages_in[page_in_index]); + workspace->inf_strm.next_in = data_in; + tmp = srclen - workspace->inf_strm.total_in; + workspace->inf_strm.avail_in = min(tmp, + PAGE_CACHE_SIZE); + } + } + if (ret != Z_STREAM_END) { + ret = -1; + } else { + ret = 0; + } +done: + zlib_inflateEnd(&workspace->inf_strm); + if (data_in) + kunmap(pages_in[page_in_index]); +out: + free_workspace(workspace); + return ret; +} + +/* + * a less complex decompression routine. Our compressed data fits in a + * single page, and we want to read a single page out of it. + * start_byte tells us the offset into the compressed data we're interested in + */ +int btrfs_zlib_decompress(unsigned char *data_in, + struct page *dest_page, + unsigned long start_byte, + size_t srclen, size_t destlen) +{ + int ret = 0; + int wbits = MAX_WBITS; + struct workspace *workspace; + unsigned long bytes_left = destlen; + unsigned long total_out = 0; + char *kaddr; + + if (destlen > PAGE_CACHE_SIZE) + return -ENOMEM; + + workspace = find_zlib_workspace(); + if (!workspace) + return -ENOMEM; + + workspace->inf_strm.next_in = data_in; + workspace->inf_strm.avail_in = srclen; + workspace->inf_strm.total_in = 0; + + workspace->inf_strm.next_out = workspace->buf; + workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; + workspace->inf_strm.total_out = 0; + /* If it's deflate, and it's got no preset dictionary, then + we can tell zlib to skip the adler32 check. */ + if (srclen > 2 && !(data_in[1] & PRESET_DICT) && + ((data_in[0] & 0x0f) == Z_DEFLATED) && + !(((data_in[0]<<8) + data_in[1]) % 31)) { + + wbits = -((data_in[0] >> 4) + 8); + workspace->inf_strm.next_in += 2; + workspace->inf_strm.avail_in -= 2; + } + + if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { + printk(KERN_WARNING "inflateInit failed\n"); + ret = -1; + goto out; + } + + while(bytes_left > 0) { + unsigned long buf_start; + unsigned long buf_offset; + unsigned long bytes; + unsigned long pg_offset = 0; + + ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); + if (ret != Z_OK && ret != Z_STREAM_END) { + break; + } + + buf_start = total_out; + total_out = workspace->inf_strm.total_out; + + if (total_out == buf_start) { + ret = -1; + break; + } + + if (total_out <= start_byte) { + goto next; + } + + if (total_out > start_byte && buf_start < start_byte) { + buf_offset = start_byte - buf_start; + } else { + buf_offset = 0; + } + + bytes = min(PAGE_CACHE_SIZE - pg_offset, + PAGE_CACHE_SIZE - buf_offset); + bytes = min(bytes, bytes_left); + + kaddr = kmap_atomic(dest_page, KM_USER0); + memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes); + kunmap_atomic(kaddr, KM_USER0); + + pg_offset += bytes; + bytes_left -= bytes; +next: + workspace->inf_strm.next_out = workspace->buf; + workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; + } + if (ret != Z_STREAM_END && bytes_left != 0) { + ret = -1; + } else { + ret = 0; + } + zlib_inflateEnd(&workspace->inf_strm); +out: + free_workspace(workspace); + return ret; +} + +void btrfs_zlib_exit(void) +{ + free_workspaces(); +} -- cgit v1.2.3-70-g09d2 From f82d02d9d8222183b7945e893111a6d1bf67ae4a Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Wed, 29 Oct 2008 14:49:05 -0400 Subject: Btrfs: Improve space balancing code This patch improves the space balancing code to keep more sharing of tree blocks. The only case that breaks sharing of tree blocks is data extents get fragmented during balancing. The main changes in this patch are: Add a 'drop sub-tree' function. This solves the problem in old code that BTRFS_HEADER_FLAG_WRITTEN check breaks sharing of tree block. Remove relocation mapping tree. Relocation mappings are stored in struct btrfs_ref_path and updated dynamically during walking up/down the reference path. This reduces CPU usage and simplifies code. This patch also fixes a bug. Root items for reloc trees should be updated in btrfs_free_reloc_root. Signed-off-by: Yan Zheng --- fs/btrfs/ctree.c | 93 ++++++------- fs/btrfs/ctree.h | 13 +- fs/btrfs/disk-io.c | 2 - fs/btrfs/extent-tree.c | 364 ++++++++++++++++++++++++++++++------------------- fs/btrfs/transaction.c | 4 +- 5 files changed, 277 insertions(+), 199 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9caeb377de6..73899d0f9d8 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -287,7 +287,7 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, /* * There are only two places that can drop reference to * tree blocks owned by living reloc trees, one is here, - * the other place is btrfs_merge_path. In both places, + * the other place is btrfs_drop_subtree. In both places, * we check reference count while tree block is locked. * Furthermore, if reference count is one, it won't get * increased by someone else. @@ -312,9 +312,6 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, } if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { - ret = btrfs_add_reloc_mapping(root, buf->start, - buf->len, cow->start); - BUG_ON(ret); ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start); WARN_ON(ret); } @@ -1627,61 +1624,57 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, btrfs_node_key_to_cpu(eb, &key, slot); key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key)); + if (generation == trans->transid) { + eb = read_tree_block(root, bytenr, blocksize, + generation); + btrfs_tree_lock(eb); + } + /* * if node keys match and node pointer hasn't been modified * in the running transaction, we can merge the path. for - * reloc trees, the node pointer check is skipped, this is - * because the reloc trees are fully controlled by the space - * balance code, no one else can modify them. + * blocks owened by reloc trees, the node pointer check is + * skipped, this is because these blocks are fully controlled + * by the space balance code, no one else can modify them. */ if (!nodes[level - 1] || !key_match || (generation == trans->transid && - root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)) { -next_level: - if (level == 1 || level == lowest_level + 1) + btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) { + if (level == 1 || level == lowest_level + 1) { + if (generation == trans->transid) { + btrfs_tree_unlock(eb); + free_extent_buffer(eb); + } break; + } - eb = read_tree_block(root, bytenr, blocksize, - generation); - btrfs_tree_lock(eb); + if (generation != trans->transid) { + eb = read_tree_block(root, bytenr, blocksize, + generation); + btrfs_tree_lock(eb); + } ret = btrfs_cow_block(trans, root, eb, parent, slot, &eb, 0); BUG_ON(ret); + if (root->root_key.objectid == + BTRFS_TREE_RELOC_OBJECTID) { + if (!nodes[level - 1]) { + nodes[level - 1] = eb->start; + memcpy(&node_keys[level - 1], &key, + sizeof(node_keys[0])); + } else { + WARN_ON(1); + } + } + btrfs_tree_unlock(parent); free_extent_buffer(parent); parent = eb; continue; } - if (generation == trans->transid) { - u32 refs; - BUG_ON(btrfs_header_owner(eb) != - BTRFS_TREE_RELOC_OBJECTID); - /* - * lock the block to keep __btrfs_cow_block from - * changing the reference count. - */ - eb = read_tree_block(root, bytenr, blocksize, - generation); - btrfs_tree_lock(eb); - - ret = btrfs_lookup_extent_ref(trans, root, bytenr, - blocksize, &refs); - BUG_ON(ret); - /* - * if replace block whose reference count is one, - * we have to "drop the subtree". so skip it for - * simplicity - */ - if (refs == 1) { - btrfs_tree_unlock(eb); - free_extent_buffer(eb); - goto next_level; - } - } - btrfs_set_node_blockptr(parent, slot, nodes[level - 1]); btrfs_set_node_ptr_generation(parent, slot, trans->transid); btrfs_mark_buffer_dirty(parent); @@ -1693,16 +1686,24 @@ next_level: btrfs_header_generation(parent), level - 1); BUG_ON(ret); - ret = btrfs_free_extent(trans, root, bytenr, - blocksize, parent->start, - btrfs_header_owner(parent), - btrfs_header_generation(parent), - level - 1, 1); - BUG_ON(ret); + /* + * If the block was created in the running transaction, + * it's possible this is the last reference to it, so we + * should drop the subtree. + */ if (generation == trans->transid) { + ret = btrfs_drop_subtree(trans, root, eb, parent); + BUG_ON(ret); btrfs_tree_unlock(eb); free_extent_buffer(eb); + } else { + ret = btrfs_free_extent(trans, root, bytenr, + blocksize, parent->start, + btrfs_header_owner(parent), + btrfs_header_generation(parent), + level - 1, 1); + BUG_ON(ret); } break; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 793d8fdda24..117090995e7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -684,7 +684,6 @@ struct btrfs_fs_info { int thread_pool_size; /* tree relocation relocated fields */ - struct extent_io_tree reloc_mapping_tree; struct list_head dead_reloc_roots; struct btrfs_leaf_ref_tree reloc_ref_tree; struct btrfs_leaf_ref_tree shared_ref_tree; @@ -1636,13 +1635,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, int btrfs_remove_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 group_start); int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); -int btrfs_free_reloc_root(struct btrfs_root *root); +int btrfs_free_reloc_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root); int btrfs_drop_dead_reloc_roots(struct btrfs_root *root); -int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, - u64 num_bytes, u64 new_bytenr); -int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, - u64 num_bytes, u64 *new_bytenr); -void btrfs_free_reloc_mappings(struct btrfs_root *root); int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, u64 orig_start); @@ -1726,6 +1721,10 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_drop_subtree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *node, + struct extent_buffer *parent); /* root-item.c */ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dc95f636a11..796256440df 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1448,8 +1448,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; - extent_io_tree_init(&fs_info->reloc_mapping_tree, - fs_info->btree_inode->i_mapping, GFP_NOFS); INIT_LIST_HEAD(&fs_info->dead_reloc_roots); btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree); btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index bbf04e80a1a..56e41369d71 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3031,6 +3031,95 @@ out: return 0; } +/* + * helper function for drop_subtree, this function is similar to + * walk_down_tree. The main difference is that it checks reference + * counts while tree blocks are locked. + */ +static int noinline walk_down_subtree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level) +{ + struct extent_buffer *next; + struct extent_buffer *cur; + struct extent_buffer *parent; + u64 bytenr; + u64 ptr_gen; + u32 blocksize; + u32 refs; + int ret; + + cur = path->nodes[*level]; + ret = btrfs_lookup_extent_ref(trans, root, cur->start, cur->len, + &refs); + BUG_ON(ret); + if (refs > 1) + goto out; + + while (*level >= 0) { + cur = path->nodes[*level]; + if (*level == 0) { + ret = btrfs_drop_leaf_ref(trans, root, cur); + BUG_ON(ret); + clean_tree_block(trans, root, cur); + break; + } + if (path->slots[*level] >= btrfs_header_nritems(cur)) { + clean_tree_block(trans, root, cur); + break; + } + + bytenr = btrfs_node_blockptr(cur, path->slots[*level]); + blocksize = btrfs_level_size(root, *level - 1); + ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); + + next = read_tree_block(root, bytenr, blocksize, ptr_gen); + btrfs_tree_lock(next); + + ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, + &refs); + BUG_ON(ret); + if (refs > 1) { + parent = path->nodes[*level]; + ret = btrfs_free_extent(trans, root, bytenr, + blocksize, parent->start, + btrfs_header_owner(parent), + btrfs_header_generation(parent), + *level - 1, 1); + BUG_ON(ret); + path->slots[*level]++; + btrfs_tree_unlock(next); + free_extent_buffer(next); + continue; + } + + *level = btrfs_header_level(next); + path->nodes[*level] = next; + path->slots[*level] = 0; + path->locks[*level] = 1; + cond_resched(); + } +out: + parent = path->nodes[*level + 1]; + bytenr = path->nodes[*level]->start; + blocksize = path->nodes[*level]->len; + + ret = btrfs_free_extent(trans, root, bytenr, blocksize, + parent->start, btrfs_header_owner(parent), + btrfs_header_generation(parent), *level, 1); + BUG_ON(ret); + + if (path->locks[*level]) { + btrfs_tree_unlock(path->nodes[*level]); + path->locks[*level] = 0; + } + free_extent_buffer(path->nodes[*level]); + path->nodes[*level] = NULL; + *level += 1; + cond_resched(); + return 0; +} + /* * helper for dropping snapshots. This walks back up the tree in the path * to find the first node higher up where we haven't yet gone through @@ -3038,7 +3127,8 @@ out: */ static int noinline walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_path *path, int *level) + struct btrfs_path *path, + int *level, int max_level) { u64 root_owner; u64 root_gen; @@ -3047,7 +3137,7 @@ static int noinline walk_up_tree(struct btrfs_trans_handle *trans, int slot; int ret; - for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { + for (i = *level; i < max_level && path->nodes[i]; i++) { slot = path->slots[i]; if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { struct extent_buffer *node; @@ -3070,12 +3160,18 @@ static int noinline walk_up_tree(struct btrfs_trans_handle *trans, root_owner = btrfs_header_owner(parent); root_gen = btrfs_header_generation(parent); + + clean_tree_block(trans, root, path->nodes[*level]); ret = btrfs_free_extent(trans, root, path->nodes[*level]->start, path->nodes[*level]->len, parent->start, root_owner, root_gen, *level, 1); BUG_ON(ret); + if (path->locks[*level]) { + btrfs_tree_unlock(path->nodes[*level]); + path->locks[*level] = 0; + } free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; @@ -3145,7 +3241,8 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root if (wret < 0) ret = wret; - wret = walk_up_tree(trans, root, path, &level); + wret = walk_up_tree(trans, root, path, &level, + BTRFS_MAX_LEVEL); if (wret > 0) break; if (wret < 0) @@ -3168,6 +3265,50 @@ out: return ret; } +int btrfs_drop_subtree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *node, + struct extent_buffer *parent) +{ + struct btrfs_path *path; + int level; + int parent_level; + int ret = 0; + int wret; + + path = btrfs_alloc_path(); + BUG_ON(!path); + + BUG_ON(!btrfs_tree_locked(parent)); + parent_level = btrfs_header_level(parent); + extent_buffer_get(parent); + path->nodes[parent_level] = parent; + path->slots[parent_level] = btrfs_header_nritems(parent); + + BUG_ON(!btrfs_tree_locked(node)); + level = btrfs_header_level(node); + extent_buffer_get(node); + path->nodes[level] = node; + path->slots[level] = 0; + + while (1) { + wret = walk_down_subtree(trans, root, path, &level); + if (wret < 0) + ret = wret; + if (wret != 0) + break; + + wret = walk_up_tree(trans, root, path, &level, parent_level); + if (wret < 0) + ret = wret; + if (wret != 0) + break; + } + + btrfs_free_path(path); + return ret; +} + static unsigned long calc_ra(unsigned long start, unsigned long last, unsigned long nr) { @@ -3312,6 +3453,10 @@ struct btrfs_ref_path { u32 num_refs; int lowest_level; int current_level; + int shared_level; + + struct btrfs_key node_keys[BTRFS_MAX_LEVEL]; + u64 new_nodes[BTRFS_MAX_LEVEL]; }; struct disk_extent { @@ -3360,6 +3505,7 @@ static int noinline __next_ref_path(struct btrfs_trans_handle *trans, if (first_time) { ref_path->lowest_level = -1; ref_path->current_level = -1; + ref_path->shared_level = -1; goto walk_up; } walk_down: @@ -3403,8 +3549,11 @@ walk_down: btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.objectid == bytenr && - found_key.type == BTRFS_EXTENT_REF_KEY) + found_key.type == BTRFS_EXTENT_REF_KEY) { + if (level < ref_path->shared_level) + ref_path->shared_level = level; goto found; + } next: level--; btrfs_release_path(extent_root, path); @@ -3992,51 +4141,6 @@ out: return ret; } -int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, - u64 num_bytes, u64 new_bytenr) -{ - set_extent_bits(&root->fs_info->reloc_mapping_tree, - orig_bytenr, orig_bytenr + num_bytes - 1, - EXTENT_LOCKED, GFP_NOFS); - set_state_private(&root->fs_info->reloc_mapping_tree, - orig_bytenr, new_bytenr); - return 0; -} - -int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, - u64 num_bytes, u64 *new_bytenr) -{ - u64 bytenr; - u64 cur_bytenr = orig_bytenr; - u64 prev_bytenr = orig_bytenr; - int ret; - - while (1) { - ret = get_state_private(&root->fs_info->reloc_mapping_tree, - cur_bytenr, &bytenr); - if (ret) - break; - prev_bytenr = cur_bytenr; - cur_bytenr = bytenr; - } - - if (orig_bytenr == cur_bytenr) - return -ENOENT; - - if (prev_bytenr != orig_bytenr) { - set_state_private(&root->fs_info->reloc_mapping_tree, - orig_bytenr, cur_bytenr); - } - *new_bytenr = cur_bytenr; - return 0; -} - -void btrfs_free_reloc_mappings(struct btrfs_root *root) -{ - clear_extent_bits(&root->fs_info->reloc_mapping_tree, - 0, (u64)-1, -1, GFP_NOFS); -} - int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, u64 orig_start) @@ -4222,15 +4326,30 @@ static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans, return 0; } -int btrfs_free_reloc_root(struct btrfs_root *root) +int btrfs_free_reloc_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root) { struct btrfs_root *reloc_root; + int ret; if (root->reloc_root) { reloc_root = root->reloc_root; root->reloc_root = NULL; list_add(&reloc_root->dead_list, &root->fs_info->dead_reloc_roots); + + btrfs_set_root_bytenr(&reloc_root->root_item, + reloc_root->node->start); + btrfs_set_root_level(&root->root_item, + btrfs_header_level(reloc_root->node)); + memset(&reloc_root->root_item.drop_progress, 0, + sizeof(struct btrfs_disk_key)); + reloc_root->root_item.drop_level = 0; + + ret = btrfs_update_root(trans, root->fs_info->tree_root, + &reloc_root->root_key, + &reloc_root->root_item); + BUG_ON(ret); } return 0; } @@ -4356,8 +4475,6 @@ static int noinline init_reloc_tree(struct btrfs_trans_handle *trans, btrfs_set_root_refs(root_item, 0); btrfs_set_root_bytenr(root_item, eb->start); btrfs_set_root_level(root_item, btrfs_header_level(eb)); - memset(&root_item->drop_progress, 0, sizeof(root_item->drop_progress)); - root_item->drop_level = 0; btrfs_tree_unlock(eb); free_extent_buffer(eb); @@ -4382,15 +4499,19 @@ static int noinline init_reloc_tree(struct btrfs_trans_handle *trans, * Core function of space balance. * * The idea is using reloc trees to relocate tree blocks in reference - * counted roots. There is one reloc tree for each subvol, all reloc - * trees share same key objectid. Reloc trees are snapshots of the - * latest committed roots (subvol root->commit_root). To relocate a tree - * block referenced by a subvol, the code COW the block through the reloc - * tree, then update pointer in the subvol to point to the new block. - * Since all reloc trees share same key objectid, we can easily do special - * handing to share tree blocks between reloc trees. Once a tree block has - * been COWed in one reloc tree, we can use the result when the same block - * is COWed again through other reloc trees. + * counted roots. There is one reloc tree for each subvol, and all + * reloc trees share same root key objectid. Reloc trees are snapshots + * of the latest committed roots of subvols (root->commit_root). + * + * To relocate a tree block referenced by a subvol, there are two steps. + * COW the block through subvol's reloc tree, then update block pointer + * in the subvol to point to the new block. Since all reloc trees share + * same root key objectid, doing special handing for tree blocks owned + * by them is easy. Once a tree block has been COWed in one reloc tree, + * we can use the resulting new block directly when the same block is + * required to COW again through other reloc trees. By this way, relocated + * tree blocks are shared between reloc trees, so they are also shared + * between subvols. */ static int noinline relocate_one_path(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -4405,15 +4526,14 @@ static int noinline relocate_one_path(struct btrfs_trans_handle *trans, struct btrfs_key *keys; u64 *nodes; int level; - int lowest_merge; + int shared_level; int lowest_level = 0; - int update_refs; int ret; if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) lowest_level = ref_path->owner_objectid; - if (is_cowonly_root(ref_path->root_objectid)) { + if (!root->ref_cows) { path->lowest_level = lowest_level; ret = btrfs_search_slot(trans, root, first_key, path, 0, 1); BUG_ON(ret < 0); @@ -4422,91 +4542,49 @@ static int noinline relocate_one_path(struct btrfs_trans_handle *trans, return 0; } - keys = kzalloc(sizeof(*keys) * BTRFS_MAX_LEVEL, GFP_NOFS); - BUG_ON(!keys); - nodes = kzalloc(sizeof(*nodes) * BTRFS_MAX_LEVEL, GFP_NOFS); - BUG_ON(!nodes); - mutex_lock(&root->fs_info->tree_reloc_mutex); ret = init_reloc_tree(trans, root); BUG_ON(ret); reloc_root = root->reloc_root; - path->lowest_level = lowest_level; - ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 0); - BUG_ON(ret); - /* - * get relocation mapping for tree blocks in the path - */ - lowest_merge = BTRFS_MAX_LEVEL; - for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) { - u64 new_bytenr; - eb = path->nodes[level]; - if (!eb || eb == reloc_root->node) - continue; - ret = btrfs_get_reloc_mapping(reloc_root, eb->start, eb->len, - &new_bytenr); - if (ret) - continue; - if (level == 0) - btrfs_item_key_to_cpu(eb, &keys[level], 0); - else - btrfs_node_key_to_cpu(eb, &keys[level], 0); - nodes[level] = new_bytenr; - lowest_merge = level; - } + shared_level = ref_path->shared_level; + ref_path->shared_level = BTRFS_MAX_LEVEL - 1; - update_refs = 0; - if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { - eb = path->nodes[0]; - if (btrfs_header_generation(eb) < trans->transid) - update_refs = 1; - } + keys = ref_path->node_keys; + nodes = ref_path->new_nodes; + memset(&keys[shared_level + 1], 0, + sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1)); + memset(&nodes[shared_level + 1], 0, + sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1)); - btrfs_release_path(reloc_root, path); - /* - * merge tree blocks that already relocated in other reloc trees - */ - if (lowest_merge != BTRFS_MAX_LEVEL) { + if (nodes[lowest_level] == 0) { + path->lowest_level = lowest_level; + ret = btrfs_search_slot(trans, reloc_root, first_key, path, + 0, 1); + BUG_ON(ret); + for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) { + eb = path->nodes[level]; + if (!eb || eb == reloc_root->node) + break; + nodes[level] = eb->start; + if (level == 0) + btrfs_item_key_to_cpu(eb, &keys[level], 0); + else + btrfs_node_key_to_cpu(eb, &keys[level], 0); + } + if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { + eb = path->nodes[0]; + ret = replace_extents_in_leaf(trans, reloc_root, eb, + group, reloc_inode); + BUG_ON(ret); + } + btrfs_release_path(reloc_root, path); + } else { ret = btrfs_merge_path(trans, reloc_root, keys, nodes, - lowest_merge); - BUG_ON(ret < 0); - } - /* - * cow any tree blocks that still haven't been relocated - */ - ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 1); - BUG_ON(ret); - /* - * if we are relocating data block group, update extent pointers - * in the newly created tree leaf. - */ - eb = path->nodes[0]; - if (update_refs && nodes[0] != eb->start) { - ret = replace_extents_in_leaf(trans, reloc_root, eb, group, - reloc_inode); + lowest_level); BUG_ON(ret); } - memset(keys, 0, sizeof(*keys) * BTRFS_MAX_LEVEL); - memset(nodes, 0, sizeof(*nodes) * BTRFS_MAX_LEVEL); - for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) { - eb = path->nodes[level]; - if (!eb || eb == reloc_root->node) - continue; - BUG_ON(btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID); - nodes[level] = eb->start; - if (level == 0) - btrfs_item_key_to_cpu(eb, &keys[level], 0); - else - btrfs_node_key_to_cpu(eb, &keys[level], 0); - } - - if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { - eb = path->nodes[0]; - extent_buffer_get(eb); - } - btrfs_release_path(reloc_root, path); /* * replace tree blocks in the fs tree with tree blocks in * the reloc tree. @@ -4515,15 +4593,19 @@ static int noinline relocate_one_path(struct btrfs_trans_handle *trans, BUG_ON(ret < 0); if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { + ret = btrfs_search_slot(trans, reloc_root, first_key, path, + 0, 0); + BUG_ON(ret); + extent_buffer_get(path->nodes[0]); + eb = path->nodes[0]; + btrfs_release_path(reloc_root, path); ret = invalidate_extent_cache(reloc_root, eb, group, root); BUG_ON(ret); free_extent_buffer(eb); } - mutex_unlock(&root->fs_info->tree_reloc_mutex); + mutex_unlock(&root->fs_info->tree_reloc_mutex); path->lowest_level = 0; - kfree(nodes); - kfree(keys); return 0; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 5ecc24d634a..1df67129cc3 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -521,7 +521,7 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, dirty = root->dirty_root; btrfs_free_log(trans, root); - btrfs_free_reloc_root(root); + btrfs_free_reloc_root(trans, root); if (root->commit_root == root->node) { WARN_ON(root->node->start != @@ -930,8 +930,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, */ btrfs_free_log_root_tree(trans, root->fs_info); - btrfs_free_reloc_mappings(root); - ret = btrfs_commit_tree_roots(trans, root); BUG_ON(ret); -- cgit v1.2.3-70-g09d2 From 80eb234af09dbe6c97b2e3d60a13ec391e98fbba Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 29 Oct 2008 14:49:05 -0400 Subject: Btrfs: fix enospc when there is plenty of space So there is an odd case where we can possibly return -ENOSPC when there is in fact space to be had. It only happens with Metadata writes, and happens _very_ infrequently. What has to happen is we have to allocate have allocated out of the first logical byte on the disk, which would set last_alloc to first_logical_byte(root, 0), so search_start == orig_search_start. We then need to allocate for normal metadata, so BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DUP. We will do a block lookup for the given search_start, block_group_bits() won't match and we'll go to choose another block group. However because search_start matches orig_search_start we go to see if we can allocate a chunk. If we are in the situation that we cannot allocate a chunk, we fail and ENOSPC. This is kind of a big flaw of the way find_free_extent works, as it along with find_free_space loop through _all_ of the block groups, not just the ones that we want to allocate out of. This patch completely kills find_free_space and rolls it into find_free_extent. I've introduced a sort of state machine into this, which will make it easier to get cache miss information out of the allocator, and will work well with my locking changes. The basic flow is this: We have the variable loop which is 0, meaning we are in the hint phase. We lookup the block group for the hint, and lookup the space_info for what we want to allocate out of. If the block group we were pointed at by the hint either isn't of the correct type, or just doesn't have the space we need, we set head to space_info->block_groups, so we start at the beginning of the block groups for this particular space info, and loop through. This is also where we add the empty_cluster to total_needed. At this point loop is set to 1 and we just loop through all of the block groups for this particular space_info looking for the space we need, just as find_free_space would have done, except we only hit the block groups we want and not _all_ of the block groups. If we come full circle we see if we can allocate a chunk. If we cannot of course we exit with -ENOSPC and we are good. If not we start over at space_info->block_groups and loop through again, with loop == 2. If we come full circle and haven't found what we need then we exit with -ENOSPC. I've been running this for a couple of days now and it seems stable, and I haven't yet hit a -ENOSPC when there was plenty of space left. Also I've made a groups_sem to handle the group list for the space_info. This is part of my locking changes, but is relatively safe and seems better than holding the space_info spinlock over that entire search time. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 322 ++++++++++++++++++++----------------------------- 2 files changed, 132 insertions(+), 191 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 117090995e7..caa860a1c3e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -544,6 +544,7 @@ struct btrfs_space_info { /* for block groups in our same type */ struct list_head block_groups; spinlock_t lock; + struct rw_semaphore groups_sem; }; struct btrfs_free_space { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 56e41369d71..e3b3e13a481 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -317,59 +317,6 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct return cache; } -static int noinline find_free_space(struct btrfs_root *root, - struct btrfs_block_group_cache **cache_ret, - u64 *start_ret, u64 num, int data) -{ - int ret; - struct btrfs_block_group_cache *cache = *cache_ret; - struct btrfs_free_space *info = NULL; - u64 last; - u64 search_start = *start_ret; - - WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); - if (!cache) - goto out; - - last = max(search_start, cache->key.objectid); - -again: - ret = cache_block_group(root, cache); - if (ret) - goto out; - - if (cache->ro || !block_group_bits(cache, data)) - goto new_group; - - info = btrfs_find_free_space(cache, last, num); - if (info) { - *start_ret = info->offset; - return 0; - } - -new_group: - last = cache->key.objectid + cache->key.offset; - - cache = btrfs_lookup_first_block_group(root->fs_info, last); - if (!cache) - goto out; - - *cache_ret = cache; - goto again; - -out: - return -ENOSPC; -} - -static u64 div_factor(u64 num, int factor) -{ - if (factor == 10) - return num; - num *= factor; - do_div(num, 10); - return num; -} - static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, u64 flags) { @@ -384,6 +331,15 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, return NULL; } +static u64 div_factor(u64 num, int factor) +{ + if (factor == 10) + return num; + num *= factor; + do_div(num, 10); + return num; +} + static struct btrfs_block_group_cache * __btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, @@ -1446,6 +1402,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, list_add(&found->list, &info->space_info); INIT_LIST_HEAD(&found->block_groups); + init_rwsem(&found->groups_sem); spin_lock_init(&found->lock); found->flags = flags; found->total_bytes = total_bytes; @@ -2208,19 +2165,22 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, u64 exclude_start, u64 exclude_nr, int data) { - int ret; - u64 orig_search_start; + int ret = 0; struct btrfs_root * root = orig_root->fs_info->extent_root; - struct btrfs_fs_info *info = root->fs_info; u64 total_needed = num_bytes; u64 *last_ptr = NULL; - struct btrfs_block_group_cache *block_group; + struct btrfs_block_group_cache *block_group = NULL; int chunk_alloc_done = 0; int empty_cluster = 2 * 1024 * 1024; int allowed_chunk_alloc = 0; + struct list_head *head = NULL, *cur = NULL; + int loop = 0; + struct btrfs_space_info *space_info; WARN_ON(num_bytes < root->sectorsize); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); + ins->objectid = 0; + ins->offset = 0; if (orig_root->ref_cows || empty_size) allowed_chunk_alloc = 1; @@ -2239,152 +2199,132 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, else empty_size += empty_cluster; } - search_start = max(search_start, first_logical_byte(root, 0)); - orig_search_start = search_start; - search_start = max(search_start, hint_byte); total_needed += empty_size; -new_group: - block_group = btrfs_lookup_block_group(info, search_start); - if (!block_group) - block_group = btrfs_lookup_first_block_group(info, - search_start); + block_group = btrfs_lookup_block_group(root->fs_info, search_start); + space_info = __find_space_info(root->fs_info, data); - /* - * Ok this looks a little tricky, buts its really simple. First if we - * didn't find a block group obviously we want to start over. - * Secondly, if the block group we found does not match the type we - * need, and we have a last_ptr and its not 0, chances are the last - * allocation we made was at the end of the block group, so lets go - * ahead and skip the looking through the rest of the block groups and - * start at the beginning. This helps with metadata allocations, - * since you are likely to have a bunch of data block groups to search - * through first before you realize that you need to start over, so go - * ahead and start over and save the time. - */ - if (!block_group || (!block_group_bits(block_group, data) && - last_ptr && *last_ptr)) { - if (search_start != orig_search_start) { - if (last_ptr && *last_ptr) { - total_needed += empty_cluster; - *last_ptr = 0; - } - search_start = orig_search_start; - goto new_group; - } else if (!chunk_alloc_done && allowed_chunk_alloc) { - ret = do_chunk_alloc(trans, root, - num_bytes + 2 * 1024 * 1024, - data, 1); - if (ret < 0) - goto error; - BUG_ON(ret); - chunk_alloc_done = 1; - search_start = orig_search_start; - goto new_group; - } else { - ret = -ENOSPC; - goto error; - } - } - - /* - * this is going to seach through all of the existing block groups it - * can find, so if we don't find something we need to see if we can - * allocate what we need. - */ - ret = find_free_space(root, &block_group, &search_start, - total_needed, data); - if (ret == -ENOSPC) { + down_read(&space_info->groups_sem); + while (1) { + struct btrfs_free_space *free_space; /* - * instead of allocating, start at the original search start - * and see if there is something to be found, if not then we - * allocate + * the only way this happens if our hint points to a block + * group thats not of the proper type, while looping this + * should never happen */ - if (search_start != orig_search_start) { - if (last_ptr && *last_ptr) { - *last_ptr = 0; - total_needed += empty_cluster; - } - search_start = orig_search_start; + if (unlikely(!block_group_bits(block_group, data))) goto new_group; - } - /* - * we've already allocated, we're pretty screwed - */ - if (chunk_alloc_done) { - goto error; - } else if (!allowed_chunk_alloc && block_group && - block_group_bits(block_group, data)) { - block_group->space_info->force_alloc = 1; - goto error; - } else if (!allowed_chunk_alloc) { - goto error; - } + ret = cache_block_group(root, block_group); + if (ret) + break; - ret = do_chunk_alloc(trans, root, num_bytes + 2 * 1024 * 1024, - data, 1); - if (ret < 0) - goto error; + if (block_group->ro) + goto new_group; - BUG_ON(ret); - chunk_alloc_done = 1; - if (block_group) - search_start = block_group->key.objectid + + free_space = btrfs_find_free_space(block_group, search_start, + total_needed); + if (free_space) { + u64 start = block_group->key.objectid; + u64 end = block_group->key.objectid + block_group->key.offset; - else - search_start = orig_search_start; - goto new_group; - } - if (ret) - goto error; + search_start = stripe_align(root, free_space->offset); - search_start = stripe_align(root, search_start); - ins->objectid = search_start; - ins->offset = num_bytes; + /* move on to the next group */ + if (search_start + num_bytes >= search_end) + goto new_group; - if (ins->objectid + num_bytes >= search_end) { - search_start = orig_search_start; - if (chunk_alloc_done) { - ret = -ENOSPC; - goto error; + /* move on to the next group */ + if (search_start + num_bytes > end) + goto new_group; + + if (exclude_nr > 0 && + (search_start + num_bytes > exclude_start && + search_start < exclude_start + exclude_nr)) { + search_start = exclude_start + exclude_nr; + /* + * if search_start is still in this block group + * then we just re-search this block group + */ + if (search_start >= start && + search_start < end) + continue; + + /* else we go to the next block group */ + goto new_group; + } + + ins->objectid = search_start; + ins->offset = num_bytes; + /* we are all good, lets return */ + break; } - goto new_group; - } +new_group: + /* + * Here's how this works. + * loop == 0: we were searching a block group via a hint + * and didn't find anything, so we start at + * the head of the block groups and keep searching + * loop == 1: we're searching through all of the block groups + * if we hit the head again we have searched + * all of the block groups for this space and we + * need to try and allocate, if we cant error out. + * loop == 2: we allocated more space and are looping through + * all of the block groups again. + */ + if (loop == 0) { + head = &space_info->block_groups; + cur = head->next; - if (ins->objectid + num_bytes > - block_group->key.objectid + block_group->key.offset) { - if (search_start == orig_search_start && chunk_alloc_done) { - ret = -ENOSPC; - goto error; + if (last_ptr && *last_ptr) { + total_needed += empty_cluster; + *last_ptr = 0; + } + loop++; + } else if (loop == 1 && cur == head) { + if (allowed_chunk_alloc && !chunk_alloc_done) { + up_read(&space_info->groups_sem); + ret = do_chunk_alloc(trans, root, num_bytes + + 2 * 1024 * 1024, data, 1); + if (ret < 0) + break; + down_read(&space_info->groups_sem); + loop++; + head = &space_info->block_groups; + cur = head->next; + chunk_alloc_done = 1; + } else if (!allowed_chunk_alloc) { + space_info->force_alloc = 1; + break; + } else { + break; + } + } else if (cur == head) { + break; } - search_start = block_group->key.objectid + - block_group->key.offset; - goto new_group; - } - if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start && - ins->objectid < exclude_start + exclude_nr)) { - search_start = exclude_start + exclude_nr; - goto new_group; + block_group = list_entry(cur, struct btrfs_block_group_cache, + list); + search_start = block_group->key.objectid; + cur = cur->next; } - if (!(data & BTRFS_BLOCK_GROUP_DATA)) - trans->block_group = block_group; + /* we found what we needed */ + if (ins->objectid) { + if (!(data & BTRFS_BLOCK_GROUP_DATA)) + trans->block_group = block_group; - ins->offset = num_bytes; - if (last_ptr) { - *last_ptr = ins->objectid + ins->offset; - if (*last_ptr == - btrfs_super_total_bytes(&root->fs_info->super_copy)) - *last_ptr = 0; + if (last_ptr) + *last_ptr = ins->objectid + ins->offset; + ret = 0; + } else if (!ret) { + ret = -ENOSPC; } - ret = 0; -error: + up_read(&space_info->groups_sem); return ret; } @@ -2397,7 +2337,7 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) info->total_bytes - info->bytes_used - info->bytes_pinned - info->bytes_reserved, (info->full) ? "" : "not "); - spin_lock(&info->lock); + down_read(&info->groups_sem); list_for_each(l, &info->block_groups) { cache = list_entry(l, struct btrfs_block_group_cache, list); spin_lock(&cache->lock); @@ -2409,7 +2349,7 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) btrfs_dump_free_space(cache, bytes); spin_unlock(&cache->lock); } - spin_unlock(&info->lock); + up_read(&info->groups_sem); } static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, @@ -5186,9 +5126,9 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) rb_erase(&block_group->cache_node, &info->block_group_cache_tree); - spin_lock(&block_group->space_info->lock); + down_write(&block_group->space_info->groups_sem); list_del(&block_group->list); - spin_unlock(&block_group->space_info->lock); + up_write(&block_group->space_info->groups_sem); kfree(block_group); } spin_unlock(&info->block_group_cache_lock); @@ -5249,9 +5189,9 @@ int btrfs_read_block_groups(struct btrfs_root *root) &space_info); BUG_ON(ret); cache->space_info = space_info; - spin_lock(&space_info->lock); - list_add(&cache->list, &space_info->block_groups); - spin_unlock(&space_info->lock); + down_write(&space_info->groups_sem); + list_add_tail(&cache->list, &space_info->block_groups); + up_write(&space_info->groups_sem); ret = btrfs_add_block_group_cache(root->fs_info, cache); BUG_ON(ret); @@ -5297,9 +5237,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, &cache->space_info); BUG_ON(ret); - spin_lock(&cache->space_info->lock); - list_add(&cache->list, &cache->space_info->block_groups); - spin_unlock(&cache->space_info->lock); + down_write(&cache->space_info->groups_sem); + list_add_tail(&cache->list, &cache->space_info->block_groups); + up_write(&cache->space_info->groups_sem); ret = btrfs_add_block_group_cache(root->fs_info, cache); BUG_ON(ret); @@ -5338,9 +5278,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, btrfs_remove_free_space_cache(block_group); rb_erase(&block_group->cache_node, &root->fs_info->block_group_cache_tree); - spin_lock(&block_group->space_info->lock); + down_write(&block_group->space_info->groups_sem); list_del(&block_group->list); - spin_unlock(&block_group->space_info->lock); + up_write(&block_group->space_info->groups_sem); /* memset(shrink_block_group, 0, sizeof(*shrink_block_group)); -- cgit v1.2.3-70-g09d2 From 2517920135b0d29e70453e5b03d70d7b94207df3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 29 Oct 2008 14:49:05 -0400 Subject: Btrfs: nuke fs wide allocation mutex V2 This patch removes the giant fs_info->alloc_mutex and replaces it with a bunch of little locks. There is now a pinned_mutex, which is used when messing with the pinned_extents extent io tree, and the extent_ins_mutex which is used with the pending_del and extent_ins extent io trees. The locking for the extent tree stuff was inspired by a patch that Yan Zheng wrote to fix a race condition, I cleaned it up some and changed the locking around a little bit, but the idea remains the same. Basically instead of holding the extent_ins_mutex throughout the processing of an extent on the extent_ins or pending_del trees, we just hold it while we're searching and when we clear the bits on those trees, and lock the extent for the duration of the operations on the extent. Also to keep from getting hung up waiting to lock an extent, I've added a try_lock_extent so if we cannot lock the extent, move on to the next one in the tree and we'll come back to that one. I have tested this heavily and it does not appear to break anything. This has to be applied on top of my find_free_extent redo patch. I tested this patch on top of Yan's space reblancing code and it worked fine. The only thing that has changed since the last version is I pulled out all my debugging stuff, apparently I forgot to run guilt refresh before I sent the last patch out. Thank you, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.c | 3 +- fs/btrfs/ctree.h | 8 +- fs/btrfs/disk-io.c | 3 +- fs/btrfs/extent-tree.c | 333 +++++++++++++++++++++----------------------- fs/btrfs/extent_io.c | 14 ++ fs/btrfs/extent_io.h | 2 + fs/btrfs/free-space-cache.c | 92 ++++++++---- fs/btrfs/transaction.c | 2 - fs/btrfs/tree-defrag.c | 3 - fs/btrfs/tree-log.c | 4 +- fs/btrfs/volumes.c | 2 - 11 files changed, 250 insertions(+), 216 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 73899d0f9d8..f82f8db0227 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1387,8 +1387,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root lowest_level = p->lowest_level; WARN_ON(lowest_level && ins_len > 0); WARN_ON(p->nodes[0] != NULL); - WARN_ON(cow && root == root->fs_info->extent_root && - !mutex_is_locked(&root->fs_info->alloc_mutex)); + if (ins_len < 0) lowest_unlock = 2; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index caa860a1c3e..fdba4f1b634 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -558,6 +558,7 @@ struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; spinlock_t lock; + struct mutex alloc_mutex; u64 pinned; u64 reserved; u64 flags; @@ -635,7 +636,8 @@ struct btrfs_fs_info { struct mutex tree_log_mutex; struct mutex transaction_kthread_mutex; struct mutex cleaner_mutex; - struct mutex alloc_mutex; + struct mutex extent_ins_mutex; + struct mutex pinned_mutex; struct mutex chunk_mutex; struct mutex drop_mutex; struct mutex volume_mutex; @@ -1941,8 +1943,12 @@ int btrfs_acl_chmod(struct inode *inode); /* free-space-cache.c */ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, u64 bytenr, u64 size); +int btrfs_add_free_space_lock(struct btrfs_block_group_cache *block_group, + u64 offset, u64 bytes); int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, u64 bytenr, u64 size); +int btrfs_remove_free_space_lock(struct btrfs_block_group_cache *block_group, + u64 offset, u64 bytes); void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group); struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 796256440df..d1137d7ea8d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1460,7 +1460,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->tree_log_mutex); mutex_init(&fs_info->drop_mutex); - mutex_init(&fs_info->alloc_mutex); + mutex_init(&fs_info->extent_ins_mutex); + mutex_init(&fs_info->pinned_mutex); mutex_init(&fs_info->chunk_mutex); mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->cleaner_mutex); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e3b3e13a481..564260872c7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -53,24 +53,6 @@ __btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, int data, int owner); -void maybe_lock_mutex(struct btrfs_root *root) -{ - if (root != root->fs_info->extent_root && - root != root->fs_info->chunk_root && - root != root->fs_info->dev_root) { - mutex_lock(&root->fs_info->alloc_mutex); - } -} - -void maybe_unlock_mutex(struct btrfs_root *root) -{ - if (root != root->fs_info->extent_root && - root != root->fs_info->chunk_root && - root != root->fs_info->dev_root) { - mutex_unlock(&root->fs_info->alloc_mutex); - } -} - static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) { return (cache->flags & bits) == bits; @@ -164,6 +146,7 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group, u64 extent_start, extent_end, size; int ret; + mutex_lock(&info->pinned_mutex); while (start < end) { ret = find_first_extent_bit(&info->pinned_extents, start, &extent_start, &extent_end, @@ -175,7 +158,8 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group, start = extent_end + 1; } else if (extent_start > start && extent_start < end) { size = extent_start - start; - ret = btrfs_add_free_space(block_group, start, size); + ret = btrfs_add_free_space_lock(block_group, start, + size); BUG_ON(ret); start = extent_end + 1; } else { @@ -185,9 +169,10 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group, if (start < end) { size = end - start; - ret = btrfs_add_free_space(block_group, start, size); + ret = btrfs_add_free_space_lock(block_group, start, size); BUG_ON(ret); } + mutex_unlock(&info->pinned_mutex); return 0; } @@ -445,13 +430,11 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) path = btrfs_alloc_path(); BUG_ON(!path); - maybe_lock_mutex(root); key.objectid = start; key.offset = len; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, 0, 0); - maybe_unlock_mutex(root); btrfs_free_path(path); return ret; } @@ -676,8 +659,9 @@ static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL); num_bytes = btrfs_level_size(root, (int)owner_objectid); + mutex_lock(&root->fs_info->extent_ins_mutex); if (test_range_bit(&root->fs_info->extent_ins, bytenr, - bytenr + num_bytes - 1, EXTENT_LOCKED, 0)) { + bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) { u64 priv; ret = get_state_private(&root->fs_info->extent_ins, bytenr, &priv); @@ -686,6 +670,7 @@ static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, (unsigned long)priv; BUG_ON(extent_op->parent != orig_parent); BUG_ON(extent_op->generation != orig_generation); + extent_op->parent = parent; extent_op->generation = ref_generation; } else { @@ -703,10 +688,11 @@ static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, set_extent_bits(&root->fs_info->extent_ins, bytenr, bytenr + num_bytes - 1, - EXTENT_LOCKED, GFP_NOFS); + EXTENT_WRITEBACK, GFP_NOFS); set_state_private(&root->fs_info->extent_ins, bytenr, (unsigned long)extent_op); } + mutex_unlock(&root->fs_info->extent_ins_mutex); return 0; } @@ -742,12 +728,10 @@ int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, if (ref_root == BTRFS_TREE_LOG_OBJECTID && owner_objectid < BTRFS_FIRST_FREE_OBJECTID) return 0; - maybe_lock_mutex(root); ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent, parent, ref_root, ref_root, ref_generation, ref_generation, owner_objectid); - maybe_unlock_mutex(root); return ret; } @@ -817,11 +801,9 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, if (ref_root == BTRFS_TREE_LOG_OBJECTID && owner_objectid < BTRFS_FIRST_FREE_OBJECTID) return 0; - maybe_lock_mutex(root); ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent, 0, ref_root, 0, ref_generation, owner_objectid); - maybe_unlock_mutex(root); return ret; } @@ -886,7 +868,6 @@ static int get_reference_status(struct btrfs_root *root, u64 bytenr, key.type = BTRFS_EXTENT_ITEM_KEY; path = btrfs_alloc_path(); - mutex_lock(&root->fs_info->alloc_mutex); ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); if (ret < 0) goto out; @@ -953,7 +934,6 @@ static int get_reference_status(struct btrfs_root *root, u64 bytenr, } ret = 0; out: - mutex_unlock(&root->fs_info->alloc_mutex); btrfs_free_path(path); return ret; } @@ -1179,13 +1159,11 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, nr_file_extents++; - maybe_lock_mutex(root); ret = process_func(trans, root, bytenr, orig_buf->start, buf->start, orig_root, ref_root, orig_generation, ref_generation, key.objectid); - maybe_unlock_mutex(root); if (ret) { faili = i; @@ -1194,13 +1172,11 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, } } else { bytenr = btrfs_node_blockptr(buf, i); - maybe_lock_mutex(root); ret = process_func(trans, root, bytenr, orig_buf->start, buf->start, orig_root, ref_root, orig_generation, ref_generation, level - 1); - maybe_unlock_mutex(root); if (ret) { faili = i; WARN_ON(1); @@ -1270,24 +1246,20 @@ int btrfs_update_ref(struct btrfs_trans_handle *trans, bytenr = btrfs_file_extent_disk_bytenr(buf, fi); if (bytenr == 0) continue; - maybe_lock_mutex(root); ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_buf->start, buf->start, orig_root, ref_root, orig_generation, ref_generation, key.objectid); - maybe_unlock_mutex(root); if (ret) goto fail; } else { bytenr = btrfs_node_blockptr(buf, slot); - maybe_lock_mutex(root); ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_buf->start, buf->start, orig_root, ref_root, orig_generation, ref_generation, level - 1); - maybe_unlock_mutex(root); if (ret) goto fail; } @@ -1344,7 +1316,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - mutex_lock(&root->fs_info->alloc_mutex); while(1) { cache = NULL; spin_lock(&root->fs_info->block_group_cache_lock); @@ -1378,7 +1349,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, } } btrfs_free_path(path); - mutex_unlock(&root->fs_info->alloc_mutex); return werr; } @@ -1390,9 +1360,11 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found = __find_space_info(info, flags); if (found) { + spin_lock(&found->lock); found->total_bytes += total_bytes; found->bytes_used += bytes_used; found->full = 0; + spin_unlock(&found->lock); *space_info = found; return 0; } @@ -1479,43 +1451,53 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, } BUG_ON(!space_info); + spin_lock(&space_info->lock); if (space_info->force_alloc) { force = 1; space_info->force_alloc = 0; } - if (space_info->full) + if (space_info->full) { + spin_unlock(&space_info->lock); goto out; + } thresh = div_factor(space_info->total_bytes, 6); if (!force && (space_info->bytes_used + space_info->bytes_pinned + - space_info->bytes_reserved + alloc_bytes) < thresh) + space_info->bytes_reserved + alloc_bytes) < thresh) { + spin_unlock(&space_info->lock); goto out; + } - while (!mutex_trylock(&extent_root->fs_info->chunk_mutex)) { - if (!force) - goto out; - mutex_unlock(&extent_root->fs_info->alloc_mutex); - cond_resched(); - mutex_lock(&extent_root->fs_info->alloc_mutex); + spin_unlock(&space_info->lock); + + ret = mutex_trylock(&extent_root->fs_info->chunk_mutex); + if (!ret && !force) { + goto out; + } else if (!ret) { + mutex_lock(&extent_root->fs_info->chunk_mutex); waited = 1; } - if (waited && space_info->full) - goto out_unlock; + if (waited) { + spin_lock(&space_info->lock); + if (space_info->full) { + spin_unlock(&space_info->lock); + goto out_unlock; + } + spin_unlock(&space_info->lock); + } ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags); - if (ret == -ENOSPC) { + if (ret) { printk("space info full %Lu\n", flags); space_info->full = 1; goto out_unlock; } - BUG_ON(ret); ret = btrfs_make_block_group(trans, extent_root, 0, flags, BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); BUG_ON(ret); - out_unlock: mutex_unlock(&extent_root->fs_info->chunk_mutex); out: @@ -1533,7 +1515,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 old_val; u64 byte_in_group; - WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); while(total) { cache = btrfs_lookup_block_group(info, bytenr); if (!cache) { @@ -1542,6 +1523,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, byte_in_group = bytenr - cache->key.objectid; WARN_ON(byte_in_group > cache->key.offset); + spin_lock(&cache->space_info->lock); spin_lock(&cache->lock); cache->dirty = 1; old_val = btrfs_block_group_used(&cache->item); @@ -1551,11 +1533,13 @@ static int update_block_group(struct btrfs_trans_handle *trans, cache->space_info->bytes_used += num_bytes; btrfs_set_block_group_used(&cache->item, old_val); spin_unlock(&cache->lock); + spin_unlock(&cache->space_info->lock); } else { old_val -= num_bytes; cache->space_info->bytes_used -= num_bytes; btrfs_set_block_group_used(&cache->item, old_val); spin_unlock(&cache->lock); + spin_unlock(&cache->space_info->lock); if (mark_free) { int ret; ret = btrfs_add_free_space(cache, bytenr, @@ -1588,7 +1572,7 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, struct btrfs_block_group_cache *cache; struct btrfs_fs_info *fs_info = root->fs_info; - WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); + WARN_ON(!mutex_is_locked(&root->fs_info->pinned_mutex)); if (pin) { set_extent_dirty(&fs_info->pinned_extents, bytenr, bytenr + num - 1, GFP_NOFS); @@ -1602,16 +1586,20 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, len = min(num, cache->key.offset - (bytenr - cache->key.objectid)); if (pin) { + spin_lock(&cache->space_info->lock); spin_lock(&cache->lock); cache->pinned += len; cache->space_info->bytes_pinned += len; spin_unlock(&cache->lock); + spin_unlock(&cache->space_info->lock); fs_info->total_pinned += len; } else { + spin_lock(&cache->space_info->lock); spin_lock(&cache->lock); cache->pinned -= len; cache->space_info->bytes_pinned -= len; spin_unlock(&cache->lock); + spin_unlock(&cache->space_info->lock); fs_info->total_pinned -= len; } bytenr += len; @@ -1627,23 +1615,23 @@ static int update_reserved_extents(struct btrfs_root *root, struct btrfs_block_group_cache *cache; struct btrfs_fs_info *fs_info = root->fs_info; - WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); while (num > 0) { cache = btrfs_lookup_block_group(fs_info, bytenr); BUG_ON(!cache); len = min(num, cache->key.offset - (bytenr - cache->key.objectid)); + + spin_lock(&cache->space_info->lock); + spin_lock(&cache->lock); if (reserve) { - spin_lock(&cache->lock); cache->reserved += len; cache->space_info->bytes_reserved += len; - spin_unlock(&cache->lock); } else { - spin_lock(&cache->lock); cache->reserved -= len; cache->space_info->bytes_reserved -= len; - spin_unlock(&cache->lock); } + spin_unlock(&cache->lock); + spin_unlock(&cache->space_info->lock); bytenr += len; num -= len; } @@ -1658,6 +1646,7 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; int ret; + mutex_lock(&root->fs_info->pinned_mutex); while(1) { ret = find_first_extent_bit(pinned_extents, last, &start, &end, EXTENT_DIRTY); @@ -1666,6 +1655,7 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) set_extent_dirty(copy, start, end, GFP_NOFS); last = end + 1; } + mutex_unlock(&root->fs_info->pinned_mutex); return 0; } @@ -1678,7 +1668,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, int ret; struct btrfs_block_group_cache *cache; - mutex_lock(&root->fs_info->alloc_mutex); + mutex_lock(&root->fs_info->pinned_mutex); while(1) { ret = find_first_extent_bit(unpin, 0, &start, &end, EXTENT_DIRTY); @@ -1690,12 +1680,12 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, if (cache->cached) btrfs_add_free_space(cache, start, end - start + 1); if (need_resched()) { - mutex_unlock(&root->fs_info->alloc_mutex); + mutex_unlock(&root->fs_info->pinned_mutex); cond_resched(); - mutex_lock(&root->fs_info->alloc_mutex); + mutex_lock(&root->fs_info->pinned_mutex); } } - mutex_unlock(&root->fs_info->alloc_mutex); + mutex_unlock(&root->fs_info->pinned_mutex); return 0; } @@ -1705,6 +1695,7 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, u64 start; u64 end; u64 priv; + u64 search = 0; struct btrfs_fs_info *info = extent_root->fs_info; struct btrfs_path *path; struct btrfs_extent_ref *ref; @@ -1714,20 +1705,37 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, int ret; int err = 0; - WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex)); btrfs_set_stack_extent_refs(&extent_item, 1); path = btrfs_alloc_path(); while(1) { - ret = find_first_extent_bit(&info->extent_ins, 0, &start, - &end, EXTENT_LOCKED); - if (ret) + mutex_lock(&info->extent_ins_mutex); + ret = find_first_extent_bit(&info->extent_ins, search, &start, + &end, EXTENT_WRITEBACK); + if (ret) { + mutex_unlock(&info->extent_ins_mutex); + if (search) { + search = 0; + continue; + } break; + } + + ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS); + if (!ret) { + search = end+1; + mutex_unlock(&info->extent_ins_mutex); + cond_resched(); + continue; + } + BUG_ON(ret < 0); ret = get_state_private(&info->extent_ins, start, &priv); BUG_ON(ret); extent_op = (struct pending_extent_op *)(unsigned long)priv; + mutex_unlock(&info->extent_ins_mutex); + if (extent_op->type == PENDING_EXTENT_INSERT) { key.objectid = start; key.offset = end + 1 - start; @@ -1736,8 +1744,10 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, &extent_item, sizeof(extent_item)); BUG_ON(err); + mutex_lock(&info->extent_ins_mutex); clear_extent_bits(&info->extent_ins, start, end, - EXTENT_LOCKED, GFP_NOFS); + EXTENT_WRITEBACK, GFP_NOFS); + mutex_unlock(&info->extent_ins_mutex); err = insert_extent_backref(trans, extent_root, path, start, extent_op->parent, @@ -1753,8 +1763,10 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, extent_op->level, 0); BUG_ON(err); + mutex_lock(&info->extent_ins_mutex); clear_extent_bits(&info->extent_ins, start, end, - EXTENT_LOCKED, GFP_NOFS); + EXTENT_WRITEBACK, GFP_NOFS); + mutex_unlock(&info->extent_ins_mutex); key.objectid = start; key.offset = extent_op->parent; @@ -1772,12 +1784,10 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, BUG_ON(1); } kfree(extent_op); + unlock_extent(&info->extent_ins, start, end, GFP_NOFS); + search = 0; - if (need_resched()) { - mutex_unlock(&extent_root->fs_info->alloc_mutex); - cond_resched(); - mutex_lock(&extent_root->fs_info->alloc_mutex); - } + cond_resched(); } btrfs_free_path(path); return 0; @@ -1790,7 +1800,6 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, int err = 0; struct extent_buffer *buf; - WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); if (is_data) goto pinit; @@ -1847,7 +1856,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_extent_item *ei; u32 refs; - WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); key.objectid = bytenr; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_bytes; @@ -1935,8 +1943,10 @@ static int __free_extent(struct btrfs_trans_handle *trans, #endif if (pin) { + mutex_lock(&root->fs_info->pinned_mutex); ret = pin_down_bytes(trans, root, bytenr, num_bytes, owner_objectid >= BTRFS_FIRST_FREE_OBJECTID); + mutex_unlock(&root->fs_info->pinned_mutex); if (ret > 0) mark_free = 1; BUG_ON(ret < 0); @@ -1956,6 +1966,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, ret = btrfs_del_items(trans, extent_root, path, path->slots[0], num_to_del); BUG_ON(ret); + btrfs_release_path(extent_root, path); ret = update_block_group(trans, root, bytenr, num_bytes, 0, mark_free); BUG_ON(ret); @@ -1994,70 +2005,91 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct { int ret; int err = 0; - int mark_free = 0; u64 start; u64 end; u64 priv; + u64 search = 0; struct extent_io_tree *pending_del; struct extent_io_tree *extent_ins; struct pending_extent_op *extent_op; + struct btrfs_fs_info *info = extent_root->fs_info; - WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex)); extent_ins = &extent_root->fs_info->extent_ins; pending_del = &extent_root->fs_info->pending_del; while(1) { - ret = find_first_extent_bit(pending_del, 0, &start, &end, - EXTENT_LOCKED); - if (ret) + mutex_lock(&info->extent_ins_mutex); + ret = find_first_extent_bit(pending_del, search, &start, &end, + EXTENT_WRITEBACK); + if (ret) { + mutex_unlock(&info->extent_ins_mutex); + if (search) { + search = 0; + continue; + } break; + } + + ret = try_lock_extent(extent_ins, start, end, GFP_NOFS); + if (!ret) { + search = end+1; + mutex_unlock(&info->extent_ins_mutex); + cond_resched(); + continue; + } + BUG_ON(ret < 0); ret = get_state_private(pending_del, start, &priv); BUG_ON(ret); extent_op = (struct pending_extent_op *)(unsigned long)priv; - clear_extent_bits(pending_del, start, end, EXTENT_LOCKED, + clear_extent_bits(pending_del, start, end, EXTENT_WRITEBACK, GFP_NOFS); - - ret = pin_down_bytes(trans, extent_root, start, - end + 1 - start, 0); - mark_free = ret > 0; if (!test_range_bit(extent_ins, start, end, - EXTENT_LOCKED, 0)) { + EXTENT_WRITEBACK, 0)) { + mutex_unlock(&info->extent_ins_mutex); free_extent: ret = __free_extent(trans, extent_root, start, end + 1 - start, extent_op->orig_parent, extent_root->root_key.objectid, extent_op->orig_generation, - extent_op->level, 0, mark_free); + extent_op->level, 1, 0); kfree(extent_op); } else { kfree(extent_op); - ret = get_state_private(extent_ins, start, &priv); + + ret = get_state_private(&info->extent_ins, start, + &priv); BUG_ON(ret); extent_op = (struct pending_extent_op *) - (unsigned long)priv; + (unsigned long)priv; + + clear_extent_bits(&info->extent_ins, start, end, + EXTENT_WRITEBACK, GFP_NOFS); - clear_extent_bits(extent_ins, start, end, - EXTENT_LOCKED, GFP_NOFS); + mutex_unlock(&info->extent_ins_mutex); if (extent_op->type == PENDING_BACKREF_UPDATE) goto free_extent; + mutex_lock(&extent_root->fs_info->pinned_mutex); + ret = pin_down_bytes(trans, extent_root, start, + end + 1 - start, 0); + mutex_unlock(&extent_root->fs_info->pinned_mutex); + ret = update_block_group(trans, extent_root, start, - end + 1 - start, 0, mark_free); + end + 1 - start, 0, ret > 0); + BUG_ON(ret); kfree(extent_op); } if (ret) err = ret; + unlock_extent(extent_ins, start, end, GFP_NOFS); - if (need_resched()) { - mutex_unlock(&extent_root->fs_info->alloc_mutex); - cond_resched(); - mutex_lock(&extent_root->fs_info->alloc_mutex); - } + search = 0; + cond_resched(); } return err; } @@ -2091,11 +2123,13 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, extent_op->orig_generation = ref_generation; extent_op->level = (int)owner_objectid; + mutex_lock(&root->fs_info->extent_ins_mutex); set_extent_bits(&root->fs_info->pending_del, bytenr, bytenr + num_bytes - 1, - EXTENT_LOCKED, GFP_NOFS); + EXTENT_WRITEBACK, GFP_NOFS); set_state_private(&root->fs_info->pending_del, bytenr, (unsigned long)extent_op); + mutex_unlock(&root->fs_info->extent_ins_mutex); return 0; } /* if metadata always pin */ @@ -2134,11 +2168,9 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, { int ret; - maybe_lock_mutex(root); ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent, root_objectid, ref_generation, owner_objectid, pin); - maybe_unlock_mutex(root); return ret; } @@ -2214,12 +2246,16 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, * group thats not of the proper type, while looping this * should never happen */ + WARN_ON(!block_group); + mutex_lock(&block_group->alloc_mutex); if (unlikely(!block_group_bits(block_group, data))) goto new_group; ret = cache_block_group(root, block_group); - if (ret) + if (ret) { + mutex_unlock(&block_group->alloc_mutex); break; + } if (block_group->ro) goto new_group; @@ -2250,8 +2286,10 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, * then we just re-search this block group */ if (search_start >= start && - search_start < end) + search_start < end) { + mutex_unlock(&block_group->alloc_mutex); continue; + } /* else we go to the next block group */ goto new_group; @@ -2259,10 +2297,15 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, ins->objectid = search_start; ins->offset = num_bytes; + + btrfs_remove_free_space_lock(block_group, search_start, + num_bytes); /* we are all good, lets return */ + mutex_unlock(&block_group->alloc_mutex); break; } new_group: + mutex_unlock(&block_group->alloc_mutex); /* * Here's how this works. * loop == 0: we were searching a block group via a hint @@ -2363,7 +2406,6 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, u64 search_start = 0; u64 alloc_profile; struct btrfs_fs_info *info = root->fs_info; - struct btrfs_block_group_cache *cache; if (data) { alloc_profile = info->avail_data_alloc_bits & @@ -2419,13 +2461,6 @@ again: dump_space_info(sinfo, num_bytes); BUG(); } - cache = btrfs_lookup_block_group(root->fs_info, ins->objectid); - if (!cache) { - printk(KERN_ERR "Unable to find block group for %Lu\n", ins->objectid); - return -ENOSPC; - } - - ret = btrfs_remove_free_space(cache, ins->objectid, ins->offset); return ret; } @@ -2434,16 +2469,13 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) { struct btrfs_block_group_cache *cache; - maybe_lock_mutex(root); cache = btrfs_lookup_block_group(root->fs_info, start); if (!cache) { printk(KERN_ERR "Unable to find block group for %Lu\n", start); - maybe_unlock_mutex(root); return -ENOSPC; } btrfs_add_free_space(cache, start, len); update_reserved_extents(root, start, len, 0); - maybe_unlock_mutex(root); return 0; } @@ -2455,12 +2487,10 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, u64 data) { int ret; - maybe_lock_mutex(root); ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, empty_size, hint_byte, search_end, ins, data); update_reserved_extents(root, ins->objectid, ins->offset, 1); - maybe_unlock_mutex(root); return ret; } @@ -2510,11 +2540,13 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, extent_op->orig_generation = 0; extent_op->level = (int)owner; + mutex_lock(&root->fs_info->extent_ins_mutex); set_extent_bits(&root->fs_info->extent_ins, ins->objectid, ins->objectid + ins->offset - 1, - EXTENT_LOCKED, GFP_NOFS); + EXTENT_WRITEBACK, GFP_NOFS); set_state_private(&root->fs_info->extent_ins, ins->objectid, (unsigned long)extent_op); + mutex_unlock(&root->fs_info->extent_ins_mutex); goto update_block; } @@ -2578,11 +2610,9 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, if (root_objectid == BTRFS_TREE_LOG_OBJECTID) return 0; - maybe_lock_mutex(root); ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, ref_generation, owner, ins); update_reserved_extents(root, ins->objectid, ins->offset, 0); - maybe_unlock_mutex(root); return ret; } @@ -2599,15 +2629,16 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, int ret; struct btrfs_block_group_cache *block_group; - maybe_lock_mutex(root); block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); + mutex_lock(&block_group->alloc_mutex); cache_block_group(root, block_group); - ret = btrfs_remove_free_space(block_group, ins->objectid, ins->offset); + ret = btrfs_remove_free_space_lock(block_group, ins->objectid, + ins->offset); + mutex_unlock(&block_group->alloc_mutex); BUG_ON(ret); ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, ref_generation, owner, ins); - maybe_unlock_mutex(root); return ret; } @@ -2627,8 +2658,6 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, { int ret; - maybe_lock_mutex(root); - ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, empty_size, hint_byte, search_end, ins, data); @@ -2642,7 +2671,6 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, } else { update_reserved_extents(root, ins->objectid, ins->offset, 1); } - maybe_unlock_mutex(root); return ret; } @@ -2734,12 +2762,10 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, if (disk_bytenr == 0) continue; - mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, disk_bytenr, btrfs_file_extent_disk_num_bytes(leaf, fi), leaf->start, leaf_owner, leaf_generation, key.objectid, 0); - mutex_unlock(&root->fs_info->alloc_mutex); BUG_ON(ret); atomic_inc(&root->fs_info->throttle_gen); @@ -2758,12 +2784,10 @@ static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans, struct btrfs_extent_info *info = ref->extents; for (i = 0; i < ref->nritems; i++) { - mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, info->bytenr, info->num_bytes, ref->bytenr, ref->owner, ref->generation, info->objectid, 0); - mutex_unlock(&root->fs_info->alloc_mutex); atomic_inc(&root->fs_info->throttle_gen); wake_up(&root->fs_info->transaction_throttle); @@ -2875,13 +2899,11 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, root_gen = btrfs_header_generation(parent); path->slots[*level]++; - mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, root_owner, root_gen, *level - 1, 1); BUG_ON(ret); - mutex_unlock(&root->fs_info->alloc_mutex); atomic_inc(&root->fs_info->throttle_gen); wake_up(&root->fs_info->transaction_throttle); @@ -2957,11 +2979,9 @@ out: root_owner = btrfs_header_owner(parent); root_gen = btrfs_header_generation(parent); - mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, root_owner, root_gen, *level, 1); - mutex_unlock(&root->fs_info->alloc_mutex); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -3440,8 +3460,6 @@ static int noinline __next_ref_path(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - mutex_lock(&extent_root->fs_info->alloc_mutex); - if (first_time) { ref_path->lowest_level = -1; ref_path->current_level = -1; @@ -3498,9 +3516,7 @@ next: level--; btrfs_release_path(extent_root, path); if (need_resched()) { - mutex_unlock(&extent_root->fs_info->alloc_mutex); cond_resched(); - mutex_lock(&extent_root->fs_info->alloc_mutex); } } /* reached lowest level */ @@ -3613,15 +3629,12 @@ found: btrfs_release_path(extent_root, path); if (need_resched()) { - mutex_unlock(&extent_root->fs_info->alloc_mutex); cond_resched(); - mutex_lock(&extent_root->fs_info->alloc_mutex); } } /* reached max tree level, but no tree root found. */ BUG(); out: - mutex_unlock(&extent_root->fs_info->alloc_mutex); btrfs_free_path(path); return ret; } @@ -4556,14 +4569,6 @@ static int noinline relocate_tree_block(struct btrfs_trans_handle *trans, struct btrfs_ref_path *ref_path) { int ret; - int needs_lock = 0; - - if (root == root->fs_info->extent_root || - root == root->fs_info->chunk_root || - root == root->fs_info->dev_root) { - needs_lock = 1; - mutex_lock(&root->fs_info->alloc_mutex); - } ret = relocate_one_path(trans, root, path, first_key, ref_path, NULL, NULL); @@ -4571,8 +4576,6 @@ static int noinline relocate_tree_block(struct btrfs_trans_handle *trans, if (root == root->fs_info->extent_root) btrfs_extent_post_op(trans, root); - if (needs_lock) - mutex_unlock(&root->fs_info->alloc_mutex); return 0; } @@ -4584,14 +4587,12 @@ static int noinline del_extent_zero(struct btrfs_trans_handle *trans, { int ret; - mutex_lock(&extent_root->fs_info->alloc_mutex); ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); if (ret) goto out; ret = btrfs_del_item(trans, extent_root, path); out: btrfs_release_path(extent_root, path); - mutex_unlock(&extent_root->fs_info->alloc_mutex); return ret; } @@ -4627,7 +4628,6 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, struct btrfs_key first_key; u64 prev_block = 0; - mutex_unlock(&extent_root->fs_info->alloc_mutex); trans = btrfs_start_transaction(extent_root, 1); BUG_ON(!trans); @@ -4754,7 +4754,6 @@ out: btrfs_end_transaction(trans, extent_root); kfree(new_extents); kfree(ref_path); - mutex_lock(&extent_root->fs_info->alloc_mutex); return ret; } @@ -4807,10 +4806,8 @@ int __alloc_chunk_for_shrink(struct btrfs_root *root, spin_lock(&shrink_block_group->lock); if (btrfs_block_group_used(&shrink_block_group->item) > 0) { spin_unlock(&shrink_block_group->lock); - mutex_unlock(&root->fs_info->alloc_mutex); trans = btrfs_start_transaction(root, 1); - mutex_lock(&root->fs_info->alloc_mutex); spin_lock(&shrink_block_group->lock); new_alloc_flags = update_block_group_flags(root, @@ -4826,9 +4823,7 @@ int __alloc_chunk_for_shrink(struct btrfs_root *root, do_chunk_alloc(trans, root->fs_info->extent_root, calc + 2 * 1024 * 1024, new_alloc_flags, force); - mutex_unlock(&root->fs_info->alloc_mutex); btrfs_end_transaction(trans, root); - mutex_lock(&root->fs_info->alloc_mutex); } else spin_unlock(&shrink_block_group->lock); return 0; @@ -4952,14 +4947,10 @@ int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) reloc_inode = create_reloc_inode(info, block_group); BUG_ON(IS_ERR(reloc_inode)); - mutex_lock(&root->fs_info->alloc_mutex); - __alloc_chunk_for_shrink(root, block_group, 1); block_group->ro = 1; block_group->space_info->total_bytes -= block_group->key.offset; - mutex_unlock(&root->fs_info->alloc_mutex); - btrfs_start_delalloc_inodes(info->tree_root); btrfs_wait_ordered_extents(info->tree_root, 0); again: @@ -4978,8 +4969,6 @@ again: btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); mutex_unlock(&root->fs_info->cleaner_mutex); - mutex_lock(&root->fs_info->alloc_mutex); - while(1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) @@ -5007,9 +4996,7 @@ next: if (progress && need_resched()) { btrfs_release_path(root, path); - mutex_unlock(&root->fs_info->alloc_mutex); cond_resched(); - mutex_lock(&root->fs_info->alloc_mutex); progress = 0; continue; } @@ -5036,7 +5023,6 @@ next: } btrfs_release_path(root, path); - mutex_unlock(&root->fs_info->alloc_mutex); if (pass == 0) { btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1); @@ -5058,8 +5044,6 @@ next: trans = btrfs_start_transaction(info->tree_root, 1); btrfs_commit_transaction(trans, info->tree_root); - mutex_lock(&root->fs_info->alloc_mutex); - spin_lock(&block_group->lock); WARN_ON(block_group->pinned > 0); WARN_ON(block_group->reserved > 0); @@ -5067,7 +5051,6 @@ next: spin_unlock(&block_group->lock); ret = 0; out: - mutex_unlock(&root->fs_info->alloc_mutex); btrfs_free_path(path); return ret; } @@ -5114,7 +5097,6 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) struct btrfs_block_group_cache *block_group; struct rb_node *n; - mutex_lock(&info->alloc_mutex); spin_lock(&info->block_group_cache_lock); while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { block_group = rb_entry(n, struct btrfs_block_group_cache, @@ -5132,7 +5114,6 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) kfree(block_group); } spin_unlock(&info->block_group_cache_lock); - mutex_unlock(&info->alloc_mutex); return 0; } @@ -5155,7 +5136,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) if (!path) return -ENOMEM; - mutex_lock(&root->fs_info->alloc_mutex); while(1) { ret = find_first_block_group(root, path, &key); if (ret > 0) { @@ -5174,6 +5154,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) } spin_lock_init(&cache->lock); + mutex_init(&cache->alloc_mutex); INIT_LIST_HEAD(&cache->list); read_extent_buffer(leaf, &cache->item, btrfs_item_ptr_offset(leaf, path->slots[0]), @@ -5201,7 +5182,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) ret = 0; error: btrfs_free_path(path); - mutex_unlock(&root->fs_info->alloc_mutex); return ret; } @@ -5214,7 +5194,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root; struct btrfs_block_group_cache *cache; - WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); extent_root = root->fs_info->extent_root; root->fs_info->last_trans_new_blockgroup = trans->transid; @@ -5226,6 +5205,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, cache->key.objectid = chunk_offset; cache->key.offset = size; spin_lock_init(&cache->lock); + mutex_init(&cache->alloc_mutex); INIT_LIST_HEAD(&cache->list); btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); @@ -5264,7 +5244,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, struct btrfs_key key; int ret; - BUG_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); root = root->fs_info->extent_root; block_group = btrfs_lookup_block_group(root->fs_info, group_start); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 314041fdfa4..7503bd46819 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -938,6 +938,20 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) } EXPORT_SYMBOL(lock_extent); +int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + int err; + u64 failed_start; + + err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, + &failed_start, mask); + if (err == -EEXIST) + return 0; + return 1; +} +EXPORT_SYMBOL(try_lock_extent); + int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 86f859b87a6..283110ec4ee 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -128,6 +128,8 @@ int try_release_extent_state(struct extent_map_tree *map, gfp_t mask); int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); int extent_read_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent); int __init extent_io_init(void); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 96241f01fa0..f4926c0f3c8 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -184,8 +184,8 @@ static int link_free_space(struct btrfs_block_group_cache *block_group, return ret; } -int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, - u64 offset, u64 bytes) +static int __btrfs_add_free_space(struct btrfs_block_group_cache *block_group, + u64 offset, u64 bytes) { struct btrfs_free_space *right_info; struct btrfs_free_space *left_info; @@ -202,8 +202,6 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, * are adding, if there is remove that struct and add a new one to * cover the entire range */ - spin_lock(&block_group->lock); - right_info = tree_search_offset(&block_group->free_space_offset, offset+bytes, 0, 1); left_info = tree_search_offset(&block_group->free_space_offset, @@ -261,7 +259,6 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, if (ret) kfree(info); out: - spin_unlock(&block_group->lock); if (ret) { printk(KERN_ERR "btrfs: unable to add free space :%d\n", ret); if (ret == -EEXIST) @@ -274,13 +271,13 @@ out: return ret; } -int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, - u64 offset, u64 bytes) +static int +__btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, + u64 offset, u64 bytes) { struct btrfs_free_space *info; int ret = 0; - spin_lock(&block_group->lock); info = tree_search_offset(&block_group->free_space_offset, offset, 0, 1); @@ -334,17 +331,63 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, /* step two, insert a new info struct to cover anything * before the hole */ - spin_unlock(&block_group->lock); - ret = btrfs_add_free_space(block_group, old_start, - offset - old_start); + ret = __btrfs_add_free_space(block_group, old_start, + offset - old_start); BUG_ON(ret); - goto out_nolock; } else { WARN_ON(1); } out: - spin_unlock(&block_group->lock); -out_nolock: + return ret; +} + +int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, + u64 offset, u64 bytes) +{ + int ret; + struct btrfs_free_space *sp; + + mutex_lock(&block_group->alloc_mutex); + ret = __btrfs_add_free_space(block_group, offset, bytes); + sp = tree_search_offset(&block_group->free_space_offset, offset, 0, 1); + BUG_ON(!sp); + mutex_unlock(&block_group->alloc_mutex); + + return ret; +} + +int btrfs_add_free_space_lock(struct btrfs_block_group_cache *block_group, + u64 offset, u64 bytes) +{ + int ret; + struct btrfs_free_space *sp; + + ret = __btrfs_add_free_space(block_group, offset, bytes); + sp = tree_search_offset(&block_group->free_space_offset, offset, 0, 1); + BUG_ON(!sp); + + return ret; +} + +int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, + u64 offset, u64 bytes) +{ + int ret = 0; + + mutex_lock(&block_group->alloc_mutex); + ret = __btrfs_remove_free_space(block_group, offset, bytes); + mutex_unlock(&block_group->alloc_mutex); + + return ret; +} + +int btrfs_remove_free_space_lock(struct btrfs_block_group_cache *block_group, + u64 offset, u64 bytes) +{ + int ret; + + ret = __btrfs_remove_free_space(block_group, offset, bytes); + return ret; } @@ -386,18 +429,18 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) struct btrfs_free_space *info; struct rb_node *node; - spin_lock(&block_group->lock); + mutex_lock(&block_group->alloc_mutex); while ((node = rb_last(&block_group->free_space_bytes)) != NULL) { info = rb_entry(node, struct btrfs_free_space, bytes_index); unlink_free_space(block_group, info); kfree(info); if (need_resched()) { - spin_unlock(&block_group->lock); + mutex_unlock(&block_group->alloc_mutex); cond_resched(); - spin_lock(&block_group->lock); + mutex_lock(&block_group->alloc_mutex); } } - spin_unlock(&block_group->lock); + mutex_unlock(&block_group->alloc_mutex); } struct btrfs_free_space *btrfs_find_free_space_offset(struct @@ -407,10 +450,10 @@ struct btrfs_free_space *btrfs_find_free_space_offset(struct { struct btrfs_free_space *ret; - spin_lock(&block_group->lock); + mutex_lock(&block_group->alloc_mutex); ret = tree_search_offset(&block_group->free_space_offset, offset, bytes, 0); - spin_unlock(&block_group->lock); + mutex_unlock(&block_group->alloc_mutex); return ret; } @@ -422,10 +465,10 @@ struct btrfs_free_space *btrfs_find_free_space_bytes(struct { struct btrfs_free_space *ret; - spin_lock(&block_group->lock); + mutex_lock(&block_group->alloc_mutex); ret = tree_search_bytes(&block_group->free_space_bytes, offset, bytes); - spin_unlock(&block_group->lock); + mutex_unlock(&block_group->alloc_mutex); return ret; } @@ -434,16 +477,13 @@ struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache *block_group, u64 offset, u64 bytes) { - struct btrfs_free_space *ret; + struct btrfs_free_space *ret = NULL; - spin_lock(&block_group->lock); ret = tree_search_offset(&block_group->free_space_offset, offset, bytes, 0); if (!ret) ret = tree_search_bytes(&block_group->free_space_bytes, offset, bytes); - spin_unlock(&block_group->lock); - return ret; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 1df67129cc3..48b455fdaac 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -670,7 +670,6 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, atomic_dec(&root->fs_info->throttles); wake_up(&root->fs_info->transaction_throttle); - mutex_lock(&root->fs_info->alloc_mutex); num_bytes -= btrfs_root_used(&dirty->root->root_item); bytes_used = btrfs_root_used(&root->root_item); if (num_bytes) { @@ -678,7 +677,6 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, btrfs_set_root_used(&root->root_item, bytes_used - num_bytes); } - mutex_unlock(&root->fs_info->alloc_mutex); ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); if (ret) { diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 6f57d0889b1..a6a3956cedf 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -125,9 +125,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, if (is_extent) btrfs_extent_post_op(trans, root); out: - if (is_extent) - mutex_unlock(&root->fs_info->alloc_mutex); - if (path) btrfs_free_path(path); if (ret == -EAGAIN) { diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index e6d579053a4..835daed5561 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -271,10 +271,10 @@ static int process_one_buffer(struct btrfs_root *log, struct walk_control *wc, u64 gen) { if (wc->pin) { - mutex_lock(&log->fs_info->alloc_mutex); + mutex_lock(&log->fs_info->pinned_mutex); btrfs_update_pinned_extents(log->fs_info->extent_root, eb->start, eb->len, 1); - mutex_unlock(&log->fs_info->alloc_mutex); + mutex_unlock(&log->fs_info->pinned_mutex); } if (btrfs_buffer_uptodate(eb, gen)) { diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7db4cfd03a9..cbb9bb31431 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -58,14 +58,12 @@ void btrfs_unlock_volumes(void) static void lock_chunks(struct btrfs_root *root) { - mutex_lock(&root->fs_info->alloc_mutex); mutex_lock(&root->fs_info->chunk_mutex); } static void unlock_chunks(struct btrfs_root *root) { mutex_unlock(&root->fs_info->chunk_mutex); - mutex_unlock(&root->fs_info->alloc_mutex); } int btrfs_cleanup_fs_uuids(void) -- cgit v1.2.3-70-g09d2 From 84234f3a1f7c532e4afeba03cc8e7e4a8a5277ea Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Wed, 29 Oct 2008 14:49:05 -0400 Subject: Btrfs: Add root tree pointer transaction ids This patch adds transaction IDs to root tree pointers. Transaction IDs in tree pointers are compared with the generation numbers in block headers when reading root blocks of trees. This can detect some types of IO errors. Signed-off-by: Yan Zheng --- fs/btrfs/ctree.h | 8 ++++++++ fs/btrfs/disk-io.c | 21 ++++++++++++++------- fs/btrfs/extent-tree.c | 1 + fs/btrfs/ioctl.c | 1 + fs/btrfs/transaction.c | 13 +++++++++++++ fs/btrfs/tree-log.c | 2 ++ 6 files changed, 39 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index fdba4f1b634..0621ab90b1a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -297,6 +297,7 @@ struct btrfs_super_block { __le32 leafsize; __le32 stripesize; __le32 sys_chunk_array_size; + __le64 chunk_root_generation; u8 root_level; u8 chunk_root_level; u8 log_root_level; @@ -448,6 +449,7 @@ struct btrfs_dir_item { struct btrfs_root_item { struct btrfs_inode_item inode; + __le64 generation; __le64 root_dirid; __le64 bytenr; __le64 byte_limit; @@ -1396,10 +1398,14 @@ static inline int btrfs_is_leaf(struct extent_buffer *eb) } /* struct btrfs_root_item */ +BTRFS_SETGET_FUNCS(disk_root_generation, struct btrfs_root_item, + generation, 64); BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32); BTRFS_SETGET_FUNCS(disk_root_bytenr, struct btrfs_root_item, bytenr, 64); BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8); +BTRFS_SETGET_STACK_FUNCS(root_generation, struct btrfs_root_item, + generation, 64); BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64); BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8); BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64); @@ -1416,6 +1422,8 @@ BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); BTRFS_SETGET_STACK_FUNCS(super_sys_array_size, struct btrfs_super_block, sys_chunk_array_size, 32); +BTRFS_SETGET_STACK_FUNCS(super_chunk_root_generation, + struct btrfs_super_block, chunk_root_generation, 64); BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block, root_level, 8); BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d1137d7ea8d..94b4e50f6b2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -832,6 +832,7 @@ static int find_and_setup_root(struct btrfs_root *tree_root, { int ret; u32 blocksize; + u64 generation; __setup_root(tree_root->nodesize, tree_root->leafsize, tree_root->sectorsize, tree_root->stripesize, @@ -840,9 +841,10 @@ static int find_and_setup_root(struct btrfs_root *tree_root, &root->root_item, &root->root_key); BUG_ON(ret); + generation = btrfs_root_generation(&root->root_item); blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), - blocksize, 0); + blocksize, generation); BUG_ON(!root->node); return 0; } @@ -929,6 +931,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, struct btrfs_path *path; struct extent_buffer *l; u64 highest_inode; + u64 generation; u32 blocksize; int ret = 0; @@ -970,9 +973,10 @@ out: kfree(root); return ERR_PTR(ret); } + generation = btrfs_root_generation(&root->root_item); blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), - blocksize, 0); + blocksize, generation); BUG_ON(!root->node); insert: if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { @@ -1357,6 +1361,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, u32 leafsize; u32 blocksize; u32 stripesize; + u64 generation; struct buffer_head *bh; struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); @@ -1596,13 +1601,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, blocksize = btrfs_level_size(tree_root, btrfs_super_chunk_root_level(disk_super)); + generation = btrfs_super_chunk_root_generation(disk_super); __setup_root(nodesize, leafsize, sectorsize, stripesize, chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); chunk_root->node = read_tree_block(chunk_root, btrfs_super_chunk_root(disk_super), - blocksize, 0); + blocksize, generation); BUG_ON(!chunk_root->node); read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, @@ -1618,11 +1624,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, blocksize = btrfs_level_size(tree_root, btrfs_super_root_level(disk_super)); - + generation = btrfs_super_generation(disk_super); tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super), - blocksize, 0); + blocksize, generation); if (!tree_root->node) goto fail_sb_buffer; @@ -1672,15 +1678,16 @@ struct btrfs_root *open_ctree(struct super_block *sb, log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); log_tree_root->node = read_tree_block(tree_root, bytenr, - blocksize, 0); + blocksize, + generation + 1); ret = btrfs_recover_log_trees(log_tree_root); BUG_ON(ret); } + fs_info->last_trans_committed = btrfs_super_generation(disk_super); ret = btrfs_cleanup_reloc_trees(tree_root); BUG_ON(ret); - fs_info->last_trans_committed = btrfs_super_generation(disk_super); return tree_root; fail_cleaner: diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 564260872c7..155c8dc56a2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4428,6 +4428,7 @@ static int noinline init_reloc_tree(struct btrfs_trans_handle *trans, btrfs_set_root_refs(root_item, 0); btrfs_set_root_bytenr(root_item, eb->start); btrfs_set_root_level(root_item, btrfs_header_level(eb)); + btrfs_set_root_generation(root_item, trans->transid); btrfs_tree_unlock(eb); free_extent_buffer(eb); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 1136ce2febc..fd3c8b5676c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -108,6 +108,7 @@ static noinline int create_subvol(struct btrfs_root *root, inode_item->mode = cpu_to_le32(S_IFDIR | 0755); btrfs_set_root_bytenr(&root_item, leaf->start); + btrfs_set_root_generation(&root_item, trans->transid); btrfs_set_root_level(&root_item, 0); btrfs_set_root_refs(&root_item, 1); btrfs_set_root_used(&root_item, 0); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 48b455fdaac..924af6f2aea 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -439,6 +439,7 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, root->node->start); btrfs_set_root_level(&root->root_item, btrfs_header_level(root->node)); + btrfs_set_root_generation(&root->root_item, trans->transid); ret = btrfs_update_root(trans, tree_root, &root->root_key, &root->root_item); @@ -456,6 +457,12 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, { struct btrfs_fs_info *fs_info = root->fs_info; struct list_head *next; + struct extent_buffer *eb; + + eb = btrfs_lock_root_node(fs_info->tree_root); + btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb, 0); + btrfs_tree_unlock(eb); + free_extent_buffer(eb); while(!list_empty(&fs_info->dirty_cowonly_roots)) { next = fs_info->dirty_cowonly_roots.next; @@ -559,6 +566,9 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, root->node->start); btrfs_set_root_level(&root->root_item, btrfs_header_level(root->node)); + btrfs_set_root_generation(&root->root_item, + root->root_key.offset); + err = btrfs_insert_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); @@ -756,6 +766,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_set_root_bytenr(new_root_item, tmp->start); btrfs_set_root_level(new_root_item, btrfs_header_level(tmp)); + btrfs_set_root_generation(new_root_item, trans->transid); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, new_root_item); btrfs_tree_unlock(tmp); @@ -946,6 +957,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, chunk_root->node->start); btrfs_set_super_chunk_root_level(&root->fs_info->super_copy, btrfs_header_level(chunk_root->node)); + btrfs_set_super_chunk_root_generation(&root->fs_info->super_copy, + btrfs_header_generation(chunk_root->node)); if (!root->fs_info->log_root_recovering) { btrfs_set_super_log_root(&root->fs_info->super_copy, 0); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 835daed5561..e0201c3a7dc 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -117,6 +117,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, inode_item->mode = cpu_to_le32(S_IFDIR | 0755); btrfs_set_root_bytenr(&root_item, leaf->start); + btrfs_set_root_generation(&root_item, trans->transid); btrfs_set_root_level(&root_item, 0); btrfs_set_root_refs(&root_item, 0); btrfs_set_root_used(&root_item, 0); @@ -2065,6 +2066,7 @@ static int update_log_root(struct btrfs_trans_handle *trans, return 0; btrfs_set_root_bytenr(&log->root_item, log->node->start); + btrfs_set_root_generation(&log->root_item, trans->transid); btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node)); ret = btrfs_update_root(trans, log->fs_info->log_root_tree, &log->root_key, &log->root_item); -- cgit v1.2.3-70-g09d2 From 87ef2bb46bfc4be0b40799e68115cbe28d80a1bd Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 30 Oct 2008 11:23:27 -0400 Subject: Btrfs: prevent looping forever in finish_current_insert and del_pending_extents finish_current_insert and del_pending_extents process extent tree modifications that build up while we are changing the extent tree. It is a confusing bit of code that prevents recursion. Both functions run through a list of pending operations and both funcs add to the list of pending operations. If you have two procs in either one of them, they can end up looping forever making more work for each other. This patch makes them walk forward through the list of pending changes instead of always trying to process the entire list. At transaction commit time, we catch any changes that were left over. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 54 ++++++++++++++++++++++++++++---------------------- fs/btrfs/transaction.c | 12 +++++++++++ 2 files changed, 42 insertions(+), 24 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 155c8dc56a2..fada9c22a02 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -45,9 +45,9 @@ struct pending_extent_op { }; static int finish_current_insert(struct btrfs_trans_handle *trans, struct - btrfs_root *extent_root); + btrfs_root *extent_root, int all); static int del_pending_extents(struct btrfs_trans_handle *trans, struct - btrfs_root *extent_root); + btrfs_root *extent_root, int all); static struct btrfs_block_group_cache * __btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, @@ -711,8 +711,8 @@ static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, parent, ref_root, ref_generation, owner_objectid); BUG_ON(ret); - finish_current_insert(trans, extent_root); - del_pending_extents(trans, extent_root); + finish_current_insert(trans, extent_root, 0); + del_pending_extents(trans, extent_root, 0); out: btrfs_free_path(path); return ret; @@ -784,8 +784,8 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, ref_root, ref_generation, owner_objectid); BUG_ON(ret); - finish_current_insert(trans, root->fs_info->extent_root); - del_pending_extents(trans, root->fs_info->extent_root); + finish_current_insert(trans, root->fs_info->extent_root, 0); + del_pending_extents(trans, root->fs_info->extent_root, 0); btrfs_free_path(path); return 0; @@ -810,8 +810,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - finish_current_insert(trans, root->fs_info->extent_root); - del_pending_extents(trans, root->fs_info->extent_root); + finish_current_insert(trans, root->fs_info->extent_root, 1); + del_pending_extents(trans, root->fs_info->extent_root, 1); return 0; } @@ -1292,8 +1292,8 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); btrfs_release_path(extent_root, path); fail: - finish_current_insert(trans, extent_root); - pending_ret = del_pending_extents(trans, extent_root); + finish_current_insert(trans, extent_root, 0); + pending_ret = del_pending_extents(trans, extent_root, 0); if (ret) return ret; if (pending_ret) @@ -1690,7 +1690,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, } static int finish_current_insert(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root) + struct btrfs_root *extent_root, int all) { u64 start; u64 end; @@ -1714,7 +1714,7 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, &end, EXTENT_WRITEBACK); if (ret) { mutex_unlock(&info->extent_ins_mutex); - if (search) { + if (search && all) { search = 0; continue; } @@ -1723,7 +1723,7 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS); if (!ret) { - search = end+1; + search = end + 1; mutex_unlock(&info->extent_ins_mutex); cond_resched(); continue; @@ -1785,7 +1785,10 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, } kfree(extent_op); unlock_extent(&info->extent_ins, start, end, GFP_NOFS); - search = 0; + if (all) + search = 0; + else + search = end + 1; cond_resched(); } @@ -1992,7 +1995,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, #endif } btrfs_free_path(path); - finish_current_insert(trans, extent_root); + finish_current_insert(trans, extent_root, 0); return ret; } @@ -2001,7 +2004,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, * them from the extent map */ static int del_pending_extents(struct btrfs_trans_handle *trans, struct - btrfs_root *extent_root) + btrfs_root *extent_root, int all) { int ret; int err = 0; @@ -2023,7 +2026,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct EXTENT_WRITEBACK); if (ret) { mutex_unlock(&info->extent_ins_mutex); - if (search) { + if (all && search) { search = 0; continue; } @@ -2088,7 +2091,10 @@ free_extent: err = ret; unlock_extent(extent_ins, start, end, GFP_NOFS); - search = 0; + if (all) + search = 0; + else + search = end + 1; cond_resched(); } return err; @@ -2155,8 +2161,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, root_objectid, ref_generation, owner_objectid, pin, pin == 0); - finish_current_insert(trans, root->fs_info->extent_root); - pending_ret = del_pending_extents(trans, root->fs_info->extent_root); + finish_current_insert(trans, root->fs_info->extent_root, 0); + pending_ret = del_pending_extents(trans, root->fs_info->extent_root, 0); return ret ? ret : pending_ret; } @@ -2580,8 +2586,8 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, trans->alloc_exclude_start = 0; trans->alloc_exclude_nr = 0; btrfs_free_path(path); - finish_current_insert(trans, extent_root); - pending_ret = del_pending_extents(trans, extent_root); + finish_current_insert(trans, extent_root, 0); + pending_ret = del_pending_extents(trans, extent_root, 0); if (ret) goto out; @@ -5229,8 +5235,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, sizeof(cache->item)); BUG_ON(ret); - finish_current_insert(trans, extent_root); - ret = del_pending_extents(trans, extent_root); + finish_current_insert(trans, extent_root, 0); + ret = del_pending_extents(trans, extent_root, 0); BUG_ON(ret); set_avail_alloc_bits(extent_root->fs_info, type); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 924af6f2aea..968b84f17a1 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -430,7 +430,10 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, u64 old_root_bytenr; struct btrfs_root *tree_root = root->fs_info->tree_root; + btrfs_extent_post_op(trans, root); btrfs_write_dirty_block_groups(trans, root); + btrfs_extent_post_op(trans, root); + while(1) { old_root_bytenr = btrfs_root_bytenr(&root->root_item); if (old_root_bytenr == root->node->start) @@ -440,11 +443,15 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, btrfs_set_root_level(&root->root_item, btrfs_header_level(root->node)); btrfs_set_root_generation(&root->root_item, trans->transid); + + btrfs_extent_post_op(trans, root); + ret = btrfs_update_root(trans, tree_root, &root->root_key, &root->root_item); BUG_ON(ret); btrfs_write_dirty_block_groups(trans, root); + btrfs_extent_post_op(trans, root); } return 0; } @@ -459,15 +466,20 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct list_head *next; struct extent_buffer *eb; + btrfs_extent_post_op(trans, fs_info->tree_root); + eb = btrfs_lock_root_node(fs_info->tree_root); btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb, 0); btrfs_tree_unlock(eb); free_extent_buffer(eb); + btrfs_extent_post_op(trans, fs_info->tree_root); + while(!list_empty(&fs_info->dirty_cowonly_roots)) { next = fs_info->dirty_cowonly_roots.next; list_del_init(next); root = list_entry(next, struct btrfs_root, dirty_list); + update_cowonly_root(trans, root); } return 0; -- cgit v1.2.3-70-g09d2 From 6643558db29006825dbb10012b3f8890aca4bcd5 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Thu, 30 Oct 2008 14:19:50 -0400 Subject: Btrfs: Fix bookend extent race v2 When dropping middle part of an extent, btrfs_drop_extents truncates the extent at first, then inserts a bookend extent. Since truncation and insertion can't be done atomically, there is a small period that the bookend extent isn't in the tree. This causes problem for functions that search the tree for file extent item. The way to fix this is lock the range of the bookend extent before truncation. Signed-off-by: Yan Zheng --- fs/btrfs/extent-tree.c | 28 +++++++++++++--------------- fs/btrfs/extent_io.c | 6 +++++- fs/btrfs/file.c | 31 +++++++++++++++++++++++++------ fs/btrfs/inode.c | 10 ---------- 4 files changed, 43 insertions(+), 32 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fada9c22a02..535cee47fcf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3379,11 +3379,13 @@ static int noinline relocate_data_extent(struct inode *reloc_inode, struct btrfs_root *root = BTRFS_I(reloc_inode)->root; struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree; struct extent_map *em; + u64 start = extent_key->objectid - offset; + u64 end = start + extent_key->offset - 1; em = alloc_extent_map(GFP_NOFS); BUG_ON(!em || IS_ERR(em)); - em->start = extent_key->objectid - offset; + em->start = start; em->len = extent_key->offset; em->block_len = extent_key->offset; em->block_start = extent_key->objectid; @@ -3391,7 +3393,7 @@ static int noinline relocate_data_extent(struct inode *reloc_inode, set_bit(EXTENT_FLAG_PINNED, &em->flags); /* setup extent map to cheat btrfs_readpage */ - mutex_lock(&BTRFS_I(reloc_inode)->extent_mutex); + lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); while (1) { int ret; spin_lock(&em_tree->lock); @@ -3401,13 +3403,11 @@ static int noinline relocate_data_extent(struct inode *reloc_inode, free_extent_map(em); break; } - btrfs_drop_extent_cache(reloc_inode, em->start, - em->start + em->len - 1, 0); + btrfs_drop_extent_cache(reloc_inode, start, end, 0); } - mutex_unlock(&BTRFS_I(reloc_inode)->extent_mutex); + unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); - return relocate_inode_pages(reloc_inode, extent_key->objectid - offset, - extent_key->offset); + return relocate_inode_pages(reloc_inode, start, extent_key->offset); } struct btrfs_ref_path { @@ -3831,7 +3831,6 @@ next: * the file extent item was modified by someone * before the extent got locked. */ - mutex_unlock(&BTRFS_I(inode)->extent_mutex); unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, lock_end, GFP_NOFS); extent_locked = 0; @@ -3896,8 +3895,12 @@ next: lock_start = key.offset; lock_end = lock_start + num_bytes - 1; } else { - BUG_ON(lock_start != key.offset); - BUG_ON(lock_end - lock_start + 1 < num_bytes); + if (lock_start > key.offset || + lock_end + 1 < key.offset + num_bytes) { + unlock_extent(&BTRFS_I(inode)->io_tree, + lock_start, lock_end, GFP_NOFS); + extent_locked = 0; + } } if (!inode) { @@ -3951,7 +3954,6 @@ next: if (ordered) btrfs_put_ordered_extent(ordered); - mutex_lock(&BTRFS_I(inode)->extent_mutex); extent_locked = 1; continue; } @@ -4073,7 +4075,6 @@ next: } if (extent_locked) { - mutex_unlock(&BTRFS_I(inode)->extent_mutex); unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, lock_end, GFP_NOFS); extent_locked = 0; @@ -4091,7 +4092,6 @@ out: if (inode) { mutex_unlock(&inode->i_mutex); if (extent_locked) { - mutex_unlock(&BTRFS_I(inode)->extent_mutex); unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, lock_end, GFP_NOFS); } @@ -4180,10 +4180,8 @@ static int noinline invalidate_extent_cache(struct btrfs_root *root, lock_extent(&BTRFS_I(inode)->io_tree, key.offset, key.offset + num_bytes - 1, GFP_NOFS); - mutex_lock(&BTRFS_I(inode)->extent_mutex); btrfs_drop_extent_cache(inode, key.offset, key.offset + num_bytes - 1, 1); - mutex_unlock(&BTRFS_I(inode)->extent_mutex); unlock_extent(&BTRFS_I(inode)->io_tree, key.offset, key.offset + num_bytes - 1, GFP_NOFS); cond_resched(); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 7503bd46819..65a0583027e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -946,8 +946,12 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, &failed_start, mask); - if (err == -EEXIST) + if (err == -EEXIST) { + if (failed_start > start) + clear_extent_bit(tree, start, failed_start - 1, + EXTENT_LOCKED, 1, 0, mask); return 0; + } return 1; } EXPORT_SYMBOL(try_lock_extent); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b8a7637e14a..1a0510ad030 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -364,6 +364,7 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, u64 start, u64 end, u64 inline_limit, u64 *hint_byte) { u64 extent_end = 0; + u64 locked_end = end; u64 search_start = start; u64 leaf_start; u64 ram_bytes = 0; @@ -479,12 +480,6 @@ next_slot: goto next_slot; } - if (found_inline) { - u64 mask = root->sectorsize - 1; - search_start = (extent_end + mask) & ~mask; - } else - search_start = extent_end; - if (end <= extent_end && start >= key.offset && found_inline) *hint_byte = EXTENT_MAP_INLINE; @@ -501,6 +496,26 @@ next_slot: if (found_inline && start <= key.offset) keep = 1; } + + if (bookend && found_extent && locked_end < extent_end) { + ret = try_lock_extent(&BTRFS_I(inode)->io_tree, + locked_end, extent_end - 1, GFP_NOFS); + if (!ret) { + btrfs_release_path(root, path); + lock_extent(&BTRFS_I(inode)->io_tree, + locked_end, extent_end - 1, GFP_NOFS); + locked_end = extent_end; + continue; + } + locked_end = extent_end; + } + + if (found_inline) { + u64 mask = root->sectorsize - 1; + search_start = (extent_end + mask) & ~mask; + } else + search_start = extent_end; + /* truncate existing extent */ if (start > key.offset) { u64 new_num; @@ -638,6 +653,10 @@ next_slot: } out: btrfs_free_path(path); + if (locked_end > end) { + unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, + GFP_NOFS); + } btrfs_check_file(root, inode); return ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8254d6fa691..e8511d14b11 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -246,7 +246,6 @@ static int cow_file_range_inline(struct btrfs_trans_handle *trans, return 1; } - mutex_lock(&BTRFS_I(inode)->extent_mutex); ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, aligned_end, &hint_byte); BUG_ON(ret); @@ -258,7 +257,6 @@ static int cow_file_range_inline(struct btrfs_trans_handle *trans, compressed_pages); BUG_ON(ret); btrfs_drop_extent_cache(inode, start, aligned_end, 0); - mutex_unlock(&BTRFS_I(inode)->extent_mutex); return 0; } @@ -437,9 +435,7 @@ again: BUG_ON(disk_num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy)); - mutex_lock(&BTRFS_I(inode)->extent_mutex); btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); - mutex_unlock(&BTRFS_I(inode)->extent_mutex); while(disk_num_bytes > 0) { unsigned long min_bytes; @@ -477,8 +473,6 @@ again: em->block_start = ins.objectid; em->block_len = ins.offset; em->bdev = root->fs_info->fs_devices->latest_bdev; - - mutex_lock(&BTRFS_I(inode)->extent_mutex); set_bit(EXTENT_FLAG_PINNED, &em->flags); if (will_compress) @@ -495,7 +489,6 @@ again: btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0); } - mutex_unlock(&BTRFS_I(inode)->extent_mutex); cur_alloc_size = ins.offset; ret = btrfs_add_ordered_extent(inode, start, ins.objectid, @@ -1016,8 +1009,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) INIT_LIST_HEAD(&list); - mutex_lock(&BTRFS_I(inode)->extent_mutex); - ret = btrfs_drop_extents(trans, root, inode, ordered_extent->file_offset, ordered_extent->file_offset + @@ -1059,7 +1050,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) btrfs_drop_extent_cache(inode, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, 0); - mutex_unlock(&BTRFS_I(inode)->extent_mutex); ins.objectid = ordered_extent->start; ins.offset = ordered_extent->disk_len; -- cgit v1.2.3-70-g09d2 From 80ff385665b7fca29fefe358a60ab0d09f9b8e87 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Thu, 30 Oct 2008 14:20:02 -0400 Subject: Btrfs: update nodatacow code v2 This patch simplifies the nodatacow checker. If all references were created after the latest snapshot, then we can avoid COW safely. This patch also updates run_delalloc_nocow to do more fine-grained checking. Signed-off-by: Yan Zheng --- fs/btrfs/ctree.h | 8 +- fs/btrfs/extent-tree.c | 131 ++++------------------------- fs/btrfs/inode.c | 213 ++++++++++++++++++++++++++++-------------------- fs/btrfs/ioctl.c | 1 + fs/btrfs/ordered-data.c | 9 +- fs/btrfs/ordered-data.h | 3 +- fs/btrfs/transaction.c | 2 + 7 files changed, 154 insertions(+), 213 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ca5547af609..8bf6a085a73 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -454,6 +454,7 @@ struct btrfs_root_item { __le64 bytenr; __le64 byte_limit; __le64 bytes_used; + __le64 last_snapshot; __le32 flags; __le32 refs; struct btrfs_disk_key drop_progress; @@ -1413,6 +1414,8 @@ BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32); BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 32); BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64); BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); +BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, + last_snapshot, 64); /* struct btrfs_super_block */ BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); @@ -1564,9 +1567,8 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, u64 bytenr, u64 num, int pin); int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *leaf); -int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_key *key, u64 bytenr); +int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr); int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 535cee47fcf..1eb69a91b72 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -848,9 +848,8 @@ out: return 0; } -static int get_reference_status(struct btrfs_root *root, u64 bytenr, - u64 parent_gen, u64 ref_objectid, - u64 *min_generation, u32 *ref_count) +int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr) { struct btrfs_root *extent_root = root->fs_info->extent_root; struct btrfs_path *path; @@ -858,8 +857,8 @@ static int get_reference_status(struct btrfs_root *root, u64 bytenr, struct btrfs_extent_ref *ref_item; struct btrfs_key key; struct btrfs_key found_key; - u64 root_objectid = root->root_key.objectid; - u64 ref_generation; + u64 ref_root; + u64 last_snapshot; u32 nritems; int ret; @@ -872,7 +871,9 @@ static int get_reference_status(struct btrfs_root *root, u64 bytenr, if (ret < 0) goto out; BUG_ON(ret == 0); - if (ret < 0 || path->slots[0] == 0) + + ret = -ENOENT; + if (path->slots[0] == 0) goto out; path->slots[0]--; @@ -880,14 +881,10 @@ static int get_reference_status(struct btrfs_root *root, u64 bytenr, btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.objectid != bytenr || - found_key.type != BTRFS_EXTENT_ITEM_KEY) { - ret = 1; + found_key.type != BTRFS_EXTENT_ITEM_KEY) goto out; - } - - *ref_count = 0; - *min_generation = (u64)-1; + last_snapshot = btrfs_root_last_snapshot(&root->root_item); while (1) { leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); @@ -910,114 +907,22 @@ static int get_reference_status(struct btrfs_root *root, u64 bytenr, ref_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); - ref_generation = btrfs_ref_generation(leaf, ref_item); - /* - * For (parent_gen > 0 && parent_gen > ref_generation): - * - * we reach here through the oldest root, therefore - * all other reference from same snapshot should have - * a larger generation. - */ - if ((root_objectid != btrfs_ref_root(leaf, ref_item)) || - (parent_gen > 0 && parent_gen > ref_generation) || - (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID && - ref_objectid != btrfs_ref_objectid(leaf, ref_item))) { - *ref_count = 2; - break; - } - - *ref_count = 1; - if (*min_generation > ref_generation) - *min_generation = ref_generation; - - path->slots[0]++; - } - ret = 0; -out: - btrfs_free_path(path); - return ret; -} - -int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_key *key, u64 bytenr) -{ - struct btrfs_root *old_root; - struct btrfs_path *path = NULL; - struct extent_buffer *eb; - struct btrfs_file_extent_item *item; - u64 ref_generation; - u64 min_generation; - u64 extent_start; - u32 ref_count; - int level; - int ret; - - BUG_ON(trans == NULL); - BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY); - ret = get_reference_status(root, bytenr, 0, key->objectid, - &min_generation, &ref_count); - if (ret) - return ret; - - if (ref_count != 1) - return 1; - - old_root = root->dirty_root->root; - ref_generation = old_root->root_key.offset; - - /* all references are created in running transaction */ - if (min_generation > ref_generation) { - ret = 0; - goto out; - } - - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out; - } - - path->skip_locking = 1; - /* if no item found, the extent is referenced by other snapshot */ - ret = btrfs_search_slot(NULL, old_root, key, path, 0, 0); - if (ret) - goto out; - - eb = path->nodes[0]; - item = btrfs_item_ptr(eb, path->slots[0], - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(eb, item) != BTRFS_FILE_EXTENT_REG || - btrfs_file_extent_disk_bytenr(eb, item) != bytenr) { - ret = 1; - goto out; - } - - for (level = BTRFS_MAX_LEVEL - 1; level >= -1; level--) { - if (level >= 0) { - eb = path->nodes[level]; - if (!eb) - continue; - extent_start = eb->start; - } else - extent_start = bytenr; - - ret = get_reference_status(root, extent_start, ref_generation, - 0, &min_generation, &ref_count); - if (ret) + ref_root = btrfs_ref_root(leaf, ref_item); + if (ref_root != root->root_key.objectid && + ref_root != BTRFS_TREE_LOG_OBJECTID) { + ret = 1; goto out; - - if (ref_count != 1) { + } + if (btrfs_ref_generation(leaf, ref_item) <= last_snapshot) { ret = 1; goto out; } - if (level >= 0) - ref_generation = btrfs_header_generation(eb); + + path->slots[0]++; } ret = 0; out: - if (path) - btrfs_free_path(path); + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e8511d14b11..3e6f0568fdb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -298,6 +298,7 @@ static int cow_file_range(struct inode *inode, struct page *locked_page, unsigned long max_compressed = 128 * 1024; unsigned long max_uncompressed = 256 * 1024; int i; + int ordered_type; int will_compress; trans = btrfs_join_transaction(root, 1); @@ -491,9 +492,10 @@ again: } cur_alloc_size = ins.offset; + ordered_type = will_compress ? BTRFS_ORDERED_COMPRESSED : 0; ret = btrfs_add_ordered_extent(inode, start, ins.objectid, - ram_size, cur_alloc_size, 0, - will_compress); + ram_size, cur_alloc_size, + ordered_type); BUG_ON(ret); if (disk_num_bytes < cur_alloc_size) { @@ -587,115 +589,148 @@ free_pages_out: static int run_delalloc_nocow(struct inode *inode, struct page *locked_page, u64 start, u64 end, int *page_started) { - u64 extent_start; - u64 extent_end; - u64 bytenr; - u64 loops = 0; - u64 total_fs_bytes; struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_block_group_cache *block_group; struct btrfs_trans_handle *trans; struct extent_buffer *leaf; - int found_type; struct btrfs_path *path; - struct btrfs_file_extent_item *item; - int ret; - int err = 0; + struct btrfs_file_extent_item *fi; struct btrfs_key found_key; + u64 cow_start; + u64 cur_offset; + u64 extent_end; + u64 disk_bytenr; + u64 num_bytes; + int extent_type; + int ret; + int nocow; + int check_prev = 1; - total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); path = btrfs_alloc_path(); BUG_ON(!path); trans = btrfs_join_transaction(root, 1); BUG_ON(!trans); -again: - ret = btrfs_lookup_file_extent(NULL, root, path, - inode->i_ino, start, 0); - if (ret < 0) { - err = ret; - goto out; - } - - if (ret != 0) { - if (path->slots[0] == 0) - goto not_found; - path->slots[0]--; - } - - leaf = path->nodes[0]; - item = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - - /* are we inside the extent that was found? */ - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - found_type = btrfs_key_type(&found_key); - if (found_key.objectid != inode->i_ino || - found_type != BTRFS_EXTENT_DATA_KEY) - goto not_found; - - found_type = btrfs_file_extent_type(leaf, item); - extent_start = found_key.offset; - if (found_type == BTRFS_FILE_EXTENT_REG) { - u64 extent_num_bytes; - - extent_num_bytes = btrfs_file_extent_num_bytes(leaf, item); - extent_end = extent_start + extent_num_bytes; - err = 0; - if (btrfs_file_extent_compression(leaf, item) || - btrfs_file_extent_encryption(leaf,item) || - btrfs_file_extent_other_encoding(leaf, item)) - goto not_found; + cow_start = (u64)-1; + cur_offset = start; + while (1) { + ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, + cur_offset, 0); + BUG_ON(ret < 0); + if (ret > 0 && path->slots[0] > 0 && check_prev) { + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, + path->slots[0] - 1); + if (found_key.objectid == inode->i_ino && + found_key.type == BTRFS_EXTENT_DATA_KEY) + path->slots[0]--; + } + check_prev = 0; +next_slot: + leaf = path->nodes[0]; + if (path->slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + BUG_ON(1); + if (ret > 0) + break; + leaf = path->nodes[0]; + } - if (loops && start != extent_start) - goto not_found; + nocow = 0; + disk_bytenr = 0; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (start < extent_start || start >= extent_end) - goto not_found; + if (found_key.objectid > inode->i_ino || + found_key.type > BTRFS_EXTENT_DATA_KEY || + found_key.offset > end) + break; - bytenr = btrfs_file_extent_disk_bytenr(leaf, item); - if (bytenr == 0) - goto not_found; + if (found_key.offset > cur_offset) { + extent_end = found_key.offset; + goto out_check; + } - if (btrfs_cross_ref_exists(trans, root, &found_key, bytenr)) - goto not_found; - /* - * we may be called by the resizer, make sure we're inside - * the limits of the FS - */ - block_group = btrfs_lookup_block_group(root->fs_info, - bytenr); - if (!block_group || block_group->ro) - goto not_found; + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + extent_type = btrfs_file_extent_type(leaf, fi); - bytenr += btrfs_file_extent_offset(leaf, item); - extent_num_bytes = min(end + 1, extent_end) - start; - ret = btrfs_add_ordered_extent(inode, start, bytenr, - extent_num_bytes, - extent_num_bytes, 1, 0); - if (ret) { - err = ret; - goto out; + if (extent_type == BTRFS_FILE_EXTENT_REG) { + struct btrfs_block_group_cache *block_group; + disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + extent_end = found_key.offset + + btrfs_file_extent_num_bytes(leaf, fi); + if (extent_end <= start) { + path->slots[0]++; + goto next_slot; + } + if (btrfs_file_extent_compression(leaf, fi) || + btrfs_file_extent_encryption(leaf, fi) || + btrfs_file_extent_other_encoding(leaf, fi)) + goto out_check; + if (disk_bytenr == 0) + goto out_check; + if (btrfs_cross_ref_exist(trans, root, disk_bytenr)) + goto out_check; + block_group = btrfs_lookup_block_group(root->fs_info, + disk_bytenr); + if (!block_group || block_group->ro) + goto out_check; + disk_bytenr += btrfs_file_extent_offset(leaf, fi); + nocow = 1; + } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { + extent_end = found_key.offset + + btrfs_file_extent_inline_len(leaf, fi); + extent_end = ALIGN(extent_end, root->sectorsize); + } else { + BUG_ON(1); + } +out_check: + if (extent_end <= start) { + path->slots[0]++; + goto next_slot; + } + if (!nocow) { + if (cow_start == (u64)-1) + cow_start = cur_offset; + cur_offset = extent_end; + if (cur_offset > end) + break; + path->slots[0]++; + goto next_slot; } btrfs_release_path(root, path); - start = extent_end; - if (start <= end) { - loops++; - goto again; + if (cow_start != (u64)-1) { + ret = cow_file_range(inode, locked_page, cow_start, + found_key.offset - 1, page_started); + BUG_ON(ret); + cow_start = (u64)-1; } - } else { -not_found: - btrfs_end_transaction(trans, root); - btrfs_free_path(path); - return cow_file_range(inode, locked_page, start, end, - page_started); + + disk_bytenr += cur_offset - found_key.offset; + num_bytes = min(end + 1, extent_end) - cur_offset; + + ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr, + num_bytes, num_bytes, + BTRFS_ORDERED_NOCOW); + cur_offset = extent_end; + if (cur_offset > end) + break; } -out: - WARN_ON(err); - btrfs_end_transaction(trans, root); + btrfs_release_path(root, path); + + if (cur_offset <= end && cow_start == (u64)-1) + cow_start = cur_offset; + if (cow_start != (u64)-1) { + ret = cow_file_range(inode, locked_page, cow_start, end, + page_started); + BUG_ON(ret); + } + + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); btrfs_free_path(path); - return err; + return 0; } /* diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index fd3c8b5676c..7f915d47839 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -112,6 +112,7 @@ static noinline int create_subvol(struct btrfs_root *root, btrfs_set_root_level(&root_item, 0); btrfs_set_root_refs(&root_item, 1); btrfs_set_root_used(&root_item, 0); + btrfs_set_root_last_snapshot(&root_item, 0); memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); root_item.drop_level = 0; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index b5745bb96d4..e7317c8fda2 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -165,8 +165,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, * inserted. */ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, - u64 start, u64 len, u64 disk_len, int nocow, - int compressed) + u64 start, u64 len, u64 disk_len, int type) { struct btrfs_ordered_inode_tree *tree; struct rb_node *node; @@ -183,10 +182,8 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, entry->len = len; entry->disk_len = disk_len; entry->inode = inode; - if (nocow) - set_bit(BTRFS_ORDERED_NOCOW, &entry->flags); - if (compressed) - set_bit(BTRFS_ORDERED_COMPRESSED, &entry->flags); + if (type == BTRFS_ORDERED_NOCOW || type == BTRFS_ORDERED_COMPRESSED) + set_bit(type, &entry->flags); /* one ref for the tree */ atomic_set(&entry->refs, 1); diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 1ef464145d2..e6d9bc54c2b 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -132,8 +132,7 @@ int btrfs_remove_ordered_extent(struct inode *inode, int btrfs_dec_test_ordered_pending(struct inode *inode, u64 file_offset, u64 io_size); int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, - u64 start, u64 len, u64 disk_len, int nocow, - int compressed); + u64 start, u64 len, u64 disk_len, int type); int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_extent *entry, struct btrfs_ordered_sum *sum); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 968b84f17a1..e72a013d24b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -763,6 +763,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, if (ret) goto fail; + btrfs_record_root_in_trans(root); + btrfs_set_root_last_snapshot(&root->root_item, trans->transid); memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); key.objectid = objectid; -- cgit v1.2.3-70-g09d2 From d899e05215178fed903ad0e7fc1cb4d8e0cc0a88 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Thu, 30 Oct 2008 14:25:28 -0400 Subject: Btrfs: Add fallocate support v2 This patch updates btrfs-progs for fallocate support. fallocate is a little different in Btrfs because we need to tell the COW system that a given preallocated extent doesn't need to be cow'd as long as there are no snapshots of it. This leverages the -o nodatacow checks. Signed-off-by: Yan Zheng --- fs/btrfs/ctree.h | 9 +- fs/btrfs/extent-tree.c | 65 +++++----- fs/btrfs/extent_io.c | 12 +- fs/btrfs/extent_map.h | 1 + fs/btrfs/file.c | 245 +++++++++++++++++++++++++++++++++++- fs/btrfs/inode.c | 323 +++++++++++++++++++++++++++++++++++++----------- fs/btrfs/ioctl.c | 3 +- fs/btrfs/ordered-data.c | 5 +- fs/btrfs/ordered-data.h | 4 +- fs/btrfs/tree-log.c | 13 +- 10 files changed, 563 insertions(+), 117 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8bf6a085a73..d5ba3d1aaf9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -462,8 +462,9 @@ struct btrfs_root_item { u8 level; } __attribute__ ((__packed__)); -#define BTRFS_FILE_EXTENT_REG 0 -#define BTRFS_FILE_EXTENT_INLINE 1 +#define BTRFS_FILE_EXTENT_INLINE 0 +#define BTRFS_FILE_EXTENT_REG 1 +#define BTRFS_FILE_EXTENT_PREALLOC 2 struct btrfs_file_extent_item { /* @@ -868,6 +869,7 @@ struct btrfs_root { #define BTRFS_INODE_NODATACOW (1 << 1) #define BTRFS_INODE_READONLY (1 << 2) #define BTRFS_INODE_NOCOMPRESS (1 << 3) +#define BTRFS_INODE_PREALLOC (1 << 4) #define btrfs_clear_flag(inode, flag) (BTRFS_I(inode)->flags &= \ ~BTRFS_INODE_##flag) #define btrfs_set_flag(inode, flag) (BTRFS_I(inode)->flags |= \ @@ -1924,6 +1926,9 @@ extern struct file_operations btrfs_file_operations; int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, u64 inline_limit, u64 *hint_block); +int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, u64 start, u64 end); int btrfs_release_file(struct inode *inode, struct file *file); /* tree-defrag.c */ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1eb69a91b72..8af39521eb7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2147,6 +2147,9 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, total_needed += empty_size; block_group = btrfs_lookup_block_group(root->fs_info, search_start); + if (!block_group) + block_group = btrfs_lookup_first_block_group(root->fs_info, + search_start); space_info = __find_space_info(root->fs_info, data); down_read(&space_info->groups_sem); @@ -3426,9 +3429,7 @@ walk_down: next: level--; btrfs_release_path(extent_root, path); - if (need_resched()) { - cond_resched(); - } + cond_resched(); } /* reached lowest level */ ret = 1; @@ -3539,9 +3540,7 @@ found: } btrfs_release_path(extent_root, path); - if (need_resched()) { - cond_resched(); - } + cond_resched(); } /* reached max tree level, but no tree root found. */ BUG(); @@ -3654,8 +3653,9 @@ static int noinline get_new_locations(struct inode *reloc_inode, exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi); exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf, fi); - WARN_ON(exts[nr].offset > 0); - WARN_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes); + BUG_ON(exts[nr].offset > 0); + BUG_ON(exts[nr].compression || exts[nr].encryption); + BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes); cur_pos += exts[nr].num_bytes; nr++; @@ -3709,6 +3709,7 @@ static int noinline replace_one_extent(struct btrfs_trans_handle *trans, u32 nritems; int nr_scaned = 0; int extent_locked = 0; + int extent_type; int ret; memcpy(&key, leaf_key, sizeof(key)); @@ -3781,8 +3782,9 @@ next: } fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - if ((btrfs_file_extent_type(leaf, fi) != - BTRFS_FILE_EXTENT_REG) || + extent_type = btrfs_file_extent_type(leaf, fi); + if ((extent_type != BTRFS_FILE_EXTENT_REG && + extent_type != BTRFS_FILE_EXTENT_PREALLOC) || (btrfs_file_extent_disk_bytenr(leaf, fi) != extent_key->objectid)) { path->slots[0]++; @@ -3865,16 +3867,10 @@ next: if (nr_extents == 1) { /* update extent pointer in place */ - btrfs_set_file_extent_generation(leaf, fi, - trans->transid); btrfs_set_file_extent_disk_bytenr(leaf, fi, new_extents[0].disk_bytenr); btrfs_set_file_extent_disk_num_bytes(leaf, fi, new_extents[0].disk_num_bytes); - btrfs_set_file_extent_ram_bytes(leaf, fi, - new_extents[0].ram_bytes); - ext_offset += new_extents[0].offset; - btrfs_set_file_extent_offset(leaf, fi, ext_offset); btrfs_mark_buffer_dirty(leaf); btrfs_drop_extent_cache(inode, key.offset, @@ -3901,6 +3897,8 @@ next: btrfs_release_path(root, path); key.offset += num_bytes; } else { + BUG_ON(1); +#if 0 u64 alloc_hint; u64 extent_len; int i; @@ -3977,6 +3975,7 @@ next: break; } BUG_ON(i >= nr_extents); +#endif } if (extent_locked) { @@ -4156,15 +4155,10 @@ static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans, ref->extents[ext_index].bytenr = new_extent->disk_bytenr; ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes; - btrfs_set_file_extent_generation(leaf, fi, trans->transid); - btrfs_set_file_extent_ram_bytes(leaf, fi, - new_extent->ram_bytes); btrfs_set_file_extent_disk_bytenr(leaf, fi, new_extent->disk_bytenr); btrfs_set_file_extent_disk_num_bytes(leaf, fi, new_extent->disk_num_bytes); - new_extent->offset += btrfs_file_extent_offset(leaf, fi); - btrfs_set_file_extent_offset(leaf, fi, new_extent->offset); btrfs_mark_buffer_dirty(leaf); ret = btrfs_inc_extent_ref(trans, root, @@ -4625,12 +4619,15 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, */ if (!new_extents) { u64 group_start = group->key.objectid; + new_extents = kmalloc(sizeof(*new_extents), + GFP_NOFS); + nr_extents = 1; ret = get_new_locations(reloc_inode, extent_key, - group_start, 0, + group_start, 1, &new_extents, &nr_extents); - if (ret < 0) + if (ret) goto out; } btrfs_record_root_in_trans(found_root); @@ -4762,7 +4759,8 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, btrfs_set_inode_generation(leaf, item, 1); btrfs_set_inode_size(leaf, item, size); btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); - btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NODATASUM); + btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NODATASUM | + BTRFS_INODE_NOCOMPRESS); btrfs_mark_buffer_dirty(leaf); btrfs_release_path(root, path); out: @@ -4835,6 +4833,7 @@ int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) struct inode *reloc_inode; struct btrfs_block_group_cache *block_group; struct btrfs_key key; + u64 skipped; u64 cur_byte; u64 total_found; u32 nritems; @@ -4864,6 +4863,7 @@ int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) btrfs_start_delalloc_inodes(info->tree_root); btrfs_wait_ordered_extents(info->tree_root, 0); again: + skipped = 0; total_found = 0; progress = 0; key.objectid = block_group->key.objectid; @@ -4926,6 +4926,8 @@ next: ret = relocate_one_extent(root, path, &key, block_group, reloc_inode, pass); BUG_ON(ret < 0); + if (ret > 0) + skipped++; key.objectid = cur_byte; key.type = 0; @@ -4944,6 +4946,11 @@ next: printk("btrfs found %llu extents in pass %d\n", (unsigned long long)total_found, pass); pass++; + if (total_found == skipped && pass > 2) { + iput(reloc_inode); + reloc_inode = create_reloc_inode(info, block_group); + pass = 0; + } goto again; } @@ -5011,17 +5018,17 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { block_group = rb_entry(n, struct btrfs_block_group_cache, cache_node); - - spin_unlock(&info->block_group_cache_lock); - btrfs_remove_free_space_cache(block_group); - spin_lock(&info->block_group_cache_lock); - rb_erase(&block_group->cache_node, &info->block_group_cache_tree); + spin_unlock(&info->block_group_cache_lock); + + btrfs_remove_free_space_cache(block_group); down_write(&block_group->space_info->groups_sem); list_del(&block_group->list); up_write(&block_group->space_info->groups_sem); kfree(block_group); + + spin_lock(&info->block_group_cache_lock); } spin_unlock(&info->block_group_cache_lock); return 0; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 65a0583027e..eb3c12e7bea 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2015,6 +2015,8 @@ printk("2bad mapping end %Lu cur %Lu\n", end, cur); } bdev = em->bdev; block_start = em->block_start; + if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) + block_start = EXTENT_MAP_HOLE; free_extent_map(em); em = NULL; @@ -2769,14 +2771,18 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, struct inode *inode = mapping->host; u64 start = iblock << inode->i_blkbits; sector_t sector = 0; + size_t blksize = (1 << inode->i_blkbits); struct extent_map *em; - em = get_extent(inode, NULL, 0, start, (1 << inode->i_blkbits), 0); + lock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1, + GFP_NOFS); + em = get_extent(inode, NULL, 0, start, blksize, 0); + unlock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1, + GFP_NOFS); if (!em || IS_ERR(em)) return 0; - if (em->block_start == EXTENT_MAP_INLINE || - em->block_start == EXTENT_MAP_HOLE) + if (em->block_start > EXTENT_MAP_LAST_BYTE) goto out; sector = (em->block_start + start - em->start) >> inode->i_blkbits; diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index e693e1b4ac4..accfedaeb51 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -12,6 +12,7 @@ #define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */ #define EXTENT_FLAG_COMPRESSED 1 #define EXTENT_FLAG_VACANCY 2 /* no file extent item found */ +#define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */ struct extent_map { struct rb_node rb_node; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1a0510ad030..238a8e215eb 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -381,7 +381,7 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, int keep; int slot; int bookend; - int found_type; + int found_type = 0; int found_extent; int found_inline; int recow; @@ -442,7 +442,8 @@ next_slot: extent); other_encoding = btrfs_file_extent_other_encoding(leaf, extent); - if (found_type == BTRFS_FILE_EXTENT_REG) { + if (found_type == BTRFS_FILE_EXTENT_REG || + found_type == BTRFS_FILE_EXTENT_PREALLOC) { extent_end = btrfs_file_extent_disk_bytenr(leaf, extent); @@ -609,8 +610,7 @@ next_slot: */ btrfs_set_file_extent_ram_bytes(leaf, extent, ram_bytes); - btrfs_set_file_extent_type(leaf, extent, - BTRFS_FILE_EXTENT_REG); + btrfs_set_file_extent_type(leaf, extent, found_type); btrfs_mark_buffer_dirty(path->nodes[0]); @@ -661,6 +661,243 @@ out: return ret; } +static int extent_mergeable(struct extent_buffer *leaf, int slot, + u64 objectid, u64 bytenr, u64 *start, u64 *end) +{ + struct btrfs_file_extent_item *fi; + struct btrfs_key key; + u64 extent_end; + + if (slot < 0 || slot >= btrfs_header_nritems(leaf)) + return 0; + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY) + return 0; + + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG || + btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr || + btrfs_file_extent_compression(leaf, fi) || + btrfs_file_extent_encryption(leaf, fi) || + btrfs_file_extent_other_encoding(leaf, fi)) + return 0; + + extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); + if ((*start && *start != key.offset) || (*end && *end != extent_end)) + return 0; + + *start = key.offset; + *end = extent_end; + return 1; +} + +/* + * Mark extent in the range start - end as written. + * + * This changes extent type from 'pre-allocated' to 'regular'. If only + * part of extent is marked as written, the extent will be split into + * two or three. + */ +int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, u64 start, u64 end) +{ + struct extent_buffer *leaf; + struct btrfs_path *path; + struct btrfs_file_extent_item *fi; + struct btrfs_key key; + u64 bytenr; + u64 num_bytes; + u64 extent_end; + u64 extent_offset; + u64 other_start; + u64 other_end; + u64 split = start; + u64 locked_end = end; + int extent_type; + int split_end = 1; + int ret; + + btrfs_drop_extent_cache(inode, start, end - 1, 0); + + path = btrfs_alloc_path(); + BUG_ON(!path); +again: + key.objectid = inode->i_ino; + key.type = BTRFS_EXTENT_DATA_KEY; + if (split == start) + key.offset = split; + else + key.offset = split - 1; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0 && path->slots[0] > 0) + path->slots[0]--; + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + BUG_ON(key.objectid != inode->i_ino || + key.type != BTRFS_EXTENT_DATA_KEY); + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + extent_type = btrfs_file_extent_type(leaf, fi); + BUG_ON(extent_type != BTRFS_FILE_EXTENT_PREALLOC); + extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); + BUG_ON(key.offset > start || extent_end < end); + + bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); + extent_offset = btrfs_file_extent_offset(leaf, fi); + + if (key.offset == start) + split = end; + + if (key.offset == start && extent_end == end) { + int del_nr = 0; + int del_slot = 0; + u64 leaf_owner = btrfs_header_owner(leaf); + u64 leaf_gen = btrfs_header_generation(leaf); + other_start = end; + other_end = 0; + if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino, + bytenr, &other_start, &other_end)) { + extent_end = other_end; + del_slot = path->slots[0] + 1; + del_nr++; + ret = btrfs_free_extent(trans, root, bytenr, num_bytes, + leaf->start, leaf_owner, + leaf_gen, inode->i_ino, 0); + BUG_ON(ret); + } + other_start = 0; + other_end = start; + if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino, + bytenr, &other_start, &other_end)) { + key.offset = other_start; + del_slot = path->slots[0]; + del_nr++; + ret = btrfs_free_extent(trans, root, bytenr, num_bytes, + leaf->start, leaf_owner, + leaf_gen, inode->i_ino, 0); + BUG_ON(ret); + } + split_end = 0; + if (del_nr == 0) { + btrfs_set_file_extent_type(leaf, fi, + BTRFS_FILE_EXTENT_REG); + goto done; + } + + fi = btrfs_item_ptr(leaf, del_slot - 1, + struct btrfs_file_extent_item); + btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG); + btrfs_set_file_extent_num_bytes(leaf, fi, + extent_end - key.offset); + btrfs_mark_buffer_dirty(leaf); + + ret = btrfs_del_items(trans, root, path, del_slot, del_nr); + BUG_ON(ret); + goto done; + } else if (split == start) { + if (locked_end < extent_end) { + ret = try_lock_extent(&BTRFS_I(inode)->io_tree, + locked_end, extent_end - 1, GFP_NOFS); + if (!ret) { + btrfs_release_path(root, path); + lock_extent(&BTRFS_I(inode)->io_tree, + locked_end, extent_end - 1, GFP_NOFS); + locked_end = extent_end; + goto again; + } + locked_end = extent_end; + } + btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset); + extent_offset += split - key.offset; + } else { + BUG_ON(key.offset != start); + btrfs_set_file_extent_offset(leaf, fi, extent_offset + + split - key.offset); + btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split); + key.offset = split; + btrfs_set_item_key_safe(trans, root, path, &key); + extent_end = split; + } + + if (extent_end == end) { + split_end = 0; + extent_type = BTRFS_FILE_EXTENT_REG; + } + if (extent_end == end && split == start) { + other_start = end; + other_end = 0; + if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino, + bytenr, &other_start, &other_end)) { + path->slots[0]++; + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + key.offset = split; + btrfs_set_item_key_safe(trans, root, path, &key); + btrfs_set_file_extent_offset(leaf, fi, extent_offset); + btrfs_set_file_extent_num_bytes(leaf, fi, + other_end - split); + goto done; + } + } + if (extent_end == end && split == end) { + other_start = 0; + other_end = start; + if (extent_mergeable(leaf, path->slots[0] - 1 , inode->i_ino, + bytenr, &other_start, &other_end)) { + path->slots[0]--; + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - + other_start); + goto done; + } + } + + btrfs_mark_buffer_dirty(leaf); + btrfs_release_path(root, path); + + key.offset = start; + ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*fi)); + BUG_ON(ret); + + leaf = path->nodes[0]; + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, fi, trans->transid); + btrfs_set_file_extent_type(leaf, fi, extent_type); + btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr); + btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes); + btrfs_set_file_extent_offset(leaf, fi, extent_offset); + btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset); + btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); + btrfs_set_file_extent_compression(leaf, fi, 0); + btrfs_set_file_extent_encryption(leaf, fi, 0); + btrfs_set_file_extent_other_encoding(leaf, fi, 0); + + ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, + leaf->start, root->root_key.objectid, + trans->transid, inode->i_ino); + BUG_ON(ret); +done: + btrfs_mark_buffer_dirty(leaf); + btrfs_release_path(root, path); + if (split_end && split == start) { + split = end; + goto again; + } + if (locked_end > end) { + unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, + GFP_NOFS); + } + btrfs_free_path(path); + return 0; +} + /* * this gets pages into the page cache and locks them down, it also properly * waits for data=ordered extents to finish before allowing the pages to be diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3e6f0568fdb..789c376157f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -587,7 +588,7 @@ free_pages_out: * blocks on disk */ static int run_delalloc_nocow(struct inode *inode, struct page *locked_page, - u64 start, u64 end, int *page_started) + u64 start, u64 end, int *page_started, int force) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; @@ -602,6 +603,7 @@ static int run_delalloc_nocow(struct inode *inode, struct page *locked_page, u64 num_bytes; int extent_type; int ret; + int type; int nocow; int check_prev = 1; @@ -654,7 +656,8 @@ next_slot: struct btrfs_file_extent_item); extent_type = btrfs_file_extent_type(leaf, fi); - if (extent_type == BTRFS_FILE_EXTENT_REG) { + if (extent_type == BTRFS_FILE_EXTENT_REG || + extent_type == BTRFS_FILE_EXTENT_PREALLOC) { struct btrfs_block_group_cache *block_group; disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); extent_end = found_key.offset + @@ -669,6 +672,8 @@ next_slot: goto out_check; if (disk_bytenr == 0) goto out_check; + if (extent_type == BTRFS_FILE_EXTENT_REG && !force) + goto out_check; if (btrfs_cross_ref_exist(trans, root, disk_bytenr)) goto out_check; block_group = btrfs_lookup_block_group(root->fs_info, @@ -709,10 +714,39 @@ out_check: disk_bytenr += cur_offset - found_key.offset; num_bytes = min(end + 1, extent_end) - cur_offset; + if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) { + struct extent_map *em; + struct extent_map_tree *em_tree; + em_tree = &BTRFS_I(inode)->extent_tree; + em = alloc_extent_map(GFP_NOFS); + em->start = cur_offset; + em->len = num_bytes; + em->block_len = num_bytes; + em->block_start = disk_bytenr; + em->bdev = root->fs_info->fs_devices->latest_bdev; + set_bit(EXTENT_FLAG_PINNED, &em->flags); + while (1) { + spin_lock(&em_tree->lock); + ret = add_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + if (ret != -EEXIST) { + free_extent_map(em); + break; + } + btrfs_drop_extent_cache(inode, em->start, + em->start + em->len - 1, 0); + } + type = BTRFS_ORDERED_PREALLOC; + } else { + type = BTRFS_ORDERED_NOCOW; + } ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr, - num_bytes, num_bytes, - BTRFS_ORDERED_NOCOW); + num_bytes, num_bytes, type); + BUG_ON(ret); + extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, + cur_offset, cur_offset + num_bytes - 1, + locked_page, 0, 0, 0); cur_offset = extent_end; if (cur_offset > end) break; @@ -745,7 +779,10 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, if (btrfs_test_opt(root, NODATACOW) || btrfs_test_flag(inode, NODATACOW)) ret = run_delalloc_nocow(inode, locked_page, start, end, - page_started); + page_started, 0); + else if (btrfs_test_flag(inode, PREALLOC)) + ret = run_delalloc_nocow(inode, locked_page, start, end, + page_started, 1); else ret = cow_file_range(inode, locked_page, start, end, page_started); @@ -1006,6 +1043,63 @@ int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) return -EAGAIN; } +static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, + struct inode *inode, u64 file_pos, + u64 disk_bytenr, u64 disk_num_bytes, + u64 num_bytes, u64 ram_bytes, + u8 compression, u8 encryption, + u16 other_encoding, int extent_type) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_file_extent_item *fi; + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_key ins; + u64 hint; + int ret; + + path = btrfs_alloc_path(); + BUG_ON(!path); + + ret = btrfs_drop_extents(trans, root, inode, file_pos, + file_pos + num_bytes, file_pos, &hint); + BUG_ON(ret); + + ins.objectid = inode->i_ino; + ins.offset = file_pos; + ins.type = BTRFS_EXTENT_DATA_KEY; + ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi)); + BUG_ON(ret); + leaf = path->nodes[0]; + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, fi, trans->transid); + btrfs_set_file_extent_type(leaf, fi, extent_type); + btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr); + btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_num_bytes); + btrfs_set_file_extent_offset(leaf, fi, 0); + btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); + btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes); + btrfs_set_file_extent_compression(leaf, fi, compression); + btrfs_set_file_extent_encryption(leaf, fi, encryption); + btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding); + btrfs_mark_buffer_dirty(leaf); + + inode_add_bytes(inode, num_bytes); + btrfs_drop_extent_cache(inode, file_pos, file_pos + num_bytes - 1, 0); + + ins.objectid = disk_bytenr; + ins.offset = disk_num_bytes; + ins.type = BTRFS_EXTENT_ITEM_KEY; + ret = btrfs_alloc_reserved_extent(trans, root, leaf->start, + root->root_key.objectid, + trans->transid, inode->i_ino, &ins); + BUG_ON(ret); + + btrfs_free_path(path); + return 0; +} + /* as ordered data IO finishes, this gets called so we can finish * an ordered extent if the range of bytes in the file it covers are * fully written. @@ -1016,12 +1110,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) struct btrfs_trans_handle *trans; struct btrfs_ordered_extent *ordered_extent; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - struct btrfs_file_extent_item *extent_item; - struct btrfs_path *path = NULL; - struct extent_buffer *leaf; - u64 alloc_hint = 0; - struct list_head list; - struct btrfs_key ins; + int compressed = 0; int ret; ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1); @@ -1035,67 +1124,30 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) goto nocow; - path = btrfs_alloc_path(); - BUG_ON(!path); - lock_extent(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, GFP_NOFS); - INIT_LIST_HEAD(&list); - - ret = btrfs_drop_extents(trans, root, inode, - ordered_extent->file_offset, - ordered_extent->file_offset + - ordered_extent->len, - ordered_extent->file_offset, &alloc_hint); - BUG_ON(ret); - - ins.objectid = inode->i_ino; - ins.offset = ordered_extent->file_offset; - ins.type = BTRFS_EXTENT_DATA_KEY; - ret = btrfs_insert_empty_item(trans, root, path, &ins, - sizeof(*extent_item)); - BUG_ON(ret); - leaf = path->nodes[0]; - extent_item = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(leaf, extent_item, trans->transid); - btrfs_set_file_extent_type(leaf, extent_item, BTRFS_FILE_EXTENT_REG); - btrfs_set_file_extent_disk_bytenr(leaf, extent_item, - ordered_extent->start); - btrfs_set_file_extent_disk_num_bytes(leaf, extent_item, - ordered_extent->disk_len); - btrfs_set_file_extent_offset(leaf, extent_item, 0); - if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) - btrfs_set_file_extent_compression(leaf, extent_item, 1); - else - btrfs_set_file_extent_compression(leaf, extent_item, 0); - btrfs_set_file_extent_encryption(leaf, extent_item, 0); - btrfs_set_file_extent_other_encoding(leaf, extent_item, 0); - - /* ram bytes = extent_num_bytes for now */ - btrfs_set_file_extent_num_bytes(leaf, extent_item, - ordered_extent->len); - btrfs_set_file_extent_ram_bytes(leaf, extent_item, - ordered_extent->len); - btrfs_mark_buffer_dirty(leaf); - - btrfs_drop_extent_cache(inode, ordered_extent->file_offset, - ordered_extent->file_offset + - ordered_extent->len - 1, 0); - - ins.objectid = ordered_extent->start; - ins.offset = ordered_extent->disk_len; - ins.type = BTRFS_EXTENT_ITEM_KEY; - ret = btrfs_alloc_reserved_extent(trans, root, leaf->start, - root->root_key.objectid, - trans->transid, inode->i_ino, &ins); - BUG_ON(ret); - btrfs_release_path(root, path); - - inode_add_bytes(inode, ordered_extent->len); + compressed = 1; + if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { + BUG_ON(compressed); + ret = btrfs_mark_extent_written(trans, root, inode, + ordered_extent->file_offset, + ordered_extent->file_offset + + ordered_extent->len); + BUG_ON(ret); + } else { + ret = insert_reserved_file_extent(trans, inode, + ordered_extent->file_offset, + ordered_extent->start, + ordered_extent->disk_len, + ordered_extent->len, + ordered_extent->len, + compressed, 0, 0, + BTRFS_FILE_EXTENT_REG); + BUG_ON(ret); + } unlock_extent(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, GFP_NOFS); @@ -1115,8 +1167,6 @@ nocow: btrfs_put_ordered_extent(ordered_extent); btrfs_end_transaction(trans, root); - if (path) - btrfs_free_path(path); return 0; } @@ -3488,7 +3538,8 @@ again: found_type = btrfs_file_extent_type(leaf, item); extent_start = found_key.offset; compressed = btrfs_file_extent_compression(leaf, item); - if (found_type == BTRFS_FILE_EXTENT_REG) { + if (found_type == BTRFS_FILE_EXTENT_REG || + found_type == BTRFS_FILE_EXTENT_PREALLOC) { extent_end = extent_start + btrfs_file_extent_num_bytes(leaf, item); } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { @@ -3521,7 +3572,8 @@ again: goto not_found_em; } - if (found_type == BTRFS_FILE_EXTENT_REG) { + if (found_type == BTRFS_FILE_EXTENT_REG || + found_type == BTRFS_FILE_EXTENT_PREALLOC) { em->start = extent_start; em->len = extent_end - extent_start; bytenr = btrfs_file_extent_disk_bytenr(leaf, item); @@ -3538,6 +3590,8 @@ again: bytenr += btrfs_file_extent_offset(leaf, item); em->block_start = bytenr; em->block_len = em->len; + if (found_type == BTRFS_FILE_EXTENT_PREALLOC) + set_bit(EXTENT_FLAG_PREALLOC, &em->flags); } goto insert; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { @@ -3969,6 +4023,7 @@ int btrfs_create_subvol_root(struct btrfs_root *new_root, struct dentry *dentry, if (error) return error; + atomic_inc(&inode->i_count); d_instantiate(dentry, inode); return 0; } @@ -4318,6 +4373,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_symlink_inode_operations; inode->i_mapping->a_ops = &btrfs_symlink_aops; inode->i_mapping->backing_dev_info = &root->fs_info->bdi; + inode_set_bytes(inode, name_len); btrfs_i_size_write(inode, name_len - 1); err = btrfs_update_inode(trans, root, inode); if (err) @@ -4335,6 +4391,130 @@ out_fail: return err; } +static int prealloc_file_range(struct inode *inode, u64 start, u64 end, + u64 alloc_hint, int mode) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_key ins; + u64 alloc_size; + u64 cur_offset = start; + u64 num_bytes = end - start; + int ret = 0; + + trans = btrfs_join_transaction(root, 1); + BUG_ON(!trans); + btrfs_set_trans_block_group(trans, inode); + + while (num_bytes > 0) { + alloc_size = min(num_bytes, root->fs_info->max_extent); + ret = btrfs_reserve_extent(trans, root, alloc_size, + root->sectorsize, 0, alloc_hint, + (u64)-1, &ins, 1); + if (ret) { + WARN_ON(1); + goto out; + } + ret = insert_reserved_file_extent(trans, inode, + cur_offset, ins.objectid, + ins.offset, ins.offset, + ins.offset, 0, 0, 0, + BTRFS_FILE_EXTENT_PREALLOC); + BUG_ON(ret); + num_bytes -= ins.offset; + cur_offset += ins.offset; + alloc_hint = ins.objectid + ins.offset; + } +out: + if (cur_offset > start) { + inode->i_ctime = CURRENT_TIME; + btrfs_set_flag(inode, PREALLOC); + if (!(mode & FALLOC_FL_KEEP_SIZE) && + cur_offset > i_size_read(inode)) + btrfs_i_size_write(inode, cur_offset); + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); + } + + btrfs_end_transaction(trans, root); + return ret; +} + +static long btrfs_fallocate(struct inode *inode, int mode, + loff_t offset, loff_t len) +{ + u64 cur_offset; + u64 last_byte; + u64 alloc_start; + u64 alloc_end; + u64 alloc_hint = 0; + u64 mask = BTRFS_I(inode)->root->sectorsize - 1; + struct extent_map *em; + int ret; + + alloc_start = offset & ~mask; + alloc_end = (offset + len + mask) & ~mask; + + mutex_lock(&inode->i_mutex); + if (alloc_start > inode->i_size) { + ret = btrfs_cont_expand(inode, alloc_start); + if (ret) + goto out; + } + + while (1) { + struct btrfs_ordered_extent *ordered; + lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, + alloc_end - 1, GFP_NOFS); + ordered = btrfs_lookup_first_ordered_extent(inode, + alloc_end - 1); + if (ordered && + ordered->file_offset + ordered->len > alloc_start && + ordered->file_offset < alloc_end) { + btrfs_put_ordered_extent(ordered); + unlock_extent(&BTRFS_I(inode)->io_tree, + alloc_start, alloc_end - 1, GFP_NOFS); + btrfs_wait_ordered_range(inode, alloc_start, + alloc_end - alloc_start); + } else { + if (ordered) + btrfs_put_ordered_extent(ordered); + break; + } + } + + cur_offset = alloc_start; + while (1) { + em = btrfs_get_extent(inode, NULL, 0, cur_offset, + alloc_end - cur_offset, 0); + BUG_ON(IS_ERR(em) || !em); + last_byte = min(extent_map_end(em), alloc_end); + last_byte = (last_byte + mask) & ~mask; + if (em->block_start == EXTENT_MAP_HOLE) { + ret = prealloc_file_range(inode, cur_offset, + last_byte, alloc_hint, mode); + if (ret < 0) { + free_extent_map(em); + break; + } + } + if (em->block_start <= EXTENT_MAP_LAST_BYTE) + alloc_hint = em->block_start; + free_extent_map(em); + + cur_offset = last_byte; + if (cur_offset >= alloc_end) { + ret = 0; + break; + } + } + unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1, + GFP_NOFS); +out: + mutex_unlock(&inode->i_mutex); + return ret; +} + static int btrfs_set_page_dirty(struct page *page) { return __set_page_dirty_nobuffers(page); @@ -4421,6 +4601,7 @@ static struct inode_operations btrfs_file_inode_operations = { .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, .permission = btrfs_permission, + .fallocate = btrfs_fallocate, }; static struct inode_operations btrfs_special_inode_operations = { .getattr = btrfs_getattr, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7f915d47839..9ff2b4e0e92 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -724,7 +724,8 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); found_type = btrfs_file_extent_type(leaf, extent); - if (found_type == BTRFS_FILE_EXTENT_REG) { + if (found_type == BTRFS_FILE_EXTENT_REG || + found_type == BTRFS_FILE_EXTENT_PREALLOC) { u64 ds = btrfs_file_extent_disk_bytenr(leaf, extent); u64 dl = btrfs_file_extent_disk_num_bytes(leaf, diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index e7317c8fda2..370bb428559 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -182,7 +182,7 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, entry->len = len; entry->disk_len = disk_len; entry->inode = inode; - if (type == BTRFS_ORDERED_NOCOW || type == BTRFS_ORDERED_COMPRESSED) + if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) set_bit(type, &entry->flags); /* one ref for the tree */ @@ -339,7 +339,8 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) ordered = list_entry(cur, struct btrfs_ordered_extent, root_extent_list); if (nocow_only && - !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { + !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) && + !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) { list_move(&ordered->root_extent_list, &root->fs_info->ordered_extents); cond_resched_lock(&root->fs_info->ordered_extent_lock); diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index e6d9bc54c2b..260bf95dfe0 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -68,6 +68,8 @@ struct btrfs_ordered_sum { #define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */ +#define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ + struct btrfs_ordered_extent { /* logical offset in the file */ u64 file_offset; @@ -132,7 +134,7 @@ int btrfs_remove_ordered_extent(struct inode *inode, int btrfs_dec_test_ordered_pending(struct inode *inode, u64 file_offset, u64 io_size); int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, - u64 start, u64 len, u64 disk_len, int type); + u64 start, u64 len, u64 disk_len, int tyep); int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_extent *entry, struct btrfs_ordered_sum *sum); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index e0201c3a7dc..be4fc30a30e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -442,7 +442,8 @@ insert: fi = (struct btrfs_file_extent_item *)dst_ptr; extent_type = btrfs_file_extent_type(path->nodes[0], fi); - if (extent_type == BTRFS_FILE_EXTENT_REG) { + if (extent_type == BTRFS_FILE_EXTENT_REG || + extent_type == BTRFS_FILE_EXTENT_PREALLOC) { struct btrfs_key ins; ins.objectid = btrfs_file_extent_disk_bytenr( path->nodes[0], fi); @@ -538,7 +539,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, item = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); found_type = btrfs_file_extent_type(eb, item); - if (found_type == BTRFS_FILE_EXTENT_REG) + if (found_type == BTRFS_FILE_EXTENT_REG || + found_type == BTRFS_FILE_EXTENT_PREALLOC) extent_end = start + btrfs_file_extent_num_bytes(eb, item); else if (found_type == BTRFS_FILE_EXTENT_INLINE) { size = btrfs_file_extent_inline_len(eb, item); @@ -562,7 +564,9 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, start, 0); - if (ret == 0 && found_type == BTRFS_FILE_EXTENT_REG) { + if (ret == 0 && + (found_type == BTRFS_FILE_EXTENT_REG || + found_type == BTRFS_FILE_EXTENT_PREALLOC)) { struct btrfs_file_extent_item cmp1; struct btrfs_file_extent_item cmp2; struct btrfs_file_extent_item *existing; @@ -2522,7 +2526,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, struct btrfs_file_extent_item); found_type = btrfs_file_extent_type(src, extent); - if (found_type == BTRFS_FILE_EXTENT_REG) { + if (found_type == BTRFS_FILE_EXTENT_REG || + found_type == BTRFS_FILE_EXTENT_PREALLOC) { u64 ds = btrfs_file_extent_disk_bytenr(src, extent); u64 dl = btrfs_file_extent_disk_num_bytes(src, -- cgit v1.2.3-70-g09d2 From 771ed689d2cd53439e28e095bc38fbe40a71429e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 6 Nov 2008 22:02:51 -0500 Subject: Btrfs: Optimize compressed writeback and reads When reading compressed extents, try to put pages into the page cache for any pages covered by the compressed extent that readpages didn't already preload. Add an async work queue to handle transformations at delayed allocation processing time. Right now this is just compression. The workflow is: 1) Find offsets in the file marked for delayed allocation 2) Lock the pages 3) Lock the state bits 4) Call the async delalloc code The async delalloc code clears the state lock bits and delalloc bits. It is important this happens before the range goes into the work queue because otherwise it might deadlock with other work queue items that try to lock those extent bits. The file pages are compressed, and if the compression doesn't work the pages are written back directly. An ordered work queue is used to make sure the inodes are written in the same order that pdflush or writepages sent them down. This changes extent_write_cache_pages to let the writepage function update the wbc nr_written count. Signed-off-by: Chris Mason --- fs/btrfs/compression.c | 150 ++++++++++- fs/btrfs/ctree.h | 4 +- fs/btrfs/disk-io.c | 27 +- fs/btrfs/extent-tree.c | 6 +- fs/btrfs/extent_io.c | 140 +++++++++-- fs/btrfs/extent_io.h | 13 +- fs/btrfs/file.c | 53 ++-- fs/btrfs/inode.c | 643 +++++++++++++++++++++++++++++++++++++----------- fs/btrfs/ordered-data.c | 13 +- fs/btrfs/super.c | 4 + fs/btrfs/zlib.c | 3 +- 11 files changed, 849 insertions(+), 207 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 354913177ba..284f21025bc 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -145,9 +146,9 @@ static void end_compressed_bio_read(struct bio *bio, int err) } /* do io completion on the original bio */ - if (cb->errors) + if (cb->errors) { bio_io_error(cb->orig_bio); - else + } else bio_endio(cb->orig_bio, 0); /* finally free the cb struct */ @@ -333,6 +334,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, } bytes_left -= PAGE_CACHE_SIZE; first_byte += PAGE_CACHE_SIZE; + cond_resched(); } bio_get(bio); @@ -346,6 +348,130 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, return 0; } +static noinline int add_ra_bio_pages(struct inode *inode, + u64 compressed_end, + struct compressed_bio *cb) +{ + unsigned long end_index; + unsigned long page_index; + u64 last_offset; + u64 isize = i_size_read(inode); + int ret; + struct page *page; + unsigned long nr_pages = 0; + struct extent_map *em; + struct address_space *mapping = inode->i_mapping; + struct pagevec pvec; + struct extent_map_tree *em_tree; + struct extent_io_tree *tree; + u64 end; + int misses = 0; + + page = cb->orig_bio->bi_io_vec[cb->orig_bio->bi_vcnt - 1].bv_page; + last_offset = (page_offset(page) + PAGE_CACHE_SIZE); + em_tree = &BTRFS_I(inode)->extent_tree; + tree = &BTRFS_I(inode)->io_tree; + + if (isize == 0) + return 0; + + end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; + + pagevec_init(&pvec, 0); + while(last_offset < compressed_end) { + page_index = last_offset >> PAGE_CACHE_SHIFT; + + if (page_index > end_index) + break; + + rcu_read_lock(); + page = radix_tree_lookup(&mapping->page_tree, page_index); + rcu_read_unlock(); + if (page) { + misses++; + if (misses > 4) + break; + goto next; + } + + page = alloc_page(mapping_gfp_mask(mapping) | GFP_NOFS); + if (!page) + break; + + page->index = page_index; + /* + * what we want to do here is call add_to_page_cache_lru, + * but that isn't exported, so we reproduce it here + */ + if (add_to_page_cache(page, mapping, + page->index, GFP_NOFS)) { + page_cache_release(page); + goto next; + } + + /* open coding of lru_cache_add, also not exported */ + page_cache_get(page); + if (!pagevec_add(&pvec, page)) + __pagevec_lru_add(&pvec); + + end = last_offset + PAGE_CACHE_SIZE - 1; + /* + * at this point, we have a locked page in the page cache + * for these bytes in the file. But, we have to make + * sure they map to this compressed extent on disk. + */ + set_page_extent_mapped(page); + lock_extent(tree, last_offset, end, GFP_NOFS); + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, last_offset, + PAGE_CACHE_SIZE); + spin_unlock(&em_tree->lock); + + if (!em || last_offset < em->start || + (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) || + (em->block_start >> 9) != cb->orig_bio->bi_sector) { + free_extent_map(em); + unlock_extent(tree, last_offset, end, GFP_NOFS); + unlock_page(page); + page_cache_release(page); + break; + } + free_extent_map(em); + + if (page->index == end_index) { + char *userpage; + size_t zero_offset = isize & (PAGE_CACHE_SIZE - 1); + + if (zero_offset) { + int zeros; + zeros = PAGE_CACHE_SIZE - zero_offset; + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + zero_offset, 0, zeros); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); + } + } + + ret = bio_add_page(cb->orig_bio, page, + PAGE_CACHE_SIZE, 0); + + if (ret == PAGE_CACHE_SIZE) { + nr_pages++; + page_cache_release(page); + } else { + unlock_extent(tree, last_offset, end, GFP_NOFS); + unlock_page(page); + page_cache_release(page); + break; + } +next: + last_offset += PAGE_CACHE_SIZE; + } + if (pagevec_count(&pvec)) + __pagevec_lru_add(&pvec); + return 0; +} + /* * for a compressed read, the bio we get passed has all the inode pages * in it. We don't actually do IO on those pages but allocate new ones @@ -373,6 +499,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, struct block_device *bdev; struct bio *comp_bio; u64 cur_disk_byte = (u64)bio->bi_sector << 9; + u64 em_len; struct extent_map *em; int ret; @@ -393,6 +520,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, cb->start = em->start; compressed_len = em->block_len; + em_len = em->len; free_extent_map(em); cb->len = uncompressed_len; @@ -411,6 +539,17 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, } cb->nr_pages = nr_pages; + add_ra_bio_pages(inode, cb->start + em_len, cb); + + if (!btrfs_test_opt(root, NODATASUM) && + !btrfs_test_flag(inode, NODATASUM)) { + btrfs_lookup_bio_sums(root, inode, cb->orig_bio); + } + + /* include any pages we added in add_ra-bio_pages */ + uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE; + cb->len = uncompressed_len; + comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS); comp_bio->bi_private = cb; comp_bio->bi_end_io = end_compressed_bio_read; @@ -442,9 +581,10 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS); atomic_inc(&cb->pending_bios); - bio->bi_private = cb; - bio->bi_end_io = end_compressed_bio_write; - bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); + comp_bio->bi_private = cb; + comp_bio->bi_end_io = end_compressed_bio_read; + + bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0); } cur_disk_byte += PAGE_CACHE_SIZE; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 689df070c8e..c83cc5b2ded 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -625,8 +625,8 @@ struct btrfs_fs_info { struct btrfs_transaction *running_transaction; wait_queue_head_t transaction_throttle; wait_queue_head_t transaction_wait; - wait_queue_head_t async_submit_wait; + wait_queue_head_t async_submit_wait; wait_queue_head_t tree_log_wait; struct btrfs_super_block super_copy; @@ -653,6 +653,7 @@ struct btrfs_fs_info { atomic_t nr_async_submits; atomic_t async_submit_draining; atomic_t nr_async_bios; + atomic_t async_delalloc_pages; atomic_t tree_log_writers; atomic_t tree_log_commit; unsigned long tree_log_batch; @@ -677,6 +678,7 @@ struct btrfs_fs_info { * two */ struct btrfs_workers workers; + struct btrfs_workers delalloc_workers; struct btrfs_workers endio_workers; struct btrfs_workers endio_write_workers; struct btrfs_workers submit_workers; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e0a28f705a6..8efc123d222 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -539,6 +539,13 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, (atomic_read(&fs_info->nr_async_bios) < limit), HZ/10); } + + while(atomic_read(&fs_info->async_submit_draining) && + atomic_read(&fs_info->nr_async_submits)) { + wait_event(fs_info->async_submit_wait, + (atomic_read(&fs_info->nr_async_submits) == 0)); + } + return 0; } @@ -1437,6 +1444,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); atomic_set(&fs_info->nr_async_submits, 0); + atomic_set(&fs_info->async_delalloc_pages, 0); atomic_set(&fs_info->async_submit_draining, 0); atomic_set(&fs_info->nr_async_bios, 0); atomic_set(&fs_info->throttles, 0); @@ -1550,6 +1558,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_init_workers(&fs_info->workers, "worker", fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", + fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->submit_workers, "submit", min_t(u64, fs_devices->num_devices, fs_info->thread_pool_size)); @@ -1560,15 +1571,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, */ fs_info->submit_workers.idle_thresh = 64; - /* fs_info->workers is responsible for checksumming file data - * blocks and metadata. Using a larger idle thresh allows each - * worker thread to operate on things in roughly the order they - * were sent by the writeback daemons, improving overall locality - * of the IO going down the pipe. - */ - fs_info->workers.idle_thresh = 8; + fs_info->workers.idle_thresh = 16; fs_info->workers.ordered = 1; + fs_info->delalloc_workers.idle_thresh = 2; + fs_info->delalloc_workers.ordered = 1; + btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1); btrfs_init_workers(&fs_info->endio_workers, "endio", fs_info->thread_pool_size); @@ -1584,6 +1592,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_start_workers(&fs_info->workers, 1); btrfs_start_workers(&fs_info->submit_workers, 1); + btrfs_start_workers(&fs_info->delalloc_workers, 1); btrfs_start_workers(&fs_info->fixup_workers, 1); btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); btrfs_start_workers(&fs_info->endio_write_workers, @@ -1732,6 +1741,7 @@ fail_tree_root: fail_sys_array: fail_sb_buffer: btrfs_stop_workers(&fs_info->fixup_workers); + btrfs_stop_workers(&fs_info->delalloc_workers); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); btrfs_stop_workers(&fs_info->endio_write_workers); @@ -1988,6 +1998,7 @@ int close_ctree(struct btrfs_root *root) truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); btrfs_stop_workers(&fs_info->fixup_workers); + btrfs_stop_workers(&fs_info->delalloc_workers); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); btrfs_stop_workers(&fs_info->endio_write_workers); @@ -2062,7 +2073,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) struct extent_io_tree *tree; u64 num_dirty; u64 start = 0; - unsigned long thresh = 96 * 1024 * 1024; + unsigned long thresh = 32 * 1024 * 1024; tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; if (current_is_pdflush() || current->flags & PF_MEMALLOC) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8af39521eb7..ebd8275a193 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -768,7 +768,11 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, l = path->nodes[0]; btrfs_item_key_to_cpu(l, &key, path->slots[0]); - BUG_ON(key.objectid != bytenr); + if (key.objectid != bytenr) { + btrfs_print_leaf(root->fs_info->extent_root, path->nodes[0]); + printk("wanted %Lu found %Lu\n", bytenr, key.objectid); + BUG(); + } BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY); item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 9b37ce6e516..bbe3bcfcf4a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -47,6 +47,11 @@ struct extent_page_data { struct bio *bio; struct extent_io_tree *tree; get_extent_t *get_extent; + + /* tells writepage not to lock the state bits for this range + * it still does the unlocking + */ + int extent_locked; }; int __init extent_io_init(void) @@ -1198,11 +1203,18 @@ static noinline int lock_delalloc_pages(struct inode *inode, * the caller is taking responsibility for * locked_page */ - if (pages[i] != locked_page) + if (pages[i] != locked_page) { lock_page(pages[i]); + if (pages[i]->mapping != inode->i_mapping) { + ret = -EAGAIN; + unlock_page(pages[i]); + page_cache_release(pages[i]); + goto done; + } + } page_cache_release(pages[i]); + pages_locked++; } - pages_locked += ret; nrpages -= ret; index += ret; cond_resched(); @@ -1262,8 +1274,7 @@ again: * if we're looping. */ if (delalloc_end + 1 - delalloc_start > max_bytes && loops) { - delalloc_end = (delalloc_start + PAGE_CACHE_SIZE - 1) & - ~((u64)PAGE_CACHE_SIZE - 1); + delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1; } /* step two, lock all the pages after the page that has start */ ret = lock_delalloc_pages(inode, locked_page, @@ -1306,7 +1317,10 @@ out_failed: int extent_clear_unlock_delalloc(struct inode *inode, struct extent_io_tree *tree, u64 start, u64 end, struct page *locked_page, - int clear_dirty, int set_writeback, + int unlock_pages, + int clear_unlock, + int clear_delalloc, int clear_dirty, + int set_writeback, int end_writeback) { int ret; @@ -1315,12 +1329,19 @@ int extent_clear_unlock_delalloc(struct inode *inode, unsigned long end_index = end >> PAGE_CACHE_SHIFT; unsigned long nr_pages = end_index - index + 1; int i; - int clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC; + int clear_bits = 0; + if (clear_unlock) + clear_bits |= EXTENT_LOCKED; if (clear_dirty) clear_bits |= EXTENT_DIRTY; + if (clear_delalloc) + clear_bits |= EXTENT_DELALLOC; + clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS); + if (!(unlock_pages || clear_dirty || set_writeback || end_writeback)) + return 0; while(nr_pages > 0) { ret = find_get_pages_contig(inode->i_mapping, index, @@ -1336,7 +1357,8 @@ int extent_clear_unlock_delalloc(struct inode *inode, set_page_writeback(pages[i]); if (end_writeback) end_page_writeback(pages[i]); - unlock_page(pages[i]); + if (unlock_pages) + unlock_page(pages[i]); page_cache_release(pages[i]); } nr_pages -= ret; @@ -1741,9 +1763,10 @@ static void end_bio_extent_readpage(struct bio *bio, int err) } } - if (uptodate) + if (uptodate) { set_extent_uptodate(tree, start, end, GFP_ATOMIC); + } unlock_extent(tree, start, end, GFP_ATOMIC); if (whole_page) { @@ -1925,6 +1948,7 @@ void set_page_extent_mapped(struct page *page) set_page_private(page, EXTENT_PAGE_PRIVATE); } } +EXPORT_SYMBOL(set_page_extent_mapped); void set_page_extent_head(struct page *page, unsigned long len) { @@ -2143,12 +2167,17 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, u64 delalloc_end; int page_started; int compressed; + unsigned long nr_written = 0; WARN_ON(!PageLocked(page)); pg_offset = i_size & (PAGE_CACHE_SIZE - 1); if (page->index > end_index || (page->index == end_index && !pg_offset)) { - page->mapping->a_ops->invalidatepage(page, 0); + if (epd->extent_locked) { + if (tree->ops && tree->ops->writepage_end_io_hook) + tree->ops->writepage_end_io_hook(page, start, + page_end, NULL, 1); + } unlock_page(page); return 0; } @@ -2169,27 +2198,33 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, delalloc_start = start; delalloc_end = 0; page_started = 0; - while(delalloc_end < page_end) { - nr_delalloc = find_lock_delalloc_range(inode, tree, + if (!epd->extent_locked) { + while(delalloc_end < page_end) { + nr_delalloc = find_lock_delalloc_range(inode, tree, page, &delalloc_start, &delalloc_end, 128 * 1024 * 1024); - if (nr_delalloc == 0) { + if (nr_delalloc == 0) { + delalloc_start = delalloc_end + 1; + continue; + } + tree->ops->fill_delalloc(inode, page, delalloc_start, + delalloc_end, &page_started, + &nr_written); delalloc_start = delalloc_end + 1; - continue; } - tree->ops->fill_delalloc(inode, page, delalloc_start, - delalloc_end, &page_started); - delalloc_start = delalloc_end + 1; - } - /* did the fill delalloc function already unlock and start the IO? */ - if (page_started) { - return 0; + /* did the fill delalloc function already unlock and start + * the IO? + */ + if (page_started) { + ret = 0; + goto update_nr_written; + } } - lock_extent(tree, start, page_end, GFP_NOFS); + unlock_start = start; if (tree->ops && tree->ops->writepage_start_hook) { @@ -2199,10 +2234,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, unlock_extent(tree, start, page_end, GFP_NOFS); redirty_page_for_writepage(wbc, page); unlock_page(page); - return 0; + ret = 0; + goto update_nr_written; } } + nr_written++; + end = page_end; if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { printk("found delalloc bits after lock_extent\n"); @@ -2333,6 +2371,12 @@ done: if (unlock_start <= page_end) unlock_extent(tree, unlock_start, page_end, GFP_NOFS); unlock_page(page); + +update_nr_written: + wbc->nr_to_write -= nr_written; + if (wbc->range_cyclic || (wbc->nr_to_write > 0 && + wbc->range_start == 0 && wbc->range_end == LLONG_MAX)) + page->mapping->writeback_index = page->index + nr_written; return 0; } @@ -2431,7 +2475,7 @@ retry: unlock_page(page); ret = 0; } - if (ret || (--(wbc->nr_to_write) <= 0)) + if (ret || wbc->nr_to_write <= 0) done = 1; if (wbc->nonblocking && bdi_write_congested(bdi)) { wbc->encountered_congestion = 1; @@ -2452,6 +2496,8 @@ retry: } if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = index; + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; if (wbc->range_cont) wbc->range_start = index << PAGE_CACHE_SHIFT; @@ -2469,6 +2515,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, .bio = NULL, .tree = tree, .get_extent = get_extent, + .extent_locked = 0, }; struct writeback_control wbc_writepages = { .bdi = wbc->bdi, @@ -2491,6 +2538,52 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, } EXPORT_SYMBOL(extent_write_full_page); +int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, + u64 start, u64 end, get_extent_t *get_extent, + int mode) +{ + int ret = 0; + struct address_space *mapping = inode->i_mapping; + struct page *page; + unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >> + PAGE_CACHE_SHIFT; + + struct extent_page_data epd = { + .bio = NULL, + .tree = tree, + .get_extent = get_extent, + .extent_locked = 1, + }; + struct writeback_control wbc_writepages = { + .bdi = inode->i_mapping->backing_dev_info, + .sync_mode = mode, + .older_than_this = NULL, + .nr_to_write = nr_pages * 2, + .range_start = start, + .range_end = end + 1, + }; + + while(start <= end) { + page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); + if (clear_page_dirty_for_io(page)) + ret = __extent_writepage(page, &wbc_writepages, &epd); + else { + if (tree->ops && tree->ops->writepage_end_io_hook) + tree->ops->writepage_end_io_hook(page, start, + start + PAGE_CACHE_SIZE - 1, + NULL, 1); + unlock_page(page); + } + page_cache_release(page); + start += PAGE_CACHE_SIZE; + } + + if (epd.bio) + submit_one_bio(WRITE, epd.bio, 0, 0); + return ret; +} +EXPORT_SYMBOL(extent_write_locked_range); + int extent_writepages(struct extent_io_tree *tree, struct address_space *mapping, @@ -2502,6 +2595,7 @@ int extent_writepages(struct extent_io_tree *tree, .bio = NULL, .tree = tree, .get_extent = get_extent, + .extent_locked = 0, }; ret = extent_write_cache_pages(tree, mapping, wbc, diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 283110ec4ee..2d5f67065b6 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -35,7 +35,8 @@ typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, unsigned long bio_flags); struct extent_io_ops { int (*fill_delalloc)(struct inode *inode, struct page *locked_page, - u64 start, u64 end, int *page_started); + u64 start, u64 end, int *page_started, + unsigned long *nr_written); int (*writepage_start_hook)(struct page *page, u64 start, u64 end); int (*writepage_io_hook)(struct page *page, u64 start, u64 end); extent_submit_bio_hook_t *submit_bio_hook; @@ -172,6 +173,9 @@ int extent_invalidatepage(struct extent_io_tree *tree, int extent_write_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, struct writeback_control *wbc); +int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, + u64 start, u64 end, get_extent_t *get_extent, + int mode); int extent_writepages(struct extent_io_tree *tree, struct address_space *mapping, get_extent_t *get_extent, @@ -256,6 +260,9 @@ int extent_range_uptodate(struct extent_io_tree *tree, int extent_clear_unlock_delalloc(struct inode *inode, struct extent_io_tree *tree, u64 start, u64 end, struct page *locked_page, - int clear_dirty, int set_writeback, - int clear_writeback); + int unlock_page, + int clear_unlock, + int clear_delalloc, int clear_dirty, + int set_writeback, + int end_writeback); #endif diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0c8cc35a8b9..337221ecca2 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -368,6 +368,8 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, u64 search_start = start; u64 leaf_start; u64 ram_bytes = 0; + u64 orig_parent = 0; + u64 disk_bytenr = 0; u8 compression; u8 encryption; u16 other_encoding = 0; @@ -500,17 +502,31 @@ next_slot: keep = 1; } - if (bookend && found_extent && locked_end < extent_end) { - ret = try_lock_extent(&BTRFS_I(inode)->io_tree, - locked_end, extent_end - 1, GFP_NOFS); - if (!ret) { - btrfs_release_path(root, path); - lock_extent(&BTRFS_I(inode)->io_tree, - locked_end, extent_end - 1, GFP_NOFS); + if (bookend && found_extent) { + if (locked_end < extent_end) { + ret = try_lock_extent(&BTRFS_I(inode)->io_tree, + locked_end, extent_end - 1, + GFP_NOFS); + if (!ret) { + btrfs_release_path(root, path); + lock_extent(&BTRFS_I(inode)->io_tree, + locked_end, extent_end - 1, + GFP_NOFS); + locked_end = extent_end; + continue; + } locked_end = extent_end; - continue; } - locked_end = extent_end; + orig_parent = path->nodes[0]->start; + disk_bytenr = le64_to_cpu(old.disk_bytenr); + if (disk_bytenr != 0) { + ret = btrfs_inc_extent_ref(trans, root, + disk_bytenr, + le64_to_cpu(old.disk_num_bytes), + orig_parent, root->root_key.objectid, + trans->transid, inode->i_ino); + BUG_ON(ret); + } } if (found_inline) { @@ -537,8 +553,12 @@ next_slot: inode_sub_bytes(inode, old_num - new_num); } - btrfs_set_file_extent_num_bytes(leaf, extent, - new_num); + if (!compression && !encryption) { + btrfs_set_file_extent_ram_bytes(leaf, + extent, new_num); + } + btrfs_set_file_extent_num_bytes(leaf, + extent, new_num); btrfs_mark_buffer_dirty(leaf); } else if (key.offset < inline_limit && (end > extent_end) && @@ -582,11 +602,11 @@ next_slot: } /* create bookend, splitting the extent in two */ if (bookend && found_extent) { - u64 disk_bytenr; struct btrfs_key ins; ins.objectid = inode->i_ino; ins.offset = end; btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); + btrfs_release_path(root, path); ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*extent)); @@ -623,14 +643,13 @@ next_slot: btrfs_mark_buffer_dirty(path->nodes[0]); - disk_bytenr = le64_to_cpu(old.disk_bytenr); if (disk_bytenr != 0) { - ret = btrfs_inc_extent_ref(trans, root, - disk_bytenr, - le64_to_cpu(old.disk_num_bytes), - leaf->start, + ret = btrfs_update_extent_ref(trans, root, + disk_bytenr, orig_parent, + leaf->start, root->root_key.objectid, trans->transid, ins.objectid); + BUG_ON(ret); } btrfs_release_path(root, path); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3df0ffad976..e01c0d0310a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -86,6 +86,10 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { static void btrfs_truncate(struct inode *inode); static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end); +static noinline int cow_file_range(struct inode *inode, + struct page *locked_page, + u64 start, u64 end, int *page_started, + unsigned long *nr_written, int unlock); /* * a very lame attempt at stopping writes when the FS is 85% full. There @@ -262,35 +266,72 @@ static int cow_file_range_inline(struct btrfs_trans_handle *trans, return 0; } +struct async_extent { + u64 start; + u64 ram_size; + u64 compressed_size; + struct page **pages; + unsigned long nr_pages; + struct list_head list; +}; + +struct async_cow { + struct inode *inode; + struct btrfs_root *root; + struct page *locked_page; + u64 start; + u64 end; + struct list_head extents; + struct btrfs_work work; +}; + +static noinline int add_async_extent(struct async_cow *cow, + u64 start, u64 ram_size, + u64 compressed_size, + struct page **pages, + unsigned long nr_pages) +{ + struct async_extent *async_extent; + + async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS); + async_extent->start = start; + async_extent->ram_size = ram_size; + async_extent->compressed_size = compressed_size; + async_extent->pages = pages; + async_extent->nr_pages = nr_pages; + list_add_tail(&async_extent->list, &cow->extents); + return 0; +} + /* - * when extent_io.c finds a delayed allocation range in the file, - * the call backs end up in this code. The basic idea is to - * allocate extents on disk for the range, and create ordered data structs - * in ram to track those extents. + * we create compressed extents in two phases. The first + * phase compresses a range of pages that have already been + * locked (both pages and state bits are locked). * - * locked_page is the page that writepage had locked already. We use - * it to make sure we don't do extra locks or unlocks. + * This is done inside an ordered work queue, and the compression + * is spread across many cpus. The actual IO submission is step + * two, and the ordered work queue takes care of making sure that + * happens in the same order things were put onto the queue by + * writepages and friends. * - * *page_started is set to one if we unlock locked_page and do everything - * required to start IO on it. It may be clean and already done with - * IO when we return. + * If this code finds it can't get good compression, it puts an + * entry onto the work queue to write the uncompressed bytes. This + * makes sure that both compressed inodes and uncompressed inodes + * are written in the same order that pdflush sent them down. */ -static int cow_file_range(struct inode *inode, struct page *locked_page, - u64 start, u64 end, int *page_started) +static noinline int compress_file_range(struct inode *inode, + struct page *locked_page, + u64 start, u64 end, + struct async_cow *async_cow, + int *num_added) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; - u64 alloc_hint = 0; u64 num_bytes; - unsigned long ram_size; u64 orig_start; u64 disk_num_bytes; - u64 cur_alloc_size; u64 blocksize = root->sectorsize; u64 actual_end; - struct btrfs_key ins; - struct extent_map *em; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; int ret = 0; struct page **pages = NULL; unsigned long nr_pages; @@ -298,22 +339,12 @@ static int cow_file_range(struct inode *inode, struct page *locked_page, unsigned long total_compressed = 0; unsigned long total_in = 0; unsigned long max_compressed = 128 * 1024; - unsigned long max_uncompressed = 256 * 1024; + unsigned long max_uncompressed = 128 * 1024; int i; - int ordered_type; int will_compress; - trans = btrfs_join_transaction(root, 1); - BUG_ON(!trans); - btrfs_set_trans_block_group(trans, inode); orig_start = start; - /* - * compression made this loop a bit ugly, but the basic idea is to - * compress some pages but keep the total size of the compressed - * extent relatively small. If compression is off, this goto target - * is never used. - */ again: will_compress = 0; nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1; @@ -324,7 +355,13 @@ again: /* we want to make sure that amount of ram required to uncompress * an extent is reasonable, so we limit the total size in ram - * of a compressed extent to 256k + * of a compressed extent to 128k. This is a crucial number + * because it also controls how easily we can spread reads across + * cpus for decompression. + * + * We also want to make sure the amount of IO required to do + * a random read is reasonably small, so we limit the size of + * a compressed extent to 128k. */ total_compressed = min(total_compressed, max_uncompressed); num_bytes = (end - start + blocksize) & ~(blocksize - 1); @@ -333,18 +370,16 @@ again: total_in = 0; ret = 0; - /* we do compression for mount -o compress and when the - * inode has not been flagged as nocompress + /* + * we do compression for mount -o compress and when the + * inode has not been flagged as nocompress. This flag can + * change at any time if we discover bad compression ratios. */ if (!btrfs_test_flag(inode, NOCOMPRESS) && btrfs_test_opt(root, COMPRESS)) { WARN_ON(pages); pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); - /* we want to make sure the amount of IO required to satisfy - * a random read is reasonably small, so we limit the size - * of a compressed extent to 128k - */ ret = btrfs_zlib_compress_pages(inode->i_mapping, start, total_compressed, pages, nr_pages, &nr_pages_ret, @@ -371,26 +406,34 @@ again: } } if (start == 0) { + trans = btrfs_join_transaction(root, 1); + BUG_ON(!trans); + btrfs_set_trans_block_group(trans, inode); + /* lets try to make an inline extent */ - if (ret || total_in < (end - start + 1)) { + if (ret || total_in < (actual_end - start)) { /* we didn't compress the entire range, try - * to make an uncompressed inline extent. This - * is almost sure to fail, but maybe inline sizes - * will get bigger later + * to make an uncompressed inline extent. */ ret = cow_file_range_inline(trans, root, inode, start, end, 0, NULL); } else { + /* try making a compressed inline extent */ ret = cow_file_range_inline(trans, root, inode, start, end, total_compressed, pages); } + btrfs_end_transaction(trans, root); if (ret == 0) { + /* + * inline extent creation worked, we don't need + * to create any more async work items. Unlock + * and free up our temp pages. + */ extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - start, end, NULL, - 1, 1, 1); - *page_started = 1; + start, end, NULL, 1, 0, + 0, 1, 1, 1); ret = 0; goto free_pages_out; } @@ -435,53 +478,280 @@ again: /* flag the file so we don't compress in the future */ btrfs_set_flag(inode, NOCOMPRESS); } + if (will_compress) { + *num_added += 1; - BUG_ON(disk_num_bytes > - btrfs_super_total_bytes(&root->fs_info->super_copy)); + /* the async work queues will take care of doing actual + * allocation on disk for these compressed pages, + * and will submit them to the elevator. + */ + add_async_extent(async_cow, start, num_bytes, + total_compressed, pages, nr_pages_ret); - btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); + if (start + num_bytes < end) { + start += num_bytes; + pages = NULL; + cond_resched(); + goto again; + } + } else { + /* + * No compression, but we still need to write the pages in + * the file we've been given so far. redirty the locked + * page if it corresponds to our extent and set things up + * for the async work queue to run cow_file_range to do + * the normal delalloc dance + */ + if (page_offset(locked_page) >= start && + page_offset(locked_page) <= end) { + __set_page_dirty_nobuffers(locked_page); + /* unlocked later on in the async handlers */ + } + add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0); + *num_added += 1; + } - while(disk_num_bytes > 0) { - unsigned long min_bytes; +out: + return 0; + +free_pages_out: + for (i = 0; i < nr_pages_ret; i++) { + WARN_ON(pages[i]->mapping); + page_cache_release(pages[i]); + } + if (pages) + kfree(pages); + + goto out; +} + +/* + * phase two of compressed writeback. This is the ordered portion + * of the code, which only gets called in the order the work was + * queued. We walk all the async extents created by compress_file_range + * and send them down to the disk. + */ +static noinline int submit_compressed_extents(struct inode *inode, + struct async_cow *async_cow) +{ + struct async_extent *async_extent; + u64 alloc_hint = 0; + struct btrfs_trans_handle *trans; + struct btrfs_key ins; + struct extent_map *em; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree; + int ret; + + if (list_empty(&async_cow->extents)) + return 0; + + trans = btrfs_join_transaction(root, 1); + + while(!list_empty(&async_cow->extents)) { + async_extent = list_entry(async_cow->extents.next, + struct async_extent, list); + list_del(&async_extent->list); + io_tree = &BTRFS_I(inode)->io_tree; + + /* did the compression code fall back to uncompressed IO? */ + if (!async_extent->pages) { + int page_started = 0; + unsigned long nr_written = 0; + + lock_extent(io_tree, async_extent->start, + async_extent->start + async_extent->ram_size - 1, + GFP_NOFS); + + /* allocate blocks */ + cow_file_range(inode, async_cow->locked_page, + async_extent->start, + async_extent->start + + async_extent->ram_size - 1, + &page_started, &nr_written, 0); + + /* + * if page_started, cow_file_range inserted an + * inline extent and took care of all the unlocking + * and IO for us. Otherwise, we need to submit + * all those pages down to the drive. + */ + if (!page_started) + extent_write_locked_range(io_tree, + inode, async_extent->start, + async_extent->start + + async_extent->ram_size - 1, + btrfs_get_extent, + WB_SYNC_ALL); + kfree(async_extent); + cond_resched(); + continue; + } + + lock_extent(io_tree, async_extent->start, + async_extent->start + async_extent->ram_size - 1, + GFP_NOFS); /* - * the max size of a compressed extent is pretty small, - * make the code a little less complex by forcing - * the allocator to find a whole compressed extent at once + * here we're doing allocation and writeback of the + * compressed pages */ - if (will_compress) - min_bytes = disk_num_bytes; - else - min_bytes = root->sectorsize; + btrfs_drop_extent_cache(inode, async_extent->start, + async_extent->start + + async_extent->ram_size - 1, 0); + + ret = btrfs_reserve_extent(trans, root, + async_extent->compressed_size, + async_extent->compressed_size, + 0, alloc_hint, + (u64)-1, &ins, 1); + BUG_ON(ret); + em = alloc_extent_map(GFP_NOFS); + em->start = async_extent->start; + em->len = async_extent->ram_size; + + em->block_start = ins.objectid; + em->block_len = ins.offset; + em->bdev = root->fs_info->fs_devices->latest_bdev; + set_bit(EXTENT_FLAG_PINNED, &em->flags); + set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); + + while(1) { + spin_lock(&em_tree->lock); + ret = add_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + if (ret != -EEXIST) { + free_extent_map(em); + break; + } + btrfs_drop_extent_cache(inode, async_extent->start, + async_extent->start + + async_extent->ram_size - 1, 0); + } + + ret = btrfs_add_ordered_extent(inode, async_extent->start, + ins.objectid, + async_extent->ram_size, + ins.offset, + BTRFS_ORDERED_COMPRESSED); + BUG_ON(ret); + + btrfs_end_transaction(trans, root); + + /* + * clear dirty, set writeback and unlock the pages. + */ + extent_clear_unlock_delalloc(inode, + &BTRFS_I(inode)->io_tree, + async_extent->start, + async_extent->start + + async_extent->ram_size - 1, + NULL, 1, 1, 0, 1, 1, 0); + + ret = btrfs_submit_compressed_write(inode, + async_extent->start, + async_extent->ram_size, + ins.objectid, + ins.offset, async_extent->pages, + async_extent->nr_pages); + + BUG_ON(ret); + trans = btrfs_join_transaction(root, 1); + alloc_hint = ins.objectid + ins.offset; + kfree(async_extent); + cond_resched(); + } + + btrfs_end_transaction(trans, root); + return 0; +} + +/* + * when extent_io.c finds a delayed allocation range in the file, + * the call backs end up in this code. The basic idea is to + * allocate extents on disk for the range, and create ordered data structs + * in ram to track those extents. + * + * locked_page is the page that writepage had locked already. We use + * it to make sure we don't do extra locks or unlocks. + * + * *page_started is set to one if we unlock locked_page and do everything + * required to start IO on it. It may be clean and already done with + * IO when we return. + */ +static noinline int cow_file_range(struct inode *inode, + struct page *locked_page, + u64 start, u64 end, int *page_started, + unsigned long *nr_written, + int unlock) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + u64 alloc_hint = 0; + u64 num_bytes; + unsigned long ram_size; + u64 disk_num_bytes; + u64 cur_alloc_size; + u64 blocksize = root->sectorsize; + u64 actual_end; + struct btrfs_key ins; + struct extent_map *em; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + int ret = 0; + + trans = btrfs_join_transaction(root, 1); + BUG_ON(!trans); + btrfs_set_trans_block_group(trans, inode); + actual_end = min_t(u64, i_size_read(inode), end + 1); + + num_bytes = (end - start + blocksize) & ~(blocksize - 1); + num_bytes = max(blocksize, num_bytes); + disk_num_bytes = num_bytes; + ret = 0; + + if (start == 0) { + /* lets try to make an inline extent */ + ret = cow_file_range_inline(trans, root, inode, + start, end, 0, NULL); + if (ret == 0) { + extent_clear_unlock_delalloc(inode, + &BTRFS_I(inode)->io_tree, + start, end, NULL, 1, 1, + 1, 1, 1, 1); + *nr_written = *nr_written + + (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; + *page_started = 1; + ret = 0; + goto out; + } + } + + BUG_ON(disk_num_bytes > + btrfs_super_total_bytes(&root->fs_info->super_copy)); + + btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); + + while(disk_num_bytes > 0) { cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); ret = btrfs_reserve_extent(trans, root, cur_alloc_size, - min_bytes, 0, alloc_hint, + root->sectorsize, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { - WARN_ON(1); - goto free_pages_out_fail; + BUG(); } em = alloc_extent_map(GFP_NOFS); em->start = start; - if (will_compress) { - ram_size = num_bytes; - em->len = num_bytes; - } else { - /* ramsize == disk size */ - ram_size = ins.offset; - em->len = ins.offset; - } + ram_size = ins.offset; + em->len = ins.offset; em->block_start = ins.objectid; em->block_len = ins.offset; em->bdev = root->fs_info->fs_devices->latest_bdev; set_bit(EXTENT_FLAG_PINNED, &em->flags); - if (will_compress) - set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); - while(1) { spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); @@ -495,10 +765,8 @@ again: } cur_alloc_size = ins.offset; - ordered_type = will_compress ? BTRFS_ORDERED_COMPRESSED : 0; ret = btrfs_add_ordered_extent(inode, start, ins.objectid, - ram_size, cur_alloc_size, - ordered_type); + ram_size, cur_alloc_size, 0); BUG_ON(ret); if (disk_num_bytes < cur_alloc_size) { @@ -506,82 +774,145 @@ again: cur_alloc_size); break; } - - if (will_compress) { - /* - * we're doing compression, we and we need to - * submit the compressed extents down to the device. - * - * We lock down all the file pages, clearing their - * dirty bits and setting them writeback. Everyone - * that wants to modify the page will wait on the - * ordered extent above. - * - * The writeback bits on the file pages are - * cleared when the compressed pages are on disk - */ - btrfs_end_transaction(trans, root); - - if (start <= page_offset(locked_page) && - page_offset(locked_page) < start + ram_size) { - *page_started = 1; - } - - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, - start + ram_size - 1, - NULL, 1, 1, 0); - - ret = btrfs_submit_compressed_write(inode, start, - ram_size, ins.objectid, - cur_alloc_size, pages, - nr_pages_ret); - - BUG_ON(ret); - trans = btrfs_join_transaction(root, 1); - if (start + ram_size < end) { - start += ram_size; - alloc_hint = ins.objectid + ins.offset; - /* pages will be freed at end_bio time */ - pages = NULL; - goto again; - } else { - /* we've written everything, time to go */ - break; - } - } /* we're not doing compressed IO, don't unlock the first * page (which the caller expects to stay locked), don't * clear any dirty bits and don't set any writeback bits */ extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, start, start + ram_size - 1, - locked_page, 0, 0, 0); + locked_page, unlock, 1, + 1, 0, 0, 0); disk_num_bytes -= cur_alloc_size; num_bytes -= cur_alloc_size; alloc_hint = ins.objectid + ins.offset; start += cur_alloc_size; } - - ret = 0; out: + ret = 0; btrfs_end_transaction(trans, root); return ret; +} -free_pages_out_fail: - extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - start, end, locked_page, 0, 0, 0); -free_pages_out: - for (i = 0; i < nr_pages_ret; i++) { - WARN_ON(pages[i]->mapping); - page_cache_release(pages[i]); +/* + * work queue call back to started compression on a file and pages + */ +static noinline void async_cow_start(struct btrfs_work *work) +{ + struct async_cow *async_cow; + int num_added = 0; + async_cow = container_of(work, struct async_cow, work); + + compress_file_range(async_cow->inode, async_cow->locked_page, + async_cow->start, async_cow->end, async_cow, + &num_added); + if (num_added == 0) + async_cow->inode = NULL; +} + +/* + * work queue call back to submit previously compressed pages + */ +static noinline void async_cow_submit(struct btrfs_work *work) +{ + struct async_cow *async_cow; + struct btrfs_root *root; + unsigned long nr_pages; + + async_cow = container_of(work, struct async_cow, work); + + root = async_cow->root; + nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >> + PAGE_CACHE_SHIFT; + + atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages); + + if (atomic_read(&root->fs_info->async_delalloc_pages) < + 5 * 1042 * 1024 && + waitqueue_active(&root->fs_info->async_submit_wait)) + wake_up(&root->fs_info->async_submit_wait); + + if (async_cow->inode) { + submit_compressed_extents(async_cow->inode, async_cow); } - if (pages) - kfree(pages); +} - goto out; +static noinline void async_cow_free(struct btrfs_work *work) +{ + struct async_cow *async_cow; + async_cow = container_of(work, struct async_cow, work); + kfree(async_cow); +} + +static int cow_file_range_async(struct inode *inode, struct page *locked_page, + u64 start, u64 end, int *page_started, + unsigned long *nr_written) +{ + struct async_cow *async_cow; + struct btrfs_root *root = BTRFS_I(inode)->root; + unsigned long nr_pages; + u64 cur_end; + int limit = 10 * 1024 * 1042; + + if (!btrfs_test_opt(root, COMPRESS)) { + return cow_file_range(inode, locked_page, start, end, + page_started, nr_written, 1); + } + + clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | + EXTENT_DELALLOC, 1, 0, GFP_NOFS); + while(start < end) { + async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); + async_cow->inode = inode; + async_cow->root = root; + async_cow->locked_page = locked_page; + async_cow->start = start; + + if (btrfs_test_flag(inode, NOCOMPRESS)) + cur_end = end; + else + cur_end = min(end, start + 512 * 1024 - 1); + + async_cow->end = cur_end; + INIT_LIST_HEAD(&async_cow->extents); + + async_cow->work.func = async_cow_start; + async_cow->work.ordered_func = async_cow_submit; + async_cow->work.ordered_free = async_cow_free; + async_cow->work.flags = 0; + + while(atomic_read(&root->fs_info->async_submit_draining) && + atomic_read(&root->fs_info->async_delalloc_pages)) { + wait_event(root->fs_info->async_submit_wait, + (atomic_read(&root->fs_info->async_delalloc_pages) + == 0)); + } + + nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> + PAGE_CACHE_SHIFT; + atomic_add(nr_pages, &root->fs_info->async_delalloc_pages); + + btrfs_queue_worker(&root->fs_info->delalloc_workers, + &async_cow->work); + + if (atomic_read(&root->fs_info->async_delalloc_pages) > limit) { + wait_event(root->fs_info->async_submit_wait, + (atomic_read(&root->fs_info->async_delalloc_pages) < + limit)); + } + + while(atomic_read(&root->fs_info->async_submit_draining) && + atomic_read(&root->fs_info->async_delalloc_pages)) { + wait_event(root->fs_info->async_submit_wait, + (atomic_read(&root->fs_info->async_delalloc_pages) == + 0)); + } + + *nr_written += nr_pages; + start = cur_end + 1; + } + *page_started = 1; + return 0; } /* @@ -592,7 +923,8 @@ free_pages_out: * blocks on disk */ static int run_delalloc_nocow(struct inode *inode, struct page *locked_page, - u64 start, u64 end, int *page_started, int force) + u64 start, u64 end, int *page_started, int force, + unsigned long *nr_written) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; @@ -711,7 +1043,8 @@ out_check: btrfs_release_path(root, path); if (cow_start != (u64)-1) { ret = cow_file_range(inode, locked_page, cow_start, - found_key.offset - 1, page_started); + found_key.offset - 1, page_started, + nr_written, 1); BUG_ON(ret); cow_start = (u64)-1; } @@ -748,9 +1081,10 @@ out_check: ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr, num_bytes, num_bytes, type); BUG_ON(ret); + extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, cur_offset, cur_offset + num_bytes - 1, - locked_page, 0, 0, 0); + locked_page, 1, 1, 1, 0, 0, 0); cur_offset = extent_end; if (cur_offset > end) break; @@ -761,7 +1095,7 @@ out_check: cow_start = cur_offset; if (cow_start != (u64)-1) { ret = cow_file_range(inode, locked_page, cow_start, end, - page_started); + page_started, nr_written, 1); BUG_ON(ret); } @@ -775,7 +1109,8 @@ out_check: * extent_io.c call back to do delayed allocation processing */ static int run_delalloc_range(struct inode *inode, struct page *locked_page, - u64 start, u64 end, int *page_started) + u64 start, u64 end, int *page_started, + unsigned long *nr_written) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret; @@ -783,13 +1118,13 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, if (btrfs_test_opt(root, NODATACOW) || btrfs_test_flag(inode, NODATACOW)) ret = run_delalloc_nocow(inode, locked_page, start, end, - page_started, 0); + page_started, 0, nr_written); else if (btrfs_test_flag(inode, PREALLOC)) ret = run_delalloc_nocow(inode, locked_page, start, end, - page_started, 1); + page_started, 1, nr_written); else - ret = cow_file_range(inode, locked_page, start, end, - page_started); + ret = cow_file_range_async(inode, locked_page, start, end, + page_started, nr_written); return ret; } @@ -861,6 +1196,9 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, u64 map_length; int ret; + if (bio_flags & EXTENT_BIO_COMPRESSED) + return 0; + length = bio->bi_size; map_tree = &root->fs_info->mapping_tree; map_length = length; @@ -925,12 +1263,12 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, btrfs_test_flag(inode, NODATASUM); if (!(rw & (1 << BIO_RW))) { - if (!skip_sum) - btrfs_lookup_bio_sums(root, inode, bio); if (bio_flags & EXTENT_BIO_COMPRESSED) return btrfs_submit_compressed_read(inode, bio, mirror_num, bio_flags); + else if (!skip_sum) + btrfs_lookup_bio_sums(root, inode, bio); goto mapit; } else if (!skip_sum) { /* we're doing a write, do the async checksumming */ @@ -966,6 +1304,9 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end) { + if ((end & (PAGE_CACHE_SIZE - 1)) == 0) { + WARN_ON(1); + } return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); } @@ -2105,6 +2446,7 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, int pending_del_nr = 0; int pending_del_slot = 0; int extent_type = -1; + int encoding; u64 mask = root->sectorsize - 1; if (root->ref_cows) @@ -2144,6 +2486,7 @@ search_again: leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); found_type = btrfs_key_type(&found_key); + encoding = 0; if (found_key.objectid != inode->i_ino) break; @@ -2156,6 +2499,10 @@ search_again: fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); extent_type = btrfs_file_extent_type(leaf, fi); + encoding = btrfs_file_extent_compression(leaf, fi); + encoding |= btrfs_file_extent_encryption(leaf, fi); + encoding |= btrfs_file_extent_other_encoding(leaf, fi); + if (extent_type != BTRFS_FILE_EXTENT_INLINE) { item_end += btrfs_file_extent_num_bytes(leaf, fi); @@ -2200,7 +2547,7 @@ search_again: if (extent_type != BTRFS_FILE_EXTENT_INLINE) { u64 num_dec; extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); - if (!del_item) { + if (!del_item && !encoding) { u64 orig_num_bytes = btrfs_file_extent_num_bytes(leaf, fi); extent_num_bytes = new_size - @@ -2436,7 +2783,14 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) last_byte = min(extent_map_end(em), block_end); last_byte = (last_byte + mask) & ~mask; if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { + u64 hint_byte = 0; hole_size = last_byte - cur_offset; + err = btrfs_drop_extents(trans, root, inode, + cur_offset, + cur_offset + hole_size, + cur_offset, &hint_byte); + if (err) + break; err = btrfs_insert_file_extent(trans, root, inode->i_ino, cur_offset, 0, 0, hole_size, 0, hole_size, @@ -3785,6 +4139,7 @@ int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct extent_io_tree *tree; + tree = &BTRFS_I(mapping->host)->io_tree; return extent_writepages(tree, mapping, btrfs_get_extent, wbc); } @@ -4285,9 +4640,11 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root) * ordered extents get created before we return */ atomic_inc(&root->fs_info->async_submit_draining); - while(atomic_read(&root->fs_info->nr_async_submits)) { + while(atomic_read(&root->fs_info->nr_async_submits) || + atomic_read(&root->fs_info->async_delalloc_pages)) { wait_event(root->fs_info->async_submit_wait, - (atomic_read(&root->fs_info->nr_async_submits) == 0)); + (atomic_read(&root->fs_info->nr_async_submits) == 0 && + atomic_read(&root->fs_info->async_delalloc_pages) == 0)); } atomic_dec(&root->fs_info->async_submit_draining); return 0; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 370bb428559..027ad6b3839 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -390,7 +390,7 @@ void btrfs_start_ordered_extent(struct inode *inode, * start IO on any dirty ones so the wait doesn't stall waiting * for pdflush to find them */ - btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_NONE); + btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_ALL); if (wait) { wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags)); @@ -421,6 +421,12 @@ again: */ btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE); + /* The compression code will leave pages locked but return from + * writepage without setting the page writeback. Starting again + * with WB_SYNC_ALL will end up waiting for the IO to actually start. + */ + btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); + btrfs_wait_on_page_writeback_range(inode->i_mapping, start >> PAGE_CACHE_SHIFT, orig_end >> PAGE_CACHE_SHIFT); @@ -448,10 +454,7 @@ again: } if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, EXTENT_ORDERED | EXTENT_DELALLOC, 0)) { - printk("inode %lu still ordered or delalloc after wait " - "%llu %llu\n", inode->i_ino, - (unsigned long long)start, - (unsigned long long)orig_end); + schedule_timeout(1); goto again; } return 0; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 431fdf144b5..ab9d5e89ed1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -375,6 +375,10 @@ int btrfs_sync_fs(struct super_block *sb, int wait) filemap_flush(root->fs_info->btree_inode->i_mapping); return 0; } + + btrfs_start_delalloc_inodes(root); + btrfs_wait_ordered_extents(root, 0); + btrfs_clean_old_snapshots(root); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index e99309180a1..ba2527d0873 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -423,8 +423,9 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, /* we didn't make progress in this inflate * call, we're done */ - if (ret != Z_STREAM_END) + if (ret != Z_STREAM_END) { ret = -1; + } break; } -- cgit v1.2.3-70-g09d2 From 3b7885bf96e03271a9fff41124c38ed7176616e2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 6 Nov 2008 21:48:27 -0500 Subject: Btrfs: enforce metadata allocation clustering The allocator uses the last allocation as a starting point for metadata allocations, and tries to allocate in clusters of at least 256k. If the search for a free block fails to find the expected block, this patch forces a new cluster to be found in the free list. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 17 +++++++++++++++++ fs/btrfs/extent_io.c | 7 ------- 2 files changed, 17 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ebd8275a193..b8c6541c33f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2195,6 +2195,23 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, if (search_start + num_bytes > end) goto new_group; + if (last_ptr && *last_ptr && search_start != *last_ptr) { + total_needed += empty_cluster; + *last_ptr = 0; + /* + * if search_start is still in this block group + * then we just re-search this block group + */ + if (search_start >= start && + search_start < end) { + mutex_unlock(&block_group->alloc_mutex); + continue; + } + + /* else we go to the next block group */ + goto new_group; + } + if (exclude_nr > 0 && (search_start + num_bytes > exclude_start && search_start < exclude_start + exclude_nr)) { diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index bbe3bcfcf4a..ad75a9cc360 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2494,13 +2494,6 @@ retry: index = 0; goto retry; } - if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) - mapping->writeback_index = index; - if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) - range_whole = 1; - - if (wbc->range_cont) - wbc->range_start = index << PAGE_CACHE_SHIFT; return ret; } EXPORT_SYMBOL(extent_write_cache_pages); -- cgit v1.2.3-70-g09d2 From 4366211ccd050e00674b3276b5ed81981b2d7793 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 7 Nov 2008 09:06:11 -0500 Subject: Btfs: More metadata allocator optimizations This lowers the empty cluster target for metadata allocations. The lower target makes it easier to do allocations and still seems to perform well. It also fixes the allocator loop to drop the empty cluster when things start getting difficult, avoiding false enospc warnings. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b8c6541c33f..0d73a53c676 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2116,6 +2116,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root * root = orig_root->fs_info->extent_root; u64 total_needed = num_bytes; u64 *last_ptr = NULL; + u64 last_wanted = 0; struct btrfs_block_group_cache *block_group = NULL; int chunk_alloc_done = 0; int empty_cluster = 2 * 1024 * 1024; @@ -2134,22 +2135,28 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, if (data & BTRFS_BLOCK_GROUP_METADATA) { last_ptr = &root->fs_info->last_alloc; - empty_cluster = 256 * 1024; + empty_cluster = 64 * 1024; } if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) last_ptr = &root->fs_info->last_data_alloc; if (last_ptr) { - if (*last_ptr) + if (*last_ptr) { hint_byte = *last_ptr; - else + last_wanted = *last_ptr; + } else empty_size += empty_cluster; + } else { + empty_cluster = 0; } search_start = max(search_start, first_logical_byte(root, 0)); search_start = max(search_start, hint_byte); total_needed += empty_size; + if (search_start != last_wanted) + last_wanted = 0; + block_group = btrfs_lookup_block_group(root->fs_info, search_start); if (!block_group) block_group = btrfs_lookup_first_block_group(root->fs_info, @@ -2195,9 +2202,9 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, if (search_start + num_bytes > end) goto new_group; - if (last_ptr && *last_ptr && search_start != *last_ptr) { + if (last_wanted && search_start != last_wanted) { total_needed += empty_cluster; - *last_ptr = 0; + last_wanted = 0; /* * if search_start is still in this block group * then we just re-search this block group @@ -2223,6 +2230,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, if (search_start >= start && search_start < end) { mutex_unlock(&block_group->alloc_mutex); + last_wanted = 0; continue; } @@ -2240,6 +2248,11 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, break; } new_group: + last_wanted = 0; + if (loop > 0) { + total_needed -= empty_cluster; + empty_cluster = 0; + } mutex_unlock(&block_group->alloc_mutex); /* * Here's how this works. @@ -2256,11 +2269,6 @@ new_group: if (loop == 0) { head = &space_info->block_groups; cur = head->next; - - if (last_ptr && *last_ptr) { - total_needed += empty_cluster; - *last_ptr = 0; - } loop++; } else if (loop == 1 && cur == head) { if (allowed_chunk_alloc && !chunk_alloc_done) { -- cgit v1.2.3-70-g09d2 From 42e70e7a2f9d96fd843723fa46d5121cb3e551d0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 7 Nov 2008 18:17:11 -0500 Subject: Btrfs: Fix more false enospc errors and an oops from empty clustering In comes cases the empty cluster was added twice to the total number of bytes the allocator was trying to find. With empty clustering on, the hint byte was sometimes outside of the block group. Add an extra goto to find the correct block group. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0d73a53c676..b92e92c29e3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2152,11 +2152,13 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, } search_start = max(search_start, first_logical_byte(root, 0)); search_start = max(search_start, hint_byte); - total_needed += empty_size; - if (search_start != last_wanted) + if (last_wanted && search_start != last_wanted) { last_wanted = 0; + empty_size += empty_cluster; + } + total_needed += empty_size; block_group = btrfs_lookup_block_group(root->fs_info, search_start); if (!block_group) block_group = btrfs_lookup_first_block_group(root->fs_info, @@ -2171,7 +2173,9 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, * group thats not of the proper type, while looping this * should never happen */ - WARN_ON(!block_group); + if (!block_group) + goto new_group_no_lock; + mutex_lock(&block_group->alloc_mutex); if (unlikely(!block_group_bits(block_group, data))) goto new_group; @@ -2248,12 +2252,13 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, break; } new_group: + mutex_unlock(&block_group->alloc_mutex); +new_group_no_lock: last_wanted = 0; - if (loop > 0) { + if (!allowed_chunk_alloc && loop > 0) { total_needed -= empty_cluster; empty_cluster = 0; } - mutex_unlock(&block_group->alloc_mutex); /* * Here's how this works. * loop == 0: we were searching a block group via a hint @@ -2271,6 +2276,10 @@ new_group: cur = head->next; loop++; } else if (loop == 1 && cur == head) { + + total_needed -= empty_cluster; + empty_cluster = 0; + if (allowed_chunk_alloc && !chunk_alloc_done) { up_read(&space_info->groups_sem); ret = do_chunk_alloc(trans, root, num_bytes + -- cgit v1.2.3-70-g09d2 From 5b7c3fcc46b5deb8a368d5319cf87c78c2df65fe Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 10 Nov 2008 07:26:33 -0500 Subject: Btrfs: Don't substract too much from the allocation target (avoid wrapping) When metadata allocation clustering has to fall back to unclustered allocs because large free areas could not be found, it was sometimes substracting too much from the total bytes to allocate. This would make it wrap below zero. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b92e92c29e3..2451717d36d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2255,9 +2255,9 @@ new_group: mutex_unlock(&block_group->alloc_mutex); new_group_no_lock: last_wanted = 0; - if (!allowed_chunk_alloc && loop > 0) { - total_needed -= empty_cluster; - empty_cluster = 0; + if (!allowed_chunk_alloc) { + total_needed -= empty_size; + empty_size = 0; } /* * Here's how this works. @@ -2277,8 +2277,8 @@ new_group_no_lock: loop++; } else if (loop == 1 && cur == head) { - total_needed -= empty_cluster; - empty_cluster = 0; + total_needed -= empty_size; + empty_size = 0; if (allowed_chunk_alloc && !chunk_alloc_done) { up_read(&space_info->groups_sem); -- cgit v1.2.3-70-g09d2 From f5a31e166772a7b9fff6725b697eb8b57633671e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 10 Nov 2008 11:47:09 -0500 Subject: Btrfs: Try harder while searching for free space The loop searching for free space would exit out too soon when metadata clustering was trying to allocate a large extent. This makes sure a full scan of the free space is done searching for only the minimum extent size requested by the higher layers. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2451717d36d..55d6a66c622 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2123,6 +2123,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, int allowed_chunk_alloc = 0; struct list_head *head = NULL, *cur = NULL; int loop = 0; + int extra_loop = 0; struct btrfs_space_info *space_info; WARN_ON(num_bytes < root->sectorsize); @@ -2191,6 +2192,9 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, free_space = btrfs_find_free_space(block_group, search_start, total_needed); + if (empty_size) + extra_loop = 1; + if (free_space) { u64 start = block_group->key.objectid; u64 end = block_group->key.objectid + @@ -2254,11 +2258,11 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, new_group: mutex_unlock(&block_group->alloc_mutex); new_group_no_lock: + /* don't try to compare new allocations against the + * last allocation any more + */ last_wanted = 0; - if (!allowed_chunk_alloc) { - total_needed -= empty_size; - empty_size = 0; - } + /* * Here's how this works. * loop == 0: we were searching a block group via a hint @@ -2276,9 +2280,21 @@ new_group_no_lock: cur = head->next; loop++; } else if (loop == 1 && cur == head) { - + int keep_going; + + /* at this point we give up on the empty_size + * allocations and just try to allocate the min + * space. + * + * The extra_loop field was set if an empty_size + * allocation was attempted above, and if this + * is try we need to try the loop again without + * the additional empty_size. + */ total_needed -= empty_size; empty_size = 0; + keep_going = extra_loop; + loop++; if (allowed_chunk_alloc && !chunk_alloc_done) { up_read(&space_info->groups_sem); @@ -2287,13 +2303,19 @@ new_group_no_lock: if (ret < 0) break; down_read(&space_info->groups_sem); - loop++; head = &space_info->block_groups; - cur = head->next; + /* + * we've allocated a new chunk, keep + * trying + */ + keep_going = 1; chunk_alloc_done = 1; } else if (!allowed_chunk_alloc) { space_info->force_alloc = 1; - break; + } + if (keep_going) { + cur = head->next; + extra_loop = 0; } else { break; } -- cgit v1.2.3-70-g09d2 From 8a1413a296d38b54ded651e76ef16c033d38fd5d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 10 Nov 2008 16:13:54 -0500 Subject: Btrfs: empty_size allocation fixes again The allocator wasn't catching all of the cases where it needed to do extra loops because the check to enforce them wasn't happening early enough. When the allocator decided to increase the size of the allocation for metadata clustering, it wasn't always setting the empty_size to include the extra (optional) bytes. This also fixes the empty_size field to be correct. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 55d6a66c622..b7530c3ac20 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2174,6 +2174,9 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, * group thats not of the proper type, while looping this * should never happen */ + if (empty_size) + extra_loop = 1; + if (!block_group) goto new_group_no_lock; @@ -2192,9 +2195,6 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, free_space = btrfs_find_free_space(block_group, search_start, total_needed); - if (empty_size) - extra_loop = 1; - if (free_space) { u64 start = block_group->key.objectid; u64 end = block_group->key.objectid + @@ -2212,6 +2212,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, if (last_wanted && search_start != last_wanted) { total_needed += empty_cluster; + empty_size += empty_cluster; last_wanted = 0; /* * if search_start is still in this block group -- cgit v1.2.3-70-g09d2 From 2ed6d66408527be0d1c6131d44cec7e86008ba26 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 13 Nov 2008 09:59:33 -0500 Subject: Btrfs: Fix handling of space info full during allocations When we fail to allocate a new block group, we should still do the checks to make sure allocations try again with the minimum requested allocation size. This also fixes a deadlock that come from a missed down_read in the chunk allocation failure handling. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b7530c3ac20..22820f91d2b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2301,9 +2301,9 @@ new_group_no_lock: up_read(&space_info->groups_sem); ret = do_chunk_alloc(trans, root, num_bytes + 2 * 1024 * 1024, data, 1); - if (ret < 0) - break; down_read(&space_info->groups_sem); + if (ret < 0) + goto loop_check; head = &space_info->block_groups; /* * we've allocated a new chunk, keep @@ -2314,6 +2314,7 @@ new_group_no_lock: } else if (!allowed_chunk_alloc) { space_info->force_alloc = 1; } +loop_check: if (keep_going) { cur = head->next; extra_loop = 0; -- cgit v1.2.3-70-g09d2 From f3465ca44e2a51fd647c167045768a8ab5a96603 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 12 Nov 2008 14:19:50 -0500 Subject: Btrfs: batch extent inserts/updates/deletions on the extent root While profiling the allocator I noticed a good amount of time was being spent in finish_current_insert and del_pending_extents, and as the filesystem filled up more and more time was being spent in those functions. This patch aims to try and reduce that problem. This happens two ways 1) track if we tried to delete an extent that we are going to update or insert. Once we get into finish_current_insert we discard any of the extents that were marked for deletion. This saves us from doing unnecessary work almost every time finish_current_insert runs. 2) Batch insertion/updates/deletions. Instead of doing a btrfs_search_slot for each individual extent and doing the needed operation, we instead keep the leaf around and see if there is anything else we can do on that leaf. On the insert case I introduced a btrfs_insert_some_items, which will take an array of keys with an array of data_sizes and try and squeeze in as many of those keys as possible, and then return how many keys it was able to insert. In the update case we search for an extent ref, update the ref and then loop through the leaf to see if any of the other refs we are looking to update are on that leaf, and then once we are done we release the path and search for the next ref we need to update. And finally for the deletion we try and delete the extent+ref in pairs, so we will try to find extent+ref pairs next to the extent we are trying to free and free them in bulk if possible. This along with the other cluster fix that Chris pushed out a bit ago helps make the allocator preform more uniformly as it fills up the disk. There is still a slight drop as we fill up the disk since we start having to stick new blocks in odd places which results in more COW's than on a empty fs, but the drop is not nearly as severe as it was before. Signed-off-by: Josef Bacik --- fs/btrfs/ctree.c | 170 ++++++++++ fs/btrfs/ctree.h | 5 + fs/btrfs/extent-tree.c | 827 +++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 909 insertions(+), 93 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ac61c50a331..8bb452456d9 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -431,6 +431,25 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) return 0; } +/* + * same as comp_keys only with two btrfs_key's + */ +static int comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2) +{ + if (k1->objectid > k2->objectid) + return 1; + if (k1->objectid < k2->objectid) + return -1; + if (k1->type > k2->type) + return 1; + if (k1->type < k2->type) + return -1; + if (k1->offset > k2->offset) + return 1; + if (k1->offset < k2->offset) + return -1; + return 0; +} /* * this is used by the defrag code to go through all the @@ -2999,6 +3018,157 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, return ret; } +/* + * Given a key and some data, insert items into the tree. + * This does all the path init required, making room in the tree if needed. + * Returns the number of keys that were inserted. + */ +int btrfs_insert_some_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *cpu_key, u32 *data_size, + int nr) +{ + struct extent_buffer *leaf; + struct btrfs_item *item; + int ret = 0; + int slot; + int slot_orig; + int i; + u32 nritems; + u32 total_data = 0; + u32 total_size = 0; + unsigned int data_end; + struct btrfs_disk_key disk_key; + struct btrfs_key found_key; + + found_key.objectid = 0; + nr = min_t(int, nr, BTRFS_NODEPTRS_PER_BLOCK(root)); + + for (i = 0; i < nr; i++) + total_data += data_size[i]; + + total_data = min_t(u32, total_data, BTRFS_LEAF_DATA_SIZE(root)); + total_size = total_data + (nr * sizeof(struct btrfs_item)); + ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); + if (ret == 0) + return -EEXIST; + if (ret < 0) + goto out; + + slot_orig = path->slots[0]; + leaf = path->nodes[0]; + + nritems = btrfs_header_nritems(leaf); + data_end = leaf_data_end(root, leaf); + + if (btrfs_leaf_free_space(root, leaf) < total_size) { + for (i = nr; i >= 0; i--) { + total_data -= data_size[i]; + total_size -= data_size[i] + sizeof(struct btrfs_item); + if (total_size < btrfs_leaf_free_space(root, leaf)) + break; + } + nr = i; + } + + slot = path->slots[0]; + BUG_ON(slot < 0); + + if (slot != nritems) { + unsigned int old_data = btrfs_item_end_nr(leaf, slot); + + item = btrfs_item_nr(leaf, slot); + btrfs_item_key_to_cpu(leaf, &found_key, slot); + + /* figure out how many keys we can insert in here */ + total_data = data_size[0]; + for (i = 1; i < nr; i++) { + if (comp_cpu_keys(&found_key, cpu_key + i) <= 0) + break; + total_data += data_size[i]; + } + nr = i; + + if (old_data < data_end) { + btrfs_print_leaf(root, leaf); + printk("slot %d old_data %d data_end %d\n", + slot, old_data, data_end); + BUG_ON(1); + } + /* + * item0..itemN ... dataN.offset..dataN.size .. data0.size + */ + /* first correct the data pointers */ + WARN_ON(leaf->map_token); + for (i = slot; i < nritems; i++) { + u32 ioff; + + item = btrfs_item_nr(leaf, i); + if (!leaf->map_token) { + map_extent_buffer(leaf, (unsigned long)item, + sizeof(struct btrfs_item), + &leaf->map_token, &leaf->kaddr, + &leaf->map_start, &leaf->map_len, + KM_USER1); + } + + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff - total_data); + } + if (leaf->map_token) { + unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); + leaf->map_token = NULL; + } + + /* shift the items */ + memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), + btrfs_item_nr_offset(slot), + (nritems - slot) * sizeof(struct btrfs_item)); + + /* shift the data */ + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + + data_end - total_data, btrfs_leaf_data(leaf) + + data_end, old_data - data_end); + data_end = old_data; + } else { + /* + * this sucks but it has to be done, if we are inserting at + * the end of the leaf only insert 1 of the items, since we + * have no way of knowing whats on the next leaf and we'd have + * to drop our current locks to figure it out + */ + nr = 1; + } + + /* setup the item for the new data */ + for (i = 0; i < nr; i++) { + btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); + btrfs_set_item_key(leaf, &disk_key, slot + i); + item = btrfs_item_nr(leaf, slot + i); + btrfs_set_item_offset(leaf, item, data_end - data_size[i]); + data_end -= data_size[i]; + btrfs_set_item_size(leaf, item, data_size[i]); + } + btrfs_set_header_nritems(leaf, nritems + nr); + btrfs_mark_buffer_dirty(leaf); + + ret = 0; + if (slot == 0) { + btrfs_cpu_key_to_disk(&disk_key, cpu_key); + ret = fixup_low_keys(trans, root, path, &disk_key, 1); + } + + if (btrfs_leaf_free_space(root, leaf) < 0) { + btrfs_print_leaf(root, leaf); + BUG(); + } +out: + if (!ret) + ret = nr; + return ret; +} + /* * Given a key and some data, insert items into the tree. * This does all the path init required, making room in the tree if needed. diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c83cc5b2ded..f575939e025 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1719,6 +1719,11 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans, int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, void *data, u32 data_size); +int btrfs_insert_some_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *cpu_key, u32 *data_size, + int nr); int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 22820f91d2b..e785f0a0632 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -42,6 +42,8 @@ struct pending_extent_op { u64 generation; u64 orig_generation; int level; + struct list_head list; + int del; }; static int finish_current_insert(struct btrfs_trans_handle *trans, struct @@ -52,6 +54,13 @@ static struct btrfs_block_group_cache * __btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, int data, int owner); +static int pin_down_bytes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, int is_data); +static int update_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, int alloc, + int mark_free); static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) { @@ -559,6 +568,251 @@ out: return ret; } +/* + * updates all the backrefs that are pending on update_list for the + * extent_root + */ +static int noinline update_backrefs(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, + struct btrfs_path *path, + struct list_head *update_list) +{ + struct btrfs_key key; + struct btrfs_extent_ref *ref; + struct btrfs_fs_info *info = extent_root->fs_info; + struct pending_extent_op *op; + struct extent_buffer *leaf; + int ret = 0; + struct list_head *cur = update_list->next; + u64 ref_objectid; + u64 ref_root = extent_root->root_key.objectid; + + op = list_entry(cur, struct pending_extent_op, list); + +search: + key.objectid = op->bytenr; + key.type = BTRFS_EXTENT_REF_KEY; + key.offset = op->orig_parent; + + ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1); + BUG_ON(ret); + + leaf = path->nodes[0]; + +loop: + ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); + + ref_objectid = btrfs_ref_objectid(leaf, ref); + + if (btrfs_ref_root(leaf, ref) != ref_root || + btrfs_ref_generation(leaf, ref) != op->orig_generation || + (ref_objectid != op->level && + ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) { + printk(KERN_ERR "couldn't find %Lu, parent %Lu, root %Lu, " + "owner %u\n", op->bytenr, op->orig_parent, + ref_root, op->level); + btrfs_print_leaf(extent_root, leaf); + BUG(); + } + + key.objectid = op->bytenr; + key.offset = op->parent; + key.type = BTRFS_EXTENT_REF_KEY; + ret = btrfs_set_item_key_safe(trans, extent_root, path, &key); + BUG_ON(ret); + ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); + btrfs_set_ref_generation(leaf, ref, op->generation); + + cur = cur->next; + + list_del_init(&op->list); + unlock_extent(&info->extent_ins, op->bytenr, + op->bytenr + op->num_bytes - 1, GFP_NOFS); + kfree(op); + + if (cur == update_list) { + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(extent_root, path); + goto out; + } + + op = list_entry(cur, struct pending_extent_op, list); + + path->slots[0]++; + while (path->slots[0] < btrfs_header_nritems(leaf)) { + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid == op->bytenr && + key.type == BTRFS_EXTENT_REF_KEY) + goto loop; + path->slots[0]++; + } + + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(extent_root, path); + goto search; + +out: + return 0; +} + +static int noinline insert_extents(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, + struct btrfs_path *path, + struct list_head *insert_list, int nr) +{ + struct btrfs_key *keys; + u32 *data_size; + struct pending_extent_op *op; + struct extent_buffer *leaf; + struct list_head *cur = insert_list->next; + struct btrfs_fs_info *info = extent_root->fs_info; + u64 ref_root = extent_root->root_key.objectid; + int i = 0, last = 0, ret; + int total = nr * 2; + + if (!nr) + return 0; + + keys = kzalloc(total * sizeof(struct btrfs_key), GFP_NOFS); + if (!keys) + return -ENOMEM; + + data_size = kzalloc(total * sizeof(u32), GFP_NOFS); + if (!data_size) { + kfree(keys); + return -ENOMEM; + } + + list_for_each_entry(op, insert_list, list) { + keys[i].objectid = op->bytenr; + keys[i].offset = op->num_bytes; + keys[i].type = BTRFS_EXTENT_ITEM_KEY; + data_size[i] = sizeof(struct btrfs_extent_item); + i++; + + keys[i].objectid = op->bytenr; + keys[i].offset = op->parent; + keys[i].type = BTRFS_EXTENT_REF_KEY; + data_size[i] = sizeof(struct btrfs_extent_ref); + i++; + } + + op = list_entry(cur, struct pending_extent_op, list); + i = 0; + while (i < total) { + int c; + ret = btrfs_insert_some_items(trans, extent_root, path, + keys+i, data_size+i, total-i); + BUG_ON(ret < 0); + + if (last && ret > 1) + BUG(); + + leaf = path->nodes[0]; + for (c = 0; c < ret; c++) { + int ref_first = keys[i].type == BTRFS_EXTENT_REF_KEY; + + /* + * if the first item we inserted was a backref, then + * the EXTENT_ITEM will be the odd c's, else it will + * be the even c's + */ + if ((ref_first && (c % 2)) || + (!ref_first && !(c % 2))) { + struct btrfs_extent_item *itm; + + itm = btrfs_item_ptr(leaf, path->slots[0] + c, + struct btrfs_extent_item); + btrfs_set_extent_refs(path->nodes[0], itm, 1); + op->del++; + } else { + struct btrfs_extent_ref *ref; + + ref = btrfs_item_ptr(leaf, path->slots[0] + c, + struct btrfs_extent_ref); + btrfs_set_ref_root(leaf, ref, ref_root); + btrfs_set_ref_generation(leaf, ref, + op->generation); + btrfs_set_ref_objectid(leaf, ref, op->level); + btrfs_set_ref_num_refs(leaf, ref, 1); + op->del++; + } + + /* + * using del to see when its ok to free up the + * pending_extent_op. In the case where we insert the + * last item on the list in order to help do batching + * we need to not free the extent op until we actually + * insert the extent_item + */ + if (op->del == 2) { + unlock_extent(&info->extent_ins, op->bytenr, + op->bytenr + op->num_bytes - 1, + GFP_NOFS); + cur = cur->next; + list_del_init(&op->list); + kfree(op); + if (cur != insert_list) + op = list_entry(cur, + struct pending_extent_op, + list); + } + } + btrfs_mark_buffer_dirty(leaf); + btrfs_release_path(extent_root, path); + + /* + * Ok backref's and items usually go right next to eachother, + * but if we could only insert 1 item that means that we + * inserted on the end of a leaf, and we have no idea what may + * be on the next leaf so we just play it safe. In order to + * try and help this case we insert the last thing on our + * insert list so hopefully it will end up being the last + * thing on the leaf and everything else will be before it, + * which will let us insert a whole bunch of items at the same + * time. + */ + if (ret == 1 && !last && (i + ret < total)) { + /* + * last: where we will pick up the next time around + * i: our current key to insert, will be total - 1 + * cur: the current op we are screwing with + * op: duh + */ + last = i + ret; + i = total - 1; + cur = insert_list->prev; + op = list_entry(cur, struct pending_extent_op, list); + } else if (last) { + /* + * ok we successfully inserted the last item on the + * list, lets reset everything + * + * i: our current key to insert, so where we left off + * last time + * last: done with this + * cur: the op we are messing with + * op: duh + * total: since we inserted the last key, we need to + * decrement total so we dont overflow + */ + i = last; + last = 0; + cur = insert_list->next; + op = list_entry(cur, struct pending_extent_op, list); + total--; + } else { + i += ret; + } + + cond_resched(); + } + ret = 0; + kfree(keys); + kfree(data_size); + return ret; +} + static int noinline insert_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, @@ -642,6 +896,267 @@ static int noinline remove_extent_backref(struct btrfs_trans_handle *trans, return ret; } +static int noinline free_extents(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, + struct list_head *del_list) +{ + struct btrfs_fs_info *info = extent_root->fs_info; + struct btrfs_path *path; + struct btrfs_key key, found_key; + struct extent_buffer *leaf; + struct list_head *cur; + struct pending_extent_op *op; + struct btrfs_extent_item *ei; + int ret, num_to_del, extent_slot = 0, found_extent = 0; + u32 refs; + u64 bytes_freed = 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + path->reada = 1; + +search: + /* search for the backref for the current ref we want to delete */ + cur = del_list->next; + op = list_entry(cur, struct pending_extent_op, list); + ret = lookup_extent_backref(trans, extent_root, path, op->bytenr, + op->orig_parent, + extent_root->root_key.objectid, + op->orig_generation, op->level, 1); + if (ret) { + printk("Unable to find backref byte nr %Lu root %Lu gen %Lu " + "owner %u\n", op->bytenr, + extent_root->root_key.objectid, op->orig_generation, + op->level); + btrfs_print_leaf(extent_root, path->nodes[0]); + WARN_ON(1); + goto out; + } + + extent_slot = path->slots[0]; + num_to_del = 1; + found_extent = 0; + + /* + * if we aren't the first item on the leaf we can move back one and see + * if our ref is right next to our extent item + */ + if (likely(extent_slot)) { + extent_slot--; + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + extent_slot); + if (found_key.objectid == op->bytenr && + found_key.type == BTRFS_EXTENT_ITEM_KEY && + found_key.offset == op->num_bytes) { + num_to_del++; + found_extent = 1; + } + } + + /* + * if we didn't find the extent we need to delete the backref and then + * search for the extent item key so we can update its ref count + */ + if (!found_extent) { + key.objectid = op->bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = op->num_bytes; + + ret = remove_extent_backref(trans, extent_root, path); + BUG_ON(ret); + btrfs_release_path(extent_root, path); + ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); + BUG_ON(ret); + extent_slot = path->slots[0]; + } + + /* this is where we update the ref count for the extent */ + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item); + refs = btrfs_extent_refs(leaf, ei); + BUG_ON(refs == 0); + refs--; + btrfs_set_extent_refs(leaf, ei, refs); + + btrfs_mark_buffer_dirty(leaf); + + /* + * This extent needs deleting. The reason cur_slot is extent_slot + + * num_to_del is because extent_slot points to the slot where the extent + * is, and if the backref was not right next to the extent we will be + * deleting at least 1 item, and will want to start searching at the + * slot directly next to extent_slot. However if we did find the + * backref next to the extent item them we will be deleting at least 2 + * items and will want to start searching directly after the ref slot + */ + if (!refs) { + struct list_head *pos, *n, *end; + int cur_slot = extent_slot+num_to_del; + u64 super_used; + u64 root_used; + + path->slots[0] = extent_slot; + bytes_freed = op->num_bytes; + + /* + * we need to see if we can delete multiple things at once, so + * start looping through the list of extents we are wanting to + * delete and see if their extent/backref's are right next to + * eachother and the extents only have 1 ref + */ + for (pos = cur->next; pos != del_list; pos = pos->next) { + struct pending_extent_op *tmp; + + tmp = list_entry(pos, struct pending_extent_op, list); + + /* we only want to delete extent+ref at this stage */ + if (cur_slot >= btrfs_header_nritems(leaf) - 1) + break; + + btrfs_item_key_to_cpu(leaf, &found_key, cur_slot); + if (found_key.objectid != tmp->bytenr || + found_key.type != BTRFS_EXTENT_ITEM_KEY || + found_key.offset != tmp->num_bytes) + break; + + /* check to make sure this extent only has one ref */ + ei = btrfs_item_ptr(leaf, cur_slot, + struct btrfs_extent_item); + if (btrfs_extent_refs(leaf, ei) != 1) + break; + + btrfs_item_key_to_cpu(leaf, &found_key, cur_slot+1); + if (found_key.objectid != tmp->bytenr || + found_key.type != BTRFS_EXTENT_REF_KEY || + found_key.offset != tmp->orig_parent) + break; + + /* + * the ref is right next to the extent, we can set the + * ref count to 0 since we will delete them both now + */ + btrfs_set_extent_refs(leaf, ei, 0); + + /* pin down the bytes for this extent */ + mutex_lock(&info->pinned_mutex); + ret = pin_down_bytes(trans, extent_root, tmp->bytenr, + tmp->num_bytes, tmp->level >= + BTRFS_FIRST_FREE_OBJECTID); + mutex_unlock(&info->pinned_mutex); + BUG_ON(ret < 0); + + /* + * use the del field to tell if we need to go ahead and + * free up the extent when we delete the item or not. + */ + tmp->del = ret; + bytes_freed += tmp->num_bytes; + + num_to_del += 2; + cur_slot += 2; + } + end = pos; + + /* update the free space counters */ + spin_lock_irq(&info->delalloc_lock); + super_used = btrfs_super_bytes_used(&info->super_copy); + btrfs_set_super_bytes_used(&info->super_copy, + super_used - bytes_freed); + spin_unlock_irq(&info->delalloc_lock); + + root_used = btrfs_root_used(&extent_root->root_item); + btrfs_set_root_used(&extent_root->root_item, + root_used - bytes_freed); + + /* delete the items */ + ret = btrfs_del_items(trans, extent_root, path, + path->slots[0], num_to_del); + BUG_ON(ret); + + /* + * loop through the extents we deleted and do the cleanup work + * on them + */ + for (pos = cur, n = pos->next; pos != end; + pos = n, n = pos->next) { + struct pending_extent_op *tmp; +#ifdef BIO_RW_DISCARD + u64 map_length; + struct btrfs_multi_bio *multi = NULL; +#endif + tmp = list_entry(pos, struct pending_extent_op, list); + + /* + * remember tmp->del tells us wether or not we pinned + * down the extent + */ + ret = update_block_group(trans, extent_root, + tmp->bytenr, tmp->num_bytes, 0, + tmp->del); + BUG_ON(ret); + +#ifdef BIO_RW_DISCARD + ret = btrfs_map_block(&info->mapping_tree, READ, + tmp->bytenr, &map_length, &multi, + 0); + if (!ret) { + struct btrfs_bio_stripe *stripe; + int i; + + stripe = multi->stripe; + + if (map_length > tmp->num_bytes) + map_length = tmp->num_bytes; + + for (i = 0; i < multi->num_stripes; + i++, stripe++) + blkdev_issue_discard(stripe->dev->bdev, + stripe->physical >> 9, + map_length >> 9); + kfree(multi); + } +#endif + list_del_init(&tmp->list); + unlock_extent(&info->extent_ins, tmp->bytenr, + tmp->bytenr + tmp->num_bytes - 1, + GFP_NOFS); + kfree(tmp); + } + } else if (refs && found_extent) { + /* + * the ref and extent were right next to eachother, but the + * extent still has a ref, so just free the backref and keep + * going + */ + ret = remove_extent_backref(trans, extent_root, path); + BUG_ON(ret); + + list_del_init(&op->list); + unlock_extent(&info->extent_ins, op->bytenr, + op->bytenr + op->num_bytes - 1, GFP_NOFS); + kfree(op); + } else { + /* + * the extent has multiple refs and the backref we were looking + * for was not right next to it, so just unlock and go next, + * we're good to go + */ + list_del_init(&op->list); + unlock_extent(&info->extent_ins, op->bytenr, + op->bytenr + op->num_bytes - 1, GFP_NOFS); + kfree(op); + } + + btrfs_release_path(extent_root, path); + if (!list_empty(del_list)) + goto search; + +out: + btrfs_free_path(path); + return ret; +} + static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 orig_parent, u64 parent, @@ -685,6 +1200,8 @@ static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, extent_op->generation = ref_generation; extent_op->orig_generation = orig_generation; extent_op->level = (int)owner_objectid; + INIT_LIST_HEAD(&extent_op->list); + extent_op->del = 0; set_extent_bits(&root->fs_info->extent_ins, bytenr, bytenr + num_bytes - 1, @@ -1426,9 +1943,8 @@ static int update_block_group(struct btrfs_trans_handle *trans, while(total) { cache = btrfs_lookup_block_group(info, bytenr); - if (!cache) { + if (!cache) return -1; - } byte_in_group = bytenr - cache->key.objectid; WARN_ON(byte_in_group > cache->key.offset); @@ -1605,102 +2121,176 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, u64 end; u64 priv; u64 search = 0; + u64 skipped = 0; struct btrfs_fs_info *info = extent_root->fs_info; struct btrfs_path *path; - struct btrfs_extent_ref *ref; - struct pending_extent_op *extent_op; - struct btrfs_key key; - struct btrfs_extent_item extent_item; + struct pending_extent_op *extent_op, *tmp; + struct list_head insert_list, update_list; int ret; - int err = 0; + int num_inserts = 0, max_inserts; - btrfs_set_stack_extent_refs(&extent_item, 1); path = btrfs_alloc_path(); + INIT_LIST_HEAD(&insert_list); + INIT_LIST_HEAD(&update_list); - while(1) { - mutex_lock(&info->extent_ins_mutex); + max_inserts = extent_root->leafsize / + (2 * sizeof(struct btrfs_key) + 2 * sizeof(struct btrfs_item) + + sizeof(struct btrfs_extent_ref) + + sizeof(struct btrfs_extent_item)); +again: + mutex_lock(&info->extent_ins_mutex); + while (1) { ret = find_first_extent_bit(&info->extent_ins, search, &start, &end, EXTENT_WRITEBACK); if (ret) { - mutex_unlock(&info->extent_ins_mutex); - if (search && all) { - search = 0; + if (skipped && all && !num_inserts) { + skipped = 0; continue; } + mutex_unlock(&info->extent_ins_mutex); break; } ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS); if (!ret) { + skipped = 1; search = end + 1; - mutex_unlock(&info->extent_ins_mutex); - cond_resched(); + if (need_resched()) { + mutex_unlock(&info->extent_ins_mutex); + cond_resched(); + mutex_lock(&info->extent_ins_mutex); + } continue; } - BUG_ON(ret < 0); ret = get_state_private(&info->extent_ins, start, &priv); BUG_ON(ret); - extent_op = (struct pending_extent_op *)(unsigned long)priv; - - mutex_unlock(&info->extent_ins_mutex); + extent_op = (struct pending_extent_op *)(unsigned long) priv; if (extent_op->type == PENDING_EXTENT_INSERT) { - key.objectid = start; - key.offset = end + 1 - start; - key.type = BTRFS_EXTENT_ITEM_KEY; - err = btrfs_insert_item(trans, extent_root, &key, - &extent_item, sizeof(extent_item)); - BUG_ON(err); + num_inserts++; + list_add_tail(&extent_op->list, &insert_list); + search = end + 1; + if (num_inserts == max_inserts) { + mutex_unlock(&info->extent_ins_mutex); + break; + } + } else if (extent_op->type == PENDING_BACKREF_UPDATE) { + list_add_tail(&extent_op->list, &update_list); + search = end + 1; + } else { + BUG(); + } + } - mutex_lock(&info->extent_ins_mutex); - clear_extent_bits(&info->extent_ins, start, end, - EXTENT_WRITEBACK, GFP_NOFS); - mutex_unlock(&info->extent_ins_mutex); + /* + * process teh update list, clear the writeback bit for it, and if + * somebody marked this thing for deletion then just unlock it and be + * done, the free_extents will handle it + */ + mutex_lock(&info->extent_ins_mutex); + list_for_each_entry_safe(extent_op, tmp, &update_list, list) { + clear_extent_bits(&info->extent_ins, extent_op->bytenr, + extent_op->bytenr + extent_op->num_bytes - 1, + EXTENT_WRITEBACK, GFP_NOFS); + if (extent_op->del) { + list_del_init(&extent_op->list); + unlock_extent(&info->extent_ins, extent_op->bytenr, + extent_op->bytenr + extent_op->num_bytes + - 1, GFP_NOFS); + kfree(extent_op); + } + } + mutex_unlock(&info->extent_ins_mutex); - err = insert_extent_backref(trans, extent_root, path, - start, extent_op->parent, - extent_root->root_key.objectid, - extent_op->generation, - extent_op->level); - BUG_ON(err); - } else if (extent_op->type == PENDING_BACKREF_UPDATE) { - err = lookup_extent_backref(trans, extent_root, path, - start, extent_op->orig_parent, - extent_root->root_key.objectid, - extent_op->orig_generation, - extent_op->level, 0); - BUG_ON(err); + /* + * still have things left on the update list, go ahead an update + * everything + */ + if (!list_empty(&update_list)) { + ret = update_backrefs(trans, extent_root, path, &update_list); + BUG_ON(ret); + } - mutex_lock(&info->extent_ins_mutex); - clear_extent_bits(&info->extent_ins, start, end, - EXTENT_WRITEBACK, GFP_NOFS); - mutex_unlock(&info->extent_ins_mutex); + /* + * if no inserts need to be done, but we skipped some extents and we + * need to make sure everything is cleaned then reset everything and + * go back to the beginning + */ + if (!num_inserts && all && skipped) { + search = 0; + skipped = 0; + INIT_LIST_HEAD(&update_list); + INIT_LIST_HEAD(&insert_list); + goto again; + } else if (!num_inserts) { + goto out; + } - key.objectid = start; - key.offset = extent_op->parent; - key.type = BTRFS_EXTENT_REF_KEY; - err = btrfs_set_item_key_safe(trans, extent_root, path, - &key); - BUG_ON(err); - ref = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_extent_ref); - btrfs_set_ref_generation(path->nodes[0], ref, - extent_op->generation); - btrfs_mark_buffer_dirty(path->nodes[0]); - btrfs_release_path(extent_root, path); - } else { - BUG_ON(1); + /* + * process the insert extents list. Again if we are deleting this + * extent, then just unlock it, pin down the bytes if need be, and be + * done with it. Saves us from having to actually insert the extent + * into the tree and then subsequently come along and delete it + */ + mutex_lock(&info->extent_ins_mutex); + list_for_each_entry_safe(extent_op, tmp, &insert_list, list) { + clear_extent_bits(&info->extent_ins, extent_op->bytenr, + extent_op->bytenr + extent_op->num_bytes - 1, + EXTENT_WRITEBACK, GFP_NOFS); + if (extent_op->del) { + list_del_init(&extent_op->list); + unlock_extent(&info->extent_ins, extent_op->bytenr, + extent_op->bytenr + extent_op->num_bytes + - 1, GFP_NOFS); + + mutex_lock(&extent_root->fs_info->pinned_mutex); + ret = pin_down_bytes(trans, extent_root, + extent_op->bytenr, + extent_op->num_bytes, 0); + mutex_unlock(&extent_root->fs_info->pinned_mutex); + + ret = update_block_group(trans, extent_root, + extent_op->bytenr, + extent_op->num_bytes, + 0, ret > 0); + BUG_ON(ret); + kfree(extent_op); + num_inserts--; } - kfree(extent_op); - unlock_extent(&info->extent_ins, start, end, GFP_NOFS); - if (all) - search = 0; - else - search = end + 1; + } + mutex_unlock(&info->extent_ins_mutex); - cond_resched(); + ret = insert_extents(trans, extent_root, path, &insert_list, + num_inserts); + BUG_ON(ret); + + /* + * if we broke out of the loop in order to insert stuff because we hit + * the maximum number of inserts at a time we can handle, then loop + * back and pick up where we left off + */ + if (num_inserts == max_inserts) { + INIT_LIST_HEAD(&insert_list); + INIT_LIST_HEAD(&update_list); + num_inserts = 0; + goto again; } + + /* + * again, if we need to make absolutely sure there are no more pending + * extent operations left and we know that we skipped some, go back to + * the beginning and do it all again + */ + if (all && skipped) { + INIT_LIST_HEAD(&insert_list); + INIT_LIST_HEAD(&update_list); + search = 0; + skipped = 0; + num_inserts = 0; + goto again; + } +out: btrfs_free_path(path); return 0; } @@ -1802,6 +2392,12 @@ static int __free_extent(struct btrfs_trans_handle *trans, btrfs_release_path(extent_root, path); ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); + if (ret) { + printk(KERN_ERR "umm, got %d back from search" + ", was looking for %Lu\n", ret, + bytenr); + btrfs_print_leaf(extent_root, path->nodes[0]); + } BUG_ON(ret); extent_slot = path->slots[0]; } @@ -1921,32 +2517,42 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct u64 end; u64 priv; u64 search = 0; + int nr = 0, skipped = 0; struct extent_io_tree *pending_del; struct extent_io_tree *extent_ins; struct pending_extent_op *extent_op; struct btrfs_fs_info *info = extent_root->fs_info; + struct list_head delete_list; + INIT_LIST_HEAD(&delete_list); extent_ins = &extent_root->fs_info->extent_ins; pending_del = &extent_root->fs_info->pending_del; +again: + mutex_lock(&info->extent_ins_mutex); while(1) { - mutex_lock(&info->extent_ins_mutex); ret = find_first_extent_bit(pending_del, search, &start, &end, EXTENT_WRITEBACK); if (ret) { - mutex_unlock(&info->extent_ins_mutex); - if (all && search) { + if (all && skipped && !nr) { search = 0; continue; } + mutex_unlock(&info->extent_ins_mutex); break; } ret = try_lock_extent(extent_ins, start, end, GFP_NOFS); if (!ret) { search = end+1; - mutex_unlock(&info->extent_ins_mutex); - cond_resched(); + skipped = 1; + + if (need_resched()) { + mutex_unlock(&info->extent_ins_mutex); + cond_resched(); + mutex_lock(&info->extent_ins_mutex); + } + continue; } BUG_ON(ret < 0); @@ -1959,15 +2565,8 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct GFP_NOFS); if (!test_range_bit(extent_ins, start, end, EXTENT_WRITEBACK, 0)) { - mutex_unlock(&info->extent_ins_mutex); -free_extent: - ret = __free_extent(trans, extent_root, - start, end + 1 - start, - extent_op->orig_parent, - extent_root->root_key.objectid, - extent_op->orig_generation, - extent_op->level, 1, 0); - kfree(extent_op); + list_add_tail(&extent_op->list, &delete_list); + nr++; } else { kfree(extent_op); @@ -1980,10 +2579,12 @@ free_extent: clear_extent_bits(&info->extent_ins, start, end, EXTENT_WRITEBACK, GFP_NOFS); - mutex_unlock(&info->extent_ins_mutex); - - if (extent_op->type == PENDING_BACKREF_UPDATE) - goto free_extent; + if (extent_op->type == PENDING_BACKREF_UPDATE) { + list_add_tail(&extent_op->list, &delete_list); + search = end + 1; + nr++; + continue; + } mutex_lock(&extent_root->fs_info->pinned_mutex); ret = pin_down_bytes(trans, extent_root, start, @@ -1993,19 +2594,34 @@ free_extent: ret = update_block_group(trans, extent_root, start, end + 1 - start, 0, ret > 0); + unlock_extent(extent_ins, start, end, GFP_NOFS); BUG_ON(ret); kfree(extent_op); } if (ret) err = ret; - unlock_extent(extent_ins, start, end, GFP_NOFS); - if (all) - search = 0; - else - search = end + 1; - cond_resched(); + search = end + 1; + + if (need_resched()) { + mutex_unlock(&info->extent_ins_mutex); + cond_resched(); + mutex_lock(&info->extent_ins_mutex); + } } + + if (nr) { + ret = free_extents(trans, extent_root, &delete_list); + BUG_ON(ret); + } + + if (all && skipped) { + INIT_LIST_HEAD(&delete_list); + search = 0; + nr = 0; + goto again; + } + return err; } @@ -2024,7 +2640,29 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, WARN_ON(num_bytes < root->sectorsize); if (root == extent_root) { - struct pending_extent_op *extent_op; + struct pending_extent_op *extent_op = NULL; + + mutex_lock(&root->fs_info->extent_ins_mutex); + if (test_range_bit(&root->fs_info->extent_ins, bytenr, + bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) { + u64 priv; + ret = get_state_private(&root->fs_info->extent_ins, + bytenr, &priv); + BUG_ON(ret); + extent_op = (struct pending_extent_op *) + (unsigned long)priv; + + extent_op->del = 1; + if (extent_op->type == PENDING_EXTENT_INSERT) { + mutex_unlock(&root->fs_info->extent_ins_mutex); + return 0; + } + } + + if (extent_op) { + ref_generation = extent_op->orig_generation; + parent = extent_op->orig_parent; + } extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); BUG_ON(!extent_op); @@ -2037,8 +2675,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, extent_op->generation = ref_generation; extent_op->orig_generation = ref_generation; extent_op->level = (int)owner_objectid; + INIT_LIST_HEAD(&extent_op->list); + extent_op->del = 0; - mutex_lock(&root->fs_info->extent_ins_mutex); set_extent_bits(&root->fs_info->pending_del, bytenr, bytenr + num_bytes - 1, EXTENT_WRITEBACK, GFP_NOFS); @@ -2515,6 +3154,8 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, extent_op->generation = ref_generation; extent_op->orig_generation = 0; extent_op->level = (int)owner; + INIT_LIST_HEAD(&extent_op->list); + extent_op->del = 0; mutex_lock(&root->fs_info->extent_ins_mutex); set_extent_bits(&root->fs_info->extent_ins, ins->objectid, -- cgit v1.2.3-70-g09d2 From c146afad2c7fea6a366d4945c1bab9b03880f526 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Wed, 12 Nov 2008 14:34:12 -0500 Subject: Btrfs: mount ro and remount support This patch adds mount ro and remount support. The main changes in patch are: adding btrfs_remount and related helper function; splitting the transaction related code out of close_ctree into btrfs_commit_super; updating allocator to properly handle read only block group. Signed-off-by: Yan Zheng --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 83 ++++++++++++++++++++++++++++++++++++++------------ fs/btrfs/disk-io.h | 2 ++ fs/btrfs/extent-tree.c | 61 +++++++++++++++++++++---------------- fs/btrfs/inode.c | 15 +++------ fs/btrfs/ioctl.c | 22 +++++++++++++ fs/btrfs/super.c | 37 ++++++++++++++++++++-- 7 files changed, 163 insertions(+), 58 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f575939e025..c4c6c127323 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -541,6 +541,7 @@ struct btrfs_space_info { u64 bytes_used; u64 bytes_pinned; u64 bytes_reserved; + u64 bytes_readonly; int full; int force_alloc; struct list_head list; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3b0e974a9e9..c599f0ee997 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1075,10 +1075,12 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } - ret = btrfs_find_dead_roots(fs_info->tree_root, - root->root_key.objectid, root); - BUG_ON(ret); - + if (!(fs_info->sb->s_flags & MS_RDONLY)) { + ret = btrfs_find_dead_roots(fs_info->tree_root, + root->root_key.objectid, root); + BUG_ON(ret); + btrfs_orphan_cleanup(root); + } return root; } @@ -1700,7 +1702,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_read_block_groups(extent_root); - fs_info->generation = btrfs_super_generation(disk_super) + 1; + fs_info->generation = generation + 1; + fs_info->last_trans_committed = generation; fs_info->data_alloc_profile = (u64)-1; fs_info->metadata_alloc_profile = (u64)-1; fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; @@ -1715,6 +1718,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!fs_info->transaction_kthread) goto fail_cleaner; + if (sb->s_flags & MS_RDONLY) + return tree_root; + if (btrfs_super_log_root(disk_super) != 0) { u32 blocksize; u64 bytenr = btrfs_super_log_root(disk_super); @@ -1735,7 +1741,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, ret = btrfs_recover_log_trees(log_tree_root); BUG_ON(ret); } - fs_info->last_trans_committed = btrfs_super_generation(disk_super); ret = btrfs_cleanup_reloc_trees(tree_root); BUG_ON(ret); @@ -1955,28 +1960,69 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info) return 0; } -int close_ctree(struct btrfs_root *root) +int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) { + u64 root_objectid = 0; + struct btrfs_root *gang[8]; + int i; int ret; - struct btrfs_trans_handle *trans; - struct btrfs_fs_info *fs_info = root->fs_info; - fs_info->closing = 1; - smp_mb(); + while (1) { + ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, + (void **)gang, root_objectid, + ARRAY_SIZE(gang)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + root_objectid = gang[i]->root_key.objectid; + ret = btrfs_find_dead_roots(fs_info->tree_root, + root_objectid, gang[i]); + BUG_ON(ret); + btrfs_orphan_cleanup(gang[i]); + } + root_objectid++; + } + return 0; +} - kthread_stop(root->fs_info->transaction_kthread); - kthread_stop(root->fs_info->cleaner_kthread); +int btrfs_commit_super(struct btrfs_root *root) +{ + struct btrfs_trans_handle *trans; + int ret; + mutex_lock(&root->fs_info->cleaner_mutex); btrfs_clean_old_snapshots(root); + mutex_unlock(&root->fs_info->cleaner_mutex); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); - /* run commit again to drop the original snapshot */ + BUG_ON(ret); + /* run commit again to drop the original snapshot */ trans = btrfs_start_transaction(root, 1); btrfs_commit_transaction(trans, root); ret = btrfs_write_and_wait_transaction(NULL, root); BUG_ON(ret); - write_ctree_super(NULL, root); + ret = write_ctree_super(NULL, root); + return ret; +} + +int close_ctree(struct btrfs_root *root) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + int ret; + + fs_info->closing = 1; + smp_mb(); + + kthread_stop(root->fs_info->transaction_kthread); + kthread_stop(root->fs_info->cleaner_kthread); + + if (!(fs_info->sb->s_flags & MS_RDONLY)) { + ret = btrfs_commit_super(root); + if (ret) { + printk("btrfs: commit super returns %d\n", ret); + } + } if (fs_info->delalloc_bytes) { printk("btrfs: at unmount delalloc count %Lu\n", @@ -2000,12 +2046,10 @@ int close_ctree(struct btrfs_root *root) free_extent_buffer(root->fs_info->dev_root->node); btrfs_free_block_groups(root->fs_info); - fs_info->closing = 2; - del_fs_roots(fs_info); - filemap_write_and_wait(fs_info->btree_inode->i_mapping); + del_fs_roots(fs_info); - truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); + iput(fs_info->btree_inode); btrfs_stop_workers(&fs_info->fixup_workers); btrfs_stop_workers(&fs_info->delalloc_workers); @@ -2014,7 +2058,6 @@ int close_ctree(struct btrfs_root *root) btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->submit_workers); - iput(fs_info->btree_inode); #if 0 while(!list_empty(&fs_info->hashers)) { struct btrfs_hasher *hasher; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index b8d5948fa27..717e94811e4 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -38,6 +38,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, int close_ctree(struct btrfs_root *root); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_commit_super(struct btrfs_root *root); struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, @@ -49,6 +50,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, struct btrfs_key *location); struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, struct btrfs_key *location); +int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); int btrfs_insert_dev_radix(struct btrfs_root *root, struct block_device *bdev, u64 device_id, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e785f0a0632..af2de30dbea 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1794,7 +1794,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, *space_info = found; return 0; } - found = kmalloc(sizeof(*found), GFP_NOFS); + found = kzalloc(sizeof(*found), GFP_NOFS); if (!found) return -ENOMEM; @@ -1807,6 +1807,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->bytes_used = bytes_used; found->bytes_pinned = 0; found->bytes_reserved = 0; + found->bytes_readonly = 0; found->full = 0; found->force_alloc = 0; *space_info = found; @@ -1829,6 +1830,19 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) } } +static void set_block_group_readonly(struct btrfs_block_group_cache *cache) +{ + spin_lock(&cache->space_info->lock); + spin_lock(&cache->lock); + if (!cache->ro) { + cache->space_info->bytes_readonly += cache->key.offset - + btrfs_block_group_used(&cache->item); + cache->ro = 1; + } + spin_unlock(&cache->lock); + spin_unlock(&cache->space_info->lock); +} + static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags) { u64 num_devices = root->fs_info->fs_devices->num_devices; @@ -1865,7 +1879,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 thresh; u64 start; u64 num_bytes; - int ret = 0, waited = 0; + int ret = 0; + + mutex_lock(&extent_root->fs_info->chunk_mutex); flags = reduce_alloc_profile(extent_root, flags); @@ -1887,46 +1903,28 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, goto out; } - thresh = div_factor(space_info->total_bytes, 6); + thresh = space_info->total_bytes - space_info->bytes_readonly; + thresh = div_factor(thresh, 6); if (!force && (space_info->bytes_used + space_info->bytes_pinned + space_info->bytes_reserved + alloc_bytes) < thresh) { spin_unlock(&space_info->lock); goto out; } - spin_unlock(&space_info->lock); - ret = mutex_trylock(&extent_root->fs_info->chunk_mutex); - if (!ret && !force) { - goto out; - } else if (!ret) { - mutex_lock(&extent_root->fs_info->chunk_mutex); - waited = 1; - } - - if (waited) { - spin_lock(&space_info->lock); - if (space_info->full) { - spin_unlock(&space_info->lock); - goto out_unlock; - } - spin_unlock(&space_info->lock); - } - ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags); if (ret) { printk("space info full %Lu\n", flags); space_info->full = 1; - goto out_unlock; + goto out; } ret = btrfs_make_block_group(trans, extent_root, 0, flags, BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); BUG_ON(ret); -out_unlock: - mutex_unlock(&extent_root->fs_info->chunk_mutex); out: + mutex_unlock(&extent_root->fs_info->chunk_mutex); return ret; } @@ -1956,12 +1954,18 @@ static int update_block_group(struct btrfs_trans_handle *trans, if (alloc) { old_val += num_bytes; cache->space_info->bytes_used += num_bytes; + if (cache->ro) { + cache->space_info->bytes_readonly -= num_bytes; + WARN_ON(1); + } btrfs_set_block_group_used(&cache->item, old_val); spin_unlock(&cache->lock); spin_unlock(&cache->space_info->lock); } else { old_val -= num_bytes; cache->space_info->bytes_used -= num_bytes; + if (cache->ro) + cache->space_info->bytes_readonly += num_bytes; btrfs_set_block_group_used(&cache->item, old_val); spin_unlock(&cache->lock); spin_unlock(&cache->space_info->lock); @@ -5560,8 +5564,7 @@ int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) BUG_ON(IS_ERR(reloc_inode)); __alloc_chunk_for_shrink(root, block_group, 1); - block_group->ro = 1; - block_group->space_info->total_bytes -= block_group->key.offset; + set_block_group_readonly(block_group); btrfs_start_delalloc_inodes(info->tree_root); btrfs_wait_ordered_extents(info->tree_root, 0); @@ -5868,6 +5871,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, block_group = btrfs_lookup_block_group(root->fs_info, group_start); BUG_ON(!block_group); + BUG_ON(!block_group->ro); memcpy(&key, &block_group->key, sizeof(key)); @@ -5881,6 +5885,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, list_del(&block_group->list); up_write(&block_group->space_info->groups_sem); + spin_lock(&block_group->space_info->lock); + block_group->space_info->total_bytes -= block_group->key.offset; + block_group->space_info->bytes_readonly -= block_group->key.offset; + spin_unlock(&block_group->space_info->lock); + /* memset(shrink_block_group, 0, sizeof(*shrink_block_group)); kfree(shrink_block_group); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2ed2deacde9..3e3620e69bb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1808,10 +1808,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) struct inode *inode; int ret = 0, nr_unlink = 0, nr_truncate = 0; - /* don't do orphan cleanup if the fs is readonly. */ - if (root->fs_info->sb->s_flags & MS_RDONLY) - return; - path = btrfs_alloc_path(); if (!path) return; @@ -3050,7 +3046,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, struct btrfs_root *root = bi->root; struct btrfs_root *sub_root = root; struct btrfs_key location; - int ret, new, do_orphan = 0; + int ret, new; if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); @@ -3076,13 +3072,9 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, if (new && root != sub_root) { igrab(inode); sub_root->inode = inode; - do_orphan = 1; } } - if (unlikely(do_orphan)) - btrfs_orphan_cleanup(sub_root); - return d_splice_alias(inode, dentry); } @@ -3237,7 +3229,7 @@ int btrfs_write_inode(struct inode *inode, int wait) struct btrfs_trans_handle *trans; int ret = 0; - if (root->fs_info->closing > 1) + if (root->fs_info->btree_inode == inode) return 0; if (wait) { @@ -4625,6 +4617,9 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root) struct inode *inode; unsigned long flags; + if (root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); while(!list_empty(head)) { binode = list_entry(head->next, struct btrfs_inode, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4d7cc7c504d..52863cebd59 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -378,6 +378,9 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) int namelen; int mod = 0; + if (root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); if (!vol_args) @@ -478,6 +481,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, int namelen; int ret; + if (root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); if (!vol_args) @@ -534,6 +540,11 @@ static int btrfs_ioctl_defrag(struct file *file) { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + + ret = mnt_want_write(file->f_path.mnt); + if (ret) + return ret; switch (inode->i_mode & S_IFMT) { case S_IFDIR: @@ -575,6 +586,9 @@ long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) struct btrfs_ioctl_vol_args *vol_args; int ret; + if (root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); if (!vol_args) @@ -621,6 +635,10 @@ long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, u64 off, * they don't overlap)? */ + ret = mnt_want_write(file->f_path.mnt); + if (ret) + return ret; + src_file = fget(srcfd); if (!src_file) return -EBADF; @@ -958,6 +976,10 @@ long btrfs_ioctl_trans_start(struct file *file) goto out; } + ret = mnt_want_write(file->f_path.mnt); + if (ret) + goto out; + mutex_lock(&root->fs_info->trans_mutex); root->fs_info->open_ioctl_trans++; mutex_unlock(&root->fs_info->trans_mutex); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ab9d5e89ed1..04a3bf81650 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -370,6 +370,9 @@ int btrfs_sync_fs(struct super_block *sb, int wait) int ret; root = btrfs_sb(sb); + if (sb->s_flags & MS_RDONLY) + return 0; + sb->s_dirt = 0; if (!wait) { filemap_flush(root->fs_info->btree_inode->i_mapping); @@ -438,7 +441,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, up_write(&s->s_umount); deactivate_super(s); error = -EBUSY; - goto error_bdev; + goto error_close_devices; } } else { @@ -487,7 +490,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, error_s: error = PTR_ERR(s); -error_bdev: +error_close_devices: btrfs_close_devices(fs_devices); error_free_subvol_name: kfree(subvol_name); @@ -495,6 +498,35 @@ error: return error; } +static int btrfs_remount(struct super_block *sb, int *flags, char *data) +{ + struct btrfs_root *root = btrfs_sb(sb); + int ret; + + if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + return 0; + + if (*flags & MS_RDONLY) { + sb->s_flags |= MS_RDONLY; + + ret = btrfs_commit_super(root); + WARN_ON(ret); + } else { + if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) + return -EINVAL; + + ret = btrfs_cleanup_reloc_trees(root); + WARN_ON(ret); + + ret = btrfs_cleanup_fs_roots(root->fs_info); + WARN_ON(ret); + + sb->s_flags &= ~MS_RDONLY; + } + + return 0; +} + static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct btrfs_root *root = btrfs_sb(dentry->d_sb); @@ -582,6 +614,7 @@ static struct super_operations btrfs_super_ops = { .alloc_inode = btrfs_alloc_inode, .destroy_inode = btrfs_destroy_inode, .statfs = btrfs_statfs, + .remount_fs = btrfs_remount, .write_super_lockfs = btrfs_write_super_lockfs, .unlockfs = btrfs_unlockfs, }; -- cgit v1.2.3-70-g09d2 From 2b82032c34ec40515d3c45c36cd1961f37977de8 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Mon, 17 Nov 2008 21:11:30 -0500 Subject: Btrfs: Seed device support Seed device is a special btrfs with SEEDING super flag set and can only be mounted in read-only mode. Seed devices allow people to create new btrfs on top of it. The new FS contains the same contents as the seed device, but it can be mounted in read-write mode. This patch does the following: 1) split code in btrfs_alloc_chunk into two parts. The first part does makes the newly allocated chunk usable, but does not do any operation that modifies the chunk tree. The second part does the the chunk tree modifications. This division is for the bootstrap step of adding storage to the seed device. 2) Update device management code to handle seed device. The basic idea is: For an FS grown from seed devices, its seed devices are put into a list. Seed devices are opened on demand at mounting time. If any seed device is missing or has been changed, btrfs kernel module will refuse to mount the FS. 3) make btrfs_find_block_group not return NULL when all block groups are read-only. Signed-off-by: Yan Zheng --- fs/btrfs/ctree.c | 8 + fs/btrfs/ctree.h | 18 +- fs/btrfs/disk-io.c | 56 ++- fs/btrfs/extent-tree.c | 31 +- fs/btrfs/ioctl.c | 2 +- fs/btrfs/super.c | 9 + fs/btrfs/volumes.c | 1131 ++++++++++++++++++++++++++++++++++++------------ fs/btrfs/volumes.h | 20 +- 8 files changed, 946 insertions(+), 329 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 8bb452456d9..dd1c03aea2d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -185,6 +185,10 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, btrfs_set_header_owner(cow, new_root_objectid); btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); + write_extent_buffer(cow, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(cow), + BTRFS_FSID_SIZE); + WARN_ON(btrfs_header_generation(buf) > trans->transid); ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL); kfree(new_root); @@ -274,6 +278,10 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, btrfs_set_header_owner(cow, root->root_key.objectid); btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); + write_extent_buffer(cow, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(cow), + BTRFS_FSID_SIZE); + WARN_ON(btrfs_header_generation(buf) > trans->transid); if (btrfs_header_generation(buf) != trans->transid) { u32 nr_extents; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c4c6c127323..5ff74282a62 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -177,6 +177,9 @@ struct btrfs_dev_item { /* type and info about this device */ __le64 type; + /* expected generation for this device */ + __le64 generation; + /* grouping information for allocation decisions */ __le32 dev_group; @@ -188,6 +191,9 @@ struct btrfs_dev_item { /* btrfs generated uuid for this device */ u8 uuid[BTRFS_UUID_SIZE]; + + /* uuid of FS who owns this device */ + u8 fsid[BTRFS_UUID_SIZE]; } __attribute__ ((__packed__)); struct btrfs_stripe { @@ -263,6 +269,7 @@ struct btrfs_header { sizeof(struct btrfs_item) - \ sizeof(struct btrfs_file_extent_item)) +#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) /* * this is a very generous portion of the super block, giving us @@ -278,7 +285,7 @@ struct btrfs_header { struct btrfs_super_block { u8 csum[BTRFS_CSUM_SIZE]; /* the first 4 fields must match struct btrfs_header */ - u8 fsid[16]; /* FS specific uuid */ + u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ __le64 bytenr; /* this block number */ __le64 flags; @@ -941,6 +948,7 @@ BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64); BTRFS_SETGET_FUNCS(device_group, struct btrfs_dev_item, dev_group, 32); BTRFS_SETGET_FUNCS(device_seek_speed, struct btrfs_dev_item, seek_speed, 8); BTRFS_SETGET_FUNCS(device_bandwidth, struct btrfs_dev_item, bandwidth, 8); +BTRFS_SETGET_FUNCS(device_generation, struct btrfs_dev_item, generation, 64); BTRFS_SETGET_STACK_FUNCS(stack_device_type, struct btrfs_dev_item, type, 64); BTRFS_SETGET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item, @@ -960,12 +968,19 @@ BTRFS_SETGET_STACK_FUNCS(stack_device_seek_speed, struct btrfs_dev_item, seek_speed, 8); BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item, bandwidth, 8); +BTRFS_SETGET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item, + generation, 64); static inline char *btrfs_device_uuid(struct btrfs_dev_item *d) { return (char *)d + offsetof(struct btrfs_dev_item, uuid); } +static inline char *btrfs_device_fsid(struct btrfs_dev_item *d) +{ + return (char *)d + offsetof(struct btrfs_dev_item, fsid); +} + BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64); BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64); BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64); @@ -1661,6 +1676,7 @@ int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, struct extent_buffer *buf, u64 orig_start); int btrfs_add_dead_reloc_root(struct btrfs_root *root); int btrfs_cleanup_reloc_trees(struct btrfs_root *root); +u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); /* ctree.c */ int btrfs_previous_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c599f0ee997..82833e5d84b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -345,6 +345,25 @@ out: return 0; } +static int check_tree_block_fsid(struct btrfs_root *root, + struct extent_buffer *eb) +{ + struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; + u8 fsid[BTRFS_UUID_SIZE]; + int ret = 1; + + read_extent_buffer(eb, fsid, (unsigned long)btrfs_header_fsid(eb), + BTRFS_FSID_SIZE); + while (fs_devices) { + if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) { + ret = 0; + break; + } + fs_devices = fs_devices->seed; + } + return ret; +} + int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state) { @@ -382,9 +401,7 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, ret = -EIO; goto err; } - if (memcmp_extent_buffer(eb, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(eb), - BTRFS_FSID_SIZE)) { + if (check_tree_block_fsid(root, eb)) { printk("bad fsid on block %Lu\n", eb->start); ret = -EIO; goto err; @@ -1558,9 +1575,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; - err = btrfs_parse_options(tree_root, options); - if (err) + ret = btrfs_parse_options(tree_root, options); + if (ret) { + err = ret; goto fail_sb_buffer; + } /* * we need to start all the end_io workers up front because the @@ -1610,18 +1629,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_start_workers(&fs_info->endio_write_workers, fs_info->thread_pool_size); - err = -EINVAL; - if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) { - printk("Btrfs: wanted %llu devices, but found %llu\n", - (unsigned long long)btrfs_super_num_devices(disk_super), - (unsigned long long)fs_devices->open_devices); - if (btrfs_test_opt(tree_root, DEGRADED)) - printk("continuing in degraded mode\n"); - else { - goto fail_sb_buffer; - } - } - fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 4 * 1024 * 1024 / PAGE_CACHE_SIZE); @@ -1672,7 +1679,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_chunk_tree(chunk_root); mutex_unlock(&fs_info->chunk_mutex); - BUG_ON(ret); + if (ret) { + printk("btrfs: failed to read chunk tree on %s\n", sb->s_id); + goto fail_chunk_root; + } btrfs_close_extra_devices(fs_devices); @@ -1684,7 +1694,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_super_root(disk_super), blocksize, generation); if (!tree_root->node) - goto fail_sb_buffer; + goto fail_chunk_root; ret = find_and_setup_root(tree_root, fs_info, @@ -1753,6 +1763,8 @@ fail_extent_root: free_extent_buffer(extent_root->node); fail_tree_root: free_extent_buffer(tree_root->node); +fail_chunk_root: + free_extent_buffer(chunk_root->node); fail_sys_array: fail_sb_buffer: btrfs_stop_workers(&fs_info->fixup_workers); @@ -1823,9 +1835,10 @@ int write_all_supers(struct btrfs_root *root) total_errors++; continue; } - if (!dev->in_fs_metadata) + if (!dev->in_fs_metadata || !dev->writeable) continue; + btrfs_set_stack_device_generation(dev_item, 0); btrfs_set_stack_device_type(dev_item, dev->type); btrfs_set_stack_device_id(dev_item, dev->devid); btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes); @@ -1834,6 +1847,7 @@ int write_all_supers(struct btrfs_root *root) btrfs_set_stack_device_io_width(dev_item, dev->io_width); btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE); + memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE); flags = btrfs_super_flags(sb); btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN); @@ -1881,7 +1895,7 @@ int write_all_supers(struct btrfs_root *root) dev = list_entry(cur, struct btrfs_device, dev_list); if (!dev->bdev) continue; - if (!dev->in_fs_metadata) + if (!dev->in_fs_metadata || !dev->writeable) continue; BUG_ON(!dev->pending_io); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index af2de30dbea..197422c1dc4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -355,7 +355,7 @@ __btrfs_find_block_group(struct btrfs_root *root, if (search_start) { struct btrfs_block_group_cache *shint; shint = btrfs_lookup_first_block_group(info, search_start); - if (shint && block_group_bits(shint, data) && !shint->ro) { + if (shint && block_group_bits(shint, data)) { spin_lock(&shint->lock); used = btrfs_block_group_used(&shint->item); if (used + shint->pinned + shint->reserved < @@ -366,7 +366,7 @@ __btrfs_find_block_group(struct btrfs_root *root, spin_unlock(&shint->lock); } } - if (hint && !hint->ro && block_group_bits(hint, data)) { + if (hint && block_group_bits(hint, data)) { spin_lock(&hint->lock); used = btrfs_block_group_used(&hint->item); if (used + hint->pinned + hint->reserved < @@ -392,7 +392,7 @@ again: last = cache->key.objectid + cache->key.offset; used = btrfs_block_group_used(&cache->item); - if (!cache->ro && block_group_bits(cache, data)) { + if (block_group_bits(cache, data)) { free_check = div_factor(cache->key.offset, factor); if (used + cache->pinned + cache->reserved < free_check) { @@ -1843,9 +1843,9 @@ static void set_block_group_readonly(struct btrfs_block_group_cache *cache) spin_unlock(&cache->space_info->lock); } -static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags) +u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) { - u64 num_devices = root->fs_info->fs_devices->num_devices; + u64 num_devices = root->fs_info->fs_devices->rw_devices; if (num_devices == 1) flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); @@ -1877,13 +1877,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, { struct btrfs_space_info *space_info; u64 thresh; - u64 start; - u64 num_bytes; int ret = 0; mutex_lock(&extent_root->fs_info->chunk_mutex); - flags = reduce_alloc_profile(extent_root, flags); + flags = btrfs_reduce_alloc_profile(extent_root, flags); space_info = __find_space_info(extent_root->fs_info, flags); if (!space_info) { @@ -1913,16 +1911,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, } spin_unlock(&space_info->lock); - ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags); + ret = btrfs_alloc_chunk(trans, extent_root, flags); if (ret) { printk("space info full %Lu\n", flags); space_info->full = 1; - goto out; } - - ret = btrfs_make_block_group(trans, extent_root, 0, flags, - BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); - BUG_ON(ret); out: mutex_unlock(&extent_root->fs_info->chunk_mutex); return ret; @@ -3040,7 +3033,7 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; } again: - data = reduce_alloc_profile(root, data); + data = btrfs_reduce_alloc_profile(root, data); /* * the only place that sets empty_size is btrfs_realloc_node, which * is not called recursively on allocations @@ -5136,7 +5129,8 @@ static int noinline relocate_one_path(struct btrfs_trans_handle *trans, else btrfs_node_key_to_cpu(eb, &keys[level], 0); } - if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { + if (nodes[0] && + ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { eb = path->nodes[0]; ret = replace_extents_in_leaf(trans, reloc_root, eb, group, reloc_inode); @@ -5377,7 +5371,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; - num_devices = root->fs_info->fs_devices->num_devices; + num_devices = root->fs_info->fs_devices->rw_devices; if (num_devices == 1) { stripped |= BTRFS_BLOCK_GROUP_DUP; stripped = flags & ~stripped; @@ -5801,6 +5795,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) BUG_ON(ret); set_avail_alloc_bits(root->fs_info, cache->flags); + if (btrfs_chunk_readonly(root, cache->key.objectid)) + set_block_group_readonly(cache); } ret = 0; error: @@ -5889,6 +5885,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, block_group->space_info->total_bytes -= block_group->key.offset; block_group->space_info->bytes_readonly -= block_group->key.offset; spin_unlock(&block_group->space_info->lock); + block_group->space_info->full = 0; /* memset(shrink_block_group, 0, sizeof(*shrink_block_group)); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 52863cebd59..f43df72b0e1 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -405,7 +405,7 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) devid = simple_strtoull(devstr, &end, 10); printk(KERN_INFO "resizing devid %llu\n", devid); } - device = btrfs_find_device(root, devid, NULL); + device = btrfs_find_device(root, devid, NULL, NULL); if (!device) { printk(KERN_INFO "resizer unable to find device %llu\n", devid); ret = -EINVAL; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 04a3bf81650..92393cc60d0 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -431,6 +431,11 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, if (error) goto error_free_subvol_name; + if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) { + error = -EACCES; + goto error_close_devices; + } + bdev = fs_devices->latest_bdev; s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); if (IS_ERR(s)) @@ -444,6 +449,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, goto error_close_devices; } + btrfs_close_devices(fs_devices); } else { char b[BDEVNAME_SIZE]; @@ -512,6 +518,9 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) ret = btrfs_commit_super(root); WARN_ON(ret); } else { + if (root->fs_info->fs_devices->rw_devices == 0) + return -EACCES; + if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) return -EINVAL; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 80a27284dbf..d6f1996de62 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -40,6 +40,12 @@ struct map_lookup { struct btrfs_bio_stripe stripes[]; }; +static int init_first_rw_device(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_device *device); +static int btrfs_relocate_sys_chunks(struct btrfs_root *root); + + #define map_lookup_size(n) (sizeof(struct map_lookup) + \ (sizeof(struct btrfs_bio_stripe) * (n))) @@ -69,25 +75,31 @@ static void unlock_chunks(struct btrfs_root *root) int btrfs_cleanup_fs_uuids(void) { struct btrfs_fs_devices *fs_devices; - struct list_head *uuid_cur; - struct list_head *devices_cur; struct btrfs_device *dev; - list_for_each(uuid_cur, &fs_uuids) { - fs_devices = list_entry(uuid_cur, struct btrfs_fs_devices, - list); + while (!list_empty(&fs_uuids)) { + fs_devices = list_entry(fs_uuids.next, + struct btrfs_fs_devices, list); + list_del(&fs_devices->list); while(!list_empty(&fs_devices->devices)) { - devices_cur = fs_devices->devices.next; - dev = list_entry(devices_cur, struct btrfs_device, - dev_list); + dev = list_entry(fs_devices->devices.next, + struct btrfs_device, dev_list); if (dev->bdev) { close_bdev_excl(dev->bdev); fs_devices->open_devices--; } + fs_devices->num_devices--; + if (dev->writeable) + fs_devices->rw_devices--; list_del(&dev->dev_list); + list_del(&dev->dev_alloc_list); kfree(dev->name); kfree(dev); } + WARN_ON(fs_devices->num_devices); + WARN_ON(fs_devices->open_devices); + WARN_ON(fs_devices->rw_devices); + kfree(fs_devices); } return 0; } @@ -257,6 +269,9 @@ static noinline int device_list_add(const char *path, disk_super->dev_item.uuid); } if (!device) { + if (fs_devices->opened) + return -EBUSY; + device = kzalloc(sizeof(*device), GFP_NOFS); if (!device) { /* we can safely leave the fs_devices entry around */ @@ -273,8 +288,9 @@ static noinline int device_list_add(const char *path, kfree(device); return -ENOMEM; } + INIT_LIST_HEAD(&device->dev_alloc_list); list_add(&device->dev_list, &fs_devices->devices); - list_add(&device->dev_alloc_list, &fs_devices->alloc_list); + device->fs_devices = fs_devices; fs_devices->num_devices++; } @@ -288,58 +304,94 @@ static noinline int device_list_add(const char *path, int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) { - struct list_head *head = &fs_devices->devices; + struct list_head *tmp; struct list_head *cur; struct btrfs_device *device; + int seed_devices = 0; mutex_lock(&uuid_mutex); again: - list_for_each(cur, head) { + list_for_each_safe(cur, tmp, &fs_devices->devices) { device = list_entry(cur, struct btrfs_device, dev_list); - if (!device->in_fs_metadata) { - struct block_device *bdev; - list_del(&device->dev_list); - list_del(&device->dev_alloc_list); + if (device->in_fs_metadata) + continue; + + if (device->bdev) { + close_bdev_excl(device->bdev); + device->bdev = NULL; + fs_devices->open_devices--; + } + if (device->writeable) { + list_del_init(&device->dev_alloc_list); + device->writeable = 0; + fs_devices->rw_devices--; + } + if (!seed_devices) { + list_del_init(&device->dev_list); fs_devices->num_devices--; - if (device->bdev) { - bdev = device->bdev; - fs_devices->open_devices--; - mutex_unlock(&uuid_mutex); - close_bdev_excl(bdev); - mutex_lock(&uuid_mutex); - } kfree(device->name); kfree(device); - goto again; } } + + if (fs_devices->seed) { + fs_devices = fs_devices->seed; + seed_devices = 1; + goto again; + } + mutex_unlock(&uuid_mutex); return 0; } -int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) +static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) { - struct list_head *head = &fs_devices->devices; + struct btrfs_fs_devices *seed_devices; struct list_head *cur; struct btrfs_device *device; +again: + if (--fs_devices->opened > 0) + return 0; - mutex_lock(&uuid_mutex); - list_for_each(cur, head) { + list_for_each(cur, &fs_devices->devices) { device = list_entry(cur, struct btrfs_device, dev_list); if (device->bdev) { close_bdev_excl(device->bdev); fs_devices->open_devices--; } + if (device->writeable) { + list_del_init(&device->dev_alloc_list); + fs_devices->rw_devices--; + } + device->bdev = NULL; + device->writeable = 0; device->in_fs_metadata = 0; } - fs_devices->mounted = 0; - mutex_unlock(&uuid_mutex); + fs_devices->opened = 0; + fs_devices->seeding = 0; + fs_devices->sprouted = 0; + + seed_devices = fs_devices->seed; + fs_devices->seed = NULL; + if (seed_devices) { + fs_devices = seed_devices; + goto again; + } return 0; } -int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, - int flags, void *holder) +int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) +{ + int ret; + + mutex_lock(&uuid_mutex); + ret = __btrfs_close_devices(fs_devices); + mutex_unlock(&uuid_mutex); + return ret; +} + +int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, void *holder) { struct block_device *bdev; struct list_head *head = &fs_devices->devices; @@ -350,24 +402,18 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, struct btrfs_super_block *disk_super; u64 latest_devid = 0; u64 latest_transid = 0; - u64 transid; u64 devid; + int seeding = 1; int ret = 0; - mutex_lock(&uuid_mutex); - if (fs_devices->mounted) - goto out; - list_for_each(cur, head) { device = list_entry(cur, struct btrfs_device, dev_list); if (device->bdev) continue; - if (!device->name) continue; - bdev = open_bdev_excl(device->name, flags, holder); - + bdev = open_bdev_excl(device->name, MS_RDONLY, holder); if (IS_ERR(bdev)) { printk("open %s failed\n", device->name); goto error; @@ -387,16 +433,32 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, if (devid != device->devid) goto error_brelse; - transid = btrfs_super_generation(disk_super); - if (!latest_transid || transid > latest_transid) { + if (memcmp(device->uuid, disk_super->dev_item.uuid, + BTRFS_UUID_SIZE)) + goto error_brelse; + + device->generation = btrfs_super_generation(disk_super); + if (!latest_transid || device->generation > latest_transid) { latest_devid = devid; - latest_transid = transid; + latest_transid = device->generation; latest_bdev = bdev; } + if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) { + device->writeable = 0; + } else { + device->writeable = !bdev_read_only(bdev); + seeding = 0; + } + device->bdev = bdev; device->in_fs_metadata = 0; fs_devices->open_devices++; + if (device->writeable) { + fs_devices->rw_devices++; + list_add(&device->dev_alloc_list, + &fs_devices->alloc_list); + } continue; error_brelse: @@ -410,11 +472,32 @@ error: ret = -EIO; goto out; } - fs_devices->mounted = 1; + fs_devices->seeding = seeding; + fs_devices->opened = 1; fs_devices->latest_bdev = latest_bdev; fs_devices->latest_devid = latest_devid; fs_devices->latest_trans = latest_transid; + fs_devices->total_rw_bytes = 0; out: + return ret; +} + +int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, + int flags, void *holder) +{ + int ret; + + mutex_lock(&uuid_mutex); + if (fs_devices->opened) { + if (fs_devices->sprouted) { + ret = -EBUSY; + } else { + fs_devices->opened++; + ret = 0; + } + } else { + ret = __btrfs_open_devices(fs_devices, holder); + } mutex_unlock(&uuid_mutex); return ret; } @@ -481,12 +564,12 @@ error: */ static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, - struct btrfs_path *path, u64 num_bytes, u64 *start) { struct btrfs_key key; struct btrfs_root *root = device->dev_root; struct btrfs_dev_extent *dev_extent = NULL; + struct btrfs_path *path; u64 hole_size = 0; u64 last_byte = 0; u64 search_start = 0; @@ -496,8 +579,11 @@ static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans, int start_found; struct extent_buffer *l; - start_found = 0; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; path->reada = 2; + start_found = 0; /* FIXME use last free of some kind */ @@ -581,7 +667,6 @@ check_pending: /* we have to make sure we didn't find an extent that has already * been allocated by the map tree or the original allocation */ - btrfs_release_path(root, path); BUG_ON(*start < search_start); if (*start + num_bytes > search_end) { @@ -589,10 +674,10 @@ check_pending: goto error; } /* check for pending inserts here */ - return 0; + ret = 0; error: - btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -644,11 +729,10 @@ int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, return ret; } -int noinline btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, +int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 chunk_tree, u64 chunk_objectid, - u64 chunk_offset, - u64 num_bytes, u64 *start) + u64 chunk_offset, u64 start, u64 num_bytes) { int ret; struct btrfs_path *path; @@ -662,13 +746,8 @@ int noinline btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - ret = find_free_dev_extent(trans, device, path, num_bytes, start); - if (ret) { - goto err; - } - key.objectid = device->devid; - key.offset = *start; + key.offset = start; key.type = BTRFS_DEV_EXTENT_KEY; ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*extent)); @@ -687,7 +766,6 @@ int noinline btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, btrfs_set_dev_extent_length(leaf, extent, num_bytes); btrfs_mark_buffer_dirty(leaf); -err: btrfs_free_path(path); return ret; } @@ -735,12 +813,18 @@ error: return ret; } -static noinline int find_next_devid(struct btrfs_root *root, - struct btrfs_path *path, u64 *objectid) +static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid) { int ret; struct btrfs_key key; struct btrfs_key found_key; + struct btrfs_path *path; + + root = root->fs_info->chunk_root; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; key.objectid = BTRFS_DEV_ITEMS_OBJECTID; key.type = BTRFS_DEV_ITEM_KEY; @@ -763,7 +847,7 @@ static noinline int find_next_devid(struct btrfs_root *root, } ret = 0; error: - btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -781,7 +865,6 @@ int btrfs_add_device(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; struct btrfs_key key; unsigned long ptr; - u64 free_devid = 0; root = root->fs_info->chunk_root; @@ -789,13 +872,9 @@ int btrfs_add_device(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - ret = find_next_devid(root, path, &free_devid); - if (ret) - goto out; - key.objectid = BTRFS_DEV_ITEMS_OBJECTID; key.type = BTRFS_DEV_ITEM_KEY; - key.offset = free_devid; + key.offset = device->devid; ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*dev_item)); @@ -805,8 +884,8 @@ int btrfs_add_device(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); - device->devid = free_devid; btrfs_set_device_id(leaf, dev_item, device->devid); + btrfs_set_device_generation(leaf, dev_item, 0); btrfs_set_device_type(leaf, dev_item, device->type); btrfs_set_device_io_align(leaf, dev_item, device->io_align); btrfs_set_device_io_width(leaf, dev_item, device->io_width); @@ -819,9 +898,11 @@ int btrfs_add_device(struct btrfs_trans_handle *trans, ptr = (unsigned long)btrfs_device_uuid(dev_item); write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); + ptr = (unsigned long)btrfs_device_fsid(dev_item); + write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE); btrfs_mark_buffer_dirty(leaf); - ret = 0; + ret = 0; out: btrfs_free_path(path); return ret; @@ -832,11 +913,7 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, { int ret; struct btrfs_path *path; - struct block_device *bdev = device->bdev; - struct btrfs_device *next_dev; struct btrfs_key key; - u64 total_bytes; - struct btrfs_fs_devices *fs_devices; struct btrfs_trans_handle *trans; root = root->fs_info->chunk_root; @@ -863,25 +940,6 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, ret = btrfs_del_item(trans, root, path); if (ret) goto out; - - /* - * at this point, the device is zero sized. We want to - * remove it from the devices list and zero out the old super - */ - list_del_init(&device->dev_list); - list_del_init(&device->dev_alloc_list); - fs_devices = root->fs_info->fs_devices; - - next_dev = list_entry(fs_devices->devices.next, struct btrfs_device, - dev_list); - if (bdev == root->fs_info->sb->s_bdev) - root->fs_info->sb->s_bdev = next_dev->bdev; - if (bdev == fs_devices->latest_bdev) - fs_devices->latest_bdev = next_dev->bdev; - - total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); - btrfs_set_super_num_devices(&root->fs_info->super_copy, - total_bytes - 1); out: btrfs_free_path(path); unlock_chunks(root); @@ -892,11 +950,14 @@ out: int btrfs_rm_device(struct btrfs_root *root, char *device_path) { struct btrfs_device *device; + struct btrfs_device *next_device; struct block_device *bdev; struct buffer_head *bh = NULL; struct btrfs_super_block *disk_super; u64 all_avail; u64 devid; + u64 num_devices; + u8 *dev_uuid; int ret = 0; mutex_lock(&uuid_mutex); @@ -907,14 +968,14 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) root->fs_info->avail_metadata_alloc_bits; if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && - btrfs_super_num_devices(&root->fs_info->super_copy) <= 4) { + root->fs_info->fs_devices->rw_devices <= 4) { printk("btrfs: unable to go below four devices on raid10\n"); ret = -EINVAL; goto out; } if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && - btrfs_super_num_devices(&root->fs_info->super_copy) <= 2) { + root->fs_info->fs_devices->rw_devices <= 2) { printk("btrfs: unable to go below two devices on raid1\n"); ret = -EINVAL; goto out; @@ -941,15 +1002,15 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) printk("btrfs: no missing devices found to remove\n"); goto out; } - } else { - bdev = open_bdev_excl(device_path, 0, + bdev = open_bdev_excl(device_path, MS_RDONLY, root->fs_info->bdev_holder); if (IS_ERR(bdev)) { ret = PTR_ERR(bdev); goto out; } + set_blocksize(bdev, 4096); bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096); if (!bh) { ret = -EIO; @@ -957,45 +1018,97 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) } disk_super = (struct btrfs_super_block *)bh->b_data; if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, - sizeof(disk_super->magic))) { - ret = -ENOENT; - goto error_brelse; - } - if (memcmp(disk_super->fsid, root->fs_info->fsid, - BTRFS_FSID_SIZE)) { + sizeof(disk_super->magic))) { ret = -ENOENT; goto error_brelse; } devid = le64_to_cpu(disk_super->dev_item.devid); - device = btrfs_find_device(root, devid, NULL); + dev_uuid = disk_super->dev_item.uuid; + device = btrfs_find_device(root, devid, dev_uuid, + disk_super->fsid); if (!device) { ret = -ENOENT; goto error_brelse; } + } + if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) { + printk("btrfs: unable to remove the only writeable device\n"); + ret = -EINVAL; + goto error_brelse; + } + + if (device->writeable) { + list_del_init(&device->dev_alloc_list); + root->fs_info->fs_devices->rw_devices--; } - root->fs_info->fs_devices->num_devices--; - root->fs_info->fs_devices->open_devices--; ret = btrfs_shrink_device(device, 0); if (ret) goto error_brelse; - ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); if (ret) goto error_brelse; - if (bh) { + device->in_fs_metadata = 0; + if (device->fs_devices == root->fs_info->fs_devices) { + list_del_init(&device->dev_list); + root->fs_info->fs_devices->num_devices--; + if (device->bdev) + device->fs_devices->open_devices--; + } + + next_device = list_entry(root->fs_info->fs_devices->devices.next, + struct btrfs_device, dev_list); + if (device->bdev == root->fs_info->sb->s_bdev) + root->fs_info->sb->s_bdev = next_device->bdev; + if (device->bdev == root->fs_info->fs_devices->latest_bdev) + root->fs_info->fs_devices->latest_bdev = next_device->bdev; + + num_devices = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; + btrfs_set_super_num_devices(&root->fs_info->super_copy, num_devices); + + if (device->fs_devices != root->fs_info->fs_devices) { + BUG_ON(device->writeable); + brelse(bh); + if (bdev) + close_bdev_excl(bdev); + + if (device->bdev) { + close_bdev_excl(device->bdev); + device->bdev = NULL; + device->fs_devices->open_devices--; + } + if (device->fs_devices->open_devices == 0) { + struct btrfs_fs_devices *fs_devices; + fs_devices = root->fs_info->fs_devices; + while (fs_devices) { + if (fs_devices->seed == device->fs_devices) + break; + fs_devices = fs_devices->seed; + } + fs_devices->seed = device->fs_devices->seed; + device->fs_devices->seed = NULL; + __btrfs_close_devices(device->fs_devices); + } + ret = 0; + goto out; + } + + /* + * at this point, the device is zero sized. We want to + * remove it from the devices list and zero out the old super + */ + if (device->writeable) { /* make sure this device isn't detected as part of * the FS anymore */ memset(&disk_super->magic, 0, sizeof(disk_super->magic)); set_buffer_dirty(bh); sync_dirty_buffer(bh); - - brelse(bh); } + brelse(bh); if (device->bdev) { /* one close for the device struct or super_block */ @@ -1021,6 +1134,129 @@ out: return ret; } +/* + * does all the dirty work required for changing file system's UUID. + */ +static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; + struct btrfs_fs_devices *old_devices; + struct btrfs_super_block *disk_super = &root->fs_info->super_copy; + struct btrfs_device *device; + u64 super_flags; + + BUG_ON(!mutex_is_locked(&uuid_mutex)); + if (!fs_devices->seeding || fs_devices->opened != 1) + return -EINVAL; + + old_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); + if (!old_devices) + return -ENOMEM; + + memcpy(old_devices, fs_devices, sizeof(*old_devices)); + old_devices->opened = 1; + old_devices->sprouted = 1; + INIT_LIST_HEAD(&old_devices->devices); + INIT_LIST_HEAD(&old_devices->alloc_list); + list_splice_init(&fs_devices->devices, &old_devices->devices); + list_splice_init(&fs_devices->alloc_list, &old_devices->alloc_list); + list_for_each_entry(device, &old_devices->devices, dev_list) { + device->fs_devices = old_devices; + } + list_add(&old_devices->list, &fs_uuids); + + fs_devices->seeding = 0; + fs_devices->num_devices = 0; + fs_devices->open_devices = 0; + fs_devices->seed = old_devices; + + generate_random_uuid(fs_devices->fsid); + memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); + memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); + super_flags = btrfs_super_flags(disk_super) & + ~BTRFS_SUPER_FLAG_SEEDING; + btrfs_set_super_flags(disk_super, super_flags); + + return 0; +} + +/* + * strore the expected generation for seed devices in device items. + */ +static int btrfs_finish_sprout(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_dev_item *dev_item; + struct btrfs_device *device; + struct btrfs_key key; + u8 fs_uuid[BTRFS_UUID_SIZE]; + u8 dev_uuid[BTRFS_UUID_SIZE]; + u64 devid; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + root = root->fs_info->chunk_root; + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.offset = 0; + key.type = BTRFS_DEV_ITEM_KEY; + + while (1) { + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) + goto error; + + leaf = path->nodes[0]; +next_slot: + if (path->slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret > 0) + break; + if (ret < 0) + goto error; + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + btrfs_release_path(root, path); + continue; + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid != BTRFS_DEV_ITEMS_OBJECTID || + key.type != BTRFS_DEV_ITEM_KEY) + break; + + dev_item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_dev_item); + devid = btrfs_device_id(leaf, dev_item); + read_extent_buffer(leaf, dev_uuid, + (unsigned long)btrfs_device_uuid(dev_item), + BTRFS_UUID_SIZE); + read_extent_buffer(leaf, fs_uuid, + (unsigned long)btrfs_device_fsid(dev_item), + BTRFS_UUID_SIZE); + device = btrfs_find_device(root, devid, dev_uuid, fs_uuid); + BUG_ON(!device); + + if (device->fs_devices->seeding) { + btrfs_set_device_generation(leaf, dev_item, + device->generation); + btrfs_mark_buffer_dirty(leaf); + } + + path->slots[0]++; + goto next_slot; + } + ret = 0; +error: + btrfs_free_path(path); + return ret; +} + int btrfs_init_new_device(struct btrfs_root *root, char *device_path) { struct btrfs_trans_handle *trans; @@ -1028,26 +1264,34 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) struct block_device *bdev; struct list_head *cur; struct list_head *devices; + struct super_block *sb = root->fs_info->sb; u64 total_bytes; + int seeding_dev = 0; int ret = 0; + if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) + return -EINVAL; bdev = open_bdev_excl(device_path, 0, root->fs_info->bdev_holder); if (!bdev) { return -EIO; } + if (root->fs_info->fs_devices->seeding) { + seeding_dev = 1; + down_write(&sb->s_umount); + mutex_lock(&uuid_mutex); + } + filemap_write_and_wait(bdev->bd_inode->i_mapping); mutex_lock(&root->fs_info->volume_mutex); - trans = btrfs_start_transaction(root, 1); - lock_chunks(root); devices = &root->fs_info->fs_devices->devices; list_for_each(cur, devices) { device = list_entry(cur, struct btrfs_device, dev_list); if (device->bdev == bdev) { ret = -EEXIST; - goto out; + goto error; } } @@ -1055,18 +1299,31 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if (!device) { /* we can safely leave the fs_devices entry around */ ret = -ENOMEM; - goto out_close_bdev; + goto error; } - device->barriers = 1; - device->work.func = pending_bios_fn; - generate_random_uuid(device->uuid); - spin_lock_init(&device->io_lock); device->name = kstrdup(device_path, GFP_NOFS); if (!device->name) { kfree(device); - goto out_close_bdev; + ret = -ENOMEM; + goto error; } + + ret = find_next_devid(root, &device->devid); + if (ret) { + kfree(device); + goto error; + } + + trans = btrfs_start_transaction(root, 1); + lock_chunks(root); + + device->barriers = 1; + device->writeable = 1; + device->work.func = pending_bios_fn; + generate_random_uuid(device->uuid); + spin_lock_init(&device->io_lock); + device->generation = trans->transid; device->io_width = root->sectorsize; device->io_align = root->sectorsize; device->sector_size = root->sectorsize; @@ -1074,12 +1331,22 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) device->dev_root = root->fs_info->dev_root; device->bdev = bdev; device->in_fs_metadata = 1; + set_blocksize(device->bdev, 4096); - ret = btrfs_add_device(trans, root, device); - if (ret) - goto out_close_bdev; + if (seeding_dev) { + sb->s_flags &= ~MS_RDONLY; + ret = btrfs_prepare_sprout(trans, root); + BUG_ON(ret); + } - set_blocksize(device->bdev, 4096); + device->fs_devices = root->fs_info->fs_devices; + list_add(&device->dev_list, &root->fs_info->fs_devices->devices); + list_add(&device->dev_alloc_list, + &root->fs_info->fs_devices->alloc_list); + root->fs_info->fs_devices->num_devices++; + root->fs_info->fs_devices->open_devices++; + root->fs_info->fs_devices->rw_devices++; + root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); btrfs_set_super_total_bytes(&root->fs_info->super_copy, @@ -1089,20 +1356,34 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) btrfs_set_super_num_devices(&root->fs_info->super_copy, total_bytes + 1); - list_add(&device->dev_list, &root->fs_info->fs_devices->devices); - list_add(&device->dev_alloc_list, - &root->fs_info->fs_devices->alloc_list); - root->fs_info->fs_devices->num_devices++; - root->fs_info->fs_devices->open_devices++; -out: + if (seeding_dev) { + ret = init_first_rw_device(trans, root, device); + BUG_ON(ret); + ret = btrfs_finish_sprout(trans, root); + BUG_ON(ret); + } else { + ret = btrfs_add_device(trans, root, device); + } + unlock_chunks(root); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->volume_mutex); + btrfs_commit_transaction(trans, root); - return ret; + if (seeding_dev) { + mutex_unlock(&uuid_mutex); + up_write(&sb->s_umount); -out_close_bdev: + ret = btrfs_relocate_sys_chunks(root); + BUG_ON(ret); + } +out: + mutex_unlock(&root->fs_info->volume_mutex); + return ret; +error: close_bdev_excl(bdev); + if (seeding_dev) { + mutex_unlock(&uuid_mutex); + up_write(&sb->s_umount); + } goto out; } @@ -1160,7 +1441,15 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans, u64 old_total = btrfs_super_total_bytes(super_copy); u64 diff = new_size - device->total_bytes; + if (!device->writeable) + return -EACCES; + if (new_size <= device->total_bytes) + return -EINVAL; + btrfs_set_super_total_bytes(super_copy, old_total + diff); + device->fs_devices->total_rw_bytes += diff; + + device->total_bytes = new_size; return btrfs_update_device(trans, device); } @@ -1248,7 +1537,6 @@ int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 return ret; } - int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset) @@ -1308,24 +1596,82 @@ int btrfs_relocate_chunk(struct btrfs_root *root, BUG_ON(ret); } - ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); - BUG_ON(ret); + ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); + BUG_ON(ret); + + spin_lock(&em_tree->lock); + remove_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + + kfree(map); + em->bdev = NULL; + + /* once for the tree */ + free_extent_map(em); + /* once for us */ + free_extent_map(em); + + unlock_chunks(root); + btrfs_end_transaction(trans, root); + return 0; +} + +static int btrfs_relocate_sys_chunks(struct btrfs_root *root) +{ + struct btrfs_root *chunk_root = root->fs_info->chunk_root; + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_chunk *chunk; + struct btrfs_key key; + struct btrfs_key found_key; + u64 chunk_tree = chunk_root->root_key.objectid; + u64 chunk_type; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + key.offset = (u64)-1; + key.type = BTRFS_CHUNK_ITEM_KEY; + + while (1) { + ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); + if (ret < 0) + goto error; + BUG_ON(ret == 0); + + ret = btrfs_previous_item(chunk_root, path, key.objectid, + key.type); + if (ret < 0) + goto error; + if (ret > 0) + break; - spin_lock(&em_tree->lock); - remove_extent_mapping(em_tree, em); - spin_unlock(&em_tree->lock); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - kfree(map); - em->bdev = NULL; + chunk = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_chunk); + chunk_type = btrfs_chunk_type(leaf, chunk); + btrfs_release_path(chunk_root, path); - /* once for the tree */ - free_extent_map(em); - /* once for us */ - free_extent_map(em); + if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) { + ret = btrfs_relocate_chunk(chunk_root, chunk_tree, + found_key.objectid, + found_key.offset); + BUG_ON(ret); + } - unlock_chunks(root); - btrfs_end_transaction(trans, root); - return 0; + if (found_key.offset == 0) + break; + key.offset = found_key.offset - 1; + } + ret = 0; +error: + btrfs_free_path(path); + return ret; } static u64 div_factor(u64 num, int factor) @@ -1337,7 +1683,6 @@ static u64 div_factor(u64 num, int factor) return num; } - int btrfs_balance(struct btrfs_root *dev_root) { int ret; @@ -1353,6 +1698,8 @@ int btrfs_balance(struct btrfs_root *dev_root) struct btrfs_trans_handle *trans; struct btrfs_key found_key; + if (dev_root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; mutex_lock(&dev_root->fs_info->volume_mutex); dev_root = dev_root->fs_info->dev_root; @@ -1363,7 +1710,8 @@ int btrfs_balance(struct btrfs_root *dev_root) old_size = device->total_bytes; size_to_free = div_factor(old_size, 1); size_to_free = min(size_to_free, (u64)1 * 1024 * 1024); - if (device->total_bytes - device->bytes_used > size_to_free) + if (!device->writeable || + device->total_bytes - device->bytes_used > size_to_free) continue; ret = btrfs_shrink_device(device, old_size - size_to_free); @@ -1453,6 +1801,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) u64 old_total = btrfs_super_total_bytes(super_copy); u64 diff = device->total_bytes - new_size; + if (new_size >= device->total_bytes) + return -EINVAL; path = btrfs_alloc_path(); if (!path) @@ -1469,6 +1819,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) lock_chunks(root); device->total_bytes = new_size; + if (device->writeable) + device->fs_devices->total_rw_bytes -= diff; ret = btrfs_update_device(trans, device); if (ret) { unlock_chunks(root); @@ -1561,32 +1913,27 @@ static u64 noinline chunk_bytes_by_type(u64 type, u64 calc_size, return calc_size * num_stripes; } - -int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, u64 *start, - u64 *num_bytes, u64 type) +static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, + struct map_lookup **map_ret, + u64 *num_bytes, u64 *stripe_size, + u64 start, u64 type) { - u64 dev_offset; struct btrfs_fs_info *info = extent_root->fs_info; - struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; - struct btrfs_path *path; - struct btrfs_stripe *stripes; struct btrfs_device *device = NULL; - struct btrfs_chunk *chunk; - struct list_head private_devs; - struct list_head *dev_list; + struct btrfs_fs_devices *fs_devices = info->fs_devices; struct list_head *cur; + struct map_lookup *map = NULL; struct extent_map_tree *em_tree; - struct map_lookup *map; struct extent_map *em; + struct list_head private_devs; int min_stripe_size = 1 * 1024 * 1024; - u64 physical; u64 calc_size = 1024 * 1024 * 1024; u64 max_chunk_size = calc_size; u64 min_free; u64 avail; u64 max_avail = 0; - u64 percent_max; + u64 dev_offset; int num_stripes = 1; int min_stripes = 1; int sub_stripes = 0; @@ -1594,19 +1941,17 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, int ret; int index; int stripe_len = 64 * 1024; - struct btrfs_key key; if ((type & BTRFS_BLOCK_GROUP_RAID1) && (type & BTRFS_BLOCK_GROUP_DUP)) { WARN_ON(1); type &= ~BTRFS_BLOCK_GROUP_DUP; } - dev_list = &extent_root->fs_info->fs_devices->alloc_list; - if (list_empty(dev_list)) + if (list_empty(&fs_devices->alloc_list)) return -ENOSPC; if (type & (BTRFS_BLOCK_GROUP_RAID0)) { - num_stripes = extent_root->fs_info->fs_devices->open_devices; + num_stripes = fs_devices->rw_devices; min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_DUP)) { @@ -1614,14 +1959,13 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID1)) { - num_stripes = min_t(u64, 2, - extent_root->fs_info->fs_devices->open_devices); + num_stripes = min_t(u64, 2, fs_devices->rw_devices); if (num_stripes < 2) return -ENOSPC; min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID10)) { - num_stripes = extent_root->fs_info->fs_devices->open_devices; + num_stripes = fs_devices->rw_devices; if (num_stripes < 4) return -ENOSPC; num_stripes &= ~(u32)1; @@ -1641,15 +1985,19 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, min_stripe_size = 1 * 1024 * 1024; } - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - /* we don't want a chunk larger than 10% of the FS */ - percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1); - max_chunk_size = min(percent_max, max_chunk_size); + /* we don't want a chunk larger than 10% of writeable space */ + max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), + max_chunk_size); again: + if (!map || map->num_stripes != num_stripes) { + kfree(map); + map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); + if (!map) + return -ENOMEM; + map->num_stripes = num_stripes; + } + if (calc_size * num_stripes > max_chunk_size) { calc_size = max_chunk_size; do_div(calc_size, num_stripes); @@ -1662,8 +2010,7 @@ again: do_div(calc_size, stripe_len); calc_size *= stripe_len; - INIT_LIST_HEAD(&private_devs); - cur = dev_list->next; + cur = fs_devices->alloc_list.next; index = 0; if (type & BTRFS_BLOCK_GROUP_DUP) @@ -1679,10 +2026,10 @@ again: if (!looped) min_free += 1024 * 1024; - /* build a private list of devices we will allocate from */ + INIT_LIST_HEAD(&private_devs); while(index < num_stripes) { device = list_entry(cur, struct btrfs_device, dev_alloc_list); - + BUG_ON(!device->writeable); if (device->total_bytes > device->bytes_used) avail = device->total_bytes - device->bytes_used; else @@ -1690,24 +2037,28 @@ again: cur = cur->next; if (device->in_fs_metadata && avail >= min_free) { - u64 ignored_start = 0; - ret = find_free_dev_extent(trans, device, path, - min_free, - &ignored_start); + ret = find_free_dev_extent(trans, device, + min_free, &dev_offset); if (ret == 0) { list_move_tail(&device->dev_alloc_list, &private_devs); + map->stripes[index].dev = device; + map->stripes[index].physical = dev_offset; index++; - if (type & BTRFS_BLOCK_GROUP_DUP) + if (type & BTRFS_BLOCK_GROUP_DUP) { + map->stripes[index].dev = device; + map->stripes[index].physical = + dev_offset + calc_size; index++; + } } } else if (device->in_fs_metadata && avail > max_avail) max_avail = avail; - if (cur == dev_list) + if (cur == &fs_devices->alloc_list) break; } + list_splice(&private_devs, &fs_devices->alloc_list); if (index < num_stripes) { - list_splice(&private_devs, dev_list); if (index >= min_stripes) { num_stripes = index; if (type & (BTRFS_BLOCK_GROUP_RAID10)) { @@ -1722,115 +2073,246 @@ again: calc_size = max_avail; goto again; } - btrfs_free_path(path); + kfree(map); return -ENOSPC; } - key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; - key.type = BTRFS_CHUNK_ITEM_KEY; - ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID, - &key.offset); - if (ret) { - btrfs_free_path(path); - return ret; - } + map->sector_size = extent_root->sectorsize; + map->stripe_len = stripe_len; + map->io_align = stripe_len; + map->io_width = stripe_len; + map->type = type; + map->num_stripes = num_stripes; + map->sub_stripes = sub_stripes; - chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS); - if (!chunk) { - btrfs_free_path(path); - return -ENOMEM; - } + *map_ret = map; + *stripe_size = calc_size; + *num_bytes = chunk_bytes_by_type(type, calc_size, + num_stripes, sub_stripes); - map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); - if (!map) { - kfree(chunk); - btrfs_free_path(path); + em = alloc_extent_map(GFP_NOFS); + if (!em) { + kfree(map); return -ENOMEM; } - btrfs_free_path(path); - path = NULL; + em->bdev = (struct block_device *)map; + em->start = start; + em->len = *num_bytes; + em->block_start = 0; + em->block_len = em->len; - stripes = &chunk->stripe; - *num_bytes = chunk_bytes_by_type(type, calc_size, - num_stripes, sub_stripes); + em_tree = &extent_root->fs_info->mapping_tree.map_tree; + spin_lock(&em_tree->lock); + ret = add_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + BUG_ON(ret); + free_extent_map(em); - index = 0; - while(index < num_stripes) { - struct btrfs_stripe *stripe; - BUG_ON(list_empty(&private_devs)); - cur = private_devs.next; - device = list_entry(cur, struct btrfs_device, dev_alloc_list); + ret = btrfs_make_block_group(trans, extent_root, 0, type, + BTRFS_FIRST_CHUNK_TREE_OBJECTID, + start, *num_bytes); + BUG_ON(ret); - /* loop over this device again if we're doing a dup group */ - if (!(type & BTRFS_BLOCK_GROUP_DUP) || - (index == num_stripes - 1)) - list_move_tail(&device->dev_alloc_list, dev_list); + index = 0; + while (index < map->num_stripes) { + device = map->stripes[index].dev; + dev_offset = map->stripes[index].physical; ret = btrfs_alloc_dev_extent(trans, device, - info->chunk_root->root_key.objectid, - BTRFS_FIRST_CHUNK_TREE_OBJECTID, key.offset, - calc_size, &dev_offset); + info->chunk_root->root_key.objectid, + BTRFS_FIRST_CHUNK_TREE_OBJECTID, + start, dev_offset, calc_size); BUG_ON(ret); - device->bytes_used += calc_size; + index++; + } + + return 0; +} + +static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, + struct map_lookup *map, u64 chunk_offset, + u64 chunk_size, u64 stripe_size) +{ + u64 dev_offset; + struct btrfs_key key; + struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; + struct btrfs_device *device; + struct btrfs_chunk *chunk; + struct btrfs_stripe *stripe; + size_t item_size = btrfs_chunk_item_size(map->num_stripes); + int index = 0; + int ret; + + chunk = kzalloc(item_size, GFP_NOFS); + if (!chunk) + return -ENOMEM; + + index = 0; + while (index < map->num_stripes) { + device = map->stripes[index].dev; + device->bytes_used += stripe_size; ret = btrfs_update_device(trans, device); BUG_ON(ret); + index++; + } + + index = 0; + stripe = &chunk->stripe; + while (index < map->num_stripes) { + device = map->stripes[index].dev; + dev_offset = map->stripes[index].physical; - map->stripes[index].dev = device; - map->stripes[index].physical = dev_offset; - stripe = stripes + index; btrfs_set_stack_stripe_devid(stripe, device->devid); btrfs_set_stack_stripe_offset(stripe, dev_offset); memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE); - physical = dev_offset; + stripe++; index++; } - BUG_ON(!list_empty(&private_devs)); - /* key was set above */ - btrfs_set_stack_chunk_length(chunk, *num_bytes); + btrfs_set_stack_chunk_length(chunk, chunk_size); btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid); - btrfs_set_stack_chunk_stripe_len(chunk, stripe_len); - btrfs_set_stack_chunk_type(chunk, type); - btrfs_set_stack_chunk_num_stripes(chunk, num_stripes); - btrfs_set_stack_chunk_io_align(chunk, stripe_len); - btrfs_set_stack_chunk_io_width(chunk, stripe_len); + btrfs_set_stack_chunk_stripe_len(chunk, map->stripe_len); + btrfs_set_stack_chunk_type(chunk, map->type); + btrfs_set_stack_chunk_num_stripes(chunk, map->num_stripes); + btrfs_set_stack_chunk_io_align(chunk, map->stripe_len); + btrfs_set_stack_chunk_io_width(chunk, map->stripe_len); btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize); - btrfs_set_stack_chunk_sub_stripes(chunk, sub_stripes); - map->sector_size = extent_root->sectorsize; - map->stripe_len = stripe_len; - map->io_align = stripe_len; - map->io_width = stripe_len; - map->type = type; - map->num_stripes = num_stripes; - map->sub_stripes = sub_stripes; + btrfs_set_stack_chunk_sub_stripes(chunk, map->sub_stripes); - ret = btrfs_insert_item(trans, chunk_root, &key, chunk, - btrfs_chunk_item_size(num_stripes)); - BUG_ON(ret); - *start = key.offset;; + key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + key.type = BTRFS_CHUNK_ITEM_KEY; + key.offset = chunk_offset; - em = alloc_extent_map(GFP_NOFS); - if (!em) - return -ENOMEM; - em->bdev = (struct block_device *)map; - em->start = key.offset; - em->len = *num_bytes; - em->block_start = 0; - em->block_len = em->len; + ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size); + BUG_ON(ret); - if (type & BTRFS_BLOCK_GROUP_SYSTEM) { - ret = btrfs_add_system_chunk(trans, chunk_root, &key, - chunk, btrfs_chunk_item_size(num_stripes)); + if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { + ret = btrfs_add_system_chunk(trans, chunk_root, &key, chunk, + item_size); BUG_ON(ret); } kfree(chunk); + return 0; +} - em_tree = &extent_root->fs_info->mapping_tree.map_tree; - spin_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); - spin_unlock(&em_tree->lock); +/* + * Chunk allocation falls into two parts. The first part does works + * that make the new allocated chunk useable, but not do any operation + * that modifies the chunk tree. The second part does the works that + * require modifying the chunk tree. This division is important for the + * bootstrap process of adding storage to a seed btrfs. + */ +int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, u64 type) +{ + u64 chunk_offset; + u64 chunk_size; + u64 stripe_size; + struct map_lookup *map; + struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; + int ret; + + ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID, + &chunk_offset); + if (ret) + return ret; + + ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size, + &stripe_size, chunk_offset, type); + if (ret) + return ret; + + ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset, + chunk_size, stripe_size); + BUG_ON(ret); + return 0; +} + +static int noinline init_first_rw_device(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_device *device) +{ + u64 chunk_offset; + u64 sys_chunk_offset; + u64 chunk_size; + u64 sys_chunk_size; + u64 stripe_size; + u64 sys_stripe_size; + u64 alloc_profile; + struct map_lookup *map; + struct map_lookup *sys_map; + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_root *extent_root = fs_info->extent_root; + int ret; + + ret = find_next_chunk(fs_info->chunk_root, + BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset); + BUG_ON(ret); + + alloc_profile = BTRFS_BLOCK_GROUP_METADATA | + (fs_info->metadata_alloc_profile & + fs_info->avail_metadata_alloc_bits); + alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile); + + ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size, + &stripe_size, chunk_offset, alloc_profile); + BUG_ON(ret); + + sys_chunk_offset = chunk_offset + chunk_size; + + alloc_profile = BTRFS_BLOCK_GROUP_SYSTEM | + (fs_info->system_alloc_profile & + fs_info->avail_system_alloc_bits); + alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile); + + ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map, + &sys_chunk_size, &sys_stripe_size, + sys_chunk_offset, alloc_profile); + BUG_ON(ret); + + ret = btrfs_add_device(trans, fs_info->chunk_root, device); + BUG_ON(ret); + + /* + * Modifying chunk tree needs allocating new blocks from both + * system block group and metadata block group. So we only can + * do operations require modifying the chunk tree after both + * block groups were created. + */ + ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset, + chunk_size, stripe_size); + BUG_ON(ret); + + ret = __finish_chunk_alloc(trans, extent_root, sys_map, + sys_chunk_offset, sys_chunk_size, + sys_stripe_size); BUG_ON(ret); + return 0; +} + +int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) +{ + struct extent_map *em; + struct map_lookup *map; + struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; + int readonly = 0; + int i; + + spin_lock(&map_tree->map_tree.lock); + em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); + spin_unlock(&map_tree->map_tree.lock); + if (!em) + return 1; + + map = (struct map_lookup *)em->bdev; + for (i = 0; i < map->num_stripes; i++) { + if (!map->stripes[i].dev->writeable) { + readonly = 1; + break; + } + } free_extent_map(em); - return ret; + return readonly; } void btrfs_mapping_init(struct btrfs_mapping_tree *tree) @@ -2227,6 +2709,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, } bio->bi_sector = multi->stripes[dev_nr].physical >> 9; dev = multi->stripes[dev_nr].dev; + BUG_ON(rw == WRITE && !dev->writeable); if (dev && dev->bdev) { bio->bi_bdev = dev->bdev; if (async_submit) @@ -2246,11 +2729,23 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, } struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, - u8 *uuid) + u8 *uuid, u8 *fsid) { - struct list_head *head = &root->fs_info->fs_devices->devices; - - return __find_device(head, devid, uuid); + struct btrfs_device *device; + struct btrfs_fs_devices *cur_devices; + + cur_devices = root->fs_info->fs_devices; + while (cur_devices) { + if (!fsid || + !memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE)) { + device = __find_device(&cur_devices->devices, + devid, uuid); + if (device) + return device; + } + cur_devices = cur_devices->seed; + } + return NULL; } static struct btrfs_device *add_missing_dev(struct btrfs_root *root, @@ -2262,8 +2757,6 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root, device = kzalloc(sizeof(*device), GFP_NOFS); list_add(&device->dev_list, &fs_devices->devices); - list_add(&device->dev_alloc_list, - &fs_devices->alloc_list); device->barriers = 1; device->dev_root = root->fs_info->dev_root; device->devid = devid; @@ -2274,7 +2767,6 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root, return device; } - static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, struct extent_buffer *leaf, struct btrfs_chunk *chunk) @@ -2339,8 +2831,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, read_extent_buffer(leaf, uuid, (unsigned long) btrfs_stripe_dev_uuid_nr(chunk, i), BTRFS_UUID_SIZE); - map->stripes[i].dev = btrfs_find_device(root, devid, uuid); - + map->stripes[i].dev = btrfs_find_device(root, devid, uuid, + NULL); if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) { kfree(map); free_extent_map(em); @@ -2387,6 +2879,50 @@ static int fill_device_from_item(struct extent_buffer *leaf, return 0; } +static int open_seed_devices(struct btrfs_root *root, u8 *fsid) +{ + struct btrfs_fs_devices *fs_devices; + int ret; + + mutex_lock(&uuid_mutex); + + fs_devices = root->fs_info->fs_devices->seed; + while (fs_devices) { + if (!memcmp(fs_devices->fsid, fsid, BTRFS_UUID_SIZE)) { + ret = 0; + goto out; + } + fs_devices = fs_devices->seed; + } + + fs_devices = find_fsid(fsid); + if (!fs_devices) { + ret = -ENOENT; + goto out; + } + if (fs_devices->opened) { + ret = -EBUSY; + goto out; + } + + ret = __btrfs_open_devices(fs_devices, root->fs_info->bdev_holder); + if (ret) + goto out; + + if (!fs_devices->seeding) { + __btrfs_close_devices(fs_devices); + ret = -EINVAL; + goto out; + } + + fs_devices->seed = root->fs_info->fs_devices->seed; + root->fs_info->fs_devices->seed = fs_devices; + fs_devices->sprouted = 1; +out: + mutex_unlock(&uuid_mutex); + return ret; +} + static int read_one_dev(struct btrfs_root *root, struct extent_buffer *leaf, struct btrfs_dev_item *dev_item) @@ -2394,23 +2930,50 @@ static int read_one_dev(struct btrfs_root *root, struct btrfs_device *device; u64 devid; int ret; + int seed_devices = 0; + u8 fs_uuid[BTRFS_UUID_SIZE]; u8 dev_uuid[BTRFS_UUID_SIZE]; devid = btrfs_device_id(leaf, dev_item); read_extent_buffer(leaf, dev_uuid, (unsigned long)btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); - device = btrfs_find_device(root, devid, dev_uuid); - if (!device) { - printk("warning devid %Lu missing\n", devid); - device = add_missing_dev(root, devid, dev_uuid); - if (!device) - return -ENOMEM; + read_extent_buffer(leaf, fs_uuid, + (unsigned long)btrfs_device_fsid(dev_item), + BTRFS_UUID_SIZE); + + if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) { + ret = open_seed_devices(root, fs_uuid); + if (ret) + return ret; + seed_devices = 1; + } + + device = btrfs_find_device(root, devid, dev_uuid, fs_uuid); + if (!device || !device->bdev) { + if (!btrfs_test_opt(root, DEGRADED) || seed_devices) + return -EIO; + + if (!device) { + printk("warning devid %Lu missing\n", devid); + device = add_missing_dev(root, devid, dev_uuid); + if (!device) + return -ENOMEM; + } + } + + if (device->fs_devices != root->fs_info->fs_devices) { + BUG_ON(device->writeable); + if (device->generation != + btrfs_device_generation(leaf, dev_item)) + return -EINVAL; } fill_device_from_item(leaf, dev_item, device); device->dev_root = root->fs_info->dev_root; device->in_fs_metadata = 1; + if (device->writeable) + device->fs_devices->total_rw_bytes += device->total_bytes; ret = 0; #if 0 ret = btrfs_open_device(device); @@ -2528,12 +3091,15 @@ again: dev_item = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); ret = read_one_dev(root, leaf, dev_item); - BUG_ON(ret); + if (ret) + goto error; } } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { struct btrfs_chunk *chunk; chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); ret = read_one_chunk(root, &found_key, leaf, chunk); + if (ret) + goto error; } path->slots[0]++; } @@ -2542,9 +3108,8 @@ again: btrfs_release_path(root, path); goto again; } - - btrfs_free_path(path); ret = 0; error: + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index c50e50580b5..1f6f25a5787 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -26,6 +26,7 @@ struct buffer_head; struct btrfs_device { struct list_head dev_list; struct list_head dev_alloc_list; + struct btrfs_fs_devices *fs_devices; struct btrfs_root *dev_root; struct buffer_head *pending_io; struct bio *pending_bios; @@ -34,6 +35,7 @@ struct btrfs_device { u64 generation; int barriers; + int writeable; int in_fs_metadata; spinlock_t io_lock; @@ -77,6 +79,8 @@ struct btrfs_fs_devices { u64 latest_trans; u64 num_devices; u64 open_devices; + u64 rw_devices; + u64 total_rw_bytes; struct block_device *latest_bdev; /* all of the devices in the FS */ struct list_head devices; @@ -84,7 +88,12 @@ struct btrfs_fs_devices { /* devices not currently being allocated */ struct list_head alloc_list; struct list_head list; - int mounted; + + struct btrfs_fs_devices *seed; + int seeding; + int sprouted; + + int opened; }; struct btrfs_bio_stripe { @@ -109,16 +118,14 @@ struct btrfs_multi_bio { int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 chunk_tree, u64 chunk_objectid, - u64 chunk_offset, - u64 num_bytes, u64 *start); + u64 chunk_offset, u64 start, u64 num_bytes); int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 logical, u64 *length, struct btrfs_multi_bio **multi_ret, int mirror_num); int btrfs_read_sys_array(struct btrfs_root *root); int btrfs_read_chunk_tree(struct btrfs_root *root); int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, u64 *start, - u64 *num_bytes, u64 type); + struct btrfs_root *extent_root, u64 type); void btrfs_mapping_init(struct btrfs_mapping_tree *tree); void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, @@ -141,10 +148,11 @@ int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree, int btrfs_grow_device(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 new_size); struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, - u8 *uuid); + u8 *uuid, u8 *fsid); int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); int btrfs_init_new_device(struct btrfs_root *root, char *path); int btrfs_balance(struct btrfs_root *dev_root); void btrfs_unlock_volumes(void); void btrfs_lock_volumes(void); +int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); #endif -- cgit v1.2.3-70-g09d2 From e3e469f86eebb1b3364c118add362d00c6cff956 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 17 Nov 2008 21:11:49 -0500 Subject: Btrfs: fix free space leak In my batch delete/update/insert patch I introduced a free space leak. The extent that we do the original search on in free_extents is never pinned, so we always update the block saying that it has free space, but the free space never actually gets added to the free space tree, since op->del will always be 0 and it's never actually added to the pinned extents tree. This patch fixes this problem by making sure we call pin_down_bytes on the pending extent op and set op->del to the return value of pin_down_bytes so update_block_group is called with the right value. This seems to fix the case where we were getting ENOSPC when there was plenty of space available. Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 197422c1dc4..6c29669d81a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -999,6 +999,14 @@ search: path->slots[0] = extent_slot; bytes_freed = op->num_bytes; + mutex_lock(&info->pinned_mutex); + ret = pin_down_bytes(trans, extent_root, op->bytenr, + op->num_bytes, op->level >= + BTRFS_FIRST_FREE_OBJECTID); + mutex_unlock(&info->pinned_mutex); + BUG_ON(ret < 0); + op->del = ret; + /* * we need to see if we can delete multiple things at once, so * start looping through the list of extents we are wanting to -- cgit v1.2.3-70-g09d2 From 4ce4cb526f67775c1cce3e3fa01c292672ba874e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 17 Nov 2008 21:12:00 -0500 Subject: Btrfs: Add some debugging around the ENOSPC bugs Some people are still reporting problems with early enospc. This will help narrown down the cause. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6c29669d81a..b0f2241274f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2984,6 +2984,9 @@ loop_check: *last_ptr = ins->objectid + ins->offset; ret = 0; } else if (!ret) { + printk(KERN_ERR "we were searching for %Lu bytes, num_bytes %Lu," + " loop %d, allowed_alloc %d\n", total_needed, num_bytes, + loop, allowed_chunk_alloc); ret = -ENOSPC; } -- cgit v1.2.3-70-g09d2 From b4eec2ca1167bae46295aeb66abd15fd52387845 Mon Sep 17 00:00:00 2001 From: Liu Hui Date: Tue, 18 Nov 2008 11:30:10 -0500 Subject: Btrfs: Some fixes for batching extent insert. In insert_extents(), when ret==1 and last is not zero, it should check if the current inserted item is the last item in this batching inserts. If so, it should just break from loop. If not, 'cur = insert_list->next' will make no sense because the list is empty now, and 'op' will point to an unexpectable place. There are also some trivial fixs in this patch including one comment typo error and deleting two redundant lines. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 2 -- fs/btrfs/extent-tree.c | 10 +++++++--- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index dd1c03aea2d..71ef0a2e2da 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -3041,7 +3041,6 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans, struct btrfs_item *item; int ret = 0; int slot; - int slot_orig; int i; u32 nritems; u32 total_data = 0; @@ -3064,7 +3063,6 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans, if (ret < 0) goto out; - slot_orig = path->slots[0]; leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b0f2241274f..1121d518bf8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -798,9 +798,12 @@ static int noinline insert_extents(struct btrfs_trans_handle *trans, */ i = last; last = 0; - cur = insert_list->next; - op = list_entry(cur, struct pending_extent_op, list); total--; + if (i < total) { + cur = insert_list->next; + op = list_entry(cur, struct pending_extent_op, + list); + } } else { i += ret; } @@ -2150,6 +2153,7 @@ again: if (ret) { if (skipped && all && !num_inserts) { skipped = 0; + search = 0; continue; } mutex_unlock(&info->extent_ins_mutex); @@ -2189,7 +2193,7 @@ again: } /* - * process teh update list, clear the writeback bit for it, and if + * process the update list, clear the writeback bit for it, and if * somebody marked this thing for deletion then just unlock it and be * done, the free_extents will handle it */ -- cgit v1.2.3-70-g09d2 From 07103a3cdb24099324a11be1f35279b463cdfc31 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 19 Nov 2008 15:17:55 -0500 Subject: Btrfs: fix free space accounting when unpinning extents This patch fixes what I hope is the last early ENOSPC bug left. I did not know that pinned extents would merge into one big extent when inserted on to the pinned extent tree, so I was adding free space to a block group that could possibly span multiple block groups. This is a big issue because first that space doesn't exist in that block group, and second we won't actually use that space because there are a bunch of other checks to make sure we're allocating within the constraints of the block group. This patch fixes the problem by adding the btrfs_add_free_space to btrfs_update_pinned_extents which makes sure we are adding the appropriate amount of free space to the appropriate block group. Thanks much to Lee Trager for running my myriad of debug patches to help me track this problem down. Thank you, Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1121d518bf8..ee73efe7542 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2034,6 +2034,8 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, spin_unlock(&cache->lock); spin_unlock(&cache->space_info->lock); fs_info->total_pinned -= len; + if (cache->cached) + btrfs_add_free_space(cache, bytenr, len); } bytenr += len; num -= len; @@ -2099,7 +2101,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, u64 start; u64 end; int ret; - struct btrfs_block_group_cache *cache; mutex_lock(&root->fs_info->pinned_mutex); while(1) { @@ -2109,9 +2110,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, break; btrfs_update_pinned_extents(root, start, end + 1 - start, 0); clear_extent_dirty(unpin, start, end, GFP_NOFS); - cache = btrfs_lookup_block_group(root->fs_info, start); - if (cache->cached) - btrfs_add_free_space(cache, start, end - start + 1); if (need_resched()) { mutex_unlock(&root->fs_info->pinned_mutex); cond_resched(); -- cgit v1.2.3-70-g09d2 From 15916de835a683bd8133a0d1ac0c982b795ab4ff Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 19 Nov 2008 21:17:22 -0500 Subject: Btrfs: Fixes for 2.6.28-rc API changes * open/close_bdev_excl -> open/close_bdev_exclusive * blkdev_issue_discard takes a GFP mask now * Fix blkdev_issue_discard usage now that it is enabled Signed-off-by: Chris Mason --- fs/btrfs/compat.h | 7 +++++++ fs/btrfs/compression.c | 4 ++-- fs/btrfs/extent-tree.c | 27 ++++++++++++++++++++------- fs/btrfs/extent_io.c | 4 ++-- fs/btrfs/version.sh | 34 +++++++++++++++++----------------- fs/btrfs/volumes.c | 41 +++++++++++++++++++++++------------------ fs/btrfs/volumes.h | 3 +++ 7 files changed, 74 insertions(+), 46 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h index cd6598b169d..dd1defdbfa3 100644 --- a/fs/btrfs/compat.h +++ b/fs/btrfs/compat.h @@ -21,4 +21,11 @@ static inline struct dentry *d_obtain_alias(struct inode *inode) } #endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) +# define __pagevec_lru_add_file __pagevec_lru_add +# define open_bdev_exclusive open_bdev_excl +# define close_bdev_exclusive(bdev, mode) close_bdev_excl(bdev) +#endif + + #endif /* _COMPAT_H_ */ diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index bfd1512cce0..df05f513e1e 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -419,7 +419,7 @@ static noinline int add_ra_bio_pages(struct inode *inode, /* open coding of lru_cache_add, also not exported */ page_cache_get(page); if (!pagevec_add(&pvec, page)) - __pagevec_lru_add(&pvec); + __pagevec_lru_add_file(&pvec); end = last_offset + PAGE_CACHE_SIZE - 1; /* @@ -475,7 +475,7 @@ next: last_offset += PAGE_CACHE_SIZE; } if (pagevec_count(&pvec)) - __pagevec_lru_add(&pvec); + __pagevec_lru_add_file(&pvec); return 0; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ee73efe7542..62d49705d14 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -28,6 +28,7 @@ #include "volumes.h" #include "locking.h" #include "ref-cache.h" +#include "compat.h" #define PENDING_EXTENT_INSERT 0 #define PENDING_EXTENT_DELETE 1 @@ -899,6 +900,17 @@ static int noinline remove_extent_backref(struct btrfs_trans_handle *trans, return ret; } +static void btrfs_issue_discard(struct block_device *bdev, + u64 start, u64 len) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28) + blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL); +#else + blkdev_issue_discard(bdev, start >> 9, len >> 9); +#endif +} + + static int noinline free_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, struct list_head *del_list) @@ -1108,6 +1120,7 @@ search: BUG_ON(ret); #ifdef BIO_RW_DISCARD + map_length = tmp->num_bytes; ret = btrfs_map_block(&info->mapping_tree, READ, tmp->bytenr, &map_length, &multi, 0); @@ -1115,16 +1128,16 @@ search: struct btrfs_bio_stripe *stripe; int i; - stripe = multi->stripe; + stripe = multi->stripes; if (map_length > tmp->num_bytes) map_length = tmp->num_bytes; for (i = 0; i < multi->num_stripes; i++, stripe++) - blkdev_issue_discard(stripe->dev->bdev, - stripe->physical >> 9, - map_length >> 9); + btrfs_issue_discard(stripe->dev->bdev, + stripe->physical, + map_length); kfree(multi); } #endif @@ -2498,9 +2511,9 @@ static int __free_extent(struct btrfs_trans_handle *trans, map_length = num_bytes; for (i = 0; i < multi->num_stripes; i++, stripe++) { - blkdev_issue_discard(stripe->dev->bdev, - stripe->physical >> 9, - map_length >> 9); + btrfs_issue_discard(stripe->dev->bdev, + stripe->physical, + map_length); } kfree(multi); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a0f3804efe4..3a65c10dce3 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2639,14 +2639,14 @@ int extent_readpages(struct extent_io_tree *tree, /* open coding of lru_cache_add, also not exported */ page_cache_get(page); if (!pagevec_add(&pvec, page)) - __pagevec_lru_add(&pvec); + __pagevec_lru_add_file(&pvec); __extent_read_full_page(tree, page, get_extent, &bio, 0, &bio_flags); } page_cache_release(page); } if (pagevec_count(&pvec)) - __pagevec_lru_add(&pvec); + __pagevec_lru_add_file(&pvec); BUG_ON(!list_empty(pages)); if (bio) submit_one_bio(READ, bio, 0, bio_flags); diff --git a/fs/btrfs/version.sh b/fs/btrfs/version.sh index 0f57f24404d..1ca1952fd91 100644 --- a/fs/btrfs/version.sh +++ b/fs/btrfs/version.sh @@ -8,24 +8,24 @@ v="v0.16" -which hg > /dev/null -if [ -d .hg ] && [ $? == 0 ]; then - last=$(hg tags | grep -m1 -o '^v[0-9.]\+') - - # now check if the repo has commits since then... - if [[ $(hg id -t) == $last || \ - $(hg di -r "$last:." | awk '/^diff/{print $NF}' | sort -u) == .hgtags ]] - then - # check if it's dirty - if [[ $(hg id | cut -d' ' -f1) == *+ ]]; then - v=$last+ - else - v=$last +which git &> /dev/null +if [ $? == 0 ]; then + git branch >& /dev/null + if [ $? == 0 ]; then + if head=`git rev-parse --verify HEAD 2>/dev/null`; then + if tag=`git describe --tags 2>/dev/null`; then + v="$tag" + fi + + # Are there uncommitted changes? + git update-index --refresh --unmerged > /dev/null + if git diff-index --name-only HEAD | \ + grep -v "^scripts/package" \ + | read dummy; then + v="$v"-dirty + fi fi - else - # includes dirty flag - v=$last+$(hg id -i) - fi + fi fi echo "#ifndef __BUILD_VERSION" > .build-version.h diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ecf0633ab8c..c3ee63f92a5 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -85,7 +85,7 @@ int btrfs_cleanup_fs_uuids(void) dev = list_entry(fs_devices->devices.next, struct btrfs_device, dev_list); if (dev->bdev) { - close_bdev_excl(dev->bdev); + close_bdev_exclusive(dev->bdev, dev->mode); fs_devices->open_devices--; } fs_devices->num_devices--; @@ -317,7 +317,7 @@ again: continue; if (device->bdev) { - close_bdev_excl(device->bdev); + close_bdev_exclusive(device->bdev, device->mode); device->bdev = NULL; fs_devices->open_devices--; } @@ -356,7 +356,7 @@ again: list_for_each(cur, &fs_devices->devices) { device = list_entry(cur, struct btrfs_device, dev_list); if (device->bdev) { - close_bdev_excl(device->bdev); + close_bdev_exclusive(device->bdev, device->mode); fs_devices->open_devices--; } if (device->writeable) { @@ -391,7 +391,8 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) return ret; } -int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, void *holder) +int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, + int flags, void *holder) { struct block_device *bdev; struct list_head *head = &fs_devices->devices; @@ -413,7 +414,7 @@ int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, void *holder) if (!device->name) continue; - bdev = open_bdev_excl(device->name, MS_RDONLY, holder); + bdev = open_bdev_exclusive(device->name, flags, holder); if (IS_ERR(bdev)) { printk("open %s failed\n", device->name); goto error; @@ -453,6 +454,8 @@ int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, void *holder) device->bdev = bdev; device->in_fs_metadata = 0; + device->mode = flags; + fs_devices->open_devices++; if (device->writeable) { fs_devices->rw_devices++; @@ -464,7 +467,7 @@ int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, void *holder) error_brelse: brelse(bh); error_close: - close_bdev_excl(bdev); + close_bdev_exclusive(bdev, MS_RDONLY); error: continue; } @@ -496,7 +499,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, ret = 0; } } else { - ret = __btrfs_open_devices(fs_devices, holder); + ret = __btrfs_open_devices(fs_devices, flags, holder); } mutex_unlock(&uuid_mutex); return ret; @@ -514,7 +517,7 @@ int btrfs_scan_one_device(const char *path, int flags, void *holder, mutex_lock(&uuid_mutex); - bdev = open_bdev_excl(path, flags, holder); + bdev = open_bdev_exclusive(path, flags, holder); if (IS_ERR(bdev)) { ret = PTR_ERR(bdev); @@ -551,7 +554,7 @@ int btrfs_scan_one_device(const char *path, int flags, void *holder, error_brelse: brelse(bh); error_close: - close_bdev_excl(bdev); + close_bdev_exclusive(bdev, flags); error: mutex_unlock(&uuid_mutex); return ret; @@ -1003,7 +1006,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) goto out; } } else { - bdev = open_bdev_excl(device_path, MS_RDONLY, + bdev = open_bdev_exclusive(device_path, MS_RDONLY, root->fs_info->bdev_holder); if (IS_ERR(bdev)) { ret = PTR_ERR(bdev); @@ -1073,10 +1076,10 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) BUG_ON(device->writeable); brelse(bh); if (bdev) - close_bdev_excl(bdev); + close_bdev_exclusive(bdev, MS_RDONLY); if (device->bdev) { - close_bdev_excl(device->bdev); + close_bdev_exclusive(device->bdev, device->mode); device->bdev = NULL; device->fs_devices->open_devices--; } @@ -1112,11 +1115,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) if (device->bdev) { /* one close for the device struct or super_block */ - close_bdev_excl(device->bdev); + close_bdev_exclusive(device->bdev, device->mode); } if (bdev) { /* one close for us */ - close_bdev_excl(bdev); + close_bdev_exclusive(bdev, MS_RDONLY); } kfree(device->name); kfree(device); @@ -1127,7 +1130,7 @@ error_brelse: brelse(bh); error_close: if (bdev) - close_bdev_excl(bdev); + close_bdev_exclusive(bdev, MS_RDONLY); out: mutex_unlock(&root->fs_info->volume_mutex); mutex_unlock(&uuid_mutex); @@ -1272,7 +1275,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) return -EINVAL; - bdev = open_bdev_excl(device_path, 0, root->fs_info->bdev_holder); + bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder); if (!bdev) { return -EIO; } @@ -1331,6 +1334,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) device->dev_root = root->fs_info->dev_root; device->bdev = bdev; device->in_fs_metadata = 1; + device->mode = 0; set_blocksize(device->bdev, 4096); if (seeding_dev) { @@ -1379,7 +1383,7 @@ out: mutex_unlock(&root->fs_info->volume_mutex); return ret; error: - close_bdev_excl(bdev); + close_bdev_exclusive(bdev, 0); if (seeding_dev) { mutex_unlock(&uuid_mutex); up_write(&sb->s_umount); @@ -2907,7 +2911,8 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid) goto out; } - ret = __btrfs_open_devices(fs_devices, root->fs_info->bdev_holder); + ret = __btrfs_open_devices(fs_devices, MS_RDONLY, + root->fs_info->bdev_holder); if (ret) goto out; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 1f6f25a5787..9b41e4d3984 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -42,6 +42,9 @@ struct btrfs_device { struct block_device *bdev; + /* the mode sent to open_bdev_exclusive */ + fmode_t mode; + char *name; /* the internal btrfs device id */ -- cgit v1.2.3-70-g09d2 From 4b4e25f2a6ddb070bab7f7dd2bd2926fb8db9e04 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 20 Nov 2008 10:22:27 -0500 Subject: Btrfs: compat code fixes The btrfs git kernel trees is used to build a standalone tree for compiling against older kernels. This commit makes the standalone tree work with 2.6.27 Signed-off-by: Chris Mason --- fs/btrfs/compat.h | 1 + fs/btrfs/compression.c | 2 +- fs/btrfs/disk-io.c | 3 ++- fs/btrfs/extent-tree.c | 5 ++++- fs/btrfs/inode.c | 2 +- fs/btrfs/ioctl.c | 1 + fs/btrfs/super.c | 2 ++ fs/btrfs/volumes.c | 2 ++ 8 files changed, 14 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h index dd1defdbfa3..75e4426d6fb 100644 --- a/fs/btrfs/compat.h +++ b/fs/btrfs/compat.h @@ -25,6 +25,7 @@ static inline struct dentry *d_obtain_alias(struct inode *inode) # define __pagevec_lru_add_file __pagevec_lru_add # define open_bdev_exclusive open_bdev_excl # define close_bdev_exclusive(bdev, mode) close_bdev_excl(bdev) +typedef unsigned __bitwise__ fmode_t; #endif diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index df05f513e1e..4febe2eb0b8 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -34,13 +34,13 @@ #include #include #include +#include "compat.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" #include "volumes.h" #include "ordered-data.h" -#include "compat.h" #include "compression.h" #include "extent_io.h" #include "extent_map.h" diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c8dcb47b6d7..981652233f7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -26,7 +26,8 @@ #include // for block_sync_page #include #include -# include +#include +#include "compat.h" #include "crc32c.h" #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 62d49705d14..b33e0bfb99e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include "compat.h" #include "hash.h" #include "crc32c.h" #include "ctree.h" @@ -900,6 +902,7 @@ static int noinline remove_extent_backref(struct btrfs_trans_handle *trans, return ret; } +#ifdef BIO_RW_DISCARD static void btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len) { @@ -909,7 +912,7 @@ static void btrfs_issue_discard(struct block_device *bdev, blkdev_issue_discard(bdev, start >> 9, len >> 9); #endif } - +#endif static int noinline free_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2c77e0957f7..7a8ad4292f7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -38,6 +38,7 @@ #include #include #include +#include "compat.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -47,7 +48,6 @@ #include "volumes.h" #include "ordered-data.h" #include "xattr.h" -#include "compat.h" #include "tree-log.h" #include "ref-cache.h" #include "compression.h" diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8828109fa58..f3d68457e66 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -41,6 +41,7 @@ #include #include #include +#include "compat.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 77c5eff3e20..1975ea273dc 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include "compat.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c3ee63f92a5..724ead54529 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -20,7 +20,9 @@ #include #include #include +#include #include +#include "compat.h" #include "ctree.h" #include "extent_map.h" #include "disk-io.h" -- cgit v1.2.3-70-g09d2 From ea6a478ed9758cb0f5af228104b9434840aa20ff Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 20 Nov 2008 12:16:16 -0500 Subject: Btrfs: Fix for lockdep warnings with alloc_mutex and pinned_mutex This the lockdep complaint by having a different mutex to gaurd caching the block group, so you don't end up with this backwards dependancy. Thank you, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 32 ++++++++++++++++++-------------- 2 files changed, 19 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0f2a9b584fb..166896dd44c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -580,6 +580,7 @@ struct btrfs_block_group_cache { struct btrfs_block_group_item item; spinlock_t lock; struct mutex alloc_mutex; + struct mutex cache_mutex; u64 pinned; u64 reserved; u64 flags; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b33e0bfb99e..a970472eab1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -170,8 +170,8 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group, start = extent_end + 1; } else if (extent_start > start && extent_start < end) { size = extent_start - start; - ret = btrfs_add_free_space_lock(block_group, start, - size); + ret = btrfs_add_free_space(block_group, start, + size); BUG_ON(ret); start = extent_end + 1; } else { @@ -181,7 +181,7 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group, if (start < end) { size = end - start; - ret = btrfs_add_free_space_lock(block_group, start, size); + ret = btrfs_add_free_space(block_group, start, size); BUG_ON(ret); } mutex_unlock(&info->pinned_mutex); @@ -2842,17 +2842,19 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, if (!block_group) goto new_group_no_lock; + if (unlikely(!block_group->cached)) { + mutex_lock(&block_group->cache_mutex); + ret = cache_block_group(root, block_group); + mutex_unlock(&block_group->cache_mutex); + if (ret) + break; + } + mutex_lock(&block_group->alloc_mutex); if (unlikely(!block_group_bits(block_group, data))) goto new_group; - ret = cache_block_group(root, block_group); - if (ret) { - mutex_unlock(&block_group->alloc_mutex); - break; - } - - if (block_group->ro) + if (unlikely(block_group->ro)) goto new_group; free_space = btrfs_find_free_space(block_group, search_start, @@ -3273,12 +3275,12 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *block_group; block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); - mutex_lock(&block_group->alloc_mutex); + mutex_lock(&block_group->cache_mutex); cache_block_group(root, block_group); + mutex_unlock(&block_group->cache_mutex); - ret = btrfs_remove_free_space_lock(block_group, ins->objectid, - ins->offset); - mutex_unlock(&block_group->alloc_mutex); + ret = btrfs_remove_free_space(block_group, ins->objectid, + ins->offset); BUG_ON(ret); ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, ref_generation, owner, ins); @@ -5801,6 +5803,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) spin_lock_init(&cache->lock); mutex_init(&cache->alloc_mutex); + mutex_init(&cache->cache_mutex); INIT_LIST_HEAD(&cache->list); read_extent_buffer(leaf, &cache->item, btrfs_item_ptr_offset(leaf, path->slots[0]), @@ -5854,6 +5857,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, cache->key.offset = size; spin_lock_init(&cache->lock); mutex_init(&cache->alloc_mutex); + mutex_init(&cache->cache_mutex); INIT_LIST_HEAD(&cache->list); btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); -- cgit v1.2.3-70-g09d2 From b2950863c61bc24cf0f63bc05947d9d50663c4c0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 2 Dec 2008 09:54:17 -0500 Subject: Btrfs: make things static and include the right headers Shut up various sparse warnings about symbols that should be either static or have their declarations in scope. Signed-off-by: Christoph Hellwig --- fs/btrfs/ctree.c | 2 +- fs/btrfs/disk-io.c | 16 ++++++++-------- fs/btrfs/extent-tree.c | 12 ++++++------ fs/btrfs/extent_io.c | 35 +++++++++++++++-------------------- fs/btrfs/free-space-cache.c | 6 ++++-- fs/btrfs/inode-item.c | 2 +- fs/btrfs/inode.c | 26 +++++++++++++------------- fs/btrfs/ioctl.c | 14 +++++++------- fs/btrfs/root-tree.c | 2 ++ fs/btrfs/super.c | 2 +- fs/btrfs/tree-log.c | 5 +++-- fs/btrfs/volumes.c | 12 ++++++------ fs/btrfs/zlib.c | 1 + 13 files changed, 68 insertions(+), 67 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 71ef0a2e2da..a83cbdf1d8c 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -217,7 +217,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, * this uses that block instead of allocating a new one. btrfs_alloc_reserved_extent * is used to finish the allocation. */ -int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, +static int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fb04665e500..8a2bcc7024f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -93,9 +93,9 @@ struct async_submit_bio { * extents on the btree inode are pretty simple, there's one extent * that covers the entire device */ -struct extent_map *btree_get_extent(struct inode *inode, struct page *page, - size_t page_offset, u64 start, u64 len, - int create) +static struct extent_map *btree_get_extent(struct inode *inode, + struct page *page, size_t page_offset, u64 start, u64 len, + int create) { struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_map *em; @@ -295,7 +295,7 @@ printk("read extent buffer pages failed with ret %d mirror no %d\n", ret, mirror * checksum a dirty tree block before IO. This has extra checks to make * sure we only fill in the checksum field in the first page of a multi-page block */ -int csum_dirty_buffer(struct btrfs_root *root, struct page *page) +static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) { struct extent_io_tree *tree; u64 start = (u64)page->index << PAGE_CACHE_SHIFT; @@ -365,7 +365,7 @@ static int check_tree_block_fsid(struct btrfs_root *root, return ret; } -int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, +static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state) { struct extent_io_tree *tree; @@ -660,7 +660,7 @@ static int btree_writepages(struct address_space *mapping, return extent_writepages(tree, mapping, btree_get_extent, wbc); } -int btree_readpage(struct file *file, struct page *page) +static int btree_readpage(struct file *file, struct page *page) { struct extent_io_tree *tree; tree = &BTRFS_I(page->mapping->host)->io_tree; @@ -1200,7 +1200,7 @@ static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) } } -void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) +static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) { struct inode *inode; struct extent_map_tree *em_tree; @@ -1842,7 +1842,7 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) put_bh(bh); } -int write_all_supers(struct btrfs_root *root) +static int write_all_supers(struct btrfs_root *root) { struct list_head *cur; struct list_head *head = &root->fs_info->fs_devices->devices; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a970472eab1..d1563852938 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -74,7 +74,7 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) * this adds the block group to the fs_info rb tree for the block group * cache */ -int btrfs_add_block_group_cache(struct btrfs_fs_info *info, +static int btrfs_add_block_group_cache(struct btrfs_fs_info *info, struct btrfs_block_group_cache *block_group) { struct rb_node **p; @@ -289,7 +289,7 @@ err: /* * return the block group that starts at or after bytenr */ -struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct +static struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr) { @@ -3445,7 +3445,7 @@ static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans, return 0; } -int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len, +static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len, u32 *refs) { int ret; @@ -5434,7 +5434,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) return flags; } -int __alloc_chunk_for_shrink(struct btrfs_root *root, +static int __alloc_chunk_for_shrink(struct btrfs_root *root, struct btrfs_block_group_cache *shrink_block_group, int force) { @@ -5703,8 +5703,8 @@ out: return ret; } -int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *key) +static int find_first_block_group(struct btrfs_root *root, + struct btrfs_path *path, struct btrfs_key *key) { int ret = 0; struct btrfs_key found_key; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d79ccdbfdd9..c3dfe2a0ec8 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -112,7 +112,7 @@ void extent_io_tree_init(struct extent_io_tree *tree, } EXPORT_SYMBOL(extent_io_tree_init); -struct extent_state *alloc_extent_state(gfp_t mask) +static struct extent_state *alloc_extent_state(gfp_t mask) { struct extent_state *state; #ifdef LEAK_DEBUG @@ -136,7 +136,7 @@ struct extent_state *alloc_extent_state(gfp_t mask) } EXPORT_SYMBOL(alloc_extent_state); -void free_extent_state(struct extent_state *state) +static void free_extent_state(struct extent_state *state) { if (!state) return; @@ -662,7 +662,7 @@ static void set_state_bits(struct extent_io_tree *tree, * [start, end] is inclusive * This takes the tree lock. */ -int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, +static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, int exclusive, u64 *failed_start, gfp_t mask) { struct extent_state *state; @@ -879,12 +879,11 @@ int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, } EXPORT_SYMBOL(set_extent_new); -int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, +static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); } -EXPORT_SYMBOL(clear_extent_new); int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) @@ -894,27 +893,24 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, } EXPORT_SYMBOL(set_extent_uptodate); -int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, +static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); } -EXPORT_SYMBOL(clear_extent_uptodate); -int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, +static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, 0, NULL, mask); } -EXPORT_SYMBOL(set_extent_writeback); -int clear_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, +static int clear_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask); } -EXPORT_SYMBOL(clear_extent_writeback); int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) { @@ -994,7 +990,7 @@ EXPORT_SYMBOL(set_range_dirty); /* * helper function to set both pages and extents in the tree writeback */ -int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) +static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) { unsigned long index = start >> PAGE_CACHE_SHIFT; unsigned long end_index = end >> PAGE_CACHE_SHIFT; @@ -1010,7 +1006,6 @@ int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) set_extent_writeback(tree, start, end, GFP_NOFS); return 0; } -EXPORT_SYMBOL(set_range_writeback); /* * find the first offset in the io tree with 'bits' set. zero is @@ -1432,11 +1427,13 @@ out: spin_unlock_irq(&tree->lock); return total_bytes; } + +#if 0 /* * helper function to lock both pages and extents in the tree. * pages must be locked first. */ -int lock_range(struct extent_io_tree *tree, u64 start, u64 end) +static int lock_range(struct extent_io_tree *tree, u64 start, u64 end) { unsigned long index = start >> PAGE_CACHE_SHIFT; unsigned long end_index = end >> PAGE_CACHE_SHIFT; @@ -1473,12 +1470,11 @@ failed: } return err; } -EXPORT_SYMBOL(lock_range); /* * helper function to unlock both pages and extents in the tree. */ -int unlock_range(struct extent_io_tree *tree, u64 start, u64 end) +static int unlock_range(struct extent_io_tree *tree, u64 start, u64 end) { unsigned long index = start >> PAGE_CACHE_SHIFT; unsigned long end_index = end >> PAGE_CACHE_SHIFT; @@ -1493,7 +1489,7 @@ int unlock_range(struct extent_io_tree *tree, u64 start, u64 end) unlock_extent(tree, start, end, GFP_NOFS); return 0; } -EXPORT_SYMBOL(unlock_range); +#endif /* * set the private field for a given byte offset in the tree. If there isn't @@ -1956,7 +1952,7 @@ void set_page_extent_mapped(struct page *page) } EXPORT_SYMBOL(set_page_extent_mapped); -void set_page_extent_head(struct page *page, unsigned long len) +static void set_page_extent_head(struct page *page, unsigned long len) { set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); } @@ -2397,7 +2393,7 @@ update_nr_written: * WB_SYNC_ALL then we were called for data integrity and we must wait for * existing IO to complete. */ -int extent_write_cache_pages(struct extent_io_tree *tree, +static int extent_write_cache_pages(struct extent_io_tree *tree, struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, void *data, @@ -2502,7 +2498,6 @@ retry: } return ret; } -EXPORT_SYMBOL(extent_write_cache_pages); static noinline void flush_write_bio(void *data) { diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index f4926c0f3c8..09462adfbe3 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -443,7 +443,8 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) mutex_unlock(&block_group->alloc_mutex); } -struct btrfs_free_space *btrfs_find_free_space_offset(struct +#if 0 +static struct btrfs_free_space *btrfs_find_free_space_offset(struct btrfs_block_group_cache *block_group, u64 offset, u64 bytes) @@ -458,7 +459,7 @@ struct btrfs_free_space *btrfs_find_free_space_offset(struct return ret; } -struct btrfs_free_space *btrfs_find_free_space_bytes(struct +static struct btrfs_free_space *btrfs_find_free_space_bytes(struct btrfs_block_group_cache *block_group, u64 offset, u64 bytes) @@ -472,6 +473,7 @@ struct btrfs_free_space *btrfs_find_free_space_bytes(struct return ret; } +#endif struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache *block_group, u64 offset, diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index d93451c66ba..3d46fa1f29a 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -20,7 +20,7 @@ #include "disk-io.h" #include "transaction.h" -int find_name_in_backref(struct btrfs_path *path, const char * name, +static int find_name_in_backref(struct btrfs_path *path, const char *name, int name_len, struct btrfs_inode_ref **ref_ret) { struct extent_buffer *leaf; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b3d4078b69a..bd58ba655a4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1130,7 +1130,7 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, * bytes in this file, and to maintain the list of inodes that * have pending delalloc work to be done. */ -int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, +static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits) { unsigned long flags; @@ -1151,7 +1151,7 @@ int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, /* * extent_io.c clear_bit_hook, see set_bit_hook for why */ -int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, +static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits) { if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { @@ -1215,7 +1215,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, * At IO completion time the cums attached on the ordered extent record * are inserted into the btree */ -int __btrfs_submit_bio_start(struct inode *inode, int rw, struct bio *bio, +static int __btrfs_submit_bio_start(struct inode *inode, int rw, struct bio *bio, int mirror_num, unsigned long bio_flags) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -1234,7 +1234,7 @@ int __btrfs_submit_bio_start(struct inode *inode, int rw, struct bio *bio, * At IO completion time the cums attached on the ordered extent record * are inserted into the btree */ -int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, +static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, int mirror_num, unsigned long bio_flags) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -1245,7 +1245,7 @@ int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, * extent_io.c submission hook. This does the right thing for csum calculation on write, * or reading the csums from the tree before a read */ -int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, +static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, int mirror_num, unsigned long bio_flags) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -1313,7 +1313,7 @@ struct btrfs_writepage_fixup { struct btrfs_work work; }; -void btrfs_writepage_fixup_worker(struct btrfs_work *work) +static void btrfs_writepage_fixup_worker(struct btrfs_work *work) { struct btrfs_writepage_fixup *fixup; struct btrfs_ordered_extent *ordered; @@ -1372,7 +1372,7 @@ out_page: * to fix it up. The async helper will wait for ordered extents, set * the delalloc bit and make it safe to write the page. */ -int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) +static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) { struct inode *inode = page->mapping->host; struct btrfs_writepage_fixup *fixup; @@ -1526,7 +1526,7 @@ nocow: return 0; } -int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, +static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate) { return btrfs_finish_ordered_io(page->mapping->host, start, end); @@ -1548,7 +1548,7 @@ struct io_failure_record { int last_mirror; }; -int btrfs_io_failed_hook(struct bio *failed_bio, +static int btrfs_io_failed_hook(struct bio *failed_bio, struct page *page, u64 start, u64 end, struct extent_state *state) { @@ -1642,7 +1642,7 @@ int btrfs_io_failed_hook(struct bio *failed_bio, * each time an IO finishes, we do a fast check in the IO failure tree * to see if we need to process or clean up an io_failure_record */ -int btrfs_clean_io_failures(struct inode *inode, u64 start) +static int btrfs_clean_io_failures(struct inode *inode, u64 start) { u64 private; u64 private_failure; @@ -1675,7 +1675,7 @@ int btrfs_clean_io_failures(struct inode *inode, u64 start) * if there's a match, we allow the bio to finish. If not, we go through * the io_failure_record routines to find good copies */ -int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, +static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state) { size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT); @@ -4362,8 +4362,8 @@ out: * Invalidate a single dcache entry at the root of the filesystem. * Needed after creation of snapshot or subvolume. */ -void btrfs_invalidate_dcache_root(struct inode *dir, char *name, - int namelen) +static void btrfs_invalidate_dcache_root(struct inode *dir, + char *name, int namelen) { struct dentry *alias, *entry; struct qstr qstr; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 35f650e183e..cc7c5161e26 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -354,7 +354,7 @@ out_unlock: } -int btrfs_defrag_file(struct file *file) +static int btrfs_defrag_file(struct file *file) { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -649,7 +649,7 @@ static int btrfs_ioctl_defrag(struct file *file) return 0; } -long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) +static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) { struct btrfs_ioctl_vol_args *vol_args; int ret; @@ -671,7 +671,7 @@ out: return ret; } -long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) +static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) { struct btrfs_ioctl_vol_args *vol_args; int ret; @@ -696,8 +696,8 @@ out: return ret; } -long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, u64 off, - u64 olen, u64 destoff) +static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, + u64 off, u64 olen, u64 destoff) { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -1035,7 +1035,7 @@ out_fput: return ret; } -long btrfs_ioctl_clone_range(struct file *file, unsigned long argptr) +static long btrfs_ioctl_clone_range(struct file *file, unsigned long argptr) { struct btrfs_ioctl_clone_range_args args; @@ -1051,7 +1051,7 @@ long btrfs_ioctl_clone_range(struct file *file, unsigned long argptr) * basically own the machine, and have a very in depth understanding * of all the possible deadlocks and enospc problems. */ -long btrfs_ioctl_trans_start(struct file *file) +static long btrfs_ioctl_trans_start(struct file *file) { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index dbe20d4c6ea..f99335a999d 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -276,6 +276,7 @@ out: return ret; } +#if 0 /* this will get used when snapshot deletion is implemented */ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *tree_root, u64 root_id, u8 type, u64 ref_id) @@ -299,6 +300,7 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, btrfs_free_path(path); return ret; } +#endif int btrfs_find_root_ref(struct btrfs_root *tree_root, struct btrfs_path *path, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 1975ea273dc..93a21c77064 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -647,7 +647,7 @@ static int btrfs_interface_init(void) return misc_register(&btrfs_misc); } -void btrfs_interface_exit(void) +static void btrfs_interface_exit(void) { if (misc_deregister(&btrfs_misc) < 0) printk("misc_deregister failed for control device"); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index be4fc30a30e..4fcfc8b1189 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -23,6 +23,7 @@ #include "locking.h" #include "print-tree.h" #include "compat.h" +#include "tree-log.h" /* magic values for the inode_only field in btrfs_log_inode: * @@ -78,7 +79,7 @@ static int __btrfs_log_inode(struct btrfs_trans_handle *trans, * tree of log tree roots. This must be called with a tree log transaction * running (see start_log_trans). */ -int btrfs_add_log_tree(struct btrfs_trans_handle *trans, +static int btrfs_add_log_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_key key; @@ -1934,7 +1935,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, return ret; } -int wait_log_commit(struct btrfs_root *log) +static int wait_log_commit(struct btrfs_root *log) { DEFINE_WAIT(wait); u64 transid = log->fs_info->tree_log_transid; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 724ead54529..769f2c5d9e9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -238,7 +238,7 @@ done: return 0; } -void pending_bios_fn(struct btrfs_work *work) +static void pending_bios_fn(struct btrfs_work *work) { struct btrfs_device *device; @@ -686,7 +686,7 @@ error: return ret; } -int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, +static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 start) { @@ -1393,7 +1393,7 @@ error: goto out; } -int noinline btrfs_update_device(struct btrfs_trans_handle *trans, +static int noinline btrfs_update_device(struct btrfs_trans_handle *trans, struct btrfs_device *device) { int ret; @@ -1497,7 +1497,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, return 0; } -int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 +static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 chunk_offset) { struct btrfs_super_block *super_copy = &root->fs_info->super_copy; @@ -1543,7 +1543,7 @@ int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 return ret; } -int btrfs_relocate_chunk(struct btrfs_root *root, +static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset) { @@ -1884,7 +1884,7 @@ done: return ret; } -int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, +static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_chunk *chunk, int item_size) diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 5b9f7002513..c4617cde6c7 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -30,6 +30,7 @@ #include #include #include +#include "compression.h" /* Plan: call deflate() with avail_in == *sourcelen, avail_out = *dstlen - 12 and flush == Z_FINISH. -- cgit v1.2.3-70-g09d2 From a512bbf855ff0af474257475f2e6da7acd854f52 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Mon, 8 Dec 2008 16:46:26 -0500 Subject: Btrfs: superblock duplication This patch implements superblock duplication. Superblocks are stored at offset 16K, 64M and 256G on every devices. Spaces used by superblocks are preserved by the allocator, which uses a reverse mapping function to find the logical addresses that correspond to superblocks. Thank you, Signed-off-by: Yan Zheng --- fs/btrfs/disk-io.c | 208 ++++++++++++++++++++++++++++++-------------- fs/btrfs/disk-io.h | 17 +++- fs/btrfs/extent-tree.c | 54 ++++++------ fs/btrfs/free-space-cache.c | 1 - fs/btrfs/transaction.c | 2 +- fs/btrfs/tree-log.c | 3 +- fs/btrfs/volumes.c | 107 ++++++++++++++++++----- fs/btrfs/volumes.h | 6 +- 8 files changed, 279 insertions(+), 119 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 61dc3b2c834..c72f4f3b912 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1595,8 +1595,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info, BTRFS_ROOT_TREE_OBJECTID); - bh = __bread(fs_devices->latest_bdev, - BTRFS_SUPER_INFO_OFFSET / 4096, 4096); + bh = btrfs_read_dev_super(fs_devices->latest_bdev); if (!bh) goto fail_iput; @@ -1710,7 +1709,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, } mutex_lock(&fs_info->chunk_mutex); - ret = btrfs_read_sys_array(tree_root); + ret = btrfs_read_sys_array(tree_root, btrfs_super_bytenr(disk_super)); mutex_unlock(&fs_info->chunk_mutex); if (ret) { printk("btrfs: failed to read the system array on %s\n", @@ -1905,19 +1904,147 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) put_bh(bh); } -static int write_all_supers(struct btrfs_root *root) +struct buffer_head *btrfs_read_dev_super(struct block_device *bdev) +{ + struct buffer_head *bh; + struct buffer_head *latest = NULL; + struct btrfs_super_block *super; + int i; + u64 transid = 0; + u64 bytenr; + + /* we would like to check all the supers, but that would make + * a btrfs mount succeed after a mkfs from a different FS. + * So, we need to add a special mount option to scan for + * later supers, using BTRFS_SUPER_MIRROR_MAX instead + */ + for (i = 0; i < 1; i++) { + bytenr = btrfs_sb_offset(i); + if (bytenr + 4096 >= i_size_read(bdev->bd_inode)) + break; + bh = __bread(bdev, bytenr / 4096, 4096); + if (!bh) + continue; + + super = (struct btrfs_super_block *)bh->b_data; + if (btrfs_super_bytenr(super) != bytenr || + strncmp((char *)(&super->magic), BTRFS_MAGIC, + sizeof(super->magic))) { + brelse(bh); + continue; + } + + if (!latest || btrfs_super_generation(super) > transid) { + brelse(latest); + latest = bh; + transid = btrfs_super_generation(super); + } else { + brelse(bh); + } + } + return latest; +} + +static int write_dev_supers(struct btrfs_device *device, + struct btrfs_super_block *sb, + int do_barriers, int wait, int max_mirrors) +{ + struct buffer_head *bh; + int i; + int ret; + int errors = 0; + u32 crc; + u64 bytenr; + int last_barrier = 0; + + if (max_mirrors == 0) + max_mirrors = BTRFS_SUPER_MIRROR_MAX; + + /* make sure only the last submit_bh does a barrier */ + if (do_barriers) { + for (i = 0; i < max_mirrors; i++) { + bytenr = btrfs_sb_offset(i); + if (bytenr + BTRFS_SUPER_INFO_SIZE >= + device->total_bytes) + break; + last_barrier = i; + } + } + + for (i = 0; i < max_mirrors; i++) { + bytenr = btrfs_sb_offset(i); + if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) + break; + + if (wait) { + bh = __find_get_block(device->bdev, bytenr / 4096, + BTRFS_SUPER_INFO_SIZE); + BUG_ON(!bh); + brelse(bh); + wait_on_buffer(bh); + if (buffer_uptodate(bh)) { + brelse(bh); + continue; + } + } else { + btrfs_set_super_bytenr(sb, bytenr); + + crc = ~(u32)0; + crc = btrfs_csum_data(NULL, (char *)sb + + BTRFS_CSUM_SIZE, crc, + BTRFS_SUPER_INFO_SIZE - + BTRFS_CSUM_SIZE); + btrfs_csum_final(crc, sb->csum); + + bh = __getblk(device->bdev, bytenr / 4096, + BTRFS_SUPER_INFO_SIZE); + memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); + + set_buffer_uptodate(bh); + get_bh(bh); + lock_buffer(bh); + bh->b_end_io = btrfs_end_buffer_write_sync; + } + + if (i == last_barrier && do_barriers && device->barriers) { + ret = submit_bh(WRITE_BARRIER, bh); + if (ret == -EOPNOTSUPP) { + printk("btrfs: disabling barriers on dev %s\n", + device->name); + set_buffer_uptodate(bh); + device->barriers = 0; + get_bh(bh); + lock_buffer(bh); + ret = submit_bh(WRITE, bh); + } + } else { + ret = submit_bh(WRITE, bh); + } + + if (!ret && wait) { + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) + errors++; + } else if (ret) { + errors++; + } + if (wait) + brelse(bh); + } + return errors < i ? 0 : -1; +} + +int write_all_supers(struct btrfs_root *root, int max_mirrors) { struct list_head *cur; struct list_head *head = &root->fs_info->fs_devices->devices; struct btrfs_device *dev; struct btrfs_super_block *sb; struct btrfs_dev_item *dev_item; - struct buffer_head *bh; int ret; int do_barriers; int max_errors; int total_errors = 0; - u32 crc; u64 flags; max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; @@ -1944,40 +2071,11 @@ static int write_all_supers(struct btrfs_root *root) btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE); memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE); + flags = btrfs_super_flags(sb); btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN); - - crc = ~(u32)0; - crc = btrfs_csum_data(root, (char *)sb + BTRFS_CSUM_SIZE, crc, - BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); - btrfs_csum_final(crc, sb->csum); - - bh = __getblk(dev->bdev, BTRFS_SUPER_INFO_OFFSET / 4096, - BTRFS_SUPER_INFO_SIZE); - - memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); - dev->pending_io = bh; - - get_bh(bh); - set_buffer_uptodate(bh); - lock_buffer(bh); - bh->b_end_io = btrfs_end_buffer_write_sync; - - if (do_barriers && dev->barriers) { - ret = submit_bh(WRITE_BARRIER, bh); - if (ret == -EOPNOTSUPP) { - printk("btrfs: disabling barriers on dev %s\n", - dev->name); - set_buffer_uptodate(bh); - dev->barriers = 0; - get_bh(bh); - lock_buffer(bh); - ret = submit_bh(WRITE, bh); - } - } else { - ret = submit_bh(WRITE, bh); - } + ret = write_dev_supers(dev, sb, do_barriers, 0, max_mirrors); if (ret) total_errors++; } @@ -1985,8 +2083,8 @@ static int write_all_supers(struct btrfs_root *root) printk("btrfs: %d errors while writing supers\n", total_errors); BUG(); } - total_errors = 0; + total_errors = 0; list_for_each(cur, head) { dev = list_entry(cur, struct btrfs_device, dev_list); if (!dev->bdev) @@ -1994,29 +2092,9 @@ static int write_all_supers(struct btrfs_root *root) if (!dev->in_fs_metadata || !dev->writeable) continue; - BUG_ON(!dev->pending_io); - bh = dev->pending_io; - wait_on_buffer(bh); - if (!buffer_uptodate(dev->pending_io)) { - if (do_barriers && dev->barriers) { - printk("btrfs: disabling barriers on dev %s\n", - dev->name); - set_buffer_uptodate(bh); - get_bh(bh); - lock_buffer(bh); - dev->barriers = 0; - ret = submit_bh(WRITE, bh); - BUG_ON(ret); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) - total_errors++; - } else { - total_errors++; - } - - } - dev->pending_io = NULL; - brelse(bh); + ret = write_dev_supers(dev, sb, do_barriers, 1, max_mirrors); + if (ret) + total_errors++; } if (total_errors > max_errors) { printk("btrfs: %d errors while writing supers\n", total_errors); @@ -2025,12 +2103,12 @@ static int write_all_supers(struct btrfs_root *root) return 0; } -int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root - *root) +int write_ctree_super(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int max_mirrors) { int ret; - ret = write_all_supers(root); + ret = write_all_supers(root, max_mirrors); return ret; } @@ -2116,7 +2194,7 @@ int btrfs_commit_super(struct btrfs_root *root) ret = btrfs_write_and_wait_transaction(NULL, root); BUG_ON(ret); - ret = write_ctree_super(NULL, root); + ret = write_ctree_super(NULL, root, 0); return ret; } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 717e94811e4..c0ff404c31b 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -19,8 +19,20 @@ #ifndef __DISKIO__ #define __DISKIO__ -#define BTRFS_SUPER_INFO_OFFSET (16 * 1024) +#define BTRFS_SUPER_INFO_OFFSET (64 * 1024) #define BTRFS_SUPER_INFO_SIZE 4096 + +#define BTRFS_SUPER_MIRROR_MAX 3 +#define BTRFS_SUPER_MIRROR_SHIFT 12 + +static inline u64 btrfs_sb_offset(int mirror) +{ + u64 start = 16 * 1024; + if (mirror) + return start << (BTRFS_SUPER_MIRROR_SHIFT * mirror); + return BTRFS_SUPER_INFO_OFFSET; +} + struct btrfs_device; struct btrfs_fs_devices; @@ -37,7 +49,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, char *options); int close_ctree(struct btrfs_root *root); int write_ctree_super(struct btrfs_trans_handle *trans, - struct btrfs_root *root); + struct btrfs_root *root, int max_mirrors); +struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); int btrfs_commit_super(struct btrfs_root *root); struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d1563852938..803647bc840 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -189,6 +189,29 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group, return 0; } +static int remove_sb_from_cache(struct btrfs_root *root, + struct btrfs_block_group_cache *cache) +{ + u64 bytenr; + u64 *logical; + int stripe_len; + int i, nr, ret; + + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + bytenr = btrfs_sb_offset(i); + ret = btrfs_rmap_block(&root->fs_info->mapping_tree, + cache->key.objectid, bytenr, 0, + &logical, &nr, &stripe_len); + BUG_ON(ret); + while (nr--) { + btrfs_remove_free_space(cache, logical[nr], + stripe_len); + } + kfree(logical); + } + return 0; +} + static int cache_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *block_group) { @@ -197,9 +220,7 @@ static int cache_block_group(struct btrfs_root *root, struct btrfs_key key; struct extent_buffer *leaf; int slot; - u64 last = 0; - u64 first_free; - int found = 0; + u64 last = block_group->key.objectid; if (!block_group) return 0; @@ -220,23 +241,13 @@ static int cache_block_group(struct btrfs_root *root, * skip the locking here */ path->skip_locking = 1; - first_free = max_t(u64, block_group->key.objectid, - BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE); - key.objectid = block_group->key.objectid; + key.objectid = last; key.offset = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto err; - ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY); - if (ret < 0) - goto err; - if (ret == 0) { - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (key.objectid + key.offset > first_free) - first_free = key.objectid + key.offset; - } + while(1) { leaf = path->nodes[0]; slot = path->slots[0]; @@ -258,11 +269,6 @@ static int cache_block_group(struct btrfs_root *root, break; if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { - if (!found) { - last = first_free; - found = 1; - } - add_new_free_space(block_group, root->fs_info, last, key.objectid); @@ -272,13 +278,11 @@ next: path->slots[0]++; } - if (!found) - last = first_free; - add_new_free_space(block_group, root->fs_info, last, block_group->key.objectid + block_group->key.offset); + remove_sb_from_cache(root, block_group); block_group->cached = 1; ret = 0; err: @@ -1974,10 +1978,8 @@ static int update_block_group(struct btrfs_trans_handle *trans, if (alloc) { old_val += num_bytes; cache->space_info->bytes_used += num_bytes; - if (cache->ro) { + if (cache->ro) cache->space_info->bytes_readonly -= num_bytes; - WARN_ON(1); - } btrfs_set_block_group_used(&cache->item, old_val); spin_unlock(&cache->lock); spin_unlock(&cache->space_info->lock); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 09462adfbe3..2e69b9c3043 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -290,7 +290,6 @@ __btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, ret = -EINVAL; goto out; } - unlink_free_space(block_group, info); if (info->bytes == bytes) { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index c38f6a0e30b..47cd5fcad2c 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1038,7 +1038,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->trans_mutex); ret = btrfs_write_and_wait_transaction(trans, root); BUG_ON(ret); - write_ctree_super(trans, root); + write_ctree_super(trans, root, 0); /* * the super is written, we can safely allow the tree-loggers diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 08469ec0585..d3f9c2c663c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1996,7 +1996,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, btrfs_set_super_log_root_level(&root->fs_info->super_for_commit, btrfs_header_level(log->fs_info->log_root_tree->node)); - write_ctree_super(trans, log->fs_info->tree_root); + write_ctree_super(trans, log->fs_info->tree_root, 2); log->fs_info->tree_log_transid++; log->fs_info->tree_log_batch = 0; atomic_set(&log->fs_info->tree_log_commit, 0); @@ -2006,7 +2006,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, out: mutex_unlock(&log->fs_info->tree_log_mutex); return 0; - } /* * free all the extents used by the tree log. This should be called diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2049d179ccd..a79b3cc09e9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -423,15 +423,11 @@ int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, } set_blocksize(bdev, 4096); - bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096); + bh = btrfs_read_dev_super(bdev); if (!bh) goto error_close; disk_super = (struct btrfs_super_block *)bh->b_data; - if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, - sizeof(disk_super->magic))) - goto error_brelse; - devid = le64_to_cpu(disk_super->dev_item.devid); if (devid != device->devid) goto error_brelse; @@ -529,17 +525,12 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, ret = set_blocksize(bdev, 4096); if (ret) goto error_close; - bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096); + bh = btrfs_read_dev_super(bdev); if (!bh) { ret = -EIO; goto error_close; } disk_super = (struct btrfs_super_block *)bh->b_data; - if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, - sizeof(disk_super->magic))) { - ret = -EINVAL; - goto error_brelse; - } devid = le64_to_cpu(disk_super->dev_item.devid); transid = btrfs_super_generation(disk_super); if (disk_super->label[0]) @@ -553,7 +544,6 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, printk("devid %Lu transid %Lu %s\n", devid, transid, path); ret = device_list_add(path, disk_super, devid, fs_devices_ret); -error_brelse: brelse(bh); error_close: close_bdev_exclusive(bdev, flags); @@ -1016,17 +1006,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) } set_blocksize(bdev, 4096); - bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096); + bh = btrfs_read_dev_super(bdev); if (!bh) { ret = -EIO; goto error_close; } disk_super = (struct btrfs_super_block *)bh->b_data; - if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, - sizeof(disk_super->magic))) { - ret = -ENOENT; - goto error_brelse; - } devid = le64_to_cpu(disk_super->dev_item.devid); dev_uuid = disk_super->dev_item.uuid; device = btrfs_find_device(root, devid, dev_uuid, @@ -2563,6 +2548,88 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, mirror_num, NULL); } +int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, + u64 chunk_start, u64 physical, u64 devid, + u64 **logical, int *naddrs, int *stripe_len) +{ + struct extent_map_tree *em_tree = &map_tree->map_tree; + struct extent_map *em; + struct map_lookup *map; + u64 *buf; + u64 bytenr; + u64 length; + u64 stripe_nr; + int i, j, nr = 0; + + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, chunk_start, 1); + spin_unlock(&em_tree->lock); + + BUG_ON(!em || em->start != chunk_start); + map = (struct map_lookup *)em->bdev; + + length = em->len; + if (map->type & BTRFS_BLOCK_GROUP_RAID10) + do_div(length, map->num_stripes / map->sub_stripes); + else if (map->type & BTRFS_BLOCK_GROUP_RAID0) + do_div(length, map->num_stripes); + + buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS); + BUG_ON(!buf); + + for (i = 0; i < map->num_stripes; i++) { + if (devid && map->stripes[i].dev->devid != devid) + continue; + if (map->stripes[i].physical > physical || + map->stripes[i].physical + length <= physical) + continue; + + stripe_nr = physical - map->stripes[i].physical; + do_div(stripe_nr, map->stripe_len); + + if (map->type & BTRFS_BLOCK_GROUP_RAID10) { + stripe_nr = stripe_nr * map->num_stripes + i; + do_div(stripe_nr, map->sub_stripes); + } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) { + stripe_nr = stripe_nr * map->num_stripes + i; + } + bytenr = chunk_start + stripe_nr * map->stripe_len; + for (j = 0; j < nr; j++) { + if (buf[j] == bytenr) + break; + } + if (j == nr) + buf[nr++] = bytenr; + } + + for (i = 0; i > nr; i++) { + struct btrfs_multi_bio *multi; + struct btrfs_bio_stripe *stripe; + int ret; + + length = 1; + ret = btrfs_map_block(map_tree, WRITE, buf[i], + &length, &multi, 0); + BUG_ON(ret); + + stripe = multi->stripes; + for (j = 0; j < multi->num_stripes; j++) { + if (stripe->physical >= physical && + physical < stripe->physical + length) + break; + } + BUG_ON(j >= multi->num_stripes); + kfree(multi); + } + + *logical = buf; + *naddrs = nr; + *stripe_len = map->stripe_len; + + free_extent_map(em); + return 0; +} + int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree, u64 logical, struct page *page) { @@ -3003,7 +3070,7 @@ int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf) return read_one_dev(root, buf, dev_item); } -int btrfs_read_sys_array(struct btrfs_root *root) +int btrfs_read_sys_array(struct btrfs_root *root, u64 sb_bytenr) { struct btrfs_super_block *super_copy = &root->fs_info->super_copy; struct extent_buffer *sb; @@ -3018,7 +3085,7 @@ int btrfs_read_sys_array(struct btrfs_root *root) u32 cur; struct btrfs_key key; - sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET, + sb = btrfs_find_create_tree_block(root, sb_bytenr, BTRFS_SUPER_INFO_SIZE); if (!sb) return -ENOMEM; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index fcbdcb3ae13..bdebe83c319 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -28,7 +28,6 @@ struct btrfs_device { struct list_head dev_alloc_list; struct btrfs_fs_devices *fs_devices; struct btrfs_root *dev_root; - struct buffer_head *pending_io; struct bio *pending_bios; struct bio *pending_bio_tail; int running_pending; @@ -125,7 +124,10 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 logical, u64 *length, struct btrfs_multi_bio **multi_ret, int mirror_num); -int btrfs_read_sys_array(struct btrfs_root *root); +int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, + u64 chunk_start, u64 physical, u64 devid, + u64 **logical, int *naddrs, int *stripe_len); +int btrfs_read_sys_array(struct btrfs_root *root, u64 sb_bytenr); int btrfs_read_chunk_tree(struct btrfs_root *root); int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 type); -- cgit v1.2.3-70-g09d2 From 459931eca5f4b8c9ad259d07cc1ca49afed54804 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 10 Dec 2008 09:10:46 -0500 Subject: Btrfs: Delete csum items when freeing extents This finishes off the new checksumming code by removing csum items for extents that are no longer in use. The trick is doing it without racing because a single csum item may hold csums for more than one extent. Extra checks are added to btrfs_csum_file_blocks to make sure that we are using the correct csum item after dropping locks. A new btrfs_split_item is added to split a single csum item so it can be split without dropping the leaf lock. This is used to remove csum bytes from the middle of an item. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 131 ++++++++++++++++++++++++++-- fs/btrfs/ctree.h | 13 +++ fs/btrfs/extent-tree.c | 6 +- fs/btrfs/file-item.c | 226 +++++++++++++++++++++++++++++++++++++++++-------- 4 files changed, 335 insertions(+), 41 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 19c0dd33b1e..c0c95cccbb5 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1512,7 +1512,8 @@ cow_done: if (ret && slot > 0) slot -= 1; p->slots[level] = slot; - if (ins_len > 0 && btrfs_header_nritems(b) >= + if ((p->search_for_split || ins_len > 0) && + btrfs_header_nritems(b) >= BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { int sret = split_node(trans, root, p, level); BUG_ON(sret > 0); @@ -1596,7 +1597,8 @@ cow_done: goto done; } } - unlock_up(p, level, lowest_unlock); + if (!p->search_for_split) + unlock_up(p, level, lowest_unlock); goto done; } } @@ -2636,11 +2638,11 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, int num_doubles = 0; struct btrfs_disk_key disk_key; - if (extend) + if (extend && data_size) space_needed = data_size; /* first try to make some room by pushing left and right */ - if (ins_key->type != BTRFS_DIR_ITEM_KEY) { + if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { wret = push_leaf_right(trans, root, path, data_size, 0); if (wret < 0) { return wret; @@ -2721,7 +2723,7 @@ again: } else { if (leaf_space_used(l, 0, mid + 1) + space_needed > BTRFS_LEAF_DATA_SIZE(root)) { - if (!extend && slot == 0) { + if (!extend && data_size && slot == 0) { btrfs_cpu_key_to_disk(&disk_key, ins_key); btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, @@ -2742,7 +2744,7 @@ again: } btrfs_mark_buffer_dirty(right); return ret; - } else if (extend && slot == 0) { + } else if ((extend || !data_size) && slot == 0) { mid = 1; } else { mid = slot; @@ -2827,6 +2829,123 @@ again: return ret; } +/* + * This function splits a single item into two items, + * giving 'new_key' to the new item and splitting the + * old one at split_offset (from the start of the item). + * + * The path may be released by this operation. After + * the split, the path is pointing to the old item. The + * new item is going to be in the same node as the old one. + * + * Note, the item being split must be smaller enough to live alone on + * a tree block with room for one extra struct btrfs_item + * + * This allows us to split the item in place, keeping a lock on the + * leaf the entire time. + */ +int btrfs_split_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *new_key, + unsigned long split_offset) +{ + u32 item_size; + struct extent_buffer *leaf; + struct btrfs_key orig_key; + struct btrfs_item *item; + struct btrfs_item *new_item; + int ret = 0; + int slot; + u32 nritems; + u32 orig_offset; + struct btrfs_disk_key disk_key; + char *buf; + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]); + if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item)) + goto split; + + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + btrfs_release_path(root, path); + + path->search_for_split = 1; + path->keep_locks = 1; + + ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1); + path->search_for_split = 0; + + /* if our item isn't there or got smaller, return now */ + if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0], + path->slots[0])) { + path->keep_locks = 0; + return -EAGAIN; + } + + ret = split_leaf(trans, root, &orig_key, path, 0, 0); + path->keep_locks = 0; + BUG_ON(ret); + + BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); + leaf = path->nodes[0]; + +split: + item = btrfs_item_nr(leaf, path->slots[0]); + orig_offset = btrfs_item_offset(leaf, item); + item_size = btrfs_item_size(leaf, item); + + + buf = kmalloc(item_size, GFP_NOFS); + read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, + path->slots[0]), item_size); + slot = path->slots[0] + 1; + leaf = path->nodes[0]; + + nritems = btrfs_header_nritems(leaf); + + if (slot != nritems) { + /* shift the items */ + memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), + btrfs_item_nr_offset(slot), + (nritems - slot) * sizeof(struct btrfs_item)); + + } + + btrfs_cpu_key_to_disk(&disk_key, new_key); + btrfs_set_item_key(leaf, &disk_key, slot); + + new_item = btrfs_item_nr(leaf, slot); + + btrfs_set_item_offset(leaf, new_item, orig_offset); + btrfs_set_item_size(leaf, new_item, item_size - split_offset); + + btrfs_set_item_offset(leaf, item, + orig_offset + item_size - split_offset); + btrfs_set_item_size(leaf, item, split_offset); + + btrfs_set_header_nritems(leaf, nritems + 1); + + /* write the data for the start of the original item */ + write_extent_buffer(leaf, buf, + btrfs_item_ptr_offset(leaf, path->slots[0]), + split_offset); + + /* write the data for the new item */ + write_extent_buffer(leaf, buf + split_offset, + btrfs_item_ptr_offset(leaf, slot), + item_size - split_offset); + btrfs_mark_buffer_dirty(leaf); + + ret = 0; + if (btrfs_leaf_free_space(root, leaf) < 0) { + btrfs_print_leaf(root, leaf); + BUG(); + } + kfree(buf); + return ret; +} + /* * make the item pointed to by the path smaller. new_size indicates * how small to make it, and from_end tells us if we just chop bytes diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f72b4381934..5b0c79d22c0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -409,6 +409,12 @@ struct btrfs_path { int keep_locks; int skip_locking; int lowest_level; + + /* + * set by btrfs_split_item, tells search_slot to keep all locks + * and to force calls to keep space in the nodes + */ + int search_for_split; }; /* @@ -1816,6 +1822,11 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 new_size, int from_end); +int btrfs_split_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *new_key, + unsigned long split_offset); int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); @@ -1953,6 +1964,8 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key *location, int mod); /* file-item.c */ +int btrfs_del_csums(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, u64 len); int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, struct bio *bio, u32 *dst); int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 803647bc840..cc74316dc42 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2484,7 +2484,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, mark_free = 1; BUG_ON(ret < 0); } - /* block accounting for super block */ spin_lock_irq(&info->delalloc_lock); super_used = btrfs_super_bytes_used(&info->super_copy); @@ -2504,6 +2503,11 @@ static int __free_extent(struct btrfs_trans_handle *trans, mark_free); BUG_ON(ret); + if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { + ret = btrfs_del_csums(trans, root, bytenr, num_bytes); + BUG_ON(ret); + } + #ifdef BIO_RW_DISCARD /* Tell the block device(s) that the sectors can be discarded */ ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index a3ad2ce0011..3ebef871ee6 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -310,6 +310,179 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, return 0; } +/* + * helper function for csum removal, this expects the + * key to describe the csum pointed to by the path, and it expects + * the csum to overlap the range [bytenr, len] + * + * The csum should not be entirely contained in the range and the + * range should not be entirely contained in the csum. + * + * This calls btrfs_truncate_item with the correct args based on the + * overlap, and fixes up the key as required. + */ +static noinline int truncate_one_csum(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *key, + u64 bytenr, u64 len) +{ + struct extent_buffer *leaf; + u16 csum_size = + btrfs_super_csum_size(&root->fs_info->super_copy); + u64 csum_end; + u64 end_byte = bytenr + len; + u32 blocksize_bits = root->fs_info->sb->s_blocksize_bits; + int ret; + + leaf = path->nodes[0]; + csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size; + csum_end <<= root->fs_info->sb->s_blocksize_bits; + csum_end += key->offset; + + if (key->offset < bytenr && csum_end <= end_byte) { + /* + * [ bytenr - len ] + * [ ] + * [csum ] + * A simple truncate off the end of the item + */ + u32 new_size = (bytenr - key->offset) >> blocksize_bits; + new_size *= csum_size; + ret = btrfs_truncate_item(trans, root, path, new_size, 1); + BUG_ON(ret); + } else if (key->offset >= bytenr && csum_end > end_byte && + end_byte > key->offset) { + /* + * [ bytenr - len ] + * [ ] + * [csum ] + * we need to truncate from the beginning of the csum + */ + u32 new_size = (csum_end - end_byte) >> blocksize_bits; + new_size *= csum_size; + + ret = btrfs_truncate_item(trans, root, path, new_size, 0); + BUG_ON(ret); + + key->offset = end_byte; + ret = btrfs_set_item_key_safe(trans, root, path, key); + BUG_ON(ret); + } else { + BUG(); + } + return 0; +} + +/* + * deletes the csum items from the csum tree for a given + * range of bytes. + */ +int btrfs_del_csums(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, u64 len) +{ + struct btrfs_path *path; + struct btrfs_key key; + u64 end_byte = bytenr + len; + u64 csum_end; + struct extent_buffer *leaf; + int ret; + u16 csum_size = + btrfs_super_csum_size(&root->fs_info->super_copy); + int blocksize_bits = root->fs_info->sb->s_blocksize_bits; + + root = root->fs_info->csum_root; + + path = btrfs_alloc_path(); + + while(1) { + key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; + key.offset = end_byte - 1; + key.type = BTRFS_EXTENT_CSUM_KEY; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) { + if (path->slots[0] == 0) + goto out; + path->slots[0]--; + } + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + + if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || + key.type != BTRFS_EXTENT_CSUM_KEY) { + break; + } + + if (key.offset >= end_byte) + break; + + csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size; + csum_end <<= blocksize_bits; + csum_end += key.offset; + + /* this csum ends before we start, we're done */ + if (csum_end <= bytenr) + break; + + /* delete the entire item, it is inside our range */ + if (key.offset >= bytenr && csum_end <= end_byte) { + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + } else if (key.offset < bytenr && csum_end > end_byte) { + unsigned long offset; + unsigned long shift_len; + unsigned long item_offset; + /* + * [ bytenr - len ] + * [csum ] + * + * Our bytes are in the middle of the csum, + * we need to split this item and insert a new one. + * + * But we can't drop the path because the + * csum could change, get removed, extended etc. + * + * The trick here is the max size of a csum item leaves + * enough room in the tree block for a single + * item header. So, we split the item in place, + * adding a new header pointing to the existing + * bytes. Then we loop around again and we have + * a nicely formed csum item that we can neatly + * truncate. + */ + offset = (bytenr - key.offset) >> blocksize_bits; + offset *= csum_size; + + shift_len = (len >> blocksize_bits) * csum_size; + + item_offset = btrfs_item_ptr_offset(leaf, + path->slots[0]); + + memset_extent_buffer(leaf, 0, item_offset + offset, + shift_len); + key.offset = bytenr; + + /* + * btrfs_split_item returns -EAGAIN when the + * item changed size or key + */ + ret = btrfs_split_item(trans, root, path, &key, offset); + BUG_ON(ret && ret != -EAGAIN); + + key.offset = end_byte - 1; + } else { + ret = truncate_one_csum(trans, root, path, + &key, bytenr, len); + BUG_ON(ret); + } + btrfs_release_path(root, path); + } +out: + btrfs_free_path(path); + return 0; +} + int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_ordered_sum *sums) @@ -396,28 +569,40 @@ again: csum_size, 1); if (ret < 0) goto fail_unlock; - if (ret == 0) { - BUG(); - } - if (path->slots[0] == 0) { - goto insert; + + if (ret > 0) { + if (path->slots[0] == 0) + goto insert; + path->slots[0]--; } - path->slots[0]--; + leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); csum_offset = (bytenr - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; + if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY || found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || csum_offset >= MAX_CSUM_ITEMS(root, csum_size)) { goto insert; } + if (csum_offset >= btrfs_item_size_nr(leaf, path->slots[0]) / csum_size) { u32 diff = (csum_offset + 1) * csum_size; + + /* + * is the item big enough already? we dropped our lock + * before and need to recheck + */ + if (diff < btrfs_item_size_nr(leaf, path->slots[0])) + goto csum; + diff = diff - btrfs_item_size_nr(leaf, path->slots[0]); - if (diff != csum_size) + if (diff != csum_size) { goto insert; + } + ret = btrfs_extend_item(trans, root, path, diff); BUG_ON(ret); goto csum; @@ -518,30 +703,3 @@ out: fail_unlock: goto out; } - -int btrfs_csum_truncate(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_path *path, - u64 isize) -{ - struct btrfs_key key; - struct extent_buffer *leaf = path->nodes[0]; - int slot = path->slots[0]; - int ret; - u32 new_item_size; - u64 new_item_span; - u64 blocks; - - btrfs_item_key_to_cpu(leaf, &key, slot); - if (isize <= key.offset) - return 0; - new_item_span = isize - key.offset; - blocks = (new_item_span + root->sectorsize - 1) >> - root->fs_info->sb->s_blocksize_bits; - new_item_size = blocks * - btrfs_super_csum_size(&root->fs_info->super_copy); - if (new_item_size >= btrfs_item_size_nr(leaf, slot)) - return 0; - ret = btrfs_truncate_item(trans, root, path, new_item_size, 1); - BUG_ON(ret); - return ret; -} -- cgit v1.2.3-70-g09d2 From 0403e47ee26f26e960ee9038552bc89df4a1fb3d Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Wed, 10 Dec 2008 20:32:51 -0500 Subject: Btrfs: Add checking of csum tree in balancing code This updates the space balancing code for the new checksum format. Signed-off-by: Yan Zheng --- fs/btrfs/disk-io.c | 2 ++ fs/btrfs/extent-tree.c | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c72f4f3b912..6f042de1ac4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1097,6 +1097,8 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, return fs_info->chunk_root; if (location->objectid == BTRFS_DEV_TREE_OBJECTID) return fs_info->dev_root; + if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) + return fs_info->csum_root; root = radix_tree_lookup(&fs_info->fs_roots_radix, (unsigned long)location->objectid); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cc74316dc42..673ff59c288 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -241,7 +241,7 @@ static int cache_block_group(struct btrfs_root *root, * skip the locking here */ path->skip_locking = 1; - key.objectid = last; + key.objectid = max_t(u64, last, BTRFS_SUPER_INFO_OFFSET); key.offset = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); @@ -4087,7 +4087,8 @@ static int is_cowonly_root(u64 root_objectid) root_objectid == BTRFS_EXTENT_TREE_OBJECTID || root_objectid == BTRFS_CHUNK_TREE_OBJECTID || root_objectid == BTRFS_DEV_TREE_OBJECTID || - root_objectid == BTRFS_TREE_LOG_OBJECTID) + root_objectid == BTRFS_TREE_LOG_OBJECTID || + root_objectid == BTRFS_CSUM_TREE_OBJECTID) return 1; return 0; } @@ -5497,8 +5498,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, btrfs_set_inode_generation(leaf, item, 1); btrfs_set_inode_size(leaf, item, size); btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); - btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NODATASUM | - BTRFS_INODE_NOCOMPRESS); + btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); btrfs_mark_buffer_dirty(leaf); btrfs_release_path(root, path); out: -- cgit v1.2.3-70-g09d2 From d2fb3437e4d8d12c73c587615ad187d5288547ec Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Thu, 11 Dec 2008 16:30:39 -0500 Subject: Btrfs: fix leaking block group on balance The block group structs are referenced in many different places, and it's not safe to free while balancing. So, those block group structs were simply leaked instead. This patch replaces the block group pointer in the inode with the starting byte offset of the block group and adds reference counting to the block group struct. Signed-off-by: Yan Zheng --- fs/btrfs/btrfs_inode.h | 8 ++- fs/btrfs/ctree.h | 17 ++++--- fs/btrfs/extent-tree.c | 132 ++++++++++++++++++++++--------------------------- fs/btrfs/inode.c | 43 +++++----------- fs/btrfs/ioctl.c | 2 +- fs/btrfs/transaction.c | 2 +- fs/btrfs/transaction.h | 2 +- 7 files changed, 88 insertions(+), 118 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 1b9ec1ab1f6..a8c9693b75a 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -28,11 +28,6 @@ struct btrfs_inode { /* which subvolume this inode belongs to */ struct btrfs_root *root; - /* the block group preferred for allocations. This pointer is buggy - * and needs to be replaced with a bytenr instead - */ - struct btrfs_block_group_cache *block_group; - /* key used to find this inode on disk. This is used by the code * to read in roots of subvolumes */ @@ -115,6 +110,9 @@ struct btrfs_inode { */ u64 index_cnt; + /* the start of block group preferred for allocations. */ + u64 block_group; + struct inode vfs_inode; }; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5b0c79d22c0..8733081d97a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -653,6 +653,9 @@ struct btrfs_block_group_cache { /* for block groups in the same raid type */ struct list_head list; + + /* usage count */ + atomic_t count; }; struct btrfs_leaf_ref_tree { @@ -1706,10 +1709,8 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 bytenr); -struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, - struct btrfs_block_group_cache - *hint, u64 search_start, - int data, int owner); +u64 btrfs_find_block_group(struct btrfs_root *root, + u64 search_start, u64 search_hint, int owner); struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 blocksize, u64 parent, @@ -1770,6 +1771,7 @@ int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, u64 owner_objectid); int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); int btrfs_make_block_group(struct btrfs_trans_handle *trans, @@ -2019,10 +2021,9 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root); int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc); -int btrfs_create_subvol_root(struct btrfs_root *new_root, struct dentry *dentry, - struct btrfs_trans_handle *trans, u64 new_dirid, - struct btrfs_block_group_cache *block_group); - +int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, + struct btrfs_root *new_root, struct dentry *dentry, + u64 new_dirid, u64 alloc_hint); int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 673ff59c288..1cc89246ee2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -53,10 +53,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, int all); static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, int all); -static struct btrfs_block_group_cache * -__btrfs_find_block_group(struct btrfs_root *root, - struct btrfs_block_group_cache *hint, - u64 search_start, int data, int owner); static int pin_down_bytes(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int is_data); @@ -142,6 +138,8 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, break; } } + if (ret) + atomic_inc(&ret->count); spin_unlock(&info->block_group_cache_lock); return ret; @@ -318,6 +316,12 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct return cache; } +static inline void put_block_group(struct btrfs_block_group_cache *cache) +{ + if (atomic_dec_and_test(&cache->count)) + kfree(cache); +} + static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, u64 flags) { @@ -341,54 +345,16 @@ static u64 div_factor(u64 num, int factor) return num; } -static struct btrfs_block_group_cache * -__btrfs_find_block_group(struct btrfs_root *root, - struct btrfs_block_group_cache *hint, - u64 search_start, int data, int owner) +u64 btrfs_find_block_group(struct btrfs_root *root, + u64 search_start, u64 search_hint, int owner) { struct btrfs_block_group_cache *cache; - struct btrfs_block_group_cache *found_group = NULL; - struct btrfs_fs_info *info = root->fs_info; u64 used; - u64 last = 0; - u64 free_check; + u64 last = max(search_hint, search_start); + u64 group_start = 0; int full_search = 0; - int factor = 10; + int factor = 9; int wrapped = 0; - - if (data & BTRFS_BLOCK_GROUP_METADATA) - factor = 9; - - if (search_start) { - struct btrfs_block_group_cache *shint; - shint = btrfs_lookup_first_block_group(info, search_start); - if (shint && block_group_bits(shint, data)) { - spin_lock(&shint->lock); - used = btrfs_block_group_used(&shint->item); - if (used + shint->pinned + shint->reserved < - div_factor(shint->key.offset, factor)) { - spin_unlock(&shint->lock); - return shint; - } - spin_unlock(&shint->lock); - } - } - if (hint && block_group_bits(hint, data)) { - spin_lock(&hint->lock); - used = btrfs_block_group_used(&hint->item); - if (used + hint->pinned + hint->reserved < - div_factor(hint->key.offset, factor)) { - spin_unlock(&hint->lock); - return hint; - } - spin_unlock(&hint->lock); - last = hint->key.objectid + hint->key.offset; - } else { - if (hint) - last = max(hint->key.objectid, search_start); - else - last = search_start; - } again: while (1) { cache = btrfs_lookup_first_block_group(root->fs_info, last); @@ -399,16 +365,18 @@ again: last = cache->key.objectid + cache->key.offset; used = btrfs_block_group_used(&cache->item); - if (block_group_bits(cache, data)) { - free_check = div_factor(cache->key.offset, factor); + if ((full_search || !cache->ro) && + block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) { if (used + cache->pinned + cache->reserved < - free_check) { - found_group = cache; + div_factor(cache->key.offset, factor)) { + group_start = cache->key.objectid; spin_unlock(&cache->lock); + put_block_group(cache); goto found; } } spin_unlock(&cache->lock); + put_block_group(cache); cond_resched(); } if (!wrapped) { @@ -423,18 +391,7 @@ again: goto again; } found: - return found_group; -} - -struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, - struct btrfs_block_group_cache - *hint, u64 search_start, - int data, int owner) -{ - - struct btrfs_block_group_cache *ret; - ret = __btrfs_find_block_group(root, hint, search_start, data, owner); - return ret; + return group_start; } /* simple helper to search for an existing extent at a given offset */ @@ -1809,6 +1766,19 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, return werr; } +int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) +{ + struct btrfs_block_group_cache *block_group; + int readonly = 0; + + block_group = btrfs_lookup_block_group(root->fs_info, bytenr); + if (!block_group || block_group->ro) + readonly = 1; + if (block_group) + put_block_group(block_group); + return readonly; +} + static int update_space_info(struct btrfs_fs_info *info, u64 flags, u64 total_bytes, u64 bytes_used, struct btrfs_space_info **space_info) @@ -1995,10 +1965,10 @@ static int update_block_group(struct btrfs_trans_handle *trans, int ret; ret = btrfs_add_free_space(cache, bytenr, num_bytes); - if (ret) - return -1; + WARN_ON(ret); } } + put_block_group(cache); total -= num_bytes; bytenr += num_bytes; } @@ -2008,12 +1978,16 @@ static int update_block_group(struct btrfs_trans_handle *trans, static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) { struct btrfs_block_group_cache *cache; + u64 bytenr; cache = btrfs_lookup_first_block_group(root->fs_info, search_start); if (!cache) return 0; - return cache->key.objectid; + bytenr = cache->key.objectid; + put_block_group(cache); + + return bytenr; } int btrfs_update_pinned_extents(struct btrfs_root *root, @@ -2055,6 +2029,7 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, if (cache->cached) btrfs_add_free_space(cache, bytenr, len); } + put_block_group(cache); bytenr += len; num -= len; } @@ -2085,6 +2060,7 @@ static int update_reserved_extents(struct btrfs_root *root, } spin_unlock(&cache->lock); spin_unlock(&cache->space_info->lock); + put_block_group(cache); bytenr += len; num -= len; } @@ -2724,6 +2700,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, cache = btrfs_lookup_block_group(root->fs_info, bytenr); BUG_ON(!cache); btrfs_add_free_space(cache, bytenr, num_bytes); + put_block_group(cache); update_reserved_extents(root, bytenr, num_bytes, 0); return 0; } @@ -2928,6 +2905,8 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, } new_group: mutex_unlock(&block_group->alloc_mutex); + put_block_group(block_group); + block_group = NULL; new_group_no_lock: /* don't try to compare new allocations against the * last allocation any more @@ -2997,6 +2976,8 @@ loop_check: block_group = list_entry(cur, struct btrfs_block_group_cache, list); + atomic_inc(&block_group->count); + search_start = block_group->key.objectid; cur = cur->next; } @@ -3004,7 +2985,7 @@ loop_check: /* we found what we needed */ if (ins->objectid) { if (!(data & BTRFS_BLOCK_GROUP_DATA)) - trans->block_group = block_group; + trans->block_group = block_group->key.objectid; if (last_ptr) *last_ptr = ins->objectid + ins->offset; @@ -3015,6 +2996,8 @@ loop_check: loop, allowed_chunk_alloc); ret = -ENOSPC; } + if (block_group) + put_block_group(block_group); up_read(&space_info->groups_sem); return ret; @@ -3124,6 +3107,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) return -ENOSPC; } btrfs_add_free_space(cache, start, len); + put_block_group(cache); update_reserved_extents(root, start, len, 0); return 0; } @@ -3288,6 +3272,7 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, ret = btrfs_remove_free_space(block_group, ins->objectid, ins->offset); BUG_ON(ret); + put_block_group(block_group); ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, ref_generation, owner, ins); return ret; @@ -5703,6 +5688,7 @@ next: WARN_ON(block_group->reserved > 0); WARN_ON(btrfs_block_group_used(&block_group->item) > 0); spin_unlock(&block_group->lock); + put_block_group(block_group); ret = 0; out: btrfs_free_path(path); @@ -5763,6 +5749,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) down_write(&block_group->space_info->groups_sem); list_del(&block_group->list); up_write(&block_group->space_info->groups_sem); + + WARN_ON(atomic_read(&block_group->count) != 1); kfree(block_group); spin_lock(&info->block_group_cache_lock); @@ -5807,6 +5795,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) break; } + atomic_set(&cache->count, 1); spin_lock_init(&cache->lock); mutex_init(&cache->alloc_mutex); mutex_init(&cache->cache_mutex); @@ -5861,11 +5850,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, cache->key.objectid = chunk_offset; cache->key.offset = size; + cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + atomic_set(&cache->count, 1); spin_lock_init(&cache->lock); mutex_init(&cache->alloc_mutex); mutex_init(&cache->cache_mutex); INIT_LIST_HEAD(&cache->list); - btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); btrfs_set_block_group_used(&cache->item, bytes_used); btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); @@ -5926,10 +5916,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, spin_unlock(&block_group->space_info->lock); block_group->space_info->full = 0; - /* - memset(shrink_block_group, 0, sizeof(*shrink_block_group)); - kfree(shrink_block_group); - */ + put_block_group(block_group); + put_block_group(block_group); ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret > 0) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 932d8c0b2c0..0a28b770631 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -989,7 +989,6 @@ next_slot: if (extent_type == BTRFS_FILE_EXTENT_REG || extent_type == BTRFS_FILE_EXTENT_PREALLOC) { - struct btrfs_block_group_cache *block_group; disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); extent_end = found_key.offset + btrfs_file_extent_num_bytes(leaf, fi); @@ -1007,9 +1006,7 @@ next_slot: goto out_check; if (btrfs_cross_ref_exist(trans, root, disk_bytenr)) goto out_check; - block_group = btrfs_lookup_block_group(root->fs_info, - disk_bytenr); - if (!block_group || block_group->ro) + if (btrfs_extent_readonly(root, disk_bytenr)) goto out_check; disk_bytenr += btrfs_file_extent_offset(leaf, fi); nocow = 1; @@ -1969,16 +1966,11 @@ void btrfs_read_locked_inode(struct inode *inode) rdev = btrfs_inode_rdev(leaf, inode_item); BTRFS_I(inode)->index_cnt = (u64)-1; + BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); alloc_group_block = btrfs_inode_block_group(leaf, inode_item); - BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info, - alloc_group_block); - BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); - if (!BTRFS_I(inode)->block_group) { - BTRFS_I(inode)->block_group = btrfs_find_block_group(root, - NULL, 0, - BTRFS_BLOCK_GROUP_METADATA, 0); - } + BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, + alloc_group_block, 0); btrfs_free_path(path); inode_item = NULL; @@ -2048,8 +2040,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, btrfs_set_inode_transid(leaf, item, trans->transid); btrfs_set_inode_rdev(leaf, item, inode->i_rdev); btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); - btrfs_set_inode_block_group(leaf, item, - BTRFS_I(inode)->block_group->key.objectid); + btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); } /* @@ -3358,14 +3349,11 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *dir, const char *name, int name_len, - u64 ref_objectid, - u64 objectid, - struct btrfs_block_group_cache *group, - int mode, u64 *index) + u64 ref_objectid, u64 objectid, + u64 alloc_hint, int mode, u64 *index) { struct inode *inode; struct btrfs_inode_item *inode_item; - struct btrfs_block_group_cache *new_inode_group; struct btrfs_key *location; struct btrfs_path *path; struct btrfs_inode_ref *ref; @@ -3401,13 +3389,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, owner = 0; else owner = 1; - new_inode_group = btrfs_find_block_group(root, group, 0, - BTRFS_BLOCK_GROUP_METADATA, owner); - if (!new_inode_group) { - printk("find_block group failed\n"); - new_inode_group = group; - } - BTRFS_I(inode)->block_group = new_inode_group; + BTRFS_I(inode)->block_group = + btrfs_find_block_group(root, 0, alloc_hint, owner); key[0].objectid = objectid; btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); @@ -4366,16 +4349,16 @@ out: /* * create a new subvolume directory/inode (helper for the ioctl). */ -int btrfs_create_subvol_root(struct btrfs_root *new_root, struct dentry *dentry, - struct btrfs_trans_handle *trans, u64 new_dirid, - struct btrfs_block_group_cache *block_group) +int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, + struct btrfs_root *new_root, struct dentry *dentry, + u64 new_dirid, u64 alloc_hint) { struct inode *inode; int error; u64 index = 0; inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, - new_dirid, block_group, S_IFDIR | 0700, &index); + new_dirid, alloc_hint, S_IFDIR | 0700, &index); if (IS_ERR(inode)) return PTR_ERR(inode); inode->i_op = &btrfs_dir_inode_operations; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 69c4a07f586..5d67858ce99 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -173,7 +173,7 @@ static noinline int create_subvol(struct btrfs_root *root, trans = btrfs_start_transaction(new_root, 1); BUG_ON(!trans); - ret = btrfs_create_subvol_root(new_root, dentry, trans, new_dirid, + ret = btrfs_create_subvol_root(trans, new_root, dentry, new_dirid, BTRFS_I(dir)->block_group); if (ret) goto fail; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 47cd5fcad2c..4604178a43a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -182,7 +182,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, h->transaction = root->fs_info->running_transaction; h->blocks_reserved = num_blocks; h->blocks_used = 0; - h->block_group = NULL; + h->block_group = 0; h->alloc_exclude_nr = 0; h->alloc_exclude_start = 0; root->fs_info->running_transaction->use_count++; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 202c8be6c05..ffe7f639732 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -41,7 +41,7 @@ struct btrfs_trans_handle { unsigned long blocks_reserved; unsigned long blocks_used; struct btrfs_transaction *transaction; - struct btrfs_block_group_cache *block_group; + u64 block_group; u64 alloc_exclude_start; u64 alloc_exclude_nr; }; -- cgit v1.2.3-70-g09d2 From e4404d6e8da678d852b7f767f665f8edf76c9e9f Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Fri, 12 Dec 2008 10:03:26 -0500 Subject: Btrfs: shared seed device This patch makes seed device possible to be shared by multiple mounted file systems. The sharing is achieved by cloning seed device's btrfs_fs_devices structure. Thanks you, Signed-off-by: Yan Zheng --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 37 ++++---- fs/btrfs/super.c | 8 +- fs/btrfs/volumes.c | 240 +++++++++++++++++++++++++++---------------------- fs/btrfs/volumes.h | 3 +- 5 files changed, 156 insertions(+), 134 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6f042de1ac4..541a8279ac7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1711,7 +1711,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, } mutex_lock(&fs_info->chunk_mutex); - ret = btrfs_read_sys_array(tree_root, btrfs_super_bytenr(disk_super)); + ret = btrfs_read_sys_array(tree_root); mutex_unlock(&fs_info->chunk_mutex); if (ret) { printk("btrfs: failed to read the system array on %s\n", diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1cc89246ee2..171057a3267 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -218,7 +218,7 @@ static int cache_block_group(struct btrfs_root *root, struct btrfs_key key; struct extent_buffer *leaf; int slot; - u64 last = block_group->key.objectid; + u64 last; if (!block_group) return 0; @@ -239,7 +239,8 @@ static int cache_block_group(struct btrfs_root *root, * skip the locking here */ path->skip_locking = 1; - key.objectid = max_t(u64, last, BTRFS_SUPER_INFO_OFFSET); + last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); + key.objectid = last; key.offset = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); @@ -5335,8 +5336,20 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, prev_block = block_start; } - if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID && - pass >= 2) { + btrfs_record_root_in_trans(found_root); + if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { + /* + * try to update data extent references while + * keeping metadata shared between snapshots. + */ + if (pass == 1) { + ret = relocate_one_path(trans, found_root, + path, &first_key, ref_path, + group, reloc_inode); + if (ret < 0) + goto out; + continue; + } /* * use fallback method to process the remaining * references. @@ -5359,23 +5372,9 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, path, extent_key, &first_key, ref_path, new_extents, nr_extents); - if (ret < 0) - goto out; - continue; - } - - btrfs_record_root_in_trans(found_root); - if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { + } else { ret = relocate_tree_block(trans, found_root, path, &first_key, ref_path); - } else { - /* - * try to update data extent references while - * keeping metadata shared between snapshots. - */ - ret = relocate_one_path(trans, found_root, path, - &first_key, ref_path, - group, reloc_inode); } if (ret < 0) goto out; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 09908f25fca..84c3b66564d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -58,14 +58,15 @@ static struct super_operations btrfs_super_ops; static void btrfs_put_super (struct super_block * sb) { struct btrfs_root *root = btrfs_sb(sb); - struct btrfs_fs_info *fs = root->fs_info; int ret; ret = close_ctree(root); if (ret) { printk("close ctree returns %d\n", ret); } - btrfs_sysfs_del_super(fs); +#if 0 + btrfs_sysfs_del_super(root->fs_info); +#endif sb->s_fs_info = NULL; } @@ -349,11 +350,12 @@ static int btrfs_fill_super(struct super_block * sb, err = -ENOMEM; goto fail_close; } - +#if 0 /* this does the super kobj at the same time */ err = btrfs_sysfs_add_super(tree_root->fs_info); if (err) goto fail_close; +#endif sb->s_root = root_dentry; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 4d210a731d4..6672adcec9f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -47,7 +47,6 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans, struct btrfs_device *device); static int btrfs_relocate_sys_chunks(struct btrfs_root *root); - #define map_lookup_size(n) (sizeof(struct map_lookup) + \ (sizeof(struct btrfs_bio_stripe) * (n))) @@ -74,34 +73,29 @@ static void unlock_chunks(struct btrfs_root *root) mutex_unlock(&root->fs_info->chunk_mutex); } +static void free_fs_devices(struct btrfs_fs_devices *fs_devices) +{ + struct btrfs_device *device; + WARN_ON(fs_devices->opened); + while (!list_empty(&fs_devices->devices)) { + device = list_entry(fs_devices->devices.next, + struct btrfs_device, dev_list); + list_del(&device->dev_list); + kfree(device->name); + kfree(device); + } + kfree(fs_devices); +} + int btrfs_cleanup_fs_uuids(void) { struct btrfs_fs_devices *fs_devices; - struct btrfs_device *dev; while (!list_empty(&fs_uuids)) { fs_devices = list_entry(fs_uuids.next, struct btrfs_fs_devices, list); list_del(&fs_devices->list); - while(!list_empty(&fs_devices->devices)) { - dev = list_entry(fs_devices->devices.next, - struct btrfs_device, dev_list); - if (dev->bdev) { - close_bdev_exclusive(dev->bdev, dev->mode); - fs_devices->open_devices--; - } - fs_devices->num_devices--; - if (dev->writeable) - fs_devices->rw_devices--; - list_del(&dev->dev_list); - list_del(&dev->dev_alloc_list); - kfree(dev->name); - kfree(dev); - } - WARN_ON(fs_devices->num_devices); - WARN_ON(fs_devices->open_devices); - WARN_ON(fs_devices->rw_devices); - kfree(fs_devices); + free_fs_devices(fs_devices); } return 0; } @@ -304,12 +298,55 @@ static noinline int device_list_add(const char *path, return 0; } +static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) +{ + struct btrfs_fs_devices *fs_devices; + struct btrfs_device *device; + struct btrfs_device *orig_dev; + + fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); + if (!fs_devices) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&fs_devices->devices); + INIT_LIST_HEAD(&fs_devices->alloc_list); + INIT_LIST_HEAD(&fs_devices->list); + fs_devices->latest_devid = orig->latest_devid; + fs_devices->latest_trans = orig->latest_trans; + memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid)); + + list_for_each_entry(orig_dev, &orig->devices, dev_list) { + device = kzalloc(sizeof(*device), GFP_NOFS); + if (!device) + goto error; + + device->name = kstrdup(orig_dev->name, GFP_NOFS); + if (!device->name) + goto error; + + device->devid = orig_dev->devid; + device->work.func = pending_bios_fn; + memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid)); + device->barriers = 1; + spin_lock_init(&device->io_lock); + INIT_LIST_HEAD(&device->dev_list); + INIT_LIST_HEAD(&device->dev_alloc_list); + + list_add(&device->dev_list, &fs_devices->devices); + device->fs_devices = fs_devices; + fs_devices->num_devices++; + } + return fs_devices; +error: + free_fs_devices(fs_devices); + return ERR_PTR(-ENOMEM); +} + int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) { struct list_head *tmp; struct list_head *cur; struct btrfs_device *device; - int seed_devices = 0; mutex_lock(&uuid_mutex); again: @@ -328,17 +365,14 @@ again: device->writeable = 0; fs_devices->rw_devices--; } - if (!seed_devices) { - list_del_init(&device->dev_list); - fs_devices->num_devices--; - kfree(device->name); - kfree(device); - } + list_del_init(&device->dev_list); + fs_devices->num_devices--; + kfree(device->name); + kfree(device); } if (fs_devices->seed) { fs_devices = fs_devices->seed; - seed_devices = 1; goto again; } @@ -348,10 +382,9 @@ again: static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) { - struct btrfs_fs_devices *seed_devices; struct list_head *cur; struct btrfs_device *device; -again: + if (--fs_devices->opened > 0) return 0; @@ -370,31 +403,38 @@ again: device->writeable = 0; device->in_fs_metadata = 0; } + WARN_ON(fs_devices->open_devices); + WARN_ON(fs_devices->rw_devices); fs_devices->opened = 0; fs_devices->seeding = 0; - fs_devices->sprouted = 0; - seed_devices = fs_devices->seed; - fs_devices->seed = NULL; - if (seed_devices) { - fs_devices = seed_devices; - goto again; - } return 0; } int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) { + struct btrfs_fs_devices *seed_devices = NULL; int ret; mutex_lock(&uuid_mutex); ret = __btrfs_close_devices(fs_devices); + if (!fs_devices->opened) { + seed_devices = fs_devices->seed; + fs_devices->seed = NULL; + } mutex_unlock(&uuid_mutex); + + while (seed_devices) { + fs_devices = seed_devices; + seed_devices = fs_devices->seed; + __btrfs_close_devices(fs_devices); + free_fs_devices(fs_devices); + } return ret; } -int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, - fmode_t flags, void *holder) +static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, + fmode_t flags, void *holder) { struct block_device *bdev; struct list_head *head = &fs_devices->devices; @@ -490,12 +530,8 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, mutex_lock(&uuid_mutex); if (fs_devices->opened) { - if (fs_devices->sprouted) { - ret = -EBUSY; - } else { - fs_devices->opened++; - ret = 0; - } + fs_devices->opened++; + ret = 0; } else { ret = __btrfs_open_devices(fs_devices, flags, holder); } @@ -1043,12 +1079,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) goto error_brelse; device->in_fs_metadata = 0; - if (device->fs_devices == root->fs_info->fs_devices) { - list_del_init(&device->dev_list); - root->fs_info->fs_devices->num_devices--; - if (device->bdev) - device->fs_devices->open_devices--; - } + list_del_init(&device->dev_list); + device->fs_devices->num_devices--; next_device = list_entry(root->fs_info->fs_devices->devices.next, struct btrfs_device, dev_list); @@ -1057,34 +1089,27 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) if (device->bdev == root->fs_info->fs_devices->latest_bdev) root->fs_info->fs_devices->latest_bdev = next_device->bdev; + if (device->bdev) { + close_bdev_exclusive(device->bdev, device->mode); + device->bdev = NULL; + device->fs_devices->open_devices--; + } + num_devices = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; btrfs_set_super_num_devices(&root->fs_info->super_copy, num_devices); - if (device->fs_devices != root->fs_info->fs_devices) { - BUG_ON(device->writeable); - brelse(bh); - if (bdev) - close_bdev_exclusive(bdev, FMODE_READ); - - if (device->bdev) { - close_bdev_exclusive(device->bdev, device->mode); - device->bdev = NULL; - device->fs_devices->open_devices--; - } - if (device->fs_devices->open_devices == 0) { - struct btrfs_fs_devices *fs_devices; - fs_devices = root->fs_info->fs_devices; - while (fs_devices) { - if (fs_devices->seed == device->fs_devices) - break; - fs_devices = fs_devices->seed; - } - fs_devices->seed = device->fs_devices->seed; - device->fs_devices->seed = NULL; - __btrfs_close_devices(device->fs_devices); + if (device->fs_devices->open_devices == 0) { + struct btrfs_fs_devices *fs_devices; + fs_devices = root->fs_info->fs_devices; + while (fs_devices) { + if (fs_devices->seed == device->fs_devices) + break; + fs_devices = fs_devices->seed; } - ret = 0; - goto out; + fs_devices->seed = device->fs_devices->seed; + device->fs_devices->seed = NULL; + __btrfs_close_devices(device->fs_devices); + free_fs_devices(device->fs_devices); } /* @@ -1099,20 +1124,10 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) set_buffer_dirty(bh); sync_dirty_buffer(bh); } - brelse(bh); - if (device->bdev) { - /* one close for the device struct or super_block */ - close_bdev_exclusive(device->bdev, device->mode); - } - if (bdev) { - /* one close for us */ - close_bdev_exclusive(bdev, FMODE_READ); - } kfree(device->name); kfree(device); ret = 0; - goto out; error_brelse: brelse(bh); @@ -1133,34 +1148,41 @@ static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans, { struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; struct btrfs_fs_devices *old_devices; + struct btrfs_fs_devices *seed_devices; struct btrfs_super_block *disk_super = &root->fs_info->super_copy; struct btrfs_device *device; u64 super_flags; BUG_ON(!mutex_is_locked(&uuid_mutex)); - if (!fs_devices->seeding || fs_devices->opened != 1) + if (!fs_devices->seeding) return -EINVAL; - old_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); - if (!old_devices) + seed_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); + if (!seed_devices) return -ENOMEM; - memcpy(old_devices, fs_devices, sizeof(*old_devices)); - old_devices->opened = 1; - old_devices->sprouted = 1; - INIT_LIST_HEAD(&old_devices->devices); - INIT_LIST_HEAD(&old_devices->alloc_list); - list_splice_init(&fs_devices->devices, &old_devices->devices); - list_splice_init(&fs_devices->alloc_list, &old_devices->alloc_list); - list_for_each_entry(device, &old_devices->devices, dev_list) { - device->fs_devices = old_devices; + old_devices = clone_fs_devices(fs_devices); + if (IS_ERR(old_devices)) { + kfree(seed_devices); + return PTR_ERR(old_devices); } + list_add(&old_devices->list, &fs_uuids); + memcpy(seed_devices, fs_devices, sizeof(*seed_devices)); + seed_devices->opened = 1; + INIT_LIST_HEAD(&seed_devices->devices); + INIT_LIST_HEAD(&seed_devices->alloc_list); + list_splice_init(&fs_devices->devices, &seed_devices->devices); + list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); + list_for_each_entry(device, &seed_devices->devices, dev_list) { + device->fs_devices = seed_devices; + } + fs_devices->seeding = 0; fs_devices->num_devices = 0; fs_devices->open_devices = 0; - fs_devices->seed = old_devices; + fs_devices->seed = seed_devices; generate_random_uuid(fs_devices->fsid); memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); @@ -2642,7 +2664,6 @@ int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree, NULL, 0, page); } - static void end_bio_multi_stripe(struct bio *bio, int err) { struct btrfs_multi_bio *multi = bio->bi_private; @@ -2840,6 +2861,7 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root, device->dev_root = root->fs_info->dev_root; device->devid = devid; device->work.func = pending_bios_fn; + device->fs_devices = fs_devices; fs_devices->num_devices++; spin_lock_init(&device->io_lock); INIT_LIST_HEAD(&device->dev_alloc_list); @@ -2980,8 +3002,10 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid) ret = -ENOENT; goto out; } - if (fs_devices->opened) { - ret = -EBUSY; + + fs_devices = clone_fs_devices(fs_devices); + if (IS_ERR(fs_devices)) { + ret = PTR_ERR(fs_devices); goto out; } @@ -2992,13 +3016,13 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid) if (!fs_devices->seeding) { __btrfs_close_devices(fs_devices); + free_fs_devices(fs_devices); ret = -EINVAL; goto out; } fs_devices->seed = root->fs_info->fs_devices->seed; root->fs_info->fs_devices->seed = fs_devices; - fs_devices->sprouted = 1; out: mutex_unlock(&uuid_mutex); return ret; @@ -3011,7 +3035,6 @@ static int read_one_dev(struct btrfs_root *root, struct btrfs_device *device; u64 devid; int ret; - int seed_devices = 0; u8 fs_uuid[BTRFS_UUID_SIZE]; u8 dev_uuid[BTRFS_UUID_SIZE]; @@ -3025,14 +3048,13 @@ static int read_one_dev(struct btrfs_root *root, if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) { ret = open_seed_devices(root, fs_uuid); - if (ret) + if (ret && !btrfs_test_opt(root, DEGRADED)) return ret; - seed_devices = 1; } device = btrfs_find_device(root, devid, dev_uuid, fs_uuid); if (!device || !device->bdev) { - if (!btrfs_test_opt(root, DEGRADED) || seed_devices) + if (!btrfs_test_opt(root, DEGRADED)) return -EIO; if (!device) { @@ -3074,7 +3096,7 @@ int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf) return read_one_dev(root, buf, dev_item); } -int btrfs_read_sys_array(struct btrfs_root *root, u64 sb_bytenr) +int btrfs_read_sys_array(struct btrfs_root *root) { struct btrfs_super_block *super_copy = &root->fs_info->super_copy; struct extent_buffer *sb; @@ -3089,7 +3111,7 @@ int btrfs_read_sys_array(struct btrfs_root *root, u64 sb_bytenr) u32 cur; struct btrfs_key key; - sb = btrfs_find_create_tree_block(root, sb_bytenr, + sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET, BTRFS_SUPER_INFO_SIZE); if (!sb) return -ENOMEM; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index bdebe83c319..86c44e9ae11 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -93,7 +93,6 @@ struct btrfs_fs_devices { struct btrfs_fs_devices *seed; int seeding; - int sprouted; int opened; }; @@ -127,7 +126,7 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, u64 chunk_start, u64 physical, u64 devid, u64 **logical, int *naddrs, int *stripe_len); -int btrfs_read_sys_array(struct btrfs_root *root, u64 sb_bytenr); +int btrfs_read_sys_array(struct btrfs_root *root); int btrfs_read_chunk_tree(struct btrfs_root *root); int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 type); -- cgit v1.2.3-70-g09d2 From 17d217fe970d34720f4f1633dca73a6aa2f3d9d1 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Fri, 12 Dec 2008 10:03:38 -0500 Subject: Btrfs: fix nodatasum handling in balancing code Checksums on data can be disabled by mount option, so it's possible some data extents don't have checksums or have invalid checksums. This causes trouble for data relocation. This patch contains following things to make data relocation work. 1) make nodatasum/nodatacow mount option only affects new files. Checksums and COW on data are only controlled by the inode flags. 2) check the existence of checksum in the nodatacow checker. If checksums exist, force COW the data extent. This ensure that checksum for a given block is either valid or does not exist. 3) update data relocation code to properly handle the case of checksum missing. Signed-off-by: Yan Zheng --- fs/btrfs/compression.c | 9 ++-- fs/btrfs/ctree.h | 5 ++- fs/btrfs/extent-tree.c | 50 ++++++++++++++++++++-- fs/btrfs/extent_io.h | 1 + fs/btrfs/file-item.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/file.c | 8 ---- fs/btrfs/inode.c | 74 ++++++++++++++++++++++++++------ 7 files changed, 226 insertions(+), 35 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index ad727413730..2436163d543 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -124,8 +124,7 @@ static int check_compressed_csum(struct inode *inode, u32 csum; u32 *cb_sum = &cb->sums; - if (btrfs_test_opt(root, NODATASUM) || - btrfs_test_flag(inode, NODATASUM)) + if (btrfs_test_flag(inode, NODATASUM)) return 0; for (i = 0; i < cb->nr_pages; i++) { @@ -671,8 +670,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, */ atomic_inc(&cb->pending_bios); - if (!btrfs_test_opt(root, NODATASUM) && - !btrfs_test_flag(inode, NODATASUM)) { + if (!btrfs_test_flag(inode, NODATASUM)) { btrfs_lookup_bio_sums(root, inode, comp_bio, sums); } @@ -699,8 +697,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); BUG_ON(ret); - if (!btrfs_test_opt(root, NODATASUM) && - !btrfs_test_flag(inode, NODATASUM)) { + if (!btrfs_test_flag(inode, NODATASUM)) { btrfs_lookup_bio_sums(root, inode, comp_bio, sums); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8733081d97a..b89999de456 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1702,7 +1702,7 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *leaf); int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytenr); + struct btrfs_root *root, u64 objectid, u64 bytenr); int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); @@ -1789,6 +1789,7 @@ int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, struct extent_buffer *buf, u64 orig_start); int btrfs_add_dead_reloc_root(struct btrfs_root *root); int btrfs_cleanup_reloc_trees(struct btrfs_root *root); +int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); /* ctree.c */ int btrfs_previous_item(struct btrfs_root *root, @@ -1994,6 +1995,8 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, int btrfs_csum_truncate(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 isize); +int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, + u64 end, struct list_head *list); /* inode.c */ /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 171057a3267..8004695d24d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1359,7 +1359,7 @@ out: } int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytenr) + struct btrfs_root *root, u64 objectid, u64 bytenr) { struct btrfs_root *extent_root = root->fs_info->extent_root; struct btrfs_path *path; @@ -1418,8 +1418,9 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, ref_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); ref_root = btrfs_ref_root(leaf, ref_item); - if (ref_root != root->root_key.objectid && - ref_root != BTRFS_TREE_LOG_OBJECTID) { + if ((ref_root != root->root_key.objectid && + ref_root != BTRFS_TREE_LOG_OBJECTID) || + objectid != btrfs_ref_objectid(leaf, ref_item)) { ret = 1; goto out; } @@ -5367,7 +5368,6 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, if (ret) goto out; } - btrfs_record_root_in_trans(found_root); ret = replace_one_extent(trans, found_root, path, extent_key, &first_key, ref_path, @@ -5534,6 +5534,7 @@ static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info, } else { BUG_ON(1); } + BTRFS_I(inode)->index_cnt = group->key.objectid; err = btrfs_orphan_add(trans, inode); out: @@ -5546,6 +5547,47 @@ out: return inode; } +int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) +{ + + struct btrfs_ordered_sum *sums; + struct btrfs_sector_sum *sector_sum; + struct btrfs_ordered_extent *ordered; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct list_head list; + size_t offset; + int ret; + u64 disk_bytenr; + + INIT_LIST_HEAD(&list); + + ordered = btrfs_lookup_ordered_extent(inode, file_pos); + BUG_ON(ordered->file_offset != file_pos || ordered->len != len); + + disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; + ret = btrfs_lookup_csums_range(root, disk_bytenr, + disk_bytenr + len - 1, &list); + + while (!list_empty(&list)) { + sums = list_entry(list.next, struct btrfs_ordered_sum, list); + list_del_init(&sums->list); + + sector_sum = sums->sums; + sums->bytenr = ordered->start; + + offset = 0; + while (offset < sums->len) { + sector_sum->bytenr += ordered->start - disk_bytenr; + sector_sum++; + offset += root->sectorsize; + } + + btrfs_add_ordered_sum(inode, ordered, sums); + } + btrfs_put_ordered_extent(ordered); + return 0; +} + int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) { struct btrfs_trans_handle *trans; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 2d5f67065b6..c5b483a7913 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -16,6 +16,7 @@ #define EXTENT_ORDERED (1 << 9) #define EXTENT_ORDERED_METADATA (1 << 10) #define EXTENT_BOUNDARY (1 << 11) +#define EXTENT_NODATASUM (1 << 12) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) /* flags for bio submission */ diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 3ebef871ee6..df0447632db 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -140,6 +140,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, return ret; } + int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, struct bio *bio, u32 *dst) { @@ -185,9 +186,16 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, if (ret == -ENOENT || ret == -EFBIG) ret = 0; sum = 0; - printk("no csum found for inode %lu start " - "%llu\n", inode->i_ino, - (unsigned long long)offset); + if (BTRFS_I(inode)->root->root_key.objectid == + BTRFS_DATA_RELOC_TREE_OBJECTID) { + set_extent_bits(io_tree, offset, + offset + bvec->bv_len - 1, + EXTENT_NODATASUM, GFP_NOFS); + } else { + printk("no csum found for inode %lu " + "start %llu\n", inode->i_ino, + (unsigned long long)offset); + } item = NULL; btrfs_release_path(root, path); goto found; @@ -228,6 +236,106 @@ found: return 0; } +int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, + struct list_head *list) +{ + struct btrfs_key key; + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_ordered_sum *sums; + struct btrfs_sector_sum *sector_sum; + struct btrfs_csum_item *item; + unsigned long offset; + int ret; + size_t size; + u64 csum_end; + u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); + + path = btrfs_alloc_path(); + BUG_ON(!path); + + key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; + key.offset = start; + key.type = BTRFS_EXTENT_CSUM_KEY; + + ret = btrfs_search_slot(NULL, root->fs_info->csum_root, + &key, path, 0, 0); + if (ret < 0) + goto fail; + if (ret > 0 && path->slots[0] > 0) { + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1); + if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID && + key.type == BTRFS_EXTENT_CSUM_KEY) { + offset = (start - key.offset) >> + root->fs_info->sb->s_blocksize_bits; + if (offset * csum_size < + btrfs_item_size_nr(leaf, path->slots[0] - 1)) + path->slots[0]--; + } + } + + while (start <= end) { + leaf = path->nodes[0]; + if (path->slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root->fs_info->csum_root, path); + if (ret < 0) + goto fail; + if (ret > 0) + break; + leaf = path->nodes[0]; + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || + key.type != BTRFS_EXTENT_CSUM_KEY) + break; + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.offset > end) + break; + + if (key.offset > start) + start = key.offset; + + size = btrfs_item_size_nr(leaf, path->slots[0]); + csum_end = key.offset + (size / csum_size) * root->sectorsize; + + size = min(csum_end, end + 1) - start; + sums = kzalloc(btrfs_ordered_sum_size(root, size), GFP_NOFS); + BUG_ON(!sums); + + sector_sum = sums->sums; + sums->bytenr = start; + sums->len = size; + + offset = (start - key.offset) >> + root->fs_info->sb->s_blocksize_bits; + offset *= csum_size; + + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_csum_item); + while (size > 0) { + read_extent_buffer(path->nodes[0], §or_sum->sum, + ((unsigned long)item) + offset, + csum_size); + sector_sum->bytenr = start; + + size -= root->sectorsize; + start += root->sectorsize; + offset += csum_size; + sector_sum++; + } + list_add_tail(&sums->list, list); + + path->slots[0]++; + } + ret = 0; +fail: + btrfs_free_path(path); + return ret; +} + int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, struct bio *bio, u64 file_start, int contig) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 71bfe3a6a44..507081059d9 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1059,14 +1059,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, first_index = pos >> PAGE_CACHE_SHIFT; last_index = (pos + count) >> PAGE_CACHE_SHIFT; - /* - * if this is a nodatasum mount, force summing off for the inode - * all the time. That way a later mount with summing on won't - * get confused - */ - if (btrfs_test_opt(root, NODATASUM)) - btrfs_set_flag(inode, NODATASUM); - /* * there are lots of better ways to do this, but this code * makes sure the first and last page in the file range are diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0a28b770631..e64a4fe19a6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -771,6 +771,13 @@ static noinline int cow_file_range(struct inode *inode, ram_size, cur_alloc_size, 0); BUG_ON(ret); + if (root->root_key.objectid == + BTRFS_DATA_RELOC_TREE_OBJECTID) { + ret = btrfs_reloc_clone_csums(inode, start, + cur_alloc_size); + BUG_ON(ret); + } + if (disk_num_bytes < cur_alloc_size) { printk("num_bytes %Lu cur_alloc %Lu\n", disk_num_bytes, cur_alloc_size); @@ -910,6 +917,26 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, return 0; } +static int noinline csum_exist_in_range(struct btrfs_root *root, + u64 bytenr, u64 num_bytes) +{ + int ret; + struct btrfs_ordered_sum *sums; + LIST_HEAD(list); + + ret = btrfs_lookup_csums_range(root, bytenr, bytenr + num_bytes - 1, + &list); + if (ret == 0 && list_empty(&list)) + return 0; + + while (!list_empty(&list)) { + sums = list_entry(list.next, struct btrfs_ordered_sum, list); + list_del(&sums->list); + kfree(sums); + } + return 1; +} + /* * when nowcow writeback call back. This checks for snapshots or COW copies * of the extents that exist in the file, and COWs the file as required. @@ -971,6 +998,7 @@ next_slot: nocow = 0; disk_bytenr = 0; + num_bytes = 0; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.objectid > inode->i_ino || @@ -996,19 +1024,29 @@ next_slot: path->slots[0]++; goto next_slot; } + if (disk_bytenr == 0) + goto out_check; if (btrfs_file_extent_compression(leaf, fi) || btrfs_file_extent_encryption(leaf, fi) || btrfs_file_extent_other_encoding(leaf, fi)) goto out_check; - if (disk_bytenr == 0) - goto out_check; if (extent_type == BTRFS_FILE_EXTENT_REG && !force) goto out_check; - if (btrfs_cross_ref_exist(trans, root, disk_bytenr)) - goto out_check; if (btrfs_extent_readonly(root, disk_bytenr)) goto out_check; + if (btrfs_cross_ref_exist(trans, root, inode->i_ino, + disk_bytenr)) + goto out_check; disk_bytenr += btrfs_file_extent_offset(leaf, fi); + disk_bytenr += cur_offset - found_key.offset; + num_bytes = min(end + 1, extent_end) - cur_offset; + /* + * force cow if csum exists in the range. + * this ensure that csum for a given extent are + * either valid or do not exist. + */ + if (csum_exist_in_range(root, disk_bytenr, num_bytes)) + goto out_check; nocow = 1; } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { extent_end = found_key.offset + @@ -1041,8 +1079,6 @@ out_check: cow_start = (u64)-1; } - disk_bytenr += cur_offset - found_key.offset; - num_bytes = min(end + 1, extent_end) - cur_offset; if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) { struct extent_map *em; struct extent_map_tree *em_tree; @@ -1105,11 +1141,9 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, u64 start, u64 end, int *page_started, unsigned long *nr_written) { - struct btrfs_root *root = BTRFS_I(inode)->root; int ret; - if (btrfs_test_opt(root, NODATACOW) || - btrfs_test_flag(inode, NODATACOW)) + if (btrfs_test_flag(inode, NODATACOW)) ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 1, nr_written); else if (btrfs_test_flag(inode, PREALLOC)) @@ -1252,8 +1286,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); BUG_ON(ret); - skip_sum = btrfs_test_opt(root, NODATASUM) || - btrfs_test_flag(inode, NODATASUM); + skip_sum = btrfs_test_flag(inode, NODATASUM); if (!(rw & (1 << BIO_RW))) { if (bio_flags & EXTENT_BIO_COMPRESSED) { @@ -1263,6 +1296,9 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, btrfs_lookup_bio_sums(root, inode, bio, NULL); goto mapit; } else if (!skip_sum) { + /* csum items have already been cloned */ + if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) + goto mapit; /* we're doing a write, do the async checksumming */ return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, inode, rw, bio, mirror_num, @@ -1692,9 +1728,15 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, ClearPageChecked(page); goto good; } - if (btrfs_test_opt(root, NODATASUM) || - btrfs_test_flag(inode, NODATASUM)) + if (btrfs_test_flag(inode, NODATASUM)) + return 0; + + if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && + test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1)) { + clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM, + GFP_NOFS); return 0; + } if (state && state->start == start) { private = state->private; @@ -3391,6 +3433,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, owner = 1; BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, alloc_hint, owner); + if ((mode & S_IFREG)) { + if (btrfs_test_opt(root, NODATASUM)) + btrfs_set_flag(inode, NODATASUM); + if (btrfs_test_opt(root, NODATACOW)) + btrfs_set_flag(inode, NODATACOW); + } key[0].objectid = objectid; btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); -- cgit v1.2.3-70-g09d2 From 75eff68ea6959efd999b125fc3dcf73931a3b30b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Dec 2008 15:54:40 -0500 Subject: Btrfs: Don't use spin*lock_irq for the delalloc lock The delalloc lock doesn't need to have irqs disabled, nobody that changes the number of delalloc bytes in the FS is running with irqs off. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 12 ++++++------ fs/btrfs/inode.c | 34 ++++++++++++++++++++-------------- 2 files changed, 26 insertions(+), 20 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8004695d24d..fbd6a8f28b5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1047,11 +1047,11 @@ search: end = pos; /* update the free space counters */ - spin_lock_irq(&info->delalloc_lock); + spin_lock(&info->delalloc_lock); super_used = btrfs_super_bytes_used(&info->super_copy); btrfs_set_super_bytes_used(&info->super_copy, super_used - bytes_freed); - spin_unlock_irq(&info->delalloc_lock); + spin_unlock(&info->delalloc_lock); root_used = btrfs_root_used(&extent_root->root_item); btrfs_set_root_used(&extent_root->root_item, @@ -2463,11 +2463,11 @@ static int __free_extent(struct btrfs_trans_handle *trans, BUG_ON(ret < 0); } /* block accounting for super block */ - spin_lock_irq(&info->delalloc_lock); + spin_lock(&info->delalloc_lock); super_used = btrfs_super_bytes_used(&info->super_copy); btrfs_set_super_bytes_used(&info->super_copy, super_used - num_bytes); - spin_unlock_irq(&info->delalloc_lock); + spin_unlock(&info->delalloc_lock); /* block accounting for root item */ root_used = btrfs_root_used(&root->root_item); @@ -3151,10 +3151,10 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, parent = ins->objectid; /* block accounting for super block */ - spin_lock_irq(&info->delalloc_lock); + spin_lock(&info->delalloc_lock); super_used = btrfs_super_bytes_used(&info->super_copy); btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes); - spin_unlock_irq(&info->delalloc_lock); + spin_unlock(&info->delalloc_lock); /* block accounting for root item */ root_used = btrfs_root_used(&root->root_item); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5313a13a998..0577e77e661 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -101,10 +101,9 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, u64 total; u64 used; u64 thresh; - unsigned long flags; int ret = 0; - spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); + spin_lock(&root->fs_info->delalloc_lock); total = btrfs_super_total_bytes(&root->fs_info->super_copy); used = btrfs_super_bytes_used(&root->fs_info->super_copy); if (for_del) @@ -116,7 +115,7 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, if (used + root->fs_info->delalloc_bytes + num_required > thresh) ret = -ENOSPC; - spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); + spin_unlock(&root->fs_info->delalloc_lock); return ret; } @@ -1166,17 +1165,21 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits) { - unsigned long flags; + /* + * set_bit and clear bit hooks normally require _irqsave/restore + * but in this case, we are only testeing for the DELALLOC + * bit, which is only set or cleared with irqs on + */ if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; - spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); + spin_lock(&root->fs_info->delalloc_lock); BTRFS_I(inode)->delalloc_bytes += end - start + 1; root->fs_info->delalloc_bytes += end - start + 1; if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { list_add_tail(&BTRFS_I(inode)->delalloc_inodes, &root->fs_info->delalloc_inodes); } - spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); + spin_unlock(&root->fs_info->delalloc_lock); } return 0; } @@ -1187,11 +1190,15 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits) { + /* + * set_bit and clear bit hooks normally require _irqsave/restore + * but in this case, we are only testeing for the DELALLOC + * bit, which is only set or cleared with irqs on + */ if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; - unsigned long flags; - spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); + spin_lock(&root->fs_info->delalloc_lock); if (end - start + 1 > root->fs_info->delalloc_bytes) { printk("warning: delalloc account %Lu %Lu\n", end - start + 1, root->fs_info->delalloc_bytes); @@ -1205,7 +1212,7 @@ static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { list_del_init(&BTRFS_I(inode)->delalloc_inodes); } - spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); + spin_unlock(&root->fs_info->delalloc_lock); } return 0; } @@ -4651,27 +4658,26 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root) struct list_head *head = &root->fs_info->delalloc_inodes; struct btrfs_inode *binode; struct inode *inode; - unsigned long flags; if (root->fs_info->sb->s_flags & MS_RDONLY) return -EROFS; - spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); + spin_lock(&root->fs_info->delalloc_lock); while(!list_empty(head)) { binode = list_entry(head->next, struct btrfs_inode, delalloc_inodes); inode = igrab(&binode->vfs_inode); if (!inode) list_del_init(&binode->delalloc_inodes); - spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); + spin_unlock(&root->fs_info->delalloc_lock); if (inode) { filemap_flush(inode->i_mapping); iput(inode); } cond_resched(); - spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); + spin_lock(&root->fs_info->delalloc_lock); } - spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); + spin_unlock(&root->fs_info->delalloc_lock); /* the filemap_flush will queue IO into the worker threads, but * we have to make sure the IO is actually started and that -- cgit v1.2.3-70-g09d2 From dcbdd4dcb9793b00b46ab023e9330922c8c7c54c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 16 Dec 2008 13:51:01 -0500 Subject: Btrfs: delete checksum items before marking blocks free Btrfs maintains a cache of blocks available for allocation in ram. The code that frees extents was marking the extents free and then deleting the checksum items. This meant it was possible the extent would be reallocated before the checksum item was actually deleted, leading to races and other problems as the checksums were updated for the newly allocated extent. The fix is to delete the checksum before marking the extent free. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 6 +++--- fs/btrfs/file-item.c | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fbd6a8f28b5..9ef2a2be268 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2477,15 +2477,15 @@ static int __free_extent(struct btrfs_trans_handle *trans, num_to_del); BUG_ON(ret); btrfs_release_path(extent_root, path); - ret = update_block_group(trans, root, bytenr, num_bytes, 0, - mark_free); - BUG_ON(ret); if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { ret = btrfs_del_csums(trans, root, bytenr, num_bytes); BUG_ON(ret); } + ret = update_block_group(trans, root, bytenr, num_bytes, 0, + mark_free); + BUG_ON(ret); #ifdef BIO_RW_DISCARD /* Tell the block device(s) that the sectors can be discarded */ ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index df0447632db..7acadf3b742 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -537,6 +537,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, if (key.offset >= bytenr && csum_end <= end_byte) { ret = btrfs_del_item(trans, root, path); BUG_ON(ret); + if (key.offset == bytenr) + break; } else if (key.offset < bytenr && csum_end > end_byte) { unsigned long offset; unsigned long shift_len; @@ -583,6 +585,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, ret = truncate_one_csum(trans, root, path, &key, bytenr, len); BUG_ON(ret); + if (key.offset < bytenr) + break; } btrfs_release_path(root, path); } -- cgit v1.2.3-70-g09d2 From 34bf63c4ddddd92bfba3387d134c37bf4426b2ce Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Fri, 19 Dec 2008 10:58:46 -0500 Subject: Btrfs: properly update block accounting for metadata This adds the missing block accounting code to finish_current_insert and makes block accounting for root item properly protected by the delalloc spin lock. Signed-off-by: Yan Zheng --- fs/btrfs/extent-tree.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9ef2a2be268..274bb91efa2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1051,11 +1051,11 @@ search: super_used = btrfs_super_bytes_used(&info->super_copy); btrfs_set_super_bytes_used(&info->super_copy, super_used - bytes_freed); - spin_unlock(&info->delalloc_lock); root_used = btrfs_root_used(&extent_root->root_item); btrfs_set_root_used(&extent_root->root_item, root_used - bytes_freed); + spin_unlock(&info->delalloc_lock); /* delete the items */ ret = btrfs_del_items(trans, extent_root, path, @@ -2242,6 +2242,7 @@ again: extent_op->bytenr + extent_op->num_bytes - 1, EXTENT_WRITEBACK, GFP_NOFS); if (extent_op->del) { + u64 used; list_del_init(&extent_op->list); unlock_extent(&info->extent_ins, extent_op->bytenr, extent_op->bytenr + extent_op->num_bytes @@ -2253,6 +2254,15 @@ again: extent_op->num_bytes, 0); mutex_unlock(&extent_root->fs_info->pinned_mutex); + spin_lock(&info->delalloc_lock); + used = btrfs_super_bytes_used(&info->super_copy); + btrfs_set_super_bytes_used(&info->super_copy, + used - extent_op->num_bytes); + used = btrfs_root_used(&extent_root->root_item); + btrfs_set_root_used(&extent_root->root_item, + used - extent_op->num_bytes); + spin_unlock(&info->delalloc_lock); + ret = update_block_group(trans, extent_root, extent_op->bytenr, extent_op->num_bytes, @@ -2467,12 +2477,12 @@ static int __free_extent(struct btrfs_trans_handle *trans, super_used = btrfs_super_bytes_used(&info->super_copy); btrfs_set_super_bytes_used(&info->super_copy, super_used - num_bytes); - spin_unlock(&info->delalloc_lock); /* block accounting for root item */ root_used = btrfs_root_used(&root->root_item); btrfs_set_root_used(&root->root_item, root_used - num_bytes); + spin_unlock(&info->delalloc_lock); ret = btrfs_del_items(trans, extent_root, path, path->slots[0], num_to_del); BUG_ON(ret); @@ -3154,11 +3164,11 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, spin_lock(&info->delalloc_lock); super_used = btrfs_super_bytes_used(&info->super_copy); btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes); - spin_unlock(&info->delalloc_lock); /* block accounting for root item */ root_used = btrfs_root_used(&root->root_item); btrfs_set_root_used(&root->root_item, root_used + num_bytes); + spin_unlock(&info->delalloc_lock); if (root == extent_root) { struct pending_extent_op *extent_op; -- cgit v1.2.3-70-g09d2 From 1f80e4db0fcb3bdc2be51389baf558a9519682f0 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Fri, 19 Dec 2008 10:59:04 -0500 Subject: Btrfs: set EXTENT_BOUNDARY bit before marking extent delalloc. There is a race in relocate_inode_pages, it happens when find_delalloc_range finds the delalloc extent before the boundary bit is set. Thank you, Signed-off-by: Yan Zheng --- fs/btrfs/extent-tree.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 274bb91efa2..fe0e59ab33c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3994,10 +3994,10 @@ again: } set_page_extent_mapped(page); - btrfs_set_extent_delalloc(inode, page_start, page_end); if (i == first_index) set_extent_bits(io_tree, page_start, page_end, EXTENT_BOUNDARY, GFP_NOFS); + btrfs_set_extent_delalloc(inode, page_start, page_end); set_page_dirty(page); total_dirty++; @@ -4405,7 +4405,7 @@ static int noinline get_new_locations(struct inode *reloc_inode, path->slots[0]++; } - WARN_ON(cur_pos + offset > last_byte); + BUG_ON(cur_pos + offset > last_byte); if (cur_pos + offset < last_byte) { ret = -ENOENT; goto out; @@ -5712,7 +5712,6 @@ next: if (pass == 0) { btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1); invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1); - WARN_ON(reloc_inode->i_mapping->nrpages); } if (total_found > 0) { -- cgit v1.2.3-70-g09d2 From 1f3c79a28c8837e8572b98f6d14142d9a6133c56 Mon Sep 17 00:00:00 2001 From: Liu Hui Date: Mon, 5 Jan 2009 15:57:51 -0500 Subject: Btrfs: Fix free block discard calls down to the block layer This is a patch to fix discard semantic to make Btrfs work with FTL and SSD. We can improve FTL's performance by telling it which sectors are freed by file system. But if we don't tell FTL the information of free sectors in proper time, the transaction mechanism of Btrfs will be destroyed and Btrfs could not roll back the previous transaction under the power loss condition. There are some problems in the old implementation: 1, In __free_extent(), the pinned down extents should not be discarded. 2, In free_extents(), the free extents are all pinned, so they need to be discarded in transaction committing time instead of free_extents(). 3, The reserved extent used by log tree should be discard too. This patch change discard behavior as follows: 1, For the extents which need to be free at once, we discard them in update_block_group(). 2, Delay discarding the pinned extent in btrfs_finish_extent_commit() when committing transaction. 3, Remove discarding from free_extents() and __free_extent() 4, Add discard interface into btrfs_free_reserved_extent() 5, Discard sectors before updating the free space cache, otherwise, FTL will destroy file system data. --- fs/btrfs/extent-tree.c | 99 ++++++++++++++++++++++++-------------------------- 1 file changed, 48 insertions(+), 51 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fe0e59ab33c..780c1eeb829 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -876,6 +876,38 @@ static void btrfs_issue_discard(struct block_device *bdev, } #endif +static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, + u64 num_bytes) +{ +#ifdef BIO_RW_DISCARD + int ret; + u64 map_length = num_bytes; + struct btrfs_multi_bio *multi = NULL; + + /* Tell the block device(s) that the sectors can be discarded */ + ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, + bytenr, &map_length, &multi, 0); + if (!ret) { + struct btrfs_bio_stripe *stripe = multi->stripes; + int i; + + if (map_length > num_bytes) + map_length = num_bytes; + + for (i = 0; i < multi->num_stripes; i++, stripe++) { + btrfs_issue_discard(stripe->dev->bdev, + stripe->physical, + map_length); + } + kfree(multi); + } + + return ret; +#else + return 0; +#endif +} + static int noinline free_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, struct list_head *del_list) @@ -1069,10 +1101,6 @@ search: for (pos = cur, n = pos->next; pos != end; pos = n, n = pos->next) { struct pending_extent_op *tmp; -#ifdef BIO_RW_DISCARD - u64 map_length; - struct btrfs_multi_bio *multi = NULL; -#endif tmp = list_entry(pos, struct pending_extent_op, list); /* @@ -1084,28 +1112,6 @@ search: tmp->del); BUG_ON(ret); -#ifdef BIO_RW_DISCARD - map_length = tmp->num_bytes; - ret = btrfs_map_block(&info->mapping_tree, READ, - tmp->bytenr, &map_length, &multi, - 0); - if (!ret) { - struct btrfs_bio_stripe *stripe; - int i; - - stripe = multi->stripes; - - if (map_length > tmp->num_bytes) - map_length = tmp->num_bytes; - - for (i = 0; i < multi->num_stripes; - i++, stripe++) - btrfs_issue_discard(stripe->dev->bdev, - stripe->physical, - map_length); - kfree(multi); - } -#endif list_del_init(&tmp->list); unlock_extent(&info->extent_ins, tmp->bytenr, tmp->bytenr + tmp->num_bytes - 1, @@ -1965,6 +1971,11 @@ static int update_block_group(struct btrfs_trans_handle *trans, spin_unlock(&cache->space_info->lock); if (mark_free) { int ret; + + ret = btrfs_discard_extent(root, bytenr, + num_bytes); + WARN_ON(ret); + ret = btrfs_add_free_space(cache, bytenr, num_bytes); WARN_ON(ret); @@ -2104,8 +2115,12 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, EXTENT_DIRTY); if (ret) break; + + ret = btrfs_discard_extent(root, start, end + 1 - start); + btrfs_update_pinned_extents(root, start, end + 1 - start, 0); clear_extent_dirty(unpin, start, end, GFP_NOFS); + if (need_resched()) { mutex_unlock(&root->fs_info->pinned_mutex); cond_resched(); @@ -2113,7 +2128,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, } } mutex_unlock(&root->fs_info->pinned_mutex); - return 0; + return ret; } static int finish_current_insert(struct btrfs_trans_handle *trans, @@ -2458,10 +2473,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, if (refs == 0) { u64 super_used; u64 root_used; -#ifdef BIO_RW_DISCARD - u64 map_length = num_bytes; - struct btrfs_multi_bio *multi = NULL; -#endif if (pin) { mutex_lock(&root->fs_info->pinned_mutex); @@ -2496,25 +2507,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, ret = update_block_group(trans, root, bytenr, num_bytes, 0, mark_free); BUG_ON(ret); -#ifdef BIO_RW_DISCARD - /* Tell the block device(s) that the sectors can be discarded */ - ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, - bytenr, &map_length, &multi, 0); - if (!ret) { - struct btrfs_bio_stripe *stripe = multi->stripes; - int i; - - if (map_length > num_bytes) - map_length = num_bytes; - - for (i = 0; i < multi->num_stripes; i++, stripe++) { - btrfs_issue_discard(stripe->dev->bdev, - stripe->physical, - map_length); - } - kfree(multi); - } -#endif } btrfs_free_path(path); finish_current_insert(trans, extent_root, 0); @@ -3112,16 +3104,21 @@ again: int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) { struct btrfs_block_group_cache *cache; + int ret = 0; cache = btrfs_lookup_block_group(root->fs_info, start); if (!cache) { printk(KERN_ERR "Unable to find block group for %Lu\n", start); return -ENOSPC; } + + ret = btrfs_discard_extent(root, start, len); + btrfs_add_free_space(cache, start, len); put_block_group(cache); update_reserved_extents(root, start, len, 0); - return 0; + + return ret; } int btrfs_reserve_extent(struct btrfs_trans_handle *trans, -- cgit v1.2.3-70-g09d2 From d397712bcc6a759a560fd247e6053ecae091f958 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 5 Jan 2009 21:25:51 -0500 Subject: Btrfs: Fix checkpatch.pl warnings There were many, most are fixed now. struct-funcs.c generates some warnings but these are bogus. Signed-off-by: Chris Mason --- fs/btrfs/acl.c | 5 +- fs/btrfs/async-thread.c | 6 +- fs/btrfs/compat.h | 4 +- fs/btrfs/compression.c | 12 +-- fs/btrfs/ctree.c | 217 ++++++++++++++++-------------------------- fs/btrfs/ctree.h | 30 +++--- fs/btrfs/dir-item.c | 2 +- fs/btrfs/disk-io.c | 187 +++++++++++++------------------------ fs/btrfs/export.c | 8 +- fs/btrfs/extent-tree.c | 223 +++++++++++++++++++++++--------------------- fs/btrfs/extent_io.c | 213 ++++++++++++++++++++---------------------- fs/btrfs/extent_map.c | 14 +-- fs/btrfs/file-item.c | 18 ++-- fs/btrfs/file.c | 49 +++++----- fs/btrfs/free-space-cache.c | 37 ++++---- fs/btrfs/inode-map.c | 1 - fs/btrfs/inode.c | 173 +++++++++++++++++----------------- fs/btrfs/ioctl.c | 37 ++++---- fs/btrfs/locking.c | 5 +- fs/btrfs/ordered-data.c | 34 +++---- fs/btrfs/print-tree.c | 73 +++++++++------ fs/btrfs/ref-cache.c | 12 +-- fs/btrfs/root-tree.c | 17 ++-- fs/btrfs/struct-funcs.c | 4 +- fs/btrfs/super.c | 25 +++-- fs/btrfs/sysfs.c | 6 +- fs/btrfs/transaction.c | 45 ++++----- fs/btrfs/transaction.h | 6 +- fs/btrfs/tree-defrag.c | 9 +- fs/btrfs/tree-log.c | 70 +++++++------- fs/btrfs/volumes.c | 78 ++++++++-------- fs/btrfs/xattr.c | 3 +- fs/btrfs/zlib.c | 45 ++++----- 33 files changed, 770 insertions(+), 898 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 867eaf1f8ef..1d53b62dbba 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -161,8 +161,7 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) ret = __btrfs_setxattr(inode, name, value, size, 0); out: - if (value) - kfree(value); + kfree(value); if (!ret) btrfs_update_cached_acl(inode, p_acl, acl); @@ -213,7 +212,7 @@ static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name, } static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) + const void *value, size_t size, int flags) { return btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); } diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 4229450b759..8e2fec05dbe 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -104,7 +104,7 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers, spin_lock_irqsave(&workers->lock, flags); - while(!list_empty(&workers->order_list)) { + while (!list_empty(&workers->order_list)) { work = list_entry(workers->order_list.next, struct btrfs_work, order_list); @@ -143,7 +143,7 @@ static int worker_loop(void *arg) struct btrfs_work *work; do { spin_lock_irq(&worker->lock); - while(!list_empty(&worker->pending)) { + while (!list_empty(&worker->pending)) { cur = worker->pending.next; work = list_entry(cur, struct btrfs_work, list); list_del(&work->list); @@ -188,7 +188,7 @@ int btrfs_stop_workers(struct btrfs_workers *workers) struct btrfs_worker_thread *worker; list_splice_init(&workers->idle_list, &workers->worker_list); - while(!list_empty(&workers->worker_list)) { + while (!list_empty(&workers->worker_list)) { cur = workers->worker_list.next; worker = list_entry(cur, struct btrfs_worker_thread, worker_list); diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h index 75e4426d6fb..594d60bdd3c 100644 --- a/fs/btrfs/compat.h +++ b/fs/btrfs/compat.h @@ -4,7 +4,7 @@ #define btrfs_drop_nlink(inode) drop_nlink(inode) #define btrfs_inc_nlink(inode) inc_nlink(inode) -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,27) +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 27) static inline struct dentry *d_obtain_alias(struct inode *inode) { struct dentry *d; @@ -21,7 +21,7 @@ static inline struct dentry *d_obtain_alias(struct inode *inode) } #endif -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28) # define __pagevec_lru_add_file __pagevec_lru_add # define open_bdev_exclusive open_bdev_excl # define close_bdev_exclusive(bdev, mode) close_bdev_excl(bdev) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 2436163d543..ee848d8585d 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -137,7 +137,8 @@ static int check_compressed_csum(struct inode *inode, kunmap_atomic(kaddr, KM_USER0); if (csum != *cb_sum) { - printk("btrfs csum failed ino %lu extent %llu csum %u " + printk(KERN_INFO "btrfs csum failed ino %lu " + "extent %llu csum %u " "wanted %u mirror %d\n", inode->i_ino, (unsigned long long)disk_start, csum, *cb_sum, cb->mirror_num); @@ -217,7 +218,7 @@ csum_failed: * we have verified the checksum already, set page * checked so the end_io handlers know about it */ - while(bio_index < cb->orig_bio->bi_vcnt) { + while (bio_index < cb->orig_bio->bi_vcnt) { SetPageChecked(bvec->bv_page); bvec++; bio_index++; @@ -246,7 +247,7 @@ static noinline int end_compressed_writeback(struct inode *inode, u64 start, int i; int ret; - while(nr_pages > 0) { + while (nr_pages > 0) { ret = find_get_pages_contig(inode->i_mapping, index, min_t(unsigned long, nr_pages, ARRAY_SIZE(pages)), pages); @@ -463,7 +464,7 @@ static noinline int add_ra_bio_pages(struct inode *inode, end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; pagevec_init(&pvec, 0); - while(last_offset < compressed_end) { + while (last_offset < compressed_end) { page_index = last_offset >> PAGE_CACHE_SHIFT; if (page_index > end_index) @@ -697,9 +698,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); BUG_ON(ret); - if (!btrfs_test_flag(inode, NODATASUM)) { + if (!btrfs_test_flag(inode, NODATASUM)) btrfs_lookup_bio_sums(root, inode, comp_bio, sums); - } ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0); BUG_ON(ret); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7fad2e3ad6f..9e46c077681 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -67,7 +67,7 @@ void btrfs_free_path(struct btrfs_path *p) * * It is safe to call this on paths that no locks or extent buffers held. */ -void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) +noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) { int i; @@ -112,7 +112,7 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) { struct extent_buffer *eb; - while(1) { + while (1) { eb = btrfs_root_node(root); btrfs_tree_lock(eb); @@ -202,22 +202,22 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, } /* - * does the dirty work in cow of a single block. The parent block - * (if supplied) is updated to point to the new cow copy. The new - * buffer is marked dirty and returned locked. If you modify the block - * it needs to be marked dirty again. + * does the dirty work in cow of a single block. The parent block (if + * supplied) is updated to point to the new cow copy. The new buffer is marked + * dirty and returned locked. If you modify the block it needs to be marked + * dirty again. * * search_start -- an allocation hint for the new block * - * empty_size -- a hint that you plan on doing more cow. This is the size in bytes - * the allocator should try to find free next to the block it returns. This is - * just a hint and may be ignored by the allocator. + * empty_size -- a hint that you plan on doing more cow. This is the size in + * bytes the allocator should try to find free next to the block it returns. + * This is just a hint and may be ignored by the allocator. * * prealloc_dest -- if you have already reserved a destination for the cow, - * this uses that block instead of allocating a new one. btrfs_alloc_reserved_extent - * is used to finish the allocation. + * this uses that block instead of allocating a new one. + * btrfs_alloc_reserved_extent is used to finish the allocation. */ -static int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, +static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, @@ -366,7 +366,7 @@ static int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, * This version of it has extra checks so that a block isn't cow'd more than * once per transaction, as long as it hasn't been written yet */ -int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, +noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, struct extent_buffer **cow_ret, u64 prealloc_dest) @@ -375,13 +375,16 @@ int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, int ret; if (trans->transaction != root->fs_info->running_transaction) { - printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + printk(KERN_CRIT "trans %llu running %llu\n", + (unsigned long long)trans->transid, + (unsigned long long) root->fs_info->running_transaction->transid); WARN_ON(1); } if (trans->transid != root->fs_info->generation) { - printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, - root->fs_info->generation); + printk(KERN_CRIT "trans %llu running %llu\n", + (unsigned long long)trans->transid, + (unsigned long long)root->fs_info->generation); WARN_ON(1); } @@ -489,16 +492,10 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, if (cache_only && parent_level != 1) return 0; - if (trans->transaction != root->fs_info->running_transaction) { - printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, - root->fs_info->running_transaction->transid); + if (trans->transaction != root->fs_info->running_transaction) WARN_ON(1); - } - if (trans->transid != root->fs_info->generation) { - printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, - root->fs_info->generation); + if (trans->transid != root->fs_info->generation) WARN_ON(1); - } parent_nritems = btrfs_header_nritems(parent); blocksize = btrfs_level_size(root, parent_level - 1); @@ -681,51 +678,18 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, BUG_ON(btrfs_node_blockptr(parent, parent_slot) != btrfs_header_bytenr(leaf)); } -#if 0 - for (i = 0; nritems > 1 && i < nritems - 2; i++) { - btrfs_item_key_to_cpu(leaf, &cpukey, i + 1); - btrfs_item_key(leaf, &leaf_key, i); - if (comp_keys(&leaf_key, &cpukey) >= 0) { - btrfs_print_leaf(root, leaf); - printk("slot %d offset bad key\n", i); - BUG_ON(1); - } - if (btrfs_item_offset_nr(leaf, i) != - btrfs_item_end_nr(leaf, i + 1)) { - btrfs_print_leaf(root, leaf); - printk("slot %d offset bad\n", i); - BUG_ON(1); - } - if (i == 0) { - if (btrfs_item_offset_nr(leaf, i) + - btrfs_item_size_nr(leaf, i) != - BTRFS_LEAF_DATA_SIZE(root)) { - btrfs_print_leaf(root, leaf); - printk("slot %d first offset bad\n", i); - BUG_ON(1); - } - } - } - if (nritems > 0) { - if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) { - btrfs_print_leaf(root, leaf); - printk("slot %d bad size \n", nritems - 1); - BUG_ON(1); - } - } -#endif if (slot != 0 && slot < nritems - 1) { btrfs_item_key(leaf, &leaf_key, slot); btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1); if (comp_keys(&leaf_key, &cpukey) <= 0) { btrfs_print_leaf(root, leaf); - printk("slot %d offset bad key\n", slot); + printk(KERN_CRIT "slot %d offset bad key\n", slot); BUG_ON(1); } if (btrfs_item_offset_nr(leaf, slot - 1) != btrfs_item_end_nr(leaf, slot)) { btrfs_print_leaf(root, leaf); - printk("slot %d offset bad\n", slot); + printk(KERN_CRIT "slot %d offset bad\n", slot); BUG_ON(1); } } @@ -736,7 +700,7 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, if (btrfs_item_offset_nr(leaf, slot) != btrfs_item_end_nr(leaf, slot + 1)) { btrfs_print_leaf(root, leaf); - printk("slot %d offset bad\n", slot); + printk(KERN_CRIT "slot %d offset bad\n", slot); BUG_ON(1); } } @@ -745,30 +709,10 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, return 0; } -static int noinline check_block(struct btrfs_root *root, +static noinline int check_block(struct btrfs_root *root, struct btrfs_path *path, int level) { - u64 found_start; return 0; - if (btrfs_header_level(path->nodes[level]) != level) - printk("warning: bad level %Lu wanted %d found %d\n", - path->nodes[level]->start, level, - btrfs_header_level(path->nodes[level])); - found_start = btrfs_header_bytenr(path->nodes[level]); - if (found_start != path->nodes[level]->start) { - printk("warning: bad bytentr %Lu found %Lu\n", - path->nodes[level]->start, found_start); - } -#if 0 - struct extent_buffer *buf = path->nodes[level]; - - if (memcmp_extent_buffer(buf, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(buf), - BTRFS_FSID_SIZE)) { - printk("warning bad block %Lu\n", buf->start); - return 1; - } -#endif if (level == 0) return check_leaf(root, path, level); return check_node(root, path, level); @@ -802,7 +746,7 @@ static noinline int generic_bin_search(struct extent_buffer *eb, unsigned long map_len = 0; int err; - while(low < high) { + while (low < high) { mid = (low + high) / 2; offset = p + mid * item_size; @@ -1130,7 +1074,7 @@ enospc: * when they are completely full. This is also done top down, so we * have to be pessimistic. */ -static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, +static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { @@ -1296,7 +1240,7 @@ static noinline void reada_for_search(struct btrfs_root *root, nritems = btrfs_header_nritems(node); nr = slot; - while(1) { + while (1) { if (direction < 0) { if (nr == 0) break; @@ -1322,7 +1266,8 @@ static noinline void reada_for_search(struct btrfs_root *root, nscan++; if (path->reada < 2 && (nread > (64 * 1024) || nscan > 32)) break; - if(nread > (256 * 1024) || nscan > 128) + + if (nread > (256 * 1024) || nscan > 128) break; if (search < lowest_read) @@ -1333,17 +1278,17 @@ static noinline void reada_for_search(struct btrfs_root *root, } /* - * when we walk down the tree, it is usually safe to unlock the higher layers in - * the tree. The exceptions are when our path goes through slot 0, because operations - * on the tree might require changing key pointers higher up in the tree. + * when we walk down the tree, it is usually safe to unlock the higher layers + * in the tree. The exceptions are when our path goes through slot 0, because + * operations on the tree might require changing key pointers higher up in the + * tree. * - * callers might also have set path->keep_locks, which tells this code to - * keep the lock if the path points to the last slot in the block. This is - * part of walking through the tree, and selecting the next slot in the higher - * block. + * callers might also have set path->keep_locks, which tells this code to keep + * the lock if the path points to the last slot in the block. This is part of + * walking through the tree, and selecting the next slot in the higher block. * - * lowest_unlock sets the lowest level in the tree we're allowed to unlock. - * so if lowest_unlock is 1, level 0 won't be unlocked + * lowest_unlock sets the lowest level in the tree we're allowed to unlock. so + * if lowest_unlock is 1, level 0 won't be unlocked */ static noinline void unlock_up(struct btrfs_path *path, int level, int lowest_unlock) @@ -1832,9 +1777,8 @@ static int push_node_left(struct btrfs_trans_handle *trans, if (!empty && src_nritems <= 8) return 1; - if (push_items <= 0) { + if (push_items <= 0) return 1; - } if (empty) { push_items = min(src_nritems, push_items); @@ -1854,7 +1798,7 @@ static int push_node_left(struct btrfs_trans_handle *trans, copy_extent_buffer(dst, src, btrfs_node_key_ptr_offset(dst_nritems), btrfs_node_key_ptr_offset(0), - push_items * sizeof(struct btrfs_key_ptr)); + push_items * sizeof(struct btrfs_key_ptr)); if (push_items < src_nritems) { memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), @@ -1899,19 +1843,16 @@ static int balance_node_right(struct btrfs_trans_handle *trans, src_nritems = btrfs_header_nritems(src); dst_nritems = btrfs_header_nritems(dst); push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; - if (push_items <= 0) { + if (push_items <= 0) return 1; - } - if (src_nritems < 4) { + if (src_nritems < 4) return 1; - } max_push = src_nritems / 2 + 1; /* don't try to empty the node */ - if (max_push >= src_nritems) { + if (max_push >= src_nritems) return 1; - } if (max_push < push_items) push_items = max_push; @@ -1924,7 +1865,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans, copy_extent_buffer(dst, src, btrfs_node_key_ptr_offset(0), btrfs_node_key_ptr_offset(src_nritems - push_items), - push_items * sizeof(struct btrfs_key_ptr)); + push_items * sizeof(struct btrfs_key_ptr)); btrfs_set_header_nritems(src, src_nritems - push_items); btrfs_set_header_nritems(dst, dst_nritems + push_items); @@ -1945,7 +1886,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans, * * returns zero on success or < 0 on failure. */ -static int noinline insert_new_root(struct btrfs_trans_handle *trans, +static noinline int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { @@ -2176,14 +2117,15 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr) * the start of the leaf data. IOW, how much room * the leaf has left for both items and data */ -int noinline btrfs_leaf_free_space(struct btrfs_root *root, +noinline int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf) { int nritems = btrfs_header_nritems(leaf); int ret; ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); if (ret < 0) { - printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n", + printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, " + "used %d nritems %d\n", ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root), leaf_space_used(leaf, 0, nritems), nritems); } @@ -2219,9 +2161,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root int ret; slot = path->slots[1]; - if (!path->nodes[1]) { + if (!path->nodes[1]) return 1; - } + upper = path->nodes[1]; if (slot >= btrfs_header_nritems(upper) - 1) return 1; @@ -2418,9 +2360,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root return 1; right_nritems = btrfs_header_nritems(right); - if (right_nritems == 0) { + if (right_nritems == 0) return 1; - } WARN_ON(!btrfs_tree_locked(path->nodes[1])); @@ -2502,7 +2443,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root push_items * sizeof(struct btrfs_item)); push_space = BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_offset_nr(right, push_items -1); + btrfs_item_offset_nr(right, push_items - 1); copy_extent_buffer(left, right, btrfs_leaf_data(left) + leaf_data_end(root, left) - push_space, @@ -2537,7 +2478,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root /* fixup right node */ if (push_items > right_nritems) { - printk("push items %d nr %u\n", push_items, right_nritems); + printk(KERN_CRIT "push items %d nr %u\n", push_items, + right_nritems); WARN_ON(1); } @@ -2640,9 +2582,8 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, /* first try to make some room by pushing left and right */ if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { wret = push_leaf_right(trans, root, path, data_size, 0); - if (wret < 0) { + if (wret < 0) return wret; - } if (wret) { wret = push_leaf_left(trans, root, path, data_size, 0); if (wret < 0) @@ -2665,7 +2606,7 @@ again: l = path->nodes[0]; slot = path->slots[0]; nritems = btrfs_header_nritems(l); - mid = (nritems + 1)/ 2; + mid = (nritems + 1) / 2; right = btrfs_alloc_free_block(trans, root, root->leafsize, path->nodes[1]->start, @@ -2734,7 +2675,7 @@ again: path->slots[0] = 0; if (path->slots[1] == 0) { wret = fixup_low_keys(trans, root, - path, &disk_key, 1); + path, &disk_key, 1); if (wret) ret = wret; } @@ -3033,8 +2974,8 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, BTRFS_FILE_EXTENT_INLINE) { ptr = btrfs_item_ptr_offset(leaf, slot); memmove_extent_buffer(leaf, ptr, - (unsigned long)fi, - offsetof(struct btrfs_file_extent_item, + (unsigned long)fi, + offsetof(struct btrfs_file_extent_item, disk_bytenr)); } } @@ -3096,7 +3037,8 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, BUG_ON(slot < 0); if (slot >= nritems) { btrfs_print_leaf(root, leaf); - printk("slot %d too large, nritems %d\n", slot, nritems); + printk(KERN_CRIT "slot %d too large, nritems %d\n", + slot, nritems); BUG_ON(1); } @@ -3218,7 +3160,7 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans, if (old_data < data_end) { btrfs_print_leaf(root, leaf); - printk("slot %d old_data %d data_end %d\n", + printk(KERN_CRIT "slot %d old_data %d data_end %d\n", slot, old_data, data_end); BUG_ON(1); } @@ -3317,9 +3259,8 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, unsigned int data_end; struct btrfs_disk_key disk_key; - for (i = 0; i < nr; i++) { + for (i = 0; i < nr; i++) total_data += data_size[i]; - } total_size = total_data + (nr * sizeof(struct btrfs_item)); ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); @@ -3336,7 +3277,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, if (btrfs_leaf_free_space(root, leaf) < total_size) { btrfs_print_leaf(root, leaf); - printk("not enough freespace need %u have %d\n", + printk(KERN_CRIT "not enough freespace need %u have %d\n", total_size, btrfs_leaf_free_space(root, leaf)); BUG(); } @@ -3349,7 +3290,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, if (old_data < data_end) { btrfs_print_leaf(root, leaf); - printk("slot %d old_data %d data_end %d\n", + printk(KERN_CRIT "slot %d old_data %d data_end %d\n", slot, old_data, data_end); BUG_ON(1); } @@ -3457,7 +3398,7 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, int wret; nritems = btrfs_header_nritems(parent); - if (slot != nritems -1) { + if (slot != nritems - 1) { memmove_extent_buffer(parent, btrfs_node_key_ptr_offset(slot), btrfs_node_key_ptr_offset(slot + 1), @@ -3614,7 +3555,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (btrfs_header_nritems(leaf) == 0) { path->slots[1] = slot; - ret = btrfs_del_leaf(trans, root, path, leaf->start); + ret = btrfs_del_leaf(trans, root, path, + leaf->start); BUG_ON(ret); free_extent_buffer(leaf); } else { @@ -3717,7 +3659,7 @@ again: ret = 1; goto out; } - while(1) { + while (1) { nritems = btrfs_header_nritems(cur); level = btrfs_header_level(cur); sret = bin_search(cur, min_key, level, &slot); @@ -3738,7 +3680,7 @@ again: * min_trans parameters. If it isn't in cache or is too * old, skip to the next one. */ - while(slot < nritems) { + while (slot < nritems) { u64 blockptr; u64 gen; struct extent_buffer *tmp; @@ -3830,7 +3772,7 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, struct extent_buffer *c; WARN_ON(!path->keep_locks); - while(level < BTRFS_MAX_LEVEL) { + while (level < BTRFS_MAX_LEVEL) { if (!path->nodes[level]) return 1; @@ -3839,9 +3781,8 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, next: if (slot >= btrfs_header_nritems(c)) { level++; - if (level == BTRFS_MAX_LEVEL) { + if (level == BTRFS_MAX_LEVEL) return 1; - } continue; } if (level == 0) @@ -3889,9 +3830,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) int ret; nritems = btrfs_header_nritems(path->nodes[0]); - if (nritems == 0) { + if (nritems == 0) return 1; - } btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); @@ -3915,7 +3855,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) goto done; } - while(level < BTRFS_MAX_LEVEL) { + while (level < BTRFS_MAX_LEVEL) { if (!path->nodes[level]) return 1; @@ -3923,9 +3863,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) c = path->nodes[level]; if (slot >= btrfs_header_nritems(c)) { level++; - if (level == BTRFS_MAX_LEVEL) { + if (level == BTRFS_MAX_LEVEL) return 1; - } continue; } @@ -3946,7 +3885,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) break; } path->slots[level] = slot; - while(1) { + while (1) { level--; c = path->nodes[level]; if (path->locks[level]) @@ -3986,7 +3925,7 @@ int btrfs_previous_item(struct btrfs_root *root, u32 nritems; int ret; - while(1) { + while (1) { if (path->slots[0] == 0) { ret = btrfs_prev_leaf(root, path); if (ret != 0) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ccea0648e10..eee060f8811 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -126,7 +126,6 @@ struct btrfs_ordered_sum; static int btrfs_csum_sizes[] = { 4, 0 }; /* four bytes for CRC32 */ -//#define BTRFS_CRC32_SIZE 4 #define BTRFS_EMPTY_DIR_SIZE 0 #define BTRFS_FT_UNKNOWN 0 @@ -283,8 +282,8 @@ struct btrfs_header { } __attribute__ ((__packed__)); #define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \ - sizeof(struct btrfs_header)) / \ - sizeof(struct btrfs_key_ptr)) + sizeof(struct btrfs_header)) / \ + sizeof(struct btrfs_key_ptr)) #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) #define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->leafsize)) #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ @@ -1512,7 +1511,7 @@ static inline struct btrfs_header *btrfs_buffer_header(struct extent_buffer *eb) static inline int btrfs_is_leaf(struct extent_buffer *eb) { - return (btrfs_header_level(eb) == 0); + return btrfs_header_level(eb) == 0; } /* struct btrfs_root_item */ @@ -1597,8 +1596,8 @@ static inline unsigned long btrfs_leaf_data(struct extent_buffer *l) /* struct btrfs_file_extent_item */ BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8); -static inline unsigned long btrfs_file_extent_inline_start(struct - btrfs_file_extent_item *e) +static inline unsigned long +btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) { unsigned long offset = (unsigned long)e; offset += offsetof(struct btrfs_file_extent_item, disk_bytenr); @@ -1660,20 +1659,20 @@ static inline int btrfs_set_root_name(struct btrfs_root *root, const char *name, int len) { /* if we already have a name just free it */ - if (root->name) - kfree(root->name); + kfree(root->name); root->name = kmalloc(len+1, GFP_KERNEL); if (!root->name) return -ENOMEM; memcpy(root->name, name, len); - root->name[len] ='\0'; + root->name[len] = '\0'; return 0; } -static inline u32 btrfs_level_size(struct btrfs_root *root, int level) { +static inline u32 btrfs_level_size(struct btrfs_root *root, int level) +{ if (level == 0) return root->leafsize; return root->nodesize; @@ -1707,9 +1706,9 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); -struct btrfs_block_group_cache *btrfs_lookup_block_group(struct - btrfs_fs_info *info, - u64 bytenr); +struct btrfs_block_group_cache *btrfs_lookup_block_group( + struct btrfs_fs_info *info, + u64 bytenr); u64 btrfs_find_block_group(struct btrfs_root *root, u64 search_start, u64 search_hint, int owner); struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, @@ -1908,8 +1907,9 @@ int btrfs_search_root(struct btrfs_root *root, u64 search_start, int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, struct btrfs_root *latest_root); /* dir-item.c */ -int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, const char *name, int name_len, u64 dir, +int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, const char *name, + int name_len, u64 dir, struct btrfs_key *location, u8 type, u64 index); struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 5040b71f190..926a0b287a7 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -333,7 +333,7 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, leaf = path->nodes[0]; dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); total_len = btrfs_item_size_nr(leaf, path->slots[0]); - while(cur < total_len) { + while (cur < total_len) { this_len = sizeof(*dir_item) + btrfs_dir_name_len(leaf, dir_item) + btrfs_dir_data_len(leaf, dir_item); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dae25e78a6b..81a313874ae 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -23,7 +23,7 @@ #include #include #include -#include // for block_sync_page +#include #include #include #include @@ -40,19 +40,6 @@ #include "ref-cache.h" #include "tree-log.h" -#if 0 -static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) -{ - if (extent_buffer_blocknr(buf) != btrfs_header_blocknr(buf)) { - printk(KERN_CRIT "buf blocknr(buf) is %llu, header is %llu\n", - (unsigned long long)extent_buffer_blocknr(buf), - (unsigned long long)btrfs_header_blocknr(buf)); - return 1; - } - return 0; -} -#endif - static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); @@ -128,23 +115,13 @@ static struct extent_map *btree_get_extent(struct inode *inode, u64 failed_start = em->start; u64 failed_len = em->len; - printk("failed to insert %Lu %Lu -> %Lu into tree\n", - em->start, em->len, em->block_start); free_extent_map(em); em = lookup_extent_mapping(em_tree, start, len); if (em) { - printk("after failing, found %Lu %Lu %Lu\n", - em->start, em->len, em->block_start); ret = 0; } else { em = lookup_extent_mapping(em_tree, failed_start, failed_len); - if (em) { - printk("double failure lookup gives us " - "%Lu %Lu -> %Lu\n", em->start, - em->len, em->block_start); - free_extent_map(em); - } ret = -EIO; } } else if (ret) { @@ -191,15 +168,12 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, unsigned long inline_result; len = buf->len - offset; - while(len > 0) { + while (len > 0) { err = map_private_extent_buffer(buf, offset, 32, &map_token, &kaddr, &map_start, &map_len, KM_USER0); - if (err) { - printk("failed to map extent buffer! %lu\n", - offset); + if (err) return 1; - } cur_len = min(len, map_len - (offset - map_start)); crc = btrfs_csum_data(root, kaddr + offset - map_start, crc, cur_len); @@ -218,15 +192,14 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, btrfs_csum_final(crc, result); if (verify) { - /* FIXME, this is not good */ if (memcmp_extent_buffer(buf, result, 0, csum_size)) { u32 val; u32 found = 0; memcpy(&found, result, csum_size); read_extent_buffer(buf, &val, 0, csum_size); - printk("btrfs: %s checksum verify failed on %llu " - "wanted %X found %X level %d\n", + printk(KERN_INFO "btrfs: %s checksum verify failed " + "on %llu wanted %X found %X level %d\n", root->fs_info->sb->s_id, buf->start, val, found, btrfs_header_level(buf)); if (result != (char *)&inline_result) @@ -293,7 +266,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, if (!ret && !verify_parent_transid(io_tree, eb, parent_transid)) return ret; -printk("read extent buffer pages failed with ret %d mirror no %d\n", ret, mirror_num); + num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, eb->start, eb->len); if (num_copies == 1) @@ -307,9 +280,10 @@ printk("read extent buffer pages failed with ret %d mirror no %d\n", ret, mirror } /* - * checksum a dirty tree block before IO. This has extra checks to make - * sure we only fill in the checksum field in the first page of a multi-page block + * checksum a dirty tree block before IO. This has extra checks to make sure + * we only fill in the checksum field in the first page of a multi-page block */ + static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) { struct extent_io_tree *tree; @@ -327,28 +301,22 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) if (!page->private) goto out; len = page->private >> 2; - if (len == 0) { - WARN_ON(1); - } + WARN_ON(len == 0); + eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, btrfs_header_generation(eb)); BUG_ON(ret); found_start = btrfs_header_bytenr(eb); if (found_start != start) { - printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", - start, found_start, len); WARN_ON(1); goto err; } if (eb->first_page != page) { - printk("bad first page %lu %lu\n", eb->first_page->index, - page->index); WARN_ON(1); goto err; } if (!PageUptodate(page)) { - printk("csum not up to date page %lu\n", page->index); WARN_ON(1); goto err; } @@ -396,29 +364,30 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, goto out; if (!page->private) goto out; + len = page->private >> 2; - if (len == 0) { - WARN_ON(1); - } + WARN_ON(len == 0); + eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); found_start = btrfs_header_bytenr(eb); if (found_start != start) { - printk("bad tree block start %llu %llu\n", + printk(KERN_INFO "btrfs bad tree block start %llu %llu\n", (unsigned long long)found_start, (unsigned long long)eb->start); ret = -EIO; goto err; } if (eb->first_page != page) { - printk("bad first page %lu %lu\n", eb->first_page->index, - page->index); + printk(KERN_INFO "btrfs bad first page %lu %lu\n", + eb->first_page->index, page->index); WARN_ON(1); ret = -EIO; goto err; } if (check_tree_block_fsid(root, eb)) { - printk("bad fsid on block %Lu\n", eb->start); + printk(KERN_INFO "btrfs bad fsid on block %llu\n", + (unsigned long long)eb->start); ret = -EIO; goto err; } @@ -578,7 +547,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, HZ/10); } #endif - while(atomic_read(&fs_info->async_submit_draining) && + while (atomic_read(&fs_info->async_submit_draining) && atomic_read(&fs_info->nr_async_submits)) { wait_event(fs_info->async_submit_wait, (atomic_read(&fs_info->nr_async_submits) == 0)); @@ -594,7 +563,7 @@ static int btree_csum_one_bio(struct bio *bio) struct btrfs_root *root; WARN_ON(bio->bi_vcnt <= 0); - while(bio_index < bio->bi_vcnt) { + while (bio_index < bio->bi_vcnt) { root = BTRFS_I(bvec->bv_page->mapping->host)->root; csum_dirty_buffer(root, bvec->bv_page); bio_index++; @@ -680,9 +649,8 @@ static int btree_writepages(struct address_space *mapping, num_dirty = count_range_bits(tree, &start, (u64)-1, thresh, EXTENT_DIRTY); - if (num_dirty < thresh) { + if (num_dirty < thresh) return 0; - } } return extent_writepages(tree, mapping, btree_get_extent, wbc); } @@ -701,15 +669,14 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) int ret; if (PageWriteback(page) || PageDirty(page)) - return 0; + return 0; tree = &BTRFS_I(page->mapping->host)->io_tree; map = &BTRFS_I(page->mapping->host)->extent_tree; ret = try_release_extent_state(map, tree, page, gfp_flags); - if (!ret) { + if (!ret) return 0; - } ret = try_release_extent_buffer(tree, page); if (ret == 1) { @@ -728,8 +695,8 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) extent_invalidatepage(tree, page, offset); btree_releasepage(page, GFP_NOFS); if (PagePrivate(page)) { - printk("warning page private not zero on page %Lu\n", - page_offset(page)); + printk(KERN_WARNING "btrfs warning page private not zero " + "on page %llu\n", (unsigned long long)page_offset(page)); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); @@ -813,7 +780,7 @@ int btrfs_write_tree_block(struct extent_buffer *buf) int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) { return btrfs_wait_on_page_writeback_range(buf->first_page->mapping, - buf->start, buf->start + buf->len -1); + buf->start, buf->start + buf->len - 1); } struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, @@ -832,11 +799,10 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); - if (ret == 0) { + if (ret == 0) buf->flags |= EXTENT_UPTODATE; - } else { + else WARN_ON(1); - } return buf; } @@ -944,7 +910,7 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, if (!log_root_tree) return 0; - while(1) { + while (1) { ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, 0, &start, &end, EXTENT_DIRTY); if (ret) @@ -1165,24 +1131,6 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, root->in_sysfs = 1; return root; } -#if 0 -static int add_hasher(struct btrfs_fs_info *info, char *type) { - struct btrfs_hasher *hasher; - - hasher = kmalloc(sizeof(*hasher), GFP_NOFS); - if (!hasher) - return -ENOMEM; - hasher->hash_tfm = crypto_alloc_hash(type, 0, CRYPTO_ALG_ASYNC); - if (!hasher->hash_tfm) { - kfree(hasher); - return -EINVAL; - } - spin_lock(&info->hash_lock); - list_add(&hasher->list, &info->hashers); - spin_unlock(&info->hash_lock); - return 0; -} -#endif static int btrfs_congested_fn(void *congested_data, int bdi_bits) { @@ -1226,9 +1174,8 @@ static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) continue; bdi = blk_get_backing_dev_info(device->bdev); - if (bdi->unplug_io_fn) { + if (bdi->unplug_io_fn) bdi->unplug_io_fn(bdi, page); - } } } @@ -1420,8 +1367,9 @@ static int transaction_kthread(void *arg) mutex_lock(&root->fs_info->transaction_kthread_mutex); if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) { - printk("btrfs: total reference cache size %Lu\n", - root->fs_info->total_ref_cache_size); + printk(KERN_INFO "btrfs: total reference cache " + "size %llu\n", + root->fs_info->total_ref_cache_size); } mutex_lock(&root->fs_info->trans_mutex); @@ -1592,14 +1540,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, atomic_set(&fs_info->tree_log_writers, 0); fs_info->tree_log_transid = 0; -#if 0 - ret = add_hasher(fs_info, "crc32c"); - if (ret) { - printk("btrfs: failed hash setup, modprobe cryptomgr?\n"); - err = -ENOMEM; - goto fail_iput; - } -#endif __setup_root(4096, 4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); @@ -1720,7 +1660,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, sizeof(disk_super->magic))) { - printk("btrfs: valid FS not found on %s\n", sb->s_id); + printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id); goto fail_sb_buffer; } @@ -1728,8 +1668,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, ret = btrfs_read_sys_array(tree_root); mutex_unlock(&fs_info->chunk_mutex); if (ret) { - printk("btrfs: failed to read the system array on %s\n", - sb->s_id); + printk(KERN_WARNING "btrfs: failed to read the system " + "array on %s\n", sb->s_id); goto fail_sys_array; } @@ -1746,14 +1686,15 @@ struct btrfs_root *open_ctree(struct super_block *sb, BUG_ON(!chunk_root->node); read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), - BTRFS_UUID_SIZE); + (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), + BTRFS_UUID_SIZE); mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_chunk_tree(chunk_root); mutex_unlock(&fs_info->chunk_mutex); if (ret) { - printk("btrfs: failed to read chunk tree on %s\n", sb->s_id); + printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n", + sb->s_id); goto fail_chunk_root; } @@ -1812,7 +1753,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, u64 bytenr = btrfs_super_log_root(disk_super); if (fs_devices->rw_devices == 0) { - printk("Btrfs log replay required on RO media\n"); + printk(KERN_WARNING "Btrfs log replay required " + "on RO media\n"); err = -EIO; goto fail_trans_kthread; } @@ -2097,7 +2039,8 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) total_errors++; } if (total_errors > max_errors) { - printk("btrfs: %d errors while writing supers\n", total_errors); + printk(KERN_ERR "btrfs: %d errors while writing supers\n", + total_errors); BUG(); } @@ -2114,7 +2057,8 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) total_errors++; } if (total_errors > max_errors) { - printk("btrfs: %d errors while writing supers\n", total_errors); + printk(KERN_ERR "btrfs: %d errors while writing supers\n", + total_errors); BUG(); } return 0; @@ -2137,16 +2081,11 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) down_write(&root->anon_super.s_umount); kill_anon_super(&root->anon_super); } -#if 0 - if (root->in_sysfs) - btrfs_sysfs_del_root(root); -#endif if (root->node) free_extent_buffer(root->node); if (root->commit_root) free_extent_buffer(root->commit_root); - if (root->name) - kfree(root->name); + kfree(root->name); kfree(root); return 0; } @@ -2157,7 +2096,7 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info) struct btrfs_root *gang[8]; int i; - while(1) { + while (1) { ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, (void **)gang, 0, ARRAY_SIZE(gang)); @@ -2228,18 +2167,17 @@ int close_ctree(struct btrfs_root *root) if (!(fs_info->sb->s_flags & MS_RDONLY)) { ret = btrfs_commit_super(root); - if (ret) { - printk("btrfs: commit super returns %d\n", ret); - } + if (ret) + printk(KERN_ERR "btrfs: commit super ret %d\n", ret); } if (fs_info->delalloc_bytes) { - printk("btrfs: at unmount delalloc count %Lu\n", + printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", fs_info->delalloc_bytes); } if (fs_info->total_ref_cache_size) { - printk("btrfs: at umount reference cache size %Lu\n", - fs_info->total_ref_cache_size); + printk(KERN_INFO "btrfs: at umount reference cache size %llu\n", + (unsigned long long)fs_info->total_ref_cache_size); } if (fs_info->extent_root->node) @@ -2248,13 +2186,13 @@ int close_ctree(struct btrfs_root *root) if (fs_info->tree_root->node) free_extent_buffer(fs_info->tree_root->node); - if (root->fs_info->chunk_root->node); + if (root->fs_info->chunk_root->node) free_extent_buffer(root->fs_info->chunk_root->node); - if (root->fs_info->dev_root->node); + if (root->fs_info->dev_root->node) free_extent_buffer(root->fs_info->dev_root->node); - if (root->fs_info->csum_root->node); + if (root->fs_info->csum_root->node) free_extent_buffer(root->fs_info->csum_root->node); btrfs_free_block_groups(root->fs_info); @@ -2273,7 +2211,7 @@ int close_ctree(struct btrfs_root *root) btrfs_stop_workers(&fs_info->submit_workers); #if 0 - while(!list_empty(&fs_info->hashers)) { + while (!list_empty(&fs_info->hashers)) { struct btrfs_hasher *hasher; hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher, hashers); @@ -2324,9 +2262,11 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) WARN_ON(!btrfs_tree_locked(buf)); if (transid != root->fs_info->generation) { - printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", + printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " + "found %llu running %llu\n", (unsigned long long)buf->start, - transid, root->fs_info->generation); + (unsigned long long)transid, + (unsigned long long)root->fs_info->generation); WARN_ON(1); } set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); @@ -2361,9 +2301,8 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; int ret; ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); - if (ret == 0) { + if (ret == 0) buf->flags |= EXTENT_UPTODATE; - } return ret; } diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 48b82cd7583..85315d2c90d 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -7,9 +7,11 @@ #include "export.h" #include "compat.h" -#define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, parent_objectid)/4) -#define BTRFS_FID_SIZE_CONNECTABLE (offsetof(struct btrfs_fid, parent_root_objectid)/4) -#define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid)/4) +#define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, \ + parent_objectid) / 4) +#define BTRFS_FID_SIZE_CONNECTABLE (offsetof(struct btrfs_fid, \ + parent_root_objectid) / 4) +#define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid) / 4) static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, int connectable) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 780c1eeb829..ec43fa526d7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -49,10 +49,10 @@ struct pending_extent_op { int del; }; -static int finish_current_insert(struct btrfs_trans_handle *trans, struct - btrfs_root *extent_root, int all); -static int del_pending_extents(struct btrfs_trans_handle *trans, struct - btrfs_root *extent_root, int all); +static int finish_current_insert(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, int all); +static int del_pending_extents(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, int all); static int pin_down_bytes(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int is_data); @@ -247,7 +247,7 @@ static int cache_block_group(struct btrfs_root *root, if (ret < 0) goto err; - while(1) { + while (1) { leaf = path->nodes[0]; slot = path->slots[0]; if (slot >= btrfs_header_nritems(leaf)) { @@ -292,9 +292,8 @@ err: /* * return the block group that starts at or after bytenr */ -static struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct - btrfs_fs_info *info, - u64 bytenr) +static struct btrfs_block_group_cache * +btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr) { struct btrfs_block_group_cache *cache; @@ -306,9 +305,9 @@ static struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct /* * return the block group that contains teh given bytenr */ -struct btrfs_block_group_cache *btrfs_lookup_block_group(struct - btrfs_fs_info *info, - u64 bytenr) +struct btrfs_block_group_cache *btrfs_lookup_block_group( + struct btrfs_fs_info *info, + u64 bytenr) { struct btrfs_block_group_cache *cache; @@ -492,7 +491,7 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) * to the key objectid. */ -static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans, +static noinline int lookup_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 bytenr, u64 parent, @@ -537,7 +536,7 @@ out: * updates all the backrefs that are pending on update_list for the * extent_root */ -static int noinline update_backrefs(struct btrfs_trans_handle *trans, +static noinline int update_backrefs(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, struct btrfs_path *path, struct list_head *update_list) @@ -573,9 +572,11 @@ loop: btrfs_ref_generation(leaf, ref) != op->orig_generation || (ref_objectid != op->level && ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) { - printk(KERN_ERR "couldn't find %Lu, parent %Lu, root %Lu, " - "owner %u\n", op->bytenr, op->orig_parent, - ref_root, op->level); + printk(KERN_ERR "btrfs couldn't find %llu, parent %llu, " + "root %llu, owner %u\n", + (unsigned long long)op->bytenr, + (unsigned long long)op->orig_parent, + (unsigned long long)ref_root, op->level); btrfs_print_leaf(extent_root, leaf); BUG(); } @@ -620,7 +621,7 @@ out: return 0; } -static int noinline insert_extents(struct btrfs_trans_handle *trans, +static noinline int insert_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, struct btrfs_path *path, struct list_head *insert_list, int nr) @@ -781,7 +782,7 @@ static int noinline insert_extents(struct btrfs_trans_handle *trans, return ret; } -static int noinline insert_extent_backref(struct btrfs_trans_handle *trans, +static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 bytenr, u64 parent, @@ -840,7 +841,7 @@ out: return ret; } -static int noinline remove_extent_backref(struct btrfs_trans_handle *trans, +static noinline int remove_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path) { @@ -868,7 +869,7 @@ static int noinline remove_extent_backref(struct btrfs_trans_handle *trans, static void btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len) { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL); #else blkdev_issue_discard(bdev, start >> 9, len >> 9); @@ -908,7 +909,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, #endif } -static int noinline free_extents(struct btrfs_trans_handle *trans, +static noinline int free_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, struct list_head *del_list) { @@ -937,10 +938,11 @@ search: extent_root->root_key.objectid, op->orig_generation, op->level, 1); if (ret) { - printk("Unable to find backref byte nr %Lu root %Lu gen %Lu " - "owner %u\n", op->bytenr, - extent_root->root_key.objectid, op->orig_generation, - op->level); + printk(KERN_ERR "btrfs unable to find backref byte nr %llu " + "root %llu gen %llu owner %u\n", + (unsigned long long)op->bytenr, + (unsigned long long)extent_root->root_key.objectid, + (unsigned long long)op->orig_generation, op->level); btrfs_print_leaf(extent_root, path->nodes[0]); WARN_ON(1); goto out; @@ -1282,7 +1284,9 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_item_key_to_cpu(l, &key, path->slots[0]); if (key.objectid != bytenr) { btrfs_print_leaf(root->fs_info->extent_root, path->nodes[0]); - printk("wanted %Lu found %Lu\n", bytenr, key.objectid); + printk(KERN_ERR "btrfs wanted %llu found %llu\n", + (unsigned long long)bytenr, + (unsigned long long)key.objectid); BUG(); } BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY); @@ -1353,7 +1357,8 @@ int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, goto out; if (ret != 0) { btrfs_print_leaf(root, path->nodes[0]); - printk("failed to find block number %Lu\n", bytenr); + printk(KERN_INFO "btrfs failed to find block number %llu\n", + (unsigned long long)bytenr); BUG(); } l = path->nodes[0]; @@ -1738,7 +1743,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - while(1) { + while (1) { cache = NULL; spin_lock(&root->fs_info->block_group_cache_lock); for (n = rb_first(&root->fs_info->block_group_cache_tree); @@ -1921,10 +1926,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, spin_unlock(&space_info->lock); ret = btrfs_alloc_chunk(trans, extent_root, flags); - if (ret) { -printk("space info full %Lu\n", flags); + if (ret) space_info->full = 1; - } out: mutex_unlock(&extent_root->fs_info->chunk_mutex); return ret; @@ -1941,7 +1944,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 old_val; u64 byte_in_group; - while(total) { + while (total) { cache = btrfs_lookup_block_group(info, bytenr); if (!cache) return -1; @@ -2089,7 +2092,7 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) int ret; mutex_lock(&root->fs_info->pinned_mutex); - while(1) { + while (1) { ret = find_first_extent_bit(pinned_extents, last, &start, &end, EXTENT_DIRTY); if (ret) @@ -2110,7 +2113,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, int ret; mutex_lock(&root->fs_info->pinned_mutex); - while(1) { + while (1) { ret = find_first_extent_bit(unpin, 0, &start, &end, EXTENT_DIRTY); if (ret) @@ -2400,7 +2403,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, if (ret == 0) { struct btrfs_key found_key; extent_slot = path->slots[0]; - while(extent_slot > 0) { + while (extent_slot > 0) { extent_slot--; btrfs_item_key_to_cpu(path->nodes[0], &found_key, extent_slot); @@ -2422,8 +2425,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, &key, path, -1, 1); if (ret) { printk(KERN_ERR "umm, got %d back from search" - ", was looking for %Lu\n", ret, - bytenr); + ", was looking for %llu\n", ret, + (unsigned long long)bytenr); btrfs_print_leaf(extent_root, path->nodes[0]); } BUG_ON(ret); @@ -2432,9 +2435,12 @@ static int __free_extent(struct btrfs_trans_handle *trans, } else { btrfs_print_leaf(extent_root, path->nodes[0]); WARN_ON(1); - printk("Unable to find ref byte nr %Lu root %Lu " - "gen %Lu owner %Lu\n", bytenr, - root_objectid, ref_generation, owner_objectid); + printk(KERN_ERR "btrfs unable to find ref byte nr %llu " + "root %llu gen %llu owner %llu\n", + (unsigned long long)bytenr, + (unsigned long long)root_objectid, + (unsigned long long)ref_generation, + (unsigned long long)owner_objectid); } leaf = path->nodes[0]; @@ -2517,8 +2523,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, * find all the blocks marked as pending in the radix tree and remove * them from the extent map */ -static int del_pending_extents(struct btrfs_trans_handle *trans, struct - btrfs_root *extent_root, int all) +static int del_pending_extents(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, int all) { int ret; int err = 0; @@ -2539,7 +2545,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct again: mutex_lock(&info->extent_ins_mutex); - while(1) { + while (1) { ret = find_first_extent_bit(pending_del, search, &start, &end, EXTENT_WRITEBACK); if (ret) { @@ -2753,7 +2759,7 @@ static u64 stripe_align(struct btrfs_root *root, u64 val) * ins->offset == number of blocks * Any available blocks before search_start are skipped. */ -static int noinline find_free_extent(struct btrfs_trans_handle *trans, +static noinline int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *orig_root, u64 num_bytes, u64 empty_size, u64 search_start, u64 search_end, @@ -2762,7 +2768,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, int data) { int ret = 0; - struct btrfs_root * root = orig_root->fs_info->extent_root; + struct btrfs_root *root = orig_root->fs_info->extent_root; u64 total_needed = num_bytes; u64 *last_ptr = NULL; u64 last_wanted = 0; @@ -2995,8 +3001,10 @@ loop_check: *last_ptr = ins->objectid + ins->offset; ret = 0; } else if (!ret) { - printk(KERN_ERR "we were searching for %Lu bytes, num_bytes %Lu," - " loop %d, allowed_alloc %d\n", total_needed, num_bytes, + printk(KERN_ERR "btrfs searching for %llu bytes, " + "num_bytes %llu, loop %d, allowed_alloc %d\n", + (unsigned long long)total_needed, + (unsigned long long)num_bytes, loop, allowed_chunk_alloc); ret = -ENOSPC; } @@ -3012,19 +3020,22 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) struct btrfs_block_group_cache *cache; struct list_head *l; - printk(KERN_INFO "space_info has %Lu free, is %sfull\n", - info->total_bytes - info->bytes_used - info->bytes_pinned - - info->bytes_reserved, (info->full) ? "" : "not "); + printk(KERN_INFO "space_info has %llu free, is %sfull\n", + (unsigned long long)(info->total_bytes - info->bytes_used - + info->bytes_pinned - info->bytes_reserved), + (info->full) ? "" : "not "); down_read(&info->groups_sem); list_for_each(l, &info->block_groups) { cache = list_entry(l, struct btrfs_block_group_cache, list); spin_lock(&cache->lock); - printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used " - "%Lu pinned %Lu reserved\n", - cache->key.objectid, cache->key.offset, - btrfs_block_group_used(&cache->item), - cache->pinned, cache->reserved); + printk(KERN_INFO "block group %llu has %llu bytes, %llu used " + "%llu pinned %llu reserved\n", + (unsigned long long)cache->key.objectid, + (unsigned long long)cache->key.offset, + (unsigned long long)btrfs_block_group_used(&cache->item), + (unsigned long long)cache->pinned, + (unsigned long long)cache->reserved); btrfs_dump_free_space(cache, bytes); spin_unlock(&cache->lock); } @@ -3045,15 +3056,15 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, if (data) { alloc_profile = info->avail_data_alloc_bits & - info->data_alloc_profile; + info->data_alloc_profile; data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; } else if (root == root->fs_info->chunk_root) { alloc_profile = info->avail_system_alloc_bits & - info->system_alloc_profile; + info->system_alloc_profile; data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; } else { alloc_profile = info->avail_metadata_alloc_bits & - info->metadata_alloc_profile; + info->metadata_alloc_profile; data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; } again: @@ -3092,8 +3103,9 @@ again: struct btrfs_space_info *sinfo; sinfo = __find_space_info(root->fs_info, data); - printk("allocation failed flags %Lu, wanted %Lu\n", - data, num_bytes); + printk(KERN_ERR "btrfs allocation failed flags %llu, " + "wanted %llu\n", (unsigned long long)data, + (unsigned long long)num_bytes); dump_space_info(sinfo, num_bytes); BUG(); } @@ -3108,7 +3120,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) cache = btrfs_lookup_block_group(root->fs_info, start); if (!cache) { - printk(KERN_ERR "Unable to find block group for %Lu\n", start); + printk(KERN_ERR "Unable to find block group for %llu\n", + (unsigned long long)start); return -ENOSPC; } @@ -3235,10 +3248,12 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, } update_block: - ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0); + ret = update_block_group(trans, root, ins->objectid, + ins->offset, 1, 0); if (ret) { - printk("update block group failed for %Lu %Lu\n", - ins->objectid, ins->offset); + printk(KERN_ERR "btrfs update block group failed for %llu " + "%llu\n", (unsigned long long)ins->objectid, + (unsigned long long)ins->offset); BUG(); } out: @@ -3420,7 +3435,7 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, return 0; } -static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans, +static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_leaf_ref *ref) { @@ -3445,15 +3460,15 @@ static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans, return 0; } -static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len, - u32 *refs) +static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, + u64 len, u32 *refs) { int ret; ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs); BUG_ON(ret); -#if 0 // some debugging code in case we see problems here +#if 0 /* some debugging code in case we see problems here */ /* if the refs count is one, it won't get increased again. But * if the ref count is > 1, someone may be decreasing it at * the same time we are. @@ -3474,8 +3489,8 @@ static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len free_extent_buffer(eb); } if (*refs == 1) { - printk("block %llu went down to one during drop_snap\n", - (unsigned long long)start); + printk(KERN_ERR "btrfs block %llu went down to one " + "during drop_snap\n", (unsigned long long)start); } } @@ -3489,7 +3504,7 @@ static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len * helper function for drop_snapshot, this walks down the tree dropping ref * counts as it goes. */ -static int noinline walk_down_tree(struct btrfs_trans_handle *trans, +static noinline int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level) { @@ -3516,7 +3531,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, /* * walk down to the last node level and free all the leaves */ - while(*level >= 0) { + while (*level >= 0) { WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); cur = path->nodes[*level]; @@ -3576,10 +3591,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, *level = 0; break; } - if (printk_ratelimit()) { - printk("leaf ref miss for bytenr %llu\n", - (unsigned long long)bytenr); - } } next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { @@ -3641,7 +3652,7 @@ out: * walk_down_tree. The main difference is that it checks reference * counts while tree blocks are locked. */ -static int noinline walk_down_subtree(struct btrfs_trans_handle *trans, +static noinline int walk_down_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level) { @@ -3730,7 +3741,7 @@ out: * to find the first node higher up where we haven't yet gone through * all the slots */ -static int noinline walk_up_tree(struct btrfs_trans_handle *trans, +static noinline int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level, int max_level) @@ -3839,7 +3850,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root } } } - while(1) { + while (1) { wret = walk_down_tree(trans, root, path, &level); if (wret > 0) break; @@ -3920,7 +3931,7 @@ static unsigned long calc_ra(unsigned long start, unsigned long last, return min(last, start + nr - 1); } -static int noinline relocate_inode_pages(struct inode *inode, u64 start, +static noinline int relocate_inode_pages(struct inode *inode, u64 start, u64 len) { u64 page_start; @@ -4011,7 +4022,7 @@ out_unlock: return ret; } -static int noinline relocate_data_extent(struct inode *reloc_inode, +static noinline int relocate_data_extent(struct inode *reloc_inode, struct btrfs_key *extent_key, u64 offset) { @@ -4087,7 +4098,7 @@ static int is_cowonly_root(u64 root_objectid) return 0; } -static int noinline __next_ref_path(struct btrfs_trans_handle *trans, +static noinline int __next_ref_path(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, struct btrfs_ref_path *ref_path, int first_time) @@ -4119,11 +4130,10 @@ walk_down: if (level < ref_path->lowest_level) break; - if (level >= 0) { + if (level >= 0) bytenr = ref_path->nodes[level]; - } else { + else bytenr = ref_path->extent_start; - } BUG_ON(bytenr == 0); parent = ref_path->nodes[level + 1]; @@ -4170,11 +4180,12 @@ walk_up: level = ref_path->current_level; while (level < BTRFS_MAX_LEVEL - 1) { u64 ref_objectid; - if (level >= 0) { + + if (level >= 0) bytenr = ref_path->nodes[level]; - } else { + else bytenr = ref_path->extent_start; - } + BUG_ON(bytenr == 0); key.objectid = bytenr; @@ -4299,7 +4310,7 @@ static int btrfs_next_ref_path(struct btrfs_trans_handle *trans, return __next_ref_path(trans, extent_root, ref_path, 0); } -static int noinline get_new_locations(struct inode *reloc_inode, +static noinline int get_new_locations(struct inode *reloc_inode, struct btrfs_key *extent_key, u64 offset, int no_fragment, struct disk_extent **extents, @@ -4420,7 +4431,7 @@ out: return ret; } -static int noinline replace_one_extent(struct btrfs_trans_handle *trans, +static noinline int replace_one_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *extent_key, @@ -4778,7 +4789,7 @@ int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, return 0; } -static int noinline invalidate_extent_cache(struct btrfs_root *root, +static noinline int invalidate_extent_cache(struct btrfs_root *root, struct extent_buffer *leaf, struct btrfs_block_group_cache *group, struct btrfs_root *target_root) @@ -4826,7 +4837,7 @@ static int noinline invalidate_extent_cache(struct btrfs_root *root, return 0; } -static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans, +static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *leaf, struct btrfs_block_group_cache *group, @@ -5035,7 +5046,7 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root) return 0; } -static int noinline init_reloc_tree(struct btrfs_trans_handle *trans, +static noinline int init_reloc_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_root *reloc_root; @@ -5102,7 +5113,7 @@ static int noinline init_reloc_tree(struct btrfs_trans_handle *trans, * tree blocks are shared between reloc trees, so they are also shared * between subvols. */ -static int noinline relocate_one_path(struct btrfs_trans_handle *trans, +static noinline int relocate_one_path(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *first_key, @@ -5199,7 +5210,7 @@ static int noinline relocate_one_path(struct btrfs_trans_handle *trans, return 0; } -static int noinline relocate_tree_block(struct btrfs_trans_handle *trans, +static noinline int relocate_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *first_key, @@ -5217,7 +5228,7 @@ static int noinline relocate_tree_block(struct btrfs_trans_handle *trans, return 0; } -static int noinline del_extent_zero(struct btrfs_trans_handle *trans, +static noinline int del_extent_zero(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, struct btrfs_path *path, struct btrfs_key *extent_key) @@ -5233,7 +5244,7 @@ out: return ret; } -static struct btrfs_root noinline *read_ref_root(struct btrfs_fs_info *fs_info, +static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info, struct btrfs_ref_path *ref_path) { struct btrfs_key root_key; @@ -5248,7 +5259,7 @@ static struct btrfs_root noinline *read_ref_root(struct btrfs_fs_info *fs_info, return btrfs_read_fs_root_no_name(fs_info, &root_key); } -static int noinline relocate_one_extent(struct btrfs_root *extent_root, +static noinline int relocate_one_extent(struct btrfs_root *extent_root, struct btrfs_path *path, struct btrfs_key *extent_key, struct btrfs_block_group_cache *group, @@ -5276,8 +5287,8 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS); if (!ref_path) { - ret = -ENOMEM; - goto out; + ret = -ENOMEM; + goto out; } for (loops = 0; ; loops++) { @@ -5497,7 +5508,7 @@ out: return ret; } -static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info, +static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *group) { struct inode *inode = NULL; @@ -5617,7 +5628,7 @@ int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) block_group = btrfs_lookup_block_group(info, group_start); BUG_ON(!block_group); - printk("btrfs relocating block group %llu flags %llu\n", + printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n", (unsigned long long)block_group->key.objectid, (unsigned long long)block_group->flags); @@ -5649,7 +5660,7 @@ again: btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); mutex_unlock(&root->fs_info->cleaner_mutex); - while(1) { + while (1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; @@ -5712,7 +5723,7 @@ next: } if (total_found > 0) { - printk("btrfs found %llu extents in pass %d\n", + printk(KERN_INFO "btrfs found %llu extents in pass %d\n", (unsigned long long)total_found, pass); pass++; if (total_found == skipped && pass > 2) { @@ -5754,7 +5765,7 @@ static int find_first_block_group(struct btrfs_root *root, if (ret < 0) goto out; - while(1) { + while (1) { slot = path->slots[0]; leaf = path->nodes[0]; if (slot >= btrfs_header_nritems(leaf)) { @@ -5825,7 +5836,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) if (!path) return -ENOMEM; - while(1) { + while (1) { ret = find_first_block_group(root, path, &key); if (ret > 0) { ret = 0; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 0bf7684207a..39edb551dca 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -32,7 +32,7 @@ static LIST_HEAD(states); #define LEAK_DEBUG 0 #ifdef LEAK_DEBUG -static spinlock_t leak_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(leak_lock); #endif #define BUFFER_LRU_MAX 64 @@ -81,7 +81,11 @@ void extent_io_exit(void) while (!list_empty(&states)) { state = list_entry(states.next, struct extent_state, leak_list); - printk("state leak: start %Lu end %Lu state %lu in tree %p refs %d\n", state->start, state->end, state->state, state->tree, atomic_read(&state->refs)); + printk(KERN_ERR "btrfs state leak: start %llu end %llu " + "state %lu in tree %p refs %d\n", + (unsigned long long)state->start, + (unsigned long long)state->end, + state->state, state->tree, atomic_read(&state->refs)); list_del(&state->leak_list); kmem_cache_free(extent_state_cache, state); @@ -89,7 +93,9 @@ void extent_io_exit(void) while (!list_empty(&buffers)) { eb = list_entry(buffers.next, struct extent_buffer, leak_list); - printk("buffer leak start %Lu len %lu refs %d\n", eb->start, eb->len, atomic_read(&eb->refs)); + printk(KERN_ERR "btrfs buffer leak start %llu len %lu " + "refs %d\n", (unsigned long long)eb->start, + eb->len, atomic_read(&eb->refs)); list_del(&eb->leak_list); kmem_cache_free(extent_buffer_cache, eb); } @@ -158,11 +164,11 @@ EXPORT_SYMBOL(free_extent_state); static struct rb_node *tree_insert(struct rb_root *root, u64 offset, struct rb_node *node) { - struct rb_node ** p = &root->rb_node; - struct rb_node * parent = NULL; + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; struct tree_entry *entry; - while(*p) { + while (*p) { parent = *p; entry = rb_entry(parent, struct tree_entry, rb_node); @@ -185,13 +191,13 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, struct rb_node **next_ret) { struct rb_root *root = &tree->state; - struct rb_node * n = root->rb_node; + struct rb_node *n = root->rb_node; struct rb_node *prev = NULL; struct rb_node *orig_prev = NULL; struct tree_entry *entry; struct tree_entry *prev_entry = NULL; - while(n) { + while (n) { entry = rb_entry(n, struct tree_entry, rb_node); prev = n; prev_entry = entry; @@ -200,14 +206,13 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, n = n->rb_left; else if (offset > entry->end) n = n->rb_right; - else { + else return n; - } } if (prev_ret) { orig_prev = prev; - while(prev && offset > prev_entry->end) { + while (prev && offset > prev_entry->end) { prev = rb_next(prev); prev_entry = rb_entry(prev, struct tree_entry, rb_node); } @@ -217,7 +222,7 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, if (next_ret) { prev_entry = rb_entry(prev, struct tree_entry, rb_node); - while(prev && offset < prev_entry->start) { + while (prev && offset < prev_entry->start) { prev = rb_prev(prev); prev_entry = rb_entry(prev, struct tree_entry, rb_node); } @@ -233,9 +238,8 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree, struct rb_node *ret; ret = __etree_search(tree, offset, &prev, NULL); - if (!ret) { + if (!ret) return prev; - } return ret; } @@ -243,11 +247,11 @@ static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree, u64 offset, struct rb_node *node) { struct rb_root *root = &tree->buffer; - struct rb_node ** p = &root->rb_node; - struct rb_node * parent = NULL; + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; struct extent_buffer *eb; - while(*p) { + while (*p) { parent = *p; eb = rb_entry(parent, struct extent_buffer, rb_node); @@ -268,10 +272,10 @@ static struct extent_buffer *buffer_search(struct extent_io_tree *tree, u64 offset) { struct rb_root *root = &tree->buffer; - struct rb_node * n = root->rb_node; + struct rb_node *n = root->rb_node; struct extent_buffer *eb; - while(n) { + while (n) { eb = rb_entry(n, struct extent_buffer, rb_node); if (offset < eb->start) n = n->rb_left; @@ -363,7 +367,9 @@ static int insert_state(struct extent_io_tree *tree, struct rb_node *node; if (end < start) { - printk("end < start %Lu %Lu\n", end, start); + printk(KERN_ERR "btrfs end < start %llu %llu\n", + (unsigned long long)end, + (unsigned long long)start); WARN_ON(1); } if (bits & EXTENT_DIRTY) @@ -376,7 +382,10 @@ static int insert_state(struct extent_io_tree *tree, if (node) { struct extent_state *found; found = rb_entry(node, struct extent_state, rb_node); - printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); + printk(KERN_ERR "btrfs found node %llu %llu on insert of " + "%llu %llu\n", (unsigned long long)found->start, + (unsigned long long)found->end, + (unsigned long long)start, (unsigned long long)end); free_extent_state(state); return -EEXIST; } @@ -412,7 +421,6 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, if (node) { struct extent_state *found; found = rb_entry(node, struct extent_state, rb_node); - printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); free_extent_state(prealloc); return -EEXIST; } @@ -661,8 +669,9 @@ static void set_state_bits(struct extent_io_tree *tree, * [start, end] is inclusive * This takes the tree lock. */ -static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, - int exclusive, u64 *failed_start, gfp_t mask) +static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int exclusive, u64 *failed_start, + gfp_t mask) { struct extent_state *state; struct extent_state *prealloc = NULL; @@ -763,7 +772,7 @@ again: if (end < last_start) this_end = end; else - this_end = last_start -1; + this_end = last_start - 1; err = insert_state(tree, prealloc, start, this_end, bits); prealloc = NULL; @@ -891,8 +900,8 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, } EXPORT_SYMBOL(set_extent_uptodate); -static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) +static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, + u64 end, gfp_t mask) { return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); } @@ -904,8 +913,8 @@ static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, 0, NULL, mask); } -static int clear_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) +static int clear_extent_writeback(struct extent_io_tree *tree, u64 start, + u64 end, gfp_t mask) { return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask); } @@ -1025,11 +1034,10 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start, * our range starts. */ node = tree_search(tree, start); - if (!node) { + if (!node) goto out; - } - while(1) { + while (1) { state = rb_entry(node, struct extent_state, rb_node); if (state->end >= start && (state->state & bits)) { *start_ret = state->start; @@ -1062,15 +1070,14 @@ struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, * our range starts. */ node = tree_search(tree, start); - if (!node) { + if (!node) goto out; - } - while(1) { + while (1) { state = rb_entry(node, struct extent_state, rb_node); - if (state->end >= start && (state->state & bits)) { + if (state->end >= start && (state->state & bits)) return state; - } + node = rb_next(node); if (!node) break; @@ -1108,7 +1115,7 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree, goto out; } - while(1) { + while (1) { state = rb_entry(node, struct extent_state, rb_node); if (found && (state->start != cur_start || (state->state & EXTENT_BOUNDARY))) { @@ -1150,7 +1157,7 @@ static noinline int __unlock_for_delalloc(struct inode *inode, if (index == locked_page->index && end_index == index) return 0; - while(nr_pages > 0) { + while (nr_pages > 0) { ret = find_get_pages_contig(inode->i_mapping, index, min_t(unsigned long, nr_pages, ARRAY_SIZE(pages)), pages); @@ -1186,7 +1193,7 @@ static noinline int lock_delalloc_pages(struct inode *inode, /* skip the page at the start index */ nrpages = end_index - index + 1; - while(nrpages > 0) { + while (nrpages > 0) { ret = find_get_pages_contig(inode->i_mapping, index, min_t(unsigned long, nrpages, ARRAY_SIZE(pages)), pages); @@ -1263,17 +1270,16 @@ again: * pages in order, so we can't process delalloc bytes before * locked_page */ - if (delalloc_start < *start) { + if (delalloc_start < *start) delalloc_start = *start; - } /* * make sure to limit the number of pages we try to lock down * if we're looping. */ - if (delalloc_end + 1 - delalloc_start > max_bytes && loops) { + if (delalloc_end + 1 - delalloc_start > max_bytes && loops) delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1; - } + /* step two, lock all the pages after the page that has start */ ret = lock_delalloc_pages(inode, locked_page, delalloc_start, delalloc_end); @@ -1341,7 +1347,7 @@ int extent_clear_unlock_delalloc(struct inode *inode, if (!(unlock_pages || clear_dirty || set_writeback || end_writeback)) return 0; - while(nr_pages > 0) { + while (nr_pages > 0) { ret = find_get_pages_contig(inode->i_mapping, index, min_t(unsigned long, nr_pages, ARRAY_SIZE(pages)), pages); @@ -1384,7 +1390,6 @@ u64 count_range_bits(struct extent_io_tree *tree, int found = 0; if (search_end <= cur_start) { - printk("search_end %Lu start %Lu\n", search_end, cur_start); WARN_ON(1); return 0; } @@ -1399,11 +1404,10 @@ u64 count_range_bits(struct extent_io_tree *tree, * our range starts. */ node = tree_search(tree, cur_start); - if (!node) { + if (!node) goto out; - } - while(1) { + while (1) { state = rb_entry(node, struct extent_state, rb_node); if (state->start > search_end) break; @@ -1927,19 +1931,15 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, nr = bio_get_nr_vecs(bdev); bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); - if (!bio) { - printk("failed to allocate bio nr %d\n", nr); - } bio_add_page(bio, page, page_size, offset); bio->bi_end_io = end_io_func; bio->bi_private = tree; - if (bio_ret) { + if (bio_ret) *bio_ret = bio; - } else { + else ret = submit_one_bio(rw, bio, mirror_num, bio_flags); - } return ret; } @@ -2028,13 +2028,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, break; } extent_offset = cur - em->start; - if (extent_map_end(em) <= cur) { -printk("bad mapping em [%Lu %Lu] cur %Lu\n", em->start, extent_map_end(em), cur); - } BUG_ON(extent_map_end(em) <= cur); - if (end < cur) { -printk("2bad mapping end %Lu cur %Lu\n", end, cur); - } BUG_ON(end < cur); if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) @@ -2199,7 +2193,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, delalloc_end = 0; page_started = 0; if (!epd->extent_locked) { - while(delalloc_end < page_end) { + while (delalloc_end < page_end) { nr_delalloc = find_lock_delalloc_range(inode, tree, page, &delalloc_start, @@ -2242,9 +2236,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, nr_written++; end = page_end; - if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { - printk("found delalloc bits after lock_extent\n"); - } + if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) + printk(KERN_ERR "btrfs delalloc bits after lock_extent\n"); if (last_byte <= start) { clear_extent_dirty(tree, start, page_end, GFP_NOFS); @@ -2297,7 +2290,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); - unlock_extent(tree, unlock_start, cur + iosize -1, + unlock_extent(tree, unlock_start, cur + iosize - 1, GFP_NOFS); /* @@ -2344,9 +2337,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, set_range_writeback(tree, cur, cur + iosize - 1); if (!PageWriteback(page)) { - printk("warning page %lu not writeback, " - "cur %llu end %llu\n", page->index, - (unsigned long long)cur, + printk(KERN_ERR "btrfs warning page %lu not " + "writeback, cur %llu end %llu\n", + page->index, (unsigned long long)cur, (unsigned long long)end); } @@ -2430,8 +2423,8 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, retry: while (!done && (index <= end) && (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + PAGECACHE_TAG_DIRTY, min(end - index, + (pgoff_t)PAGEVEC_SIZE-1) + 1))) { unsigned i; scanned = 1; @@ -2536,9 +2529,8 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, extent_write_cache_pages(tree, mapping, &wbc_writepages, __extent_writepage, &epd, flush_write_bio); - if (epd.bio) { + if (epd.bio) submit_one_bio(WRITE, epd.bio, 0, 0); - } return ret; } EXPORT_SYMBOL(extent_write_full_page); @@ -2568,7 +2560,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, .range_end = end + 1, }; - while(start <= end) { + while (start <= end) { page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); if (clear_page_dirty_for_io(page)) ret = __extent_writepage(page, &wbc_writepages, &epd); @@ -2606,9 +2598,8 @@ int extent_writepages(struct extent_io_tree *tree, ret = extent_write_cache_pages(tree, mapping, wbc, __extent_writepage, &epd, flush_write_bio); - if (epd.bio) { + if (epd.bio) submit_one_bio(WRITE, epd.bio, 0, 0); - } return ret; } EXPORT_SYMBOL(extent_writepages); @@ -2666,7 +2657,7 @@ int extent_invalidatepage(struct extent_io_tree *tree, u64 end = start + PAGE_CACHE_SIZE - 1; size_t blocksize = page->mapping->host->i_sb->s_blocksize; - start += (offset + blocksize -1) & ~(blocksize - 1); + start += (offset + blocksize - 1) & ~(blocksize - 1); if (start > end) return 0; @@ -2727,12 +2718,12 @@ int extent_prepare_write(struct extent_io_tree *tree, orig_block_start = block_start; lock_extent(tree, page_start, page_end, GFP_NOFS); - while(block_start <= block_end) { + while (block_start <= block_end) { em = get_extent(inode, page, page_offset, block_start, block_end - block_start + 1, 1); - if (IS_ERR(em) || !em) { + if (IS_ERR(em) || !em) goto err; - } + cur_end = min(block_end, extent_map_end(em) - 1); block_off_start = block_start & (PAGE_CACHE_SIZE - 1); block_off_end = block_off_start + blocksize; @@ -3170,7 +3161,7 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, } __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); set_extent_dirty(tree, page_offset(page), - page_offset(page) + PAGE_CACHE_SIZE -1, + page_offset(page) + PAGE_CACHE_SIZE - 1, GFP_NOFS); unlock_page(page); } @@ -3235,7 +3226,7 @@ int extent_range_uptodate(struct extent_io_tree *tree, ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1); if (ret) return 1; - while(start <= end) { + while (start <= end) { index = start >> PAGE_CACHE_SHIFT; page = find_get_page(tree->mapping, index); uptodate = PageUptodate(page); @@ -3321,16 +3312,12 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, lock_page(page); } locked_pages++; - if (!PageUptodate(page)) { + if (!PageUptodate(page)) all_uptodate = 0; - } } if (all_uptodate) { if (start_i == 0) eb->flags |= EXTENT_UPTODATE; - if (ret) { - printk("all up to date but ret is %d\n", ret); - } goto unlock_exit; } @@ -3345,10 +3332,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, err = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num, &bio_flags); - if (err) { + if (err) ret = err; - printk("err %d from __extent_read_full_page\n", ret); - } } else { unlock_page(page); } @@ -3357,26 +3342,23 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, if (bio) submit_one_bio(READ, bio, mirror_num, bio_flags); - if (ret || !wait) { - if (ret) - printk("ret %d wait %d returning\n", ret, wait); + if (ret || !wait) return ret; - } + for (i = start_i; i < num_pages; i++) { page = extent_buffer_page(eb, i); wait_on_page_locked(page); - if (!PageUptodate(page)) { - printk("page not uptodate after wait_on_page_locked\n"); + if (!PageUptodate(page)) ret = -EIO; - } } + if (!ret) eb->flags |= EXTENT_UPTODATE; return ret; unlock_exit: i = start_i; - while(locked_pages > 0) { + while (locked_pages > 0) { page = extent_buffer_page(eb, i); i++; unlock_page(page); @@ -3403,7 +3385,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - while(len > 0) { + while (len > 0) { page = extent_buffer_page(eb, i); cur = min(len, (PAGE_CACHE_SIZE - offset)); @@ -3442,8 +3424,11 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, offset = 0; *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset; } + if (start + min_len > eb->len) { -printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); + printk(KERN_ERR "btrfs bad mapping eb start %llu len %lu, " + "wanted %lu %lu\n", (unsigned long long)eb->start, + eb->len, start, min_len); WARN_ON(1); } @@ -3506,7 +3491,7 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - while(len > 0) { + while (len > 0) { page = extent_buffer_page(eb, i); cur = min(len, (PAGE_CACHE_SIZE - offset)); @@ -3542,7 +3527,7 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - while(len > 0) { + while (len > 0) { page = extent_buffer_page(eb, i); WARN_ON(!PageUptodate(page)); @@ -3574,7 +3559,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - while(len > 0) { + while (len > 0) { page = extent_buffer_page(eb, i); WARN_ON(!PageUptodate(page)); @@ -3607,7 +3592,7 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, offset = (start_offset + dst_offset) & ((unsigned long)PAGE_CACHE_SIZE - 1); - while(len > 0) { + while (len > 0) { page = extent_buffer_page(dst, i); WARN_ON(!PageUptodate(page)); @@ -3674,17 +3659,17 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, unsigned long src_i; if (src_offset + len > dst->len) { - printk("memmove bogus src_offset %lu move len %lu len %lu\n", - src_offset, len, dst->len); + printk(KERN_ERR "btrfs memmove bogus src_offset %lu move " + "len %lu dst len %lu\n", src_offset, len, dst->len); BUG_ON(1); } if (dst_offset + len > dst->len) { - printk("memmove bogus dst_offset %lu move len %lu len %lu\n", - dst_offset, len, dst->len); + printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move " + "len %lu dst len %lu\n", dst_offset, len, dst->len); BUG_ON(1); } - while(len > 0) { + while (len > 0) { dst_off_in_page = (start_offset + dst_offset) & ((unsigned long)PAGE_CACHE_SIZE - 1); src_off_in_page = (start_offset + src_offset) & @@ -3722,20 +3707,20 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, unsigned long src_i; if (src_offset + len > dst->len) { - printk("memmove bogus src_offset %lu move len %lu len %lu\n", - src_offset, len, dst->len); + printk(KERN_ERR "btrfs memmove bogus src_offset %lu move " + "len %lu len %lu\n", src_offset, len, dst->len); BUG_ON(1); } if (dst_offset + len > dst->len) { - printk("memmove bogus dst_offset %lu move len %lu len %lu\n", - dst_offset, len, dst->len); + printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move " + "len %lu len %lu\n", dst_offset, len, dst->len); BUG_ON(1); } if (dst_offset < src_offset) { memcpy_extent_buffer(dst, dst_offset, src_offset, len); return; } - while(len > 0) { + while (len > 0) { dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index fd3ebfb8c3c..4a83e33ada3 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -89,11 +89,11 @@ EXPORT_SYMBOL(free_extent_map); static struct rb_node *tree_insert(struct rb_root *root, u64 offset, struct rb_node *node) { - struct rb_node ** p = &root->rb_node; - struct rb_node * parent = NULL; + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; struct extent_map *entry; - while(*p) { + while (*p) { parent = *p; entry = rb_entry(parent, struct extent_map, rb_node); @@ -122,13 +122,13 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, struct rb_node **prev_ret, struct rb_node **next_ret) { - struct rb_node * n = root->rb_node; + struct rb_node *n = root->rb_node; struct rb_node *prev = NULL; struct rb_node *orig_prev = NULL; struct extent_map *entry; struct extent_map *prev_entry = NULL; - while(n) { + while (n) { entry = rb_entry(n, struct extent_map, rb_node); prev = n; prev_entry = entry; @@ -145,7 +145,7 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, if (prev_ret) { orig_prev = prev; - while(prev && offset >= extent_map_end(prev_entry)) { + while (prev && offset >= extent_map_end(prev_entry)) { prev = rb_next(prev); prev_entry = rb_entry(prev, struct extent_map, rb_node); } @@ -155,7 +155,7 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, if (next_ret) { prev_entry = rb_entry(prev, struct extent_map, rb_node); - while(prev && offset < prev_entry->start) { + while (prev && offset < prev_entry->start) { prev = rb_prev(prev); prev_entry = rb_entry(prev, struct extent_map, rb_node); } diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index cc6e0b6de94..b11abfad81a 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -24,7 +24,7 @@ #include "transaction.h" #include "print-tree.h" -#define MAX_CSUM_ITEMS(r,size) ((((BTRFS_LEAF_DATA_SIZE(r) - \ +#define MAX_CSUM_ITEMS(r, size) ((((BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) * 2) / \ size) - 1)) int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, @@ -166,7 +166,7 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, WARN_ON(bio->bi_vcnt <= 0); disk_bytenr = (u64)bio->bi_sector << 9; - while(bio_index < bio->bi_vcnt) { + while (bio_index < bio->bi_vcnt) { offset = page_offset(bvec->bv_page) + bvec->bv_offset; ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum); if (ret == 0) @@ -192,8 +192,9 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, offset + bvec->bv_len - 1, EXTENT_NODATASUM, GFP_NOFS); } else { - printk("no csum found for inode %lu " - "start %llu\n", inode->i_ino, + printk(KERN_INFO "btrfs no csum found " + "for inode %lu start %llu\n", + inode->i_ino, (unsigned long long)offset); } item = NULL; @@ -373,7 +374,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, BUG_ON(!ordered); sums->bytenr = ordered->start; - while(bio_index < bio->bi_vcnt) { + while (bio_index < bio->bi_vcnt) { if (!contig) offset = page_offset(bvec->bv_page) + bvec->bv_offset; @@ -507,7 +508,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); - while(1) { + while (1) { key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; key.offset = end_byte - 1; key.type = BTRFS_EXTENT_CSUM_KEY; @@ -715,9 +716,8 @@ again: goto csum; diff = diff - btrfs_item_size_nr(leaf, path->slots[0]); - if (diff != csum_size) { + if (diff != csum_size) goto insert; - } ret = btrfs_extend_item(trans, root, path, diff); BUG_ON(ret); @@ -732,7 +732,7 @@ insert: u64 next_sector = sector_sum->bytenr; struct btrfs_sector_sum *next = sector_sum + 1; - while(tmp < sums->len) { + while (tmp < sums->len) { if (next_sector + root->sectorsize != next->bytenr) break; tmp += root->sectorsize; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5908521922f..0e3a13a4565 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -44,10 +44,10 @@ /* simple helper to fault in pages and copy. This should go away * and be replaced with calls into generic code. */ -static int noinline btrfs_copy_from_user(loff_t pos, int num_pages, +static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, struct page **prepared_pages, - const char __user * buf) + const char __user *buf) { long page_fault = 0; int i; @@ -78,7 +78,7 @@ static int noinline btrfs_copy_from_user(loff_t pos, int num_pages, /* * unlocks pages after btrfs_file_write is done with them */ -static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) +static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) { size_t i; for (i = 0; i < num_pages; i++) { @@ -103,7 +103,7 @@ static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) * this also makes the decision about creating an inline extent vs * doing real data extents, marking pages dirty and delalloc as required. */ -static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, +static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct file *file, struct page **pages, @@ -137,9 +137,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, btrfs_set_trans_block_group(trans, inode); hint_byte = 0; - if ((end_of_last_block & 4095) == 0) { - printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block); - } set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS); /* check for reserved extents on each page, we don't want @@ -185,7 +182,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, len = (u64)-1; testend = 0; } - while(1) { + while (1) { if (!split) split = alloc_extent_map(GFP_NOFS); if (!split2) @@ -295,7 +292,7 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode) path = btrfs_alloc_path(); ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino, last_offset, 0); - while(1) { + while (1) { nritems = btrfs_header_nritems(path->nodes[0]); if (path->slots[0] >= nritems) { ret = btrfs_next_leaf(root, path); @@ -314,8 +311,10 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode) if (found_key.offset < last_offset) { WARN_ON(1); btrfs_print_leaf(root, leaf); - printk("inode %lu found offset %Lu expected %Lu\n", - inode->i_ino, found_key.offset, last_offset); + printk(KERN_ERR "inode %lu found offset %llu " + "expected %llu\n", inode->i_ino, + (unsigned long long)found_key.offset, + (unsigned long long)last_offset); err = 1; goto out; } @@ -331,7 +330,7 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode) extent_end = found_key.offset + btrfs_file_extent_inline_len(leaf, extent); extent_end = (extent_end + root->sectorsize - 1) & - ~((u64)root->sectorsize -1 ); + ~((u64)root->sectorsize - 1); } last_offset = extent_end; path->slots[0]++; @@ -339,8 +338,9 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode) if (0 && last_offset < inode->i_size) { WARN_ON(1); btrfs_print_leaf(root, leaf); - printk("inode %lu found offset %Lu size %Lu\n", inode->i_ino, - last_offset, inode->i_size); + printk(KERN_ERR "inode %lu found offset %llu size %llu\n", + inode->i_ino, (unsigned long long)last_offset, + (unsigned long long)inode->i_size); err = 1; } @@ -362,7 +362,7 @@ out: * inline_limit is used to tell this code which offsets in the file to keep * if they contain inline extents. */ -int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, +noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, u64 inline_limit, u64 *hint_byte) { @@ -398,7 +398,7 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) return -ENOMEM; - while(1) { + while (1) { recow = 0; btrfs_release_path(root, path); ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, @@ -649,16 +649,15 @@ next_slot: if (disk_bytenr != 0) { ret = btrfs_update_extent_ref(trans, root, disk_bytenr, orig_parent, - leaf->start, + leaf->start, root->root_key.objectid, trans->transid, ins.objectid); BUG_ON(ret); } btrfs_release_path(root, path); - if (disk_bytenr != 0) { + if (disk_bytenr != 0) inode_add_bytes(inode, extent_end - end); - } } if (found_extent && !keep) { @@ -944,7 +943,7 @@ done: * waits for data=ordered extents to finish before allowing the pages to be * modified. */ -static int noinline prepare_pages(struct btrfs_root *root, struct file *file, +static noinline int prepare_pages(struct btrfs_root *root, struct file *file, struct page **pages, size_t num_pages, loff_t pos, unsigned long first_index, unsigned long last_index, size_t write_bytes) @@ -979,7 +978,8 @@ again: struct btrfs_ordered_extent *ordered; lock_extent(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, GFP_NOFS); - ordered = btrfs_lookup_first_ordered_extent(inode, last_pos -1); + ordered = btrfs_lookup_first_ordered_extent(inode, + last_pos - 1); if (ordered && ordered->file_offset + ordered->len > start_pos && ordered->file_offset < last_pos) { @@ -1085,7 +1085,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } } - while(count > 0) { + while (count > 0) { size_t offset = pos & (PAGE_CACHE_SIZE - 1); size_t write_bytes = min(count, nrptrs * (size_t)PAGE_CACHE_SIZE - @@ -1178,7 +1178,7 @@ out_nolock: return num_written ? num_written : err; } -int btrfs_release_file(struct inode * inode, struct file * filp) +int btrfs_release_file(struct inode *inode, struct file *filp) { if (filp->private_data) btrfs_ioctl_trans_end(filp); @@ -1237,9 +1237,8 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) } ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); - if (ret < 0) { + if (ret < 0) goto out; - } /* we've logged all the items and now have a consistent * version of the file in the log. It is possible that diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 2e69b9c3043..d1e5f0e84c5 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -213,10 +213,13 @@ static int __btrfs_add_free_space(struct btrfs_block_group_cache *block_group, info->offset = offset; info->bytes += bytes; } else if (right_info && right_info->offset != offset+bytes) { - printk(KERN_ERR "adding space in the middle of an existing " - "free space area. existing: offset=%Lu, bytes=%Lu. " - "new: offset=%Lu, bytes=%Lu\n", right_info->offset, - right_info->bytes, offset, bytes); + printk(KERN_ERR "btrfs adding space in the middle of an " + "existing free space area. existing: " + "offset=%llu, bytes=%llu. new: offset=%llu, " + "bytes=%llu\n", (unsigned long long)right_info->offset, + (unsigned long long)right_info->bytes, + (unsigned long long)offset, + (unsigned long long)bytes); BUG(); } @@ -225,11 +228,14 @@ static int __btrfs_add_free_space(struct btrfs_block_group_cache *block_group, if (unlikely((left_info->offset + left_info->bytes) != offset)) { - printk(KERN_ERR "free space to the left of new free " - "space isn't quite right. existing: offset=%Lu," - " bytes=%Lu. new: offset=%Lu, bytes=%Lu\n", - left_info->offset, left_info->bytes, offset, - bytes); + printk(KERN_ERR "btrfs free space to the left " + "of new free space isn't " + "quite right. existing: offset=%llu, " + "bytes=%llu. new: offset=%llu, bytes=%llu\n", + (unsigned long long)left_info->offset, + (unsigned long long)left_info->bytes, + (unsigned long long)offset, + (unsigned long long)bytes); BUG(); } @@ -265,8 +271,7 @@ out: BUG(); } - if (alloc_info) - kfree(alloc_info); + kfree(alloc_info); return ret; } @@ -283,9 +288,11 @@ __btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, if (info && info->offset == offset) { if (info->bytes < bytes) { - printk(KERN_ERR "Found free space at %Lu, size %Lu," - "trying to use %Lu\n", - info->offset, info->bytes, bytes); + printk(KERN_ERR "Found free space at %llu, size %llu," + "trying to use %llu\n", + (unsigned long long)info->offset, + (unsigned long long)info->bytes, + (unsigned long long)bytes); WARN_ON(1); ret = -EINVAL; goto out; @@ -401,8 +408,6 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, info = rb_entry(n, struct btrfs_free_space, offset_index); if (info->bytes >= bytes) count++; - //printk(KERN_INFO "offset=%Lu, bytes=%Lu\n", info->offset, - // info->bytes); } printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" "\n", count); diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 80038c5ef7c..2aa79873eb4 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -129,7 +129,6 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, last_ino = key.objectid + 1; path->slots[0]++; } - // FIXME -ENOSPC BUG_ON(1); found: btrfs_release_path(root, path); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 068bad46338..1b35ea63b6c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -124,7 +124,7 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, * the btree. The caller should have done a btrfs_drop_extents so that * no overlapping inline items exist in the btree */ -static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, +static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, size_t size, size_t compressed_size, struct page **compressed_pages) @@ -148,7 +148,8 @@ static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, cur_size = compressed_size; } - path = btrfs_alloc_path(); if (!path) + path = btrfs_alloc_path(); + if (!path) return -ENOMEM; btrfs_set_trans_block_group(trans, inode); @@ -165,7 +166,6 @@ static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, BUG_ON(ret); if (ret) { err = ret; - printk("got bad ret %d\n", ret); goto fail; } leaf = path->nodes[0]; @@ -181,7 +181,7 @@ static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, if (use_compress) { struct page *cpage; int i = 0; - while(compressed_size > 0) { + while (compressed_size > 0) { cpage = compressed_pages[i]; cur_size = min_t(unsigned long, compressed_size, PAGE_CACHE_SIZE); @@ -519,8 +519,7 @@ free_pages_out: WARN_ON(pages[i]->mapping); page_cache_release(pages[i]); } - if (pages) - kfree(pages); + kfree(pages); goto out; } @@ -549,7 +548,7 @@ static noinline int submit_compressed_extents(struct inode *inode, trans = btrfs_join_transaction(root, 1); - while(!list_empty(&async_cow->extents)) { + while (!list_empty(&async_cow->extents)) { async_extent = list_entry(async_cow->extents.next, struct async_extent, list); list_del(&async_extent->list); @@ -562,8 +561,8 @@ static noinline int submit_compressed_extents(struct inode *inode, unsigned long nr_written = 0; lock_extent(io_tree, async_extent->start, - async_extent->start + async_extent->ram_size - 1, - GFP_NOFS); + async_extent->start + + async_extent->ram_size - 1, GFP_NOFS); /* allocate blocks */ cow_file_range(inode, async_cow->locked_page, @@ -581,7 +580,7 @@ static noinline int submit_compressed_extents(struct inode *inode, if (!page_started) extent_write_locked_range(io_tree, inode, async_extent->start, - async_extent->start + + async_extent->start + async_extent->ram_size - 1, btrfs_get_extent, WB_SYNC_ALL); @@ -618,7 +617,7 @@ static noinline int submit_compressed_extents(struct inode *inode, set_bit(EXTENT_FLAG_PINNED, &em->flags); set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); - while(1) { + while (1) { spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); spin_unlock(&em_tree->lock); @@ -651,11 +650,11 @@ static noinline int submit_compressed_extents(struct inode *inode, NULL, 1, 1, 0, 1, 1, 0); ret = btrfs_submit_compressed_write(inode, - async_extent->start, - async_extent->ram_size, - ins.objectid, - ins.offset, async_extent->pages, - async_extent->nr_pages); + async_extent->start, + async_extent->ram_size, + ins.objectid, + ins.offset, async_extent->pages, + async_extent->nr_pages); BUG_ON(ret); trans = btrfs_join_transaction(root, 1); @@ -735,14 +734,13 @@ static noinline int cow_file_range(struct inode *inode, btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); - while(disk_num_bytes > 0) { + while (disk_num_bytes > 0) { cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); ret = btrfs_reserve_extent(trans, root, cur_alloc_size, root->sectorsize, 0, alloc_hint, (u64)-1, &ins, 1); - if (ret) { - BUG(); - } + BUG_ON(ret); + em = alloc_extent_map(GFP_NOFS); em->start = start; em->orig_start = em->start; @@ -755,7 +753,7 @@ static noinline int cow_file_range(struct inode *inode, em->bdev = root->fs_info->fs_devices->latest_bdev; set_bit(EXTENT_FLAG_PINNED, &em->flags); - while(1) { + while (1) { spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); spin_unlock(&em_tree->lock); @@ -779,11 +777,9 @@ static noinline int cow_file_range(struct inode *inode, BUG_ON(ret); } - if (disk_num_bytes < cur_alloc_size) { - printk("num_bytes %Lu cur_alloc %Lu\n", disk_num_bytes, - cur_alloc_size); + if (disk_num_bytes < cur_alloc_size) break; - } + /* we're not doing compressed IO, don't unlock the first * page (which the caller expects to stay locked), don't * clear any dirty bits and don't set any writeback bits @@ -842,9 +838,8 @@ static noinline void async_cow_submit(struct btrfs_work *work) waitqueue_active(&root->fs_info->async_submit_wait)) wake_up(&root->fs_info->async_submit_wait); - if (async_cow->inode) { + if (async_cow->inode) submit_compressed_extents(async_cow->inode, async_cow); - } } static noinline void async_cow_free(struct btrfs_work *work) @@ -871,7 +866,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | EXTENT_DELALLOC, 1, 0, GFP_NOFS); - while(start < end) { + while (start < end) { async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); async_cow->inode = inode; async_cow->root = root; @@ -904,7 +899,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, limit)); } - while(atomic_read(&root->fs_info->async_submit_draining) && + while (atomic_read(&root->fs_info->async_submit_draining) && atomic_read(&root->fs_info->async_delalloc_pages)) { wait_event(root->fs_info->async_submit_wait, (atomic_read(&root->fs_info->async_delalloc_pages) == @@ -918,7 +913,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, return 0; } -static int noinline csum_exist_in_range(struct btrfs_root *root, +static noinline int csum_exist_in_range(struct btrfs_root *root, u64 bytenr, u64 num_bytes) { int ret; @@ -1146,13 +1141,13 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, if (btrfs_test_flag(inode, NODATACOW)) ret = run_delalloc_nocow(inode, locked_page, start, end, - page_started, 1, nr_written); + page_started, 1, nr_written); else if (btrfs_test_flag(inode, PREALLOC)) ret = run_delalloc_nocow(inode, locked_page, start, end, - page_started, 0, nr_written); + page_started, 0, nr_written); else ret = cow_file_range_async(inode, locked_page, start, end, - page_started, nr_written); + page_started, nr_written); return ret; } @@ -1200,8 +1195,11 @@ static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, spin_lock(&root->fs_info->delalloc_lock); if (end - start + 1 > root->fs_info->delalloc_bytes) { - printk("warning: delalloc account %Lu %Lu\n", - end - start + 1, root->fs_info->delalloc_bytes); + printk(KERN_INFO "btrfs warning: delalloc account " + "%llu %llu\n", + (unsigned long long)end - start + 1, + (unsigned long long) + root->fs_info->delalloc_bytes); root->fs_info->delalloc_bytes = 0; BTRFS_I(inode)->delalloc_bytes = 0; } else { @@ -1241,9 +1239,8 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, ret = btrfs_map_block(map_tree, READ, logical, &map_length, NULL, 0); - if (map_length < length + size) { + if (map_length < length + size) return 1; - } return 0; } @@ -1255,8 +1252,9 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, * At IO completion time the cums attached on the ordered extent record * are inserted into the btree */ -static int __btrfs_submit_bio_start(struct inode *inode, int rw, struct bio *bio, - int mirror_num, unsigned long bio_flags) +static int __btrfs_submit_bio_start(struct inode *inode, int rw, + struct bio *bio, int mirror_num, + unsigned long bio_flags) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret = 0; @@ -1341,9 +1339,8 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end) { - if ((end & (PAGE_CACHE_SIZE - 1)) == 0) { + if ((end & (PAGE_CACHE_SIZE - 1)) == 0) WARN_ON(1); - } return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); } @@ -1755,14 +1752,14 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, } local_irq_save(flags); kaddr = kmap_atomic(page, KM_IRQ0); - if (ret) { + if (ret) goto zeroit; - } + csum = btrfs_csum_data(root, kaddr + offset, csum, end - start + 1); btrfs_csum_final(csum, (char *)&csum); - if (csum != private) { + if (csum != private) goto zeroit; - } + kunmap_atomic(kaddr, KM_IRQ0); local_irq_restore(flags); good: @@ -1773,9 +1770,10 @@ good: return 0; zeroit: - printk("btrfs csum failed ino %lu off %llu csum %u private %Lu\n", - page->mapping->host->i_ino, (unsigned long long)start, csum, - private); + printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " + "private %llu\n", page->mapping->host->i_ino, + (unsigned long long)start, csum, + (unsigned long long)private); memset(kaddr + offset, 1, end - start + 1); flush_dcache_page(page); kunmap_atomic(kaddr, KM_IRQ0); @@ -2097,9 +2095,8 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, /* * copy everything in the in-memory inode into the btree. */ -int noinline btrfs_update_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode) +noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode) { struct btrfs_inode_item *inode_item; struct btrfs_path *path; @@ -2174,7 +2171,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, inode->i_ino, dir->i_ino, &index); if (ret) { - printk("failed to delete reference to %.*s, " + printk(KERN_INFO "btrfs failed to delete reference to %.*s, " "inode %lu parent %lu\n", name_len, name, inode->i_ino, dir->i_ino); goto err; @@ -2280,9 +2277,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) /* now the directory is empty */ err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, dentry->d_name.name, dentry->d_name.len); - if (!err) { + if (!err) btrfs_i_size_write(inode, 0); - } fail_trans: nr = trans->blocks_used; @@ -2516,9 +2512,9 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, search_again: ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) { + if (ret < 0) goto error; - } + if (ret > 0) { /* there are no items in the tree for us to truncate, we're * done @@ -2530,7 +2526,7 @@ search_again: path->slots[0]--; } - while(1) { + while (1) { fi = NULL; leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); @@ -2562,19 +2558,18 @@ search_again: item_end--; } if (item_end < new_size) { - if (found_type == BTRFS_DIR_ITEM_KEY) { + if (found_type == BTRFS_DIR_ITEM_KEY) found_type = BTRFS_INODE_ITEM_KEY; - } else if (found_type == BTRFS_EXTENT_ITEM_KEY) { + else if (found_type == BTRFS_EXTENT_ITEM_KEY) found_type = BTRFS_EXTENT_DATA_KEY; - } else if (found_type == BTRFS_EXTENT_DATA_KEY) { + else if (found_type == BTRFS_EXTENT_DATA_KEY) found_type = BTRFS_XATTR_ITEM_KEY; - } else if (found_type == BTRFS_XATTR_ITEM_KEY) { + else if (found_type == BTRFS_XATTR_ITEM_KEY) found_type = BTRFS_INODE_REF_KEY; - } else if (found_type) { + else if (found_type) found_type--; - } else { + else break; - } btrfs_set_key_type(&key, found_type); goto next; } @@ -2656,7 +2651,7 @@ delete: pending_del_nr++; pending_del_slot = path->slots[0]; } else { - printk("bad pending slot %d pending_del_nr %d pending_del_slot %d\n", path->slots[0], pending_del_nr, pending_del_slot); + BUG(); } } else { break; @@ -2938,9 +2933,10 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, namelen, 0); if (IS_ERR(di)) ret = PTR_ERR(di); - if (!di || IS_ERR(di)) { + + if (!di || IS_ERR(di)) goto out_err; - } + btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); out: btrfs_free_path(path); @@ -3020,8 +3016,8 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) static int btrfs_find_actor(struct inode *inode, void *opaque) { struct btrfs_iget_args *args = opaque; - return (args->ino == inode->i_ino && - args->root == BTRFS_I(inode)->root); + return args->ino == inode->i_ino && + args->root == BTRFS_I(inode)->root; } struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, @@ -3085,7 +3081,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) { - struct inode * inode; + struct inode *inode; struct btrfs_inode *bi = BTRFS_I(dir); struct btrfs_root *root = bi->root; struct btrfs_root *sub_root = root; @@ -3385,9 +3381,8 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index) if (BTRFS_I(dir)->index_cnt == (u64)-1) { ret = btrfs_set_inode_index_count(dir); - if (ret) { + if (ret) return ret; - } } *index = BTRFS_I(dir)->index_cnt; @@ -3879,12 +3874,13 @@ static noinline int uncompress_inline(struct btrfs_path *path, /* * a bit scary, this does extent mapping from logical file offset to the disk. - * the ugly parts come from merging extents from the disk with the - * in-ram representation. This gets more complex because of the data=ordered code, + * the ugly parts come from merging extents from the disk with the in-ram + * representation. This gets more complex because of the data=ordered code, * where the in-ram extents might be locked pending data=ordered completion. * * This also copies inline extents directly into the page. */ + struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, int create) @@ -4081,7 +4077,7 @@ again: extent_map_end(em) - 1, GFP_NOFS); goto insert; } else { - printk("unkknown found_type %d\n", found_type); + printk(KERN_ERR "btrfs unknown found_type %d\n", found_type); WARN_ON(1); } not_found: @@ -4093,7 +4089,11 @@ not_found_em: insert: btrfs_release_path(root, path); if (em->start > start || extent_map_end(em) <= start) { - printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->len, start, len); + printk(KERN_ERR "Btrfs: bad extent! em: [%llu %llu] passed " + "[%llu %llu]\n", (unsigned long long)em->start, + (unsigned long long)em->len, + (unsigned long long)start, + (unsigned long long)len); err = -EIO; goto out; } @@ -4130,8 +4130,6 @@ insert: } } else { err = -EIO; - printk("failing to insert %Lu %Lu\n", - start, len); free_extent_map(em); em = NULL; } @@ -4147,9 +4145,8 @@ out: btrfs_free_path(path); if (trans) { ret = btrfs_end_transaction(trans, root); - if (!err) { + if (!err) err = ret; - } } if (err) { free_extent_map(em); @@ -4482,13 +4479,15 @@ void btrfs_destroy_inode(struct inode *inode) } spin_unlock(&BTRFS_I(inode)->root->list_lock); - while(1) { + while (1) { ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); if (!ordered) break; else { - printk("found ordered extent %Lu %Lu\n", - ordered->file_offset, ordered->len); + printk(KERN_ERR "btrfs found ordered " + "extent %llu %llu on inode cleanup\n", + (unsigned long long)ordered->file_offset, + (unsigned long long)ordered->len); btrfs_remove_ordered_extent(inode, ordered); btrfs_put_ordered_extent(ordered); btrfs_put_ordered_extent(ordered); @@ -4572,8 +4571,8 @@ static int btrfs_getattr(struct vfsmount *mnt, return 0; } -static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, - struct inode * new_dir,struct dentry *new_dentry) +static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(old_dir)->root; @@ -4663,7 +4662,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root) return -EROFS; spin_lock(&root->fs_info->delalloc_lock); - while(!list_empty(head)) { + while (!list_empty(head)) { binode = list_entry(head->next, struct btrfs_inode, delalloc_inodes); inode = igrab(&binode->vfs_inode); @@ -4684,7 +4683,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root) * ordered extents get created before we return */ atomic_inc(&root->fs_info->async_submit_draining); - while(atomic_read(&root->fs_info->nr_async_submits) || + while (atomic_read(&root->fs_info->nr_async_submits) || atomic_read(&root->fs_info->async_delalloc_pages)) { wait_event(root->fs_info->async_submit_wait, (atomic_read(&root->fs_info->nr_async_submits) == 0 && diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ba484aac1b9..c2aa33e3feb 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -311,7 +311,7 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, * to see if is references the subvolume where we are * placing this new snapshot. */ - while(1) { + while (1) { if (!test || dir == snap_src->fs_info->sb->s_root || test == snap_src->fs_info->sb->s_root || @@ -319,7 +319,8 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, break; } if (S_ISLNK(test->d_inode->i_mode)) { - printk("Symlink in snapshot path, failed\n"); + printk(KERN_INFO "Btrfs symlink in snapshot " + "path, failed\n"); error = -EMLINK; btrfs_free_path(path); goto out_drop_write; @@ -329,7 +330,8 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, ret = btrfs_find_root_ref(snap_src->fs_info->tree_root, path, test_oid, parent_oid); if (ret == 0) { - printk("Snapshot creation failed, looping\n"); + printk(KERN_INFO "Btrfs snapshot creation " + "failed, looping\n"); error = -EMLINK; btrfs_free_path(path); goto out_drop_write; @@ -617,7 +619,8 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, src_inode = src_file->f_path.dentry->d_inode; if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) { - printk("btrfs: Snapshot src from another FS\n"); + printk(KERN_INFO "btrfs: Snapshot src from " + "another FS\n"); ret = -EINVAL; fput(src_file); goto out; @@ -810,9 +813,6 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, ((off + len) & (bs-1))) goto out_unlock; - printk("final src extent is %llu~%llu\n", off, len); - printk("final dst extent is %llu~%llu\n", destoff, len); - /* do any pending delalloc/csum calc on src, one way or another, and lock file content */ while (1) { @@ -883,10 +883,13 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, comp = btrfs_file_extent_compression(leaf, extent); type = btrfs_file_extent_type(leaf, extent); if (type == BTRFS_FILE_EXTENT_REG) { - disko = btrfs_file_extent_disk_bytenr(leaf, extent); - diskl = btrfs_file_extent_disk_num_bytes(leaf, extent); + disko = btrfs_file_extent_disk_bytenr(leaf, + extent); + diskl = btrfs_file_extent_disk_num_bytes(leaf, + extent); datao = btrfs_file_extent_offset(leaf, extent); - datal = btrfs_file_extent_num_bytes(leaf, extent); + datal = btrfs_file_extent_num_bytes(leaf, + extent); } else if (type == BTRFS_FILE_EXTENT_INLINE) { /* take upper bound, may be compressed */ datal = btrfs_file_extent_ram_bytes(leaf, @@ -916,8 +919,6 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); - printk(" orig disk %llu~%llu data %llu~%llu\n", - disko, diskl, datao, datal); if (off > key.offset) { datao += off - key.offset; @@ -929,8 +930,6 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, /* disko == 0 means it's a hole */ if (!disko) datao = 0; - printk(" final disk %llu~%llu data %llu~%llu\n", - disko, diskl, datao, datal); btrfs_set_file_extent_offset(leaf, extent, datao); @@ -952,12 +951,11 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, skip = off - key.offset; new_key.offset += skip; } + if (key.offset + datal > off+len) trim = key.offset + datal - (off+len); - printk("len %lld skip %lld trim %lld\n", - datal, skip, trim); + if (comp && (skip || trim)) { - printk("btrfs clone_range can't split compressed inline extents yet\n"); ret = -EINVAL; goto out; } @@ -969,7 +967,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, goto out; if (skip) { - u32 start = btrfs_file_extent_calc_inline_size(0); + u32 start = + btrfs_file_extent_calc_inline_size(0); memmove(buf+start, buf+start+skip, datal); } @@ -985,7 +984,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, btrfs_mark_buffer_dirty(leaf); } - next: +next: btrfs_release_path(root, path); key.offset++; } diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index e30aa6e2958..39bae7761db 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -31,9 +31,10 @@ * difference in almost every workload, but spinning for the right amount of * time needs some help. * - * In general, we want to spin as long as the lock holder is doing btree searches, - * and we should give up if they are in more expensive code. + * In general, we want to spin as long as the lock holder is doing btree + * searches, and we should give up if they are in more expensive code. */ + int btrfs_tree_lock(struct extent_buffer *eb) { int i; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index d9e232227da..a2094017027 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -39,11 +39,11 @@ static u64 entry_end(struct btrfs_ordered_extent *entry) static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, struct rb_node *node) { - struct rb_node ** p = &root->rb_node; - struct rb_node * parent = NULL; + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; struct btrfs_ordered_extent *entry; - while(*p) { + while (*p) { parent = *p; entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node); @@ -67,13 +67,13 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, struct rb_node **prev_ret) { - struct rb_node * n = root->rb_node; + struct rb_node *n = root->rb_node; struct rb_node *prev = NULL; struct rb_node *test; struct btrfs_ordered_extent *entry; struct btrfs_ordered_extent *prev_entry = NULL; - while(n) { + while (n) { entry = rb_entry(n, struct btrfs_ordered_extent, rb_node); prev = n; prev_entry = entry; @@ -88,7 +88,7 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, if (!prev_ret) return NULL; - while(prev && file_offset >= entry_end(prev_entry)) { + while (prev && file_offset >= entry_end(prev_entry)) { test = rb_next(prev); if (!test) break; @@ -102,7 +102,7 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, if (prev) prev_entry = rb_entry(prev, struct btrfs_ordered_extent, rb_node); - while(prev && file_offset < entry_end(prev_entry)) { + while (prev && file_offset < entry_end(prev_entry)) { test = rb_prev(prev); if (!test) break; @@ -193,10 +193,8 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, node = tree_insert(&tree->tree, file_offset, &entry->rb_node); - if (node) { - printk("warning dup entry from add_ordered_extent\n"); - BUG(); - } + BUG_ON(node); + set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, entry_end(entry) - 1, GFP_NOFS); @@ -282,7 +280,7 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) struct btrfs_ordered_sum *sum; if (atomic_dec_and_test(&entry->refs)) { - while(!list_empty(&entry->list)) { + while (!list_empty(&entry->list)) { cur = entry->list.next; sum = list_entry(cur, struct btrfs_ordered_sum, list); list_del(&sum->list); @@ -432,11 +430,10 @@ again: orig_end >> PAGE_CACHE_SHIFT); end = orig_end; - while(1) { + while (1) { ordered = btrfs_lookup_first_ordered_extent(inode, end); - if (!ordered) { + if (!ordered) break; - } if (ordered->file_offset > orig_end) { btrfs_put_ordered_extent(ordered); break; @@ -492,7 +489,7 @@ out: * if none is found */ struct btrfs_ordered_extent * -btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset) +btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset) { struct btrfs_ordered_inode_tree *tree; struct rb_node *node; @@ -553,7 +550,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, * yet */ node = &ordered->rb_node; - while(1) { + while (1) { node = rb_prev(node); if (!node) break; @@ -581,9 +578,8 @@ int btrfs_ordered_update_i_size(struct inode *inode, * between our ordered extent and the next one. */ test = rb_entry(node, struct btrfs_ordered_extent, rb_node); - if (test->file_offset > entry_end(ordered)) { + if (test->file_offset > entry_end(ordered)) i_size_test = test->file_offset; - } } else { i_size_test = i_size_read(inode); } diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 64725c13aa1..5f8f218c100 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -24,13 +24,14 @@ static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk) { int num_stripes = btrfs_chunk_num_stripes(eb, chunk); int i; - printk("\t\tchunk length %llu owner %llu type %llu num_stripes %d\n", + printk(KERN_INFO "\t\tchunk length %llu owner %llu type %llu " + "num_stripes %d\n", (unsigned long long)btrfs_chunk_length(eb, chunk), (unsigned long long)btrfs_chunk_owner(eb, chunk), (unsigned long long)btrfs_chunk_type(eb, chunk), num_stripes); for (i = 0 ; i < num_stripes ; i++) { - printk("\t\t\tstripe %d devid %llu offset %llu\n", i, + printk(KERN_INFO "\t\t\tstripe %d devid %llu offset %llu\n", i, (unsigned long long)btrfs_stripe_devid_nr(eb, chunk, i), (unsigned long long)btrfs_stripe_offset_nr(eb, chunk, i)); } @@ -38,8 +39,8 @@ static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk) static void print_dev_item(struct extent_buffer *eb, struct btrfs_dev_item *dev_item) { - printk("\t\tdev item devid %llu " - "total_bytes %llu bytes used %Lu\n", + printk(KERN_INFO "\t\tdev item devid %llu " + "total_bytes %llu bytes used %llu\n", (unsigned long long)btrfs_device_id(eb, dev_item), (unsigned long long)btrfs_device_total_bytes(eb, dev_item), (unsigned long long)btrfs_device_bytes_used(eb, dev_item)); @@ -61,14 +62,15 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) struct btrfs_dev_extent *dev_extent; u32 type; - printk("leaf %llu total ptrs %d free space %d\n", + printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n", (unsigned long long)btrfs_header_bytenr(l), nr, btrfs_leaf_free_space(root, l)); for (i = 0 ; i < nr ; i++) { item = btrfs_item_nr(l, i); btrfs_item_key_to_cpu(l, &key, i); type = btrfs_key_type(&key); - printk("\titem %d key (%llu %x %llu) itemoff %d itemsize %d\n", + printk(KERN_INFO "\titem %d key (%llu %x %llu) itemoff %d " + "itemsize %d\n", i, (unsigned long long)key.objectid, type, (unsigned long long)key.offset, @@ -76,33 +78,36 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) switch (type) { case BTRFS_INODE_ITEM_KEY: ii = btrfs_item_ptr(l, i, struct btrfs_inode_item); - printk("\t\tinode generation %llu size %llu mode %o\n", - (unsigned long long)btrfs_inode_generation(l, ii), + printk(KERN_INFO "\t\tinode generation %llu size %llu " + "mode %o\n", + (unsigned long long) + btrfs_inode_generation(l, ii), (unsigned long long)btrfs_inode_size(l, ii), btrfs_inode_mode(l, ii)); break; case BTRFS_DIR_ITEM_KEY: di = btrfs_item_ptr(l, i, struct btrfs_dir_item); btrfs_dir_item_key_to_cpu(l, di, &found_key); - printk("\t\tdir oid %llu type %u\n", + printk(KERN_INFO "\t\tdir oid %llu type %u\n", (unsigned long long)found_key.objectid, btrfs_dir_type(l, di)); break; case BTRFS_ROOT_ITEM_KEY: ri = btrfs_item_ptr(l, i, struct btrfs_root_item); - printk("\t\troot data bytenr %llu refs %u\n", - (unsigned long long)btrfs_disk_root_bytenr(l, ri), + printk(KERN_INFO "\t\troot data bytenr %llu refs %u\n", + (unsigned long long) + btrfs_disk_root_bytenr(l, ri), btrfs_disk_root_refs(l, ri)); break; case BTRFS_EXTENT_ITEM_KEY: ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); - printk("\t\textent data refs %u\n", + printk(KERN_INFO "\t\textent data refs %u\n", btrfs_extent_refs(l, ei)); break; case BTRFS_EXTENT_REF_KEY: ref = btrfs_item_ptr(l, i, struct btrfs_extent_ref); - printk("\t\textent back ref root %llu gen %llu " - "owner %llu num_refs %lu\n", + printk(KERN_INFO "\t\textent back ref root %llu " + "gen %llu owner %llu num_refs %lu\n", (unsigned long long)btrfs_ref_root(l, ref), (unsigned long long)btrfs_ref_generation(l, ref), (unsigned long long)btrfs_ref_objectid(l, ref), @@ -114,26 +119,36 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) struct btrfs_file_extent_item); if (btrfs_file_extent_type(l, fi) == BTRFS_FILE_EXTENT_INLINE) { - printk("\t\tinline extent data size %u\n", - btrfs_file_extent_inline_len(l, fi)); + printk(KERN_INFO "\t\tinline extent data " + "size %u\n", + btrfs_file_extent_inline_len(l, fi)); break; } - printk("\t\textent data disk bytenr %llu nr %llu\n", - (unsigned long long)btrfs_file_extent_disk_bytenr(l, fi), - (unsigned long long)btrfs_file_extent_disk_num_bytes(l, fi)); - printk("\t\textent data offset %llu nr %llu ram %llu\n", - (unsigned long long)btrfs_file_extent_offset(l, fi), - (unsigned long long)btrfs_file_extent_num_bytes(l, fi), - (unsigned long long)btrfs_file_extent_ram_bytes(l, fi)); + printk(KERN_INFO "\t\textent data disk bytenr %llu " + "nr %llu\n", + (unsigned long long) + btrfs_file_extent_disk_bytenr(l, fi), + (unsigned long long) + btrfs_file_extent_disk_num_bytes(l, fi)); + printk(KERN_INFO "\t\textent data offset %llu " + "nr %llu ram %llu\n", + (unsigned long long) + btrfs_file_extent_offset(l, fi), + (unsigned long long) + btrfs_file_extent_num_bytes(l, fi), + (unsigned long long) + btrfs_file_extent_ram_bytes(l, fi)); break; case BTRFS_BLOCK_GROUP_ITEM_KEY: bi = btrfs_item_ptr(l, i, struct btrfs_block_group_item); - printk("\t\tblock group used %llu\n", - (unsigned long long)btrfs_disk_block_group_used(l, bi)); + printk(KERN_INFO "\t\tblock group used %llu\n", + (unsigned long long) + btrfs_disk_block_group_used(l, bi)); break; case BTRFS_CHUNK_ITEM_KEY: - print_chunk(l, btrfs_item_ptr(l, i, struct btrfs_chunk)); + print_chunk(l, btrfs_item_ptr(l, i, + struct btrfs_chunk)); break; case BTRFS_DEV_ITEM_KEY: print_dev_item(l, btrfs_item_ptr(l, i, @@ -142,7 +157,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) case BTRFS_DEV_EXTENT_KEY: dev_extent = btrfs_item_ptr(l, i, struct btrfs_dev_extent); - printk("\t\tdev extent chunk_tree %llu\n" + printk(KERN_INFO "\t\tdev extent chunk_tree %llu\n" "\t\tchunk objectid %llu chunk offset %llu " "length %llu\n", (unsigned long long) @@ -171,13 +186,13 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) btrfs_print_leaf(root, c); return; } - printk("node %llu level %d total ptrs %d free spc %u\n", + printk(KERN_INFO "node %llu level %d total ptrs %d free spc %u\n", (unsigned long long)btrfs_header_bytenr(c), btrfs_header_level(c), nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); for (i = 0; i < nr; i++) { btrfs_node_key_to_cpu(c, &key, i); - printk("\tkey %d (%llu %u %llu) block %llu\n", + printk(KERN_INFO "\tkey %d (%llu %u %llu) block %llu\n", i, (unsigned long long)key.objectid, key.type, diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c index a50ebb67055..6f0acc4c9ea 100644 --- a/fs/btrfs/ref-cache.c +++ b/fs/btrfs/ref-cache.c @@ -74,11 +74,11 @@ void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, struct rb_node *node) { - struct rb_node ** p = &root->rb_node; - struct rb_node * parent = NULL; + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; struct btrfs_leaf_ref *entry; - while(*p) { + while (*p) { parent = *p; entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node); @@ -98,10 +98,10 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) { - struct rb_node * n = root->rb_node; + struct rb_node *n = root->rb_node; struct btrfs_leaf_ref *entry; - while(n) { + while (n) { entry = rb_entry(n, struct btrfs_leaf_ref, rb_node); WARN_ON(!entry->in_tree); @@ -127,7 +127,7 @@ int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen, return 0; spin_lock(&tree->lock); - while(!list_empty(&tree->list)) { + while (!list_empty(&tree->list)) { ref = list_entry(tree->list.next, struct btrfs_leaf_ref, list); BUG_ON(ref->tree != tree); if (ref->root_gen > max_root_gen) diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index f99335a999d..b48650de447 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -132,8 +132,9 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root if (ret != 0) { btrfs_print_leaf(root, path->nodes[0]); - printk("unable to update root key %Lu %u %Lu\n", - key->objectid, key->type, key->offset); + printk(KERN_CRIT "unable to update root key %llu %u %llu\n", + (unsigned long long)key->objectid, key->type, + (unsigned long long)key->offset); BUG_ON(1); } @@ -159,9 +160,9 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root /* * at mount time we want to find all the old transaction snapshots that were in - * the process of being deleted if we crashed. This is any root item with an offset - * lower than the latest root. They need to be queued for deletion to finish - * what was happening when we crashed. + * the process of being deleted if we crashed. This is any root item with an + * offset lower than the latest root. They need to be queued for deletion to + * finish what was happening when we crashed. */ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, struct btrfs_root *latest) @@ -188,7 +189,7 @@ again: ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto err; - while(1) { + while (1) { leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; @@ -258,11 +259,7 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = btrfs_search_slot(trans, root, key, path, -1, 1); if (ret < 0) goto out; - if (ret) { -btrfs_print_leaf(root, path->nodes[0]); -printk("failed to del %Lu %u %Lu\n", key->objectid, key->type, key->offset); - } BUG_ON(ret != 0); leaf = path->nodes[0]; ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item); diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c index 8d7f568009c..c0f7ecaf1e7 100644 --- a/fs/btrfs/struct-funcs.c +++ b/fs/btrfs/struct-funcs.c @@ -66,7 +66,7 @@ u##bits btrfs_##name(struct extent_buffer *eb, \ unsigned long map_len; \ u##bits res; \ err = map_extent_buffer(eb, offset, \ - sizeof(((type *)0)->member), \ + sizeof(((type *)0)->member), \ &map_token, &kaddr, \ &map_start, &map_len, KM_USER1); \ if (err) { \ @@ -103,7 +103,7 @@ void btrfs_set_##name(struct extent_buffer *eb, \ unsigned long map_start; \ unsigned long map_len; \ err = map_extent_buffer(eb, offset, \ - sizeof(((type *)0)->member), \ + sizeof(((type *)0)->member), \ &map_token, &kaddr, \ &map_start, &map_len, KM_USER1); \ if (err) { \ diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ccdcb7bb7ad..b4c101d9322 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -55,18 +55,12 @@ static struct super_operations btrfs_super_ops; -static void btrfs_put_super (struct super_block * sb) +static void btrfs_put_super(struct super_block *sb) { struct btrfs_root *root = btrfs_sb(sb); int ret; ret = close_ctree(root); - if (ret) { - printk("close ctree returns %d\n", ret); - } -#if 0 - btrfs_sysfs_del_super(root->fs_info); -#endif sb->s_fs_info = NULL; } @@ -299,12 +293,12 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, return error; } -static int btrfs_fill_super(struct super_block * sb, +static int btrfs_fill_super(struct super_block *sb, struct btrfs_fs_devices *fs_devices, - void * data, int silent) + void *data, int silent) { - struct inode * inode; - struct dentry * root_dentry; + struct inode *inode; + struct dentry *root_dentry; struct btrfs_super_block *disk_super; struct btrfs_root *tree_root; struct btrfs_inode *bi; @@ -479,8 +473,10 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, root = dget(s->s_root); else { mutex_lock(&s->s_root->d_inode->i_mutex); - root = lookup_one_len(subvol_name, s->s_root, strlen(subvol_name)); + root = lookup_one_len(subvol_name, s->s_root, + strlen(subvol_name)); mutex_unlock(&s->s_root->d_inode->i_mutex); + if (IS_ERR(root)) { up_write(&s->s_umount); deactivate_super(s); @@ -557,8 +553,9 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bavail = buf->f_bfree; buf->f_bsize = dentry->d_sb->s_blocksize; buf->f_type = BTRFS_SUPER_MAGIC; + /* We treat it as constant endianness (it doesn't matter _which_) - because we want the fsid to come out the same whether mounted + because we want the fsid to come out the same whether mounted on a big-endian or little-endian host */ buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]); buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]); @@ -658,7 +655,7 @@ static int btrfs_interface_init(void) static void btrfs_interface_exit(void) { if (misc_deregister(&btrfs_misc) < 0) - printk("misc_deregister failed for control device"); + printk(KERN_INFO "misc_deregister failed for control device"); } static int __init init_btrfs_fs(void) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 04087c02084..a240b6fa81d 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -67,7 +67,8 @@ struct btrfs_root_attr { }; #define ROOT_ATTR(name, mode, show, store) \ -static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, show, store) +static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, \ + show, store) ROOT_ATTR(blocks_used, 0444, root_blocks_used_show, NULL); ROOT_ATTR(block_limit, 0644, root_block_limit_show, NULL); @@ -86,7 +87,8 @@ struct btrfs_super_attr { }; #define SUPER_ATTR(name, mode, show, store) \ -static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, show, store) +static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, \ + show, store) SUPER_ATTR(blocks_used, 0444, super_blocks_used_show, NULL); SUPER_ATTR(total_blocks, 0444, super_total_blocks_show, NULL); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4e7b56e9d3a..56ab1f5ea11 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -28,9 +28,6 @@ #include "ref-cache.h" #include "tree-log.h" -extern struct kmem_cache *btrfs_trans_handle_cachep; -extern struct kmem_cache *btrfs_transaction_cachep; - #define BTRFS_ROOT_TRANS_TAG 0 static noinline void put_transaction(struct btrfs_transaction *transaction) @@ -85,10 +82,10 @@ static noinline int join_transaction(struct btrfs_root *root) } /* - * this does all the record keeping required to make sure that a - * reference counted root is properly recorded in a given transaction. - * This is required to make sure the old root from before we joined the transaction - * is deleted when the transaction commits + * this does all the record keeping required to make sure that a reference + * counted root is properly recorded in a given transaction. This is required + * to make sure the old root from before we joined the transaction is deleted + * when the transaction commits */ noinline int btrfs_record_root_in_trans(struct btrfs_root *root) { @@ -144,7 +141,7 @@ static void wait_current_trans(struct btrfs_root *root) if (cur_trans && cur_trans->blocked) { DEFINE_WAIT(wait); cur_trans->use_count++; - while(1) { + while (1) { prepare_to_wait(&root->fs_info->transaction_wait, &wait, TASK_UNINTERRUPTIBLE); if (cur_trans->blocked) { @@ -213,7 +210,7 @@ static noinline int wait_for_commit(struct btrfs_root *root, { DEFINE_WAIT(wait); mutex_lock(&root->fs_info->trans_mutex); - while(!commit->commit_done) { + while (!commit->commit_done) { prepare_to_wait(&commit->commit_wait, &wait, TASK_UNINTERRUPTIBLE); if (commit->commit_done) @@ -228,8 +225,8 @@ static noinline int wait_for_commit(struct btrfs_root *root, } /* - * rate limit against the drop_snapshot code. This helps to slow down new operations - * if the drop_snapshot code isn't able to keep up. + * rate limit against the drop_snapshot code. This helps to slow down new + * operations if the drop_snapshot code isn't able to keep up. */ static void throttle_on_drops(struct btrfs_root *root) { @@ -332,12 +329,12 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, u64 end; unsigned long index; - while(1) { + while (1) { ret = find_first_extent_bit(dirty_pages, start, &start, &end, EXTENT_DIRTY); if (ret) break; - while(start <= end) { + while (start <= end) { cond_resched(); index = start >> PAGE_CACHE_SHIFT; @@ -368,14 +365,14 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, page_cache_release(page); } } - while(1) { + while (1) { ret = find_first_extent_bit(dirty_pages, 0, &start, &end, EXTENT_DIRTY); if (ret) break; clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); - while(start <= end) { + while (start <= end) { index = start >> PAGE_CACHE_SHIFT; start = (u64)(index + 1) << PAGE_CACHE_SHIFT; page = find_get_page(btree_inode->i_mapping, index); @@ -431,7 +428,7 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, btrfs_write_dirty_block_groups(trans, root); btrfs_extent_post_op(trans, root); - while(1) { + while (1) { old_root_bytenr = btrfs_root_bytenr(&root->root_item); if (old_root_bytenr == root->node->start) break; @@ -472,7 +469,7 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, btrfs_extent_post_op(trans, fs_info->tree_root); - while(!list_empty(&fs_info->dirty_cowonly_roots)) { + while (!list_empty(&fs_info->dirty_cowonly_roots)) { next = fs_info->dirty_cowonly_roots.next; list_del_init(next); root = list_entry(next, struct btrfs_root, dirty_list); @@ -521,7 +518,7 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, int err = 0; u32 refs; - while(1) { + while (1) { ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0, ARRAY_SIZE(gang), BTRFS_ROOT_TRANS_TAG); @@ -653,7 +650,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, int ret = 0; int err; - while(!list_empty(list)) { + while (!list_empty(list)) { struct btrfs_root *root; dirty = list_entry(list->prev, struct btrfs_dirty_root, list); @@ -663,13 +660,12 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, root = dirty->latest_root; atomic_inc(&root->fs_info->throttles); - while(1) { + while (1) { trans = btrfs_start_transaction(tree_root, 1); mutex_lock(&root->fs_info->drop_mutex); ret = btrfs_drop_snapshot(trans, dirty->root); - if (ret != -EAGAIN) { + if (ret != -EAGAIN) break; - } mutex_unlock(&root->fs_info->drop_mutex); err = btrfs_update_root(trans, @@ -874,7 +870,7 @@ static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans, struct list_head *head = &trans->transaction->pending_snapshots; int ret; - while(!list_empty(head)) { + while (!list_empty(head)) { pending = list_entry(head->next, struct btrfs_pending_snapshot, list); ret = finish_pending_snapshot(fs_info, pending); @@ -1076,9 +1072,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_trans_handle_cachep, trans); - if (root->fs_info->closing) { + if (root->fs_info->closing) drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots); - } return ret; } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index ffe7f639732..ea292117f88 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -66,9 +66,9 @@ static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, trans->block_group = BTRFS_I(inode)->block_group; } -static inline void btrfs_update_inode_block_group(struct - btrfs_trans_handle *trans, - struct inode *inode) +static inline void btrfs_update_inode_block_group( + struct btrfs_trans_handle *trans, + struct inode *inode) { BTRFS_I(inode)->block_group = trans->block_group; } diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index a6a3956cedf..3e8358c3616 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -23,10 +23,11 @@ #include "transaction.h" #include "locking.h" -/* defrag all the leaves in a given btree. If cache_only == 1, don't read things - * from disk, otherwise read all the leaves and try to get key order to +/* defrag all the leaves in a given btree. If cache_only == 1, don't read + * things from disk, otherwise read all the leaves and try to get key order to * better reflect disk order */ + int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only) { @@ -65,9 +66,9 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, level = btrfs_header_level(root->node); orig_level = level; - if (level == 0) { + if (level == 0) goto out; - } + if (root->defrag_progress.objectid == 0) { struct extent_buffer *root_node; u32 nritems; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index b1c2921f5be..3a72a1b6c24 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -829,7 +829,7 @@ conflict_again: */ ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); ptr_end = ptr + btrfs_item_size_nr(leaf, path->slots[0]); - while(ptr < ptr_end) { + while (ptr < ptr_end) { victim_ref = (struct btrfs_inode_ref *)ptr; victim_name_len = btrfs_inode_ref_name_len(leaf, victim_ref); @@ -938,9 +938,8 @@ static noinline int replay_one_csum(struct btrfs_trans_handle *trans, file_bytes = (item_size / csum_size) * root->sectorsize; sums = kzalloc(btrfs_ordered_sum_size(root, file_bytes), GFP_NOFS); - if (!sums) { + if (!sums) return -ENOMEM; - } INIT_LIST_HEAD(&sums->list); sums->len = file_bytes; @@ -952,7 +951,7 @@ static noinline int replay_one_csum(struct btrfs_trans_handle *trans, sector_sum = sums->sums; cur_offset = key->offset; ptr = btrfs_item_ptr_offset(eb, slot); - while(item_size > 0) { + while (item_size > 0) { sector_sum->bytenr = cur_offset; read_extent_buffer(eb, §or_sum->sum, ptr, csum_size); sector_sum++; @@ -995,7 +994,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); - while(1) { + while (1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) break; @@ -1012,7 +1011,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); ptr_end = ptr + btrfs_item_size_nr(path->nodes[0], path->slots[0]); - while(ptr < ptr_end) { + while (ptr < ptr_end) { struct btrfs_inode_ref *ref; ref = (struct btrfs_inode_ref *)ptr; @@ -1048,7 +1047,7 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; key.type = BTRFS_ORPHAN_ITEM_KEY; key.offset = (u64)-1; - while(1) { + while (1) { ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) break; @@ -1206,8 +1205,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, if (key->type == BTRFS_DIR_ITEM_KEY) { dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, name, name_len, 1); - } - else if (key->type == BTRFS_DIR_INDEX_KEY) { + } else if (key->type == BTRFS_DIR_INDEX_KEY) { dst_di = btrfs_lookup_dir_index_item(trans, root, path, key->objectid, key->offset, name, @@ -1282,7 +1280,7 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, ptr = btrfs_item_ptr_offset(eb, slot); ptr_end = ptr + item_size; - while(ptr < ptr_end) { + while (ptr < ptr_end) { di = (struct btrfs_dir_item *)ptr; name_len = btrfs_dir_name_len(eb, di); ret = replay_one_name(trans, root, path, eb, di, key); @@ -1408,7 +1406,7 @@ again: item_size = btrfs_item_size_nr(eb, slot); ptr = btrfs_item_ptr_offset(eb, slot); ptr_end = ptr + item_size; - while(ptr < ptr_end) { + while (ptr < ptr_end) { di = (struct btrfs_dir_item *)ptr; name_len = btrfs_dir_name_len(eb, di); name = kmalloc(name_len, GFP_NOFS); @@ -1513,14 +1511,14 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, again: range_start = 0; range_end = 0; - while(1) { + while (1) { ret = find_dir_range(log, path, dirid, key_type, &range_start, &range_end); if (ret != 0) break; dir_key.offset = range_start; - while(1) { + while (1) { int nritems; ret = btrfs_search_slot(NULL, root, &dir_key, path, 0, 0); @@ -1676,7 +1674,7 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, return 0; } -static int noinline walk_down_log_tree(struct btrfs_trans_handle *trans, +static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level, struct walk_control *wc) @@ -1694,7 +1692,7 @@ static int noinline walk_down_log_tree(struct btrfs_trans_handle *trans, WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - while(*level > 0) { + while (*level > 0) { WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); cur = path->nodes[*level]; @@ -1753,11 +1751,11 @@ static int noinline walk_down_log_tree(struct btrfs_trans_handle *trans, WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - if (path->nodes[*level] == root->node) { + if (path->nodes[*level] == root->node) parent = path->nodes[*level]; - } else { + else parent = path->nodes[*level + 1]; - } + bytenr = path->nodes[*level]->start; blocksize = btrfs_level_size(root, *level); @@ -1790,7 +1788,7 @@ static int noinline walk_down_log_tree(struct btrfs_trans_handle *trans, return 0; } -static int noinline walk_up_log_tree(struct btrfs_trans_handle *trans, +static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level, struct walk_control *wc) @@ -1801,7 +1799,7 @@ static int noinline walk_up_log_tree(struct btrfs_trans_handle *trans, int slot; int ret; - for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { + for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { struct extent_buffer *node; @@ -1875,7 +1873,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, extent_buffer_get(log->node); path->slots[level] = 0; - while(1) { + while (1) { wret = walk_down_log_tree(trans, log, path, &level, wc); if (wret > 0) break; @@ -1941,7 +1939,7 @@ static int wait_log_commit(struct btrfs_root *log) schedule(); finish_wait(&log->fs_info->tree_log_wait, &wait); mutex_lock(&log->fs_info->tree_log_mutex); - } while(transid == log->fs_info->tree_log_transid && + } while (transid == log->fs_info->tree_log_transid && atomic_read(&log->fs_info->tree_log_commit)); return 0; } @@ -1965,13 +1963,13 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, } atomic_set(&log->fs_info->tree_log_commit, 1); - while(1) { + while (1) { batch = log->fs_info->tree_log_batch; mutex_unlock(&log->fs_info->tree_log_mutex); schedule_timeout_uninterruptible(1); mutex_lock(&log->fs_info->tree_log_mutex); - while(atomic_read(&log->fs_info->tree_log_writers)) { + while (atomic_read(&log->fs_info->tree_log_writers)) { DEFINE_WAIT(wait); prepare_to_wait(&log->fs_info->tree_log_wait, &wait, TASK_UNINTERRUPTIBLE); @@ -2030,7 +2028,7 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) ret = walk_log_tree(trans, log, &wc); BUG_ON(ret); - while(1) { + while (1) { ret = find_first_extent_bit(&log->dirty_log_pages, 0, &start, &end, EXTENT_DIRTY); if (ret) @@ -2287,9 +2285,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, struct btrfs_key tmp; btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); - if (key_type == tmp.type) { + if (key_type == tmp.type) first_offset = max(min_offset, tmp.offset) + 1; - } } goto done; } @@ -2319,7 +2316,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, * we have a block from this transaction, log every item in it * from our directory */ - while(1) { + while (1) { struct btrfs_key tmp; src = path->nodes[0]; nritems = btrfs_header_nritems(src); @@ -2396,7 +2393,7 @@ static noinline int log_directory_changes(struct btrfs_trans_handle *trans, again: min_key = 0; max_key = 0; - while(1) { + while (1) { ret = log_dir_items(trans, root, inode, path, dst_path, key_type, min_key, &max_key); @@ -2432,7 +2429,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, key.type = max_key_type; key.offset = (u64)-1; - while(1) { + while (1) { ret = btrfs_search_slot(trans, log, &key, path, -1, 1); if (ret != 1) @@ -2481,7 +2478,7 @@ static noinline int copy_extent_csums(struct btrfs_trans_handle *trans, list_add_tail(&sums->list, list); path = btrfs_alloc_path(); - while(disk_bytenr < end) { + while (disk_bytenr < end) { if (!item || disk_bytenr < item_start_offset || disk_bytenr >= item_last_offset) { struct btrfs_key found_key; @@ -2496,7 +2493,8 @@ static noinline int copy_extent_csums(struct btrfs_trans_handle *trans, if (ret == -ENOENT || ret == -EFBIG) ret = 0; sum = 0; - printk("log no csum found for byte %llu\n", + printk(KERN_INFO "log no csum found for " + "byte %llu\n", (unsigned long long)disk_bytenr); item = NULL; btrfs_release_path(root, path); @@ -2643,7 +2641,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, * we have to do this after the loop above to avoid changing the * log tree while trying to change the log tree. */ - while(!list_empty(&ordered_sums)) { + while (!list_empty(&ordered_sums)) { struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, struct btrfs_ordered_sum, list); @@ -2736,7 +2734,7 @@ static int __btrfs_log_inode(struct btrfs_trans_handle *trans, BUG_ON(ret); path->keep_locks = 1; - while(1) { + while (1) { ins_nr = 0; ret = btrfs_search_forward(root, &min_key, &max_key, path, 0, trans->transid); @@ -2848,7 +2846,7 @@ int btrfs_log_dentry(struct btrfs_trans_handle *trans, start_log_trans(trans, root); sb = dentry->d_inode->i_sb; - while(1) { + while (1) { ret = __btrfs_log_inode(trans, root, dentry->d_inode, inode_only); BUG_ON(ret); @@ -2919,7 +2917,7 @@ again: key.offset = (u64)-1; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); - while(1) { + while (1) { ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); if (ret < 0) break; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6672adcec9f..b187b537888 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -140,7 +140,7 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid) * the list if the block device is congested. This way, multiple devices * can make progress from a single worker thread. */ -static int noinline run_scheduled_bios(struct btrfs_device *device) +static noinline int run_scheduled_bios(struct btrfs_device *device) { struct bio *pending; struct backing_dev_info *bdi; @@ -187,7 +187,7 @@ loop: } spin_unlock(&device->io_lock); - while(pending) { + while (pending) { cur = pending; pending = pending->bi_next; cur->bi_next = NULL; @@ -458,7 +458,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, bdev = open_bdev_exclusive(device->name, flags, holder); if (IS_ERR(bdev)) { - printk("open %s failed\n", device->name); + printk(KERN_INFO "open %s failed\n", device->name); goto error; } set_blocksize(bdev, 4096); @@ -570,14 +570,15 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, devid = le64_to_cpu(disk_super->dev_item.devid); transid = btrfs_super_generation(disk_super); if (disk_super->label[0]) - printk("device label %s ", disk_super->label); + printk(KERN_INFO "device label %s ", disk_super->label); else { /* FIXME, make a readl uuid parser */ - printk("device fsid %llx-%llx ", + printk(KERN_INFO "device fsid %llx-%llx ", *(unsigned long long *)disk_super->fsid, *(unsigned long long *)(disk_super->fsid + 8)); } - printk("devid %Lu transid %Lu %s\n", devid, transid, path); + printk(KERN_INFO "devid %llu transid %llu %s\n", + (unsigned long long)devid, (unsigned long long)transid, path); ret = device_list_add(path, disk_super, devid, fs_devices_ret); brelse(bh); @@ -683,9 +684,8 @@ no_more_items: goto check_pending; } } - if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) { + if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) goto next; - } start_found = 1; dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); @@ -1001,14 +1001,16 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && root->fs_info->fs_devices->rw_devices <= 4) { - printk("btrfs: unable to go below four devices on raid10\n"); + printk(KERN_ERR "btrfs: unable to go below four devices " + "on raid10\n"); ret = -EINVAL; goto out; } if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && root->fs_info->fs_devices->rw_devices <= 2) { - printk("btrfs: unable to go below two devices on raid1\n"); + printk(KERN_ERR "btrfs: unable to go below two " + "devices on raid1\n"); ret = -EINVAL; goto out; } @@ -1031,7 +1033,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) bh = NULL; disk_super = NULL; if (!device) { - printk("btrfs: no missing devices found to remove\n"); + printk(KERN_ERR "btrfs: no missing devices found to " + "remove\n"); goto out; } } else { @@ -1060,7 +1063,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) } if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) { - printk("btrfs: unable to remove the only writeable device\n"); + printk(KERN_ERR "btrfs: unable to remove the only writeable " + "device\n"); ret = -EINVAL; goto error_brelse; } @@ -1286,9 +1290,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) return -EINVAL; bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder); - if (!bdev) { + if (!bdev) return -EIO; - } if (root->fs_info->fs_devices->seeding) { seeding_dev = 1; @@ -1401,8 +1404,8 @@ error: goto out; } -static int noinline btrfs_update_device(struct btrfs_trans_handle *trans, - struct btrfs_device *device) +static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, + struct btrfs_device *device) { int ret; struct btrfs_path *path; @@ -1563,7 +1566,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, int ret; int i; - printk("btrfs relocating chunk %llu\n", + printk(KERN_INFO "btrfs relocating chunk %llu\n", (unsigned long long)chunk_offset); root = root->fs_info->chunk_root; extent_root = root->fs_info->extent_root; @@ -1748,7 +1751,7 @@ int btrfs_balance(struct btrfs_root *dev_root) key.offset = (u64)-1; key.type = BTRFS_CHUNK_ITEM_KEY; - while(1) { + while (1) { ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); if (ret < 0) goto error; @@ -1916,7 +1919,7 @@ static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, return 0; } -static u64 noinline chunk_bytes_by_type(u64 type, u64 calc_size, +static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes, int sub_stripes) { if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP)) @@ -2041,7 +2044,7 @@ again: min_free += 1024 * 1024; INIT_LIST_HEAD(&private_devs); - while(index < num_stripes) { + while (index < num_stripes) { device = list_entry(cur, struct btrfs_device, dev_alloc_list); BUG_ON(!device->writeable); if (device->total_bytes > device->bytes_used) @@ -2242,7 +2245,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, return 0; } -static int noinline init_first_rw_device(struct btrfs_trans_handle *trans, +static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_device *device) { @@ -2338,7 +2341,7 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) { struct extent_map *em; - while(1) { + while (1) { spin_lock(&tree->map_tree.lock); em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); if (em) @@ -2413,9 +2416,8 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, int max_errors = 0; struct btrfs_multi_bio *multi = NULL; - if (multi_ret && !(rw & (1 << BIO_RW))) { + if (multi_ret && !(rw & (1 << BIO_RW))) stripes_allocated = 1; - } again: if (multi_ret) { multi = kzalloc(btrfs_multi_bio_size(stripes_allocated), @@ -2434,7 +2436,9 @@ again: return 0; if (!em) { - printk("unable to find logical %Lu len %Lu\n", logical, *length); + printk(KERN_CRIT "unable to find logical %llu len %llu\n", + (unsigned long long)logical, + (unsigned long long)*length); BUG(); } @@ -2541,9 +2545,8 @@ again: device = map->stripes[stripe_index].dev; if (device->bdev) { bdi = blk_get_backing_dev_info(device->bdev); - if (bdi->unplug_io_fn) { + if (bdi->unplug_io_fn) bdi->unplug_io_fn(bdi, unplug_page); - } } } else { multi->stripes[i].physical = @@ -2717,7 +2720,7 @@ struct async_sched { * This will add one bio to the pending list for a device and make sure * the work struct is scheduled. */ -static int noinline schedule_bio(struct btrfs_root *root, +static noinline int schedule_bio(struct btrfs_root *root, struct btrfs_device *device, int rw, struct bio *bio) { @@ -2785,8 +2788,10 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, total_devs = multi->num_stripes; if (map_length < length) { - printk("mapping failed logical %Lu bio len %Lu " - "len %Lu\n", logical, length, map_length); + printk(KERN_CRIT "mapping failed logical %llu bio len %llu " + "len %llu\n", (unsigned long long)logical, + (unsigned long long)length, + (unsigned long long)map_length); BUG(); } multi->end_io = first_bio->bi_end_io; @@ -2794,7 +2799,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, multi->orig_bio = first_bio; atomic_set(&multi->stripes_pending, multi->num_stripes); - while(dev_nr < total_devs) { + while (dev_nr < total_devs) { if (total_devs > 1) { if (dev_nr < total_devs - 1) { bio = bio_clone(first_bio, GFP_NOFS); @@ -3058,7 +3063,8 @@ static int read_one_dev(struct btrfs_root *root, return -EIO; if (!device) { - printk("warning devid %Lu missing\n", devid); + printk(KERN_WARNING "warning devid %llu missing\n", + (unsigned long long)devid); device = add_missing_dev(root, devid, dev_uuid); if (!device) return -ENOMEM; @@ -3078,12 +3084,6 @@ static int read_one_dev(struct btrfs_root *root, if (device->writeable) device->fs_devices->total_rw_bytes += device->total_bytes; ret = 0; -#if 0 - ret = btrfs_open_device(device); - if (ret) { - kfree(device); - } -#endif return ret; } @@ -3174,7 +3174,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) key.type = 0; again: ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - while(1) { + while (1) { leaf = path->nodes[0]; slot = path->slots[0]; if (slot >= btrfs_header_nritems(leaf)) { diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 4146f0710e6..7f332e27089 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -264,7 +264,8 @@ struct xattr_handler *btrfs_xattr_handlers[] = { */ static bool btrfs_is_valid_xattr(const char *name) { - return !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) || + return !strncmp(name, XATTR_SECURITY_PREFIX, + XATTR_SECURITY_PREFIX_LEN) || !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) || !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index c4617cde6c7..ecfbce836d3 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -152,7 +152,7 @@ static int free_workspace(struct workspace *workspace) static void free_workspaces(void) { struct workspace *workspace; - while(!list_empty(&idle_workspace)) { + while (!list_empty(&idle_workspace)) { workspace = list_entry(idle_workspace.next, struct workspace, list); list_del(&workspace->list); @@ -397,12 +397,10 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, ret = -1; goto out; } - while(workspace->inf_strm.total_in < srclen) { + while (workspace->inf_strm.total_in < srclen) { ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); - if (ret != Z_OK && ret != Z_STREAM_END) { + if (ret != Z_OK && ret != Z_STREAM_END) break; - } - /* * buf start is the byte offset we're of the start of * our workspace buffer @@ -424,16 +422,14 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, /* we didn't make progress in this inflate * call, we're done */ - if (ret != Z_STREAM_END) { + if (ret != Z_STREAM_END) ret = -1; - } break; } /* we haven't yet hit data corresponding to this page */ - if (total_out <= start_byte) { + if (total_out <= start_byte) goto next; - } /* * the start of the data we care about is offset into @@ -448,7 +444,7 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, current_buf_start = buf_start; /* copy bytes from the working buffer into the pages */ - while(working_bytes > 0) { + while (working_bytes > 0) { bytes = min(PAGE_CACHE_SIZE - pg_offset, PAGE_CACHE_SIZE - buf_offset); bytes = min(bytes, working_bytes); @@ -471,6 +467,7 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, ret = 0; goto done; } + page_out = bvec[page_out_index].bv_page; pg_offset = 0; page_bytes_left = PAGE_CACHE_SIZE; @@ -480,9 +477,8 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, * make sure our new page is covered by this * working buffer */ - if (total_out <= start_byte) { + if (total_out <= start_byte) goto next; - } /* the next page in the biovec might not * be adjacent to the last page, but it @@ -517,11 +513,10 @@ next: PAGE_CACHE_SIZE); } } - if (ret != Z_STREAM_END) { + if (ret != Z_STREAM_END) ret = -1; - } else { + else ret = 0; - } done: zlib_inflateEnd(&workspace->inf_strm); if (data_in) @@ -579,16 +574,15 @@ int btrfs_zlib_decompress(unsigned char *data_in, goto out; } - while(bytes_left > 0) { + while (bytes_left > 0) { unsigned long buf_start; unsigned long buf_offset; unsigned long bytes; unsigned long pg_offset = 0; ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); - if (ret != Z_OK && ret != Z_STREAM_END) { + if (ret != Z_OK && ret != Z_STREAM_END) break; - } buf_start = total_out; total_out = workspace->inf_strm.total_out; @@ -598,15 +592,13 @@ int btrfs_zlib_decompress(unsigned char *data_in, break; } - if (total_out <= start_byte) { + if (total_out <= start_byte) goto next; - } - if (total_out > start_byte && buf_start < start_byte) { + if (total_out > start_byte && buf_start < start_byte) buf_offset = start_byte - buf_start; - } else { + else buf_offset = 0; - } bytes = min(PAGE_CACHE_SIZE - pg_offset, PAGE_CACHE_SIZE - buf_offset); @@ -622,11 +614,12 @@ next: workspace->inf_strm.next_out = workspace->buf; workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; } - if (ret != Z_STREAM_END && bytes_left != 0) { + + if (ret != Z_STREAM_END && bytes_left != 0) ret = -1; - } else { + else ret = 0; - } + zlib_inflateEnd(&workspace->inf_strm); out: free_workspace(workspace); -- cgit v1.2.3-70-g09d2 From 9ca03b997f71787e345951e6267fbd8eba14d49f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 6 Jan 2009 09:38:55 -0500 Subject: Btrfs: drop remaining LINUX_KERNEL_VERSION checks and compat code Signed-off-by: Chris Mason --- fs/btrfs/compat.h | 25 ------------------------- fs/btrfs/extent-tree.c | 4 ---- 2 files changed, 29 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h index 594d60bdd3c..7c4503ef6ef 100644 --- a/fs/btrfs/compat.h +++ b/fs/btrfs/compat.h @@ -4,29 +4,4 @@ #define btrfs_drop_nlink(inode) drop_nlink(inode) #define btrfs_inc_nlink(inode) inc_nlink(inode) -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 27) -static inline struct dentry *d_obtain_alias(struct inode *inode) -{ - struct dentry *d; - - if (!inode) - return NULL; - if (IS_ERR(inode)) - return ERR_CAST(inode); - - d = d_alloc_anon(inode); - if (!d) - iput(inode); - return d; -} -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28) -# define __pagevec_lru_add_file __pagevec_lru_add -# define open_bdev_exclusive open_bdev_excl -# define close_bdev_exclusive(bdev, mode) close_bdev_excl(bdev) -typedef unsigned __bitwise__ fmode_t; -#endif - - #endif /* _COMPAT_H_ */ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ec43fa526d7..171ca30a375 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -869,11 +869,7 @@ static noinline int remove_extent_backref(struct btrfs_trans_handle *trans, static void btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len) { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL); -#else - blkdev_issue_discard(bdev, start >> 9, len >> 9); -#endif } #endif -- cgit v1.2.3-70-g09d2 From 07d400a6df4767a90d49a153fdb7f4cfa1e3f23e Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Tue, 6 Jan 2009 11:42:00 -0500 Subject: Btrfs: tree logging checksum fixes This patch contains following things. 1) Limit the max size of btrfs_ordered_sum structure to PAGE_SIZE. This struct is kmalloced so we want to keep it reasonable. 2) Replace copy_extent_csums by btrfs_lookup_csums_range. This was duplicated code in tree-log.c 3) Remove replay_one_csum. csum items are replayed at the same time as replaying file extents. This guarantees we only replay useful csums. 4) nbytes accounting fix. Signed-off-by: Yan Zheng --- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/file-item.c | 62 ++++++----- fs/btrfs/inode.c | 5 +- fs/btrfs/tree-log.c | 293 +++++++++++++++---------------------------------- 4 files changed, 130 insertions(+), 232 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 171ca30a375..293da650873 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5579,7 +5579,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) BUG_ON(ordered->file_offset != file_pos || ordered->len != len); disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; - ret = btrfs_lookup_csums_range(root, disk_bytenr, + ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, disk_bytenr + len - 1, &list); while (!list_empty(&list)) { diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index b11abfad81a..964652435fd 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -27,6 +27,12 @@ #define MAX_CSUM_ITEMS(r, size) ((((BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) * 2) / \ size) - 1)) + +#define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \ + sizeof(struct btrfs_ordered_sum)) / \ + sizeof(struct btrfs_sector_sum) * \ + (r)->sectorsize - (r)->sectorsize) + int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, @@ -259,8 +265,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, key.offset = start; key.type = BTRFS_EXTENT_CSUM_KEY; - ret = btrfs_search_slot(NULL, root->fs_info->csum_root, - &key, path, 0, 0); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto fail; if (ret > 0 && path->slots[0] > 0) { @@ -279,7 +284,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, while (start <= end) { leaf = path->nodes[0]; if (path->slots[0] >= btrfs_header_nritems(leaf)) { - ret = btrfs_next_leaf(root->fs_info->csum_root, path); + ret = btrfs_next_leaf(root, path); if (ret < 0) goto fail; if (ret > 0) @@ -306,33 +311,38 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, continue; } - size = min(csum_end, end + 1) - start; - sums = kzalloc(btrfs_ordered_sum_size(root, size), GFP_NOFS); - BUG_ON(!sums); + csum_end = min(csum_end, end + 1); + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_csum_item); + while (start < csum_end) { + size = min_t(size_t, csum_end - start, + MAX_ORDERED_SUM_BYTES(root)); + sums = kzalloc(btrfs_ordered_sum_size(root, size), + GFP_NOFS); + BUG_ON(!sums); - sector_sum = sums->sums; - sums->bytenr = start; - sums->len = size; + sector_sum = sums->sums; + sums->bytenr = start; + sums->len = size; - offset = (start - key.offset) >> - root->fs_info->sb->s_blocksize_bits; - offset *= csum_size; + offset = (start - key.offset) >> + root->fs_info->sb->s_blocksize_bits; + offset *= csum_size; - item = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_csum_item); - while (size > 0) { - read_extent_buffer(path->nodes[0], §or_sum->sum, - ((unsigned long)item) + offset, - csum_size); - sector_sum->bytenr = start; - - size -= root->sectorsize; - start += root->sectorsize; - offset += csum_size; - sector_sum++; + while (size > 0) { + read_extent_buffer(path->nodes[0], + §or_sum->sum, + ((unsigned long)item) + + offset, csum_size); + sector_sum->bytenr = start; + + size -= root->sectorsize; + start += root->sectorsize; + offset += csum_size; + sector_sum++; + } + list_add_tail(&sums->list, list); } - list_add_tail(&sums->list, list); - path->slots[0]++; } ret = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c0ca9c3723c..4e57fe68e4b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -157,7 +157,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, key.objectid = inode->i_ino; key.offset = start; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); - inode_add_bytes(inode, size); datasize = btrfs_file_extent_calc_inline_size(cur_size); inode_add_bytes(inode, size); @@ -920,8 +919,8 @@ static noinline int csum_exist_in_range(struct btrfs_root *root, struct btrfs_ordered_sum *sums; LIST_HEAD(list); - ret = btrfs_lookup_csums_range(root, bytenr, bytenr + num_bytes - 1, - &list); + ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr, + bytenr + num_bytes - 1, &list); if (ret == 0 && list_empty(&list)) return 0; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 3a72a1b6c24..332ec35d2c0 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -433,49 +433,6 @@ insert: trans->transid); } } - - if (overwrite_root && - key->type == BTRFS_EXTENT_DATA_KEY) { - int extent_type; - struct btrfs_file_extent_item *fi; - - fi = (struct btrfs_file_extent_item *)dst_ptr; - extent_type = btrfs_file_extent_type(path->nodes[0], fi); - if (extent_type == BTRFS_FILE_EXTENT_REG || - extent_type == BTRFS_FILE_EXTENT_PREALLOC) { - struct btrfs_key ins; - ins.objectid = btrfs_file_extent_disk_bytenr( - path->nodes[0], fi); - ins.offset = btrfs_file_extent_disk_num_bytes( - path->nodes[0], fi); - ins.type = BTRFS_EXTENT_ITEM_KEY; - - /* - * is this extent already allocated in the extent - * allocation tree? If so, just add a reference - */ - ret = btrfs_lookup_extent(root, ins.objectid, - ins.offset); - if (ret == 0) { - ret = btrfs_inc_extent_ref(trans, root, - ins.objectid, ins.offset, - path->nodes[0]->start, - root->root_key.objectid, - trans->transid, key->objectid); - } else { - /* - * insert the extent pointer in the extent - * allocation tree - */ - ret = btrfs_alloc_logged_extent(trans, root, - path->nodes[0]->start, - root->root_key.objectid, - trans->transid, key->objectid, - &ins); - BUG_ON(ret); - } - } - } no_copy: btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(root, path); @@ -530,6 +487,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, u64 extent_end; u64 alloc_hint; u64 start = key->offset; + u64 saved_nbytes; struct btrfs_file_extent_item *item; struct inode *inode = NULL; unsigned long size; @@ -591,17 +549,95 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, } btrfs_release_path(root, path); + saved_nbytes = inode_get_bytes(inode); /* drop any overlapping extents */ ret = btrfs_drop_extents(trans, root, inode, start, extent_end, start, &alloc_hint); BUG_ON(ret); - /* insert the extent */ - ret = overwrite_item(trans, root, path, eb, slot, key); - BUG_ON(ret); + if (found_type == BTRFS_FILE_EXTENT_REG || + found_type == BTRFS_FILE_EXTENT_PREALLOC) { + unsigned long dest_offset; + struct btrfs_key ins; + + ret = btrfs_insert_empty_item(trans, root, path, key, + sizeof(*item)); + BUG_ON(ret); + dest_offset = btrfs_item_ptr_offset(path->nodes[0], + path->slots[0]); + copy_extent_buffer(path->nodes[0], eb, dest_offset, + (unsigned long)item, sizeof(*item)); + + ins.objectid = btrfs_file_extent_disk_bytenr(eb, item); + ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); + ins.type = BTRFS_EXTENT_ITEM_KEY; + + if (ins.objectid > 0) { + u64 csum_start; + u64 csum_end; + LIST_HEAD(ordered_sums); + /* + * is this extent already allocated in the extent + * allocation tree? If so, just add a reference + */ + ret = btrfs_lookup_extent(root, ins.objectid, + ins.offset); + if (ret == 0) { + ret = btrfs_inc_extent_ref(trans, root, + ins.objectid, ins.offset, + path->nodes[0]->start, + root->root_key.objectid, + trans->transid, key->objectid); + } else { + /* + * insert the extent pointer in the extent + * allocation tree + */ + ret = btrfs_alloc_logged_extent(trans, root, + path->nodes[0]->start, + root->root_key.objectid, + trans->transid, key->objectid, + &ins); + BUG_ON(ret); + } + btrfs_release_path(root, path); + + if (btrfs_file_extent_compression(eb, item)) { + csum_start = ins.objectid; + csum_end = csum_start + ins.offset; + } else { + csum_start = ins.objectid + + btrfs_file_extent_offset(eb, item); + csum_end = csum_start + + btrfs_file_extent_num_bytes(eb, item); + } + + ret = btrfs_lookup_csums_range(root->log_root, + csum_start, csum_end - 1, + &ordered_sums); + BUG_ON(ret); + while (!list_empty(&ordered_sums)) { + struct btrfs_ordered_sum *sums; + sums = list_entry(ordered_sums.next, + struct btrfs_ordered_sum, + list); + ret = btrfs_csum_file_blocks(trans, + root->fs_info->csum_root, + sums); + BUG_ON(ret); + list_del(&sums->list); + kfree(sums); + } + } else { + btrfs_release_path(root, path); + } + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + /* inline extents are easy, we just overwrite them */ + ret = overwrite_item(trans, root, path, eb, slot, key); + BUG_ON(ret); + } - /* btrfs_drop_extents changes i_bytes & i_blocks, update it here */ - inode_add_bytes(inode, extent_end - start); + inode_set_bytes(inode, saved_nbytes); btrfs_update_inode(trans, root, inode); out: if (inode) @@ -902,70 +938,6 @@ out_nowrite: return 0; } -/* - * replay one csum item from the log tree into the subvolume 'root' - * eb, slot and key all refer to the log tree - * path is for temp use by this function and should be released on return - * - * This copies the checksums out of the log tree and inserts them into - * the subvolume. Any existing checksums for this range in the file - * are overwritten, and new items are added where required. - * - * We keep this simple by reusing the btrfs_ordered_sum code from - * the data=ordered mode. This basically means making a copy - * of all the checksums in ram, which we have to do anyway for kmap - * rules. - * - * The copy is then sent down to btrfs_csum_file_blocks, which - * does all the hard work of finding existing items in the file - * or adding new ones. - */ -static noinline int replay_one_csum(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct extent_buffer *eb, int slot, - struct btrfs_key *key) -{ - int ret; - u32 item_size = btrfs_item_size_nr(eb, slot); - u64 cur_offset; - u16 csum_size = - btrfs_super_csum_size(&root->fs_info->super_copy); - unsigned long file_bytes; - struct btrfs_ordered_sum *sums; - struct btrfs_sector_sum *sector_sum; - unsigned long ptr; - - file_bytes = (item_size / csum_size) * root->sectorsize; - sums = kzalloc(btrfs_ordered_sum_size(root, file_bytes), GFP_NOFS); - if (!sums) - return -ENOMEM; - - INIT_LIST_HEAD(&sums->list); - sums->len = file_bytes; - sums->bytenr = key->offset; - - /* - * copy all the sums into the ordered sum struct - */ - sector_sum = sums->sums; - cur_offset = key->offset; - ptr = btrfs_item_ptr_offset(eb, slot); - while (item_size > 0) { - sector_sum->bytenr = cur_offset; - read_extent_buffer(eb, §or_sum->sum, ptr, csum_size); - sector_sum++; - item_size -= csum_size; - ptr += csum_size; - cur_offset += root->sectorsize; - } - - /* let btrfs_csum_file_blocks add them into the file */ - ret = btrfs_csum_file_blocks(trans, root->fs_info->csum_root, sums); - BUG_ON(ret); - kfree(sums); - return 0; -} /* * There are a few corners where the link count of the file can't * be properly maintained during replay. So, instead of adding @@ -1659,10 +1631,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, ret = replay_one_extent(wc->trans, root, path, eb, i, &key); BUG_ON(ret); - } else if (key.type == BTRFS_EXTENT_CSUM_KEY) { - ret = replay_one_csum(wc->trans, root, path, - eb, i, &key); - BUG_ON(ret); } else if (key.type == BTRFS_DIR_ITEM_KEY || key.type == BTRFS_DIR_INDEX_KEY) { ret = replay_one_dir_item(wc->trans, root, path, @@ -2021,7 +1989,7 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) .process_func = process_one_buffer }; - if (!root->log_root) + if (!root->log_root || root->fs_info->log_root_recovering) return 0; log = root->log_root; @@ -2453,86 +2421,6 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, return 0; } -static noinline int copy_extent_csums(struct btrfs_trans_handle *trans, - struct list_head *list, - struct btrfs_root *root, - u64 disk_bytenr, u64 len) -{ - struct btrfs_ordered_sum *sums; - struct btrfs_sector_sum *sector_sum; - int ret; - struct btrfs_path *path; - struct btrfs_csum_item *item = NULL; - u64 end = disk_bytenr + len; - u64 item_start_offset = 0; - u64 item_last_offset = 0; - u32 diff; - u32 sum; - u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); - - sums = kzalloc(btrfs_ordered_sum_size(root, len), GFP_NOFS); - - sector_sum = sums->sums; - sums->bytenr = disk_bytenr; - sums->len = len; - list_add_tail(&sums->list, list); - - path = btrfs_alloc_path(); - while (disk_bytenr < end) { - if (!item || disk_bytenr < item_start_offset || - disk_bytenr >= item_last_offset) { - struct btrfs_key found_key; - u32 item_size; - - if (item) - btrfs_release_path(root, path); - item = btrfs_lookup_csum(NULL, root, path, - disk_bytenr, 0); - if (IS_ERR(item)) { - ret = PTR_ERR(item); - if (ret == -ENOENT || ret == -EFBIG) - ret = 0; - sum = 0; - printk(KERN_INFO "log no csum found for " - "byte %llu\n", - (unsigned long long)disk_bytenr); - item = NULL; - btrfs_release_path(root, path); - goto found; - } - btrfs_item_key_to_cpu(path->nodes[0], &found_key, - path->slots[0]); - - item_start_offset = found_key.offset; - item_size = btrfs_item_size_nr(path->nodes[0], - path->slots[0]); - item_last_offset = item_start_offset + - (item_size / csum_size) * - root->sectorsize; - item = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_csum_item); - } - /* - * this byte range must be able to fit inside - * a single leaf so it will also fit inside a u32 - */ - diff = disk_bytenr - item_start_offset; - diff = diff / root->sectorsize; - diff = diff * csum_size; - - read_extent_buffer(path->nodes[0], &sum, - ((unsigned long)item) + diff, - csum_size); -found: - sector_sum->bytenr = disk_bytenr; - sector_sum->sum = sum; - disk_bytenr += root->sectorsize; - sector_sum++; - } - btrfs_free_path(path); - return 0; -} - static noinline int copy_items(struct btrfs_trans_handle *trans, struct btrfs_root *log, struct btrfs_path *dst_path, @@ -2622,10 +2510,10 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, trans->transid, ins_keys[i].objectid); BUG_ON(ret); - ret = copy_extent_csums(trans, - &ordered_sums, - log->fs_info->csum_root, - ds + cs, cl); + ret = btrfs_lookup_csums_range( + log->fs_info->csum_root, + ds + cs, ds + cs + cl - 1, + &ordered_sums); BUG_ON(ret); } } @@ -2942,9 +2830,9 @@ again: tmp_key.offset = (u64)-1; wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); - BUG_ON(!wc.replay_dest); + wc.replay_dest->log_root = log; btrfs_record_root_in_trans(wc.replay_dest); ret = walk_log_tree(trans, log, &wc); BUG_ON(ret); @@ -2961,6 +2849,7 @@ again: } key.offset = found_key.offset - 1; + wc.replay_dest->log_root = NULL; free_extent_buffer(log->node); kfree(log); -- cgit v1.2.3-70-g09d2