From b0496686ba0da69cfd2433ef55fb2d1dc7465084 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 14 Mar 2013 14:57:45 +0000 Subject: Btrfs: cleanup unused arguments of btrfs_csum_data Argument 'root' is no more used in btrfs_csum_data(). Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6d19a0a554a..7bb28691ed9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -238,7 +238,7 @@ out: return em; } -u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) +u32 btrfs_csum_data(char *data, u32 seed, size_t len) { return crc32c(seed, data, len); } @@ -274,7 +274,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, if (err) return 1; cur_len = min(len, map_len - (offset - map_start)); - crc = btrfs_csum_data(root, kaddr + offset - map_start, + crc = btrfs_csum_data(kaddr + offset - map_start, crc, cur_len); len -= cur_len; offset += cur_len; @@ -2919,7 +2919,7 @@ static int write_dev_supers(struct btrfs_device *device, btrfs_set_super_bytenr(sb, bytenr); crc = ~(u32)0; - crc = btrfs_csum_data(NULL, (char *)sb + + crc = btrfs_csum_data((char *)sb + BTRFS_CSUM_SIZE, crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); -- cgit v1.2.3-70-g09d2 From be283b2e674a09457d4563729015adb637ce7cc1 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 14 Mar 2013 14:58:05 +0000 Subject: Btrfs: use helper to cleanup tree roots free_root_pointers() has been introduced to cleanup all of tree roots, so just use it instead. Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7bb28691ed9..e0665488e51 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3426,20 +3426,7 @@ int close_ctree(struct btrfs_root *root) percpu_counter_sum(&fs_info->delalloc_bytes)); } - free_extent_buffer(fs_info->extent_root->node); - free_extent_buffer(fs_info->extent_root->commit_root); - free_extent_buffer(fs_info->tree_root->node); - free_extent_buffer(fs_info->tree_root->commit_root); - free_extent_buffer(fs_info->chunk_root->node); - free_extent_buffer(fs_info->chunk_root->commit_root); - free_extent_buffer(fs_info->dev_root->node); - free_extent_buffer(fs_info->dev_root->commit_root); - free_extent_buffer(fs_info->csum_root->node); - free_extent_buffer(fs_info->csum_root->commit_root); - if (fs_info->quota_root) { - free_extent_buffer(fs_info->quota_root->node); - free_extent_buffer(fs_info->quota_root->commit_root); - } + free_root_pointers(fs_info, 1); btrfs_free_block_groups(fs_info); -- cgit v1.2.3-70-g09d2 From 3173a18f70554fe7880bb2d85c7da566e364eb3c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 7 Mar 2013 14:22:04 -0500 Subject: Btrfs: add a incompatible format change for smaller metadata extent refs We currently store the first key of the tree block inside the reference for the tree block in the extent tree. This takes up quite a bit of space. Make a new key type for metadata which holds the level as the offset and completely removes storing the btrfs_tree_block_info inside the extent ref. This reduces the size from 51 bytes to 33 bytes per extent reference for each tree block. In practice this results in a 30-35% decrease in the size of our extent tree, which means we COW less and can keep more of the extent tree in memory which makes our heavy metadata operations go much faster. This is not an automatic format change, you must enable it at mkfs time or with btrfstune. This patch deals with having metadata stored as either the old format or the new format so it is easy to convert. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.c | 3 +- fs/btrfs/ctree.h | 22 ++++- fs/btrfs/disk-io.c | 3 + fs/btrfs/extent-tree.c | 224 ++++++++++++++++++++++++++++++++++++++++--------- fs/btrfs/inode.c | 2 +- fs/btrfs/relocation.c | 73 ++++++++++++---- fs/btrfs/scrub.c | 30 ++++--- 7 files changed, 290 insertions(+), 67 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ca9d8f1a3bb..fe032ab6bd8 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -867,7 +867,8 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, if (btrfs_block_can_be_shared(root, buf)) { ret = btrfs_lookup_extent_info(trans, root, buf->start, - buf->len, &refs, &flags); + btrfs_header_level(buf), 1, + &refs, &flags); if (ret) return ret; if (refs == 0) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e2f14b5258b..efb2feb7cd4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -509,6 +509,7 @@ struct btrfs_super_block { #define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6) #define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7) +#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8) #define BTRFS_FEATURE_COMPAT_SUPP 0ULL #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL @@ -519,7 +520,8 @@ struct btrfs_super_block { BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ BTRFS_FEATURE_INCOMPAT_RAID56 | \ - BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF) + BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \ + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) /* * A leaf is full of items. offset and size tell us where to find @@ -1809,6 +1811,12 @@ struct btrfs_ioctl_defrag_range_args { */ #define BTRFS_EXTENT_ITEM_KEY 168 +/* + * The same as the BTRFS_EXTENT_ITEM_KEY, except it's metadata we already know + * the length, so we save the level in key->offset instead of the length. + */ +#define BTRFS_METADATA_ITEM_KEY 169 + #define BTRFS_TREE_BLOCK_REF_KEY 176 #define BTRFS_EXTENT_DATA_REF_KEY 178 @@ -3006,7 +3014,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, - u64 num_bytes, u64 *refs, u64 *flags); + u64 offset, int metadata, u64 *refs, u64 *flags); int btrfs_pin_extent(struct btrfs_root *root, u64 bytenr, u64 num, int reserved); int btrfs_pin_extent_for_log_replay(struct btrfs_root *root, @@ -3669,6 +3677,16 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, } } +#define btrfs_fs_incompat(fs_info, opt) \ + __btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt) + +static inline int __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag) +{ + struct btrfs_super_block *disk_super; + disk_super = fs_info->super_copy; + return !!(btrfs_super_incompat_flags(disk_super) & flag); +} + /* * Call btrfs_abort_transaction as early as possible when an error condition is * detected, that way the exact line number is reported. diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e0665488e51..f47754a2fee 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2290,6 +2290,9 @@ int open_ctree(struct super_block *sb, if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO) features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; + if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) + printk(KERN_ERR "btrfs: has skinny extents\n"); + /* * flag our filesystem as having big metadata blocks if * they are bigger than the page size diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3d551231cab..7505856df9f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -442,11 +442,16 @@ again: block_group->key.offset) break; - if (key.type == BTRFS_EXTENT_ITEM_KEY) { + if (key.type == BTRFS_EXTENT_ITEM_KEY || + key.type == BTRFS_METADATA_ITEM_KEY) { total_found += add_new_free_space(block_group, fs_info, last, key.objectid); - last = key.objectid + key.offset; + if (key.type == BTRFS_METADATA_ITEM_KEY) + last = key.objectid + + fs_info->tree_root->leafsize; + else + last = key.objectid + key.offset; if (total_found > (1024 * 1024 * 2)) { total_found = 0; @@ -718,15 +723,21 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) key.objectid = start; key.offset = len; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + key.type = BTRFS_EXTENT_ITEM_KEY; ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, 0, 0); + if (ret > 0) { + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (key.objectid == start && + key.type == BTRFS_METADATA_ITEM_KEY) + ret = 0; + } btrfs_free_path(path); return ret; } /* - * helper function to lookup reference count and flags of extent. + * helper function to lookup reference count and flags of a tree block. * * the head node for delayed ref is used to store the sum of all the * reference count modifications queued up in the rbtree. the head @@ -736,7 +747,7 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) */ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, - u64 num_bytes, u64 *refs, u64 *flags) + u64 offset, int metadata, u64 *refs, u64 *flags) { struct btrfs_delayed_ref_head *head; struct btrfs_delayed_ref_root *delayed_refs; @@ -749,13 +760,29 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, u64 extent_flags; int ret; + /* + * If we don't have skinny metadata, don't bother doing anything + * different + */ + if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) { + offset = root->leafsize; + metadata = 0; + } + path = btrfs_alloc_path(); if (!path) return -ENOMEM; - key.objectid = bytenr; - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = num_bytes; + if (metadata) { + key.objectid = bytenr; + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = offset; + } else { + key.objectid = bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = offset; + } + if (!trans) { path->skip_locking = 1; path->search_commit_root = 1; @@ -766,6 +793,13 @@ again: if (ret < 0) goto out_free; + if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) { + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = root->leafsize; + btrfs_release_path(path); + goto again; + } + if (ret == 0) { leaf = path->nodes[0]; item_size = btrfs_item_size_nr(leaf, path->slots[0]); @@ -1453,6 +1487,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, int want; int ret; int err = 0; + bool skinny_metadata = btrfs_fs_incompat(root->fs_info, + SKINNY_METADATA); key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; @@ -1464,11 +1500,46 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, path->keep_locks = 1; } else extra_size = -1; + + /* + * Owner is our parent level, so we can just add one to get the level + * for the block we are interested in. + */ + if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) { + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = owner; + } + +again: ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1); if (ret < 0) { err = ret; goto out; } + + /* + * We may be a newly converted file system which still has the old fat + * extent entries for metadata, so try and see if we have one of those. + */ + if (ret > 0 && skinny_metadata) { + skinny_metadata = false; + if (path->slots[0]) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == bytenr && + key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == num_bytes) + ret = 0; + } + if (ret) { + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = num_bytes; + btrfs_release_path(path); + goto again; + } + } + if (ret && !insert) { err = -ENOENT; goto out; @@ -1504,11 +1575,9 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, ptr = (unsigned long)(ei + 1); end = (unsigned long)ei + item_size; - if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { + if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) { ptr += sizeof(struct btrfs_tree_block_info); BUG_ON(ptr > end); - } else { - BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA)); } err = -ENOENT; @@ -1973,10 +2042,8 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, ref_root = ref->root; if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { - if (extent_op) { - BUG_ON(extent_op->update_key); + if (extent_op) flags |= extent_op->flags_to_set; - } ret = alloc_reserved_file_extent(trans, root, parent, ref_root, flags, ref->objectid, ref->offset, @@ -2029,18 +2096,33 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans, u32 item_size; int ret; int err = 0; + int metadata = (node->type == BTRFS_TREE_BLOCK_REF_KEY || + node->type == BTRFS_SHARED_BLOCK_REF_KEY); if (trans->aborted) return 0; + if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) + metadata = 0; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; key.objectid = node->bytenr; - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = node->num_bytes; + if (metadata) { + struct btrfs_delayed_tree_ref *tree_ref; + + tree_ref = btrfs_delayed_node_to_tree_ref(node); + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = tree_ref->level; + } else { + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = node->num_bytes; + } + +again: path->reada = 1; path->leave_spinning = 1; ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, @@ -2050,6 +2132,14 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans, goto out; } if (ret > 0) { + if (metadata) { + btrfs_release_path(path); + metadata = 0; + + key.offset = node->num_bytes; + key.type = BTRFS_EXTENT_ITEM_KEY; + goto again; + } err = -EIO; goto out; } @@ -2089,10 +2179,8 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, struct btrfs_key ins; u64 parent = 0; u64 ref_root = 0; - - ins.objectid = node->bytenr; - ins.offset = node->num_bytes; - ins.type = BTRFS_EXTENT_ITEM_KEY; + bool skinny_metadata = btrfs_fs_incompat(root->fs_info, + SKINNY_METADATA); ref = btrfs_delayed_node_to_tree_ref(node); if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) @@ -2100,10 +2188,18 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, else ref_root = ref->root; + ins.objectid = node->bytenr; + if (skinny_metadata) { + ins.offset = ref->level; + ins.type = BTRFS_METADATA_ITEM_KEY; + } else { + ins.offset = node->num_bytes; + ins.type = BTRFS_EXTENT_ITEM_KEY; + } + BUG_ON(node->ref_mod != 1); if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { - BUG_ON(!extent_op || !extent_op->update_flags || - !extent_op->update_key); + BUG_ON(!extent_op || !extent_op->update_flags); ret = alloc_reserved_tree_block(trans, root, parent, ref_root, extent_op->flags_to_set, @@ -5312,6 +5408,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, int num_to_del = 1; u32 item_size; u64 refs; + bool skinny_metadata = btrfs_fs_incompat(root->fs_info, + SKINNY_METADATA); path = btrfs_alloc_path(); if (!path) @@ -5323,6 +5421,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; BUG_ON(!is_data && refs_to_drop != 1); + if (is_data) + skinny_metadata = 0; + ret = lookup_extent_backref(trans, extent_root, path, &iref, bytenr, num_bytes, parent, root_objectid, owner_objectid, @@ -5339,6 +5440,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, found_extent = 1; break; } + if (key.type == BTRFS_METADATA_ITEM_KEY && + key.offset == owner_objectid) { + found_extent = 1; + break; + } if (path->slots[0] - extent_slot > 5) break; extent_slot--; @@ -5364,8 +5470,36 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = num_bytes; + if (!is_data && skinny_metadata) { + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = owner_objectid; + } + ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); + if (ret > 0 && skinny_metadata && path->slots[0]) { + /* + * Couldn't find our skinny metadata item, + * see if we have ye olde extent item. + */ + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == bytenr && + key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == num_bytes) + ret = 0; + } + + if (ret > 0 && skinny_metadata) { + skinny_metadata = false; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = num_bytes; + btrfs_release_path(path); + ret = btrfs_search_slot(trans, extent_root, + &key, path, -1, 1); + } + if (ret) { printk(KERN_ERR "umm, got %d back from search" ", was looking for %llu\n", ret, @@ -5435,7 +5569,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, BUG_ON(item_size < sizeof(*ei)); ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item); - if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { + if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID && + key.type == BTRFS_EXTENT_ITEM_KEY) { struct btrfs_tree_block_info *bi; BUG_ON(item_size < sizeof(*ei) + sizeof(*bi)); bi = (struct btrfs_tree_block_info *)(ei + 1); @@ -6349,7 +6484,12 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, struct btrfs_extent_inline_ref *iref; struct btrfs_path *path; struct extent_buffer *leaf; - u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); + u32 size = sizeof(*extent_item) + sizeof(*iref); + bool skinny_metadata = btrfs_fs_incompat(root->fs_info, + SKINNY_METADATA); + + if (!skinny_metadata) + size += sizeof(*block_info); path = btrfs_alloc_path(); if (!path) @@ -6370,12 +6510,16 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, btrfs_set_extent_generation(leaf, extent_item, trans->transid); btrfs_set_extent_flags(leaf, extent_item, flags | BTRFS_EXTENT_FLAG_TREE_BLOCK); - block_info = (struct btrfs_tree_block_info *)(extent_item + 1); - btrfs_set_tree_block_key(leaf, block_info, key); - btrfs_set_tree_block_level(leaf, block_info, level); + if (skinny_metadata) { + iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); + } else { + block_info = (struct btrfs_tree_block_info *)(extent_item + 1); + btrfs_set_tree_block_key(leaf, block_info, key); + btrfs_set_tree_block_level(leaf, block_info, level); + iref = (struct btrfs_extent_inline_ref *)(block_info + 1); + } - iref = (struct btrfs_extent_inline_ref *)(block_info + 1); if (parent > 0) { BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); btrfs_set_extent_inline_ref_type(leaf, iref, @@ -6390,7 +6534,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); btrfs_free_path(path); - ret = update_block_group(root, ins->objectid, ins->offset, 1); + ret = update_block_group(root, ins->objectid, root->leafsize, 1); if (ret) { /* -ENOENT, logic error */ printk(KERN_ERR "btrfs update block group failed for %llu " "%llu\n", (unsigned long long)ins->objectid, @@ -6594,7 +6738,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct extent_buffer *buf; u64 flags = 0; int ret; - + bool skinny_metadata = btrfs_fs_incompat(root->fs_info, + SKINNY_METADATA); block_rsv = use_block_rsv(trans, root, blocksize); if (IS_ERR(block_rsv)) @@ -6627,7 +6772,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, else memset(&extent_op->key, 0, sizeof(extent_op->key)); extent_op->flags_to_set = flags; - extent_op->update_key = 1; + if (skinny_metadata) + extent_op->update_key = 0; + else + extent_op->update_key = 1; extent_op->update_flags = 1; extent_op->is_data = 0; @@ -6704,8 +6852,9 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, continue; /* We don't lock the tree block, it's OK to be racy here */ - ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, - &refs, &flags); + ret = btrfs_lookup_extent_info(trans, root, bytenr, + wc->level - 1, 1, &refs, + &flags); /* We don't care about errors in readahead. */ if (ret < 0) continue; @@ -6772,7 +6921,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) { BUG_ON(!path->locks[level]); ret = btrfs_lookup_extent_info(trans, root, - eb->start, eb->len, + eb->start, level, 1, &wc->refs[level], &wc->flags[level]); BUG_ON(ret == -ENOMEM); @@ -6870,7 +7019,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, btrfs_tree_lock(next); btrfs_set_lock_blocking(next); - ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, + ret = btrfs_lookup_extent_info(trans, root, bytenr, level - 1, 1, &wc->refs[level - 1], &wc->flags[level - 1]); if (ret < 0) { @@ -7001,7 +7150,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; ret = btrfs_lookup_extent_info(trans, root, - eb->start, eb->len, + eb->start, level, 1, &wc->refs[level], &wc->flags[level]); if (ret < 0) { @@ -7211,8 +7360,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, ret = btrfs_lookup_extent_info(trans, root, path->nodes[level]->start, - path->nodes[level]->len, - &wc->refs[level], + level, 1, &wc->refs[level], &wc->flags[level]); if (ret < 0) { err = ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 24e8a356a36..c69145b66ea 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3660,7 +3660,7 @@ static int check_path_shared(struct btrfs_root *root, eb = path->nodes[level]; if (!btrfs_block_can_be_shared(root, eb)) continue; - ret = btrfs_lookup_extent_info(NULL, root, eb->start, eb->len, + ret = btrfs_lookup_extent_info(NULL, root, eb->start, level, 1, &refs, NULL); if (refs > 1) return 1; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b67171e6d68..86f192ffc21 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -619,10 +619,13 @@ static noinline_for_stack int find_inline_backref(struct extent_buffer *leaf, int slot, unsigned long *ptr, unsigned long *end) { + struct btrfs_key key; struct btrfs_extent_item *ei; struct btrfs_tree_block_info *bi; u32 item_size; + btrfs_item_key_to_cpu(leaf, &key, slot); + item_size = btrfs_item_size_nr(leaf, slot); #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 if (item_size < sizeof(*ei)) { @@ -634,13 +637,18 @@ int find_inline_backref(struct extent_buffer *leaf, int slot, WARN_ON(!(btrfs_extent_flags(leaf, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)); - if (item_size <= sizeof(*ei) + sizeof(*bi)) { + if (key.type == BTRFS_EXTENT_ITEM_KEY && + item_size <= sizeof(*ei) + sizeof(*bi)) { WARN_ON(item_size < sizeof(*ei) + sizeof(*bi)); return 1; } - bi = (struct btrfs_tree_block_info *)(ei + 1); - *ptr = (unsigned long)(bi + 1); + if (key.type == BTRFS_EXTENT_ITEM_KEY) { + bi = (struct btrfs_tree_block_info *)(ei + 1); + *ptr = (unsigned long)(bi + 1); + } else { + *ptr = (unsigned long)(ei + 1); + } *end = (unsigned long)ei + item_size; return 0; } @@ -708,7 +716,7 @@ again: end = 0; ptr = 0; key.objectid = cur->bytenr; - key.type = BTRFS_EXTENT_ITEM_KEY; + key.type = BTRFS_METADATA_ITEM_KEY; key.offset = (u64)-1; path1->search_commit_root = 1; @@ -766,7 +774,8 @@ again: break; } - if (key.type == BTRFS_EXTENT_ITEM_KEY) { + if (key.type == BTRFS_EXTENT_ITEM_KEY || + key.type == BTRFS_METADATA_ITEM_KEY) { ret = find_inline_backref(eb, path1->slots[0], &ptr, &end); if (ret) @@ -2768,8 +2777,13 @@ static int reada_tree_block(struct reloc_control *rc, struct tree_block *block) { BUG_ON(block->key_ready); - readahead_tree_block(rc->extent_root, block->bytenr, - block->key.objectid, block->key.offset); + if (block->key.type == BTRFS_METADATA_ITEM_KEY) + readahead_tree_block(rc->extent_root, block->bytenr, + block->key.objectid, + rc->extent_root->leafsize); + else + readahead_tree_block(rc->extent_root, block->bytenr, + block->key.objectid, block->key.offset); return 0; } @@ -3176,12 +3190,17 @@ static int add_tree_block(struct reloc_control *rc, eb = path->nodes[0]; item_size = btrfs_item_size_nr(eb, path->slots[0]); - if (item_size >= sizeof(*ei) + sizeof(*bi)) { + if (extent_key->type == BTRFS_METADATA_ITEM_KEY || + item_size >= sizeof(*ei) + sizeof(*bi)) { ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); - bi = (struct btrfs_tree_block_info *)(ei + 1); + if (extent_key->type == BTRFS_EXTENT_ITEM_KEY) { + bi = (struct btrfs_tree_block_info *)(ei + 1); + level = btrfs_tree_block_level(eb, bi); + } else { + level = (int)extent_key->offset; + } generation = btrfs_extent_generation(eb, ei); - level = btrfs_tree_block_level(eb, bi); } else { #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 u64 ref_owner; @@ -3210,7 +3229,7 @@ static int add_tree_block(struct reloc_control *rc, return -ENOMEM; block->bytenr = extent_key->objectid; - block->key.objectid = extent_key->offset; + block->key.objectid = rc->extent_root->leafsize; block->key.offset = generation; block->level = level; block->key_ready = 0; @@ -3252,9 +3271,15 @@ static int __add_tree_block(struct reloc_control *rc, ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0); if (ret < 0) goto out; - BUG_ON(ret); btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (ret > 0) { + if (key.objectid == bytenr && + key.type == BTRFS_METADATA_ITEM_KEY) + ret = 0; + } + BUG_ON(ret); + ret = add_tree_block(rc, &key, path, blocks); out: btrfs_free_path(path); @@ -3275,7 +3300,8 @@ static int block_use_full_backref(struct reloc_control *rc, return 1; ret = btrfs_lookup_extent_info(NULL, rc->extent_root, - eb->start, eb->len, NULL, &flags); + eb->start, btrfs_header_level(eb), 1, + NULL, &flags); BUG_ON(ret); if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) @@ -3644,12 +3670,25 @@ next: break; } - if (key.type != BTRFS_EXTENT_ITEM_KEY || + if (key.type != BTRFS_EXTENT_ITEM_KEY && + key.type != BTRFS_METADATA_ITEM_KEY) { + path->slots[0]++; + goto next; + } + + if (key.type == BTRFS_EXTENT_ITEM_KEY && key.objectid + key.offset <= rc->search_start) { path->slots[0]++; goto next; } + if (key.type == BTRFS_METADATA_ITEM_KEY && + key.objectid + rc->extent_root->leafsize <= + rc->search_start) { + path->slots[0]++; + goto next; + } + ret = find_first_extent_bit(&rc->processed_blocks, key.objectid, &start, &end, EXTENT_DIRTY, NULL); @@ -3658,7 +3697,11 @@ next: btrfs_release_path(path); rc->search_start = end + 1; } else { - rc->search_start = key.objectid + key.offset; + if (key.type == BTRFS_EXTENT_ITEM_KEY) + rc->search_start = key.objectid + key.offset; + else + rc->search_start = key.objectid + + rc->extent_root->leafsize; memcpy(extent_key, &key, sizeof(key)); return 0; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 3d29d60bdaf..28db5dcde0a 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2312,8 +2312,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, key_start.type = BTRFS_EXTENT_ITEM_KEY; key_start.offset = (u64)0; key_end.objectid = base + offset + nstripes * increment; - key_end.type = BTRFS_EXTENT_ITEM_KEY; - key_end.offset = (u64)0; + key_end.type = BTRFS_METADATA_ITEM_KEY; + key_end.offset = (u64)-1; reada1 = btrfs_reada_add(root, &key_start, &key_end); key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID; @@ -2401,6 +2401,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; + if (ret > 0) { ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY); @@ -2418,6 +2419,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, } while (1) { + u64 bytes; + l = path->nodes[0]; slot = path->slots[0]; if (slot >= btrfs_header_nritems(l)) { @@ -2431,14 +2434,21 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, } btrfs_item_key_to_cpu(l, &key, slot); - if (key.objectid + key.offset <= logical) + if (key.type != BTRFS_EXTENT_ITEM_KEY && + key.type != BTRFS_METADATA_ITEM_KEY) + goto next; + + if (key.type == BTRFS_METADATA_ITEM_KEY) + bytes = root->leafsize; + else + bytes = key.offset; + + if (key.objectid + bytes <= logical) goto next; if (key.objectid >= logical + map->stripe_len) break; - if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) - goto next; extent = btrfs_item_ptr(l, slot, struct btrfs_extent_item); @@ -2459,18 +2469,18 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, * trim extent to this stripe */ if (key.objectid < logical) { - key.offset -= logical - key.objectid; + bytes -= logical - key.objectid; key.objectid = logical; } - if (key.objectid + key.offset > + if (key.objectid + bytes > logical + map->stripe_len) { - key.offset = logical + map->stripe_len - - key.objectid; + bytes = logical + map->stripe_len - + key.objectid; } extent_logical = key.objectid; extent_physical = key.objectid - logical + physical; - extent_len = key.offset; + extent_len = bytes; extent_dev = scrub_dev; extent_mirror_num = mirror_num; if (is_dev_replace) -- cgit v1.2.3-70-g09d2 From 7abadb6431a057f1e3cf8d395acb8766b947ac85 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Sun, 17 Mar 2013 02:10:31 +0000 Subject: Btrfs: share stop worker code Share the exactly same code of stopping workers. Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 55 +++++++++++++++++++++++------------------------------- 1 file changed, 23 insertions(+), 32 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f47754a2fee..898263f56d9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1935,6 +1935,27 @@ static noinline int next_root_backup(struct btrfs_fs_info *info, return 0; } +/* helper to cleanup workers */ +static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) +{ + btrfs_stop_workers(&fs_info->generic_worker); + btrfs_stop_workers(&fs_info->fixup_workers); + btrfs_stop_workers(&fs_info->delalloc_workers); + btrfs_stop_workers(&fs_info->workers); + btrfs_stop_workers(&fs_info->endio_workers); + btrfs_stop_workers(&fs_info->endio_meta_workers); + btrfs_stop_workers(&fs_info->endio_raid56_workers); + btrfs_stop_workers(&fs_info->rmw_workers); + btrfs_stop_workers(&fs_info->endio_meta_write_workers); + btrfs_stop_workers(&fs_info->endio_write_workers); + btrfs_stop_workers(&fs_info->endio_freespace_worker); + btrfs_stop_workers(&fs_info->submit_workers); + btrfs_stop_workers(&fs_info->delayed_workers); + btrfs_stop_workers(&fs_info->caching_workers); + btrfs_stop_workers(&fs_info->readahead_workers); + btrfs_stop_workers(&fs_info->flush_workers); +} + /* helper to cleanup tree roots */ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) { @@ -2760,22 +2781,7 @@ fail_tree_roots: invalidate_inode_pages2(fs_info->btree_inode->i_mapping); fail_sb_buffer: - btrfs_stop_workers(&fs_info->generic_worker); - btrfs_stop_workers(&fs_info->readahead_workers); - btrfs_stop_workers(&fs_info->fixup_workers); - btrfs_stop_workers(&fs_info->delalloc_workers); - btrfs_stop_workers(&fs_info->workers); - btrfs_stop_workers(&fs_info->endio_workers); - btrfs_stop_workers(&fs_info->endio_meta_workers); - btrfs_stop_workers(&fs_info->endio_raid56_workers); - btrfs_stop_workers(&fs_info->rmw_workers); - btrfs_stop_workers(&fs_info->endio_meta_write_workers); - btrfs_stop_workers(&fs_info->endio_write_workers); - btrfs_stop_workers(&fs_info->endio_freespace_worker); - btrfs_stop_workers(&fs_info->submit_workers); - btrfs_stop_workers(&fs_info->delayed_workers); - btrfs_stop_workers(&fs_info->caching_workers); - btrfs_stop_workers(&fs_info->flush_workers); + btrfs_stop_all_workers(fs_info); fail_alloc: fail_iput: btrfs_mapping_tree_free(&fs_info->mapping_tree); @@ -3437,22 +3443,7 @@ int close_ctree(struct btrfs_root *root) iput(fs_info->btree_inode); - btrfs_stop_workers(&fs_info->generic_worker); - btrfs_stop_workers(&fs_info->fixup_workers); - btrfs_stop_workers(&fs_info->delalloc_workers); - btrfs_stop_workers(&fs_info->workers); - btrfs_stop_workers(&fs_info->endio_workers); - btrfs_stop_workers(&fs_info->endio_meta_workers); - btrfs_stop_workers(&fs_info->endio_raid56_workers); - btrfs_stop_workers(&fs_info->rmw_workers); - btrfs_stop_workers(&fs_info->endio_meta_write_workers); - btrfs_stop_workers(&fs_info->endio_write_workers); - btrfs_stop_workers(&fs_info->endio_freespace_worker); - btrfs_stop_workers(&fs_info->submit_workers); - btrfs_stop_workers(&fs_info->delayed_workers); - btrfs_stop_workers(&fs_info->caching_workers); - btrfs_stop_workers(&fs_info->readahead_workers); - btrfs_stop_workers(&fs_info->flush_workers); + btrfs_stop_all_workers(fs_info); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY if (btrfs_test_opt(root, CHECK_INTEGRITY)) -- cgit v1.2.3-70-g09d2 From 9d1a2a3ad59f7ae810bf04a5a05995bf2d79300c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 12 Mar 2013 15:13:28 +0000 Subject: btrfs: clean snapshots one by one Each time pick one dead root from the list and let the caller know if it's needed to continue. This should improve responsiveness during umount and balance which at some point waits for cleaning all currently queued dead roots. A new dead root is added to the end of the list, so the snapshots disappear in the order of deletion. The snapshot cleaning work is now done only from the cleaner thread and the others wake it if needed. Signed-off-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 17 +++++++++------ fs/btrfs/extent-tree.c | 8 ++++++++ fs/btrfs/relocation.c | 3 --- fs/btrfs/transaction.c | 56 +++++++++++++++++++++++++++++++++----------------- fs/btrfs/transaction.h | 2 +- 5 files changed, 57 insertions(+), 29 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 898263f56d9..b8ed1d4fe50 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1658,15 +1658,20 @@ static int cleaner_kthread(void *arg) struct btrfs_root *root = arg; do { + int again = 0; + if (!(root->fs_info->sb->s_flags & MS_RDONLY) && - mutex_trylock(&root->fs_info->cleaner_mutex)) { - btrfs_run_delayed_iputs(root); - btrfs_clean_old_snapshots(root); - mutex_unlock(&root->fs_info->cleaner_mutex); + down_read_trylock(&root->fs_info->sb->s_umount)) { + if (mutex_trylock(&root->fs_info->cleaner_mutex)) { + btrfs_run_delayed_iputs(root); + again = btrfs_clean_one_deleted_snapshot(root); + mutex_unlock(&root->fs_info->cleaner_mutex); + } btrfs_run_defrag_inodes(root->fs_info); + up_read(&root->fs_info->sb->s_umount); } - if (!try_to_freeze()) { + if (!try_to_freeze() && !again) { set_current_state(TASK_INTERRUPTIBLE); if (!kthread_should_stop()) schedule(); @@ -3358,8 +3363,8 @@ int btrfs_commit_super(struct btrfs_root *root) mutex_lock(&root->fs_info->cleaner_mutex); btrfs_run_delayed_iputs(root); - btrfs_clean_old_snapshots(root); mutex_unlock(&root->fs_info->cleaner_mutex); + wake_up_process(root->fs_info->cleaner_kthread); /* wait until ongoing cleanup work done */ down_write(&root->fs_info->cleanup_work_sem); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7505856df9f..be09157ff91 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7286,6 +7286,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, * reference count by one. if update_ref is true, this function * also make sure backrefs for the shared block and all lower level * blocks are properly updated. + * + * If called with for_reloc == 0, may exit early with -EAGAIN */ int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, int update_ref, @@ -7386,6 +7388,12 @@ int btrfs_drop_snapshot(struct btrfs_root *root, wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); while (1) { + if (!for_reloc && btrfs_fs_closing(root->fs_info)) { + pr_debug("btrfs: drop snapshot early exit\n"); + err = -EAGAIN; + goto out_end_trans; + } + ret = walk_down_tree(trans, root, path, wc); if (ret < 0) { err = ret; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 86f192ffc21..4ef5f7455fb 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4148,10 +4148,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) while (1) { mutex_lock(&fs_info->cleaner_mutex); - - btrfs_clean_old_snapshots(fs_info->tree_root); ret = relocate_block_group(rc); - mutex_unlock(&fs_info->cleaner_mutex); if (ret < 0) { err = ret; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 6c0a72ab6de..5a5ea99f29e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -948,7 +948,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, int btrfs_add_dead_root(struct btrfs_root *root) { spin_lock(&root->fs_info->trans_lock); - list_add(&root->root_list, &root->fs_info->dead_roots); + list_add_tail(&root->root_list, &root->fs_info->dead_roots); spin_unlock(&root->fs_info->trans_lock); return 0; } @@ -1873,31 +1873,49 @@ cleanup_transaction: } /* - * interface function to delete all the snapshots we have scheduled for deletion + * return < 0 if error + * 0 if there are no more dead_roots at the time of call + * 1 there are more to be processed, call me again + * + * The return value indicates there are certainly more snapshots to delete, but + * if there comes a new one during processing, it may return 0. We don't mind, + * because btrfs_commit_super will poke cleaner thread and it will process it a + * few seconds later. */ -int btrfs_clean_old_snapshots(struct btrfs_root *root) +int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) { - LIST_HEAD(list); + int ret; struct btrfs_fs_info *fs_info = root->fs_info; + if (fs_info->sb->s_flags & MS_RDONLY) { + pr_debug("btrfs: cleaner called for RO fs!\n"); + return 0; + } + spin_lock(&fs_info->trans_lock); - list_splice_init(&fs_info->dead_roots, &list); + if (list_empty(&fs_info->dead_roots)) { + spin_unlock(&fs_info->trans_lock); + return 0; + } + root = list_first_entry(&fs_info->dead_roots, + struct btrfs_root, root_list); + list_del(&root->root_list); spin_unlock(&fs_info->trans_lock); - while (!list_empty(&list)) { - int ret; - - root = list_entry(list.next, struct btrfs_root, root_list); - list_del(&root->root_list); + pr_debug("btrfs: cleaner removing %llu\n", + (unsigned long long)root->objectid); - btrfs_kill_all_delayed_nodes(root); + btrfs_kill_all_delayed_nodes(root); - if (btrfs_header_backref_rev(root->node) < - BTRFS_MIXED_BACKREF_REV) - ret = btrfs_drop_snapshot(root, NULL, 0, 0); - else - ret =btrfs_drop_snapshot(root, NULL, 1, 0); - BUG_ON(ret < 0); - } - return 0; + if (btrfs_header_backref_rev(root->node) < + BTRFS_MIXED_BACKREF_REV) + ret = btrfs_drop_snapshot(root, NULL, 0, 0); + else + ret = btrfs_drop_snapshot(root, NULL, 1, 0); + /* + * If we encounter a transaction abort during snapshot cleaning, we + * don't want to crash here + */ + BUG_ON(ret < 0 && ret != -EAGAIN && ret != -EROFS); + return 1; } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 3c8e0d25c8e..f6edd5e6baa 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -123,7 +123,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, int btrfs_add_dead_root(struct btrfs_root *root); int btrfs_defrag_root(struct btrfs_root *root); -int btrfs_clean_old_snapshots(struct btrfs_root *root); +int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, -- cgit v1.2.3-70-g09d2 From 09a2a8f96e3009273bed1833b3f210e2c68728a5 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 5 Apr 2013 16:51:15 -0400 Subject: Btrfs: fix bad extent logging A user sent me a btrfs-image of a file system that was panicing on mount during the log recovery. I had originally thought these problems were from a bug in the free space cache code, but that was just a symptom of the problem. The problem is if your application does something like this [prealloc][prealloc][prealloc] the internal extent maps will merge those all together into one extent map, even though on disk they are 3 separate extents. So if you go to write into one of these ranges the extent map will be right since we use the physical extent when doing the write, but when we log the extents they will use the wrong sizes for the remainder prealloc space. If this doesn't happen to trip up the free space cache (which it won't in a lot of cases) then you will get bogus entries in your extent tree which will screw stuff up later. The data and such will still work, but everything else is broken. This patch fixes this by not allowing extents that are on the modified list to be merged. This has the side effect that we are no longer adding everything to the modified list all the time, which means we now have to call btrfs_drop_extents every time we log an extent into the tree. So this allows me to drop all this speciality code I was using to get around calling btrfs_drop_extents. With this patch the testcase I've created no longer creates a bogus file system after replaying the log. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.c | 140 +----------------------------------------------- fs/btrfs/ctree.h | 4 -- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent_map.c | 18 +++++-- fs/btrfs/extent_map.h | 2 +- fs/btrfs/file.c | 14 +++-- fs/btrfs/inode.c | 34 +++--------- fs/btrfs/relocation.c | 2 +- fs/btrfs/tree-log.c | 144 +++----------------------------------------------- fs/btrfs/volumes.c | 4 +- 10 files changed, 40 insertions(+), 324 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index fe032ab6bd8..9ca0f6aefa2 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2211,9 +2211,6 @@ static noinline void unlock_up(struct btrfs_path *path, int level, int no_skips = 0; struct extent_buffer *t; - if (path->really_keep_locks) - return; - for (i = level; i < BTRFS_MAX_LEVEL; i++) { if (!path->nodes[i]) break; @@ -2261,7 +2258,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) { int i; - if (path->keep_locks || path->really_keep_locks) + if (path->keep_locks) return; for (i = level; i < BTRFS_MAX_LEVEL; i++) { @@ -2494,7 +2491,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root if (!cow) write_lock_level = -1; - if (cow && (p->really_keep_locks || p->keep_locks || p->lowest_level)) + if (cow && (p->keep_locks || p->lowest_level)) write_lock_level = BTRFS_MAX_LEVEL; min_write_lock_level = write_lock_level; @@ -5465,139 +5462,6 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) return btrfs_next_old_leaf(root, path, 0); } -/* Release the path up to but not including the given level */ -static void btrfs_release_level(struct btrfs_path *path, int level) -{ - int i; - - for (i = 0; i < level; i++) { - path->slots[i] = 0; - if (!path->nodes[i]) - continue; - if (path->locks[i]) { - btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]); - path->locks[i] = 0; - } - free_extent_buffer(path->nodes[i]); - path->nodes[i] = NULL; - } -} - -/* - * This function assumes 2 things - * - * 1) You are using path->keep_locks - * 2) You are not inserting items. - * - * If either of these are not true do not use this function. If you need a next - * leaf with either of these not being true then this function can be easily - * adapted to do that, but at the moment these are the limitations. - */ -int btrfs_next_leaf_write(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_path *path, - int del) -{ - struct extent_buffer *b; - struct btrfs_key key; - u32 nritems; - int level = 1; - int slot; - int ret = 1; - int write_lock_level = BTRFS_MAX_LEVEL; - int ins_len = del ? -1 : 0; - - WARN_ON(!(path->keep_locks || path->really_keep_locks)); - - nritems = btrfs_header_nritems(path->nodes[0]); - btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); - - while (path->nodes[level]) { - nritems = btrfs_header_nritems(path->nodes[level]); - if (!(path->locks[level] & BTRFS_WRITE_LOCK)) { -search: - btrfs_release_path(path); - ret = btrfs_search_slot(trans, root, &key, path, - ins_len, 1); - if (ret < 0) - goto out; - level = 1; - continue; - } - - if (path->slots[level] >= nritems - 1) { - level++; - continue; - } - - btrfs_release_level(path, level); - break; - } - - if (!path->nodes[level]) { - ret = 1; - goto out; - } - - path->slots[level]++; - b = path->nodes[level]; - - while (b) { - level = btrfs_header_level(b); - - if (!should_cow_block(trans, root, b)) - goto cow_done; - - btrfs_set_path_blocking(path); - ret = btrfs_cow_block(trans, root, b, - path->nodes[level + 1], - path->slots[level + 1], &b); - if (ret) - goto out; -cow_done: - path->nodes[level] = b; - btrfs_clear_path_blocking(path, NULL, 0); - if (level != 0) { - ret = setup_nodes_for_search(trans, root, path, b, - level, ins_len, - &write_lock_level); - if (ret == -EAGAIN) - goto search; - if (ret) - goto out; - - b = path->nodes[level]; - slot = path->slots[level]; - - ret = read_block_for_search(trans, root, path, - &b, level, slot, &key, 0); - if (ret == -EAGAIN) - goto search; - if (ret) - goto out; - level = btrfs_header_level(b); - if (!btrfs_try_tree_write_lock(b)) { - btrfs_set_path_blocking(path); - btrfs_tree_lock(b); - btrfs_clear_path_blocking(path, b, - BTRFS_WRITE_LOCK); - } - path->locks[level] = BTRFS_WRITE_LOCK; - path->nodes[level] = b; - path->slots[level] = 0; - } else { - path->slots[level] = 0; - ret = 0; - break; - } - } - -out: - if (ret) - btrfs_release_path(path); - - return ret; -} - int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, u64 time_seq) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5060990336a..075a8a0e49c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -586,7 +586,6 @@ struct btrfs_path { unsigned int skip_locking:1; unsigned int leave_spinning:1; unsigned int search_commit_root:1; - unsigned int really_keep_locks:1; }; /* @@ -3273,9 +3272,6 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, } int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); -int btrfs_next_leaf_write(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_path *path, - int del); int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, u64 time_seq); static inline int btrfs_next_old_item(struct btrfs_root *root, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b8ed1d4fe50..70e6b0c32db 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -222,7 +222,7 @@ static struct extent_map *btree_get_extent(struct inode *inode, em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); + ret = add_extent_mapping(em_tree, em, 0); if (ret == -EEXIST) { free_extent_map(em); em = lookup_extent_mapping(em_tree, start, len); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 2834ca5768e..ca968742c3d 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -174,6 +174,14 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next) test_bit(EXTENT_FLAG_LOGGING, &next->flags)) return 0; + /* + * We don't want to merge stuff that hasn't been written to the log yet + * since it may not reflect exactly what is on disk, and that would be + * bad. + */ + if (!list_empty(&prev->list) || !list_empty(&next->list)) + return 0; + if (extent_map_end(prev) == next->start && prev->flags == next->flags && prev->bdev == next->bdev && @@ -209,9 +217,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; em->mod_start = merge->mod_start; em->generation = max(em->generation, merge->generation); - list_move(&em->list, &tree->modified_extents); - list_del_init(&merge->list); rb_erase(&merge->rb_node, &tree->map); free_extent_map(merge); } @@ -227,7 +233,6 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) merge->in_tree = 0; em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; em->generation = max(em->generation, merge->generation); - list_del_init(&merge->list); free_extent_map(merge); } } @@ -302,7 +307,7 @@ void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em) * reference dropped if the merge attempt was successful. */ int add_extent_mapping(struct extent_map_tree *tree, - struct extent_map *em) + struct extent_map *em, int modified) { int ret = 0; struct rb_node *rb; @@ -324,7 +329,10 @@ int add_extent_mapping(struct extent_map_tree *tree, em->mod_start = em->start; em->mod_len = em->len; - try_merge_map(tree, em); + if (modified) + list_move(&em->list, &tree->modified_extents); + else + try_merge_map(tree, em); out: return ret; } diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index b18314bc5a6..61adc44b780 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -62,7 +62,7 @@ void extent_map_tree_init(struct extent_map_tree *tree); struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, u64 start, u64 len); int add_extent_mapping(struct extent_map_tree *tree, - struct extent_map *em); + struct extent_map *em, int modified); int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); struct extent_map *alloc_extent_map(void); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e81e428a884..a56abed7810 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -552,6 +552,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, int testend = 1; unsigned long flags; int compressed = 0; + bool modified; WARN_ON(end < start); if (end == (u64)-1) { @@ -561,6 +562,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, while (1) { int no_splits = 0; + modified = false; if (!split) split = alloc_extent_map(); if (!split2) @@ -592,6 +594,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); clear_bit(EXTENT_FLAG_PINNED, &em->flags); clear_bit(EXTENT_FLAG_LOGGING, &flags); + modified = !list_empty(&em->list); remove_extent_mapping(em_tree, em); if (no_splits) goto next; @@ -614,9 +617,8 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, split->bdev = em->bdev; split->flags = flags; split->compress_type = em->compress_type; - ret = add_extent_mapping(em_tree, split); + ret = add_extent_mapping(em_tree, split, modified); BUG_ON(ret); /* Logic error */ - list_move(&split->list, &em_tree->modified_extents); free_extent_map(split); split = split2; split2 = NULL; @@ -645,9 +647,8 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, split->orig_start = em->orig_start; } - ret = add_extent_mapping(em_tree, split); + ret = add_extent_mapping(em_tree, split, modified); BUG_ON(ret); /* Logic error */ - list_move(&split->list, &em_tree->modified_extents); free_extent_map(split); split = NULL; } @@ -1930,10 +1931,7 @@ out: do { btrfs_drop_extent_cache(inode, offset, end - 1, 0); write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, hole_em); - if (!ret) - list_move(&hole_em->list, - &em_tree->modified_extents); + ret = add_extent_mapping(em_tree, hole_em, 1); write_unlock(&em_tree->lock); } while (ret == -EEXIST); free_extent_map(hole_em); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 456667b04fe..c41637a1ed3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -732,10 +732,7 @@ retry: while (1) { write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); - if (!ret) - list_move(&em->list, - &em_tree->modified_extents); + ret = add_extent_mapping(em_tree, em, 1); write_unlock(&em_tree->lock); if (ret != -EEXIST) { free_extent_map(em); @@ -941,10 +938,7 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans, while (1) { write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); - if (!ret) - list_move(&em->list, - &em_tree->modified_extents); + ret = add_extent_mapping(em_tree, em, 1); write_unlock(&em_tree->lock); if (ret != -EEXIST) { free_extent_map(em); @@ -1387,10 +1381,7 @@ out_check: em->generation = -1; while (1) { write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); - if (!ret) - list_move(&em->list, - &em_tree->modified_extents); + ret = add_extent_mapping(em_tree, em, 1); write_unlock(&em_tree->lock); if (ret != -EEXIST) { free_extent_map(em); @@ -4467,10 +4458,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) while (1) { write_lock(&em_tree->lock); - err = add_extent_mapping(em_tree, hole_em); - if (!err) - list_move(&hole_em->list, - &em_tree->modified_extents); + err = add_extent_mapping(em_tree, hole_em, 1); write_unlock(&em_tree->lock); if (err != -EEXIST) break; @@ -5989,7 +5977,7 @@ static int merge_extent_mapping(struct extent_map_tree *em_tree, em->block_start += start_diff; em->block_len -= start_diff; } - return add_extent_mapping(em_tree, em); + return add_extent_mapping(em_tree, em, 0); } static noinline int uncompress_inline(struct btrfs_path *path, @@ -6283,7 +6271,7 @@ insert: err = 0; write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); + ret = add_extent_mapping(em_tree, em, 0); /* it is possible that someone inserted the extent into the tree * while we had the lock dropped. It is also possible that * an overlapping map exists in the tree @@ -6706,10 +6694,7 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start, btrfs_drop_extent_cache(inode, em->start, em->start + em->len - 1, 0); write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); - if (!ret) - list_move(&em->list, - &em_tree->modified_extents); + ret = add_extent_mapping(em_tree, em, 1); write_unlock(&em_tree->lock); } while (ret == -EEXIST); @@ -8593,10 +8578,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, while (1) { write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); - if (!ret) - list_move(&em->list, - &em_tree->modified_extents); + ret = add_extent_mapping(em_tree, em, 1); write_unlock(&em_tree->lock); if (ret != -EEXIST) break; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 4ef5f7455fb..208154986c4 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2979,7 +2979,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end, lock_extent(&BTRFS_I(inode)->io_tree, start, end); while (1) { write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); + ret = add_extent_mapping(em_tree, em, 0); write_unlock(&em_tree->lock); if (ret != -EEXIST) { free_extent_map(em); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 592aa654b5f..b069fafbc3d 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3209,115 +3209,6 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b) return 0; } -static int drop_adjacent_extents(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, - struct extent_map *em, - struct btrfs_path *path) -{ - struct btrfs_file_extent_item *fi; - struct extent_buffer *leaf; - struct btrfs_key key, new_key; - struct btrfs_map_token token; - u64 extent_end; - u64 extent_offset = 0; - int extent_type; - int del_slot = 0; - int del_nr = 0; - int ret = 0; - - while (1) { - btrfs_init_map_token(&token); - leaf = path->nodes[0]; - path->slots[0]++; - if (path->slots[0] >= btrfs_header_nritems(leaf)) { - if (del_nr) { - ret = btrfs_del_items(trans, root, path, - del_slot, del_nr); - if (ret) - return ret; - del_nr = 0; - } - - ret = btrfs_next_leaf_write(trans, root, path, 1); - if (ret < 0) - return ret; - if (ret > 0) - return 0; - leaf = path->nodes[0]; - } - - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (key.objectid != btrfs_ino(inode) || - key.type != BTRFS_EXTENT_DATA_KEY || - key.offset >= em->start + em->len) - break; - - fi = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - extent_type = btrfs_token_file_extent_type(leaf, fi, &token); - if (extent_type == BTRFS_FILE_EXTENT_REG || - extent_type == BTRFS_FILE_EXTENT_PREALLOC) { - extent_offset = btrfs_token_file_extent_offset(leaf, - fi, &token); - extent_end = key.offset + - btrfs_token_file_extent_num_bytes(leaf, fi, - &token); - } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { - extent_end = key.offset + - btrfs_file_extent_inline_len(leaf, fi); - } else { - BUG(); - } - - if (extent_end <= em->len + em->start) { - if (!del_nr) { - del_slot = path->slots[0]; - } - del_nr++; - continue; - } - - /* - * Ok so we'll ignore previous items if we log a new extent, - * which can lead to overlapping extents, so if we have an - * existing extent we want to adjust we _have_ to check the next - * guy to make sure we even need this extent anymore, this keeps - * us from panicing in set_item_key_safe. - */ - if (path->slots[0] < btrfs_header_nritems(leaf) - 1) { - struct btrfs_key tmp_key; - - btrfs_item_key_to_cpu(leaf, &tmp_key, - path->slots[0] + 1); - if (tmp_key.objectid == btrfs_ino(inode) && - tmp_key.type == BTRFS_EXTENT_DATA_KEY && - tmp_key.offset <= em->start + em->len) { - if (!del_nr) - del_slot = path->slots[0]; - del_nr++; - continue; - } - } - - BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE); - memcpy(&new_key, &key, sizeof(new_key)); - new_key.offset = em->start + em->len; - btrfs_set_item_key_safe(trans, root, path, &new_key); - extent_offset += em->start + em->len - key.offset; - btrfs_set_token_file_extent_offset(leaf, fi, extent_offset, - &token); - btrfs_set_token_file_extent_num_bytes(leaf, fi, extent_end - - (em->start + em->len), - &token); - btrfs_mark_buffer_dirty(leaf); - } - - if (del_nr) - ret = btrfs_del_items(trans, root, path, del_slot, del_nr); - - return ret; -} - static int log_one_extent(struct btrfs_trans_handle *trans, struct inode *inode, struct btrfs_root *root, struct extent_map *em, struct btrfs_path *path) @@ -3339,39 +3230,24 @@ static int log_one_extent(struct btrfs_trans_handle *trans, int index = log->log_transid % 2; bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; -insert: + ret = __btrfs_drop_extents(trans, log, inode, path, em->start, + em->start + em->len, NULL, 0); + if (ret) + return ret; + INIT_LIST_HEAD(&ordered_sums); btrfs_init_map_token(&token); key.objectid = btrfs_ino(inode); key.type = BTRFS_EXTENT_DATA_KEY; key.offset = em->start; - path->really_keep_locks = 1; ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*fi)); - if (ret && ret != -EEXIST) { - path->really_keep_locks = 0; + if (ret) return ret; - } leaf = path->nodes[0]; fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - /* - * If we are overwriting an inline extent with a real one then we need - * to just delete the inline extent as it may not be large enough to - * have the entire file_extent_item. - */ - if (ret && btrfs_token_file_extent_type(leaf, fi, &token) == - BTRFS_FILE_EXTENT_INLINE) { - ret = btrfs_del_item(trans, log, path); - btrfs_release_path(path); - if (ret) { - path->really_keep_locks = 0; - return ret; - } - goto insert; - } - btrfs_set_token_file_extent_generation(leaf, fi, em->generation, &token); if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { @@ -3417,15 +3293,7 @@ insert: btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token); btrfs_mark_buffer_dirty(leaf); - /* - * Have to check the extent to the right of us to make sure it doesn't - * fall in our current range. We're ok if the previous extent is in our - * range since the recovery stuff will run us in key order and thus just - * drop the part we overwrote. - */ - ret = drop_adjacent_extents(trans, log, inode, em, path); btrfs_release_path(path); - path->really_keep_locks = 0; if (ret) { return ret; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3994767ece4..0d7ab7eab2a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3932,7 +3932,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, em_tree = &extent_root->fs_info->mapping_tree.map_tree; write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); + ret = add_extent_mapping(em_tree, em, 0); write_unlock(&em_tree->lock); if (ret) { free_extent_map(em); @@ -5476,7 +5476,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, } write_lock(&map_tree->map_tree.lock); - ret = add_extent_mapping(&map_tree->map_tree, em); + ret = add_extent_mapping(&map_tree->map_tree, em, 0); write_unlock(&map_tree->map_tree.lock); BUG_ON(ret); /* Tree corruption */ free_extent_map(em); -- cgit v1.2.3-70-g09d2 From f2f6ed3d54648ec19dcdeec30f66843cf7a38487 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Sun, 7 Apr 2013 10:50:16 +0000 Subject: Btrfs: introduce a mutex lock for btrfs quota operations The original code has one spin_lock 'qgroup_lock' to protect quota configurations in memory. If we want to add a BTRFS_QGROUP_INFO_KEY, it will be added to Btree firstly, and then update configurations in memory,however, a race condition may happen between these operations. For example: ->add_qgroup_info_item() ->add_qgroup_rb() For the above case, del_qgroup_info_item() may happen just before add_qgroup_rb(). What's worse, when we want to add a qgroup relation: ->add_qgroup_relation_item() ->add_qgroup_relations() We don't have any checks whether 'src' and 'dst' exist before add_qgroup_relation_item(), a race condition can also happen for the above case. To avoid race condition and have all the necessary checks, we introduce a mutex lock 'qgroup_ioctl_lock', and we make all the user change operations protected by the mutex lock. Signed-off-by: Wang Shilong Reviewed-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 3 ++ fs/btrfs/disk-io.c | 1 + fs/btrfs/qgroup.c | 82 +++++++++++++++++++++++++++++++++++++----------------- 3 files changed, 61 insertions(+), 25 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 075a8a0e49c..1a850402937 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1583,6 +1583,9 @@ struct btrfs_fs_info { struct rb_root qgroup_tree; spinlock_t qgroup_lock; + /* protect user change for quota operations */ + struct mutex qgroup_ioctl_lock; + /* list of dirty qgroups to be written at next commit */ struct list_head dirty_qgroups; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 70e6b0c32db..9f83e5b870d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2213,6 +2213,7 @@ int open_ctree(struct super_block *sb, mutex_init(&fs_info->dev_replace.lock); spin_lock_init(&fs_info->qgroup_lock); + mutex_init(&fs_info->qgroup_ioctl_lock); fs_info->qgroup_tree = RB_ROOT; INIT_LIST_HEAD(&fs_info->dirty_qgroups); fs_info->qgroup_seq = 1; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 5be5a39dedc..0a1f6861ae9 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -791,6 +791,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, int ret = 0; int slot; + mutex_lock(&fs_info->qgroup_ioctl_lock); spin_lock(&fs_info->qgroup_lock); if (fs_info->quota_root) { fs_info->pending_quota_state = 1; @@ -900,6 +901,7 @@ out_free_root: kfree(quota_root); } out: + mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } @@ -910,10 +912,11 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, struct btrfs_root *quota_root; int ret = 0; + mutex_lock(&fs_info->qgroup_ioctl_lock); spin_lock(&fs_info->qgroup_lock); if (!fs_info->quota_root) { spin_unlock(&fs_info->qgroup_lock); - return 0; + goto out; } fs_info->quota_enabled = 0; fs_info->pending_quota_state = 0; @@ -922,8 +925,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, btrfs_free_qgroup_config(fs_info); spin_unlock(&fs_info->qgroup_lock); - if (!quota_root) - return -EINVAL; + if (!quota_root) { + ret = -EINVAL; + goto out; + } ret = btrfs_clean_quota_tree(trans, quota_root); if (ret) @@ -944,6 +949,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, free_extent_buffer(quota_root->commit_root); kfree(quota_root); out: + mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } @@ -959,24 +965,28 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, struct btrfs_root *quota_root; int ret = 0; + mutex_lock(&fs_info->qgroup_ioctl_lock); quota_root = fs_info->quota_root; - if (!quota_root) - return -EINVAL; + if (!quota_root) { + ret = -EINVAL; + goto out; + } ret = add_qgroup_relation_item(trans, quota_root, src, dst); if (ret) - return ret; + goto out; ret = add_qgroup_relation_item(trans, quota_root, dst, src); if (ret) { del_qgroup_relation_item(trans, quota_root, src, dst); - return ret; + goto out; } spin_lock(&fs_info->qgroup_lock); ret = add_relation_rb(quota_root->fs_info, src, dst); spin_unlock(&fs_info->qgroup_lock); - +out: + mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } @@ -987,9 +997,12 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, int ret = 0; int err; + mutex_lock(&fs_info->qgroup_ioctl_lock); quota_root = fs_info->quota_root; - if (!quota_root) - return -EINVAL; + if (!quota_root) { + ret = -EINVAL; + goto out; + } ret = del_qgroup_relation_item(trans, quota_root, src, dst); err = del_qgroup_relation_item(trans, quota_root, dst, src); @@ -1000,7 +1013,8 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, del_relation_rb(fs_info, src, dst); spin_unlock(&fs_info->qgroup_lock); - +out: + mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } @@ -1011,9 +1025,12 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans, struct btrfs_qgroup *qgroup; int ret = 0; + mutex_lock(&fs_info->qgroup_ioctl_lock); quota_root = fs_info->quota_root; - if (!quota_root) - return -EINVAL; + if (!quota_root) { + ret = -EINVAL; + goto out; + } ret = add_qgroup_item(trans, quota_root, qgroupid); @@ -1023,7 +1040,8 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans, if (IS_ERR(qgroup)) ret = PTR_ERR(qgroup); - +out: + mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } @@ -1034,9 +1052,12 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, struct btrfs_qgroup *qgroup; int ret = 0; + mutex_lock(&fs_info->qgroup_ioctl_lock); quota_root = fs_info->quota_root; - if (!quota_root) - return -EINVAL; + if (!quota_root) { + ret = -EINVAL; + goto out; + } /* check if there are no relations to this qgroup */ spin_lock(&fs_info->qgroup_lock); @@ -1044,7 +1065,8 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, if (qgroup) { if (!list_empty(&qgroup->groups) || !list_empty(&qgroup->members)) { spin_unlock(&fs_info->qgroup_lock); - return -EBUSY; + ret = -EBUSY; + goto out; } } spin_unlock(&fs_info->qgroup_lock); @@ -1054,7 +1076,8 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, spin_lock(&fs_info->qgroup_lock); del_qgroup_rb(quota_root->fs_info, qgroupid); spin_unlock(&fs_info->qgroup_lock); - +out: + mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } @@ -1062,12 +1085,16 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 qgroupid, struct btrfs_qgroup_limit *limit) { - struct btrfs_root *quota_root = fs_info->quota_root; + struct btrfs_root *quota_root; struct btrfs_qgroup *qgroup; int ret = 0; - if (!quota_root) - return -EINVAL; + mutex_lock(&fs_info->qgroup_ioctl_lock); + quota_root = fs_info->quota_root; + if (!quota_root) { + ret = -EINVAL; + goto out; + } ret = update_qgroup_limit_item(trans, quota_root, qgroupid, limit->flags, limit->max_rfer, @@ -1094,7 +1121,8 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, unlock: spin_unlock(&fs_info->qgroup_lock); - +out: + mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } @@ -1392,11 +1420,14 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, struct btrfs_qgroup *dstgroup; u32 level_size = 0; + mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_enabled) - return 0; + goto out; - if (!quota_root) - return -EINVAL; + if (!quota_root) { + ret = -EINVAL; + goto out; + } /* * create a tracking group for the subvol itself @@ -1523,6 +1554,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, unlock: spin_unlock(&fs_info->qgroup_lock); out: + mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } -- cgit v1.2.3-70-g09d2 From ceda08642459e31673d24d7968d864390d2ce5fa Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 11 Apr 2013 10:30:16 +0000 Subject: Btrfs: use a lock to protect incompat/compat flag of the super block The following case will make the incompat/compat flag of the super block be recovered. Task1 |Task2 flags = btrfs_super_incompat_flags(); | |flags = btrfs_super_incompat_flags(); flags |= new_flag1; | |flags |= new_flag2; btrfs_set_super_incompat_flags(flags); | |btrfs_set_super_incompat_flags(flags); the new_flag1 is recovered. In order to avoid this problem, we introduce a lock named super_lock into the btrfs_fs_info structure. If we want to update incompat/compat flags of the super block, we must hold it. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 22 ++++++++++++++++++++-- fs/btrfs/disk-io.c | 5 +++++ fs/btrfs/volumes.c | 10 +--------- 3 files changed, 26 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1a850402937..c3b15f8dca0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1362,6 +1362,17 @@ struct btrfs_fs_info { wait_queue_head_t transaction_blocked_wait; wait_queue_head_t async_submit_wait; + /* + * Used to protect the incompat_flags, compat_flags, compat_ro_flags + * when they are updated. + * + * Because we do not clear the flags for ever, so we needn't use + * the lock on the read side. + * + * We also needn't use the lock when we mount the fs, because + * there is no other task which will update the flag. + */ + spinlock_t super_lock; struct btrfs_super_block *super_copy; struct btrfs_super_block *super_for_commit; struct block_device *__bdev; @@ -3688,8 +3699,15 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, disk_super = fs_info->super_copy; features = btrfs_super_incompat_flags(disk_super); if (!(features & flag)) { - features |= flag; - btrfs_set_super_incompat_flags(disk_super, features); + spin_lock(&fs_info->super_lock); + features = btrfs_super_incompat_flags(disk_super); + if (!(features & flag)) { + features |= flag; + btrfs_set_super_incompat_flags(disk_super, features); + printk(KERN_INFO "btrfs: setting %llu feature flag\n", + flag); + } + spin_unlock(&fs_info->super_lock); } } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9f83e5b870d..8a7a366267e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2086,6 +2086,7 @@ int open_ctree(struct super_block *sb, spin_lock_init(&fs_info->defrag_inodes_lock); spin_lock_init(&fs_info->free_chunk_lock); spin_lock_init(&fs_info->tree_mod_seq_lock); + spin_lock_init(&fs_info->super_lock); rwlock_init(&fs_info->tree_mod_log_lock); mutex_init(&fs_info->reloc_mutex); seqlock_init(&fs_info->profiles_lock); @@ -2349,6 +2350,10 @@ int open_ctree(struct super_block *sb, goto fail_alloc; } + /* + * Needn't use the lock because there is no other task which will + * update the flag. + */ btrfs_set_super_incompat_flags(disk_super, features); features = btrfs_super_compat_ro_flags(disk_super) & diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5aa52ee5c25..76ded9eb77a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3674,18 +3674,10 @@ static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target) static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) { - u64 features; - if (!(type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6))) return; - features = btrfs_super_incompat_flags(info->super_copy); - if (features & BTRFS_FEATURE_INCOMPAT_RAID56) - return; - - features |= BTRFS_FEATURE_INCOMPAT_RAID56; - btrfs_set_super_incompat_flags(info->super_copy, features); - printk(KERN_INFO "btrfs: setting RAID5/6 feature flag\n"); + btrfs_set_fs_incompat(info, RAID56); } static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, -- cgit v1.2.3-70-g09d2 From b8d7f3ac10a865ca727f9373a796ef8537e3a474 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 17 Apr 2013 10:17:05 -0400 Subject: Btrfs: don't force pages under writeback to finish when aborting Dave reported a BUG_ON() that happened in end_page_writeback() after an abort. This happened because we unconditionally call end_page_writeback() in the endio case, which is right. However when we abort the transaction we will call end_page_writeback() on any writeback pages we find, which is wrong. We need to lock the page and wait on page writeback to complete if it is. There is nothing unsafe about this since we are discarding the transaction anyway. Thanks, Reported-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8a7a366267e..aab7d89f3ff 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3763,10 +3763,9 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, if (eb) ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); - if (PageWriteback(page)) - end_page_writeback(page); - lock_page(page); + + wait_on_page_writeback(page); if (PageDirty(page)) { clear_page_dirty_for_io(page); spin_lock_irq(&page->mapping->tree_lock); -- cgit v1.2.3-70-g09d2 From 79fb65a1f6d97febc232e1e4e56019b0c9208541 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Sat, 20 Apr 2013 10:18:27 -0400 Subject: Btrfs: don't call readahead hook until we have read the entire eb Martin Steigerwald reported a BUG_ON() where we were given a bogus bytenr to map. Turns out he is using > PAGESIZE leafsizes. The readahead stuff is called every time we do a completion, but we may not have finished reading in all the pages, so the bytenr we read off the node could be completely bogus. Fix this by only calling the readahead hook once all pages have been read in. Thanks, Reported-by: Martin Steigerwald Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index aab7d89f3ff..bb6cdbda4f5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -636,10 +636,9 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, if (!ret) set_extent_buffer_uptodate(eb); err: - if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) { - clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags); + if (reads_done && + test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) btree_readahead_hook(root, eb, eb->start, ret); - } if (ret) { /* -- cgit v1.2.3-70-g09d2 From 1c24c3ce6a8022ab225f44160bfddc92a5a6e435 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 23 Apr 2013 11:30:14 -0400 Subject: Btrfs: add tree block level sanity check With a users corrupted fs I was getting weird behavior and panics and it turns out it was because one of his tree blocks had a bogus header level. So add this to the sanity checks in the endio handler for tree blocks. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bb6cdbda4f5..c7d8fb0dbff 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -613,6 +613,12 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, goto err; } found_level = btrfs_header_level(eb); + if (found_level >= BTRFS_MAX_LEVEL) { + btrfs_info(root->fs_info, "bad tree block level %d\n", + (int)btrfs_header_level(eb)); + ret = -EIO; + goto err; + } btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb, found_level); -- cgit v1.2.3-70-g09d2 From 416bc6580bb01ddf67befaaeb94f087b392e7f47 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 23 Apr 2013 14:17:42 -0400 Subject: Btrfs: fix all callers of read_tree_block We kept leaking extent buffers when mounting a broken file system and it turns out it's because not everybody uses read_tree_block properly. You need to check and make sure the extent_buffer is uptodate before you use it. This patch fixes everybody who calls read_tree_block directly to make sure they check that it is uptodate and free it and return an error if it is not. With this we no longer leak EB's when things go horribly wrong. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/backref.c | 10 ++++++++-- fs/btrfs/ctree.c | 21 ++++++++++++++++----- fs/btrfs/disk-io.c | 19 +++++++++++++++++-- fs/btrfs/extent-tree.c | 4 +++- fs/btrfs/relocation.c | 18 +++++++++++++++--- 5 files changed, 59 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 23e927b191c..04b5b309389 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -423,7 +423,10 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info, BUG_ON(!ref->wanted_disk_byte); eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte, fs_info->tree_root->leafsize, 0); - BUG_ON(!eb); + if (!eb || !extent_buffer_uptodate(eb)) { + free_extent_buffer(eb); + return -EIO; + } btrfs_tree_read_lock(eb); if (btrfs_header_level(eb) == 0) btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0); @@ -913,7 +916,10 @@ again: info_level); eb = read_tree_block(fs_info->extent_root, ref->parent, bsz, 0); - BUG_ON(!eb); + if (!eb || !extent_buffer_uptodate(eb)) { + free_extent_buffer(eb); + return -EIO; + } ret = find_extent_in_eb(eb, bytenr, *extent_item_pos, &eie); ref->inode_list = eie; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 566d99b51be..2bc34408872 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1281,7 +1281,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq) free_extent_buffer(eb_root); blocksize = btrfs_level_size(root, old_root->level); old = read_tree_block(root, logical, blocksize, 0); - if (!old) { + if (!old || !extent_buffer_uptodate(old)) { + free_extent_buffer(old); pr_warn("btrfs: failed to read tree block %llu from get_old_root\n", logical); WARN_ON(1); @@ -1526,8 +1527,10 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, if (!cur) { cur = read_tree_block(root, blocknr, blocksize, gen); - if (!cur) + if (!cur || !extent_buffer_uptodate(cur)) { + free_extent_buffer(cur); return -EIO; + } } else if (!uptodate) { err = btrfs_read_buffer(cur, gen); if (err) { @@ -1692,6 +1695,8 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, struct extent_buffer *parent, int slot) { int level = btrfs_header_level(parent); + struct extent_buffer *eb; + if (slot < 0) return NULL; if (slot >= btrfs_header_nritems(parent)) @@ -1699,9 +1704,15 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, BUG_ON(level == 0); - return read_tree_block(root, btrfs_node_blockptr(parent, slot), - btrfs_level_size(root, level - 1), - btrfs_node_ptr_generation(parent, slot)); + eb = read_tree_block(root, btrfs_node_blockptr(parent, slot), + btrfs_level_size(root, level - 1), + btrfs_node_ptr_generation(parent, slot)); + if (eb && !extent_buffer_uptodate(eb)) { + free_extent_buffer(eb); + eb = NULL; + } + + return eb; } /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c7d8fb0dbff..deb6e3d0728 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1496,6 +1496,14 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), blocksize, generation); + if (!root->node || !extent_buffer_uptodate(root->node)) { + ret = (!root->node) ? -ENOMEM : -EIO; + + free_extent_buffer(root->node); + kfree(root); + return ERR_PTR(ret); + } + root->commit_root = btrfs_root_node(root); BUG_ON(!root->node); /* -ENOMEM */ out: @@ -2515,8 +2523,8 @@ int open_ctree(struct super_block *sb, chunk_root->node = read_tree_block(chunk_root, btrfs_super_chunk_root(disk_super), blocksize, generation); - BUG_ON(!chunk_root->node); /* -ENOMEM */ - if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { + if (!chunk_root->node || + !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", sb->s_id); goto fail_tree_roots; @@ -2701,6 +2709,13 @@ retry_root_backup: log_tree_root->node = read_tree_block(tree_root, bytenr, blocksize, generation + 1); + if (!log_tree_root->node || + !extent_buffer_uptodate(log_tree_root->node)) { + printk(KERN_ERR "btrfs: failed to read log tree\n"); + free_extent_buffer(log_tree_root->node); + kfree(log_tree_root); + goto fail_trans_kthread; + } /* returns with log_tree_root freed on success */ ret = btrfs_recover_log_trees(log_tree_root); if (ret) { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 94bed61b799..6526f1faf6c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7087,8 +7087,10 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, if (reada && level == 1) reada_walk_down(trans, root, wc, path); next = read_tree_block(root, bytenr, blocksize, generation); - if (!next) + if (!next || !extent_buffer_uptodate(next)) { + free_extent_buffer(next); return -EIO; + } btrfs_tree_lock(next); btrfs_set_lock_blocking(next); } diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 208154986c4..63cdd9246c7 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1771,7 +1771,11 @@ again: eb = read_tree_block(dest, old_bytenr, blocksize, old_ptr_gen); - BUG_ON(!eb); + if (!eb || !extent_buffer_uptodate(eb)) { + ret = (!eb) ? -ENOMEM : -EIO; + free_extent_buffer(eb); + return ret; + } btrfs_tree_lock(eb); if (cow) { ret = btrfs_cow_block(trans, dest, eb, parent, @@ -1924,6 +1928,10 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, bytenr = btrfs_node_blockptr(eb, path->slots[i]); blocksize = btrfs_level_size(root, i - 1); eb = read_tree_block(root, bytenr, blocksize, ptr_gen); + if (!eb || !extent_buffer_uptodate(eb)) { + free_extent_buffer(eb); + return -EIO; + } BUG_ON(btrfs_header_level(eb) != i - 1); path->nodes[i - 1] = eb; path->slots[i - 1] = 0; @@ -2601,7 +2609,8 @@ static int do_relocation(struct btrfs_trans_handle *trans, blocksize = btrfs_level_size(root, node->level); generation = btrfs_node_ptr_generation(upper->eb, slot); eb = read_tree_block(root, bytenr, blocksize, generation); - if (!eb) { + if (!eb || !extent_buffer_uptodate(eb)) { + free_extent_buffer(eb); err = -EIO; goto next; } @@ -2762,7 +2771,10 @@ static int get_tree_block_key(struct reloc_control *rc, BUG_ON(block->key_ready); eb = read_tree_block(rc->extent_root, block->bytenr, block->key.objectid, block->key.offset); - BUG_ON(!eb); + if (!eb || !extent_buffer_uptodate(eb)) { + free_extent_buffer(eb); + return -EIO; + } WARN_ON(btrfs_header_level(eb) != block->level); if (block->level == 0) btrfs_item_key_to_cpu(eb, &block->key, 0); -- cgit v1.2.3-70-g09d2 From 171f6537abb1929ab9072ed084872273300ca175 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 24 Apr 2013 16:35:41 -0400 Subject: Btrfs: cleanup fs roots if we fail to mount We can run the tree logging recovery or the orphan cleanup on mount, so we'll end up looking up a random fs tree in the meantime. So we need to clean this up so we don't leave extent buffers hanging around on the cache. With this patch we no longer leak extent buffers on failure to mount. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 62 +++++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 31 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index deb6e3d0728..b20f1121efa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2011,6 +2011,36 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) } } +static void del_fs_roots(struct btrfs_fs_info *fs_info) +{ + int ret; + struct btrfs_root *gang[8]; + int i; + + while (!list_empty(&fs_info->dead_roots)) { + gang[0] = list_entry(fs_info->dead_roots.next, + struct btrfs_root, root_list); + list_del(&gang[0]->root_list); + + if (gang[0]->in_radix) { + btrfs_free_fs_root(fs_info, gang[0]); + } else { + free_extent_buffer(gang[0]->node); + free_extent_buffer(gang[0]->commit_root); + kfree(gang[0]); + } + } + + while (1) { + ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, + (void **)gang, 0, + ARRAY_SIZE(gang)); + if (!ret) + break; + for (i = 0; i < ret; i++) + btrfs_free_fs_root(fs_info, gang[i]); + } +} int open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices, @@ -2795,6 +2825,7 @@ fail_qgroup: btrfs_free_qgroup_config(fs_info); fail_trans_kthread: kthread_stop(fs_info->transaction_kthread); + del_fs_roots(fs_info); fail_cleaner: kthread_stop(fs_info->cleaner_kthread); @@ -3323,37 +3354,6 @@ static void free_fs_root(struct btrfs_root *root) kfree(root); } -static void del_fs_roots(struct btrfs_fs_info *fs_info) -{ - int ret; - struct btrfs_root *gang[8]; - int i; - - while (!list_empty(&fs_info->dead_roots)) { - gang[0] = list_entry(fs_info->dead_roots.next, - struct btrfs_root, root_list); - list_del(&gang[0]->root_list); - - if (gang[0]->in_radix) { - btrfs_free_fs_root(fs_info, gang[0]); - } else { - free_extent_buffer(gang[0]->node); - free_extent_buffer(gang[0]->commit_root); - kfree(gang[0]); - } - } - - while (1) { - ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, - (void **)gang, 0, - ARRAY_SIZE(gang)); - if (!ret) - break; - for (i = 0; i < ret; i++) - btrfs_free_fs_root(fs_info, gang[i]); - } -} - int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) { u64 root_objectid = 0; -- cgit v1.2.3-70-g09d2 From fd8b2b611580929ab1aa01e3942dce20f9e95732 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 24 Apr 2013 16:41:19 -0400 Subject: Btrfs: cleanup destroy_marked_extents We can just look up the extent_buffers for the range and free stuff that way. This makes the cleanup a bit cleaner and we can make sure to evict the extent_buffers pretty quickly by marking them as stale. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 40 +++++++++------------------------------- fs/btrfs/extent_io.c | 2 +- fs/btrfs/extent_io.h | 1 + 3 files changed, 11 insertions(+), 32 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b20f1121efa..aff571cb6e0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3752,13 +3752,9 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, int mark) { int ret; - struct page *page; - struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; u64 start = 0; u64 end; - u64 offset; - unsigned long index; while (1) { ret = find_first_extent_bit(dirty_pages, start, &start, &end, @@ -3768,35 +3764,17 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); while (start <= end) { - index = start >> PAGE_CACHE_SHIFT; - start = (u64)(index + 1) << PAGE_CACHE_SHIFT; - page = find_get_page(btree_inode->i_mapping, index); - if (!page) + eb = btrfs_find_tree_block(root, start, + root->leafsize); + start += eb->len; + if (!eb) continue; - offset = page_offset(page); - - spin_lock(&dirty_pages->buffer_lock); - eb = radix_tree_lookup( - &(&BTRFS_I(page->mapping->host)->io_tree)->buffer, - offset >> PAGE_CACHE_SHIFT); - spin_unlock(&dirty_pages->buffer_lock); - if (eb) - ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY, - &eb->bflags); - lock_page(page); - - wait_on_page_writeback(page); - if (PageDirty(page)) { - clear_page_dirty_for_io(page); - spin_lock_irq(&page->mapping->tree_lock); - radix_tree_tag_clear(&page->mapping->page_tree, - page_index(page), - PAGECACHE_TAG_DIRTY); - spin_unlock_irq(&page->mapping->tree_lock); - } + wait_on_extent_buffer_writeback(eb); - unlock_page(page); - page_cache_release(page); + if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, + &eb->bflags)) + clear_extent_buffer_dirty(eb); + free_extent_buffer_stale(eb); } } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 2107a06679c..d9a82f261e0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3162,7 +3162,7 @@ static int eb_wait(void *word) return 0; } -static void wait_on_extent_buffer_writeback(struct extent_buffer *eb) +void wait_on_extent_buffer_writeback(struct extent_buffer *eb) { wait_on_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK, eb_wait, TASK_UNINTERRUPTIBLE); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index db009d80bef..fa86861de24 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -282,6 +282,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb); int read_extent_buffer_pages(struct extent_io_tree *tree, struct extent_buffer *eb, u64 start, int wait, get_extent_t *get_extent, int mirror_num); +void wait_on_extent_buffer_writeback(struct extent_buffer *eb); static inline unsigned long num_extent_pages(u64 start, u64 len) { -- cgit v1.2.3-70-g09d2 From 54067ae95e1547123fe6ffcf80842e234effd53b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 25 Apr 2013 13:44:38 -0400 Subject: Btrfs: various abort cleanups I have a broken file system that when it aborts leaves all sorts of accounting things wrong and gives you lots of WARN_ON()'s other than the abort. This is because we're not cleaning up various parts of the file system when we abort. The first chunks are specific to mount failures, we weren't cleaning up the block group cached inodes and we weren't cleaning up any transactions that had been aborted, which leaves a bunch of things laying around. The second half of this are related to the cleanup parts. First we don't need to release space for the dirty pages from the trans_block_rsv, that's all handled by the trans handles so this is just plain wrong. The other thing is we need to pin down extents that were set ->must_insert_reserved for delayed refs. This isn't so much for the pinning but more for the cleaning up the cache->reserved counter since we are no longer going to use those reserved bytes. With this patch I no longer see a bunch of WARN_ON()'s when I try to mount this broken file system, just the initial one from the abort. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index aff571cb6e0..84c4fa503ef 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2826,6 +2826,7 @@ fail_qgroup: fail_trans_kthread: kthread_stop(fs_info->transaction_kthread); del_fs_roots(fs_info); + btrfs_cleanup_transaction(fs_info->tree_root); fail_cleaner: kthread_stop(fs_info->cleaner_kthread); @@ -2836,6 +2837,7 @@ fail_cleaner: filemap_write_and_wait(fs_info->btree_inode->i_mapping); fail_block_groups: + btrfs_put_block_group_cache(fs_info); btrfs_free_block_groups(fs_info); fail_tree_roots: @@ -3681,6 +3683,9 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, continue; } + if (head->must_insert_reserved) + btrfs_pin_extent(root, ref->bytenr, + ref->num_bytes, 1); btrfs_free_delayed_extent_op(head->extent_op); delayed_refs->num_heads--; if (list_empty(&head->cluster)) @@ -3876,10 +3881,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) btrfs_destroy_delayed_refs(t, root); - btrfs_block_rsv_release(root, - &root->fs_info->trans_block_rsv, - t->dirty_pages.dirty_bytes); - /* FIXME: cleanup wait for commit */ t->in_commit = 1; t->blocked = 1; -- cgit v1.2.3-70-g09d2 From 5fbf83c10c323cbee61483a06ea61883e3c83e6b Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Fri, 19 Apr 2013 15:08:04 +0000 Subject: Btrfs: delete unused parameter to btrfs_read_root_item() Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 5 ++--- fs/btrfs/disk-io.c | 2 +- fs/btrfs/root-tree.c | 7 +++---- 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 97468780815..37c4da3403d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3359,9 +3359,8 @@ int __must_check btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_root_item *item); -void btrfs_read_root_item(struct btrfs_root *root, - struct extent_buffer *eb, int slot, - struct btrfs_root_item *item); +void btrfs_read_root_item(struct extent_buffer *eb, int slot, + struct btrfs_root_item *item); int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key); int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 84c4fa503ef..a03a9665054 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1481,7 +1481,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, if (ret == 0) { l = path->nodes[0]; slot = path->slots[0]; - btrfs_read_root_item(tree_root, l, slot, &root->root_item); + btrfs_read_root_item(l, slot, &root->root_item); memcpy(&root->root_key, location, sizeof(*location)); } btrfs_free_path(path); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 668af537a3e..5bf1ed57f17 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -29,9 +29,8 @@ * generation numbers as then we know the root was once mounted with an older * kernel that was not aware of the root item structure change. */ -void btrfs_read_root_item(struct btrfs_root *root, - struct extent_buffer *eb, int slot, - struct btrfs_root_item *item) +void btrfs_read_root_item(struct extent_buffer *eb, int slot, + struct btrfs_root_item *item) { uuid_le uuid; int len; @@ -104,7 +103,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, goto out; } if (item) - btrfs_read_root_item(root, l, slot, item); + btrfs_read_root_item(l, slot, item); if (key) memcpy(key, &found_key, sizeof(found_key)); -- cgit v1.2.3-70-g09d2 From 6463fe58ea60cbcc3e799937dd0877466fc7b8d5 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Fri, 19 Apr 2013 15:08:05 +0000 Subject: Btrfs: set UUID in root_item for created trees It is a rare exception that a new tree is created, like the qgroups tree. So far these new trees have an all-zero UUID in their root items. All trees that mkfs.btrfs has created get an UUID during the first mount when btrfs_read_root_item() rewrites the root_item to the v2 structure style. These UUID are never used so far, but anyway, since it is better to have it uniform for all trees, this commit adds some lines that generate and write an UUID for newly created trees. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a03a9665054..e4488b57a7a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include "compat.h" #include "ctree.h" @@ -1280,6 +1281,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, struct btrfs_key key; int ret = 0; u64 bytenr; + uuid_le uuid; root = btrfs_alloc_root(fs_info); if (!root) @@ -1329,6 +1331,8 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, btrfs_set_root_used(&root->root_item, leaf->len); btrfs_set_root_last_snapshot(&root->root_item, 0); btrfs_set_root_dirid(&root->root_item, 0); + uuid_le_gen(&uuid); + memcpy(root->root_item.uuid, uuid.b, BTRFS_UUID_SIZE); root->root_item.drop_level = 0; key.objectid = objectid; -- cgit v1.2.3-70-g09d2 From fc36ed7e0b13955ba66fc56dc5067e67ac105150 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Wed, 24 Apr 2013 16:57:33 +0000 Subject: Btrfs: separate sequence numbers for delayed ref tracking and tree mod log Sequence numbers for delayed refs have been introduced in the first version of the qgroup patch set. To solve the problem of find_all_roots on a busy file system, the tree mod log was introduced. The sequence numbers for that were simply shared between those two users. However, at one point in qgroup's quota accounting, there's a statement accessing the previous sequence number, that's still just doing (seq - 1) just as it would have to in the very first version. To satisfy that requirement, this patch makes the sequence number counter 64 bit and splits it into a major part (used for qgroup sequence number counting) and a minor part (incremented for each tree modification in the log). This enables us to go exactly one major step backwards, as required for qgroups, while still incrementing the sequence counter for tree mod log insertions to keep track of their order. Keeping them in a single variable means there's no need to change all the code dealing with comparisons of two sequence numbers. The sequence number is reset to 0 on commit (not new in this patch), which ensures we won't overflow the two 32 bit counters. Without this fix, the qgroup tracking can occasionally go wrong and WARN_ONs from the tree mod log code may happen. Signed-off-by: Jan Schmidt Signed-off-by: Josef Bacik --- fs/btrfs/ctree.c | 47 ++++++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/ctree.h | 7 ++----- fs/btrfs/delayed-ref.c | 6 ++++-- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 5 +++-- fs/btrfs/qgroup.c | 13 ++++++++----- fs/btrfs/transaction.c | 2 +- 7 files changed, 63 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 2bc34408872..a17d9991c33 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -360,6 +360,44 @@ static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info) write_unlock(&fs_info->tree_mod_log_lock); } +/* + * Increment the upper half of tree_mod_seq, set lower half zero. + * + * Must be called with fs_info->tree_mod_seq_lock held. + */ +static inline u64 btrfs_inc_tree_mod_seq_major(struct btrfs_fs_info *fs_info) +{ + u64 seq = atomic64_read(&fs_info->tree_mod_seq); + seq &= 0xffffffff00000000ull; + seq += 1ull << 32; + atomic64_set(&fs_info->tree_mod_seq, seq); + return seq; +} + +/* + * Increment the lower half of tree_mod_seq. + * + * Must be called with fs_info->tree_mod_seq_lock held. The way major numbers + * are generated should not technically require a spin lock here. (Rationale: + * incrementing the minor while incrementing the major seq number is between its + * atomic64_read and atomic64_set calls doesn't duplicate sequence numbers, it + * just returns a unique sequence number as usual.) We have decided to leave + * that requirement in here and rethink it once we notice it really imposes a + * problem on some workload. + */ +static inline u64 btrfs_inc_tree_mod_seq_minor(struct btrfs_fs_info *fs_info) +{ + return atomic64_inc_return(&fs_info->tree_mod_seq); +} + +/* + * return the last minor in the previous major tree_mod_seq number + */ +u64 btrfs_tree_mod_seq_prev(u64 seq) +{ + return (seq & 0xffffffff00000000ull) - 1ull; +} + /* * This adds a new blocker to the tree mod log's blocker list if the @elem * passed does not already have a sequence number set. So when a caller expects @@ -376,10 +414,10 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, tree_mod_log_write_lock(fs_info); spin_lock(&fs_info->tree_mod_seq_lock); if (!elem->seq) { - elem->seq = btrfs_inc_tree_mod_seq(fs_info); + elem->seq = btrfs_inc_tree_mod_seq_major(fs_info); list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); } - seq = btrfs_inc_tree_mod_seq(fs_info); + seq = btrfs_inc_tree_mod_seq_minor(fs_info); spin_unlock(&fs_info->tree_mod_seq_lock); tree_mod_log_write_unlock(fs_info); @@ -524,7 +562,10 @@ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, if (!tm) return -ENOMEM; - tm->seq = btrfs_inc_tree_mod_seq(fs_info); + spin_lock(&fs_info->tree_mod_seq_lock); + tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info); + spin_unlock(&fs_info->tree_mod_seq_lock); + return tm->seq; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 37c4da3403d..2c48f52aba4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1422,7 +1422,7 @@ struct btrfs_fs_info { /* this protects tree_mod_seq_list */ spinlock_t tree_mod_seq_lock; - atomic_t tree_mod_seq; + atomic64_t tree_mod_seq; struct list_head tree_mod_seq_list; struct seq_list tree_mod_seq_elem; @@ -3332,10 +3332,7 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem); void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem); -static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info) -{ - return atomic_inc_return(&fs_info->tree_mod_seq); -} +u64 btrfs_tree_mod_seq_prev(u64 seq); int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq); /* root-item.c */ diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 116abec7a29..c219463fb1f 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -361,8 +361,10 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, elem = list_first_entry(&fs_info->tree_mod_seq_list, struct seq_list, list); if (seq >= elem->seq) { - pr_debug("holding back delayed_ref %llu, lowest is " - "%llu (%p)\n", seq, elem->seq, delayed_refs); + pr_debug("holding back delayed_ref %#x.%x, lowest is %#x.%x (%p)\n", + (u32)(seq >> 32), (u32)seq, + (u32)(elem->seq >> 32), (u32)elem->seq, + delayed_refs); ret = 1; } } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e4488b57a7a..92c44ed78de 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2157,7 +2157,7 @@ int open_ctree(struct super_block *sb, atomic_set(&fs_info->async_submit_draining, 0); atomic_set(&fs_info->nr_async_bios, 0); atomic_set(&fs_info->defrag_running, 0); - atomic_set(&fs_info->tree_mod_seq, 0); + atomic64_set(&fs_info->tree_mod_seq, 0); fs_info->sb = sb; fs_info->max_inline = 8192 * 1024; fs_info->metadata_ratio = 0; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f8a5652b0c4..1cae6631b3d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2541,9 +2541,10 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, !trans->delayed_ref_elem.seq) { /* list without seq or seq without list */ btrfs_err(fs_info, - "qgroup accounting update error, list is%s empty, seq is %llu", + "qgroup accounting update error, list is%s empty, seq is %#x.%x", list_empty(&trans->qgroup_ref_list) ? "" : " not", - trans->delayed_ref_elem.seq); + (u32)(trans->delayed_ref_elem.seq >> 32), + (u32)trans->delayed_ref_elem.seq); BUG(); } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index f175471da88..e5c56238b6c 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1242,9 +1242,11 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, case BTRFS_ADD_DELAYED_REF: case BTRFS_ADD_DELAYED_EXTENT: sgn = 1; + seq = btrfs_tree_mod_seq_prev(node->seq); break; case BTRFS_DROP_DELAYED_REF: sgn = -1; + seq = node->seq; break; case BTRFS_UPDATE_DELAYED_HEAD: return 0; @@ -1254,14 +1256,14 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, /* * the delayed ref sequence number we pass depends on the direction of - * the operation. for add operations, we pass (node->seq - 1) to skip + * the operation. for add operations, we pass + * tree_mod_log_prev_seq(node->seq) to skip * the delayed ref's current sequence number, because we need the state * of the tree before the add operation. for delete operations, we pass * (node->seq) to include the delayed ref's current sequence number, * because we need the state of the tree after the delete operation. */ - ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, - sgn > 0 ? node->seq - 1 : node->seq, &roots); + ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots); if (ret < 0) return ret; @@ -1772,8 +1774,9 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) { if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq) return; - printk(KERN_ERR "btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %llu\n", + pr_err("btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x\n", trans, list_empty(&trans->qgroup_ref_list) ? "" : " not", - trans->delayed_ref_elem.seq); + (u32)(trans->delayed_ref_elem.seq >> 32), + (u32)trans->delayed_ref_elem.seq); BUG(); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 258fcebc7cc..18d6fb7be26 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -162,7 +162,7 @@ loop: if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) WARN(1, KERN_ERR "btrfs: tree_mod_log rb tree not empty when " "creating a fresh transaction\n"); - atomic_set(&fs_info->tree_mod_seq, 0); + atomic64_set(&fs_info->tree_mod_seq, 0); spin_lock_init(&cur_trans->commit_lock); spin_lock_init(&cur_trans->delayed_refs.lock); -- cgit v1.2.3-70-g09d2 From 2f2320360b0c35b86938bfc561124474f0dac6e4 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 25 Apr 2013 16:04:51 +0000 Subject: Btrfs: rescan for qgroups If qgroup tracking is out of sync, a rescan operation can be started. It iterates the complete extent tree and recalculates all qgroup tracking data. This is an expensive operation and should not be used unless required. A filesystem under rescan can still be umounted. The rescan continues on the next mount. Status information is provided with a separate ioctl while a rescan operation is in progress. Signed-off-by: Jan Schmidt Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 17 ++- fs/btrfs/disk-io.c | 5 + fs/btrfs/ioctl.c | 83 ++++++++++-- fs/btrfs/qgroup.c | 318 +++++++++++++++++++++++++++++++++++++++++++-- include/uapi/linux/btrfs.h | 12 +- 5 files changed, 400 insertions(+), 35 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2c48f52aba4..d9bed5fd334 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1021,9 +1021,9 @@ struct btrfs_block_group_item { */ #define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0) /* - * SCANNING is set during the initialization phase + * RESCAN is set during the initialization phase */ -#define BTRFS_QGROUP_STATUS_FLAG_SCANNING (1ULL << 1) +#define BTRFS_QGROUP_STATUS_FLAG_RESCAN (1ULL << 1) /* * Some qgroup entries are known to be out of date, * either because the configuration has changed in a way that @@ -1052,7 +1052,7 @@ struct btrfs_qgroup_status_item { * only used during scanning to record the progress * of the scan. It contains a logical address */ - __le64 scan; + __le64 rescan; } __attribute__ ((__packed__)); struct btrfs_qgroup_info_item { @@ -1603,6 +1603,11 @@ struct btrfs_fs_info { /* used by btrfs_qgroup_record_ref for an efficient tree traversal */ u64 qgroup_seq; + /* qgroup rescan items */ + struct mutex qgroup_rescan_lock; /* protects the progress item */ + struct btrfs_key qgroup_rescan_progress; + struct btrfs_workers qgroup_rescan_workers; + /* filesystem state */ unsigned long fs_state; @@ -2886,8 +2891,8 @@ BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item, version, 64); BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item, flags, 64); -BTRFS_SETGET_FUNCS(qgroup_status_scan, struct btrfs_qgroup_status_item, - scan, 64); +BTRFS_SETGET_FUNCS(qgroup_status_rescan, struct btrfs_qgroup_status_item, + rescan, 64); /* btrfs_qgroup_info_item */ BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item, @@ -3828,7 +3833,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_quota_disable(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); -int btrfs_quota_rescan(struct btrfs_fs_info *fs_info); +int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 src, u64 dst); int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 92c44ed78de..d96305e5cc9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1976,6 +1976,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_stop_workers(&fs_info->caching_workers); btrfs_stop_workers(&fs_info->readahead_workers); btrfs_stop_workers(&fs_info->flush_workers); + btrfs_stop_workers(&fs_info->qgroup_rescan_workers); } /* helper to cleanup tree roots */ @@ -2267,6 +2268,7 @@ int open_ctree(struct super_block *sb, fs_info->qgroup_seq = 1; fs_info->quota_enabled = 0; fs_info->pending_quota_state = 0; + mutex_init(&fs_info->qgroup_rescan_lock); btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); btrfs_init_free_cluster(&fs_info->data_alloc_cluster); @@ -2476,6 +2478,8 @@ int open_ctree(struct super_block *sb, btrfs_init_workers(&fs_info->readahead_workers, "readahead", fs_info->thread_pool_size, &fs_info->generic_worker); + btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1, + &fs_info->generic_worker); /* * endios are largely parallel and should have a very @@ -2510,6 +2514,7 @@ int open_ctree(struct super_block *sb, ret |= btrfs_start_workers(&fs_info->caching_workers); ret |= btrfs_start_workers(&fs_info->readahead_workers); ret |= btrfs_start_workers(&fs_info->flush_workers); + ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers); if (ret) { err = -ENOMEM; goto fail_sb_buffer; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a74edc79753..f5f6af338b5 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3701,12 +3701,10 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) } down_write(&root->fs_info->subvol_sem); - if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) { - trans = btrfs_start_transaction(root->fs_info->tree_root, 2); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; - } + trans = btrfs_start_transaction(root->fs_info->tree_root, 2); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; } switch (sa->cmd) { @@ -3716,9 +3714,6 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) case BTRFS_QUOTA_CTL_DISABLE: ret = btrfs_quota_disable(trans, root->fs_info); break; - case BTRFS_QUOTA_CTL_RESCAN: - ret = btrfs_quota_rescan(root->fs_info); - break; default: ret = -EINVAL; break; @@ -3727,11 +3722,9 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) if (copy_to_user(arg, sa, sizeof(*sa))) ret = -EFAULT; - if (trans) { - err = btrfs_commit_transaction(trans, root->fs_info->tree_root); - if (err && !ret) - ret = err; - } + err = btrfs_commit_transaction(trans, root->fs_info->tree_root); + if (err && !ret) + ret = err; out: kfree(sa); up_write(&root->fs_info->subvol_sem); @@ -3886,6 +3879,64 @@ drop_write: return ret; } +static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg) +{ + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + struct btrfs_ioctl_quota_rescan_args *qsa; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ret = mnt_want_write_file(file); + if (ret) + return ret; + + qsa = memdup_user(arg, sizeof(*qsa)); + if (IS_ERR(qsa)) { + ret = PTR_ERR(qsa); + goto drop_write; + } + + if (qsa->flags) { + ret = -EINVAL; + goto out; + } + + ret = btrfs_qgroup_rescan(root->fs_info); + +out: + kfree(qsa); +drop_write: + mnt_drop_write_file(file); + return ret; +} + +static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg) +{ + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + struct btrfs_ioctl_quota_rescan_args *qsa; + int ret = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + qsa = kzalloc(sizeof(*qsa), GFP_NOFS); + if (!qsa) + return -ENOMEM; + + if (root->fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { + qsa->flags = 1; + qsa->progress = root->fs_info->qgroup_rescan_progress.objectid; + } + + if (copy_to_user(arg, qsa, sizeof(*qsa))) + ret = -EFAULT; + + kfree(qsa); + return ret; +} + static long btrfs_ioctl_set_received_subvol(struct file *file, void __user *arg) { @@ -4124,6 +4175,10 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_qgroup_create(file, argp); case BTRFS_IOC_QGROUP_LIMIT: return btrfs_ioctl_qgroup_limit(file, argp); + case BTRFS_IOC_QUOTA_RESCAN: + return btrfs_ioctl_quota_rescan(file, argp); + case BTRFS_IOC_QUOTA_RESCAN_STATUS: + return btrfs_ioctl_quota_rescan_status(file, argp); case BTRFS_IOC_DEV_REPLACE: return btrfs_ioctl_dev_replace(root, argp); case BTRFS_IOC_GET_FSLABEL: diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 1fb7d8da308..da8458357b5 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -31,13 +31,13 @@ #include "locking.h" #include "ulist.h" #include "backref.h" +#include "extent_io.h" /* TODO XXX FIXME * - subvol delete -> delete when ref goes to 0? delete limits also? * - reorganize keys * - compressed * - sync - * - rescan * - copy also limits on subvol creation * - limit * - caches fuer ulists @@ -98,6 +98,14 @@ struct btrfs_qgroup_list { struct btrfs_qgroup *member; }; +struct qgroup_rescan { + struct btrfs_work work; + struct btrfs_fs_info *fs_info; +}; + +static void qgroup_rescan_start(struct btrfs_fs_info *fs_info, + struct qgroup_rescan *qscan); + /* must be called with qgroup_ioctl_lock held */ static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) @@ -298,7 +306,20 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) } fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, ptr); - /* FIXME read scan element */ + fs_info->qgroup_rescan_progress.objectid = + btrfs_qgroup_status_rescan(l, ptr); + if (fs_info->qgroup_flags & + BTRFS_QGROUP_STATUS_FLAG_RESCAN) { + struct qgroup_rescan *qscan = + kmalloc(sizeof(*qscan), GFP_NOFS); + if (!qscan) { + ret = -ENOMEM; + goto out; + } + fs_info->qgroup_rescan_progress.type = 0; + fs_info->qgroup_rescan_progress.offset = 0; + qgroup_rescan_start(fs_info, qscan); + } goto next1; } @@ -719,7 +740,8 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans, ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item); btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags); btrfs_set_qgroup_status_generation(l, ptr, trans->transid); - /* XXX scan */ + btrfs_set_qgroup_status_rescan(l, ptr, + fs_info->qgroup_rescan_progress.objectid); btrfs_mark_buffer_dirty(l); @@ -830,7 +852,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON | BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags); - btrfs_set_qgroup_status_scan(leaf, ptr, 0); + btrfs_set_qgroup_status_rescan(leaf, ptr, 0); btrfs_mark_buffer_dirty(leaf); @@ -944,10 +966,11 @@ out: return ret; } -int btrfs_quota_rescan(struct btrfs_fs_info *fs_info) +static void qgroup_dirty(struct btrfs_fs_info *fs_info, + struct btrfs_qgroup *qgroup) { - /* FIXME */ - return 0; + if (list_empty(&qgroup->dirty)) + list_add(&qgroup->dirty, &fs_info->dirty_qgroups); } int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, @@ -1155,13 +1178,6 @@ out: return ret; } -static void qgroup_dirty(struct btrfs_fs_info *fs_info, - struct btrfs_qgroup *qgroup) -{ - if (list_empty(&qgroup->dirty)) - list_add(&qgroup->dirty, &fs_info->dirty_qgroups); -} - /* * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts * the modification into a list that's later used by btrfs_end_transaction to @@ -1390,6 +1406,15 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, BUG(); } + mutex_lock(&fs_info->qgroup_rescan_lock); + if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { + if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) { + mutex_unlock(&fs_info->qgroup_rescan_lock); + return 0; + } + } + mutex_unlock(&fs_info->qgroup_rescan_lock); + /* * the delayed ref sequence number we pass depends on the direction of * the operation. for add operations, we pass @@ -1403,7 +1428,15 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, if (ret < 0) return ret; + mutex_lock(&fs_info->qgroup_rescan_lock); spin_lock(&fs_info->qgroup_lock); + if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { + if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) { + ret = 0; + goto unlock; + } + } + quota_root = fs_info->quota_root; if (!quota_root) goto unlock; @@ -1445,6 +1478,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, unlock: spin_unlock(&fs_info->qgroup_lock); + mutex_unlock(&fs_info->qgroup_rescan_lock); ulist_free(roots); ulist_free(tmp); @@ -1823,3 +1857,259 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) (u32)trans->delayed_ref_elem.seq); BUG(); } + +/* + * returns < 0 on error, 0 when more leafs are to be scanned. + * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared. + */ +static int +qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path *path, + struct btrfs_trans_handle *trans, struct ulist *tmp, + struct extent_buffer *scratch_leaf) +{ + struct btrfs_key found; + struct btrfs_fs_info *fs_info = qscan->fs_info; + struct ulist *roots = NULL; + struct ulist_node *unode; + struct ulist_iterator uiter; + struct seq_list tree_mod_seq_elem = {}; + u64 seq; + int slot; + int ret; + + path->leave_spinning = 1; + mutex_lock(&fs_info->qgroup_rescan_lock); + ret = btrfs_search_slot_for_read(fs_info->extent_root, + &fs_info->qgroup_rescan_progress, + path, 1, 0); + + pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n", + (unsigned long long)fs_info->qgroup_rescan_progress.objectid, + fs_info->qgroup_rescan_progress.type, + (unsigned long long)fs_info->qgroup_rescan_progress.offset, + ret); + + if (ret) { + /* + * The rescan is about to end, we will not be scanning any + * further blocks. We cannot unset the RESCAN flag here, because + * we want to commit the transaction if everything went well. + * To make the live accounting work in this phase, we set our + * scan progress pointer such that every real extent objectid + * will be smaller. + */ + fs_info->qgroup_rescan_progress.objectid = (u64)-1; + btrfs_release_path(path); + mutex_unlock(&fs_info->qgroup_rescan_lock); + return ret; + } + + btrfs_item_key_to_cpu(path->nodes[0], &found, + btrfs_header_nritems(path->nodes[0]) - 1); + fs_info->qgroup_rescan_progress.objectid = found.objectid + 1; + + btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); + memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf)); + slot = path->slots[0]; + btrfs_release_path(path); + mutex_unlock(&fs_info->qgroup_rescan_lock); + + for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { + btrfs_item_key_to_cpu(scratch_leaf, &found, slot); + if (found.type != BTRFS_EXTENT_ITEM_KEY) + continue; + ret = btrfs_find_all_roots(trans, fs_info, found.objectid, + tree_mod_seq_elem.seq, &roots); + if (ret < 0) + goto out; + spin_lock(&fs_info->qgroup_lock); + seq = fs_info->qgroup_seq; + fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ + + ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq); + if (ret) { + spin_unlock(&fs_info->qgroup_lock); + ulist_free(roots); + goto out; + } + + /* + * step2 of btrfs_qgroup_account_ref works from a single root, + * we're doing all at once here. + */ + ulist_reinit(tmp); + ULIST_ITER_INIT(&uiter); + while ((unode = ulist_next(roots, &uiter))) { + struct btrfs_qgroup *qg; + + qg = find_qgroup_rb(fs_info, unode->val); + if (!qg) + continue; + + ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, + GFP_ATOMIC); + if (ret < 0) { + spin_unlock(&fs_info->qgroup_lock); + ulist_free(roots); + goto out; + } + } + + /* this loop is similar to step 2 of btrfs_qgroup_account_ref */ + ULIST_ITER_INIT(&uiter); + while ((unode = ulist_next(tmp, &uiter))) { + struct btrfs_qgroup *qg; + struct btrfs_qgroup_list *glist; + + qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux; + qg->rfer += found.offset; + qg->rfer_cmpr += found.offset; + WARN_ON(qg->tag >= seq); + if (qg->refcnt - seq == roots->nnodes) { + qg->excl += found.offset; + qg->excl_cmpr += found.offset; + } + qgroup_dirty(fs_info, qg); + + list_for_each_entry(glist, &qg->groups, next_group) { + ret = ulist_add(tmp, glist->group->qgroupid, + (uintptr_t)glist->group, + GFP_ATOMIC); + if (ret < 0) { + spin_unlock(&fs_info->qgroup_lock); + ulist_free(roots); + goto out; + } + } + } + + spin_unlock(&fs_info->qgroup_lock); + ulist_free(roots); + ret = 0; + } + +out: + btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); + + return ret; +} + +static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) +{ + struct qgroup_rescan *qscan = container_of(work, struct qgroup_rescan, + work); + struct btrfs_path *path; + struct btrfs_trans_handle *trans = NULL; + struct btrfs_fs_info *fs_info = qscan->fs_info; + struct ulist *tmp = NULL; + struct extent_buffer *scratch_leaf = NULL; + int err = -ENOMEM; + + path = btrfs_alloc_path(); + if (!path) + goto out; + tmp = ulist_alloc(GFP_NOFS); + if (!tmp) + goto out; + scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS); + if (!scratch_leaf) + goto out; + + err = 0; + while (!err) { + trans = btrfs_start_transaction(fs_info->fs_root, 0); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + break; + } + if (!fs_info->quota_enabled) { + err = -EINTR; + } else { + err = qgroup_rescan_leaf(qscan, path, trans, + tmp, scratch_leaf); + } + if (err > 0) + btrfs_commit_transaction(trans, fs_info->fs_root); + else + btrfs_end_transaction(trans, fs_info->fs_root); + } + +out: + kfree(scratch_leaf); + ulist_free(tmp); + btrfs_free_path(path); + kfree(qscan); + + mutex_lock(&fs_info->qgroup_rescan_lock); + fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; + + if (err == 2 && + fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { + fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + } else if (err < 0) { + fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + } + mutex_unlock(&fs_info->qgroup_rescan_lock); + + if (err >= 0) { + pr_info("btrfs: qgroup scan completed%s\n", + err == 2 ? " (inconsistency flag cleared)" : ""); + } else { + pr_err("btrfs: qgroup scan failed with %d\n", err); + } +} + +static void +qgroup_rescan_start(struct btrfs_fs_info *fs_info, struct qgroup_rescan *qscan) +{ + memset(&qscan->work, 0, sizeof(qscan->work)); + qscan->work.func = btrfs_qgroup_rescan_worker; + qscan->fs_info = fs_info; + + pr_info("btrfs: qgroup scan started\n"); + btrfs_queue_worker(&fs_info->qgroup_rescan_workers, &qscan->work); +} + +int +btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) +{ + int ret = 0; + struct rb_node *n; + struct btrfs_qgroup *qgroup; + struct qgroup_rescan *qscan = kmalloc(sizeof(*qscan), GFP_NOFS); + + if (!qscan) + return -ENOMEM; + + mutex_lock(&fs_info->qgroup_rescan_lock); + spin_lock(&fs_info->qgroup_lock); + if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) + ret = -EINPROGRESS; + else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) + ret = -EINVAL; + if (ret) { + spin_unlock(&fs_info->qgroup_lock); + mutex_unlock(&fs_info->qgroup_rescan_lock); + kfree(qscan); + return ret; + } + + fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; + memset(&fs_info->qgroup_rescan_progress, 0, + sizeof(fs_info->qgroup_rescan_progress)); + + /* clear all current qgroup tracking information */ + for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) { + qgroup = rb_entry(n, struct btrfs_qgroup, node); + qgroup->rfer = 0; + qgroup->rfer_cmpr = 0; + qgroup->excl = 0; + qgroup->excl_cmpr = 0; + } + spin_unlock(&fs_info->qgroup_lock); + mutex_unlock(&fs_info->qgroup_rescan_lock); + + qgroup_rescan_start(fs_info, qscan); + + return 0; +} diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 5e39e859a84..5ef0df545a2 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -376,12 +376,18 @@ struct btrfs_ioctl_get_dev_stats { #define BTRFS_QUOTA_CTL_ENABLE 1 #define BTRFS_QUOTA_CTL_DISABLE 2 -#define BTRFS_QUOTA_CTL_RESCAN 3 +#define BTRFS_QUOTA_CTL_RESCAN__NOTUSED 3 struct btrfs_ioctl_quota_ctl_args { __u64 cmd; __u64 status; }; +struct btrfs_ioctl_quota_rescan_args { + __u64 flags; + __u64 progress; + __u64 reserved[6]; +}; + struct btrfs_ioctl_qgroup_assign_args { __u64 assign; __u64 src; @@ -520,6 +526,10 @@ struct btrfs_ioctl_send_args { struct btrfs_ioctl_qgroup_create_args) #define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \ struct btrfs_ioctl_qgroup_limit_args) +#define BTRFS_IOC_QUOTA_RESCAN _IOW(BTRFS_IOCTL_MAGIC, 44, \ + struct btrfs_ioctl_quota_rescan_args) +#define BTRFS_IOC_QUOTA_RESCAN_STATUS _IOR(BTRFS_IOCTL_MAGIC, 45, \ + struct btrfs_ioctl_quota_rescan_args) #define BTRFS_IOC_GET_FSLABEL _IOR(BTRFS_IOCTL_MAGIC, 49, \ char[BTRFS_LABEL_SIZE]) #define BTRFS_IOC_SET_FSLABEL _IOW(BTRFS_IOCTL_MAGIC, 50, \ -- cgit v1.2.3-70-g09d2 From 634554dc0acfc8753c05e432b2fdb34b0be89c78 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 29 Apr 2013 10:05:57 -0400 Subject: Btrfs: deal with errors in write_dev_supers If you try to mount -o loop a restored file system it will panic if the file ends up being smaller than the original disk. This is because we go to try and get a block for a super that may be past the EOF which makes __getblk return NULL for a buffer head when we aren't expecting it to. Fix this by dealing with this case and just jacking up the errors count. With this patch we no longer panic when mounting a restored file system loopback. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d96305e5cc9..f651a37c7e0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2986,7 +2986,10 @@ static int write_dev_supers(struct btrfs_device *device, if (wait) { bh = __find_get_block(device->bdev, bytenr / 4096, BTRFS_SUPER_INFO_SIZE); - BUG_ON(!bh); + if (!bh) { + errors++; + continue; + } wait_on_buffer(bh); if (!buffer_uptodate(bh)) errors++; @@ -3013,6 +3016,13 @@ static int write_dev_supers(struct btrfs_device *device, */ bh = __getblk(device->bdev, bytenr / 4096, BTRFS_SUPER_INFO_SIZE); + if (!bh) { + printk(KERN_ERR "btrfs: couldn't get super " + "buffer head for bytenr %Lu\n", bytenr); + errors++; + continue; + } + memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); /* one reference for submit_bh */ -- cgit v1.2.3-70-g09d2 From 48a3b6366f6913683563d934eb16fea67dead9c1 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 25 Apr 2013 20:41:01 +0000 Subject: btrfs: make static code static & remove dead code Big patch, but all it does is add statics to functions which are in fact static, then remove the associated dead-code fallout. removed functions: btrfs_iref_to_path() __btrfs_lookup_delayed_deletion_item() __btrfs_search_delayed_insertion_item() __btrfs_search_delayed_deletion_item() find_eb_for_page() btrfs_find_block_group() range_straddles_pages() extent_range_uptodate() btrfs_file_extent_length() btrfs_scrub_cancel_devid() btrfs_start_transaction_lflush() btrfs_print_tree() is left because it is used for debugging. btrfs_start_transaction_lflush() and btrfs_reada_detach() are left for symmetry. ulist.c functions are left, another patch will take care of those. Signed-off-by: Eric Sandeen Signed-off-by: Josef Bacik --- fs/btrfs/backref.c | 40 ++++++++++------------------ fs/btrfs/backref.h | 3 --- fs/btrfs/compression.c | 11 +++++--- fs/btrfs/compression.h | 2 -- fs/btrfs/ctree.c | 9 ++----- fs/btrfs/ctree.h | 22 ---------------- fs/btrfs/delayed-inode.c | 55 ++++++-------------------------------- fs/btrfs/dir-item.c | 6 ++++- fs/btrfs/disk-io.c | 43 ++++-------------------------- fs/btrfs/disk-io.h | 2 -- fs/btrfs/extent-tree.c | 64 ++++++--------------------------------------- fs/btrfs/extent_io.c | 61 ++++++++---------------------------------- fs/btrfs/extent_io.h | 8 ------ fs/btrfs/extent_map.c | 5 ++-- fs/btrfs/file-item.c | 30 ++++----------------- fs/btrfs/file.c | 12 ++++----- fs/btrfs/free-space-cache.c | 26 +++++++++--------- fs/btrfs/inode-item.c | 9 ++++--- fs/btrfs/inode.c | 11 +++++--- fs/btrfs/ioctl.c | 2 +- fs/btrfs/locking.c | 4 +-- fs/btrfs/print-tree.h | 2 +- fs/btrfs/raid56.c | 14 +++++----- fs/btrfs/relocation.c | 3 +-- fs/btrfs/scrub.c | 22 ---------------- fs/btrfs/send.c | 2 +- fs/btrfs/send.h | 1 - fs/btrfs/transaction.c | 2 +- fs/btrfs/transaction.h | 1 - fs/btrfs/tree-log.c | 6 ++--- fs/btrfs/tree-log.h | 3 --- fs/btrfs/volumes.c | 29 ++++++++++---------- fs/btrfs/volumes.h | 13 --------- fs/btrfs/xattr.c | 4 +-- 34 files changed, 135 insertions(+), 392 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 04b5b309389..b4fb4155811 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1189,6 +1189,20 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, return ret; } +/* + * this iterates to turn a name (from iref/extref) into a full filesystem path. + * Elements of the path are separated by '/' and the path is guaranteed to be + * 0-terminated. the path is only given within the current file system. + * Therefore, it never starts with a '/'. the caller is responsible to provide + * "size" bytes in "dest". the dest buffer will be filled backwards. finally, + * the start point of the resulting string is returned. this pointer is within + * dest, normally. + * in case the path buffer would overflow, the pointer is decremented further + * as if output was written to the buffer, though no more output is actually + * generated. that way, the caller can determine how much space would be + * required for the path to fit into the buffer. in that case, the returned + * value will be smaller than dest. callers must check this! + */ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, u32 name_len, unsigned long name_off, struct extent_buffer *eb_in, u64 parent, @@ -1257,32 +1271,6 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, return dest + bytes_left; } -/* - * this iterates to turn a btrfs_inode_ref into a full filesystem path. elements - * of the path are separated by '/' and the path is guaranteed to be - * 0-terminated. the path is only given within the current file system. - * Therefore, it never starts with a '/'. the caller is responsible to provide - * "size" bytes in "dest". the dest buffer will be filled backwards. finally, - * the start point of the resulting string is returned. this pointer is within - * dest, normally. - * in case the path buffer would overflow, the pointer is decremented further - * as if output was written to the buffer, though no more output is actually - * generated. that way, the caller can determine how much space would be - * required for the path to fit into the buffer. in that case, the returned - * value will be smaller than dest. callers must check this! - */ -char *btrfs_iref_to_path(struct btrfs_root *fs_root, - struct btrfs_path *path, - struct btrfs_inode_ref *iref, - struct extent_buffer *eb_in, u64 parent, - char *dest, u32 size) -{ - return btrfs_ref_to_path(fs_root, path, - btrfs_inode_ref_name_len(eb_in, iref), - (unsigned long)(iref + 1), - eb_in, parent, dest, size); -} - /* * this makes the path point to (logical EXTENT_ITEM *) * returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 310a7f6d09b..0f446d7ca2c 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -59,9 +59,6 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); int btrfs_find_all_roots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, u64 time_seq, struct ulist **roots); -char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, - struct btrfs_inode_ref *iref, struct extent_buffer *eb, - u64 parent, char *dest, u32 size); char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, u32 name_len, unsigned long name_off, struct extent_buffer *eb_in, u64 parent, diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b9c56502860..b189bd1e7a3 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -82,6 +82,10 @@ struct compressed_bio { u32 sums; }; +static int btrfs_decompress_biovec(int type, struct page **pages_in, + u64 disk_start, struct bio_vec *bvec, + int vcnt, size_t srclen); + static inline int compressed_bio_size(struct btrfs_root *root, unsigned long disk_size) { @@ -738,7 +742,7 @@ static int comp_num_workspace[BTRFS_COMPRESS_TYPES]; static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES]; static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES]; -struct btrfs_compress_op *btrfs_compress_op[] = { +static struct btrfs_compress_op *btrfs_compress_op[] = { &btrfs_zlib_compress, &btrfs_lzo_compress, }; @@ -909,8 +913,9 @@ int btrfs_compress_pages(int type, struct address_space *mapping, * be contiguous. They all correspond to the range of bytes covered by * the compressed extent. */ -int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, - struct bio_vec *bvec, int vcnt, size_t srclen) +static int btrfs_decompress_biovec(int type, struct page **pages_in, + u64 disk_start, struct bio_vec *bvec, + int vcnt, size_t srclen) { struct list_head *workspace; int ret; diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 9afb0a62ae8..0c803b4fbf9 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -30,8 +30,6 @@ int btrfs_compress_pages(int type, struct address_space *mapping, unsigned long *total_in, unsigned long *total_out, unsigned long max_out); -int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, - struct bio_vec *bvec, int vcnt, size_t srclen); int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, unsigned long start_byte, size_t srclen, size_t destlen); int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index a17d9991c33..de6de8e60b4 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -41,12 +41,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path, int level, int slot); static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb); -struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr, - u32 blocksize, u64 parent_transid, - u64 time_seq); -struct extent_buffer *btrfs_find_old_tree_block(struct btrfs_root *root, - u64 bytenr, u32 blocksize, - u64 time_seq); +static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); struct btrfs_path *btrfs_alloc_path(void) { @@ -208,7 +203,7 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) * tree until you end up with a lock on the root. A locked buffer * is returned, with a reference held. */ -struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root) +static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root) { struct extent_buffer *eb; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d9bed5fd334..4272fbb0873 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3044,8 +3044,6 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group( struct btrfs_fs_info *info, u64 bytenr); void btrfs_put_block_group(struct btrfs_block_group_cache *cache); -u64 btrfs_find_block_group(struct btrfs_root *root, - u64 search_start, u64 search_hint, int owner); struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 blocksize, u64 parent, u64 root_objectid, @@ -3055,10 +3053,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, u64 parent, int last_ref); -struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u32 blocksize, - int level); int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 root_objectid, u64 owner, @@ -3111,7 +3105,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 group_start); void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); -u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); void btrfs_clear_space_info_full(struct btrfs_fs_info *info); @@ -3300,7 +3293,6 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p) { return btrfs_next_old_item(root, p, 0); } -int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); int __must_check btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, @@ -3395,9 +3387,6 @@ struct btrfs_dir_item * btrfs_search_dir_index_item(struct btrfs_root *root, struct btrfs_path *path, u64 dirid, const char *name, int name_len); -struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, - struct btrfs_path *path, - const char *name, int name_len); int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, @@ -3475,16 +3464,11 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, u64 bytenr, int mod); -u64 btrfs_file_extent_length(struct btrfs_path *path); int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_ordered_sum *sums); int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, struct bio *bio, u64 file_start, int contig); -struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - u64 bytenr, int cow); int btrfs_csum_truncate(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 isize); @@ -3546,8 +3530,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, struct extent_state **cached_state); -int btrfs_writepages(struct address_space *mapping, - struct writeback_control *wbc); int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, struct btrfs_root *new_root, u64 new_dirid); int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset, @@ -3557,7 +3539,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); int btrfs_readpage(struct file *file, struct page *page); void btrfs_evict_inode(struct inode *inode); int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); -int btrfs_dirty_inode(struct inode *inode); struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); int btrfs_drop_inode(struct inode *inode); @@ -3575,7 +3556,6 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); -int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); int btrfs_orphan_cleanup(struct btrfs_root *root); void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, struct btrfs_root *root); @@ -3626,7 +3606,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, struct inode *inode, u64 start, u64 end); int btrfs_release_file(struct inode *inode, struct file *file); -void btrfs_drop_pages(struct page **pages, size_t num_pages); int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, struct page **pages, size_t num_pages, loff_t pos, size_t write_bytes, @@ -3802,7 +3781,6 @@ void btrfs_scrub_continue_super(struct btrfs_root *root); int btrfs_scrub_cancel(struct btrfs_fs_info *info); int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info, struct btrfs_device *dev); -int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid); int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, struct btrfs_scrub_progress *progress); diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 71b78c27496..f26f38ccd19 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -202,7 +202,7 @@ static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root, spin_unlock(&root->lock); } -struct btrfs_delayed_node *btrfs_first_delayed_node( +static struct btrfs_delayed_node *btrfs_first_delayed_node( struct btrfs_delayed_root *delayed_root) { struct list_head *p; @@ -221,7 +221,7 @@ out: return node; } -struct btrfs_delayed_node *btrfs_next_delayed_node( +static struct btrfs_delayed_node *btrfs_next_delayed_node( struct btrfs_delayed_node *node) { struct btrfs_delayed_root *delayed_root; @@ -282,7 +282,7 @@ static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node) __btrfs_release_delayed_node(node, 0); } -struct btrfs_delayed_node *btrfs_first_prepared_delayed_node( +static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node( struct btrfs_delayed_root *delayed_root) { struct list_head *p; @@ -308,7 +308,7 @@ static inline void btrfs_release_prepared_delayed_node( __btrfs_release_delayed_node(node, 1); } -struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len) +static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len) { struct btrfs_delayed_item *item; item = kmalloc(sizeof(*item) + data_len, GFP_NOFS); @@ -383,7 +383,7 @@ static struct btrfs_delayed_item *__btrfs_lookup_delayed_item( return NULL; } -struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item( +static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item( struct btrfs_delayed_node *delayed_node, struct btrfs_key *key) { @@ -394,45 +394,6 @@ struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item( return item; } -struct btrfs_delayed_item *__btrfs_lookup_delayed_deletion_item( - struct btrfs_delayed_node *delayed_node, - struct btrfs_key *key) -{ - struct btrfs_delayed_item *item; - - item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key, - NULL, NULL); - return item; -} - -struct btrfs_delayed_item *__btrfs_search_delayed_insertion_item( - struct btrfs_delayed_node *delayed_node, - struct btrfs_key *key) -{ - struct btrfs_delayed_item *item, *next; - - item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key, - NULL, &next); - if (!item) - item = next; - - return item; -} - -struct btrfs_delayed_item *__btrfs_search_delayed_deletion_item( - struct btrfs_delayed_node *delayed_node, - struct btrfs_key *key) -{ - struct btrfs_delayed_item *item, *next; - - item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key, - NULL, &next); - if (!item) - item = next; - - return item; -} - static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node, struct btrfs_delayed_item *ins, int action) @@ -535,7 +496,7 @@ static void btrfs_release_delayed_item(struct btrfs_delayed_item *item) } } -struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item( +static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item( struct btrfs_delayed_node *delayed_node) { struct rb_node *p; @@ -548,7 +509,7 @@ struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item( return item; } -struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item( +static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item( struct btrfs_delayed_node *delayed_node) { struct rb_node *p; @@ -561,7 +522,7 @@ struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item( return item; } -struct btrfs_delayed_item *__btrfs_next_delayed_item( +static struct btrfs_delayed_item *__btrfs_next_delayed_item( struct btrfs_delayed_item *item) { struct rb_node *p; diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 71fa113fe41..79e594e341c 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -21,6 +21,10 @@ #include "hash.h" #include "transaction.h" +static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, + struct btrfs_path *path, + const char *name, int name_len); + /* * insert a name into a directory, doing overflow properly if there is a hash * collision. data_size indicates how big the item inserted should be. On @@ -379,7 +383,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, * this walks through all the entries in a dir item and finds one * for a specific name. */ -struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, +static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, const char *name, int name_len) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f651a37c7e0..ac132d9637b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -70,6 +70,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, int mark); static int btrfs_destroy_pinned_extent(struct btrfs_root *root, struct extent_io_tree *pinned_extents); +static int btrfs_cleanup_transaction(struct btrfs_root *root); +static void btrfs_error_commit_super(struct btrfs_root *root); /* * end_io_wq structs are used to do processing in task context when an IO is @@ -531,41 +533,6 @@ static noinline int check_leaf(struct btrfs_root *root, return 0; } -struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree, - struct page *page, int max_walk) -{ - struct extent_buffer *eb; - u64 start = page_offset(page); - u64 target = start; - u64 min_start; - - if (start < max_walk) - min_start = 0; - else - min_start = start - max_walk; - - while (start >= min_start) { - eb = find_extent_buffer(tree, start, 0); - if (eb) { - /* - * we found an extent buffer and it contains our page - * horray! - */ - if (eb->start <= target && - eb->start + eb->len > target) - return eb; - - /* we found an extent buffer that wasn't for us */ - free_extent_buffer(eb); - return NULL; - } - if (start == 0) - break; - start -= PAGE_CACHE_SIZE; - } - return NULL; -} - static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state, int mirror) { @@ -3245,7 +3212,7 @@ int btrfs_calc_num_tolerated_disk_barrier_failures( return num_tolerated_disk_barrier_failures; } -int write_all_supers(struct btrfs_root *root, int max_mirrors) +static int write_all_supers(struct btrfs_root *root, int max_mirrors) { struct list_head *head; struct btrfs_device *dev; @@ -3611,7 +3578,7 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, return 0; } -void btrfs_error_commit_super(struct btrfs_root *root) +static void btrfs_error_commit_super(struct btrfs_root *root) { mutex_lock(&root->fs_info->cleaner_mutex); btrfs_run_delayed_iputs(root); @@ -3879,7 +3846,7 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, */ } -int btrfs_cleanup_transaction(struct btrfs_root *root) +static int btrfs_cleanup_transaction(struct btrfs_root *root) { struct btrfs_transaction *t; LIST_HEAD(list); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 85c23e6c8fe..be69ce1b07a 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -61,7 +61,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, int max_mirrors); struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); int btrfs_commit_super(struct btrfs_root *root); -void btrfs_error_commit_super(struct btrfs_root *root); struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, @@ -93,7 +92,6 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_add_log_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_cleanup_transaction(struct btrfs_root *root); void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, struct btrfs_root *root); struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b0a3fab9871..039a7716281 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -105,6 +105,8 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, u64 num_bytes, int reserve); static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, u64 num_bytes); +int btrfs_pin_extent(struct btrfs_root *root, + u64 bytenr, u64 num_bytes, int reserved); static noinline int block_group_cache_done(struct btrfs_block_group_cache *cache) @@ -684,55 +686,6 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info) rcu_read_unlock(); } -u64 btrfs_find_block_group(struct btrfs_root *root, - u64 search_start, u64 search_hint, int owner) -{ - struct btrfs_block_group_cache *cache; - u64 used; - u64 last = max(search_hint, search_start); - u64 group_start = 0; - int full_search = 0; - int factor = 9; - int wrapped = 0; -again: - while (1) { - cache = btrfs_lookup_first_block_group(root->fs_info, last); - if (!cache) - break; - - spin_lock(&cache->lock); - last = cache->key.objectid + cache->key.offset; - used = btrfs_block_group_used(&cache->item); - - if ((full_search || !cache->ro) && - block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) { - if (used + cache->pinned + cache->reserved < - div_factor(cache->key.offset, factor)) { - group_start = cache->key.objectid; - spin_unlock(&cache->lock); - btrfs_put_block_group(cache); - goto found; - } - } - spin_unlock(&cache->lock); - btrfs_put_block_group(cache); - cond_resched(); - } - if (!wrapped) { - last = search_start; - wrapped = 1; - goto again; - } - if (!full_search && factor < 10) { - last = search_start; - full_search = 1; - factor = 10; - goto again; - } -found: - return group_start; -} - /* simple helper to search for an existing extent at a given offset */ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) { @@ -3453,7 +3406,7 @@ static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags) * progress (either running or paused) picks the target profile (if it's * already available), otherwise falls back to plain reducing. */ -u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) +static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) { /* * we add in the count of missing devices because we want @@ -3927,8 +3880,8 @@ static int can_overcommit(struct btrfs_root *root, return 0; } -void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, - unsigned long nr_pages) +static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, + unsigned long nr_pages) { struct super_block *sb = root->fs_info->sb; int started; @@ -6652,10 +6605,9 @@ out: return ret; } -struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u32 blocksize, - int level) +static struct extent_buffer * +btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, + u64 bytenr, u32 blocksize, int level) { struct extent_buffer *buf; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f110d12de2d..77c5914f609 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -477,7 +477,7 @@ alloc_extent_state_atomic(struct extent_state *prealloc) return prealloc; } -void extent_io_tree_panic(struct extent_io_tree *tree, int err) +static void extent_io_tree_panic(struct extent_io_tree *tree, int err) { btrfs_panic(tree_fs_info(tree), err, "Locking error: " "Extent tree was modified by another " @@ -658,7 +658,8 @@ static void wait_on_state(struct extent_io_tree *tree, * The range [start, end] is inclusive. * The tree lock is taken by this function */ -void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits) +static void wait_extent_bit(struct extent_io_tree *tree, u64 start, + u64 end, int bits) { struct extent_state *state; struct rb_node *node; @@ -1327,8 +1328,9 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) * return it. tree->lock must be held. NULL will returned if * nothing was found after 'start' */ -struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, - u64 start, int bits) +static struct extent_state * +find_first_extent_bit_state(struct extent_io_tree *tree, + u64 start, int bits) { struct rb_node *node; struct extent_state *state; @@ -2668,7 +2670,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, return ret; } -void attach_extent_buffer_page(struct extent_buffer *eb, struct page *page) +static void attach_extent_buffer_page(struct extent_buffer *eb, + struct page *page) { if (!PagePrivate(page)) { SetPagePrivate(page); @@ -3786,9 +3789,9 @@ int extent_invalidatepage(struct extent_io_tree *tree, * are locked or under IO and drops the related state bits if it is safe * to drop the page. */ -int try_release_extent_state(struct extent_map_tree *map, - struct extent_io_tree *tree, struct page *page, - gfp_t mask) +static int try_release_extent_state(struct extent_map_tree *map, + struct extent_io_tree *tree, + struct page *page, gfp_t mask) { u64 start = page_offset(page); u64 end = start + PAGE_CACHE_SIZE - 1; @@ -4571,17 +4574,6 @@ int set_extent_buffer_dirty(struct extent_buffer *eb) return was_dirty; } -static int range_straddles_pages(u64 start, u64 len) -{ - if (len < PAGE_CACHE_SIZE) - return 1; - if (start & (PAGE_CACHE_SIZE - 1)) - return 1; - if ((start + len) & (PAGE_CACHE_SIZE - 1)) - return 1; - return 0; -} - int clear_extent_buffer_uptodate(struct extent_buffer *eb) { unsigned long i; @@ -4613,37 +4605,6 @@ int set_extent_buffer_uptodate(struct extent_buffer *eb) return 0; } -int extent_range_uptodate(struct extent_io_tree *tree, - u64 start, u64 end) -{ - struct page *page; - int ret; - int pg_uptodate = 1; - int uptodate; - unsigned long index; - - if (range_straddles_pages(start, end - start + 1)) { - ret = test_range_bit(tree, start, end, - EXTENT_UPTODATE, 1, NULL); - if (ret) - return 1; - } - while (start <= end) { - index = start >> PAGE_CACHE_SHIFT; - page = find_get_page(tree->mapping, index); - if (!page) - return 1; - uptodate = PageUptodate(page); - page_cache_release(page); - if (!uptodate) { - pg_uptodate = 0; - break; - } - start += PAGE_CACHE_SIZE; - } - return pg_uptodate; -} - int extent_buffer_uptodate(struct extent_buffer *eb) { return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 59d883bc3ed..9ebb4c7b86d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -190,9 +190,6 @@ int try_release_extent_mapping(struct extent_map_tree *map, struct extent_io_tree *tree, struct page *page, gfp_t mask); int try_release_extent_buffer(struct page *page, gfp_t mask); -int try_release_extent_state(struct extent_map_tree *map, - struct extent_io_tree *tree, struct page *page, - gfp_t mask); int lock_extent(struct extent_io_tree *tree, u64 start, u64 end); int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, struct extent_state **cached); @@ -242,8 +239,6 @@ int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end, int find_first_extent_bit(struct extent_io_tree *tree, u64 start, u64 *start_ret, u64 *end_ret, int bits, struct extent_state **cached_state); -struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, - u64 start, int bits); int extent_invalidatepage(struct extent_io_tree *tree, struct page *page, unsigned long offset); int extent_write_full_page(struct extent_io_tree *tree, struct page *page, @@ -322,7 +317,6 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, unsigned long src_offset, unsigned long len); void memset_extent_buffer(struct extent_buffer *eb, char c, unsigned long start, unsigned long len); -void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits); void clear_extent_buffer_dirty(struct extent_buffer *eb); int set_extent_buffer_dirty(struct extent_buffer *eb); int set_extent_buffer_uptodate(struct extent_buffer *eb); @@ -332,8 +326,6 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, unsigned long min_len, char **map, unsigned long *map_start, unsigned long *map_len); -int extent_range_uptodate(struct extent_io_tree *tree, - u64 start, u64 end); int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); int extent_clear_unlock_delalloc(struct inode *inode, diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index ca968742c3d..a4a7a1a8da9 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -345,8 +345,9 @@ static u64 range_end(u64 start, u64 len) return start + len; } -struct extent_map *__lookup_extent_mapping(struct extent_map_tree *tree, - u64 start, u64 len, int strict) +static struct extent_map * +__lookup_extent_mapping(struct extent_map_tree *tree, + u64 start, u64 len, int strict) { struct extent_map *em; struct rb_node *rb_node; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 769eb86f890..b193bf324a4 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -83,10 +83,11 @@ out: return ret; } -struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - u64 bytenr, int cow) +static struct btrfs_csum_item * +btrfs_lookup_csum(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, int cow) { int ret; struct btrfs_key file_key; @@ -152,27 +153,6 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, return ret; } -u64 btrfs_file_extent_length(struct btrfs_path *path) -{ - int extent_type; - struct btrfs_file_extent_item *fi; - u64 len; - - fi = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_file_extent_item); - extent_type = btrfs_file_extent_type(path->nodes[0], fi); - - if (extent_type == BTRFS_FILE_EXTENT_REG || - extent_type == BTRFS_FILE_EXTENT_PREALLOC) - len = btrfs_file_extent_num_bytes(path->nodes[0], fi); - else if (extent_type == BTRFS_FILE_EXTENT_INLINE) - len = btrfs_file_extent_inline_len(path->nodes[0], fi); - else - BUG(); - - return len; -} - static int __btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, struct bio *bio, u64 logical_offset, u32 *dst, int dio) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index bef15c3ef41..b3e359bc8e6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -192,8 +192,8 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, * the same inode in the tree, we will merge them together (by * __btrfs_add_inode_defrag()) and free the one that we want to requeue. */ -void btrfs_requeue_inode_defrag(struct inode *inode, - struct inode_defrag *defrag) +static void btrfs_requeue_inode_defrag(struct inode *inode, + struct inode_defrag *defrag) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret; @@ -473,7 +473,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, /* * unlocks pages after btrfs_file_write is done with them */ -void btrfs_drop_pages(struct page **pages, size_t num_pages) +static void btrfs_drop_pages(struct page **pages, size_t num_pages) { size_t i; for (i = 0; i < num_pages; i++) { @@ -497,9 +497,9 @@ void btrfs_drop_pages(struct page **pages, size_t num_pages) * doing real data extents, marking pages dirty and delalloc as required. */ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, - struct page **pages, size_t num_pages, - loff_t pos, size_t write_bytes, - struct extent_state **cached) + struct page **pages, size_t num_pages, + loff_t pos, size_t write_bytes, + struct extent_state **cached) { int err = 0; int i; diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 37b2b89a28f..ecca6c7375a 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -120,9 +120,10 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, return inode; } -int __create_free_space_inode(struct btrfs_root *root, - struct btrfs_trans_handle *trans, - struct btrfs_path *path, u64 ino, u64 offset) +static int __create_free_space_inode(struct btrfs_root *root, + struct btrfs_trans_handle *trans, + struct btrfs_path *path, + u64 ino, u64 offset) { struct btrfs_key key; struct btrfs_disk_key disk_key; @@ -625,9 +626,9 @@ next: spin_unlock(&ctl->tree_lock); } -int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, - struct btrfs_free_space_ctl *ctl, - struct btrfs_path *path, u64 offset) +static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, + struct btrfs_free_space_ctl *ctl, + struct btrfs_path *path, u64 offset) { struct btrfs_free_space_header *header; struct extent_buffer *leaf; @@ -868,11 +869,11 @@ out: * on mount. This will return 0 if it was successfull in writing the cache out, * and -1 if it was not. */ -int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, - struct btrfs_free_space_ctl *ctl, - struct btrfs_block_group_cache *block_group, - struct btrfs_trans_handle *trans, - struct btrfs_path *path, u64 offset) +static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, + struct btrfs_free_space_ctl *ctl, + struct btrfs_block_group_cache *block_group, + struct btrfs_trans_handle *trans, + struct btrfs_path *path, u64 offset) { struct btrfs_free_space_header *header; struct extent_buffer *leaf; @@ -2067,7 +2068,8 @@ out: return 0; } -void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl) +static void __btrfs_remove_free_space_cache_locked( + struct btrfs_free_space_ctl *ctl) { struct btrfs_free_space *info; struct rb_node *node; diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 1640e0344a4..e0b7034d634 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -183,10 +183,11 @@ int btrfs_get_inode_ref_index(struct btrfs_trans_handle *trans, return -ENOENT; } -int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - const char *name, int name_len, - u64 inode_objectid, u64 ref_objectid, u64 *index) +static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + u64 inode_objectid, u64 ref_objectid, + u64 *index) { struct btrfs_path *path; struct btrfs_key key; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0eab7b67e10..ec63d7af346 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -103,6 +103,8 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start, u64 orig_block_len, u64 ram_bytes, int type); +static int btrfs_dirty_inode(struct inode *inode); + static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, struct inode *inode, struct inode *dir, const struct qstr *qstr) @@ -3024,7 +3026,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) * We have done the truncate/delete so we can go ahead and remove the orphan * item for this particular inode. */ -int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) +static int btrfs_orphan_del(struct btrfs_trans_handle *trans, + struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; int delete_item = 0; @@ -5342,7 +5345,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) * FIXME, needs more benchmarking...there are no reasons other than performance * to keep or drop this code. */ -int btrfs_dirty_inode(struct inode *inode) +static int btrfs_dirty_inode(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; @@ -7437,8 +7440,8 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) return extent_write_full_page(tree, page, btrfs_get_extent, wbc); } -int btrfs_writepages(struct address_space *mapping, - struct writeback_control *wbc) +static int btrfs_writepages(struct address_space *mapping, + struct writeback_control *wbc) { struct extent_io_tree *tree; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f5f6af338b5..3aa37508bf2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3010,7 +3010,7 @@ void btrfs_get_block_group_info(struct list_head *groups_list, } } -long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) +static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) { struct btrfs_ioctl_space_args space_args; struct btrfs_ioctl_space_info space; diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index e95df435d89..01277b8f237 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -24,7 +24,7 @@ #include "extent_io.h" #include "locking.h" -void btrfs_assert_tree_read_locked(struct extent_buffer *eb); +static void btrfs_assert_tree_read_locked(struct extent_buffer *eb); /* * if we currently have a spinning reader or writer lock @@ -264,7 +264,7 @@ void btrfs_assert_tree_locked(struct extent_buffer *eb) BUG_ON(!atomic_read(&eb->write_locks)); } -void btrfs_assert_tree_read_locked(struct extent_buffer *eb) +static void btrfs_assert_tree_read_locked(struct extent_buffer *eb) { BUG_ON(!atomic_read(&eb->read_locks)); } diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index da75efe534d..7faddfacc5b 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -19,5 +19,5 @@ #ifndef __PRINT_TREE_ #define __PRINT_TREE_ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l); -void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *t); +void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c); #endif diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 9a79fb790ad..0740621daf6 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -410,7 +410,7 @@ static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio) /* * remove everything in the cache */ -void btrfs_clear_rbio_cache(struct btrfs_fs_info *info) +static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info) { struct btrfs_stripe_hash_table *table; unsigned long flags; @@ -1010,12 +1010,12 @@ static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio) * this will try to merge into existing bios if possible, and returns * zero if all went well. */ -int rbio_add_io_page(struct btrfs_raid_bio *rbio, - struct bio_list *bio_list, - struct page *page, - int stripe_nr, - unsigned long page_index, - unsigned long bio_max_len) +static int rbio_add_io_page(struct btrfs_raid_bio *rbio, + struct bio_list *bio_list, + struct page *page, + int stripe_nr, + unsigned long page_index, + unsigned long bio_max_len) { struct bio *last = bio_list->tail; u64 last_end = 0; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 63cdd9246c7..d338df405e7 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -326,8 +326,7 @@ static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) return NULL; } -void backref_tree_panic(struct rb_node *rb_node, int errno, - u64 bytenr) +static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr) { struct btrfs_fs_info *fs_info = NULL; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 28db5dcde0a..47500c25262 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3012,28 +3012,6 @@ int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info, return 0; } -int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid) -{ - struct btrfs_fs_info *fs_info = root->fs_info; - struct btrfs_device *dev; - int ret; - - /* - * we have to hold the device_list_mutex here so the device - * does not go away in cancel_dev. FIXME: find a better solution - */ - mutex_lock(&fs_info->fs_devices->device_list_mutex); - dev = btrfs_find_device(fs_info, devid, NULL, NULL); - if (!dev) { - mutex_unlock(&fs_info->fs_devices->device_list_mutex); - return -ENODEV; - } - ret = btrfs_scrub_cancel_dev(fs_info, dev); - mutex_unlock(&fs_info->fs_devices->device_list_mutex); - - return ret; -} - int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, struct btrfs_scrub_progress *progress) { diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 2037fc0efab..ff40f1c00ce 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -387,7 +387,7 @@ static struct btrfs_path *alloc_path_for_send(void) return path; } -int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off) +static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off) { int ret; mm_segment_t old_fs; diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h index 8bb18f7ccaa..48d425aef05 100644 --- a/fs/btrfs/send.h +++ b/fs/btrfs/send.h @@ -131,5 +131,4 @@ enum { #ifdef __KERNEL__ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg); -int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 18d6fb7be26..0544587d74f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -34,7 +34,7 @@ #define BTRFS_ROOT_TRANS_TAG 0 -void put_transaction(struct btrfs_transaction *transaction) +static void put_transaction(struct btrfs_transaction *transaction) { WARN_ON(atomic_read(&transaction->use_count) == 0); if (atomic_dec_and_test(&transaction->use_count)) { diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index f6edd5e6baa..24c97335a59 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -146,5 +146,4 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages, int mark); int btrfs_transaction_blocked(struct btrfs_fs_info *info); int btrfs_transaction_in_commit(struct btrfs_fs_info *info); -void put_transaction(struct btrfs_transaction *transaction); #endif diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 705aee6bd15..c276ac9a0ec 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3839,9 +3839,9 @@ out: * only logging is done of any parent directories that are older than * the last committed transaction */ -int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, - struct dentry *parent, int exists_only) +static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + struct dentry *parent, int exists_only) { int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; struct super_block *sb; diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 862ac813f6b..1d4ae0d15a7 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -40,9 +40,6 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, struct inode *inode, u64 dirid); void btrfs_end_log_trans(struct btrfs_root *root); int btrfs_pin_log_trans(struct btrfs_root *root); -int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, - struct dentry *parent, int exists_only); void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, struct inode *dir, struct inode *inode, int for_rename); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c1a22178c6e..a191bac31d8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -46,6 +46,7 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans, struct btrfs_device *device); static int btrfs_relocate_sys_chunks(struct btrfs_root *root); static void __btrfs_reset_dev_stats(struct btrfs_device *dev); +static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev); static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); static DEFINE_MUTEX(uuid_mutex); @@ -1199,10 +1200,10 @@ out: return ret; } -int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, - struct btrfs_device *device, - u64 chunk_tree, u64 chunk_objectid, - u64 chunk_offset, u64 start, u64 num_bytes) +static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, + struct btrfs_device *device, + u64 chunk_tree, u64 chunk_objectid, + u64 chunk_offset, u64 start, u64 num_bytes) { int ret; struct btrfs_path *path; @@ -1329,9 +1330,9 @@ error: * the device information is stored in the chunk root * the btrfs_device struct should be fully filled in */ -int btrfs_add_device(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_device *device) +static int btrfs_add_device(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_device *device) { int ret; struct btrfs_path *path; @@ -1710,8 +1711,8 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, mutex_unlock(&fs_info->fs_devices->device_list_mutex); } -int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path, - struct btrfs_device **device) +static int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path, + struct btrfs_device **device) { int ret = 0; struct btrfs_super_block *disk_super; @@ -3607,7 +3608,7 @@ static int btrfs_cmp_device_info(const void *a, const void *b) return 0; } -struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { +static struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { [BTRFS_RAID_RAID10] = { .sub_stripes = 2, .dev_stripes = 1, @@ -5120,9 +5121,9 @@ struct async_sched { * This will add one bio to the pending list for a device and make sure * the work struct is scheduled. */ -noinline void btrfs_schedule_bio(struct btrfs_root *root, - struct btrfs_device *device, - int rw, struct bio *bio) +static noinline void btrfs_schedule_bio(struct btrfs_root *root, + struct btrfs_device *device, + int rw, struct bio *bio) { int should_queue = 1; struct btrfs_pending_bios *pending_bios; @@ -5940,7 +5941,7 @@ void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index) btrfs_dev_stat_print_on_error(dev); } -void btrfs_dev_stat_print_on_error(struct btrfs_device *dev) +static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev) { if (!dev->dev_stats_valid) return; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 062d8604d35..845ccbb0d2e 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -254,10 +254,6 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, #define btrfs_bio_size(n) (sizeof(struct btrfs_bio) + \ (sizeof(struct btrfs_bio_stripe) * (n))) -int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, - struct btrfs_device *device, - u64 chunk_tree, u64 chunk_objectid, - u64 chunk_offset, u64 start, u64 num_bytes); int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, u64 logical, u64 *length, struct btrfs_bio **bbio_ret, int mirror_num); @@ -282,11 +278,6 @@ void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info, int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, char *device_path, struct btrfs_device **device); -int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path, - struct btrfs_device **device); -int btrfs_add_device(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_device *device); int btrfs_rm_device(struct btrfs_root *root, char *device_path); void btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); @@ -307,7 +298,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, u64 *start, u64 *max_avail); -void btrfs_dev_stat_print_on_error(struct btrfs_device *device); void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); int btrfs_get_dev_stats(struct btrfs_root *root, struct btrfs_ioctl_get_dev_stats *stats); @@ -321,9 +311,6 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info, struct btrfs_device *tgtdev); int btrfs_scratch_superblock(struct btrfs_device *device); -void btrfs_schedule_bio(struct btrfs_root *root, - struct btrfs_device *device, - int rw, struct bio *bio); int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len, int mirror_num); unsigned long btrfs_full_stripe_len(struct btrfs_root *root, diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 446a6848c55..05740b9789e 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -406,8 +406,8 @@ int btrfs_removexattr(struct dentry *dentry, const char *name) XATTR_REPLACE); } -int btrfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, - void *fs_info) +static int btrfs_initxattrs(struct inode *inode, + const struct xattr *xattr_array, void *fs_info) { const struct xattr *xattr; struct btrfs_trans_handle *trans = fs_info; -- cgit v1.2.3-70-g09d2 From f7a52a40cabea38b99b5053bc4f7cf45f4997603 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 26 Apr 2013 14:56:29 +0000 Subject: btrfs: remove unused gfp mask parameter from release_extent_buffer callchain It's unused since 0b32f4bbb423f02ac. Signed-off-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 8 +------- fs/btrfs/extent_io.c | 13 +++++-------- fs/btrfs/extent_io.h | 2 +- 3 files changed, 7 insertions(+), 16 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ac132d9637b..2bc1ecf5e84 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -966,14 +966,8 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) { if (PageWriteback(page) || PageDirty(page)) return 0; - /* - * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing - * slab allocation from alloc_extent_state down the callchain where - * it'd hit a BUG_ON as those flags are not allowed. - */ - gfp_flags &= ~GFP_SLAB_BUG_MASK; - return try_release_extent_buffer(page, gfp_flags); + return try_release_extent_buffer(page); } static void btree_invalidatepage(struct page *page, unsigned long offset) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 77c5914f609..9b93e807616 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4450,7 +4450,7 @@ static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head) } /* Expects to have eb->eb_lock already held */ -static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask) +static int release_extent_buffer(struct extent_buffer *eb) { WARN_ON(atomic_read(&eb->refs) == 0); if (atomic_dec_and_test(&eb->refs)) { @@ -4508,7 +4508,7 @@ void free_extent_buffer(struct extent_buffer *eb) * I know this is terrible, but it's temporary until we stop tracking * the uptodate bits and such for the extent buffers. */ - release_extent_buffer(eb, GFP_ATOMIC); + release_extent_buffer(eb); } void free_extent_buffer_stale(struct extent_buffer *eb) @@ -4522,7 +4522,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb) if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) && test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) atomic_dec(&eb->refs); - release_extent_buffer(eb, GFP_NOFS); + release_extent_buffer(eb); } void clear_extent_buffer_dirty(struct extent_buffer *eb) @@ -5042,7 +5042,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, } } -int try_release_extent_buffer(struct page *page, gfp_t mask) +int try_release_extent_buffer(struct page *page) { struct extent_buffer *eb; @@ -5072,9 +5072,6 @@ int try_release_extent_buffer(struct page *page, gfp_t mask) } spin_unlock(&page->mapping->private_lock); - if ((mask & GFP_NOFS) == GFP_NOFS) - mask = GFP_NOFS; - /* * If tree ref isn't set then we know the ref on this eb is a real ref, * so just return, this page will likely be freed soon anyway. @@ -5084,5 +5081,5 @@ int try_release_extent_buffer(struct page *page, gfp_t mask) return 0; } - return release_extent_buffer(eb, mask); + return release_extent_buffer(eb); } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 9ebb4c7b86d..3af58bf55dd 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -189,7 +189,7 @@ void extent_io_tree_init(struct extent_io_tree *tree, int try_release_extent_mapping(struct extent_map_tree *map, struct extent_io_tree *tree, struct page *page, gfp_t mask); -int try_release_extent_buffer(struct page *page, gfp_t mask); +int try_release_extent_buffer(struct page *page); int lock_extent(struct extent_io_tree *tree, u64 start, u64 end); int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, struct extent_state **cached); -- cgit v1.2.3-70-g09d2 From 1104a8855109a4051d74977f819a13b4516aa11e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 6 Mar 2013 15:57:46 +0100 Subject: btrfs: enhance superblock checks The superblock checksum is not verified upon mount. Add that check and also reorder existing checks to a more logical order. Current mkfs.btrfs does not calculate the correct checksum of super_block and thus a freshly created filesytem will fail to mount when this patch is applied. First transaction commit calculates correct superblock checksum and saves it to disk. Reproducer: $ mfks.btrfs /dev/sda $ mount /dev/sda /mnt $ btrfs scrub start /mnt $ sleep 5 $ btrfs scrub status /mnt ... super:2 ... Signed-off-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 6 ++-- fs/btrfs/disk-io.c | 82 ++++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 71 insertions(+), 17 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 78b9d457d72..63c328a9ce9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2793,8 +2793,10 @@ BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block, static inline int btrfs_super_csum_size(struct btrfs_super_block *s) { - int t = btrfs_super_csum_type(s); - BUG_ON(t >= ARRAY_SIZE(btrfs_csum_sizes)); + u16 t = btrfs_super_csum_type(s); + /* + * csum type is validated at mount time + */ return btrfs_csum_sizes[t]; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2bc1ecf5e84..bc423f7eddc 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -356,6 +356,44 @@ out: return ret; } +/* + * Return 0 if the superblock checksum type matches the checksum value of that + * algorithm. Pass the raw disk superblock data. + */ +static int btrfs_check_super_csum(char *raw_disk_sb) +{ + struct btrfs_super_block *disk_sb = + (struct btrfs_super_block *)raw_disk_sb; + u16 csum_type = btrfs_super_csum_type(disk_sb); + int ret = 0; + + if (csum_type == BTRFS_CSUM_TYPE_CRC32) { + u32 crc = ~(u32)0; + const int csum_size = sizeof(crc); + char result[csum_size]; + + /* + * The super_block structure does not span the whole + * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space + * is filled with zeros and is included in the checkum. + */ + crc = btrfs_csum_data(raw_disk_sb + BTRFS_CSUM_SIZE, + crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); + btrfs_csum_final(crc, result); + + if (memcmp(raw_disk_sb, result, csum_size)) + ret = 1; + } + + if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) { + printk(KERN_ERR "btrfs: unsupported checksum algorithm %u\n", + csum_type); + ret = 1; + } + + return ret; +} + /* * helper to read a given tree block, doing retries as required when * the checksums don't match and we have alternate mirrors to try. @@ -2249,12 +2287,31 @@ int open_ctree(struct super_block *sb, fs_info, BTRFS_ROOT_TREE_OBJECTID); invalidate_bdev(fs_devices->latest_bdev); + + /* + * Read super block and check the signature bytes only + */ bh = btrfs_read_dev_super(fs_devices->latest_bdev); if (!bh) { err = -EINVAL; goto fail_alloc; } + /* + * We want to check superblock checksum, the type is stored inside. + * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k). + */ + if (btrfs_check_super_csum(bh->b_data)) { + printk(KERN_ERR "btrfs: superblock checksum mismatch\n"); + err = -EINVAL; + goto fail_alloc; + } + + /* + * super_copy is zeroed at allocation time and we never touch the + * following bytes up to INFO_SIZE, the checksum is calculated from + * the whole block of INFO_SIZE + */ memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy)); memcpy(fs_info->super_for_commit, fs_info->super_copy, sizeof(*fs_info->super_for_commit)); @@ -2262,6 +2319,13 @@ int open_ctree(struct super_block *sb, memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE); + ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); + if (ret) { + printk(KERN_ERR "btrfs: superblock contains fatal errors\n"); + err = -EINVAL; + goto fail_alloc; + } + disk_super = fs_info->super_copy; if (!btrfs_super_root(disk_super)) goto fail_alloc; @@ -2270,13 +2334,6 @@ int open_ctree(struct super_block *sb, if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR) set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state); - ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); - if (ret) { - printk(KERN_ERR "btrfs: superblock contains fatal errors\n"); - err = ret; - goto fail_alloc; - } - /* * run through our array of backup supers and setup * our ring pointer to the oldest one @@ -3561,14 +3618,9 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, int read_only) { - if (btrfs_super_csum_type(fs_info->super_copy) >= ARRAY_SIZE(btrfs_csum_sizes)) { - printk(KERN_ERR "btrfs: unsupported checksum algorithm\n"); - return -EINVAL; - } - - if (read_only) - return 0; - + /* + * Placeholder for checks + */ return 0; } -- cgit v1.2.3-70-g09d2 From 667e7d94a1683661cff5fe9a0fa0d7f8fdd2c007 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 7 May 2013 11:00:13 -0400 Subject: Btrfs: allow superblock mismatch from older mkfs We've added new checks to make sure the super block crc is correct during mount. A fresh filesystem from an older mkfs won't have the crc set. This adds a warning when it finds a newly created filesystem but doesn't fail the mount. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bc423f7eddc..4e9ebe1f182 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -383,6 +383,11 @@ static int btrfs_check_super_csum(char *raw_disk_sb) if (memcmp(raw_disk_sb, result, csum_size)) ret = 1; + + if (ret && btrfs_super_generation(disk_sb) < 10) { + printk(KERN_WARNING "btrfs: super block crcs don't match, older mkfs detected\n"); + ret = 0; + } } if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) { -- cgit v1.2.3-70-g09d2