From 74121f7cbb2f60b41c48184ad5b889c0704e7b90 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 7 Aug 2014 12:00:44 +0100 Subject: Btrfs: fix hole detection during file fsync The file hole detection logic during a file fsync wasn't correct, because it didn't look back (in a previous leaf) for the last file extent item that can be in a leaf to the left of our leaf and that has a generation lower than the current transaction id. This made it assume that a hole exists when it really doesn't exist in the file. Such false positive hole detection happens in the following scenario: * We have a file that has many file extent items, covering 3 or more btree leafs (the first leaf must contain non file extent items too). * Two ranges of the file are modified, with their extent items being located at 2 different leafs and those leafs aren't consecutive. * When processing the second modified leaf, we weren't checking if some file extent item exists that is located in some leaf that is between our 2 modified leafs, and therefore assumed the range defined between the last file extent item in the first leaf and the first file extent item in the second leaf matched a hole. Fortunately this didn't result in overriding the log with wrong data, instead it made the last loop in copy_items() attempt to insert a duplicated key (for a hole file extent item), which makes the file fsync code return with -EEXIST to file.c:btrfs_sync_file() which in turn ends up doing a full transaction commit, which is much more expensive than writing only to the log tree and waiting for it to be durably persisted (as well as the file's modified extents/pages). Therefore fix the hole detection logic, so that we don't pay the cost of doing full transaction commits. I could trigger this issue with the following test for xfstests (which never fails, either without or with this patch). The last fsync call results in a full transaction commit, due to the -EEXIST error mentioned above.
I could also observe this behaviour happening frequently when running xfstests/generic/075 in a loop. Test: _cleanup() { _cleanup_flakey rm -fr $tmp } # get standard environment, filters and checks . ./common/rc . ./common/filter . ./common/dmflakey # real QA test starts here _supported_fs btrfs _supported_os Linux _require_scratch _require_dm_flakey _need_to_be_root rm -f $seqres.full # Create a file with many file extent items, each representing a 4Kb extent. # These items span 3 btree leaves, of 16Kb each (default mkfs.btrfs leaf size # as of btrfs-progs 3.12). _scratch_mkfs -l 16384 >/dev/null 2>&1 _init_flakey SAVE_MOUNT_OPTIONS="$MOUNT_OPTIONS" MOUNT_OPTIONS="$MOUNT_OPTIONS -o commit=999" _mount_flakey # First fsync, inode has BTRFS_INODE_NEEDS_FULL_SYNC flag set. $XFS_IO_PROG -f -c "pwrite -S 0x01 -b 4096 0 4096" -c "fsync" \ $SCRATCH_MNT/foo | _filter_xfs_io # For any of the following fsync calls, inode doesn't have the flag # BTRFS_INODE_NEEDS_FULL_SYNC set. for ((i = 1; i <= 500; i++)); do OFFSET=$((4096 * i)) LEN=4096 $XFS_IO_PROG -c "pwrite -S 0x01 $OFFSET $LEN" -c "fsync" \ $SCRATCH_MNT/foo | _filter_xfs_io done # Commit transaction and bump next transaction's id (to 7). sync # Truncate will set the BTRFS_INODE_NEEDS_FULL_SYNC flag in the btrfs's # inode runtime flags. $XFS_IO_PROG -c "truncate 2048000" $SCRATCH_MNT/foo # Commit transaction and bump next transaction's id (to 8). sync # Touch 1 extent item from the first leaf and 1 from the last leaf. The leaf # in the middle, containing only file extent items, isn't touched. So the # next fsync, when calling btrfs_search_forward(), won't visit that middle # leaf. First and 3rd leaf have now a generation with value 8, while the # middle leaf remains with a generation with value 6. 
$XFS_IO_PROG \ -c "pwrite -S 0xee -b 4096 0 4096" \ -c "pwrite -S 0xff -b 4096 2043904 4096" \ -c "fsync" \ $SCRATCH_MNT/foo | _filter_xfs_io _load_flakey_table $FLAKEY_DROP_WRITES md5sum $SCRATCH_MNT/foo | _filter_scratch _unmount_flakey _load_flakey_table $FLAKEY_ALLOW_WRITES # During mount, we'll replay the log created by the fsync above, and the file's # md5 digest should be the same we got before the unmount. _mount_flakey md5sum $SCRATCH_MNT/foo | _filter_scratch _unmount_flakey MOUNT_OPTIONS="$SAVE_MOUNT_OPTIONS" status=0 exit Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9e1f2cd5e67..7e0e6e3029d 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3298,7 +3298,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, struct list_head ordered_sums; int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; bool has_extents = false; - bool need_find_last_extent = (*last_extent == 0); + bool need_find_last_extent = true; bool done = false; INIT_LIST_HEAD(&ordered_sums); @@ -3352,8 +3352,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, */ if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) { has_extents = true; - if (need_find_last_extent && - first_key.objectid == (u64)-1) + if (first_key.objectid == (u64)-1) first_key = ins_keys[i]; } else { need_find_last_extent = false; @@ -3427,6 +3426,16 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, if (!has_extents) return ret; + if (need_find_last_extent && *last_extent == first_key.offset) { + /* + * We don't have any leafs between our current one and the one + * we processed before that can have file extent items for our + * inode (and have a generation number smaller than our current + * transaction id). 
+ */ + need_find_last_extent = false; + } + /* * Because we use btrfs_search_forward we could skip leaves that were * not modified and then assume *last_extent is valid when it really @@ -3537,7 +3546,7 @@ fill_holes: 0, 0); if (ret) break; - *last_extent = offset + len; + *last_extent = extent_end; } /* * Need to let the callers know we dropped the path so they should -- cgit v1.2.3-70-g09d2 From 49dae1bc1c665817e434d01eefaa11967f618243 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 6 Sep 2014 22:34:39 +0100 Subject: Btrfs: fix fsync data loss after a ranged fsync While we're doing a full fsync (when the inode has the flag BTRFS_INODE_NEEDS_FULL_SYNC set) that is ranged too (covers only a portion of the file), we might have ordered operations that are started before or while we're logging the inode and that fall outside the fsync range. Therefore when a full ranged fsync finishes don't remove every extent map from the list of modified extent maps - as for some of them, that fall outside our fsync range, their respective ordered operation hasn't finished yet, meaning the corresponding file extent item wasn't inserted into the fs/subvol tree yet and therefore we didn't log it, and we must let the next fast fsync (one that checks only the modified list) see this extent map and log a matching file extent item to the log btree and wait for its ordered operation to finish (if it's still ongoing). A test case for xfstests follows. 
Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- fs/btrfs/tree-log.c | 77 ++++++++++++++++++++++++++++++++++++++++++----------- fs/btrfs/tree-log.h | 2 ++ 3 files changed, 64 insertions(+), 17 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 36861b7a675..ff1cc0399b9 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1966,7 +1966,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) btrfs_init_log_ctx(&ctx); - ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx); + ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx); if (ret < 0) { /* Fallthrough and commit/free transaction. */ ret = 1; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 7e0e6e3029d..d296efe2d3e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -94,8 +94,10 @@ #define LOG_WALK_REPLAY_ALL 3 static int btrfs_log_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, - int inode_only); + struct btrfs_root *root, struct inode *inode, + int inode_only, + const loff_t start, + const loff_t end); static int link_to_fixup_dir(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid); @@ -3858,8 +3860,10 @@ process: * This handles both files and directories. 
*/ static int btrfs_log_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, - int inode_only) + struct btrfs_root *root, struct inode *inode, + int inode_only, + const loff_t start, + const loff_t end) { struct btrfs_path *path; struct btrfs_path *dst_path; @@ -3876,6 +3880,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, int ins_nr; bool fast_search = false; u64 ino = btrfs_ino(inode); + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; path = btrfs_alloc_path(); if (!path) @@ -4049,13 +4054,35 @@ log_extents: goto out_unlock; } } else if (inode_only == LOG_INODE_ALL) { - struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; struct extent_map *em, *n; - write_lock(&tree->lock); - list_for_each_entry_safe(em, n, &tree->modified_extents, list) - list_del_init(&em->list); - write_unlock(&tree->lock); + write_lock(&em_tree->lock); + /* + * We can't just remove every em if we're called for a ranged + * fsync - that is, one that doesn't cover the whole possible + * file range (0 to LLONG_MAX). This is because we can have + * em's that fall outside the range we're logging and therefore + * their ordered operations haven't completed yet + * (btrfs_finish_ordered_io() not invoked yet). This means we + * didn't get their respective file extent item in the fs/subvol + * tree yet, and need to let the next fast fsync (one which + * consults the list of modified extent maps) find the em so + * that it logs a matching file extent item and waits for the + * respective ordered operation to complete (if it's still + * running). + * + * Removing every em outside the range we're logging would make + * the next fast fsync not log their matching file extent items, + * therefore making us lose data after a log replay. 
+ */ + list_for_each_entry_safe(em, n, &em_tree->modified_extents, + list) { + const u64 mod_end = em->mod_start + em->mod_len - 1; + + if (em->mod_start >= start && mod_end <= end) + list_del_init(&em->list); + } + write_unlock(&em_tree->lock); } if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { @@ -4065,8 +4092,19 @@ log_extents: goto out_unlock; } } - BTRFS_I(inode)->logged_trans = trans->transid; - BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; + + write_lock(&em_tree->lock); + /* + * If we're doing a ranged fsync and there are still modified extents + * in the list, we must run on the next fsync call as it might cover + * those extents (a full fsync or an fsync for other range). + */ + if (list_empty(&em_tree->modified_extents)) { + BTRFS_I(inode)->logged_trans = trans->transid; + BTRFS_I(inode)->last_log_commit = + BTRFS_I(inode)->last_sub_trans; + } + write_unlock(&em_tree->lock); out_unlock: if (unlikely(err)) btrfs_put_logged_extents(&logged_list); @@ -4161,7 +4199,10 @@ out: */ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - struct dentry *parent, int exists_only, + struct dentry *parent, + const loff_t start, + const loff_t end, + int exists_only, struct btrfs_log_ctx *ctx) { int inode_only = exists_only ? 
LOG_INODE_EXISTS : LOG_INODE_ALL; @@ -4207,7 +4248,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (ret) goto end_no_trans; - ret = btrfs_log_inode(trans, root, inode, inode_only); + ret = btrfs_log_inode(trans, root, inode, inode_only, start, end); if (ret) goto end_trans; @@ -4235,7 +4276,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) { - ret = btrfs_log_inode(trans, root, inode, inode_only); + ret = btrfs_log_inode(trans, root, inode, inode_only, + 0, LLONG_MAX); if (ret) goto end_trans; } @@ -4269,13 +4311,15 @@ end_no_trans: */ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct dentry *dentry, + const loff_t start, + const loff_t end, struct btrfs_log_ctx *ctx) { struct dentry *parent = dget_parent(dentry); int ret; ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, - 0, ctx); + start, end, 0, ctx); dput(parent); return ret; @@ -4512,6 +4556,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans, root->fs_info->last_trans_committed)) return 0; - return btrfs_log_inode_parent(trans, root, inode, parent, 1, NULL); + return btrfs_log_inode_parent(trans, root, inode, parent, 0, + LLONG_MAX, 1, NULL); } diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 7f5b41bd537..e2e798ae7cd 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -59,6 +59,8 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, int btrfs_recover_log_trees(struct btrfs_root *tree_root); int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct dentry *dentry, + const loff_t start, + const loff_t end, struct btrfs_log_ctx *ctx); int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, struct btrfs_root *root, -- cgit v1.2.3-70-g09d2 From 125c4cf9f37c98fed2c08229b31358cfec63dcf6 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 11 Sep 
2014 21:22:14 +0100 Subject: Btrfs: set inode's logged_trans/last_log_commit after ranged fsync When a ranged fsync finishes if there are still extent maps in the modified list, still set the inode's logged_trans and last_log_commit. This is important in case an inode is fsync'ed and unlinked in the same transaction, to ensure its inode ref gets deleted from the log and the respective dentries in its parent are deleted too from the log (if the parent directory was fsync'ed in the same transaction). Instead make btrfs_inode_in_log() return false if the list of modified extent maps isn't empty. This is an incremental on top of the v4 version of the patch: "Btrfs: fix fsync data loss after a ranged fsync" which was added to its v5, but didn't make it on time. Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 13 +++++++++++-- fs/btrfs/tree-log.c | 14 ++------------ 2 files changed, 13 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 43527fd7882..56b8522d576 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -234,8 +234,17 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) BTRFS_I(inode)->last_sub_trans <= BTRFS_I(inode)->last_log_commit && BTRFS_I(inode)->last_sub_trans <= - BTRFS_I(inode)->root->last_log_commit) - return 1; + BTRFS_I(inode)->root->last_log_commit) { + /* + * After a ranged fsync we might have left some extent maps + * (that fall outside the fsync's range). So return false + * here if the list isn't empty, to make sure btrfs_log_inode() + * will be called and process those extent maps. 
+ */ + smp_mb(); + if (list_empty(&BTRFS_I(inode)->extent_tree.modified_extents)) + return 1; + } return 0; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d296efe2d3e..1d1ba083ca6 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4093,18 +4093,8 @@ log_extents: } } - write_lock(&em_tree->lock); - /* - * If we're doing a ranged fsync and there are still modified extents - * in the list, we must run on the next fsync call as it might cover - * those extents (a full fsync or an fsync for other range). - */ - if (list_empty(&em_tree->modified_extents)) { - BTRFS_I(inode)->logged_trans = trans->transid; - BTRFS_I(inode)->last_log_commit = - BTRFS_I(inode)->last_sub_trans; - } - write_unlock(&em_tree->lock); + BTRFS_I(inode)->logged_trans = trans->transid; + BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; out_unlock: if (unlikely(err)) btrfs_put_logged_extents(&logged_list); -- cgit v1.2.3-70-g09d2 From 962a298f35110edd8f326814ae41a3dd306ecb64 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 4 Jun 2014 18:41:45 +0200 Subject: btrfs: kill the key type accessor helpers btrfs_set_key_type and btrfs_key_type are used inconsistently along with open coded variants. Other members of btrfs_key are accessed directly without any helpers anyway. 
Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 4 ++-- fs/btrfs/delayed-inode.c | 8 ++++---- fs/btrfs/dir-item.c | 12 ++++++------ fs/btrfs/export.c | 4 ++-- fs/btrfs/extent-tree.c | 6 +++--- fs/btrfs/extent_io.c | 2 +- fs/btrfs/file-item.c | 12 ++++++------ fs/btrfs/file.c | 4 ++-- fs/btrfs/inode-item.c | 12 ++++++------ fs/btrfs/inode.c | 28 ++++++++++++++-------------- fs/btrfs/ioctl.c | 6 +++--- fs/btrfs/orphan.c | 4 ++-- fs/btrfs/print-tree.c | 2 +- fs/btrfs/scrub.c | 2 +- fs/btrfs/tree-log.c | 6 +++--- fs/btrfs/volumes.c | 4 ++-- fs/btrfs/xattr.c | 4 ++-- 17 files changed, 60 insertions(+), 60 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 54a201dac7f..cfe8566e6e3 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1193,7 +1193,7 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, unsigned long ptr; key.objectid = inode_objectid; - btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY); + key.type = BTRFS_INODE_EXTREF_KEY; key.offset = start_off; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); @@ -1233,7 +1233,7 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, ret = -ENOENT; if (found_key.objectid != inode_objectid) break; - if (btrfs_key_type(&found_key) != BTRFS_INODE_EXTREF_KEY) + if (found_key.type != BTRFS_INODE_EXTREF_KEY) break; ret = 0; diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index a2e90f855d7..054577bddaf 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1042,7 +1042,7 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, int ret; key.objectid = node->inode_id; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags)) @@ -1099,7 +1099,7 @@ err_out: search: btrfs_release_path(path); - btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY); + 
key.type = BTRFS_INODE_EXTREF_KEY; key.offset = -1; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) @@ -1473,7 +1473,7 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, } delayed_item->key.objectid = btrfs_ino(dir); - btrfs_set_key_type(&delayed_item->key, BTRFS_DIR_INDEX_KEY); + delayed_item->key.type = BTRFS_DIR_INDEX_KEY; delayed_item->key.offset = index; dir_item = (struct btrfs_dir_item *)delayed_item->data; @@ -1542,7 +1542,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, return PTR_ERR(node); item_key.objectid = btrfs_ino(dir); - btrfs_set_key_type(&item_key, BTRFS_DIR_INDEX_KEY); + item_key.type = BTRFS_DIR_INDEX_KEY; item_key.offset = index; ret = btrfs_delete_delayed_insertion_item(root, node, &item_key); diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index a0691df5dce..fc8df866e91 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -86,7 +86,7 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root)); key.objectid = objectid; - btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); + key.type = BTRFS_XATTR_ITEM_KEY; key.offset = btrfs_name_hash(name, name_len); data_size = sizeof(*dir_item) + name_len + data_len; @@ -137,7 +137,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root u32 data_size; key.objectid = btrfs_ino(dir); - btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); + key.type = BTRFS_DIR_ITEM_KEY; key.offset = btrfs_name_hash(name, name_len); path = btrfs_alloc_path(); @@ -204,7 +204,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, int cow = mod != 0; key.objectid = dir; - btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); + key.type = BTRFS_DIR_ITEM_KEY; key.offset = btrfs_name_hash(name, name_len); @@ -234,7 +234,7 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, return -ENOMEM; key.objectid = dir; - 
btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); + key.type = BTRFS_DIR_ITEM_KEY; key.offset = btrfs_name_hash(name, name_len); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); @@ -297,7 +297,7 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, int cow = mod != 0; key.objectid = dir; - btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); + key.type = BTRFS_DIR_INDEX_KEY; key.offset = objectid; ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); @@ -367,7 +367,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, int cow = mod != 0; key.objectid = dir; - btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); + key.type = BTRFS_XATTR_ITEM_KEY; key.offset = btrfs_name_hash(name, name_len); ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); if (ret < 0) diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 41422a3de8e..37d164540c3 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -70,7 +70,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, return ERR_PTR(-ESTALE); key.objectid = root_objectid; - btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + key.type = BTRFS_ROOT_ITEM_KEY; key.offset = (u64)-1; index = srcu_read_lock(&fs_info->subvol_srcu); @@ -82,7 +82,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, } key.objectid = objectid; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; inode = btrfs_iget(sb, &key, root, NULL); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3efe1c3877b..4d1b50d4dc5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3097,7 +3097,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, for (i = 0; i < nritems; i++) { if (level == 0) { btrfs_item_key_to_cpu(buf, &key, i); - if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) + if (key.type != BTRFS_EXTENT_DATA_KEY) continue; fi = btrfs_item_ptr(buf, i, struct 
btrfs_file_extent_item); @@ -6464,7 +6464,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root, bool have_caching_bg = false; WARN_ON(num_bytes < root->sectorsize); - btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); + ins->type = BTRFS_EXTENT_ITEM_KEY; ins->objectid = 0; ins->offset = 0; @@ -9009,7 +9009,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) root = info->extent_root; key.objectid = 0; key.offset = 0; - btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); + key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; path = btrfs_alloc_path(); if (!path) return -ENOMEM; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index af0359dcf33..1009fa8a08e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4224,7 +4224,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, WARN_ON(!ret); path->slots[0]--; btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); - found_type = btrfs_key_type(&found_key); + found_type = found_key.type; /* No extents, but there might be delalloc bits */ if (found_key.objectid != btrfs_ino(inode) || diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 54c84daec9b..991f056acab 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -55,7 +55,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, return -ENOMEM; file_key.objectid = objectid; file_key.offset = pos; - btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); + file_key.type = BTRFS_EXTENT_DATA_KEY; path->leave_spinning = 1; ret = btrfs_insert_empty_item(trans, root, path, &file_key, @@ -100,7 +100,7 @@ btrfs_lookup_csum(struct btrfs_trans_handle *trans, file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; file_key.offset = bytenr; - btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY); + file_key.type = BTRFS_EXTENT_CSUM_KEY; ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); if (ret < 0) goto fail; @@ -111,7 +111,7 @@ btrfs_lookup_csum(struct btrfs_trans_handle *trans, goto 
fail; path->slots[0]--; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY) + if (found_key.type != BTRFS_EXTENT_CSUM_KEY) goto fail; csum_offset = (bytenr - found_key.offset) >> @@ -148,7 +148,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, file_key.objectid = objectid; file_key.offset = offset; - btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); + file_key.type = BTRFS_EXTENT_DATA_KEY; ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); return ret; } @@ -720,7 +720,7 @@ again: bytenr = sums->bytenr + total_bytes; file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; file_key.offset = bytenr; - btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY); + file_key.type = BTRFS_EXTENT_CSUM_KEY; item = btrfs_lookup_csum(trans, root, path, bytenr, 1); if (!IS_ERR(item)) { @@ -790,7 +790,7 @@ again: csum_offset = (bytenr - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; - if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY || + if (found_key.type != BTRFS_EXTENT_CSUM_KEY || found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || csum_offset >= MAX_CSUM_ITEMS(root, csum_size)) { goto insert; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ff1cc0399b9..a9b56e32dd8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -299,7 +299,7 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, /* get the inode */ key.objectid = defrag->root; - btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + key.type = BTRFS_ROOT_ITEM_KEY; key.offset = (u64)-1; index = srcu_read_lock(&fs_info->subvol_srcu); @@ -311,7 +311,7 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, } key.objectid = defrag->ino; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); if (IS_ERR(inode)) { diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 
2be38df703c..8ffa4783cbf 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -135,7 +135,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, u32 item_size; key.objectid = inode_objectid; - btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY); + key.type = BTRFS_INODE_EXTREF_KEY; key.offset = btrfs_extref_hash(ref_objectid, name, name_len); path = btrfs_alloc_path(); @@ -209,7 +209,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, key.objectid = inode_objectid; key.offset = ref_objectid; - btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); + key.type = BTRFS_INODE_REF_KEY; path = btrfs_alloc_path(); if (!path) @@ -337,7 +337,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, key.objectid = inode_objectid; key.offset = ref_objectid; - btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); + key.type = BTRFS_INODE_REF_KEY; path = btrfs_alloc_path(); if (!path) @@ -400,7 +400,7 @@ int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, struct btrfs_key key; int ret; key.objectid = objectid; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; ret = btrfs_insert_empty_item(trans, root, path, &key, @@ -420,13 +420,13 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key found_key; ret = btrfs_search_slot(trans, root, location, path, ins_len, cow); - if (ret > 0 && btrfs_key_type(location) == BTRFS_ROOT_ITEM_KEY && + if (ret > 0 && location->type == BTRFS_ROOT_ITEM_KEY && location->offset == (u64)-1 && path->slots[0] != 0) { slot = path->slots[0] - 1; leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, slot); if (found_key.objectid == location->objectid && - btrfs_key_type(&found_key) == btrfs_key_type(location)) { + found_key.type == location->type) { path->slots[0]--; return 0; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 016c403bfe7..e326ffdd5c7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ 
-153,7 +153,7 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, key.objectid = btrfs_ino(inode); key.offset = start; - btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + key.type = BTRFS_EXTENT_DATA_KEY; datasize = btrfs_file_extent_calc_inline_size(cur_size); path->leave_spinning = 1; @@ -3159,7 +3159,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) path->reada = -1; key.objectid = BTRFS_ORPHAN_OBJECTID; - btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); + key.type = BTRFS_ORPHAN_ITEM_KEY; key.offset = (u64)-1; while (1) { @@ -3186,7 +3186,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) /* make sure the item matches what we want */ if (found_key.objectid != BTRFS_ORPHAN_OBJECTID) break; - if (btrfs_key_type(&found_key) != BTRFS_ORPHAN_ITEM_KEY) + if (found_key.type != BTRFS_ORPHAN_ITEM_KEY) break; /* release the path since we're done with it */ @@ -4085,7 +4085,7 @@ search_again: fi = NULL; leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - found_type = btrfs_key_type(&found_key); + found_type = found_key.type; if (found_key.objectid != ino) break; @@ -5331,7 +5331,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) btrfs_get_delayed_items(inode, &ins_list, &del_list); } - btrfs_set_key_type(&key, key_type); + key.type = key_type; key.offset = ctx->pos; key.objectid = btrfs_ino(inode); @@ -5356,7 +5356,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) if (found_key.objectid != key.objectid) break; - if (btrfs_key_type(&found_key) != key_type) + if (found_key.type != key_type) break; if (found_key.offset < ctx->pos) goto next; @@ -5568,7 +5568,7 @@ static int btrfs_set_inode_index_count(struct inode *inode) int ret; key.objectid = btrfs_ino(inode); - btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); + key.type = BTRFS_DIR_INDEX_KEY; key.offset = (u64)-1; path = btrfs_alloc_path(); @@ -5600,7 +5600,7 @@ static int btrfs_set_inode_index_count(struct 
inode *inode) btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.objectid != btrfs_ino(inode) || - btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) { + found_key.type != BTRFS_DIR_INDEX_KEY) { BTRFS_I(inode)->index_cnt = 2; goto out; } @@ -5718,7 +5718,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); key[0].objectid = objectid; - btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); + key[0].type = BTRFS_INODE_ITEM_KEY; key[0].offset = 0; sizes[0] = sizeof(struct btrfs_inode_item); @@ -5731,7 +5731,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, * add more hard links than can fit in the ref item. */ key[1].objectid = objectid; - btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY); + key[1].type = BTRFS_INODE_REF_KEY; key[1].offset = ref_objectid; sizes[1] = name_len + sizeof(*ref); @@ -5740,7 +5740,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, location = &BTRFS_I(inode)->location; location->objectid = objectid; location->offset = 0; - btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); + location->type = BTRFS_INODE_ITEM_KEY; ret = btrfs_insert_inode_locked(inode); if (ret < 0) @@ -5832,7 +5832,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); } else { key.objectid = ino; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; } @@ -6333,7 +6333,7 @@ again: struct btrfs_file_extent_item); /* are we inside the extent that was found? 
*/ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - found_type = btrfs_key_type(&found_key); + found_type = found_key.type; if (found_key.objectid != objectid || found_type != BTRFS_EXTENT_DATA_KEY) { /* @@ -8832,7 +8832,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, } key.objectid = btrfs_ino(inode); key.offset = 0; - btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + key.type = BTRFS_EXTENT_DATA_KEY; datasize = btrfs_file_extent_calc_inline_size(name_len); err = btrfs_insert_empty_item(trans, root, path, &key, datasize); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 091c4d35671..b61801ac052 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -535,7 +535,7 @@ static noinline int create_subvol(struct inode *dir, key.objectid = objectid; key.offset = 0; - btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + key.type = BTRFS_ROOT_ITEM_KEY; ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &root_item); if (ret) @@ -3252,11 +3252,11 @@ process_slot: slot = path->slots[0]; btrfs_item_key_to_cpu(leaf, &key, slot); - if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || + if (key.type > BTRFS_EXTENT_DATA_KEY || key.objectid != btrfs_ino(src)) break; - if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { + if (key.type == BTRFS_EXTENT_DATA_KEY) { struct btrfs_file_extent_item *extent; int type; u32 size; diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c index 65793edb38c..47767d5b8f0 100644 --- a/fs/btrfs/orphan.c +++ b/fs/btrfs/orphan.c @@ -27,7 +27,7 @@ int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, int ret = 0; key.objectid = BTRFS_ORPHAN_OBJECTID; - btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); + key.type = BTRFS_ORPHAN_ITEM_KEY; key.offset = offset; path = btrfs_alloc_path(); @@ -48,7 +48,7 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, int ret = 0; key.objectid = BTRFS_ORPHAN_OBJECTID; - btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); + key.type = BTRFS_ORPHAN_ITEM_KEY; 
key.offset = offset; path = btrfs_alloc_path(); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 9626b4ad3b9..1591620bee3 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -195,7 +195,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) for (i = 0 ; i < nr ; i++) { item = btrfs_item_nr(i); btrfs_item_key_to_cpu(l, &key, i); - type = btrfs_key_type(&key); + type = key.type; printk(KERN_INFO "\titem %d key (%llu %u %llu) itemoff %d " "itemsize %d\n", i, key.objectid, type, key.offset, diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index f4a41f37be2..053dd000d4e 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2714,7 +2714,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, if (found_key.objectid != scrub_dev->devid) break; - if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY) + if (found_key.type != BTRFS_DEV_EXTENT_KEY) break; if (found_key.offset >= end) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d296efe2d3e..2f5000c0a87 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1498,7 +1498,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, return -EIO; key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; - btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); + key.type = BTRFS_ORPHAN_ITEM_KEY; key.offset = objectid; ret = btrfs_insert_empty_item(trans, root, path, &key, 0); @@ -3364,7 +3364,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, * or deletes of this inode don't have to relog the inode * again */ - if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY && + if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY && !skip_csum) { int found_type; extent = btrfs_item_ptr(src, start_slot + i, @@ -4369,7 +4369,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) again: key.objectid = BTRFS_TREE_LOG_OBJECTID; key.offset = (u64)-1; - btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + key.type = BTRFS_ROOT_ITEM_KEY; while (1) { 
ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 340a92d08e8..a7a3863e380 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1054,7 +1054,7 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, if (key.objectid > device->devid) break; - if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) + if (key.type != BTRFS_DEV_EXTENT_KEY) goto next; dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); @@ -1206,7 +1206,7 @@ again: if (key.objectid > device->devid) break; - if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) + if (key.type != BTRFS_DEV_EXTENT_KEY) goto next; if (key.offset > search_start) { diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index ad8328d797e..dcf20131fbe 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -237,7 +237,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) * first xattr that we find and walk forward */ key.objectid = btrfs_ino(inode); - btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); + key.type = BTRFS_XATTR_ITEM_KEY; key.offset = 0; path = btrfs_alloc_path(); @@ -273,7 +273,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) /* check to make sure this item is what we want */ if (found_key.objectid != key.objectid) break; - if (btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY) + if (found_key.type != BTRFS_XATTR_ITEM_KEY) break; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); -- cgit v1.2.3-70-g09d2 From 707e8a071528385a87b63a72a37c2322e463c7b8 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 4 Jun 2014 19:22:26 +0200 Subject: btrfs: use nodesize everywhere, kill leafsize The nodesize and leafsize were never of different values. Unify the usage and make nodesize the one. Cleanup the redundant checks and helpers. 
Shaves a few bytes from .text: text data bss dec hex filename 852418 24560 23112 900090 dbbfa btrfs.ko.before 851074 24584 23112 898770 db6d2 btrfs.ko.after Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 8 ++--- fs/btrfs/check-integrity.c | 13 -------- fs/btrfs/ctree.c | 18 +++++------ fs/btrfs/ctree.h | 21 +++---------- fs/btrfs/disk-io.c | 74 +++++++++++++++++++++------------------------- fs/btrfs/extent-tree.c | 36 +++++++++++----------- fs/btrfs/file.c | 2 +- fs/btrfs/ioctl.c | 6 ++-- fs/btrfs/print-tree.c | 2 +- fs/btrfs/qgroup.c | 6 ++-- fs/btrfs/reada.c | 2 +- fs/btrfs/relocation.c | 21 +++++++------ fs/btrfs/scrub.c | 17 +---------- fs/btrfs/transaction.c | 2 +- fs/btrfs/tree-log.c | 2 +- 15 files changed, 89 insertions(+), 141 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index cfe8566e6e3..4de97926939 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -482,7 +482,7 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info, continue; BUG_ON(!ref->wanted_disk_byte); eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte, - fs_info->tree_root->leafsize, 0); + fs_info->tree_root->nodesize, 0); if (!eb || !extent_buffer_uptodate(eb)) { free_extent_buffer(eb); return -EIO; @@ -991,8 +991,8 @@ again: ref->level == 0) { u32 bsz; struct extent_buffer *eb; - bsz = btrfs_level_size(fs_info->extent_root, - ref->level); + + bsz = fs_info->extent_root->nodesize; eb = read_tree_block(fs_info->extent_root, ref->parent, bsz, 0); if (!eb || !extent_buffer_uptodate(eb)) { @@ -1366,7 +1366,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, } btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]); if (found_key->type == BTRFS_METADATA_ITEM_KEY) - size = fs_info->extent_root->leafsize; + size = fs_info->extent_root->nodesize; else if (found_key->type == BTRFS_EXTENT_ITEM_KEY) size = found_key->offset; diff --git 
a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index ce92ae30250..d0690da3b15 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -820,7 +820,6 @@ static int btrfsic_process_superblock_dev_mirror( btrfs_super_magic(super_tmp) != BTRFS_MAGIC || memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || btrfs_super_nodesize(super_tmp) != state->metablock_size || - btrfs_super_leafsize(super_tmp) != state->metablock_size || btrfs_super_sectorsize(super_tmp) != state->datablock_size) { brelse(bh); return 0; @@ -3120,24 +3119,12 @@ int btrfsic_mount(struct btrfs_root *root, struct list_head *dev_head = &fs_devices->devices; struct btrfs_device *device; - if (root->nodesize != root->leafsize) { - printk(KERN_INFO - "btrfsic: cannot handle nodesize %d != leafsize %d!\n", - root->nodesize, root->leafsize); - return -1; - } if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) { printk(KERN_INFO "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", root->nodesize, PAGE_CACHE_SIZE); return -1; } - if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) { - printk(KERN_INFO - "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", - root->leafsize, PAGE_CACHE_SIZE); - return -1; - } if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) { printk(KERN_INFO "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 44ee5d2e52a..263145b2715 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1444,7 +1444,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq) if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) { btrfs_tree_read_unlock(eb_root); free_extent_buffer(eb_root); - blocksize = btrfs_level_size(root, old_root->level); + blocksize = root->nodesize; old = read_tree_block(root, logical, blocksize, 0); if (WARN_ON(!old || !extent_buffer_uptodate(old))) { free_extent_buffer(old); @@ 
-1651,7 +1651,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, WARN_ON(trans->transid != root->fs_info->generation); parent_nritems = btrfs_header_nritems(parent); - blocksize = btrfs_level_size(root, parent_level - 1); + blocksize = root->nodesize; end_slot = parent_nritems; if (parent_nritems == 1) @@ -1872,7 +1872,7 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, BUG_ON(level == 0); eb = read_tree_block(root, btrfs_node_blockptr(parent, slot), - btrfs_level_size(root, level - 1), + root->nodesize, btrfs_node_ptr_generation(parent, slot)); if (eb && !extent_buffer_uptodate(eb)) { free_extent_buffer(eb); @@ -2267,7 +2267,7 @@ static void reada_for_search(struct btrfs_root *root, node = path->nodes[level]; search = btrfs_node_blockptr(node, slot); - blocksize = btrfs_level_size(root, level - 1); + blocksize = root->nodesize; eb = btrfs_find_tree_block(root, search, blocksize); if (eb) { free_extent_buffer(eb); @@ -2325,7 +2325,7 @@ static noinline void reada_for_balance(struct btrfs_root *root, nritems = btrfs_header_nritems(parent); slot = path->slots[level + 1]; - blocksize = btrfs_level_size(root, level); + blocksize = root->nodesize; if (slot > 0) { block1 = btrfs_node_blockptr(parent, slot - 1); @@ -2461,7 +2461,7 @@ read_block_for_search(struct btrfs_trans_handle *trans, blocknr = btrfs_node_blockptr(b, slot); gen = btrfs_node_ptr_generation(b, slot); - blocksize = btrfs_level_size(root, level - 1); + blocksize = root->nodesize; tmp = btrfs_find_tree_block(root, blocknr, blocksize); if (tmp) { @@ -4282,13 +4282,13 @@ again: else btrfs_item_key(l, &disk_key, mid); - right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, + right = btrfs_alloc_free_block(trans, root, root->nodesize, 0, root->root_key.objectid, &disk_key, 0, l->start, 0); if (IS_ERR(right)) return PTR_ERR(right); - root_add_used(root, root->leafsize); + root_add_used(root, root->nodesize); memset_extent_buffer(right, 0, 0, sizeof(struct 
btrfs_header)); btrfs_set_header_bytenr(right, right->start); @@ -5375,7 +5375,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root, goto out; } - tmp_buf = kmalloc(left_root->leafsize, GFP_NOFS); + tmp_buf = kmalloc(left_root->nodesize, GFP_NOFS); if (!tmp_buf) { ret = -ENOMEM; goto out; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a835a548e47..6fc16d22d27 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -391,7 +391,7 @@ struct btrfs_header { sizeof(struct btrfs_header)) / \ sizeof(struct btrfs_key_ptr)) #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) -#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->leafsize)) +#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->nodesize)) #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) - \ sizeof(struct btrfs_file_extent_item)) @@ -474,7 +474,7 @@ struct btrfs_super_block { __le64 num_devices; __le32 sectorsize; __le32 nodesize; - __le32 leafsize; + __le32 __unused_leafsize; __le32 stripesize; __le32 sys_chunk_array_size; __le64 chunk_root_generation; @@ -1806,9 +1806,6 @@ struct btrfs_root { /* node allocations are done in nodesize units */ u32 nodesize; - /* leaf allocations are done in leafsize units */ - u32 leafsize; - u32 stripesize; u32 type; @@ -2995,8 +2992,6 @@ BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block, sectorsize, 32); BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, nodesize, 32); -BTRFS_SETGET_STACK_FUNCS(super_leafsize, struct btrfs_super_block, - leafsize, 32); BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block, stripesize, 32); BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block, @@ -3232,13 +3227,6 @@ static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) return sb->s_fs_info; } -static inline u32 btrfs_level_size(struct btrfs_root *root, int level) -{ - if (level == 0) - return root->leafsize; - return root->nodesize; 
-} - /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ @@ -3263,7 +3251,7 @@ static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping) static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, unsigned num_items) { - return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * + return (root->nodesize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * 2 * num_items; } @@ -3274,8 +3262,7 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root, unsigned num_items) { - return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * - num_items; + return root->nodesize * BTRFS_MAX_LEVEL * num_items; } int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ec32bead96a..508bbee320f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1200,16 +1200,14 @@ btrfs_free_subvolume_writers(struct btrfs_subvolume_writers *writers) kfree(writers); } -static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, - u32 stripesize, struct btrfs_root *root, - struct btrfs_fs_info *fs_info, +static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize, + struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) { root->node = NULL; root->commit_root = NULL; root->sectorsize = sectorsize; root->nodesize = nodesize; - root->leafsize = leafsize; root->stripesize = stripesize; root->state = 0; root->orphan_cleanup_state = 0; @@ -1295,7 +1293,7 @@ struct btrfs_root *btrfs_alloc_dummy_root(void) root = btrfs_alloc_root(NULL); if (!root) return ERR_PTR(-ENOMEM); - __setup_root(4096, 4096, 4096, 4096, root, NULL, 1); + __setup_root(4096, 4096, 4096, root, NULL, 1); set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state); root->alloc_bytenr = 0; @@ -1318,14 +1316,13 @@ struct 
btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, if (!root) return ERR_PTR(-ENOMEM); - __setup_root(tree_root->nodesize, tree_root->leafsize, - tree_root->sectorsize, tree_root->stripesize, - root, fs_info, objectid); + __setup_root(tree_root->nodesize, tree_root->sectorsize, + tree_root->stripesize, root, fs_info, objectid); root->root_key.objectid = objectid; root->root_key.type = BTRFS_ROOT_ITEM_KEY; root->root_key.offset = 0; - leaf = btrfs_alloc_free_block(trans, root, root->leafsize, + leaf = btrfs_alloc_free_block(trans, root, root->nodesize, 0, objectid, NULL, 0, 0, 0); if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); @@ -1396,9 +1393,9 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, if (!root) return ERR_PTR(-ENOMEM); - __setup_root(tree_root->nodesize, tree_root->leafsize, - tree_root->sectorsize, tree_root->stripesize, - root, fs_info, BTRFS_TREE_LOG_OBJECTID); + __setup_root(tree_root->nodesize, tree_root->sectorsize, + tree_root->stripesize, root, fs_info, + BTRFS_TREE_LOG_OBJECTID); root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; root->root_key.type = BTRFS_ROOT_ITEM_KEY; @@ -1413,7 +1410,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, * updated (along with back refs to the log tree). 
*/ - leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, + leaf = btrfs_alloc_free_block(trans, root, root->nodesize, 0, BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0); if (IS_ERR(leaf)) { @@ -1465,7 +1462,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, btrfs_set_stack_inode_generation(inode_item, 1); btrfs_set_stack_inode_size(inode_item, 3); btrfs_set_stack_inode_nlink(inode_item, 1); - btrfs_set_stack_inode_nbytes(inode_item, root->leafsize); + btrfs_set_stack_inode_nbytes(inode_item, root->nodesize); btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); btrfs_set_root_node(&log_root->root_item, log_root->node); @@ -1498,9 +1495,8 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, goto alloc_fail; } - __setup_root(tree_root->nodesize, tree_root->leafsize, - tree_root->sectorsize, tree_root->stripesize, - root, fs_info, key->objectid); + __setup_root(tree_root->nodesize, tree_root->sectorsize, + tree_root->stripesize, root, fs_info, key->objectid); ret = btrfs_find_root(tree_root, key, path, &root->root_item, &root->root_key); @@ -1511,7 +1507,7 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, } generation = btrfs_root_generation(&root->root_item); - blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); + blocksize = root->nodesize; root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), blocksize, generation); if (!root->node) { @@ -2143,7 +2139,6 @@ int open_ctree(struct super_block *sb, { u32 sectorsize; u32 nodesize; - u32 leafsize; u32 blocksize; u32 stripesize; u64 generation; @@ -2389,7 +2384,7 @@ int open_ctree(struct super_block *sb, goto fail_alloc; } - __setup_root(4096, 4096, 4096, 4096, tree_root, + __setup_root(4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); invalidate_bdev(fs_devices->latest_bdev); @@ -2469,19 +2464,22 @@ int open_ctree(struct super_block *sb, goto fail_alloc; } - if (btrfs_super_leafsize(disk_super) != 
+ /* + * Leafsize and nodesize were always equal, this is only a sanity check. + */ + if (le32_to_cpu(disk_super->__unused_leafsize) != btrfs_super_nodesize(disk_super)) { printk(KERN_ERR "BTRFS: couldn't mount because metadata " "blocksizes don't match. node %d leaf %d\n", btrfs_super_nodesize(disk_super), - btrfs_super_leafsize(disk_super)); + le32_to_cpu(disk_super->__unused_leafsize)); err = -EINVAL; goto fail_alloc; } - if (btrfs_super_leafsize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) { + if (btrfs_super_nodesize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) { printk(KERN_ERR "BTRFS: couldn't mount because metadata " "blocksize (%d) was too large\n", - btrfs_super_leafsize(disk_super)); + btrfs_super_nodesize(disk_super)); err = -EINVAL; goto fail_alloc; } @@ -2498,17 +2496,16 @@ int open_ctree(struct super_block *sb, * flag our filesystem as having big metadata blocks if * they are bigger than the page size */ - if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) { + if (btrfs_super_nodesize(disk_super) > PAGE_CACHE_SIZE) { if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA)) printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n"); features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; } nodesize = btrfs_super_nodesize(disk_super); - leafsize = btrfs_super_leafsize(disk_super); sectorsize = btrfs_super_sectorsize(disk_super); stripesize = btrfs_super_stripesize(disk_super); - fs_info->dirty_metadata_batch = leafsize * (1 + ilog2(nr_cpu_ids)); + fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids)); fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids)); /* @@ -2516,7 +2513,7 @@ int open_ctree(struct super_block *sb, * extent buffers for the same range. 
It leads to corruptions */ if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && - (sectorsize != leafsize)) { + (sectorsize != nodesize)) { printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes " "are not allowed for mixed block groups on %s\n", sb->s_id); @@ -2615,7 +2612,6 @@ int open_ctree(struct super_block *sb, 4 * 1024 * 1024 / PAGE_CACHE_SIZE); tree_root->nodesize = nodesize; - tree_root->leafsize = leafsize; tree_root->sectorsize = sectorsize; tree_root->stripesize = stripesize; @@ -2642,12 +2638,11 @@ int open_ctree(struct super_block *sb, goto fail_sb_buffer; } - blocksize = btrfs_level_size(tree_root, - btrfs_super_chunk_root_level(disk_super)); + blocksize = tree_root->nodesize; generation = btrfs_super_chunk_root_generation(disk_super); - __setup_root(nodesize, leafsize, sectorsize, stripesize, - chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); + __setup_root(nodesize, sectorsize, stripesize, chunk_root, + fs_info, BTRFS_CHUNK_TREE_OBJECTID); chunk_root->node = read_tree_block(chunk_root, btrfs_super_chunk_root(disk_super), @@ -2684,8 +2679,7 @@ int open_ctree(struct super_block *sb, } retry_root_backup: - blocksize = btrfs_level_size(tree_root, - btrfs_super_root_level(disk_super)); + blocksize = tree_root->nodesize; generation = btrfs_super_generation(disk_super); tree_root->node = read_tree_block(tree_root, @@ -2859,9 +2853,7 @@ retry_root_backup: err = -EIO; goto fail_qgroup; } - blocksize = - btrfs_level_size(tree_root, - btrfs_super_log_root_level(disk_super)); + blocksize = tree_root->nodesize; log_tree_root = btrfs_alloc_root(fs_info); if (!log_tree_root) { @@ -2869,7 +2861,7 @@ retry_root_backup: goto fail_qgroup; } - __setup_root(nodesize, leafsize, sectorsize, stripesize, + __setup_root(nodesize, sectorsize, stripesize, log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); log_tree_root->node = read_tree_block(tree_root, bytenr, @@ -4008,8 +4000,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, 
clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); while (start <= end) { eb = btrfs_find_tree_block(root, start, - root->leafsize); - start += root->leafsize; + root->nodesize); + start += root->nodesize; if (!eb) continue; wait_on_extent_buffer_writeback(eb); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4d1b50d4dc5..d52da9628f0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -491,7 +491,7 @@ next: key.objectid); if (key.type == BTRFS_METADATA_ITEM_KEY) last = key.objectid + - fs_info->tree_root->leafsize; + fs_info->tree_root->nodesize; else last = key.objectid + key.offset; @@ -765,7 +765,7 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, * different */ if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) { - offset = root->leafsize; + offset = root->nodesize; metadata = 0; } @@ -799,13 +799,13 @@ again: path->slots[0]); if (key.objectid == bytenr && key.type == BTRFS_EXTENT_ITEM_KEY && - key.offset == root->leafsize) + key.offset == root->nodesize) ret = 0; } if (ret) { key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = root->leafsize; + key.offset = root->nodesize; btrfs_release_path(path); goto again; } @@ -2651,7 +2651,7 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, num_bytes = btrfs_calc_trans_metadata_size(root, 1); num_heads = heads_to_leaves(root, num_heads); if (num_heads > 1) - num_bytes += (num_heads - 1) * root->leafsize; + num_bytes += (num_heads - 1) * root->nodesize; num_bytes <<= 1; global_rsv = &root->fs_info->global_block_rsv; @@ -3117,7 +3117,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, goto fail; } else { bytenr = btrfs_node_blockptr(buf, i); - num_bytes = btrfs_level_size(root, level - 1); + num_bytes = root->nodesize; ret = process_func(trans, root, bytenr, num_bytes, parent, ref_root, level - 1, 0, 1); @@ -4839,7 +4839,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info 
*fs_info) if (num_bytes * 3 > meta_used) num_bytes = div64_u64(meta_used, 3); - return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10); + return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10); } static void update_global_block_rsv(struct btrfs_fs_info *fs_info) @@ -4988,7 +4988,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, if (root->fs_info->quota_enabled) { /* One for parent inode, two for dir entries */ - num_bytes = 3 * root->leafsize; + num_bytes = 3 * root->nodesize; ret = btrfs_qgroup_reserve(root, num_bytes); if (ret) return ret; @@ -5176,7 +5176,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) if (root->fs_info->quota_enabled) { ret = btrfs_qgroup_reserve(root, num_bytes + - nr_extents * root->leafsize); + nr_extents * root->nodesize); if (ret) goto out_fail; } @@ -5185,7 +5185,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) if (unlikely(ret)) { if (root->fs_info->quota_enabled) btrfs_qgroup_free(root, num_bytes + - nr_extents * root->leafsize); + nr_extents * root->nodesize); goto out_fail; } @@ -5301,7 +5301,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) btrfs_ino(inode), to_free, 0); if (root->fs_info->quota_enabled) { btrfs_qgroup_free(root, num_bytes + - dropped * root->leafsize); + dropped * root->nodesize); } btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, @@ -7077,7 +7077,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) { btrfs_free_and_pin_reserved_extent(root, ins->objectid, - root->leafsize); + root->nodesize); return -ENOMEM; } @@ -7086,7 +7086,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, ins, size); if (ret) { btrfs_free_and_pin_reserved_extent(root, ins->objectid, - root->leafsize); + root->nodesize); btrfs_free_path(path); return ret; } @@ -7101,7 +7101,7 @@ static int alloc_reserved_tree_block(struct 
btrfs_trans_handle *trans, if (skinny_metadata) { iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); - num_bytes = root->leafsize; + num_bytes = root->nodesize; } else { block_info = (struct btrfs_tree_block_info *)(extent_item + 1); btrfs_set_tree_block_key(leaf, block_info, key); @@ -7131,14 +7131,14 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, return ret; } - ret = update_block_group(root, ins->objectid, root->leafsize, 1); + ret = update_block_group(root, ins->objectid, root->nodesize, 1); if (ret) { /* -ENOENT, logic error */ btrfs_err(fs_info, "update block group failed for %llu %llu", ins->objectid, ins->offset); BUG(); } - trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->leafsize); + trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->nodesize); return ret; } @@ -7417,7 +7417,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, eb = path->nodes[wc->level]; nritems = btrfs_header_nritems(eb); - blocksize = btrfs_level_size(root, wc->level - 1); + blocksize = root->nodesize; for (slot = path->slots[wc->level]; slot < nritems; slot++) { if (nread >= wc->reada_count) @@ -7806,7 +7806,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, } bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); - blocksize = btrfs_level_size(root, level - 1); + blocksize = root->nodesize; next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a9b56e32dd8..033f04bac85 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1653,7 +1653,7 @@ again: cond_resched(); balance_dirty_pages_ratelimited(inode->i_mapping); - if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) + if (dirty_pages < (root->nodesize >> PAGE_CACHE_SHIFT) + 1) btrfs_btree_balance_dirty(root); pos += copied; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b61801ac052..d6e10d60f8a 100644 --- a/fs/btrfs/ioctl.c +++ 
b/fs/btrfs/ioctl.c @@ -477,7 +477,7 @@ static noinline int create_subvol(struct inode *dir, if (ret) goto fail; - leaf = btrfs_alloc_free_block(trans, root, root->leafsize, + leaf = btrfs_alloc_free_block(trans, root, root->nodesize, 0, objectid, NULL, 0, 0, 0); if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); @@ -503,7 +503,7 @@ static noinline int create_subvol(struct inode *dir, btrfs_set_stack_inode_generation(inode_item, 1); btrfs_set_stack_inode_size(inode_item, 3); btrfs_set_stack_inode_nlink(inode_item, 1); - btrfs_set_stack_inode_nbytes(inode_item, root->leafsize); + btrfs_set_stack_inode_nbytes(inode_item, root->nodesize); btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); btrfs_set_root_flags(&root_item, 0); @@ -3199,7 +3199,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode, u64 last_dest_end = destoff; ret = -ENOMEM; - buf = vmalloc(btrfs_level_size(root, 0)); + buf = vmalloc(root->nodesize); if (!buf) return ret; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 1591620bee3..eb309855d5c 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -336,7 +336,7 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) for (i = 0; i < nr; i++) { struct extent_buffer *next = read_tree_block(root, btrfs_node_blockptr(c, i), - btrfs_level_size(root, level - 1), + root->nodesize, btrfs_node_ptr_generation(c, i)); if (btrfs_is_leaf(next) && level != 1) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index ded5c601d91..2ce4ce7b47d 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2237,7 +2237,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, if (srcid) { struct btrfs_root *srcroot; struct btrfs_key srckey; - int srcroot_level; srckey.objectid = srcid; srckey.type = BTRFS_ROOT_ITEM_KEY; @@ -2249,8 +2248,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, } rcu_read_lock(); - srcroot_level = btrfs_header_level(srcroot->node); - level_size = btrfs_level_size(srcroot, 
srcroot_level); + level_size = srcroot->nodesize; rcu_read_unlock(); } @@ -2566,7 +2564,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, found.type != BTRFS_METADATA_ITEM_KEY) continue; if (found.type == BTRFS_METADATA_ITEM_KEY) - num_bytes = fs_info->extent_root->leafsize; + num_bytes = fs_info->extent_root->nodesize; else num_bytes = found.offset; diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 20408c6b665..b63ae20618f 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -347,7 +347,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, if (!re) return NULL; - blocksize = btrfs_level_size(root, level); + blocksize = root->nodesize; re->logical = logical; re->blocksize = blocksize; re->top = *top; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b3329ad3452..2d221c46180 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1787,7 +1787,7 @@ again: btrfs_node_key_to_cpu(parent, next_key, slot + 1); old_bytenr = btrfs_node_blockptr(parent, slot); - blocksize = btrfs_level_size(dest, level - 1); + blocksize = dest->nodesize; old_ptr_gen = btrfs_node_ptr_generation(parent, slot); if (level <= max_level) { @@ -1970,7 +1970,7 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, } bytenr = btrfs_node_blockptr(eb, path->slots[i]); - blocksize = btrfs_level_size(root, i - 1); + blocksize = root->nodesize; eb = read_tree_block(root, bytenr, blocksize, ptr_gen); if (!eb || !extent_buffer_uptodate(eb)) { free_extent_buffer(eb); @@ -2544,8 +2544,7 @@ u64 calcu_metadata_size(struct reloc_control *rc, if (next->processed && (reserve || next != node)) break; - num_bytes += btrfs_level_size(rc->extent_root, - next->level); + num_bytes += rc->extent_root->nodesize; if (list_empty(&next->upper)) break; @@ -2679,7 +2678,7 @@ static int do_relocation(struct btrfs_trans_handle *trans, goto next; } - blocksize = btrfs_level_size(root, node->level); + blocksize = 
root->nodesize; generation = btrfs_node_ptr_generation(upper->eb, slot); eb = read_tree_block(root, bytenr, blocksize, generation); if (!eb || !extent_buffer_uptodate(eb)) { @@ -2789,7 +2788,7 @@ static void __mark_block_processed(struct reloc_control *rc, u32 blocksize; if (node->level == 0 || in_block_group(node->bytenr, rc->block_group)) { - blocksize = btrfs_level_size(rc->extent_root, node->level); + blocksize = rc->extent_root->nodesize; mark_block_processed(rc, node->bytenr, blocksize); } node->processed = 1; @@ -2865,7 +2864,7 @@ static int reada_tree_block(struct reloc_control *rc, if (block->key.type == BTRFS_METADATA_ITEM_KEY) readahead_tree_block(rc->extent_root, block->bytenr, block->key.objectid, - rc->extent_root->leafsize); + rc->extent_root->nodesize); else readahead_tree_block(rc->extent_root, block->bytenr, block->key.objectid, block->key.offset); @@ -3313,7 +3312,7 @@ static int add_tree_block(struct reloc_control *rc, return -ENOMEM; block->bytenr = extent_key->objectid; - block->key.objectid = rc->extent_root->leafsize; + block->key.objectid = rc->extent_root->nodesize; block->key.offset = generation; block->level = level; block->key_ready = 0; @@ -3640,7 +3639,7 @@ int add_data_references(struct reloc_control *rc, struct btrfs_extent_inline_ref *iref; unsigned long ptr; unsigned long end; - u32 blocksize = btrfs_level_size(rc->extent_root, 0); + u32 blocksize = rc->extent_root->nodesize; int ret = 0; int err = 0; @@ -3783,7 +3782,7 @@ next: } if (key.type == BTRFS_METADATA_ITEM_KEY && - key.objectid + rc->extent_root->leafsize <= + key.objectid + rc->extent_root->nodesize <= rc->search_start) { path->slots[0]++; goto next; @@ -3801,7 +3800,7 @@ next: rc->search_start = key.objectid + key.offset; else rc->search_start = key.objectid + - rc->extent_root->leafsize; + rc->extent_root->nodesize; memcpy(extent_key, &key, sizeof(key)); return 0; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 053dd000d4e..4ae1c5feccb 100644 --- 
a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -137,7 +137,6 @@ struct scrub_ctx { int pages_per_rd_bio; u32 sectorsize; u32 nodesize; - u32 leafsize; int is_dev_replace; struct scrub_wr_ctx wr_ctx; @@ -438,7 +437,6 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) } sctx->first_free = 0; sctx->nodesize = dev->dev_root->nodesize; - sctx->leafsize = dev->dev_root->leafsize; sctx->sectorsize = dev->dev_root->sectorsize; atomic_set(&sctx->bios_in_flight, 0); atomic_set(&sctx->workers_pending, 0); @@ -1758,7 +1756,6 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) BTRFS_UUID_SIZE)) ++fail; - WARN_ON(sctx->nodesize != sctx->leafsize); len = sctx->nodesize - BTRFS_CSUM_SIZE; mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE; @@ -2196,7 +2193,6 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len, sctx->stat.data_bytes_scrubbed += len; spin_unlock(&sctx->stat_lock); } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { - WARN_ON(sctx->nodesize != sctx->leafsize); blocksize = sctx->nodesize; spin_lock(&sctx->stat_lock); sctx->stat.tree_extents_scrubbed++; @@ -2487,7 +2483,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, btrfs_item_key_to_cpu(l, &key, slot); if (key.type == BTRFS_METADATA_ITEM_KEY) - bytes = root->leafsize; + bytes = root->nodesize; else bytes = key.offset; @@ -2910,17 +2906,6 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, if (btrfs_fs_closing(fs_info)) return -EINVAL; - /* - * check some assumptions - */ - if (fs_info->chunk_root->nodesize != fs_info->chunk_root->leafsize) { - btrfs_err(fs_info, - "scrub: size assumption nodesize == leafsize (%d == %d) fails", - fs_info->chunk_root->nodesize, - fs_info->chunk_root->leafsize); - return -EINVAL; - } - if (fs_info->chunk_root->nodesize > BTRFS_STRIPE_LEN) { /* * in this case scrub is unable to calculate the checksum diff --git a/fs/btrfs/transaction.c 
b/fs/btrfs/transaction.c index 977717b45bf..e336646508f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -408,7 +408,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, if (num_items > 0 && root != root->fs_info->chunk_root) { if (root->fs_info->quota_enabled && is_fstree(root->root_key.objectid)) { - qgroup_reserved = num_items * root->leafsize; + qgroup_reserved = num_items * root->nodesize; ret = btrfs_qgroup_reserve(root, qgroup_reserved); if (ret) return ERR_PTR(ret); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 2f5000c0a87..7b6d1428f03 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2157,7 +2157,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, bytenr = btrfs_node_blockptr(cur, path->slots[*level]); ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); - blocksize = btrfs_level_size(root, *level - 1); + blocksize = root->nodesize; parent = path->nodes[*level]; root_owner = btrfs_header_owner(parent); -- cgit v1.2.3-70-g09d2 From f98de9b9c07485f7e21edfd5b2b20c89d662af3c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 4 Aug 2014 19:37:21 +0100 Subject: Btrfs: make btrfs_search_forward return with nodes unlocked None of the uses of btrfs_search_forward() need to have the path nodes (level >= 1) read locked, only the leaf needs to be locked while the caller processes it. Therefore make it return a path with all nodes unlocked, except for the leaf. This change is motivated by the observation that during a file fsync we repeatedly call btrfs_search_forward() and process the returned leaf while upper nodes of the returned path (level >= 1) are read locked, which unnecessarily blocks other tasks that want to write to the same fs/subvol btree. Therefore instead of modifying the fsync code to unlock all nodes with level >= 1 immediately after calling btrfs_search_forward(), change btrfs_search_forward() to do it, so that it benefits all callers.
Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 11 +++++++---- fs/btrfs/ioctl.c | 5 ----- fs/btrfs/tree-log.c | 3 --- fs/btrfs/uuid-tree.c | 1 - fs/btrfs/volumes.c | 2 -- 5 files changed, 7 insertions(+), 15 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 783ea3bac7d..39021bf2df9 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -5144,8 +5144,9 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, u32 nritems; int level; int ret = 1; + int keep_locks = path->keep_locks; - WARN_ON(!path->keep_locks); + path->keep_locks = 1; again: cur = btrfs_read_lock_root_node(root); level = btrfs_header_level(cur); @@ -5209,7 +5210,6 @@ find_next_key: path->slots[level] = slot; if (level == path->lowest_level) { ret = 0; - unlock_up(path, level, 1, 0, NULL); goto out; } btrfs_set_path_blocking(path); @@ -5224,9 +5224,12 @@ find_next_key: btrfs_clear_path_blocking(path, NULL, 0); } out: - if (ret == 0) + path->keep_locks = keep_locks; + if (ret == 0) { + btrfs_unlock_up_safe(path, path->lowest_level + 1); + btrfs_set_path_blocking(path); memcpy(min_key, &found_key, sizeof(found_key)); - btrfs_set_path_blocking(path); + } return ret; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4492e017bdb..85ca3cce855 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -936,12 +936,9 @@ static int find_new_extents(struct btrfs_root *root, min_key.offset = *off; while (1) { - path->keep_locks = 1; ret = btrfs_search_forward(root, &min_key, path, newer_than); if (ret != 0) goto none; - path->keep_locks = 0; - btrfs_unlock_up_safe(path, 1); process_slot: if (min_key.objectid != ino) goto none; @@ -2088,8 +2085,6 @@ static noinline int search_ioctl(struct inode *inode, key.type = sk->min_type; key.offset = sk->min_offset; - path->keep_locks = 1; - while (1) { ret = btrfs_search_forward(root, &key, path, sk->min_transid); if (ret != 0) { diff --git a/fs/btrfs/tree-log.c 
b/fs/btrfs/tree-log.c index 7b6d1428f03..82db14f5cf8 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2983,8 +2983,6 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, min_key.type = key_type; min_key.offset = min_offset; - path->keep_locks = 1; - ret = btrfs_search_forward(root, &min_key, path, trans->transid); /* @@ -3964,7 +3962,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, err = ret; goto out_unlock; } - path->keep_locks = 1; while (1) { ins_nr = 0; diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c index f6a4c03ee7d..77828294453 100644 --- a/fs/btrfs/uuid-tree.c +++ b/fs/btrfs/uuid-tree.c @@ -279,7 +279,6 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info, key.offset = 0; again_search_slot: - path->keep_locks = 1; ret = btrfs_search_forward(root, &key, path, 0); if (ret) { if (ret > 0) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 46cd29ad315..ff8386eb336 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3625,8 +3625,6 @@ static int btrfs_uuid_scan_kthread(void *data) max_key.type = BTRFS_ROOT_ITEM_KEY; max_key.offset = (u64)-1; - path->keep_locks = 1; - while (1) { ret = btrfs_search_forward(root, &key, path, 0); if (ret) { -- cgit v1.2.3-70-g09d2 From a2cc11db245b9d8fbd4e3adbe2a1e7cf60473950 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 8 Sep 2014 22:53:18 +0100 Subject: Btrfs: fix directory recovery from fsync log When replaying a directory from the fsync log, if a directory entry exists both in the fs/subvol tree and in the log, the directory's inode got its i_size updated incorrectly, accounting for the dentry's name twice. 
Reproducer, from a test for xfstests: _scratch_mkfs >> $seqres.full 2>&1 _init_flakey _mount_flakey touch $SCRATCH_MNT/foo sync touch $SCRATCH_MNT/bar xfs_io -c "fsync" $SCRATCH_MNT xfs_io -c "fsync" $SCRATCH_MNT/bar _load_flakey_table $FLAKEY_DROP_WRITES _unmount_flakey _load_flakey_table $FLAKEY_ALLOW_WRITES _mount_flakey [ -f $SCRATCH_MNT/foo ] || echo "file foo is missing" [ -f $SCRATCH_MNT/bar ] || echo "file bar is missing" _unmount_flakey _check_scratch_fs $FLAKEY_DEV The filesystem check at the end failed with the message: "root 5 root dir 256 error". A test case for xfstests follows. Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 82db14f5cf8..dce33b5a694 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1637,6 +1637,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, found_key.type == log_key.type && found_key.offset == log_key.offset && btrfs_dir_type(path->nodes[0], dst_di) == log_type) { + update_size = false; goto out; } -- cgit v1.2.3-70-g09d2 From 8407f553268a4611f2542ed90677f0edfaa2c9c4 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 5 Sep 2014 15:14:39 +0100 Subject: Btrfs: fix data corruption after fast fsync and writeback error When we do a fast fsync, we start all ordered operations and then while they're running in parallel we visit the list of modified extent maps and construct their matching file extent items and write them to the log btree. After that, in btrfs_sync_log() we wait for all the ordered operations to finish (via btrfs_wait_logged_extents). The problem with this is that we were completely ignoring errors that can happen in the extent write path, such as -ENOSPC, a temporary -ENOMEM or -EIO errors for example. 
When such an error happens, it means we have parts of the on disk extent that weren't written to, and so we end up logging file extent items that point to these extents that contain garbage/random data - so after a crash/reboot plus log replay, we get our inode's metadata pointing to those extents. This worked in contrast with the full (non-fast) fsync path, where we start all ordered operations, wait for them to finish and then write to the log btree. In this path, after each ordered operation completes we check if it's flagged with an error (BTRFS_ORDERED_IOERR) and return -EIO if so (via btrfs_wait_ordered_range). So if an error happens with any ordered operation, just return a -EIO error to userspace, so that it knows that not all of its previous writes were durably persisted and the application can take proper action (like redoing the writes, for example) - and definitely not leave any file extent items in the log referring to extents that were not fully written. Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/file.c | 19 ++++ fs/btrfs/tree-log.c | 247 ++++++++++++++++++++++++++++++---------------------- fs/btrfs/tree-log.h | 2 + 3 files changed, 166 insertions(+), 102 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index cdb71461e0f..29b147d46b0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2029,6 +2029,25 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) */ mutex_unlock(&inode->i_mutex); + /* + * If any of the ordered extents had an error, just return it to user + * space, so that the application knows some writes didn't succeed and + * can take proper action (retry for e.g.). Blindly committing the + * transaction in this case, would fool userspace that everything was + * successful.
And we also want to make sure our log doesn't contain + * file extent items pointing to extents that weren't fully written to - + * just like in the non fast fsync path, where we check for the ordered + * operation's error flag before writing to the log tree and return -EIO + * if any of them had this flag set (btrfs_wait_ordered_range) - + * therefore we need to check for errors in the ordered operations, + * which are indicated by ctx.io_err. + */ + if (ctx.io_err) { + btrfs_end_transaction(trans, root); + ret = ctx.io_err; + goto out; + } + if (ret != BTRFS_NO_LOG_SYNC) { if (!ret) { ret = btrfs_sync_log(trans, root, &ctx); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index dce33b5a694..2b26dad35d8 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -97,7 +97,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, int inode_only, const loff_t start, - const loff_t end); + const loff_t end, + struct btrfs_log_ctx *ctx); static int link_to_fixup_dir(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid); @@ -3572,107 +3573,33 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b) return 0; } -static int log_one_extent(struct btrfs_trans_handle *trans, - struct inode *inode, struct btrfs_root *root, - struct extent_map *em, struct btrfs_path *path, - struct list_head *logged_list) +static int wait_ordered_extents(struct btrfs_trans_handle *trans, + struct inode *inode, + struct btrfs_root *root, + const struct extent_map *em, + const struct list_head *logged_list, + bool *ordered_io_error) { - struct btrfs_root *log = root->log_root; - struct btrfs_file_extent_item *fi; - struct extent_buffer *leaf; struct btrfs_ordered_extent *ordered; - struct list_head ordered_sums; - struct btrfs_map_token token; - struct btrfs_key key; + struct btrfs_root *log = root->log_root; u64 mod_start = em->mod_start; u64 mod_len = em->mod_len; + 
const bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; u64 csum_offset; u64 csum_len; - u64 extent_offset = em->start - em->orig_start; - u64 block_len; - int ret; - bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; - int extent_inserted = 0; - - INIT_LIST_HEAD(&ordered_sums); - btrfs_init_map_token(&token); - - ret = __btrfs_drop_extents(trans, log, inode, path, em->start, - em->start + em->len, NULL, 0, 1, - sizeof(*fi), &extent_inserted); - if (ret) - return ret; - - if (!extent_inserted) { - key.objectid = btrfs_ino(inode); - key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = em->start; - - ret = btrfs_insert_empty_item(trans, log, path, &key, - sizeof(*fi)); - if (ret) - return ret; - } - leaf = path->nodes[0]; - fi = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - - btrfs_set_token_file_extent_generation(leaf, fi, em->generation, - &token); - if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { - skip_csum = true; - btrfs_set_token_file_extent_type(leaf, fi, - BTRFS_FILE_EXTENT_PREALLOC, - &token); - } else { - btrfs_set_token_file_extent_type(leaf, fi, - BTRFS_FILE_EXTENT_REG, - &token); - if (em->block_start == EXTENT_MAP_HOLE) - skip_csum = true; - } - - block_len = max(em->block_len, em->orig_block_len); - if (em->compress_type != BTRFS_COMPRESS_NONE) { - btrfs_set_token_file_extent_disk_bytenr(leaf, fi, - em->block_start, - &token); - btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len, - &token); - } else if (em->block_start < EXTENT_MAP_LAST_BYTE) { - btrfs_set_token_file_extent_disk_bytenr(leaf, fi, - em->block_start - - extent_offset, &token); - btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len, - &token); - } else { - btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 0, &token); - btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, 0, - &token); - } - - btrfs_set_token_file_extent_offset(leaf, fi, - em->start - em->orig_start, - &token); - 
btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token); - btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->ram_bytes, &token); - btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type, - &token); - btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token); - btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token); - btrfs_mark_buffer_dirty(leaf); + LIST_HEAD(ordered_sums); + int ret = 0; - btrfs_release_path(path); - if (ret) { - return ret; - } + *ordered_io_error = false; - if (skip_csum) + if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || + em->block_start == EXTENT_MAP_HOLE) return 0; /* - * First check and see if our csums are on our outstanding ordered - * extents. + * Wait for any ordered extent that covers our extent map. If it + * finishes without an error, first check and see if our csums are on + * our outstanding ordered extents. */ list_for_each_entry(ordered, logged_list, log_list) { struct btrfs_ordered_sum *sum; @@ -3684,6 +3611,24 @@ static int log_one_extent(struct btrfs_trans_handle *trans, mod_start + mod_len <= ordered->file_offset) continue; + if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) && + !test_bit(BTRFS_ORDERED_IOERR, &ordered->flags) && + !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) { + const u64 start = ordered->file_offset; + const u64 end = ordered->file_offset + ordered->len - 1; + + WARN_ON(ordered->inode != inode); + filemap_fdatawrite_range(inode->i_mapping, start, end); + } + + wait_event(ordered->wait, + (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) || + test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))); + + if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) { + *ordered_io_error = true; + break; + } /* * We are going to copy all the csums on this ordered extent, so * go ahead and adjust mod_start and mod_len in case this @@ -3715,6 +3660,9 @@ static int log_one_extent(struct btrfs_trans_handle *trans, } } + if (skip_csum) + continue; + /* * To keep us from looping for the
above case of an ordered * extent that falls inside of the logged extent. @@ -3732,18 +3680,16 @@ static int log_one_extent(struct btrfs_trans_handle *trans, list_for_each_entry(sum, &ordered->list, list) { ret = btrfs_csum_file_blocks(trans, log, sum); if (ret) - goto unlocked; + break; } - } -unlocked: - if (!mod_len || ret) + if (*ordered_io_error || !mod_len || ret || skip_csum) return ret; if (em->compress_type) { csum_offset = 0; - csum_len = block_len; + csum_len = max(em->block_len, em->orig_block_len); } else { csum_offset = mod_start - em->start; csum_len = mod_len; @@ -3770,11 +3716,106 @@ unlocked: return ret; } +static int log_one_extent(struct btrfs_trans_handle *trans, + struct inode *inode, struct btrfs_root *root, + const struct extent_map *em, + struct btrfs_path *path, + const struct list_head *logged_list, + struct btrfs_log_ctx *ctx) +{ + struct btrfs_root *log = root->log_root; + struct btrfs_file_extent_item *fi; + struct extent_buffer *leaf; + struct btrfs_map_token token; + struct btrfs_key key; + u64 extent_offset = em->start - em->orig_start; + u64 block_len; + int ret; + int extent_inserted = 0; + bool ordered_io_err = false; + + ret = wait_ordered_extents(trans, inode, root, em, logged_list, + &ordered_io_err); + if (ret) + return ret; + + if (ordered_io_err) { + ctx->io_err = -EIO; + return 0; + } + + btrfs_init_map_token(&token); + + ret = __btrfs_drop_extents(trans, log, inode, path, em->start, + em->start + em->len, NULL, 0, 1, + sizeof(*fi), &extent_inserted); + if (ret) + return ret; + + if (!extent_inserted) { + key.objectid = btrfs_ino(inode); + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = em->start; + + ret = btrfs_insert_empty_item(trans, log, path, &key, + sizeof(*fi)); + if (ret) + return ret; + } + leaf = path->nodes[0]; + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + + btrfs_set_token_file_extent_generation(leaf, fi, em->generation, + &token); + if (test_bit(EXTENT_FLAG_PREALLOC, 
&em->flags)) + btrfs_set_token_file_extent_type(leaf, fi, + BTRFS_FILE_EXTENT_PREALLOC, + &token); + else + btrfs_set_token_file_extent_type(leaf, fi, + BTRFS_FILE_EXTENT_REG, + &token); + + block_len = max(em->block_len, em->orig_block_len); + if (em->compress_type != BTRFS_COMPRESS_NONE) { + btrfs_set_token_file_extent_disk_bytenr(leaf, fi, + em->block_start, + &token); + btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len, + &token); + } else if (em->block_start < EXTENT_MAP_LAST_BYTE) { + btrfs_set_token_file_extent_disk_bytenr(leaf, fi, + em->block_start - + extent_offset, &token); + btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len, + &token); + } else { + btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 0, &token); + btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, 0, + &token); + } + + btrfs_set_token_file_extent_offset(leaf, fi, extent_offset, &token); + btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token); + btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->ram_bytes, &token); + btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type, + &token); + btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token); + btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token); + btrfs_mark_buffer_dirty(leaf); + + btrfs_release_path(path); + + return ret; +} + static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, struct btrfs_path *path, - struct list_head *logged_list) + struct list_head *logged_list, + struct btrfs_log_ctx *ctx) { struct extent_map *em, *n; struct list_head extents; @@ -3832,7 +3873,8 @@ process: write_unlock(&tree->lock); - ret = log_one_extent(trans, inode, root, em, path, logged_list); + ret = log_one_extent(trans, inode, root, em, path, logged_list, + ctx); write_lock(&tree->lock); clear_em_logging(tree, em); free_extent_map(em); @@ -3862,7 +3904,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, struct 
btrfs_root *root, struct inode *inode, int inode_only, const loff_t start, - const loff_t end) + const loff_t end, + struct btrfs_log_ctx *ctx) { struct btrfs_path *path; struct btrfs_path *dst_path; @@ -4046,7 +4089,7 @@ log_extents: btrfs_release_path(dst_path); if (fast_search) { ret = btrfs_log_changed_extents(trans, root, inode, dst_path, - &logged_list); + &logged_list, ctx); if (ret) { err = ret; goto out_unlock; @@ -4246,7 +4289,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (ret) goto end_no_trans; - ret = btrfs_log_inode(trans, root, inode, inode_only, start, end); + ret = btrfs_log_inode(trans, root, inode, inode_only, start, end, ctx); if (ret) goto end_trans; @@ -4275,7 +4318,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) { ret = btrfs_log_inode(trans, root, inode, inode_only, - 0, LLONG_MAX); + 0, LLONG_MAX, ctx); if (ret) goto end_trans; } diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index e2e798ae7cd..154990c26dc 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -28,6 +28,7 @@ struct btrfs_log_ctx { int log_ret; int log_transid; + int io_err; struct list_head list; }; @@ -35,6 +36,7 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) { ctx->log_ret = 0; ctx->log_transid = 0; + ctx->io_err = 0; INIT_LIST_HEAD(&ctx->list); } -- cgit v1.2.3-70-g09d2 From 1a4ed8fdca077d2489ec47d548451be69389e926 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 27 Oct 2014 10:44:24 +0000 Subject: Btrfs: fix invalid leaf slot access in btrfs_lookup_extent() If we couldn't find our extent item, we accessed the current slot (path->slots[0]) to check if it corresponds to an equivalent skinny metadata item. However this slot could be beyond our last item in the leaf (i.e. path->slots[0] >= btrfs_header_nritems(leaf)), in which case we shouldn't process it. 
Since btrfs_lookup_extent() is only used to find extent items for data extents, fix this by removing completely the logic that looks up for an equivalent skinny metadata item, since it can not exist. Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 10 ++-------- fs/btrfs/tree-log.c | 2 +- 3 files changed, 4 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d557264ee97..fe69edda11f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3276,7 +3276,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root, unsigned long count); int btrfs_async_run_delayed_refs(struct btrfs_root *root, unsigned long count, int wait); -int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); +int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len); int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 offset, int metadata, u64 *refs, u64 *flags); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0d599ba1aae..87c0b46f8a7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -710,8 +710,8 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info) rcu_read_unlock(); } -/* simple helper to search for an existing extent at a given offset */ -int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) +/* simple helper to search for an existing data extent at a given offset */ +int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len) { int ret; struct btrfs_key key; @@ -726,12 +726,6 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) key.type = BTRFS_EXTENT_ITEM_KEY; ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, 0, 0); - if (ret > 0) { - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); - if (key.objectid == start && - key.type == 
BTRFS_METADATA_ITEM_KEY) - ret = 0; - } btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 2b26dad35d8..6d58d72705a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -672,7 +672,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, * is this extent already allocated in the extent * allocation tree? If so, just add a reference */ - ret = btrfs_lookup_extent(root, ins.objectid, + ret = btrfs_lookup_data_extent(root, ins.objectid, ins.offset); if (ret == 0) { ret = btrfs_inc_extent_ref(trans, root, -- cgit v1.2.3-70-g09d2