From e94acd86d48d61a5d919d807ed1efa0d8c1cd5ae Mon Sep 17 00:00:00 2001 From: Valentina Giusti Date: Mon, 4 Nov 2013 22:34:28 +0100 Subject: btrfs: replace path->slots[0] with otherwise unused variable 'slot' Signed-off-by: Valentina Giusti Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 3775947429b..826b98c211a 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1683,8 +1683,8 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root, btrfs_release_path(path); leaf = path->nodes[0]; - item_size = btrfs_item_size_nr(leaf, path->slots[0]); - ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + item_size = btrfs_item_size_nr(leaf, slot); + ptr = btrfs_item_ptr_offset(leaf, slot); cur_offset = 0; while (cur_offset < item_size) { -- cgit v1.2.3-70-g09d2 From e33d5c3d6d61518c7f115af6d11d3dffa230d31f Mon Sep 17 00:00:00 2001 From: Kelley Nielsen Date: Mon, 4 Nov 2013 19:33:33 -0800 Subject: btrfs: bootstrap generic btrfs_find_item interface There are many btrfs functions that manually search the tree for an item. They all reimplement the same mechanism and differ in the conditions that they use to find the item. __inode_info() is one such example. Zach Brown proposed creating a new interface to take the place of these functions. This patch is the first step to creating the interface. A new function, btrfs_find_item, has been added to ctree.c and prototyped in ctree.h. It is identical to __inode_info, except that the order of the parameters has been rearranged to more closely match those of similar functions elsewhere in the code (now, root and path come first, then the objectid, offset and type, and the key to be filled in last). __inode_info's callers have been set to call this new function instead, and __inode_info itself has been removed.
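For illustration, a caller of the new interface ends up looking roughly like this (a sketch distilled from the reworked inode_item_info()/inode_ref_info() callers below; the wrapper name lookup_inode_item is made up for the example):

/* Sketch: position 'path' at the INODE_ITEM of 'inum' via the new helper.
 * Root and path come first, then objectid, offset and type, and the key
 * to be filled in comes last.
 */
static int lookup_inode_item(struct btrfs_root *fs_root, u64 inum,
			     struct btrfs_path *path)
{
	struct btrfs_key found_key;

	return btrfs_find_item(fs_root, path, inum, 0,
			       BTRFS_INODE_ITEM_KEY, &found_key);
}

As with __inode_info, a return of 0 means the item was found and path points at it, 1 means it is not present, and a negative value is an error.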
Signed-off-by: Kelley Nielsen Suggested-by: Zach Brown Reviewed-by: Josh Triplett Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 40 ++++------------------------------------ fs/btrfs/ctree.c | 37 +++++++++++++++++++++++++++++++++++++ fs/btrfs/ctree.h | 2 ++ 3 files changed, 43 insertions(+), 36 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 826b98c211a..6a3f7f50ad3 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1107,38 +1107,6 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, return 0; } - -static int __inode_info(u64 inum, u64 ioff, u8 key_type, - struct btrfs_root *fs_root, struct btrfs_path *path, - struct btrfs_key *found_key) -{ - int ret; - struct btrfs_key key; - struct extent_buffer *eb; - - key.type = key_type; - key.objectid = inum; - key.offset = ioff; - - ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0); - if (ret < 0) - return ret; - - eb = path->nodes[0]; - if (ret && path->slots[0] >= btrfs_header_nritems(eb)) { - ret = btrfs_next_leaf(fs_root, path); - if (ret) - return ret; - eb = path->nodes[0]; - } - - btrfs_item_key_to_cpu(eb, found_key, path->slots[0]); - if (found_key->type != key.type || found_key->objectid != key.objectid) - return 1; - - return 0; -} - /* * this makes the path point to (inum INODE_ITEM ioff) */ @@ -1146,16 +1114,16 @@ int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, struct btrfs_path *path) { struct btrfs_key key; - return __inode_info(inum, ioff, BTRFS_INODE_ITEM_KEY, fs_root, path, - &key); + return btrfs_find_item(fs_root, path, inum, ioff, + BTRFS_INODE_ITEM_KEY, &key); } static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, struct btrfs_path *path, struct btrfs_key *found_key) { - return __inode_info(inum, ioff, BTRFS_INODE_REF_KEY, fs_root, path, - found_key); + return btrfs_find_item(fs_root, path, inum, ioff, + BTRFS_INODE_REF_KEY, found_key); } int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index bcd0bd85e3e..141c15ca294 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2461,6 +2461,43 @@ static int key_search(struct extent_buffer *b, struct btrfs_key *key, return 0; } +/* Proposed generic search function, meant to take the place of the +* various small search helper functions throughout the code and standardize +* the search interface. Right now, it only replaces the former __inode_info +* in backref.c. +*/ +int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path, + u64 iobjectid, u64 ioff, u8 key_type, + struct btrfs_key *found_key) +{ + int ret; + struct btrfs_key key; + struct extent_buffer *eb; + + key.type = key_type; + key.objectid = iobjectid; + key.offset = ioff; + + ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0); + if (ret < 0) + return ret; + + eb = path->nodes[0]; + if (ret && path->slots[0] >= btrfs_header_nritems(eb)) { + ret = btrfs_next_leaf(fs_root, path); + if (ret) + return ret; + eb = path->nodes[0]; + } + + btrfs_item_key_to_cpu(eb, found_key, path->slots[0]); + if (found_key->type != key.type || + found_key->objectid != key.objectid) + return 1; + + return 0; +} + /* * look for key in the tree. 
path is filled in with nodes along the way * if key is found, we return zero and you can find the item in the leaf diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index dbe9b313cdf..a58611fbf00 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3371,6 +3371,8 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *new_key); +int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path, + u64 inum, u64 ioff, u8 key_type, struct btrfs_key *found_key); int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); -- cgit v1.2.3-70-g09d2 From 3fe81ce206f3805e0eb5d886aabbf91064655144 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Sun, 15 Dec 2013 12:43:58 +0000 Subject: Btrfs: fix deadlock when iterating inode refs and running delayed inodes While running btrfs/004 from xfstests, after 503 iterations, dmesg reported a deadlock between tasks iterating inode refs and tasks running delayed inodes (during a transaction commit). It turns out that iterating inode refs implies doing one tree search and release all nodes in the path except the leaf node, and then passing that leaf node to btrfs_ref_to_path(), which in turn does another tree search without releasing the lock on the leaf node it received as parameter. This is a problem when other task wants to write to the btree as well and ends up updating the leaf that is read locked - the writer task locks the parent of the leaf and then blocks waiting for the leaf's lock to be released - at the same time, the task executing btrfs_ref_to_path() does a second tree search, without releasing the lock on the first leaf, and wants to access a leaf (the same or another one) that is a child of the same parent, resulting in a deadlock. The trace reported by lockdep follows. [84314.936373] INFO: task fsstress:11930 blocked for more than 120 seconds. [84314.936381] Tainted: G W O 3.12.0-fdm-btrfs-next-16+ #70 [84314.936383] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [84314.936386] fsstress D ffff8806e1bf8000 0 11930 11926 0x00000000 [84314.936393] ffff8804d6d89b78 0000000000000046 ffff8804d6d89b18 ffffffff810bd8bd [84314.936399] ffff8806e1bf8000 ffff8804d6d89fd8 ffff8804d6d89fd8 ffff8804d6d89fd8 [84314.936405] ffff880806308000 ffff8806e1bf8000 ffff8804d6d89c08 ffff8804deb8f190 [84314.936410] Call Trace: [84314.936421] [] ? trace_hardirqs_on+0xd/0x10 [84314.936428] [] schedule+0x29/0x70 [84314.936451] [] btrfs_tree_lock+0x75/0x270 [btrfs] [84314.936457] [] ? __init_waitqueue_head+0x60/0x60 [84314.936470] [] btrfs_search_slot+0x7f1/0x930 [btrfs] [84314.936489] [] ? __btrfs_run_delayed_items+0x13a/0x1e0 [btrfs] [84314.936504] [] btrfs_lookup_inode+0x2f/0xa0 [btrfs] [84314.936510] [] ? trace_hardirqs_on_caller+0x1f/0x1e0 [84314.936528] [] __btrfs_update_delayed_inode+0x4c/0x1d0 [btrfs] [84314.936543] [] ? __btrfs_run_delayed_items+0x13a/0x1e0 [btrfs] [84314.936558] [] ? __btrfs_run_delayed_items+0x13a/0x1e0 [btrfs] [84314.936573] [] __btrfs_run_delayed_items+0x192/0x1e0 [btrfs] [84314.936589] [] btrfs_run_delayed_items+0x13/0x20 [btrfs] [84314.936604] [] btrfs_flush_all_pending_stuffs+0x24/0x80 [btrfs] [84314.936620] [] btrfs_commit_transaction+0x223/0xa20 [btrfs] [84314.936630] [] btrfs_sync_fs+0x6e/0x110 [btrfs] [84314.936635] [] ? __sync_filesystem+0x60/0x60 [84314.936639] [] ? 
__sync_filesystem+0x60/0x60 [84314.936643] [] sync_fs_one_sb+0x20/0x30 [84314.936648] [] iterate_supers+0xf1/0x100 [84314.936652] [] sys_sync+0x55/0x90 [84314.936658] [] system_call_fastpath+0x16/0x1b [84314.936660] INFO: lockdep is turned off. [84314.936663] INFO: task btrfs:11955 blocked for more than 120 seconds. [84314.936666] Tainted: G W O 3.12.0-fdm-btrfs-next-16+ #70 [84314.936668] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [84314.936670] btrfs D ffff880541729a88 0 11955 11608 0x00000000 [84314.936674] ffff880541729a38 0000000000000046 ffff8805417299d8 ffffffff810bd8bd [84314.936680] ffff88075430c8a0 ffff880541729fd8 ffff880541729fd8 ffff880541729fd8 [84314.936685] ffffffff81c104e0 ffff88075430c8a0 ffff8804de8b00b8 ffff8804de8b0000 [84314.936690] Call Trace: [84314.936695] [] ? trace_hardirqs_on+0xd/0x10 [84314.936700] [] schedule+0x29/0x70 [84314.936717] [] btrfs_tree_read_lock+0xd5/0x140 [btrfs] [84314.936721] [] ? __init_waitqueue_head+0x60/0x60 [84314.936733] [] btrfs_search_slot+0x7c1/0x930 [btrfs] [84314.936746] [] btrfs_find_item+0x55/0x160 [btrfs] [84314.936763] [] ? free_extent_buffer+0x49/0xc0 [btrfs] [84314.936780] [] btrfs_ref_to_path+0xba/0x1e0 [btrfs] [84314.936797] [] ? release_extent_buffer+0xb9/0xe0 [btrfs] [84314.936813] [] ? free_extent_buffer+0x49/0xc0 [btrfs] [84314.936830] [] inode_to_path+0x60/0xd0 [btrfs] [84314.936846] [] paths_from_inode+0x115/0x3c0 [btrfs] [84314.936851] [] ? kmem_cache_alloc_trace+0x114/0x200 [84314.936868] [] btrfs_ioctl+0xf14/0x2030 [btrfs] [84314.936873] [] ? _raw_spin_unlock+0x2b/0x50 [84314.936877] [] ? handle_mm_fault+0x34f/0xb00 [84314.936882] [] ? up_read+0x23/0x40 [84314.936886] [] ? __do_page_fault+0x20c/0x5a0 [84314.936892] [] do_vfs_ioctl+0x96/0x570 [84314.936896] [] ? error_sti+0x5/0x6 [84314.936901] [] ? trace_hardirqs_off_caller+0x28/0xd0 [84314.936906] [] ? retint_swapgs+0xe/0x13 [84314.936910] [] SyS_ioctl+0x91/0xb0 [84314.936915] [] ? trace_hardirqs_on_thunk+0x3a/0x3f [84314.936920] [] system_call_fastpath+0x16/0x1b [84314.936922] INFO: lockdep is turned off. [84434.866873] INFO: task btrfs-transacti:11921 blocked for more than 120 seconds. [84434.866881] Tainted: G W O 3.12.0-fdm-btrfs-next-16+ #70 [84434.866883] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [84434.866886] btrfs-transacti D ffff880755b6a478 0 11921 2 0x00000000 [84434.866893] ffff8800735b9ce8 0000000000000046 ffff8800735b9c88 ffffffff810bd8bd [84434.866899] ffff8805a1b848a0 ffff8800735b9fd8 ffff8800735b9fd8 ffff8800735b9fd8 [84434.866904] ffffffff81c104e0 ffff8805a1b848a0 ffff880755b6a478 ffff8804cece78f0 [84434.866910] Call Trace: [84434.866920] [] ? trace_hardirqs_on+0xd/0x10 [84434.866927] [] schedule+0x29/0x70 [84434.866948] [] wait_current_trans.isra.33+0xbf/0x120 [btrfs] [84434.866954] [] ? __init_waitqueue_head+0x60/0x60 [84434.866970] [] start_transaction+0x388/0x5a0 [btrfs] [84434.866985] [] ? transaction_kthread+0xb5/0x280 [btrfs] [84434.866999] [] btrfs_attach_transaction+0x17/0x20 [btrfs] [84434.867012] [] transaction_kthread+0x19e/0x280 [btrfs] [84434.867026] [] ? open_ctree+0x2260/0x2260 [btrfs] [84434.867030] [] kthread+0xed/0x100 [84434.867035] [] ? flush_kthread_worker+0x190/0x190 [84434.867040] [] ret_from_fork+0x7c/0xb0 [84434.867044] [] ? 
flush_kthread_worker+0x190/0x190 Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 6a3f7f50ad3..835b6c9a26a 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1569,7 +1569,6 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root, struct btrfs_key found_key; while (!ret) { - path->leave_spinning = 1; ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path, &found_key); if (ret < 0) @@ -1582,9 +1581,12 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root, parent = found_key.offset; slot = path->slots[0]; - eb = path->nodes[0]; - /* make sure we can use eb after releasing the path */ - atomic_inc(&eb->refs); + eb = btrfs_clone_extent_buffer(path->nodes[0]); + if (!eb) { + ret = -ENOMEM; + break; + } + extent_buffer_get(eb); btrfs_tree_read_lock(eb); btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); btrfs_release_path(path); @@ -1642,9 +1644,12 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root, ++found; slot = path->slots[0]; - eb = path->nodes[0]; - /* make sure we can use eb after releasing the path */ - atomic_inc(&eb->refs); + eb = btrfs_clone_extent_buffer(path->nodes[0]); + if (!eb) { + ret = -ENOMEM; + break; + } + extent_buffer_get(eb); btrfs_tree_read_lock(eb); btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); -- cgit v1.2.3-70-g09d2 From d7df2c796d7eedd72a334dc89c65e1fec8171431 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 23 Jan 2014 09:21:38 -0500 Subject: Btrfs: attach delayed ref updates to delayed ref heads Currently we have two rb-trees, one for delayed ref heads and one for all of the delayed refs, including the delayed ref heads. When we process the delayed refs we have to hold onto the delayed ref lock for all of the selecting and merging and such, which results in quite a bit of lock contention. This was solved by having a waitqueue and only one flusher at a time, however this hurts if we get a lot of delayed refs queued up. So instead just have an rb tree for the delayed ref heads, and then attach the delayed ref updates to an rb tree that is per delayed ref head. Then we only need to take the delayed ref lock when adding new delayed refs and when selecting a delayed ref head to process, all the rest of the time we deal with a per delayed ref head lock which will be much less contentious. The locking rules for this get a little more complicated since we have to lock up to 3 things to properly process delayed refs, but I will address that problem later. For now this passes all of xfstests and my overnight stress tests. 
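As a rough sketch of the new shape (distilled from the __add_delayed_refs() hunk further down, not a verbatim excerpt; the helper name is made up for the example), iterating the refs that hang off one head now needs only that head's spinlock:

/* Sketch: walk the delayed refs attached to a single head under the
 * per-head lock; the global delayed_refs->lock is not needed here.
 */
static void walk_refs_of_head(struct btrfs_delayed_ref_head *head)
{
	struct rb_node *n;

	spin_lock(&head->lock);
	for (n = rb_first(&head->ref_root); n; n = rb_next(n)) {
		struct btrfs_delayed_ref_node *node;

		node = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
		/* process 'node'; every entry in ref_root belongs to this
		 * head's bytenr, so the old bytenr check goes away */
	}
	spin_unlock(&head->lock);
}

The global delayed_refs->lock is then only taken to insert new refs and to pick a head with btrfs_select_ref_head(), which is where the reduced contention comes from.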
Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 23 ++-- fs/btrfs/delayed-ref.c | 223 +++++++++++++++++----------------- fs/btrfs/delayed-ref.h | 23 ++-- fs/btrfs/disk-io.c | 79 ++++++------ fs/btrfs/extent-tree.c | 317 ++++++++++++++++--------------------------------- fs/btrfs/transaction.c | 7 +- 6 files changed, 267 insertions(+), 405 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 835b6c9a26a..34a8952de8d 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -538,14 +538,13 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, if (extent_op && extent_op->update_key) btrfs_disk_key_to_cpu(&op_key, &extent_op->key); - while ((n = rb_prev(n))) { + spin_lock(&head->lock); + n = rb_first(&head->ref_root); + while (n) { struct btrfs_delayed_ref_node *node; node = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); - if (node->bytenr != head->node.bytenr) - break; - WARN_ON(node->is_head); - + n = rb_next(n); if (node->seq > seq) continue; @@ -612,10 +611,10 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, WARN_ON(1); } if (ret) - return ret; + break; } - - return 0; + spin_unlock(&head->lock); + return ret; } /* @@ -882,15 +881,15 @@ again: btrfs_put_delayed_ref(&head->node); goto again; } + spin_unlock(&delayed_refs->lock); ret = __add_delayed_refs(head, time_seq, &prefs_delayed); mutex_unlock(&head->mutex); - if (ret) { - spin_unlock(&delayed_refs->lock); + if (ret) goto out; - } + } else { + spin_unlock(&delayed_refs->lock); } - spin_unlock(&delayed_refs->lock); } if (path->slots[0]) { diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index fab60c1ba06..f3bff89eecf 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -269,39 +269,38 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, static inline void drop_delayed_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head, struct btrfs_delayed_ref_node *ref) { - rb_erase(&ref->rb_node, &delayed_refs->root); if (btrfs_delayed_ref_is_head(ref)) { - struct btrfs_delayed_ref_head *head; - head = btrfs_delayed_node_to_head(ref); rb_erase(&head->href_node, &delayed_refs->href_root); + } else { + assert_spin_locked(&head->lock); + rb_erase(&ref->rb_node, &head->ref_root); } ref->in_tree = 0; btrfs_put_delayed_ref(ref); - delayed_refs->num_entries--; + atomic_dec(&delayed_refs->num_entries); if (trans->delayed_ref_updates) trans->delayed_ref_updates--; } static int merge_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head, struct btrfs_delayed_ref_node *ref, u64 seq) { struct rb_node *node; - int merged = 0; int mod = 0; int done = 0; - node = rb_prev(&ref->rb_node); - while (node) { + node = rb_next(&ref->rb_node); + while (!done && node) { struct btrfs_delayed_ref_node *next; next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - node = rb_prev(node); - if (next->bytenr != ref->bytenr) - break; + node = rb_next(node); if (seq && next->seq >= seq) break; if (comp_entry(ref, next, 0)) @@ -321,12 +320,11 @@ static int merge_ref(struct btrfs_trans_handle *trans, mod = -next->ref_mod; } - merged++; - drop_delayed_ref(trans, delayed_refs, next); + drop_delayed_ref(trans, delayed_refs, head, next); ref->ref_mod += mod; if (ref->ref_mod == 0) { - drop_delayed_ref(trans, delayed_refs, ref); - break; + 
drop_delayed_ref(trans, delayed_refs, head, ref); + done = 1; } else { /* * You can't have multiples of the same ref on a tree @@ -335,13 +333,8 @@ static int merge_ref(struct btrfs_trans_handle *trans, WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY || ref->type == BTRFS_SHARED_BLOCK_REF_KEY); } - - if (done) - break; - node = rb_prev(&ref->rb_node); } - - return merged; + return done; } void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, @@ -352,6 +345,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, struct rb_node *node; u64 seq = 0; + assert_spin_locked(&head->lock); /* * We don't have too much refs to merge in the case of delayed data * refs. @@ -369,22 +363,19 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, } spin_unlock(&fs_info->tree_mod_seq_lock); - node = rb_prev(&head->node.rb_node); + node = rb_first(&head->ref_root); while (node) { struct btrfs_delayed_ref_node *ref; ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - if (ref->bytenr != head->node.bytenr) - break; - /* We can't merge refs that are outside of our seq count */ if (seq && ref->seq >= seq) break; - if (merge_ref(trans, delayed_refs, ref, seq)) - node = rb_prev(&head->node.rb_node); + if (merge_ref(trans, delayed_refs, head, ref, seq)) + node = rb_first(&head->ref_root); else - node = rb_prev(node); + node = rb_next(&ref->rb_node); } } @@ -412,64 +403,52 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, return ret; } -int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, - struct list_head *cluster, u64 start) +struct btrfs_delayed_ref_head * +btrfs_select_ref_head(struct btrfs_trans_handle *trans) { - int count = 0; struct btrfs_delayed_ref_root *delayed_refs; - struct rb_node *node; - struct btrfs_delayed_ref_head *head = NULL; + struct btrfs_delayed_ref_head *head; + u64 start; + bool loop = false; delayed_refs = &trans->transaction->delayed_refs; - node = rb_first(&delayed_refs->href_root); - if (start) { - find_ref_head(&delayed_refs->href_root, start + 1, &head, 1); - if (head) - node = &head->href_node; - } again: - while (node && count < 32) { - head = rb_entry(node, struct btrfs_delayed_ref_head, href_node); - if (list_empty(&head->cluster)) { - list_add_tail(&head->cluster, cluster); - delayed_refs->run_delayed_start = - head->node.bytenr; - count++; - - WARN_ON(delayed_refs->num_heads_ready == 0); - delayed_refs->num_heads_ready--; - } else if (count) { - /* the goal of the clustering is to find extents - * that are likely to end up in the same extent - * leaf on disk. So, we don't want them spread - * all over the tree. Stop now if we've hit - * a head that was already in use - */ - break; - } - node = rb_next(node); - } - if (count) { - return 0; - } else if (start) { - /* - * we've gone to the end of the rbtree without finding any - * clusters. 
start from the beginning and try again - */ + start = delayed_refs->run_delayed_start; + head = find_ref_head(&delayed_refs->href_root, start, NULL, 1); + if (!head && !loop) { + delayed_refs->run_delayed_start = 0; start = 0; - node = rb_first(&delayed_refs->href_root); - goto again; + loop = true; + head = find_ref_head(&delayed_refs->href_root, start, NULL, 1); + if (!head) + return NULL; + } else if (!head && loop) { + return NULL; } - return 1; -} -void btrfs_release_ref_cluster(struct list_head *cluster) -{ - struct list_head *pos, *q; + while (head->processing) { + struct rb_node *node; + + node = rb_next(&head->href_node); + if (!node) { + if (loop) + return NULL; + delayed_refs->run_delayed_start = 0; + start = 0; + loop = true; + goto again; + } + head = rb_entry(node, struct btrfs_delayed_ref_head, + href_node); + } - list_for_each_safe(pos, q, cluster) - list_del_init(pos); + head->processing = 1; + WARN_ON(delayed_refs->num_heads_ready == 0); + delayed_refs->num_heads_ready--; + delayed_refs->run_delayed_start = head->node.bytenr + + head->node.num_bytes; + return head; } /* @@ -483,6 +462,7 @@ void btrfs_release_ref_cluster(struct list_head *cluster) static noinline void update_existing_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head, struct btrfs_delayed_ref_node *existing, struct btrfs_delayed_ref_node *update) { @@ -495,7 +475,7 @@ update_existing_ref(struct btrfs_trans_handle *trans, */ existing->ref_mod--; if (existing->ref_mod == 0) - drop_delayed_ref(trans, delayed_refs, existing); + drop_delayed_ref(trans, delayed_refs, head, existing); else WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || existing->type == BTRFS_SHARED_BLOCK_REF_KEY); @@ -565,9 +545,13 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing, } } /* - * update the reference mod on the head to reflect this new operation + * update the reference mod on the head to reflect this new operation, + * only need the lock for this case cause we could be processing it + * currently, for refs we just added we know we're a-ok. */ + spin_lock(&existing_ref->lock); existing->ref_mod += update->ref_mod; + spin_unlock(&existing_ref->lock); } /* @@ -575,13 +559,13 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing, * this does all the dirty work in terms of maintaining the correct * overall modification count. 
*/ -static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, - struct btrfs_delayed_ref_node *ref, - u64 bytenr, u64 num_bytes, - int action, int is_data) +static noinline struct btrfs_delayed_ref_head * +add_delayed_ref_head(struct btrfs_fs_info *fs_info, + struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_node *ref, u64 bytenr, + u64 num_bytes, int action, int is_data) { - struct btrfs_delayed_ref_node *existing; + struct btrfs_delayed_ref_head *existing; struct btrfs_delayed_ref_head *head_ref = NULL; struct btrfs_delayed_ref_root *delayed_refs; int count_mod = 1; @@ -628,39 +612,43 @@ static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info, head_ref = btrfs_delayed_node_to_head(ref); head_ref->must_insert_reserved = must_insert_reserved; head_ref->is_data = is_data; + head_ref->ref_root = RB_ROOT; + head_ref->processing = 0; - INIT_LIST_HEAD(&head_ref->cluster); + spin_lock_init(&head_ref->lock); mutex_init(&head_ref->mutex); trace_add_delayed_ref_head(ref, head_ref, action); - existing = tree_insert(&delayed_refs->root, &ref->rb_node); - + existing = htree_insert(&delayed_refs->href_root, + &head_ref->href_node); if (existing) { - update_existing_head_ref(existing, ref); + update_existing_head_ref(&existing->node, ref); /* * we've updated the existing ref, free the newly * allocated ref */ kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); + head_ref = existing; } else { - htree_insert(&delayed_refs->href_root, &head_ref->href_node); delayed_refs->num_heads++; delayed_refs->num_heads_ready++; - delayed_refs->num_entries++; + atomic_inc(&delayed_refs->num_entries); trans->delayed_ref_updates++; } + return head_ref; } /* * helper to insert a delayed tree ref into the rbtree. */ -static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, - struct btrfs_delayed_ref_node *ref, - u64 bytenr, u64 num_bytes, u64 parent, - u64 ref_root, int level, int action, - int for_cow) +static noinline void +add_delayed_tree_ref(struct btrfs_fs_info *fs_info, + struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_head *head_ref, + struct btrfs_delayed_ref_node *ref, u64 bytenr, + u64 num_bytes, u64 parent, u64 ref_root, int level, + int action, int for_cow) { struct btrfs_delayed_ref_node *existing; struct btrfs_delayed_tree_ref *full_ref; @@ -696,30 +684,33 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info, trace_add_delayed_tree_ref(ref, full_ref, action); - existing = tree_insert(&delayed_refs->root, &ref->rb_node); - + spin_lock(&head_ref->lock); + existing = tree_insert(&head_ref->ref_root, &ref->rb_node); if (existing) { - update_existing_ref(trans, delayed_refs, existing, ref); + update_existing_ref(trans, delayed_refs, head_ref, existing, + ref); /* * we've updated the existing ref, free the newly * allocated ref */ kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref); } else { - delayed_refs->num_entries++; + atomic_inc(&delayed_refs->num_entries); trans->delayed_ref_updates++; } + spin_unlock(&head_ref->lock); } /* * helper to insert a delayed data ref into the rbtree. 
*/ -static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, - struct btrfs_delayed_ref_node *ref, - u64 bytenr, u64 num_bytes, u64 parent, - u64 ref_root, u64 owner, u64 offset, - int action, int for_cow) +static noinline void +add_delayed_data_ref(struct btrfs_fs_info *fs_info, + struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_head *head_ref, + struct btrfs_delayed_ref_node *ref, u64 bytenr, + u64 num_bytes, u64 parent, u64 ref_root, u64 owner, + u64 offset, int action, int for_cow) { struct btrfs_delayed_ref_node *existing; struct btrfs_delayed_data_ref *full_ref; @@ -757,19 +748,21 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info, trace_add_delayed_data_ref(ref, full_ref, action); - existing = tree_insert(&delayed_refs->root, &ref->rb_node); - + spin_lock(&head_ref->lock); + existing = tree_insert(&head_ref->ref_root, &ref->rb_node); if (existing) { - update_existing_ref(trans, delayed_refs, existing, ref); + update_existing_ref(trans, delayed_refs, head_ref, existing, + ref); /* * we've updated the existing ref, free the newly * allocated ref */ kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref); } else { - delayed_refs->num_entries++; + atomic_inc(&delayed_refs->num_entries); trans->delayed_ref_updates++; } + spin_unlock(&head_ref->lock); } /* @@ -808,10 +801,10 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, * insert both the head node and the new ref without dropping * the spin lock */ - add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, - num_bytes, action, 0); + head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, + bytenr, num_bytes, action, 0); - add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, + add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr, num_bytes, parent, ref_root, level, action, for_cow); spin_unlock(&delayed_refs->lock); @@ -856,10 +849,10 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, * insert both the head node and the new ref without dropping * the spin lock */ - add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, - num_bytes, action, 1); + head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, + bytenr, num_bytes, action, 1); - add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, + add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr, num_bytes, parent, ref_root, owner, offset, action, for_cow); spin_unlock(&delayed_refs->lock); diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index a54c9d47918..4ba9b93022f 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -81,7 +81,8 @@ struct btrfs_delayed_ref_head { */ struct mutex mutex; - struct list_head cluster; + spinlock_t lock; + struct rb_root ref_root; struct rb_node href_node; @@ -100,6 +101,7 @@ struct btrfs_delayed_ref_head { */ unsigned int must_insert_reserved:1; unsigned int is_data:1; + unsigned int processing:1; }; struct btrfs_delayed_tree_ref { @@ -118,8 +120,6 @@ struct btrfs_delayed_data_ref { }; struct btrfs_delayed_ref_root { - struct rb_root root; - /* head ref rbtree */ struct rb_root href_root; @@ -129,7 +129,7 @@ struct btrfs_delayed_ref_root { /* how many delayed ref updates we've queued, used by the * throttling code */ - unsigned long num_entries; + atomic_t num_entries; /* total number of head nodes in tree */ unsigned long num_heads; @@ -137,15 +137,6 @@ struct btrfs_delayed_ref_root { /* total number of head nodes ready for processing */ unsigned long 
num_heads_ready; - /* - * bumped when someone is making progress on the delayed - * refs, so that other procs know they are just adding to - * contention intead of helping - */ - atomic_t procs_running_refs; - atomic_t ref_seq; - wait_queue_head_t wait; - /* * set when the tree is flushing before a transaction commit, * used by the throttling code to decide if new updates need @@ -231,9 +222,9 @@ static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head) mutex_unlock(&head->mutex); } -int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, - struct list_head *cluster, u64 search_start); -void btrfs_release_ref_cluster(struct list_head *cluster); + +struct btrfs_delayed_ref_head * +btrfs_select_ref_head(struct btrfs_trans_handle *trans); int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9850a51a3f1..ed23127a4b0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3801,58 +3801,55 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, delayed_refs = &trans->delayed_refs; spin_lock(&delayed_refs->lock); - if (delayed_refs->num_entries == 0) { + if (atomic_read(&delayed_refs->num_entries) == 0) { spin_unlock(&delayed_refs->lock); btrfs_info(root->fs_info, "delayed_refs has NO entry"); return ret; } - while ((node = rb_first(&delayed_refs->root)) != NULL) { - struct btrfs_delayed_ref_head *head = NULL; + while ((node = rb_first(&delayed_refs->href_root)) != NULL) { + struct btrfs_delayed_ref_head *head; bool pin_bytes = false; - ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - atomic_set(&ref->refs, 1); - if (btrfs_delayed_ref_is_head(ref)) { + head = rb_entry(node, struct btrfs_delayed_ref_head, + href_node); + if (!mutex_trylock(&head->mutex)) { + atomic_inc(&head->node.refs); + spin_unlock(&delayed_refs->lock); - head = btrfs_delayed_node_to_head(ref); - if (!mutex_trylock(&head->mutex)) { - atomic_inc(&ref->refs); - spin_unlock(&delayed_refs->lock); - - /* Need to wait for the delayed ref to run */ - mutex_lock(&head->mutex); - mutex_unlock(&head->mutex); - btrfs_put_delayed_ref(ref); - - spin_lock(&delayed_refs->lock); - continue; - } - - if (head->must_insert_reserved) - pin_bytes = true; - btrfs_free_delayed_extent_op(head->extent_op); - delayed_refs->num_heads--; - if (list_empty(&head->cluster)) - delayed_refs->num_heads_ready--; - list_del_init(&head->cluster); - } - - ref->in_tree = 0; - rb_erase(&ref->rb_node, &delayed_refs->root); - if (head) - rb_erase(&head->href_node, &delayed_refs->href_root); - - delayed_refs->num_entries--; - spin_unlock(&delayed_refs->lock); - if (head) { - if (pin_bytes) - btrfs_pin_extent(root, ref->bytenr, - ref->num_bytes, 1); + mutex_lock(&head->mutex); mutex_unlock(&head->mutex); + btrfs_put_delayed_ref(&head->node); + spin_lock(&delayed_refs->lock); + continue; + } + spin_lock(&head->lock); + while ((node = rb_first(&head->ref_root)) != NULL) { + ref = rb_entry(node, struct btrfs_delayed_ref_node, + rb_node); + ref->in_tree = 0; + rb_erase(&ref->rb_node, &head->ref_root); + atomic_dec(&delayed_refs->num_entries); + btrfs_put_delayed_ref(ref); + cond_resched_lock(&head->lock); } - btrfs_put_delayed_ref(ref); + if (head->must_insert_reserved) + pin_bytes = true; + btrfs_free_delayed_extent_op(head->extent_op); + delayed_refs->num_heads--; + if (head->processing == 0) + delayed_refs->num_heads_ready--; + atomic_dec(&delayed_refs->num_entries); + head->node.in_tree = 0; + 
rb_erase(&head->href_node, &delayed_refs->href_root); + spin_unlock(&head->lock); + spin_unlock(&delayed_refs->lock); + mutex_unlock(&head->mutex); + if (pin_bytes) + btrfs_pin_extent(root, head->node.bytenr, + head->node.num_bytes, 1); + btrfs_put_delayed_ref(&head->node); cond_resched(); spin_lock(&delayed_refs->lock); } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 77acc087389..c77156c77de 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -857,12 +857,14 @@ again: btrfs_put_delayed_ref(&head->node); goto search_again; } + spin_lock(&head->lock); if (head->extent_op && head->extent_op->update_flags) extent_flags |= head->extent_op->flags_to_set; else BUG_ON(num_refs == 0); num_refs += head->node.ref_mod; + spin_unlock(&head->lock); mutex_unlock(&head->mutex); } spin_unlock(&delayed_refs->lock); @@ -2287,40 +2289,33 @@ static noinline struct btrfs_delayed_ref_node * select_delayed_ref(struct btrfs_delayed_ref_head *head) { struct rb_node *node; - struct btrfs_delayed_ref_node *ref; - int action = BTRFS_ADD_DELAYED_REF; -again: + struct btrfs_delayed_ref_node *ref, *last = NULL;; + /* * select delayed ref of type BTRFS_ADD_DELAYED_REF first. * this prevents ref count from going down to zero when * there still are pending delayed ref. */ - node = rb_prev(&head->node.rb_node); - while (1) { - if (!node) - break; + node = rb_first(&head->ref_root); + while (node) { ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - if (ref->bytenr != head->node.bytenr) - break; - if (ref->action == action) + if (ref->action == BTRFS_ADD_DELAYED_REF) return ref; - node = rb_prev(node); - } - if (action == BTRFS_ADD_DELAYED_REF) { - action = BTRFS_DROP_DELAYED_REF; - goto again; + else if (last == NULL) + last = ref; + node = rb_next(node); } - return NULL; + return last; } /* * Returns 0 on success or if called with an already aborted transaction. * Returns -ENOMEM or -EIO on failure and will abort the transaction. */ -static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct list_head *cluster) +static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + unsigned long nr) { struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_delayed_ref_node *ref; @@ -2328,23 +2323,26 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, struct btrfs_delayed_extent_op *extent_op; struct btrfs_fs_info *fs_info = root->fs_info; int ret; - int count = 0; + unsigned long count = 0; int must_insert_reserved = 0; delayed_refs = &trans->transaction->delayed_refs; while (1) { if (!locked_ref) { - /* pick a new head ref from the cluster list */ - if (list_empty(cluster)) + if (count >= nr) break; - locked_ref = list_entry(cluster->next, - struct btrfs_delayed_ref_head, cluster); + spin_lock(&delayed_refs->lock); + locked_ref = btrfs_select_ref_head(trans); + if (!locked_ref) { + spin_unlock(&delayed_refs->lock); + break; + } /* grab the lock that says we are going to process * all the refs for this head */ ret = btrfs_delayed_ref_lock(trans, locked_ref); - + spin_unlock(&delayed_refs->lock); /* * we may have dropped the spin lock to get the head * mutex lock, and that might have given someone else @@ -2365,6 +2363,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, * finish. If we merged anything we need to re-loop so we can * get a good ref. 
*/ + spin_lock(&locked_ref->lock); btrfs_merge_delayed_refs(trans, fs_info, delayed_refs, locked_ref); @@ -2376,17 +2375,14 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, if (ref && ref->seq && btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) { - /* - * there are still refs with lower seq numbers in the - * process of being added. Don't run this ref yet. - */ - list_del_init(&locked_ref->cluster); + spin_unlock(&locked_ref->lock); btrfs_delayed_ref_unlock(locked_ref); - locked_ref = NULL; + spin_lock(&delayed_refs->lock); + locked_ref->processing = 0; delayed_refs->num_heads_ready++; spin_unlock(&delayed_refs->lock); + locked_ref = NULL; cond_resched(); - spin_lock(&delayed_refs->lock); continue; } @@ -2401,6 +2397,8 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, locked_ref->extent_op = NULL; if (!ref) { + + /* All delayed refs have been processed, Go ahead * and send the head node to run_one_delayed_ref, * so that any accounting fixes can happen @@ -2413,8 +2411,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, } if (extent_op) { - spin_unlock(&delayed_refs->lock); - + spin_unlock(&locked_ref->lock); ret = run_delayed_extent_op(trans, root, ref, extent_op); btrfs_free_delayed_extent_op(extent_op); @@ -2428,23 +2425,38 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, */ if (must_insert_reserved) locked_ref->must_insert_reserved = 1; + locked_ref->processing = 0; btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); - spin_lock(&delayed_refs->lock); btrfs_delayed_ref_unlock(locked_ref); return ret; } - - goto next; + continue; } - } - ref->in_tree = 0; - rb_erase(&ref->rb_node, &delayed_refs->root); - if (btrfs_delayed_ref_is_head(ref)) { + /* + * Need to drop our head ref lock and re-aqcuire the + * delayed ref lock and then re-check to make sure + * nobody got added. 
+ */ + spin_unlock(&locked_ref->lock); + spin_lock(&delayed_refs->lock); + spin_lock(&locked_ref->lock); + if (rb_first(&locked_ref->ref_root)) { + spin_unlock(&locked_ref->lock); + spin_unlock(&delayed_refs->lock); + continue; + } + ref->in_tree = 0; + delayed_refs->num_heads--; rb_erase(&locked_ref->href_node, &delayed_refs->href_root); + spin_unlock(&delayed_refs->lock); + } else { + ref->in_tree = 0; + rb_erase(&ref->rb_node, &locked_ref->ref_root); } - delayed_refs->num_entries--; + atomic_dec(&delayed_refs->num_entries); + if (!btrfs_delayed_ref_is_head(ref)) { /* * when we play the delayed ref, also correct the @@ -2461,20 +2473,18 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, default: WARN_ON(1); } - } else { - list_del_init(&locked_ref->cluster); } - spin_unlock(&delayed_refs->lock); + spin_unlock(&locked_ref->lock); ret = run_one_delayed_ref(trans, root, ref, extent_op, must_insert_reserved); btrfs_free_delayed_extent_op(extent_op); if (ret) { + locked_ref->processing = 0; btrfs_delayed_ref_unlock(locked_ref); btrfs_put_delayed_ref(ref); btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret); - spin_lock(&delayed_refs->lock); return ret; } @@ -2490,11 +2500,9 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, } btrfs_put_delayed_ref(ref); count++; -next: cond_resched(); - spin_lock(&delayed_refs->lock); } - return count; + return 0; } #ifdef SCRAMBLE_DELAYED_REFS @@ -2576,16 +2584,6 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, return ret; } -static int refs_newer(struct btrfs_delayed_ref_root *delayed_refs, int seq, - int count) -{ - int val = atomic_read(&delayed_refs->ref_seq); - - if (val < seq || val >= seq + count) - return 1; - return 0; -} - static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads) { u64 num_bytes; @@ -2647,12 +2645,9 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct rb_node *node; struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_delayed_ref_head *head; - struct list_head cluster; int ret; - u64 delayed_start; int run_all = count == (unsigned long)-1; int run_most = 0; - int loops; /* We'll clean this up in btrfs_cleanup_transaction */ if (trans->aborted) @@ -2664,121 +2659,31 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); delayed_refs = &trans->transaction->delayed_refs; - INIT_LIST_HEAD(&cluster); if (count == 0) { - count = delayed_refs->num_entries * 2; + count = atomic_read(&delayed_refs->num_entries) * 2; run_most = 1; } - if (!run_all && !run_most) { - int old; - int seq = atomic_read(&delayed_refs->ref_seq); - -progress: - old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1); - if (old) { - DEFINE_WAIT(__wait); - if (delayed_refs->flushing || - !btrfs_should_throttle_delayed_refs(trans, root)) - return 0; - - prepare_to_wait(&delayed_refs->wait, &__wait, - TASK_UNINTERRUPTIBLE); - - old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1); - if (old) { - schedule(); - finish_wait(&delayed_refs->wait, &__wait); - - if (!refs_newer(delayed_refs, seq, 256)) - goto progress; - else - return 0; - } else { - finish_wait(&delayed_refs->wait, &__wait); - goto again; - } - } - - } else { - atomic_inc(&delayed_refs->procs_running_refs); - } - again: - loops = 0; - spin_lock(&delayed_refs->lock); - #ifdef SCRAMBLE_DELAYED_REFS delayed_refs->run_delayed_start = find_middle(&delayed_refs->root); #endif - - while (1) { - if (!(run_all || 
run_most) && - !btrfs_should_throttle_delayed_refs(trans, root)) - break; - - /* - * go find something we can process in the rbtree. We start at - * the beginning of the tree, and then build a cluster - * of refs to process starting at the first one we are able to - * lock - */ - delayed_start = delayed_refs->run_delayed_start; - ret = btrfs_find_ref_cluster(trans, &cluster, - delayed_refs->run_delayed_start); - if (ret) - break; - - ret = run_clustered_refs(trans, root, &cluster); - if (ret < 0) { - btrfs_release_ref_cluster(&cluster); - spin_unlock(&delayed_refs->lock); - btrfs_abort_transaction(trans, root, ret); - atomic_dec(&delayed_refs->procs_running_refs); - wake_up(&delayed_refs->wait); - return ret; - } - - atomic_add(ret, &delayed_refs->ref_seq); - - count -= min_t(unsigned long, ret, count); - - if (count == 0) - break; - - if (delayed_start >= delayed_refs->run_delayed_start) { - if (loops == 0) { - /* - * btrfs_find_ref_cluster looped. let's do one - * more cycle. if we don't run any delayed ref - * during that cycle (because we can't because - * all of them are blocked), bail out. - */ - loops = 1; - } else { - /* - * no runnable refs left, stop trying - */ - BUG_ON(run_all); - break; - } - } - if (ret) { - /* refs were run, let's reset staleness detection */ - loops = 0; - } + ret = __btrfs_run_delayed_refs(trans, root, count); + if (ret < 0) { + btrfs_abort_transaction(trans, root, ret); + return ret; } if (run_all) { - if (!list_empty(&trans->new_bgs)) { - spin_unlock(&delayed_refs->lock); + if (!list_empty(&trans->new_bgs)) btrfs_create_pending_block_groups(trans, root); - spin_lock(&delayed_refs->lock); - } + spin_lock(&delayed_refs->lock); node = rb_first(&delayed_refs->href_root); - if (!node) + if (!node) { + spin_unlock(&delayed_refs->lock); goto out; + } count = (unsigned long)-1; while (node) { @@ -2807,16 +2712,10 @@ again: node = rb_next(node); } spin_unlock(&delayed_refs->lock); - schedule_timeout(1); + cond_resched(); goto again; } out: - atomic_dec(&delayed_refs->procs_running_refs); - smp_mb(); - if (waitqueue_active(&delayed_refs->wait)) - wake_up(&delayed_refs->wait); - - spin_unlock(&delayed_refs->lock); assert_qgroups_uptodate(trans); return 0; } @@ -2858,12 +2757,13 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, struct rb_node *node; int ret = 0; - ret = -ENOENT; delayed_refs = &trans->transaction->delayed_refs; spin_lock(&delayed_refs->lock); head = btrfs_find_delayed_ref_head(trans, bytenr); - if (!head) - goto out; + if (!head) { + spin_unlock(&delayed_refs->lock); + return 0; + } if (!mutex_trylock(&head->mutex)) { atomic_inc(&head->node.refs); @@ -2880,40 +2780,35 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, btrfs_put_delayed_ref(&head->node); return -EAGAIN; } + spin_unlock(&delayed_refs->lock); - node = rb_prev(&head->node.rb_node); - if (!node) - goto out_unlock; - - ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - - if (ref->bytenr != bytenr) - goto out_unlock; + spin_lock(&head->lock); + node = rb_first(&head->ref_root); + while (node) { + ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); + node = rb_next(node); - ret = 1; - if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) - goto out_unlock; + /* If it's a shared ref we know a cross reference exists */ + if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) { + ret = 1; + break; + } - data_ref = btrfs_delayed_node_to_data_ref(ref); + data_ref = btrfs_delayed_node_to_data_ref(ref); - node = rb_prev(node); - if (node) { - int 
seq = ref->seq; - - ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - if (ref->bytenr == bytenr && ref->seq == seq) - goto out_unlock; + /* + * If our ref doesn't match the one we're currently looking at + * then we have a cross reference. + */ + if (data_ref->root != root->root_key.objectid || + data_ref->objectid != objectid || + data_ref->offset != offset) { + ret = 1; + break; + } } - - if (data_ref->root != root->root_key.objectid || - data_ref->objectid != objectid || data_ref->offset != offset) - goto out_unlock; - - ret = 0; -out_unlock: + spin_unlock(&head->lock); mutex_unlock(&head->mutex); -out: - spin_unlock(&delayed_refs->lock); return ret; } @@ -5953,8 +5848,6 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, { struct btrfs_delayed_ref_head *head; struct btrfs_delayed_ref_root *delayed_refs; - struct btrfs_delayed_ref_node *ref; - struct rb_node *node; int ret = 0; delayed_refs = &trans->transaction->delayed_refs; @@ -5963,14 +5856,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, if (!head) goto out; - node = rb_prev(&head->node.rb_node); - if (!node) - goto out; - - ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - - /* there are still entries for this ref, we can't drop it */ - if (ref->bytenr == bytenr) + spin_lock(&head->lock); + if (rb_first(&head->ref_root)) goto out; if (head->extent_op) { @@ -5992,20 +5879,19 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, * ahead and process it. */ head->node.in_tree = 0; - rb_erase(&head->node.rb_node, &delayed_refs->root); rb_erase(&head->href_node, &delayed_refs->href_root); - delayed_refs->num_entries--; + atomic_dec(&delayed_refs->num_entries); /* * we don't take a ref on the node because we're removing it from the * tree, so we just steal the ref the tree was holding. 
*/ delayed_refs->num_heads--; - if (list_empty(&head->cluster)) + if (head->processing == 0) delayed_refs->num_heads_ready--; - - list_del_init(&head->cluster); + head->processing = 0; + spin_unlock(&head->lock); spin_unlock(&delayed_refs->lock); BUG_ON(head->extent_op); @@ -6016,6 +5902,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, btrfs_put_delayed_ref(&head->node); return ret; out: + spin_unlock(&head->lock); spin_unlock(&delayed_refs->lock); return 0; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b16352ce0f7..fd1446496fe 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -62,7 +62,6 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) WARN_ON(atomic_read(&transaction->use_count) == 0); if (atomic_dec_and_test(&transaction->use_count)) { BUG_ON(!list_empty(&transaction->list)); - WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.root)); WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root)); while (!list_empty(&transaction->pending_chunks)) { struct extent_map *em; @@ -184,9 +183,8 @@ loop: atomic_set(&cur_trans->use_count, 2); cur_trans->start_time = get_seconds(); - cur_trans->delayed_refs.root = RB_ROOT; cur_trans->delayed_refs.href_root = RB_ROOT; - cur_trans->delayed_refs.num_entries = 0; + atomic_set(&cur_trans->delayed_refs.num_entries, 0); cur_trans->delayed_refs.num_heads_ready = 0; cur_trans->delayed_refs.num_heads = 0; cur_trans->delayed_refs.flushing = 0; @@ -206,9 +204,6 @@ loop: atomic64_set(&fs_info->tree_mod_seq, 0); spin_lock_init(&cur_trans->delayed_refs.lock); - atomic_set(&cur_trans->delayed_refs.procs_running_refs, 0); - atomic_set(&cur_trans->delayed_refs.ref_seq, 0); - init_waitqueue_head(&cur_trans->delayed_refs.wait); INIT_LIST_HEAD(&cur_trans->pending_snapshots); INIT_LIST_HEAD(&cur_trans->ordered_operations); -- cgit v1.2.3-70-g09d2 From 580f0a678ebeba85d30b6a7f22ce32c472263c72 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 23 Jan 2014 16:03:45 -0500 Subject: Btrfs: fix extent_from_logical to deal with skinny metadata I don't think this is an issue and I've not seen it in practice but extent_from_logical will fail to find a skinny extent because it uses btrfs_previous_item and gives it the normal extent item type. This is just not a place to use btrfs_previous_item since we care about either normal extents or skinny extents, so open code btrfs_previous_item to properly check. This would only affect metadata and the only place this is used for metadata is scrub and I'm pretty sure it's just for printing stuff out, not actually doing any work so hopefully it was never a problem other than a cosmetic one. 
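Condensed, the open-coded backwards walk amounts to the following (a sketch of the loop added below, with the path-blocking and nritems details trimmed):

while (1) {
	if (path->slots[0] == 0) {
		ret = btrfs_prev_leaf(fs_info->extent_root, path);
		if (ret)	/* ret > 0: nothing before us, treat as -ENOENT */
			return ret > 0 ? -ENOENT : ret;
	} else {
		path->slots[0]--;
	}
	btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]);
	if (found_key->type == BTRFS_EXTENT_ITEM_KEY ||
	    found_key->type == BTRFS_METADATA_ITEM_KEY)
		break;
}

For a skinny BTRFS_METADATA_ITEM_KEY the key offset is not a byte count, so the extent size is taken from the tree's leafsize rather than from found_key->offset.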
Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 41 +++++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 34a8952de8d..dcf2448c16f 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1302,20 +1302,45 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); if (ret < 0) return ret; - ret = btrfs_previous_item(fs_info->extent_root, path, - 0, BTRFS_EXTENT_ITEM_KEY); - if (ret < 0) - return ret; - btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]); + while (1) { + u32 nritems; + if (path->slots[0] == 0) { + btrfs_set_path_blocking(path); + ret = btrfs_prev_leaf(fs_info->extent_root, path); + if (ret != 0) { + if (ret > 0) { + pr_debug("logical %llu is not within " + "any extent\n", logical); + ret = -ENOENT; + } + return ret; + } + } else { + path->slots[0]--; + } + nritems = btrfs_header_nritems(path->nodes[0]); + if (nritems == 0) { + pr_debug("logical %llu is not within any extent\n", + logical); + return -ENOENT; + } + if (path->slots[0] == nritems) + path->slots[0]--; + + btrfs_item_key_to_cpu(path->nodes[0], found_key, + path->slots[0]); + if (found_key->type == BTRFS_EXTENT_ITEM_KEY || + found_key->type == BTRFS_METADATA_ITEM_KEY) + break; + } + if (found_key->type == BTRFS_METADATA_ITEM_KEY) size = fs_info->extent_root->leafsize; else if (found_key->type == BTRFS_EXTENT_ITEM_KEY) size = found_key->offset; - if ((found_key->type != BTRFS_EXTENT_ITEM_KEY && - found_key->type != BTRFS_METADATA_ITEM_KEY) || - found_key->objectid > logical || + if (found_key->objectid > logical || found_key->objectid + size <= logical) { pr_debug("logical %llu is not within any extent\n", logical); return -ENOENT; -- cgit v1.2.3-70-g09d2 From 7ef81ac86c8a44ab9f4e6e04e1f4c9ea53615b8a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 24 Jan 2014 14:05:42 -0500 Subject: Btrfs: only process as many file extents as there are refs The backref walking code will search down to the key it is looking for and then proceed to walk _all_ of the extents on the file until it hits the end. This is suboptimal with large files, we only need to look for as many extents as we have references for that inode. I have a testcase that creates a randomly written 4 gig file and before this patch it took 6min 30sec to do the initial send, with this patch it takes 2min 30sec to do the intial send. 
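The heart of the change is a counted scan (sketched from the add_all_parents() hunks below; unrelated details omitted):

u64 count = 0;

while (!ret && count < ref->count) {
	/* ... read the file extent item at the current slot ... */
	if (disk_byte == wanted_disk_byte) {
		count++;
		/* record this parent / inode element */
	}
	ret = btrfs_next_old_item(root, path, time_seq);
}

Once count reaches ref->count there is nothing left in this inode that can point at the wanted extent, so the walk stops early instead of running to the end of the file.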
Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index dcf2448c16f..15384968a84 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -209,18 +209,19 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, } static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, - struct ulist *parents, int level, - struct btrfs_key *key_for_search, u64 time_seq, - u64 wanted_disk_byte, - const u64 *extent_item_pos) + struct ulist *parents, struct __prelim_ref *ref, + int level, u64 time_seq, const u64 *extent_item_pos) { int ret = 0; int slot; struct extent_buffer *eb; struct btrfs_key key; + struct btrfs_key *key_for_search = &ref->key_for_search; struct btrfs_file_extent_item *fi; struct extent_inode_elem *eie = NULL, *old = NULL; u64 disk_byte; + u64 wanted_disk_byte = ref->wanted_disk_byte; + u64 count = 0; if (level != 0) { eb = path->nodes[level]; @@ -238,7 +239,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) ret = btrfs_next_old_leaf(root, path, time_seq); - while (!ret) { + while (!ret && count < ref->count) { eb = path->nodes[0]; slot = path->slots[0]; @@ -254,6 +255,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, if (disk_byte == wanted_disk_byte) { eie = NULL; old = NULL; + count++; if (extent_item_pos) { ret = check_extent_in_eb(&key, eb, fi, *extent_item_pos, @@ -334,9 +336,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, eb = path->nodes[level]; } - ret = add_all_parents(root, path, parents, level, &ref->key_for_search, - time_seq, ref->wanted_disk_byte, - extent_item_pos); + ret = add_all_parents(root, path, parents, ref, level, time_seq, + extent_item_pos); out: path->lowest_level = 0; btrfs_release_path(path); -- cgit v1.2.3-70-g09d2 From 538f72cdf03cad1c21c551ea542c8ce7d9fa2d81 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Thu, 23 Jan 2014 13:47:48 +0800 Subject: Btrfs: fix protection between walking backrefs and root deletion There is a race condition between resolving indirect ref and root deletion, and we should gurantee that root can not be destroyed to avoid accessing broken tree here. Here we fix it by holding @subvol_srcu, and we will release it as soon as we have held root node lock. 
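Condensed, the protection added to __resolve_indirect_ref() looks like this (a sketch of the hunks below; the early exit for the root-level case is omitted):

index = srcu_read_lock(&fs_info->subvol_srcu);

root = btrfs_read_fs_root_no_name(fs_info, &root_key);
if (IS_ERR(root)) {
	srcu_read_unlock(&fs_info->subvol_srcu, index);
	return PTR_ERR(root);
}

path->lowest_level = level;
ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq);

/* the root node is read locked now, so the srcu section can end */
srcu_read_unlock(&fs_info->subvol_srcu, index);

Holding subvol_srcu across the lookup keeps a concurrent root deletion from freeing the tree until the search has pinned it by locking its root node.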
Signed-off-by: Wang Shilong Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 15384968a84..10ae5700ab1 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -301,23 +301,34 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, int ret = 0; int root_level; int level = ref->level; + int index; root_key.objectid = ref->root_id; root_key.type = BTRFS_ROOT_ITEM_KEY; root_key.offset = (u64)-1; + + index = srcu_read_lock(&fs_info->subvol_srcu); + root = btrfs_read_fs_root_no_name(fs_info, &root_key); if (IS_ERR(root)) { + srcu_read_unlock(&fs_info->subvol_srcu, index); ret = PTR_ERR(root); goto out; } root_level = btrfs_old_root_level(root, time_seq); - if (root_level + 1 == level) + if (root_level + 1 == level) { + srcu_read_unlock(&fs_info->subvol_srcu, index); goto out; + } path->lowest_level = level; ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq); + + /* root node has been locked, we can release @subvol_srcu safely here */ + srcu_read_unlock(&fs_info->subvol_srcu, index); + pr_debug("search slot in root %llu (level %d, ref count %d) returned " "%d for key (%llu %u %llu)\n", ref->root_id, level, ref->count, ret, -- cgit v1.2.3-70-g09d2 From 95def2ede1a9dd12b164932eaf5fefb67aefc41c Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Thu, 23 Jan 2014 13:47:49 +0800 Subject: Btrfs: fix to catch all errors when resolving indirect ref We can only tolerate ENOENT here, for other errors, we should return directly. Signed-off-by: Wang Shilong Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 10ae5700ab1..55ffcf44b90 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -388,10 +388,16 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, continue; err = __resolve_indirect_ref(fs_info, path, time_seq, ref, parents, extent_item_pos); - if (err == -ENOMEM) - goto out; - if (err) + /* + * we can only tolerate ENOENT,otherwise,we should catch error + * and return directly. + */ + if (err == -ENOENT) { continue; + } else if (err) { + ret = err; + goto out; + } /* we put the first parent into the ref at hand */ ULIST_ITER_INIT(&uiter); -- cgit v1.2.3-70-g09d2 From bca1a290033d20981e11f81ae4207e4d0fa5b1e6 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Sun, 26 Jan 2014 22:32:18 +0800 Subject: Btrfs: add a reschedule point in btrfs_find_all_roots() I can easily trigger the following warnings when enabling quota in my virtual machine(running Opensuse), Steps are firstly creating a subvolume full of fragment extents, and then create many snapshots (500 in my test case). [ 2362.808459] BUG: soft lockup - CPU#0 stuck for 22s! 
[btrfs-qgroup-re:1970] [ 2362.809023] task: e4af8450 ti: e371c000 task.ti: e371c000 [ 2362.809026] EIP: 0060:[] EFLAGS: 00000246 CPU: 0 [ 2362.809049] EIP is at __merge_refs+0x5e/0x100 [btrfs] [ 2362.809051] EAX: 00000000 EBX: cfadbcf0 ECX: 00000000 EDX: cfadbcb0 [ 2362.809052] ESI: dd8d3370 EDI: e371dde0 EBP: e371dd6c ESP: e371dd5c [ 2362.809054] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 [ 2362.809055] CR0: 80050033 CR2: ac454d50 CR3: 009a9000 CR4: 001407d0 [ 2362.809099] Stack: [ 2362.809100] 00000001 e371dde0 dfcc6890 f29f8000 e371de28 fa39016d 00000011 00000001 [ 2362.809105] 99bfc000 00000000 93928000 00000000 00000001 00000050 e371dda8 00000001 [ 2362.809109] f3a31000 f3413000 00000001 e371ddb8 000040a8 00000202 00000000 00000023 [ 2362.809113] Call Trace: [ 2362.809136] [] find_parent_nodes+0x34d/0x1280 [btrfs] [ 2362.809156] [] btrfs_find_all_roots+0xb2/0x110 [btrfs] [ 2362.809174] [] btrfs_qgroup_rescan_worker+0x358/0x7a0 [btrfs] [ 2362.809180] [] ? lock_timer_base.isra.39+0x1e/0x40 [ 2362.809199] [] worker_loop+0xff/0x470 [btrfs] [ 2362.809204] [] ? __wake_up_locked+0x1a/0x20 [ 2362.809221] [] ? btrfs_queue_worker+0x2b0/0x2b0 [btrfs] [ 2362.809225] [] kthread+0x9c/0xb0 [ 2362.809229] [] ret_from_kernel_thread+0x1b/0x30 [ 2362.809233] [] ? kthread_create_on_node+0x110/0x110 By adding a reschedule point at the end of btrfs_find_all_roots(), i no longer hit these warnings. Cc: Josef Bacik Signed-off-by: Wang Shilong Reviewed-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 55ffcf44b90..7966acd5dc7 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1118,6 +1118,7 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, if (!node) break; bytenr = node->val; + cond_resched(); } ulist_free(tmp); -- cgit v1.2.3-70-g09d2 From f05c474688762f186b16a26366755b6ef0bfed0c Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Tue, 28 Jan 2014 19:13:38 +0800 Subject: Btrfs: fix memory leaks on walking backrefs failure When walking backrefs, we may iterate every inode's extent and add/merge them into ulist, and the caller will free memory from ulist. However, if we fail to allocate inode's extents element memory or ulist_add() fail to allocate memory, we won't add allocated memory into ulist, and the caller won't free some allocated memory thus memory leaks happen. 
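The fix introduces a small helper and calls it on the failure paths, so elements that were allocated but never linked into the ulist are still released (condensed from the hunks below):

static void free_inode_elem_list(struct extent_inode_elem *eie)
{
	struct extent_inode_elem *eie_next;

	for (; eie; eie = eie_next) {
		eie_next = eie->next;
		kfree(eie);
	}
}

	/* on the error paths of add_all_parents() and find_parent_nodes() */
	if (ret < 0)
		free_inode_elem_list(eie);

free_leaf_list() is switched over to the same helper, so the normal cleanup path and the error paths now share one routine that walks the eie->next chain.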
Signed-off-by: Wang Shilong Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 7966acd5dc7..aded3ef3d3d 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -66,6 +66,16 @@ static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb, return 0; } +static void free_inode_elem_list(struct extent_inode_elem *eie) +{ + struct extent_inode_elem *eie_next; + + for (; eie; eie = eie_next) { + eie_next = eie->next; + kfree(eie); + } +} + static int find_extent_in_eb(struct extent_buffer *eb, u64 wanted_disk_byte, u64 extent_item_pos, struct extent_inode_elem **eie) @@ -275,6 +285,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, old = old->next; old->next = eie; } + eie = NULL; } next: ret = btrfs_next_old_item(root, path, time_seq); @@ -282,6 +293,8 @@ next: if (ret > 0) ret = 0; + else if (ret < 0) + free_inode_elem_list(eie); return ret; } @@ -845,6 +858,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, struct list_head prefs_delayed; struct list_head prefs; struct __prelim_ref *ref; + struct extent_inode_elem *eie = NULL; INIT_LIST_HEAD(&prefs); INIT_LIST_HEAD(&prefs_delayed); @@ -958,7 +972,6 @@ again: goto out; } if (ref->count && ref->parent) { - struct extent_inode_elem *eie = NULL; if (extent_item_pos && !ref->inode_list) { u32 bsz; struct extent_buffer *eb; @@ -993,6 +1006,7 @@ again: eie = eie->next; eie->next = ref->inode_list; } + eie = NULL; } list_del(&ref->list); kmem_cache_free(btrfs_prelim_ref_cache, ref); @@ -1011,7 +1025,8 @@ out: list_del(&ref->list); kmem_cache_free(btrfs_prelim_ref_cache, ref); } - + if (ret < 0) + free_inode_elem_list(eie); return ret; } @@ -1019,7 +1034,6 @@ static void free_leaf_list(struct ulist *blocks) { struct ulist_node *node = NULL; struct extent_inode_elem *eie; - struct extent_inode_elem *eie_next; struct ulist_iterator uiter; ULIST_ITER_INIT(&uiter); @@ -1027,10 +1041,7 @@ static void free_leaf_list(struct ulist *blocks) if (!node->aux) continue; eie = (struct extent_inode_elem *)(uintptr_t)node->aux; - for (; eie; eie = eie_next) { - eie_next = eie->next; - kfree(eie); - } + free_inode_elem_list(eie); node->aux = 0; } -- cgit v1.2.3-70-g09d2