diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 317 |
1 files changed, 102 insertions, 215 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 77acc087389..c77156c77de 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -857,12 +857,14 @@ again: btrfs_put_delayed_ref(&head->node); goto search_again; } + spin_lock(&head->lock); if (head->extent_op && head->extent_op->update_flags) extent_flags |= head->extent_op->flags_to_set; else BUG_ON(num_refs == 0); num_refs += head->node.ref_mod; + spin_unlock(&head->lock); mutex_unlock(&head->mutex); } spin_unlock(&delayed_refs->lock); @@ -2287,40 +2289,33 @@ static noinline struct btrfs_delayed_ref_node * select_delayed_ref(struct btrfs_delayed_ref_head *head) { struct rb_node *node; - struct btrfs_delayed_ref_node *ref; - int action = BTRFS_ADD_DELAYED_REF; -again: + struct btrfs_delayed_ref_node *ref, *last = NULL;; + /* * select delayed ref of type BTRFS_ADD_DELAYED_REF first. * this prevents ref count from going down to zero when * there still are pending delayed ref. */ - node = rb_prev(&head->node.rb_node); - while (1) { - if (!node) - break; + node = rb_first(&head->ref_root); + while (node) { ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - if (ref->bytenr != head->node.bytenr) - break; - if (ref->action == action) + if (ref->action == BTRFS_ADD_DELAYED_REF) return ref; - node = rb_prev(node); - } - if (action == BTRFS_ADD_DELAYED_REF) { - action = BTRFS_DROP_DELAYED_REF; - goto again; + else if (last == NULL) + last = ref; + node = rb_next(node); } - return NULL; + return last; } /* * Returns 0 on success or if called with an already aborted transaction. * Returns -ENOMEM or -EIO on failure and will abort the transaction. */ -static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct list_head *cluster) +static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + unsigned long nr) { struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_delayed_ref_node *ref; @@ -2328,23 +2323,26 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, struct btrfs_delayed_extent_op *extent_op; struct btrfs_fs_info *fs_info = root->fs_info; int ret; - int count = 0; + unsigned long count = 0; int must_insert_reserved = 0; delayed_refs = &trans->transaction->delayed_refs; while (1) { if (!locked_ref) { - /* pick a new head ref from the cluster list */ - if (list_empty(cluster)) + if (count >= nr) break; - locked_ref = list_entry(cluster->next, - struct btrfs_delayed_ref_head, cluster); + spin_lock(&delayed_refs->lock); + locked_ref = btrfs_select_ref_head(trans); + if (!locked_ref) { + spin_unlock(&delayed_refs->lock); + break; + } /* grab the lock that says we are going to process * all the refs for this head */ ret = btrfs_delayed_ref_lock(trans, locked_ref); - + spin_unlock(&delayed_refs->lock); /* * we may have dropped the spin lock to get the head * mutex lock, and that might have given someone else @@ -2365,6 +2363,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, * finish. If we merged anything we need to re-loop so we can * get a good ref. */ + spin_lock(&locked_ref->lock); btrfs_merge_delayed_refs(trans, fs_info, delayed_refs, locked_ref); @@ -2376,17 +2375,14 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, if (ref && ref->seq && btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) { - /* - * there are still refs with lower seq numbers in the - * process of being added. Don't run this ref yet. - */ - list_del_init(&locked_ref->cluster); + spin_unlock(&locked_ref->lock); btrfs_delayed_ref_unlock(locked_ref); - locked_ref = NULL; + spin_lock(&delayed_refs->lock); + locked_ref->processing = 0; delayed_refs->num_heads_ready++; spin_unlock(&delayed_refs->lock); + locked_ref = NULL; cond_resched(); - spin_lock(&delayed_refs->lock); continue; } @@ -2401,6 +2397,8 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, locked_ref->extent_op = NULL; if (!ref) { + + /* All delayed refs have been processed, Go ahead * and send the head node to run_one_delayed_ref, * so that any accounting fixes can happen @@ -2413,8 +2411,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, } if (extent_op) { - spin_unlock(&delayed_refs->lock); - + spin_unlock(&locked_ref->lock); ret = run_delayed_extent_op(trans, root, ref, extent_op); btrfs_free_delayed_extent_op(extent_op); @@ -2428,23 +2425,38 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, */ if (must_insert_reserved) locked_ref->must_insert_reserved = 1; + locked_ref->processing = 0; btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); - spin_lock(&delayed_refs->lock); btrfs_delayed_ref_unlock(locked_ref); return ret; } - - goto next; + continue; } - } - ref->in_tree = 0; - rb_erase(&ref->rb_node, &delayed_refs->root); - if (btrfs_delayed_ref_is_head(ref)) { + /* + * Need to drop our head ref lock and re-aqcuire the + * delayed ref lock and then re-check to make sure + * nobody got added. + */ + spin_unlock(&locked_ref->lock); + spin_lock(&delayed_refs->lock); + spin_lock(&locked_ref->lock); + if (rb_first(&locked_ref->ref_root)) { + spin_unlock(&locked_ref->lock); + spin_unlock(&delayed_refs->lock); + continue; + } + ref->in_tree = 0; + delayed_refs->num_heads--; rb_erase(&locked_ref->href_node, &delayed_refs->href_root); + spin_unlock(&delayed_refs->lock); + } else { + ref->in_tree = 0; + rb_erase(&ref->rb_node, &locked_ref->ref_root); } - delayed_refs->num_entries--; + atomic_dec(&delayed_refs->num_entries); + if (!btrfs_delayed_ref_is_head(ref)) { /* * when we play the delayed ref, also correct the @@ -2461,20 +2473,18 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, default: WARN_ON(1); } - } else { - list_del_init(&locked_ref->cluster); } - spin_unlock(&delayed_refs->lock); + spin_unlock(&locked_ref->lock); ret = run_one_delayed_ref(trans, root, ref, extent_op, must_insert_reserved); btrfs_free_delayed_extent_op(extent_op); if (ret) { + locked_ref->processing = 0; btrfs_delayed_ref_unlock(locked_ref); btrfs_put_delayed_ref(ref); btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret); - spin_lock(&delayed_refs->lock); return ret; } @@ -2490,11 +2500,9 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, } btrfs_put_delayed_ref(ref); count++; -next: cond_resched(); - spin_lock(&delayed_refs->lock); } - return count; + return 0; } #ifdef SCRAMBLE_DELAYED_REFS @@ -2576,16 +2584,6 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, return ret; } -static int refs_newer(struct btrfs_delayed_ref_root *delayed_refs, int seq, - int count) -{ - int val = atomic_read(&delayed_refs->ref_seq); - - if (val < seq || val >= seq + count) - return 1; - return 0; -} - static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads) { u64 num_bytes; @@ -2647,12 +2645,9 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct rb_node *node; struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_delayed_ref_head *head; - struct list_head cluster; int ret; - u64 delayed_start; int run_all = count == (unsigned long)-1; int run_most = 0; - int loops; /* We'll clean this up in btrfs_cleanup_transaction */ if (trans->aborted) @@ -2664,121 +2659,31 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); delayed_refs = &trans->transaction->delayed_refs; - INIT_LIST_HEAD(&cluster); if (count == 0) { - count = delayed_refs->num_entries * 2; + count = atomic_read(&delayed_refs->num_entries) * 2; run_most = 1; } - if (!run_all && !run_most) { - int old; - int seq = atomic_read(&delayed_refs->ref_seq); - -progress: - old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1); - if (old) { - DEFINE_WAIT(__wait); - if (delayed_refs->flushing || - !btrfs_should_throttle_delayed_refs(trans, root)) - return 0; - - prepare_to_wait(&delayed_refs->wait, &__wait, - TASK_UNINTERRUPTIBLE); - - old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1); - if (old) { - schedule(); - finish_wait(&delayed_refs->wait, &__wait); - - if (!refs_newer(delayed_refs, seq, 256)) - goto progress; - else - return 0; - } else { - finish_wait(&delayed_refs->wait, &__wait); - goto again; - } - } - - } else { - atomic_inc(&delayed_refs->procs_running_refs); - } - again: - loops = 0; - spin_lock(&delayed_refs->lock); - #ifdef SCRAMBLE_DELAYED_REFS delayed_refs->run_delayed_start = find_middle(&delayed_refs->root); #endif - - while (1) { - if (!(run_all || run_most) && - !btrfs_should_throttle_delayed_refs(trans, root)) - break; - - /* - * go find something we can process in the rbtree. We start at - * the beginning of the tree, and then build a cluster - * of refs to process starting at the first one we are able to - * lock - */ - delayed_start = delayed_refs->run_delayed_start; - ret = btrfs_find_ref_cluster(trans, &cluster, - delayed_refs->run_delayed_start); - if (ret) - break; - - ret = run_clustered_refs(trans, root, &cluster); - if (ret < 0) { - btrfs_release_ref_cluster(&cluster); - spin_unlock(&delayed_refs->lock); - btrfs_abort_transaction(trans, root, ret); - atomic_dec(&delayed_refs->procs_running_refs); - wake_up(&delayed_refs->wait); - return ret; - } - - atomic_add(ret, &delayed_refs->ref_seq); - - count -= min_t(unsigned long, ret, count); - - if (count == 0) - break; - - if (delayed_start >= delayed_refs->run_delayed_start) { - if (loops == 0) { - /* - * btrfs_find_ref_cluster looped. let's do one - * more cycle. if we don't run any delayed ref - * during that cycle (because we can't because - * all of them are blocked), bail out. - */ - loops = 1; - } else { - /* - * no runnable refs left, stop trying - */ - BUG_ON(run_all); - break; - } - } - if (ret) { - /* refs were run, let's reset staleness detection */ - loops = 0; - } + ret = __btrfs_run_delayed_refs(trans, root, count); + if (ret < 0) { + btrfs_abort_transaction(trans, root, ret); + return ret; } if (run_all) { - if (!list_empty(&trans->new_bgs)) { - spin_unlock(&delayed_refs->lock); + if (!list_empty(&trans->new_bgs)) btrfs_create_pending_block_groups(trans, root); - spin_lock(&delayed_refs->lock); - } + spin_lock(&delayed_refs->lock); node = rb_first(&delayed_refs->href_root); - if (!node) + if (!node) { + spin_unlock(&delayed_refs->lock); goto out; + } count = (unsigned long)-1; while (node) { @@ -2807,16 +2712,10 @@ again: node = rb_next(node); } spin_unlock(&delayed_refs->lock); - schedule_timeout(1); + cond_resched(); goto again; } out: - atomic_dec(&delayed_refs->procs_running_refs); - smp_mb(); - if (waitqueue_active(&delayed_refs->wait)) - wake_up(&delayed_refs->wait); - - spin_unlock(&delayed_refs->lock); assert_qgroups_uptodate(trans); return 0; } @@ -2858,12 +2757,13 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, struct rb_node *node; int ret = 0; - ret = -ENOENT; delayed_refs = &trans->transaction->delayed_refs; spin_lock(&delayed_refs->lock); head = btrfs_find_delayed_ref_head(trans, bytenr); - if (!head) - goto out; + if (!head) { + spin_unlock(&delayed_refs->lock); + return 0; + } if (!mutex_trylock(&head->mutex)) { atomic_inc(&head->node.refs); @@ -2880,40 +2780,35 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, btrfs_put_delayed_ref(&head->node); return -EAGAIN; } + spin_unlock(&delayed_refs->lock); - node = rb_prev(&head->node.rb_node); - if (!node) - goto out_unlock; - - ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - - if (ref->bytenr != bytenr) - goto out_unlock; + spin_lock(&head->lock); + node = rb_first(&head->ref_root); + while (node) { + ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); + node = rb_next(node); - ret = 1; - if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) - goto out_unlock; + /* If it's a shared ref we know a cross reference exists */ + if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) { + ret = 1; + break; + } - data_ref = btrfs_delayed_node_to_data_ref(ref); + data_ref = btrfs_delayed_node_to_data_ref(ref); - node = rb_prev(node); - if (node) { - int seq = ref->seq; - - ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - if (ref->bytenr == bytenr && ref->seq == seq) - goto out_unlock; + /* + * If our ref doesn't match the one we're currently looking at + * then we have a cross reference. + */ + if (data_ref->root != root->root_key.objectid || + data_ref->objectid != objectid || + data_ref->offset != offset) { + ret = 1; + break; + } } - - if (data_ref->root != root->root_key.objectid || - data_ref->objectid != objectid || data_ref->offset != offset) - goto out_unlock; - - ret = 0; -out_unlock: + spin_unlock(&head->lock); mutex_unlock(&head->mutex); -out: - spin_unlock(&delayed_refs->lock); return ret; } @@ -5953,8 +5848,6 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, { struct btrfs_delayed_ref_head *head; struct btrfs_delayed_ref_root *delayed_refs; - struct btrfs_delayed_ref_node *ref; - struct rb_node *node; int ret = 0; delayed_refs = &trans->transaction->delayed_refs; @@ -5963,14 +5856,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, if (!head) goto out; - node = rb_prev(&head->node.rb_node); - if (!node) - goto out; - - ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - - /* there are still entries for this ref, we can't drop it */ - if (ref->bytenr == bytenr) + spin_lock(&head->lock); + if (rb_first(&head->ref_root)) goto out; if (head->extent_op) { @@ -5992,20 +5879,19 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, * ahead and process it. */ head->node.in_tree = 0; - rb_erase(&head->node.rb_node, &delayed_refs->root); rb_erase(&head->href_node, &delayed_refs->href_root); - delayed_refs->num_entries--; + atomic_dec(&delayed_refs->num_entries); /* * we don't take a ref on the node because we're removing it from the * tree, so we just steal the ref the tree was holding. */ delayed_refs->num_heads--; - if (list_empty(&head->cluster)) + if (head->processing == 0) delayed_refs->num_heads_ready--; - - list_del_init(&head->cluster); + head->processing = 0; + spin_unlock(&head->lock); spin_unlock(&delayed_refs->lock); BUG_ON(head->extent_op); @@ -6016,6 +5902,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, btrfs_put_delayed_ref(&head->node); return ret; out: + spin_unlock(&head->lock); spin_unlock(&delayed_refs->lock); return 0; } |