diff options
author | Daniel Vetter <daniel.vetter@ffwll.ch> | 2014-07-07 10:17:56 +0200 |
---|---|---|
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2014-07-07 10:17:56 +0200 |
commit | f1615bbe9be4def59c3b3eaddb60722efeed16c2 (patch) | |
tree | ca3020e65447576fc1826e819651e6ba072030b5 /fs/btrfs/extent-tree.c | |
parent | cfb3c0ab0903abb6ea5215b37eebd9c2a1f057eb (diff) | |
parent | cd3de83f147601356395b57a8673e9c5ff1e59d1 (diff) |
Merge tag 'v3.16-rc4' into drm-intel-next-queued
Due to Dave's vacation drm-next hasn't opened yet for 3.17 so I
couldn't move my drm-intel-next queue forward yet like I usually do.
Just pull in the latest upstream -rc to unblock patch merging - I
don't want to needlessly rebase my current patch pile really and void
all the testing we've done already.
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 618 |
1 files changed, 484 insertions, 134 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5590af92094..813537f362f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -26,16 +26,16 @@ #include <linux/ratelimit.h> #include <linux/percpu_counter.h> #include "hash.h" -#include "ctree.h" +#include "tree-log.h" #include "disk-io.h" #include "print-tree.h" -#include "transaction.h" #include "volumes.h" #include "raid56.h" #include "locking.h" #include "free-space-cache.h" #include "math.h" #include "sysfs.h" +#include "qgroup.h" #undef SCRAMBLE_DELAYED_REFS @@ -81,7 +81,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 owner_objectid, u64 owner_offset, int refs_to_drop, - struct btrfs_delayed_extent_op *extra_op); + struct btrfs_delayed_extent_op *extra_op, + int no_quota); static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, struct extent_buffer *leaf, struct btrfs_extent_item *ei); @@ -94,7 +95,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 parent, u64 root_objectid, u64 flags, struct btrfs_disk_key *key, - int level, struct btrfs_key *ins); + int level, struct btrfs_key *ins, + int no_quota); static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 flags, int force); @@ -103,7 +105,8 @@ static int find_next_key(struct btrfs_path *path, int level, static void dump_space_info(struct btrfs_space_info *info, u64 bytes, int dump_block_groups); static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int reserve); + u64 num_bytes, int reserve, + int delalloc); static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, u64 num_bytes); int btrfs_pin_extent(struct btrfs_root *root, @@ -1271,7 +1274,7 @@ fail: static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - int refs_to_drop) + int refs_to_drop, int *last_ref) { struct btrfs_key key; struct btrfs_extent_data_ref *ref1 = NULL; @@ -1307,6 +1310,7 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, if (num_refs == 0) { ret = btrfs_del_item(trans, root, path); + *last_ref = 1; } else { if (key.type == BTRFS_EXTENT_DATA_REF_KEY) btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); @@ -1764,7 +1768,8 @@ void update_inline_extent_backref(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_extent_inline_ref *iref, int refs_to_mod, - struct btrfs_delayed_extent_op *extent_op) + struct btrfs_delayed_extent_op *extent_op, + int *last_ref) { struct extent_buffer *leaf; struct btrfs_extent_item *ei; @@ -1808,6 +1813,7 @@ void update_inline_extent_backref(struct btrfs_root *root, else btrfs_set_shared_data_ref_count(leaf, sref, refs); } else { + *last_ref = 1; size = btrfs_extent_inline_ref_size(type); item_size = btrfs_item_size_nr(leaf, path->slots[0]); ptr = (unsigned long)iref; @@ -1839,7 +1845,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans, if (ret == 0) { BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); update_inline_extent_backref(root, path, iref, - refs_to_add, extent_op); + refs_to_add, extent_op, NULL); } else if (ret == -ENOENT) { setup_inline_extent_backref(root, path, iref, parent, root_objectid, owner, offset, @@ -1872,17 +1878,19 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_extent_inline_ref *iref, - int refs_to_drop, int is_data) + int refs_to_drop, int is_data, int *last_ref) { int ret = 0; BUG_ON(!is_data && refs_to_drop != 1); if (iref) { update_inline_extent_backref(root, path, iref, - -refs_to_drop, NULL); + -refs_to_drop, NULL, last_ref); } else if (is_data) { - ret = remove_extent_data_ref(trans, root, path, refs_to_drop); + ret = remove_extent_data_ref(trans, root, path, refs_to_drop, + last_ref); } else { + *last_ref = 1; ret = btrfs_del_item(trans, root, path); } return ret; @@ -1946,7 +1954,8 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, - u64 root_objectid, u64 owner, u64 offset, int for_cow) + u64 root_objectid, u64 owner, u64 offset, + int no_quota) { int ret; struct btrfs_fs_info *fs_info = root->fs_info; @@ -1958,12 +1967,12 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, num_bytes, parent, root_objectid, (int)owner, - BTRFS_ADD_DELAYED_REF, NULL, for_cow); + BTRFS_ADD_DELAYED_REF, NULL, no_quota); } else { ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, num_bytes, parent, root_objectid, owner, offset, - BTRFS_ADD_DELAYED_REF, NULL, for_cow); + BTRFS_ADD_DELAYED_REF, NULL, no_quota); } return ret; } @@ -1973,31 +1982,64 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 owner, u64 offset, int refs_to_add, + int no_quota, struct btrfs_delayed_extent_op *extent_op) { + struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_path *path; struct extent_buffer *leaf; struct btrfs_extent_item *item; + struct btrfs_key key; u64 refs; int ret; + enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_ADD_EXCL; path = btrfs_alloc_path(); if (!path) return -ENOMEM; + if (!is_fstree(root_objectid) || !root->fs_info->quota_enabled) + no_quota = 1; + path->reada = 1; path->leave_spinning = 1; /* this will setup the path even if it fails to insert the back ref */ - ret = insert_inline_extent_backref(trans, root->fs_info->extent_root, - path, bytenr, num_bytes, parent, + ret = insert_inline_extent_backref(trans, fs_info->extent_root, path, + bytenr, num_bytes, parent, root_objectid, owner, offset, refs_to_add, extent_op); - if (ret != -EAGAIN) + if ((ret < 0 && ret != -EAGAIN) || (!ret && no_quota)) goto out; + /* + * Ok we were able to insert an inline extent and it appears to be a new + * reference, deal with the qgroup accounting. + */ + if (!ret && !no_quota) { + ASSERT(root->fs_info->quota_enabled); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_item); + if (btrfs_extent_refs(leaf, item) > (u64)refs_to_add) + type = BTRFS_QGROUP_OPER_ADD_SHARED; + btrfs_release_path(path); + + ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid, + bytenr, num_bytes, type, 0); + goto out; + } + /* + * Ok we had -EAGAIN which means we didn't have space to insert and + * inline extent ref, so just update the reference count and add a + * normal backref. + */ leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(leaf, item); + if (refs) + type = BTRFS_QGROUP_OPER_ADD_SHARED; btrfs_set_extent_refs(leaf, item, refs + refs_to_add); if (extent_op) __run_delayed_extent_op(extent_op, leaf, item); @@ -2005,9 +2047,15 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); + if (!no_quota) { + ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid, + bytenr, num_bytes, type, 0); + if (ret) + goto out; + } + path->reada = 1; path->leave_spinning = 1; - /* now insert the actual backref */ ret = insert_extent_backref(trans, root->fs_info->extent_root, path, bytenr, parent, root_objectid, @@ -2041,8 +2089,7 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, if (node->type == BTRFS_SHARED_DATA_REF_KEY) parent = ref->parent; - else - ref_root = ref->root; + ref_root = ref->root; if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { if (extent_op) @@ -2056,13 +2103,13 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, node->num_bytes, parent, ref_root, ref->objectid, ref->offset, node->ref_mod, - extent_op); + node->no_quota, extent_op); } else if (node->action == BTRFS_DROP_DELAYED_REF) { ret = __btrfs_free_extent(trans, root, node->bytenr, node->num_bytes, parent, ref_root, ref->objectid, ref->offset, node->ref_mod, - extent_op); + extent_op, node->no_quota); } else { BUG(); } @@ -2199,8 +2246,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) parent = ref->parent; - else - ref_root = ref->root; + ref_root = ref->root; ins.objectid = node->bytenr; if (skinny_metadata) { @@ -2218,15 +2264,18 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, parent, ref_root, extent_op->flags_to_set, &extent_op->key, - ref->level, &ins); + ref->level, &ins, + node->no_quota); } else if (node->action == BTRFS_ADD_DELAYED_REF) { ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, node->num_bytes, parent, ref_root, - ref->level, 0, 1, extent_op); + ref->level, 0, 1, node->no_quota, + extent_op); } else if (node->action == BTRFS_DROP_DELAYED_REF) { ret = __btrfs_free_extent(trans, root, node->bytenr, node->num_bytes, parent, ref_root, - ref->level, 0, 1, extent_op); + ref->level, 0, 1, extent_op, + node->no_quota); } else { BUG(); } @@ -2574,42 +2623,6 @@ static u64 find_middle(struct rb_root *root) } #endif -int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) -{ - struct qgroup_update *qgroup_update; - int ret = 0; - - if (list_empty(&trans->qgroup_ref_list) != - !trans->delayed_ref_elem.seq) { - /* list without seq or seq without list */ - btrfs_err(fs_info, - "qgroup accounting update error, list is%s empty, seq is %#x.%x", - list_empty(&trans->qgroup_ref_list) ? "" : " not", - (u32)(trans->delayed_ref_elem.seq >> 32), - (u32)trans->delayed_ref_elem.seq); - BUG(); - } - - if (!trans->delayed_ref_elem.seq) - return 0; - - while (!list_empty(&trans->qgroup_ref_list)) { - qgroup_update = list_first_entry(&trans->qgroup_ref_list, - struct qgroup_update, list); - list_del(&qgroup_update->list); - if (!ret) - ret = btrfs_qgroup_account_ref( - trans, fs_info, qgroup_update->node, - qgroup_update->extent_op); - kfree(qgroup_update); - } - - btrfs_put_tree_mod_seq(fs_info, &trans->delayed_ref_elem); - - return ret; -} - static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads) { u64 num_bytes; @@ -2662,15 +2675,94 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, u64 num_entries = atomic_read(&trans->transaction->delayed_refs.num_entries); u64 avg_runtime; + u64 val; smp_mb(); avg_runtime = fs_info->avg_delayed_ref_runtime; + val = num_entries * avg_runtime; if (num_entries * avg_runtime >= NSEC_PER_SEC) return 1; + if (val >= NSEC_PER_SEC / 2) + return 2; return btrfs_check_space_for_delayed_refs(trans, root); } +struct async_delayed_refs { + struct btrfs_root *root; + int count; + int error; + int sync; + struct completion wait; + struct btrfs_work work; +}; + +static void delayed_ref_async_start(struct btrfs_work *work) +{ + struct async_delayed_refs *async; + struct btrfs_trans_handle *trans; + int ret; + + async = container_of(work, struct async_delayed_refs, work); + + trans = btrfs_join_transaction(async->root); + if (IS_ERR(trans)) { + async->error = PTR_ERR(trans); + goto done; + } + + /* + * trans->sync means that when we call end_transaciton, we won't + * wait on delayed refs + */ + trans->sync = true; + ret = btrfs_run_delayed_refs(trans, async->root, async->count); + if (ret) + async->error = ret; + + ret = btrfs_end_transaction(trans, async->root); + if (ret && !async->error) + async->error = ret; +done: + if (async->sync) + complete(&async->wait); + else + kfree(async); +} + +int btrfs_async_run_delayed_refs(struct btrfs_root *root, + unsigned long count, int wait) +{ + struct async_delayed_refs *async; + int ret; + + async = kmalloc(sizeof(*async), GFP_NOFS); + if (!async) + return -ENOMEM; + + async->root = root->fs_info->tree_root; + async->count = count; + async->error = 0; + if (wait) + async->sync = 1; + else + async->sync = 0; + init_completion(&async->wait); + + btrfs_init_work(&async->work, delayed_ref_async_start, + NULL, NULL); + + btrfs_queue_work(root->fs_info->extent_workers, &async->work); + + if (wait) { + wait_for_completion(&async->wait); + ret = async->error; + kfree(async); + return ret; + } + return 0; +} + /* * this starts processing the delayed reference count updates and * extent insertions we have queued up so far. count can be @@ -2698,8 +2790,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, if (root == root->fs_info->extent_root) root = root->fs_info->tree_root; - btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); - delayed_refs = &trans->transaction->delayed_refs; if (count == 0) { count = atomic_read(&delayed_refs->num_entries) * 2; @@ -2758,6 +2848,9 @@ again: goto again; } out: + ret = btrfs_delayed_qgroup_accounting(trans, root->fs_info); + if (ret) + return ret; assert_qgroups_uptodate(trans); return 0; } @@ -2964,7 +3057,7 @@ out: static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, - int full_backref, int inc, int for_cow) + int full_backref, int inc, int no_quota) { u64 bytenr; u64 num_bytes; @@ -2979,11 +3072,15 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, u64, u64, u64, u64, u64, u64, int); +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS + if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) + return 0; +#endif ref_root = btrfs_header_owner(buf); nritems = btrfs_header_nritems(buf); level = btrfs_header_level(buf); - if (!root->ref_cows && level == 0) + if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0) return 0; if (inc) @@ -3014,7 +3111,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, key.offset -= btrfs_file_extent_offset(buf, fi); ret = process_func(trans, root, bytenr, num_bytes, parent, ref_root, key.objectid, - key.offset, for_cow); + key.offset, no_quota); if (ret) goto fail; } else { @@ -3022,7 +3119,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, num_bytes = btrfs_level_size(root, level - 1); ret = process_func(trans, root, bytenr, num_bytes, parent, ref_root, level - 1, 0, - for_cow); + no_quota); if (ret) goto fail; } @@ -3033,15 +3130,15 @@ fail: } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf, int full_backref, int for_cow) + struct extent_buffer *buf, int full_backref, int no_quota) { - return __btrfs_mod_ref(trans, root, buf, full_backref, 1, for_cow); + return __btrfs_mod_ref(trans, root, buf, full_backref, 1, no_quota); } int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf, int full_backref, int for_cow) + struct extent_buffer *buf, int full_backref, int no_quota) { - return __btrfs_mod_ref(trans, root, buf, full_backref, 0, for_cow); + return __btrfs_mod_ref(trans, root, buf, full_backref, 0, no_quota); } static int write_one_cache_group(struct btrfs_trans_handle *trans, @@ -3164,7 +3261,8 @@ again: spin_lock(&block_group->lock); if (block_group->cached != BTRFS_CACHE_FINISHED || - !btrfs_test_opt(root, SPACE_CACHE)) { + !btrfs_test_opt(root, SPACE_CACHE) || + block_group->delalloc_bytes) { /* * don't bother trying to write stuff out _if_ * a) we're not cached, @@ -3401,10 +3499,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, return ret; } - for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { + for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) INIT_LIST_HEAD(&found->block_groups[i]); - kobject_init(&found->block_group_kobjs[i], &btrfs_raid_ktype); - } init_rwsem(&found->groups_sem); spin_lock_init(&found->lock); found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; @@ -4204,6 +4300,104 @@ static int flush_space(struct btrfs_root *root, return ret; } + +static inline u64 +btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, + struct btrfs_space_info *space_info) +{ + u64 used; + u64 expected; + u64 to_reclaim; + + to_reclaim = min_t(u64, num_online_cpus() * 1024 * 1024, + 16 * 1024 * 1024); + spin_lock(&space_info->lock); + if (can_overcommit(root, space_info, to_reclaim, + BTRFS_RESERVE_FLUSH_ALL)) { + to_reclaim = 0; + goto out; + } + + used = space_info->bytes_used + space_info->bytes_reserved + + space_info->bytes_pinned + space_info->bytes_readonly + + space_info->bytes_may_use; + if (can_overcommit(root, space_info, 1024 * 1024, + BTRFS_RESERVE_FLUSH_ALL)) + expected = div_factor_fine(space_info->total_bytes, 95); + else + expected = div_factor_fine(space_info->total_bytes, 90); + + if (used > expected) + to_reclaim = used - expected; + else + to_reclaim = 0; + to_reclaim = min(to_reclaim, space_info->bytes_may_use + + space_info->bytes_reserved); +out: + spin_unlock(&space_info->lock); + + return to_reclaim; +} + +static inline int need_do_async_reclaim(struct btrfs_space_info *space_info, + struct btrfs_fs_info *fs_info, u64 used) +{ + return (used >= div_factor_fine(space_info->total_bytes, 98) && + !btrfs_fs_closing(fs_info) && + !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)); +} + +static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info, + struct btrfs_fs_info *fs_info) +{ + u64 used; + + spin_lock(&space_info->lock); + used = space_info->bytes_used + space_info->bytes_reserved + + space_info->bytes_pinned + space_info->bytes_readonly + + space_info->bytes_may_use; + if (need_do_async_reclaim(space_info, fs_info, used)) { + spin_unlock(&space_info->lock); + return 1; + } + spin_unlock(&space_info->lock); + + return 0; +} + +static void btrfs_async_reclaim_metadata_space(struct work_struct *work) +{ + struct btrfs_fs_info *fs_info; + struct btrfs_space_info *space_info; + u64 to_reclaim; + int flush_state; + + fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work); + space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); + + to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root, + space_info); + if (!to_reclaim) + return; + + flush_state = FLUSH_DELAYED_ITEMS_NR; + do { + flush_space(fs_info->fs_root, space_info, to_reclaim, + to_reclaim, flush_state); + flush_state++; + if (!btrfs_need_do_async_reclaim(space_info, fs_info)) + return; + } while (flush_state <= COMMIT_TRANS); + + if (btrfs_need_do_async_reclaim(space_info, fs_info)) + queue_work(system_unbound_wq, work); +} + +void btrfs_init_async_reclaim_work(struct work_struct *work) +{ + INIT_WORK(work, btrfs_async_reclaim_metadata_space); +} + /** * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space * @root - the root we're allocating for @@ -4311,8 +4505,13 @@ again: if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { flushing = true; space_info->flush = 1; + } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { + used += orig_bytes; + if (need_do_async_reclaim(space_info, root->fs_info, used) && + !work_busy(&root->fs_info->async_reclaim_work)) + queue_work(system_unbound_wq, + &root->fs_info->async_reclaim_work); } - spin_unlock(&space_info->lock); if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) @@ -4369,7 +4568,7 @@ static struct btrfs_block_rsv *get_block_rsv( { struct btrfs_block_rsv *block_rsv = NULL; - if (root->ref_cows) + if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) block_rsv = trans->block_rsv; if (root == root->fs_info->csum_root && trans->adding_csums) @@ -5416,6 +5615,7 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log, * @cache: The cache we are manipulating * @num_bytes: The number of bytes in question * @reserve: One of the reservation enums + * @delalloc: The blocks are allocated for the delalloc write * * This is called by the allocator when it reserves space, or by somebody who is * freeing space that was never actually used on disk. For example if you @@ -5434,7 +5634,7 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log, * succeeds. */ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int reserve) + u64 num_bytes, int reserve, int delalloc) { struct btrfs_space_info *space_info = cache->space_info; int ret = 0; @@ -5453,12 +5653,18 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, num_bytes, 0); space_info->bytes_may_use -= num_bytes; } + + if (delalloc) + cache->delalloc_bytes += num_bytes; } } else { if (cache->ro) space_info->bytes_readonly += num_bytes; cache->reserved -= num_bytes; space_info->bytes_reserved -= num_bytes; + + if (delalloc) + cache->delalloc_bytes -= num_bytes; } spin_unlock(&cache->lock); spin_unlock(&space_info->lock); @@ -5472,7 +5678,6 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_caching_control *next; struct btrfs_caching_control *caching_ctl; struct btrfs_block_group_cache *cache; - struct btrfs_space_info *space_info; down_write(&fs_info->commit_root_sem); @@ -5495,9 +5700,6 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, up_write(&fs_info->commit_root_sem); - list_for_each_entry_rcu(space_info, &fs_info->space_info, list) - percpu_counter_set(&space_info->total_bytes_pinned, 0); - update_global_block_rsv(fs_info); } @@ -5535,6 +5737,7 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) spin_lock(&cache->lock); cache->pinned -= len; space_info->bytes_pinned -= len; + percpu_counter_add(&space_info->total_bytes_pinned, -len); if (cache->ro) { space_info->bytes_readonly += len; readonly = true; @@ -5621,7 +5824,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 owner_objectid, u64 owner_offset, int refs_to_drop, - struct btrfs_delayed_extent_op *extent_op) + struct btrfs_delayed_extent_op *extent_op, + int no_quota) { struct btrfs_key key; struct btrfs_path *path; @@ -5637,9 +5841,14 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, int num_to_del = 1; u32 item_size; u64 refs; + int last_ref = 0; + enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_SUB_EXCL; bool skinny_metadata = btrfs_fs_incompat(root->fs_info, SKINNY_METADATA); + if (!info->quota_enabled || !is_fstree(root_objectid)) + no_quota = 1; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -5687,7 +5896,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, BUG_ON(iref); ret = remove_extent_backref(trans, extent_root, path, NULL, refs_to_drop, - is_data); + is_data, &last_ref); if (ret) { btrfs_abort_transaction(trans, extent_root, ret); goto out; @@ -5806,7 +6015,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, refs = btrfs_extent_refs(leaf, ei); if (refs < refs_to_drop) { btrfs_err(info, "trying to drop %d refs but we only have %Lu " - "for bytenr %Lu\n", refs_to_drop, refs, bytenr); + "for bytenr %Lu", refs_to_drop, refs, bytenr); ret = -EINVAL; btrfs_abort_transaction(trans, extent_root, ret); goto out; @@ -5814,6 +6023,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, refs -= refs_to_drop; if (refs > 0) { + type = BTRFS_QGROUP_OPER_SUB_SHARED; if (extent_op) __run_delayed_extent_op(extent_op, leaf, ei); /* @@ -5829,7 +6039,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, if (found_extent) { ret = remove_extent_backref(trans, extent_root, path, iref, refs_to_drop, - is_data); + is_data, &last_ref); if (ret) { btrfs_abort_transaction(trans, extent_root, ret); goto out; @@ -5850,6 +6060,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, } } + last_ref = 1; ret = btrfs_del_items(trans, extent_root, path, path->slots[0], num_to_del); if (ret) { @@ -5872,6 +6083,20 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, goto out; } } + btrfs_release_path(path); + + /* Deal with the quota accounting */ + if (!ret && last_ref && !no_quota) { + int mod_seq = 0; + + if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID && + type == BTRFS_QGROUP_OPER_SUB_SHARED) + mod_seq = 1; + + ret = btrfs_qgroup_record_ref(trans, info, root_objectid, + bytenr, num_bytes, type, + mod_seq); + } out: btrfs_free_path(path); return ret; @@ -5987,7 +6212,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); btrfs_add_free_space(cache, buf->start, buf->len); - btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); + btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0); trace_btrfs_reserved_extent_free(root, buf->start, buf->len); pin = 0; } @@ -6008,11 +6233,15 @@ out: /* Can return -ENOMEM */ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, - u64 owner, u64 offset, int for_cow) + u64 owner, u64 offset, int no_quota) { int ret; struct btrfs_fs_info *fs_info = root->fs_info; +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS + if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) + return 0; +#endif add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid); /* @@ -6028,13 +6257,13 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, num_bytes, parent, root_objectid, (int)owner, - BTRFS_DROP_DELAYED_REF, NULL, for_cow); + BTRFS_DROP_DELAYED_REF, NULL, no_quota); } else { ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, num_bytes, parent, root_objectid, owner, offset, BTRFS_DROP_DELAYED_REF, - NULL, for_cow); + NULL, no_quota); } return ret; } @@ -6142,6 +6371,70 @@ enum btrfs_loop_type { LOOP_NO_EMPTY_SIZE = 3, }; +static inline void +btrfs_lock_block_group(struct btrfs_block_group_cache *cache, + int delalloc) +{ + if (delalloc) + down_read(&cache->data_rwsem); +} + +static inline void +btrfs_grab_block_group(struct btrfs_block_group_cache *cache, + int delalloc) +{ + btrfs_get_block_group(cache); + if (delalloc) + down_read(&cache->data_rwsem); +} + +static struct btrfs_block_group_cache * +btrfs_lock_cluster(struct btrfs_block_group_cache *block_group, + struct btrfs_free_cluster *cluster, + int delalloc) +{ + struct btrfs_block_group_cache *used_bg; + bool locked = false; +again: + spin_lock(&cluster->refill_lock); + if (locked) { + if (used_bg == cluster->block_group) + return used_bg; + + up_read(&used_bg->data_rwsem); + btrfs_put_block_group(used_bg); + } + + used_bg = cluster->block_group; + if (!used_bg) + return NULL; + + if (used_bg == block_group) + return used_bg; + + btrfs_get_block_group(used_bg); + + if (!delalloc) + return used_bg; + + if (down_read_trylock(&used_bg->data_rwsem)) + return used_bg; + + spin_unlock(&cluster->refill_lock); + down_read(&used_bg->data_rwsem); + locked = true; + goto again; +} + +static inline void +btrfs_release_block_group(struct btrfs_block_group_cache *cache, + int delalloc) +{ + if (delalloc) + up_read(&cache->data_rwsem); + btrfs_put_block_group(cache); +} + /* * walks the btree of allocated extents and find a hole of a given size. * The key ins is changed to record the hole: @@ -6156,7 +6449,7 @@ enum btrfs_loop_type { static noinline int find_free_extent(struct btrfs_root *orig_root, u64 num_bytes, u64 empty_size, u64 hint_byte, struct btrfs_key *ins, - u64 flags) + u64 flags, int delalloc) { int ret = 0; struct btrfs_root *root = orig_root->fs_info->extent_root; @@ -6244,6 +6537,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root, up_read(&space_info->groups_sem); } else { index = get_block_group_index(block_group); + btrfs_lock_block_group(block_group, delalloc); goto have_block_group; } } else if (block_group) { @@ -6258,7 +6552,7 @@ search: u64 offset; int cached; - btrfs_get_block_group(block_group); + btrfs_grab_block_group(block_group, delalloc); search_start = block_group->key.objectid; /* @@ -6306,16 +6600,16 @@ have_block_group: * the refill lock keeps out other * people trying to start a new cluster */ - spin_lock(&last_ptr->refill_lock); - used_block_group = last_ptr->block_group; - if (used_block_group != block_group && - (!used_block_group || - used_block_group->ro || - !block_group_bits(used_block_group, flags))) + used_block_group = btrfs_lock_cluster(block_group, + last_ptr, + delalloc); + if (!used_block_group) goto refill_cluster; - if (used_block_group != block_group) - btrfs_get_block_group(used_block_group); + if (used_block_group != block_group && + (used_block_group->ro || + !block_group_bits(used_block_group, flags))) + goto release_cluster; offset = btrfs_alloc_from_cluster(used_block_group, last_ptr, @@ -6329,16 +6623,15 @@ have_block_group: used_block_group, search_start, num_bytes); if (used_block_group != block_group) { - btrfs_put_block_group(block_group); + btrfs_release_block_group(block_group, + delalloc); block_group = used_block_group; } goto checks; } WARN_ON(last_ptr->block_group != used_block_group); - if (used_block_group != block_group) - btrfs_put_block_group(used_block_group); -refill_cluster: +release_cluster: /* If we are on LOOP_NO_EMPTY_SIZE, we can't * set up a new clusters, so lets just skip it * and let the allocator find whatever block @@ -6355,8 +6648,10 @@ refill_cluster: * succeeding in the unclustered * allocation. */ if (loop >= LOOP_NO_EMPTY_SIZE && - last_ptr->block_group != block_group) { + used_block_group != block_group) { spin_unlock(&last_ptr->refill_lock); + btrfs_release_block_group(used_block_group, + delalloc); goto unclustered_alloc; } @@ -6366,6 +6661,10 @@ refill_cluster: */ btrfs_return_cluster_to_free_space(NULL, last_ptr); + if (used_block_group != block_group) + btrfs_release_block_group(used_block_group, + delalloc); +refill_cluster: if (loop >= LOOP_NO_EMPTY_SIZE) { spin_unlock(&last_ptr->refill_lock); goto unclustered_alloc; @@ -6473,7 +6772,7 @@ checks: BUG_ON(offset > search_start); ret = btrfs_update_reserved_bytes(block_group, num_bytes, - alloc_type); + alloc_type, delalloc); if (ret == -EAGAIN) { btrfs_add_free_space(block_group, offset, num_bytes); goto loop; @@ -6485,13 +6784,13 @@ checks: trace_btrfs_reserve_extent(orig_root, block_group, search_start, num_bytes); - btrfs_put_block_group(block_group); + btrfs_release_block_group(block_group, delalloc); break; loop: failed_cluster_refill = false; failed_alloc = false; BUG_ON(index != get_block_group_index(block_group)); - btrfs_put_block_group(block_group); + btrfs_release_block_group(block_group, delalloc); } up_read(&space_info->groups_sem); @@ -6514,8 +6813,14 @@ loop: loop++; if (loop == LOOP_ALLOC_CHUNK) { struct btrfs_trans_handle *trans; + int exist = 0; + + trans = current->journal_info; + if (trans) + exist = 1; + else + trans = btrfs_join_transaction(root); - trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out; @@ -6532,7 +6837,8 @@ loop: root, ret); else ret = 0; - btrfs_end_transaction(trans, root); + if (!exist) + btrfs_end_transaction(trans, root); if (ret) goto out; } @@ -6597,7 +6903,7 @@ again: int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, u64 min_alloc_size, u64 empty_size, u64 hint_byte, - struct btrfs_key *ins, int is_data) + struct btrfs_key *ins, int is_data, int delalloc) { bool final_tried = false; u64 flags; @@ -6607,7 +6913,7 @@ int btrfs_reserve_extent(struct btrfs_root *root, again: WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, - flags); + flags, delalloc); if (ret == -ENOSPC) { if (!final_tried && ins->offset) { @@ -6632,7 +6938,8 @@ again: } static int __btrfs_free_reserved_extent(struct btrfs_root *root, - u64 start, u64 len, int pin) + u64 start, u64 len, + int pin, int delalloc) { struct btrfs_block_group_cache *cache; int ret = 0; @@ -6651,7 +6958,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root, pin_down_extent(root, cache, start, len, 1); else { btrfs_add_free_space(cache, start, len); - btrfs_update_reserved_bytes(cache, len, RESERVE_FREE); + btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); } btrfs_put_block_group(cache); @@ -6661,15 +6968,15 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root, } int btrfs_free_reserved_extent(struct btrfs_root *root, - u64 start, u64 len) + u64 start, u64 len, int delalloc) { - return __btrfs_free_reserved_extent(root, start, len, 0); + return __btrfs_free_reserved_extent(root, start, len, 0, delalloc); } int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, u64 start, u64 len) { - return __btrfs_free_reserved_extent(root, start, len, 1); + return __btrfs_free_reserved_extent(root, start, len, 1, 0); } static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, @@ -6733,6 +7040,13 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); + /* Always set parent to 0 here since its exclusive anyway. */ + ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid, + ins->objectid, ins->offset, + BTRFS_QGROUP_OPER_ADD_EXCL, 0); + if (ret) + return ret; + ret = update_block_group(root, ins->objectid, ins->offset, 1); if (ret) { /* -ENOENT, logic error */ btrfs_err(fs_info, "update block group failed for %llu %llu", @@ -6747,7 +7061,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 parent, u64 root_objectid, u64 flags, struct btrfs_disk_key *key, - int level, struct btrfs_key *ins) + int level, struct btrfs_key *ins, + int no_quota) { int ret; struct btrfs_fs_info *fs_info = root->fs_info; @@ -6757,6 +7072,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct extent_buffer *leaf; u32 size = sizeof(*extent_item) + sizeof(*iref); + u64 num_bytes = ins->offset; bool skinny_metadata = btrfs_fs_incompat(root->fs_info, SKINNY_METADATA); @@ -6790,6 +7106,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, if (skinny_metadata) { iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); + num_bytes = root->leafsize; } else { block_info = (struct btrfs_tree_block_info *)(extent_item + 1); btrfs_set_tree_block_key(leaf, block_info, key); @@ -6811,6 +7128,14 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); btrfs_free_path(path); + if (!no_quota) { + ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid, + ins->objectid, num_bytes, + BTRFS_QGROUP_OPER_ADD_EXCL, 0); + if (ret) + return ret; + } + ret = update_block_group(root, ins->objectid, root->leafsize, 1); if (ret) { /* -ENOENT, logic error */ btrfs_err(fs_info, "update block group failed for %llu %llu", @@ -6866,7 +7191,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, return -EINVAL; ret = btrfs_update_reserved_bytes(block_group, ins->offset, - RESERVE_ALLOC_NO_ACCOUNT); + RESERVE_ALLOC_NO_ACCOUNT, 0); BUG_ON(ret); /* logic error */ ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 0, owner, offset, ins, 1); @@ -6994,12 +7319,21 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, bool skinny_metadata = btrfs_fs_incompat(root->fs_info, SKINNY_METADATA); +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS + if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) { + buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr, + blocksize, level); + if (!IS_ERR(buf)) + root->alloc_bytenr += blocksize; + return buf; + } +#endif block_rsv = use_block_rsv(trans, root, blocksize); if (IS_ERR(block_rsv)) return ERR_CAST(block_rsv); ret = btrfs_reserve_extent(root, blocksize, blocksize, - empty_size, hint, &ins, 0); + empty_size, hint, &ins, 0, 0); if (ret) { unuse_block_rsv(root->fs_info, block_rsv, blocksize); return ERR_PTR(ret); @@ -7735,7 +8069,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, } } - if (root->in_radix) { + if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) { btrfs_drop_and_free_fs_root(tree_root->fs_info, root); } else { free_extent_buffer(root->node); @@ -8327,8 +8661,9 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) list_del(&space_info->list); for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { struct kobject *kobj; - kobj = &space_info->block_group_kobjs[i]; - if (kobj->parent) { + kobj = space_info->block_group_kobjs[i]; + space_info->block_group_kobjs[i] = NULL; + if (kobj) { kobject_del(kobj); kobject_put(kobj); } @@ -8352,17 +8687,26 @@ static void __link_block_group(struct btrfs_space_info *space_info, up_write(&space_info->groups_sem); if (first) { - struct kobject *kobj = &space_info->block_group_kobjs[index]; + struct raid_kobject *rkobj; int ret; - kobject_get(&space_info->kobj); /* put in release */ - ret = kobject_add(kobj, &space_info->kobj, "%s", - get_raid_name(index)); + rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS); + if (!rkobj) + goto out_err; + rkobj->raid_type = index; + kobject_init(&rkobj->kobj, &btrfs_raid_ktype); + ret = kobject_add(&rkobj->kobj, &space_info->kobj, + "%s", get_raid_name(index)); if (ret) { - pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n"); - kobject_put(&space_info->kobj); + kobject_put(&rkobj->kobj); + goto out_err; } + space_info->block_group_kobjs[index] = &rkobj->kobj; } + + return; +out_err: + pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n"); } static struct btrfs_block_group_cache * @@ -8392,6 +8736,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size) start); atomic_set(&cache->count, 1); spin_lock_init(&cache->lock); + init_rwsem(&cache->data_rwsem); INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); INIT_LIST_HEAD(&cache->new_bg_list); @@ -8611,7 +8956,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, extent_root = root->fs_info->extent_root; - root->fs_info->last_trans_log_full_commit = trans->transid; + btrfs_set_log_full_commit(root->fs_info, trans); cache = btrfs_create_block_group_cache(root, chunk_offset, size); if (!cache) @@ -8697,6 +9042,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *tree_root = root->fs_info->tree_root; struct btrfs_key key; struct inode *inode; + struct kobject *kobj = NULL; int ret; int index; int factor; @@ -8796,11 +9142,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, */ list_del_init(&block_group->list); if (list_empty(&block_group->space_info->block_groups[index])) { - kobject_del(&block_group->space_info->block_group_kobjs[index]); - kobject_put(&block_group->space_info->block_group_kobjs[index]); + kobj = block_group->space_info->block_group_kobjs[index]; + block_group->space_info->block_group_kobjs[index] = NULL; clear_avail_alloc_bits(root->fs_info, block_group->flags); } up_write(&block_group->space_info->groups_sem); + if (kobj) { + kobject_del(kobj); + kobject_put(kobj); + } if (block_group->cached == BTRFS_CACHE_STARTED) wait_block_group_cache_done(block_group); |