Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--  fs/btrfs/extent-tree.c | 431
1 file changed, 245 insertions, 186 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 169bd62ce77..4d08ed79405 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -320,12 +320,12 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
return total_added;
}
-static int caching_kthread(void *data)
+static noinline void caching_thread(struct btrfs_work *work)
{
- struct btrfs_block_group_cache *block_group = data;
- struct btrfs_fs_info *fs_info = block_group->fs_info;
- struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
- struct btrfs_root *extent_root = fs_info->extent_root;
+ struct btrfs_block_group_cache *block_group;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_caching_control *caching_ctl;
+ struct btrfs_root *extent_root;
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_key key;
@@ -334,9 +334,14 @@ static int caching_kthread(void *data)
u32 nritems;
int ret = 0;
+ caching_ctl = container_of(work, struct btrfs_caching_control, work);
+ block_group = caching_ctl->block_group;
+ fs_info = block_group->fs_info;
+ extent_root = fs_info->extent_root;
+
path = btrfs_alloc_path();
if (!path)
- return -ENOMEM;
+ goto out;
last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
@@ -348,7 +353,7 @@ static int caching_kthread(void *data)
*/
path->skip_locking = 1;
path->search_commit_root = 1;
- path->reada = 2;
+ path->reada = 1;
key.objectid = last;
key.offset = 0;
@@ -366,8 +371,7 @@ again:
nritems = btrfs_header_nritems(leaf);
while (1) {
- smp_mb();
- if (fs_info->closing > 1) {
+ if (btrfs_fs_closing(fs_info) > 1) {
last = (u64)-1;
break;
}
@@ -379,15 +383,18 @@ again:
if (ret)
break;
- caching_ctl->progress = last;
- btrfs_release_path(path);
- up_read(&fs_info->extent_commit_sem);
- mutex_unlock(&caching_ctl->mutex);
- if (btrfs_transaction_in_commit(fs_info))
- schedule_timeout(1);
- else
+ if (need_resched() ||
+ btrfs_next_leaf(extent_root, path)) {
+ caching_ctl->progress = last;
+ btrfs_release_path(path);
+ up_read(&fs_info->extent_commit_sem);
+ mutex_unlock(&caching_ctl->mutex);
cond_resched();
- goto again;
+ goto again;
+ }
+ leaf = path->nodes[0];
+ nritems = btrfs_header_nritems(leaf);
+ continue;
}
if (key.objectid < block_group->key.objectid) {
@@ -431,13 +438,11 @@ err:
free_excluded_extents(extent_root, block_group);
mutex_unlock(&caching_ctl->mutex);
+out:
wake_up(&caching_ctl->wait);
put_caching_control(caching_ctl);
- atomic_dec(&block_group->space_info->caching_threads);
btrfs_put_block_group(block_group);
-
- return 0;
}
static int cache_block_group(struct btrfs_block_group_cache *cache,
@@ -447,7 +452,6 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
{
struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_caching_control *caching_ctl;
- struct task_struct *tsk;
int ret = 0;
smp_mb();
@@ -499,6 +503,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
caching_ctl->progress = cache->key.objectid;
/* one for caching kthread, one for caching block group list */
atomic_set(&caching_ctl->count, 2);
+ caching_ctl->work.func = caching_thread;
spin_lock(&cache->lock);
if (cache->cached != BTRFS_CACHE_NO) {
@@ -514,16 +519,9 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
up_write(&fs_info->extent_commit_sem);
- atomic_inc(&cache->space_info->caching_threads);
btrfs_get_block_group(cache);
- tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
- cache->key.objectid);
- if (IS_ERR(tsk)) {
- ret = PTR_ERR(tsk);
- printk(KERN_ERR "error running thread %d\n", ret);
- BUG();
- }
+ btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work);
return ret;
}
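
The conversion above replaces the per-block-group caching kthread with a queued btrfs_work; the callback then recovers its context from the embedded work item via container_of() instead of receiving the block group as a data pointer. A minimal, self-contained sketch of that pattern in userspace C (stand-in types and names, not btrfs code):

#include <stddef.h>
#include <stdio.h>

/* Same idea as the kernel macro: recover the enclosing struct from a member. */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct work {                        /* stands in for struct btrfs_work      */
        void (*func)(struct work *);
};

struct caching_control {             /* stands in for btrfs_caching_control  */
        struct work work;            /* embedded work item                   */
        const char *block_group;     /* context the callback needs           */
};

static void caching_thread(struct work *w)
{
        /* The callback only gets the work pointer; container_of() gives us
         * back the caching_control it is embedded in. */
        struct caching_control *ctl =
                container_of(w, struct caching_control, work);

        printf("caching %s\n", ctl->block_group);
}

int main(void)
{
        struct caching_control ctl = { .work.func = caching_thread,
                                       .block_group = "example" };

        ctl.work.func(&ctl.work);    /* a worker pool would make this call */
        return 0;
}
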
@@ -2930,9 +2928,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->full = 0;
found->force_alloc = CHUNK_ALLOC_NO_FORCE;
found->chunk_alloc = 0;
+ found->flush = 0;
+ init_waitqueue_head(&found->wait);
*space_info = found;
list_add_rcu(&found->list, &info->space_info);
- atomic_set(&found->caching_threads, 0);
return 0;
}
@@ -3065,7 +3064,7 @@ again:
spin_unlock(&data_sinfo->lock);
alloc:
alloc_target = btrfs_get_alloc_profile(root, 1);
- trans = btrfs_join_transaction(root, 1);
+ trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
@@ -3087,13 +3086,21 @@ alloc:
}
goto again;
}
+
+ /*
+ * If we have less pinned bytes than we want to allocate then
+ * don't bother committing the transaction, it won't help us.
+ */
+ if (data_sinfo->bytes_pinned < bytes)
+ committed = 1;
spin_unlock(&data_sinfo->lock);
/* commit the current transaction and try again */
commit_trans:
- if (!committed && !root->fs_info->open_ioctl_trans) {
+ if (!committed &&
+ !atomic_read(&root->fs_info->open_ioctl_trans)) {
committed = 1;
- trans = btrfs_join_transaction(root, 1);
+ trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
ret = btrfs_commit_transaction(trans, root);
@@ -3304,9 +3311,13 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
if (reserved == 0)
return 0;
- /* nothing to shrink - nothing to reclaim */
- if (root->fs_info->delalloc_bytes == 0)
+ smp_mb();
+ if (root->fs_info->delalloc_bytes == 0) {
+ if (trans)
+ return 0;
+ btrfs_wait_ordered_extents(root, 0, 0);
return 0;
+ }
max_reclaim = min(reserved, to_reclaim);
@@ -3350,6 +3361,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
}
}
+ if (reclaimed >= to_reclaim && !trans)
+ btrfs_wait_ordered_extents(root, 0, 0);
return reclaimed >= to_reclaim;
}
@@ -3374,15 +3387,36 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
u64 num_bytes = orig_bytes;
int retries = 0;
int ret = 0;
- bool reserved = false;
bool committed = false;
+ bool flushing = false;
again:
- ret = -ENOSPC;
- if (reserved)
- num_bytes = 0;
-
+ ret = 0;
spin_lock(&space_info->lock);
+ /*
+ * We only want to wait if somebody other than us is flushing and we are
+ * actually allowed to flush.
+ */
+ while (flush && !flushing && space_info->flush) {
+ spin_unlock(&space_info->lock);
+ /*
+ * If we have a trans handle we can't wait because the flusher
+ * may have to commit the transaction, which would mean we would
+ * deadlock since we are waiting for the flusher to finish, but
+ * hold the current transaction open.
+ */
+ if (trans)
+ return -EAGAIN;
+ ret = wait_event_interruptible(space_info->wait,
+ !space_info->flush);
+ /* Must have been interrupted, return */
+ if (ret)
+ return -EINTR;
+
+ spin_lock(&space_info->lock);
+ }
+
+ ret = -ENOSPC;
unused = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly +
space_info->bytes_may_use;
@@ -3397,8 +3431,7 @@ again:
if (unused <= space_info->total_bytes) {
unused = space_info->total_bytes - unused;
if (unused >= num_bytes) {
- if (!reserved)
- space_info->bytes_reserved += orig_bytes;
+ space_info->bytes_reserved += orig_bytes;
ret = 0;
} else {
/*
@@ -3423,17 +3456,14 @@ again:
* to reclaim space we can actually use it instead of somebody else
* stealing it from us.
*/
- if (ret && !reserved) {
- space_info->bytes_reserved += orig_bytes;
- reserved = true;
+ if (ret && flush) {
+ flushing = true;
+ space_info->flush = 1;
}
spin_unlock(&space_info->lock);
- if (!ret)
- return 0;
-
- if (!flush)
+ if (!ret || !flush)
goto out;
/*
@@ -3441,11 +3471,11 @@ again:
* metadata until after the IO is completed.
*/
ret = shrink_delalloc(trans, root, num_bytes, 1);
- if (ret > 0)
- return 0;
- else if (ret < 0)
+ if (ret < 0)
goto out;
+ ret = 0;
+
/*
* So if we were overcommitted it's possible that somebody else flushed
* out enough space and we simply didn't have enough space to reclaim,
@@ -3456,11 +3486,11 @@ again:
goto again;
}
- spin_lock(&space_info->lock);
/*
* Not enough space to be reclaimed, don't bother committing the
* transaction.
*/
+ spin_lock(&space_info->lock);
if (space_info->bytes_pinned < orig_bytes)
ret = -ENOSPC;
spin_unlock(&space_info->lock);
@@ -3468,11 +3498,14 @@ again:
goto out;
ret = -EAGAIN;
- if (trans || committed)
+ if (trans)
goto out;
ret = -ENOSPC;
- trans = btrfs_join_transaction(root, 1);
+ if (committed)
+ goto out;
+
+ trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
goto out;
ret = btrfs_commit_transaction(trans, root);
@@ -3483,12 +3516,12 @@ again:
}
out:
- if (reserved) {
+ if (flushing) {
spin_lock(&space_info->lock);
- space_info->bytes_reserved -= orig_bytes;
+ space_info->flush = 0;
+ wake_up_all(&space_info->wait);
spin_unlock(&space_info->lock);
}
-
return ret;
}
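
The hunk above introduces space_info->flush so that only one task flushes at a time while the others sleep on space_info->wait, or bail out with -EAGAIN if they hold a transaction and waiting could deadlock. A rough userspace analogue of that single-flusher handoff, sketched with a pthread mutex and condition variable (illustrative names only, not kernel APIs):

#include <pthread.h>
#include <stdbool.h>

struct space_info {
        pthread_mutex_t lock;
        pthread_cond_t  wait;   /* plays the role of space_info->wait  */
        bool            flush;  /* plays the role of space_info->flush */
};

/* Returns 0 once flushing is done, -1 if the caller must not wait
 * (the analogue of returning -EAGAIN when we hold a transaction). */
static int reserve(struct space_info *si, bool have_trans)
{
        pthread_mutex_lock(&si->lock);

        /* Somebody else is already flushing: wait for them rather than
         * piling on, unless waiting could deadlock us. */
        while (si->flush) {
                if (have_trans) {
                        pthread_mutex_unlock(&si->lock);
                        return -1;
                }
                pthread_cond_wait(&si->wait, &si->lock);
        }

        /* Pretend our reservation failed, so we become the flusher. */
        si->flush = true;
        pthread_mutex_unlock(&si->lock);

        /* ... shrink delalloc, maybe commit the transaction ... */

        pthread_mutex_lock(&si->lock);
        si->flush = false;
        pthread_cond_broadcast(&si->wait);      /* wake_up_all() analogue */
        pthread_mutex_unlock(&si->lock);
        return 0;
}

int main(void)
{
        struct space_info si = { PTHREAD_MUTEX_INITIALIZER,
                                 PTHREAD_COND_INITIALIZER, false };

        return reserve(&si, false) ? 1 : 0;
}
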
@@ -3698,8 +3731,7 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
if (commit_trans) {
if (trans)
return -EAGAIN;
-
- trans = btrfs_join_transaction(root, 1);
+ trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
ret = btrfs_commit_transaction(trans, root);
return 0;
@@ -3837,24 +3869,35 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
}
-int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- int num_items)
+int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_block_rsv *rsv)
{
+ struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv;
u64 num_bytes;
int ret;
- if (num_items == 0 || root->fs_info->chunk_root == root)
+ /*
+ * Truncate should be freeing data, but give us 2 items just in case it
+ * needs to use some space. We may want to be smarter about this in the
+ * future.
+ */
+ num_bytes = btrfs_calc_trans_metadata_size(root, 2);
+
+ /* We already have enough bytes, just return */
+ if (rsv->reserved >= num_bytes)
return 0;
- num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
- ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
- num_bytes);
- if (!ret) {
- trans->bytes_reserved += num_bytes;
- trans->block_rsv = &root->fs_info->trans_block_rsv;
- }
- return ret;
+ num_bytes -= rsv->reserved;
+
+ /*
+ * You should have reserved enough space beforehand to do this, so this
+ * should not fail.
+ */
+ ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes);
+ BUG_ON(ret);
+
+ return 0;
}
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
@@ -3877,23 +3920,18 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
/*
- * one for deleting orphan item, one for updating inode and
- * two for calling btrfs_truncate_inode_items.
- *
- * btrfs_truncate_inode_items is a delete operation, it frees
- * more space than it uses in most cases. So two units of
- * metadata space should be enough for calling it many times.
- * If all of the metadata space is used, we can commit
- * transaction and use space it freed.
+ * We need to hold space in order to delete our orphan item once we've
+ * added it, so this takes the reservation so we can release it later
+ * when we are truly done with the orphan item.
*/
- u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4);
+ u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
void btrfs_orphan_release_metadata(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4);
+ u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
}
@@ -3912,6 +3950,30 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
+static unsigned drop_outstanding_extent(struct inode *inode)
+{
+ unsigned dropped_extents = 0;
+
+ spin_lock(&BTRFS_I(inode)->lock);
+ BUG_ON(!BTRFS_I(inode)->outstanding_extents);
+ BTRFS_I(inode)->outstanding_extents--;
+
+ /*
+ * If we have at least as many outstanding extents as we have reserved,
+ * then we need to leave the reserved extents count alone.
+ */
+ if (BTRFS_I(inode)->outstanding_extents >=
+ BTRFS_I(inode)->reserved_extents)
+ goto out;
+
+ dropped_extents = BTRFS_I(inode)->reserved_extents -
+ BTRFS_I(inode)->outstanding_extents;
+ BTRFS_I(inode)->reserved_extents -= dropped_extents;
+out:
+ spin_unlock(&BTRFS_I(inode)->lock);
+ return dropped_extents;
+}
+
static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes)
{
return num_bytes >>= 3;
@@ -3921,9 +3983,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
- u64 to_reserve;
- int nr_extents;
- int reserved_extents;
+ u64 to_reserve = 0;
+ unsigned nr_extents = 0;
int ret;
if (btrfs_transaction_in_commit(root->fs_info))
@@ -3931,66 +3992,49 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
num_bytes = ALIGN(num_bytes, root->sectorsize);
- nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
- reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents++;
+
+ if (BTRFS_I(inode)->outstanding_extents >
+ BTRFS_I(inode)->reserved_extents) {
+ nr_extents = BTRFS_I(inode)->outstanding_extents -
+ BTRFS_I(inode)->reserved_extents;
+ BTRFS_I(inode)->reserved_extents += nr_extents;
- if (nr_extents > reserved_extents) {
- nr_extents -= reserved_extents;
to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
- } else {
- nr_extents = 0;
- to_reserve = 0;
}
+ spin_unlock(&BTRFS_I(inode)->lock);
to_reserve += calc_csum_metadata_size(inode, num_bytes);
ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
- if (ret)
+ if (ret) {
+ unsigned dropped;
+ /*
+ * We don't need the return value since our reservation failed,
+ * we just need to clean up our counter.
+ */
+ dropped = drop_outstanding_extent(inode);
+ WARN_ON(dropped > 1);
return ret;
-
- atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents);
- atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+ }
block_rsv_add_bytes(block_rsv, to_reserve, 1);
- if (block_rsv->size > 512 * 1024 * 1024)
- shrink_delalloc(NULL, root, to_reserve, 0);
-
return 0;
}
void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- u64 to_free;
- int nr_extents;
- int reserved_extents;
+ u64 to_free = 0;
+ unsigned dropped;
num_bytes = ALIGN(num_bytes, root->sectorsize);
- atomic_dec(&BTRFS_I(inode)->outstanding_extents);
- WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
-
- reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
- do {
- int old, new;
-
- nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
- if (nr_extents >= reserved_extents) {
- nr_extents = 0;
- break;
- }
- old = reserved_extents;
- nr_extents = reserved_extents - nr_extents;
- new = reserved_extents - nr_extents;
- old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents,
- reserved_extents, new);
- if (likely(old == reserved_extents))
- break;
- reserved_extents = old;
- } while (1);
+ dropped = drop_outstanding_extent(inode);
to_free = calc_csum_metadata_size(inode, num_bytes);
- if (nr_extents > 0)
- to_free += btrfs_calc_trans_metadata_size(root, nr_extents);
+ if (dropped > 0)
+ to_free += btrfs_calc_trans_metadata_size(root, dropped);
btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
to_free);
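
The reserve and release paths above keep outstanding_extents and reserved_extents consistent under BTRFS_I(inode)->lock: reserve bumps the outstanding count and only reserves metadata for the excess over what is already reserved, while release uses drop_outstanding_extent() to find out how much can be handed back. A condensed sketch of that bookkeeping, with the locking omitted and hypothetical types:

#include <assert.h>
#include <stdio.h>

struct inode_ctr {
        unsigned outstanding_extents;   /* extents that may still be written */
        unsigned reserved_extents;      /* extents we hold metadata for      */
};

/* Mirror of the reserve side: bump outstanding and report how many new
 * extents actually need metadata reserved for them. */
static unsigned reserve_outstanding(struct inode_ctr *i)
{
        unsigned nr = 0;

        i->outstanding_extents++;
        if (i->outstanding_extents > i->reserved_extents) {
                nr = i->outstanding_extents - i->reserved_extents;
                i->reserved_extents += nr;
        }
        return nr;
}

/* Mirror of drop_outstanding_extent(): report how many reservations can be
 * returned now that one outstanding extent has gone away. */
static unsigned drop_outstanding(struct inode_ctr *i)
{
        unsigned dropped = 0;

        assert(i->outstanding_extents > 0);
        i->outstanding_extents--;

        if (i->outstanding_extents < i->reserved_extents) {
                dropped = i->reserved_extents - i->outstanding_extents;
                i->reserved_extents -= dropped;
        }
        return dropped;
}

int main(void)
{
        struct inode_ctr ctr = { 0, 0 };

        printf("reserved %u new extents\n", reserve_outstanding(&ctr));
        printf("can release %u extents\n", drop_outstanding(&ctr));
        return 0;
}
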
@@ -4810,7 +4854,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
u64 num_bytes, u64 empty_size,
u64 search_start, u64 search_end,
u64 hint_byte, struct btrfs_key *ins,
- int data)
+ u64 data)
{
int ret = 0;
struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -4837,7 +4881,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
space_info = __find_space_info(root->fs_info, data);
if (!space_info) {
- printk(KERN_ERR "No space info for %d\n", data);
+ printk(KERN_ERR "No space info for %llu\n", data);
return -ENOSPC;
}
@@ -4958,14 +5002,10 @@ have_block_group:
}
/*
- * We only want to start kthread caching if we are at
- * the point where we will wait for caching to make
- * progress, or if our ideal search is over and we've
- * found somebody to start caching.
+ * The caching workers are limited to 2 threads, so we
+ * can queue as much work as we care to.
*/
- if (loop > LOOP_CACHING_NOWAIT ||
- (loop > LOOP_FIND_IDEAL &&
- atomic_read(&space_info->caching_threads) < 2)) {
+ if (loop > LOOP_FIND_IDEAL) {
ret = cache_block_group(block_group, trans,
orig_root, 0);
BUG_ON(ret);
@@ -4987,6 +5027,15 @@ have_block_group:
if (unlikely(block_group->ro))
goto loop;
+ spin_lock(&block_group->free_space_ctl->tree_lock);
+ if (cached &&
+ block_group->free_space_ctl->free_space <
+ num_bytes + empty_size) {
+ spin_unlock(&block_group->free_space_ctl->tree_lock);
+ goto loop;
+ }
+ spin_unlock(&block_group->free_space_ctl->tree_lock);
+
/*
* Ok we want to try and use the cluster allocator, so lets look
* there, unless we are on LOOP_NO_EMPTY_SIZE, since we will
@@ -5150,6 +5199,7 @@ checks:
btrfs_add_free_space(block_group, offset,
search_start - offset);
BUG_ON(offset > search_start);
+ btrfs_put_block_group(block_group);
break;
loop:
failed_cluster_refill = false;
@@ -5172,15 +5222,12 @@ loop:
* LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
* again
*/
- if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
- (found_uncached_bg || empty_size || empty_cluster ||
- allowed_chunk_alloc)) {
+ if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
index = 0;
if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
found_uncached_bg = false;
loop++;
- if (!ideal_cache_percent &&
- atomic_read(&space_info->caching_threads))
+ if (!ideal_cache_percent)
goto search;
/*
@@ -5214,42 +5261,39 @@ loop:
goto search;
}
- if (loop < LOOP_CACHING_WAIT) {
- loop++;
- goto search;
- }
+ loop++;
if (loop == LOOP_ALLOC_CHUNK) {
- empty_size = 0;
- empty_cluster = 0;
- }
+ if (allowed_chunk_alloc) {
+ ret = do_chunk_alloc(trans, root, num_bytes +
+ 2 * 1024 * 1024, data,
+ CHUNK_ALLOC_LIMITED);
+ allowed_chunk_alloc = 0;
+ if (ret == 1)
+ done_chunk_alloc = 1;
+ } else if (!done_chunk_alloc &&
+ space_info->force_alloc ==
+ CHUNK_ALLOC_NO_FORCE) {
+ space_info->force_alloc = CHUNK_ALLOC_LIMITED;
+ }
- if (allowed_chunk_alloc) {
- ret = do_chunk_alloc(trans, root, num_bytes +
- 2 * 1024 * 1024, data,
- CHUNK_ALLOC_LIMITED);
- allowed_chunk_alloc = 0;
- done_chunk_alloc = 1;
- } else if (!done_chunk_alloc &&
- space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
- space_info->force_alloc = CHUNK_ALLOC_LIMITED;
+ /*
+ * We didn't allocate a chunk, go ahead and drop the
+ * empty size and loop again.
+ */
+ if (!done_chunk_alloc)
+ loop = LOOP_NO_EMPTY_SIZE;
}
- if (loop < LOOP_NO_EMPTY_SIZE) {
- loop++;
- goto search;
+ if (loop == LOOP_NO_EMPTY_SIZE) {
+ empty_size = 0;
+ empty_cluster = 0;
}
- ret = -ENOSPC;
+
+ goto search;
} else if (!ins->objectid) {
ret = -ENOSPC;
- }
-
- /* we found what we needed */
- if (ins->objectid) {
- if (!(data & BTRFS_BLOCK_GROUP_DATA))
- trans->block_group = block_group->key.objectid;
-
- btrfs_put_block_group(block_group);
+ } else if (ins->objectid) {
ret = 0;
}
@@ -5586,7 +5630,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
if (!buf)
return ERR_PTR(-ENOMEM);
btrfs_set_header_generation(buf, trans->transid);
- btrfs_set_buffer_lockdep_class(buf, level);
+ btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
btrfs_tree_lock(buf);
clean_tree_block(trans, root, buf);
@@ -5873,7 +5917,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
return 1;
if (path->locks[level] && !wc->keep_locks) {
- btrfs_tree_unlock(eb);
+ btrfs_tree_unlock_rw(eb, path->locks[level]);
path->locks[level] = 0;
}
return 0;
@@ -5897,7 +5941,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
* keep the tree lock
*/
if (path->locks[level] && level > 0) {
- btrfs_tree_unlock(eb);
+ btrfs_tree_unlock_rw(eb, path->locks[level]);
path->locks[level] = 0;
}
return 0;
@@ -6010,7 +6054,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
BUG_ON(level != btrfs_header_level(next));
path->nodes[level] = next;
path->slots[level] = 0;
- path->locks[level] = 1;
+ path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
wc->level = level;
if (wc->level == 1)
wc->reada_slot = 0;
@@ -6081,7 +6125,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
BUG_ON(level == 0);
btrfs_tree_lock(eb);
btrfs_set_lock_blocking(eb);
- path->locks[level] = 1;
+ path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
ret = btrfs_lookup_extent_info(trans, root,
eb->start, eb->len,
@@ -6090,8 +6134,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
BUG_ON(ret);
BUG_ON(wc->refs[level] == 0);
if (wc->refs[level] == 1) {
- btrfs_tree_unlock(eb);
- path->locks[level] = 0;
+ btrfs_tree_unlock_rw(eb, path->locks[level]);
return 1;
}
}
@@ -6113,7 +6156,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
btrfs_header_generation(eb) == trans->transid) {
btrfs_tree_lock(eb);
btrfs_set_lock_blocking(eb);
- path->locks[level] = 1;
+ path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
}
clean_tree_block(trans, root, eb);
}
@@ -6192,7 +6235,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
return 0;
if (path->locks[level]) {
- btrfs_tree_unlock(path->nodes[level]);
+ btrfs_tree_unlock_rw(path->nodes[level],
+ path->locks[level]);
path->locks[level] = 0;
}
free_extent_buffer(path->nodes[level]);
@@ -6244,7 +6288,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
path->nodes[level] = btrfs_lock_root_node(root);
btrfs_set_lock_blocking(path->nodes[level]);
path->slots[level] = 0;
- path->locks[level] = 1;
+ path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
memset(&wc->update_progress, 0,
sizeof(wc->update_progress));
} else {
@@ -6412,7 +6456,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
level = btrfs_header_level(node);
path->nodes[level] = node;
path->slots[level] = 0;
- path->locks[level] = 1;
+ path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
wc->refs[parent_level] = 1;
wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
@@ -6487,15 +6531,28 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
return flags;
}
-static int set_block_group_ro(struct btrfs_block_group_cache *cache)
+static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
{
struct btrfs_space_info *sinfo = cache->space_info;
u64 num_bytes;
+ u64 min_allocable_bytes;
int ret = -ENOSPC;
if (cache->ro)
return 0;
+ /*
+ * We need some metadata space and system metadata space for
+ * allocating chunks in some corner cases until we force to set
+ * it to be readonly.
+ */
+ if ((sinfo->flags &
+ (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
+ !force)
+ min_allocable_bytes = 1 * 1024 * 1024;
+ else
+ min_allocable_bytes = 0;
+
spin_lock(&sinfo->lock);
spin_lock(&cache->lock);
num_bytes = cache->key.offset - cache->reserved - cache->pinned -
@@ -6503,7 +6560,8 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache)
if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
sinfo->bytes_may_use + sinfo->bytes_readonly +
- cache->reserved_pinned + num_bytes <= sinfo->total_bytes) {
+ cache->reserved_pinned + num_bytes + min_allocable_bytes <=
+ sinfo->total_bytes) {
sinfo->bytes_readonly += num_bytes;
sinfo->bytes_reserved += cache->reserved_pinned;
cache->reserved_pinned = 0;
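
With the new force parameter, set_block_group_ro() also holds back min_allocable_bytes (1MB for metadata/system space when not forced) so later chunk allocation still has headroom. The condition it evaluates is roughly the following, restated as a simplified standalone check rather than the kernel function:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified restatement of the check: the block group's unused bytes may be
 * moved to bytes_readonly only if everything already accounted for, plus the
 * safety margin, still fits inside the space_info's total. */
static bool can_set_ro(uint64_t used, uint64_t reserved, uint64_t pinned,
                       uint64_t may_use, uint64_t readonly,
                       uint64_t reserved_pinned, uint64_t bg_unused,
                       uint64_t total, uint64_t min_allocable)
{
        return used + reserved + pinned + may_use + readonly +
               reserved_pinned + bg_unused + min_allocable <= total;
}

int main(void)
{
        /* 1MB margin kept for a metadata group that is not being forced RO. */
        printf("%d\n", can_set_ro(0, 0, 0, 0, 0, 0, 512u << 20,
                                  1024ULL << 20, 1u << 20));
        return 0;
}
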
@@ -6526,7 +6584,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
BUG_ON(cache->ro);
- trans = btrfs_join_transaction(root, 1);
+ trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
alloc_flags = update_block_group_flags(root, cache->flags);
@@ -6534,7 +6592,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
CHUNK_ALLOC_FORCE);
- ret = set_block_group_ro(cache);
+ ret = set_block_group_ro(cache, 0);
if (!ret)
goto out;
alloc_flags = get_alloc_profile(root, cache->space_info->flags);
@@ -6542,7 +6600,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
CHUNK_ALLOC_FORCE);
if (ret < 0)
goto out;
- ret = set_block_group_ro(cache);
+ ret = set_block_group_ro(cache, 0);
out:
btrfs_end_transaction(trans, root);
return ret;
@@ -6882,6 +6940,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
+ path->reada = 1;
cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
if (cache_gen != 0 &&
@@ -6978,7 +7037,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
set_avail_alloc_bits(root->fs_info, cache->flags);
if (btrfs_chunk_readonly(root, cache->key.objectid))
- set_block_group_ro(cache);
+ set_block_group_ro(cache, 1);
}
list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
@@ -6992,9 +7051,9 @@ int btrfs_read_block_groups(struct btrfs_root *root)
* mirrored block groups.
*/
list_for_each_entry(cache, &space_info->block_groups[3], list)
- set_block_group_ro(cache);
+ set_block_group_ro(cache, 1);
list_for_each_entry(cache, &space_info->block_groups[4], list)
- set_block_group_ro(cache);
+ set_block_group_ro(cache, 1);
}
init_global_block_rsv(info);