summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c182
1 files changed, 107 insertions, 75 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 98b6a71decb..07b3ac662e1 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -100,38 +100,83 @@ struct async_submit_bio {
struct btrfs_work work;
};
-/* These are used to set the lockdep class on the extent buffer locks.
- * The class is set by the readpage_end_io_hook after the buffer has
- * passed csum validation but before the pages are unlocked.
+/*
+ * Lockdep class keys for extent_buffer->lock's in this root. For a given
+ * eb, the lockdep key is determined by the btrfs_root it belongs to and
+ * the level the eb occupies in the tree.
+ *
+ * Different roots are used for different purposes and may nest inside each
+ * other and they require separate keysets. As lockdep keys should be
+ * static, assign keysets according to the purpose of the root as indicated
+ * by btrfs_root->objectid. This ensures that all special purpose roots
+ * have separate keysets.
*
- * The lockdep class is also set by btrfs_init_new_buffer on freshly
- * allocated blocks.
+ * Lock-nesting across peer nodes is always done with the immediate parent
+ * node locked thus preventing deadlock. As lockdep doesn't know this, use
+ * subclass to avoid triggering lockdep warning in such cases.
*
- * The class is based on the level in the tree block, which allows lockdep
- * to know that lower nodes nest inside the locks of higher nodes.
+ * The key is set by the readpage_end_io_hook after the buffer has passed
+ * csum validation but before the pages are unlocked. It is also set by
+ * btrfs_init_new_buffer on freshly allocated blocks.
*
- * We also add a check to make sure the highest level of the tree is
- * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this
- * code needs update as well.
+ * We also add a check to make sure the highest level of the tree is the
+ * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code
+ * needs update as well.
*/
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# if BTRFS_MAX_LEVEL != 8
# error
# endif
-static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1];
-static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
- /* leaf */
- "btrfs-extent-00",
- "btrfs-extent-01",
- "btrfs-extent-02",
- "btrfs-extent-03",
- "btrfs-extent-04",
- "btrfs-extent-05",
- "btrfs-extent-06",
- "btrfs-extent-07",
- /* highest possible level */
- "btrfs-extent-08",
+
+static struct btrfs_lockdep_keyset {
+ u64 id; /* root objectid */
+ const char *name_stem; /* lock name stem */
+ char names[BTRFS_MAX_LEVEL + 1][20];
+ struct lock_class_key keys[BTRFS_MAX_LEVEL + 1];
+} btrfs_lockdep_keysets[] = {
+ { .id = BTRFS_ROOT_TREE_OBJECTID, .name_stem = "root" },
+ { .id = BTRFS_EXTENT_TREE_OBJECTID, .name_stem = "extent" },
+ { .id = BTRFS_CHUNK_TREE_OBJECTID, .name_stem = "chunk" },
+ { .id = BTRFS_DEV_TREE_OBJECTID, .name_stem = "dev" },
+ { .id = BTRFS_FS_TREE_OBJECTID, .name_stem = "fs" },
+ { .id = BTRFS_CSUM_TREE_OBJECTID, .name_stem = "csum" },
+ { .id = BTRFS_ORPHAN_OBJECTID, .name_stem = "orphan" },
+ { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" },
+ { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
+ { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
+ { .id = 0, .name_stem = "tree" },
};
+
+void __init btrfs_init_lockdep(void)
+{
+ int i, j;
+
+ /* initialize lockdep class names */
+ for (i = 0; i < ARRAY_SIZE(btrfs_lockdep_keysets); i++) {
+ struct btrfs_lockdep_keyset *ks = &btrfs_lockdep_keysets[i];
+
+ for (j = 0; j < ARRAY_SIZE(ks->names); j++)
+ snprintf(ks->names[j], sizeof(ks->names[j]),
+ "btrfs-%s-%02d", ks->name_stem, j);
+ }
+}
+
+void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
+ int level)
+{
+ struct btrfs_lockdep_keyset *ks;
+
+ BUG_ON(level >= ARRAY_SIZE(ks->keys));
+
+ /* find the matching keyset, id 0 is the default entry */
+ for (ks = btrfs_lockdep_keysets; ks->id; ks++)
+ if (ks->id == objectid)
+ break;
+
+ lockdep_set_class_and_name(&eb->lock,
+ &ks->keys[level], ks->names[level]);
+}
+
#endif
/*
@@ -217,7 +262,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
unsigned long len;
unsigned long cur_len;
unsigned long offset = BTRFS_CSUM_SIZE;
- char *map_token = NULL;
char *kaddr;
unsigned long map_start;
unsigned long map_len;
@@ -228,8 +272,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
len = buf->len - offset;
while (len > 0) {
err = map_private_extent_buffer(buf, offset, 32,
- &map_token, &kaddr,
- &map_start, &map_len, KM_USER0);
+ &kaddr, &map_start, &map_len);
if (err)
return 1;
cur_len = min(len, map_len - (offset - map_start));
@@ -237,7 +280,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
crc, cur_len);
len -= cur_len;
offset += cur_len;
- unmap_extent_buffer(buf, map_token, KM_USER0);
}
if (csum_size > sizeof(inline_result)) {
result = kzalloc(csum_size * sizeof(char), GFP_NOFS);
@@ -494,15 +536,6 @@ static noinline int check_leaf(struct btrfs_root *root,
return 0;
}
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
-{
- lockdep_set_class_and_name(&eb->lock,
- &btrfs_eb_class[level],
- btrfs_eb_name[level]);
-}
-#endif
-
static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state)
{
@@ -553,7 +586,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
}
found_level = btrfs_header_level(eb);
- btrfs_set_buffer_lockdep_class(eb, found_level);
+ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
+ eb, found_level);
ret = csum_tree_block(root, eb, 1);
if (ret) {
@@ -1044,7 +1078,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->last_trans = 0;
root->highest_objectid = 0;
root->name = NULL;
- root->in_sysfs = 0;
root->inode_tree = RB_ROOT;
INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
root->block_rsv = NULL;
@@ -1078,12 +1111,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
init_completion(&root->kobj_unregister);
root->defrag_running = 0;
root->root_key.objectid = objectid;
- root->anon_super.s_root = NULL;
- root->anon_super.s_dev = 0;
- INIT_LIST_HEAD(&root->anon_super.s_list);
- INIT_LIST_HEAD(&root->anon_super.s_instances);
- init_rwsem(&root->anon_super.s_umount);
-
+ root->anon_dev = 0;
return 0;
}
@@ -1300,19 +1328,21 @@ again:
return root;
root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
- if (!root->free_ino_ctl)
- goto fail;
root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
GFP_NOFS);
- if (!root->free_ino_pinned)
+ if (!root->free_ino_pinned || !root->free_ino_ctl) {
+ ret = -ENOMEM;
goto fail;
+ }
btrfs_init_free_ino_ctl(root);
mutex_init(&root->fs_commit_mutex);
spin_lock_init(&root->cache_lock);
init_waitqueue_head(&root->cache_wait);
- set_anon_super(&root->anon_super, NULL);
+ ret = get_anon_bdev(&root->anon_dev);
+ if (ret)
+ goto fail;
if (btrfs_root_refs(&root->root_item) == 0) {
ret = -ENOENT;
@@ -1505,24 +1535,24 @@ static int transaction_kthread(void *arg)
vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
mutex_lock(&root->fs_info->transaction_kthread_mutex);
- spin_lock(&root->fs_info->new_trans_lock);
+ spin_lock(&root->fs_info->trans_lock);
cur = root->fs_info->running_transaction;
if (!cur) {
- spin_unlock(&root->fs_info->new_trans_lock);
+ spin_unlock(&root->fs_info->trans_lock);
goto sleep;
}
now = get_seconds();
if (!cur->blocked &&
(now < cur->start_time || now - cur->start_time < 30)) {
- spin_unlock(&root->fs_info->new_trans_lock);
+ spin_unlock(&root->fs_info->trans_lock);
delay = HZ * 5;
goto sleep;
}
transid = cur->transid;
- spin_unlock(&root->fs_info->new_trans_lock);
+ spin_unlock(&root->fs_info->trans_lock);
- trans = btrfs_join_transaction(root, 1);
+ trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
if (transid == trans->transid) {
ret = btrfs_commit_transaction(trans, root);
@@ -1602,7 +1632,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
goto fail_bdi;
}
- fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS;
+ mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
INIT_LIST_HEAD(&fs_info->trans_list);
@@ -1613,11 +1643,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
INIT_LIST_HEAD(&fs_info->ordered_operations);
INIT_LIST_HEAD(&fs_info->caching_block_groups);
spin_lock_init(&fs_info->delalloc_lock);
- spin_lock_init(&fs_info->new_trans_lock);
+ spin_lock_init(&fs_info->trans_lock);
spin_lock_init(&fs_info->ref_cache_lock);
spin_lock_init(&fs_info->fs_roots_radix_lock);
spin_lock_init(&fs_info->delayed_iput_lock);
spin_lock_init(&fs_info->defrag_inodes_lock);
+ mutex_init(&fs_info->reloc_mutex);
init_completion(&fs_info->kobj_unregister);
fs_info->tree_root = tree_root;
@@ -1645,6 +1676,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->max_inline = 8192 * 1024;
fs_info->metadata_ratio = 0;
fs_info->defrag_inodes = RB_ROOT;
+ fs_info->trans_no_join = 0;
fs_info->thread_pool_size = min_t(unsigned long,
num_online_cpus() + 2, 8);
@@ -1667,8 +1699,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
init_waitqueue_head(&fs_info->scrub_pause_wait);
init_rwsem(&fs_info->scrub_super_lock);
fs_info->scrub_workers_refcnt = 0;
- btrfs_init_workers(&fs_info->scrub_workers, "scrub",
- fs_info->thread_pool_size, &fs_info->generic_worker);
sb->s_blocksize = 4096;
sb->s_blocksize_bits = blksize_bits(4096);
@@ -1709,7 +1739,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->do_barriers = 1;
- mutex_init(&fs_info->trans_mutex);
mutex_init(&fs_info->ordered_operations_mutex);
mutex_init(&fs_info->tree_log_mutex);
mutex_init(&fs_info->chunk_mutex);
@@ -1807,6 +1836,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->thread_pool_size),
&fs_info->generic_worker);
+ btrfs_init_workers(&fs_info->caching_workers, "cache",
+ 2, &fs_info->generic_worker);
+
/* a higher idle thresh on the submit workers makes it much more
* likely that bios will be send down in a sane order to the
* devices
@@ -1860,6 +1892,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_start_workers(&fs_info->endio_write_workers, 1);
btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
btrfs_start_workers(&fs_info->delayed_workers, 1);
+ btrfs_start_workers(&fs_info->caching_workers, 1);
fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -2117,6 +2150,7 @@ fail_sb_buffer:
btrfs_stop_workers(&fs_info->endio_freespace_worker);
btrfs_stop_workers(&fs_info->submit_workers);
btrfs_stop_workers(&fs_info->delayed_workers);
+ btrfs_stop_workers(&fs_info->caching_workers);
fail_alloc:
kfree(fs_info->delayed_root);
fail_iput:
@@ -2393,10 +2427,8 @@ static void free_fs_root(struct btrfs_root *root)
{
iput(root->cache_inode);
WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
- if (root->anon_super.s_dev) {
- down_write(&root->anon_super.s_umount);
- kill_anon_super(&root->anon_super);
- }
+ if (root->anon_dev)
+ free_anon_bdev(root->anon_dev);
free_extent_buffer(root->node);
free_extent_buffer(root->commit_root);
kfree(root->free_ino_ctl);
@@ -2479,13 +2511,13 @@ int btrfs_commit_super(struct btrfs_root *root)
down_write(&root->fs_info->cleanup_work_sem);
up_write(&root->fs_info->cleanup_work_sem);
- trans = btrfs_join_transaction(root, 1);
+ trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
ret = btrfs_commit_transaction(trans, root);
BUG_ON(ret);
/* run commit again to drop the original snapshot */
- trans = btrfs_join_transaction(root, 1);
+ trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
btrfs_commit_transaction(trans, root);
@@ -2584,6 +2616,7 @@ int close_ctree(struct btrfs_root *root)
btrfs_stop_workers(&fs_info->endio_freespace_worker);
btrfs_stop_workers(&fs_info->submit_workers);
btrfs_stop_workers(&fs_info->delayed_workers);
+ btrfs_stop_workers(&fs_info->caching_workers);
btrfs_close_devices(fs_info->fs_devices);
btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -2911,9 +2944,8 @@ static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
INIT_LIST_HEAD(&splice);
- list_splice_init(&root->fs_info->delalloc_inodes, &splice);
-
spin_lock(&root->fs_info->delalloc_lock);
+ list_splice_init(&root->fs_info->delalloc_inodes, &splice);
while (!list_empty(&splice)) {
btrfs_inode = list_entry(splice.next, struct btrfs_inode,
@@ -3024,10 +3056,13 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
WARN_ON(1);
- mutex_lock(&root->fs_info->trans_mutex);
mutex_lock(&root->fs_info->transaction_kthread_mutex);
+ spin_lock(&root->fs_info->trans_lock);
list_splice_init(&root->fs_info->trans_list, &list);
+ root->fs_info->trans_no_join = 1;
+ spin_unlock(&root->fs_info->trans_lock);
+
while (!list_empty(&list)) {
t = list_entry(list.next, struct btrfs_transaction, list);
if (!t)
@@ -3052,23 +3087,18 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
t->blocked = 0;
if (waitqueue_active(&root->fs_info->transaction_wait))
wake_up(&root->fs_info->transaction_wait);
- mutex_unlock(&root->fs_info->trans_mutex);
- mutex_lock(&root->fs_info->trans_mutex);
t->commit_done = 1;
if (waitqueue_active(&t->commit_wait))
wake_up(&t->commit_wait);
- mutex_unlock(&root->fs_info->trans_mutex);
-
- mutex_lock(&root->fs_info->trans_mutex);
btrfs_destroy_pending_snapshots(t);
btrfs_destroy_delalloc_inodes(root);
- spin_lock(&root->fs_info->new_trans_lock);
+ spin_lock(&root->fs_info->trans_lock);
root->fs_info->running_transaction = NULL;
- spin_unlock(&root->fs_info->new_trans_lock);
+ spin_unlock(&root->fs_info->trans_lock);
btrfs_destroy_marked_extents(root, &t->dirty_pages,
EXTENT_DIRTY);
@@ -3082,8 +3112,10 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
kmem_cache_free(btrfs_transaction_cachep, t);
}
+ spin_lock(&root->fs_info->trans_lock);
+ root->fs_info->trans_no_join = 0;
+ spin_unlock(&root->fs_info->trans_lock);
mutex_unlock(&root->fs_info->transaction_kthread_mutex);
- mutex_unlock(&root->fs_info->trans_mutex);
return 0;
}