summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- fs/btrfs/disk-io.c 170
1 files changed, 134 insertions, 36 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 81a313874ae..3e18175248e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -16,7 +16,6 @@
* Boston, MA 021110-1307, USA.
*/
-#include <linux/version.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>
@@ -76,6 +75,40 @@ struct async_submit_bio {
struct btrfs_work work;
};
+/* These are used to set the lockdep class on the extent buffer locks.
+ * The class is set by the readpage_end_io_hook once the header level is
+ * known (just ahead of csum validation) and before the pages are unlocked.
+ *
+ * The lockdep class is also set by btrfs_init_new_buffer on freshly
+ * allocated blocks.
+ *
+ * The class is based on the level in the tree block, which allows lockdep
+ * to know that lower nodes nest inside the locks of higher nodes.
+ *
+ * We also add a check to make sure the highest level of the tree is
+ * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this
+ * code needs to be updated as well.
+ */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# if BTRFS_MAX_LEVEL != 8
+# error
+# endif
+static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1];
+static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
+ /* leaf */
+ "btrfs-extent-00",
+ "btrfs-extent-01",
+ "btrfs-extent-02",
+ "btrfs-extent-03",
+ "btrfs-extent-04",
+ "btrfs-extent-05",
+ "btrfs-extent-06",
+ "btrfs-extent-07",
+ /* highest possible level */
+ "btrfs-extent-08",
+};
+#endif
+
/*
* extents on the btree inode are pretty simple, there's one extent
* that covers the entire device
@@ -348,6 +381,15 @@ static int check_tree_block_fsid(struct btrfs_root *root,
return ret;
}
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
+{
+ lockdep_set_class_and_name(&eb->lock,
+ &btrfs_eb_class[level],
+ btrfs_eb_name[level]);
+}
+#endif
+
static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state)
{
@@ -393,6 +435,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
}
found_level = btrfs_header_level(eb);
+ btrfs_set_buffer_lockdep_class(eb, found_level);
+
ret = csum_tree_block(root, eb, 1);
if (ret)
ret = -EIO;
@@ -800,7 +844,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
if (ret == 0)
- buf->flags |= EXTENT_UPTODATE;
+ set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
else
WARN_ON(1);
return buf;
@@ -813,7 +857,11 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct inode *btree_inode = root->fs_info->btree_inode;
if (btrfs_header_generation(buf) ==
root->fs_info->running_transaction->transid) {
- WARN_ON(!btrfs_tree_locked(buf));
+ btrfs_assert_tree_locked(buf);
+
+ /* ugh, clear_extent_buffer_dirty can be expensive */
+ btrfs_set_lock_blocking(buf);
+
clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
buf);
}
@@ -850,6 +898,14 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
spin_lock_init(&root->list_lock);
mutex_init(&root->objectid_mutex);
mutex_init(&root->log_mutex);
+ init_waitqueue_head(&root->log_writer_wait);
+ init_waitqueue_head(&root->log_commit_wait[0]);
+ init_waitqueue_head(&root->log_commit_wait[1]);
+ atomic_set(&root->log_commit[0], 0);
+ atomic_set(&root->log_commit[1], 0);
+ atomic_set(&root->log_writers, 0);
+ root->log_batch = 0;
+ root->log_transid = 0;
extent_io_tree_init(&root->dirty_log_pages,
fs_info->btree_inode->i_mapping, GFP_NOFS);
@@ -934,15 +990,16 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
return 0;
}
-int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
+static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info)
{
struct btrfs_root *root;
struct btrfs_root *tree_root = fs_info->tree_root;
+ struct extent_buffer *leaf;
root = kzalloc(sizeof(*root), GFP_NOFS);
if (!root)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
__setup_root(tree_root->nodesize, tree_root->leafsize,
tree_root->sectorsize, tree_root->stripesize,
@@ -951,12 +1008,23 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
root->root_key.type = BTRFS_ROOT_ITEM_KEY;
root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
+ /*
+ * log trees do not get reference counted because they go away
+ * before a real commit is actually done. They do store pointers
+ * to file data extents, and those reference counts still get
+ * updated (along with back refs to the log tree).
+ */
root->ref_cows = 0;
- root->node = btrfs_alloc_free_block(trans, root, root->leafsize,
- 0, BTRFS_TREE_LOG_OBJECTID,
- trans->transid, 0, 0, 0);
+ leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
+ 0, BTRFS_TREE_LOG_OBJECTID,
+ trans->transid, 0, 0, 0);
+ if (IS_ERR(leaf)) {
+ kfree(root);
+ return ERR_CAST(leaf);
+ }
+ root->node = leaf;
btrfs_set_header_nritems(root->node, 0);
btrfs_set_header_level(root->node, 0);
btrfs_set_header_bytenr(root->node, root->node->start);
@@ -968,7 +1036,48 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
BTRFS_FSID_SIZE);
btrfs_mark_buffer_dirty(root->node);
btrfs_tree_unlock(root->node);
- fs_info->log_root_tree = root;
+ return root;
+}
+
+int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_root *log_root;
+
+ log_root = alloc_log_tree(trans, fs_info);
+ if (IS_ERR(log_root))
+ return PTR_ERR(log_root);
+ WARN_ON(fs_info->log_root_tree);
+ fs_info->log_root_tree = log_root;
+ return 0;
+}
+
+int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ struct btrfs_root *log_root;
+ struct btrfs_inode_item *inode_item;
+
+ log_root = alloc_log_tree(trans, root->fs_info);
+ if (IS_ERR(log_root))
+ return PTR_ERR(log_root);
+
+ log_root->last_trans = trans->transid;
+ log_root->root_key.offset = root->root_key.objectid;
+
+ inode_item = &log_root->root_item.inode;
+ inode_item->generation = cpu_to_le64(1);
+ inode_item->size = cpu_to_le64(3);
+ inode_item->nlink = cpu_to_le32(1);
+ inode_item->nbytes = cpu_to_le64(root->leafsize);
+ inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
+
+ btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start);
+ btrfs_set_root_generation(&log_root->root_item, trans->transid);
+
+ WARN_ON(root->log_root);
+ root->log_root = log_root;
+ root->log_transid = 0;
return 0;
}
@@ -1136,7 +1245,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
{
struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data;
int ret = 0;
- struct list_head *cur;
struct btrfs_device *device;
struct backing_dev_info *bdi;
#if 0
@@ -1144,8 +1252,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
btrfs_congested_async(info, 0))
return 1;
#endif
- list_for_each(cur, &info->fs_devices->devices) {
- device = list_entry(cur, struct btrfs_device, dev_list);
+ list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
if (!device->bdev)
continue;
bdi = blk_get_backing_dev_info(device->bdev);
@@ -1163,13 +1270,11 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
*/
static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
{
- struct list_head *cur;
struct btrfs_device *device;
struct btrfs_fs_info *info;
info = (struct btrfs_fs_info *)bdi->unplug_io_data;
- list_for_each(cur, &info->fs_devices->devices) {
- device = list_entry(cur, struct btrfs_device, dev_list);
+ list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
if (!device->bdev)
continue;
@@ -1447,7 +1552,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
INIT_LIST_HEAD(&fs_info->dead_roots);
INIT_LIST_HEAD(&fs_info->hashers);
INIT_LIST_HEAD(&fs_info->delalloc_inodes);
- spin_lock_init(&fs_info->hash_lock);
spin_lock_init(&fs_info->delalloc_lock);
spin_lock_init(&fs_info->new_trans_lock);
spin_lock_init(&fs_info->ref_cache_lock);
@@ -1535,10 +1639,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
init_waitqueue_head(&fs_info->transaction_throttle);
init_waitqueue_head(&fs_info->transaction_wait);
init_waitqueue_head(&fs_info->async_submit_wait);
- init_waitqueue_head(&fs_info->tree_log_wait);
- atomic_set(&fs_info->tree_log_commit, 0);
- atomic_set(&fs_info->tree_log_writers, 0);
- fs_info->tree_log_transid = 0;
__setup_root(4096, 4096, 4096, 4096, tree_root,
fs_info, BTRFS_ROOT_TREE_OBJECTID);
@@ -1627,6 +1727,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
* low idle thresh
*/
fs_info->endio_workers.idle_thresh = 4;
+ fs_info->endio_meta_workers.idle_thresh = 4;
+
fs_info->endio_write_workers.idle_thresh = 64;
fs_info->endio_meta_write_workers.idle_thresh = 64;
@@ -1720,7 +1822,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
ret = find_and_setup_root(tree_root, fs_info,
BTRFS_DEV_TREE_OBJECTID, dev_root);
dev_root->track_dirty = 1;
-
if (ret)
goto fail_extent_root;
@@ -1740,13 +1841,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
"btrfs-cleaner");
- if (!fs_info->cleaner_kthread)
+ if (IS_ERR(fs_info->cleaner_kthread))
goto fail_csum_root;
fs_info->transaction_kthread = kthread_run(transaction_kthread,
tree_root,
"btrfs-transaction");
- if (!fs_info->transaction_kthread)
+ if (IS_ERR(fs_info->transaction_kthread))
goto fail_cleaner;
if (btrfs_super_log_root(disk_super) != 0) {
@@ -1828,13 +1929,14 @@ fail_sb_buffer:
fail_iput:
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
iput(fs_info->btree_inode);
-fail:
+
btrfs_close_devices(fs_info->fs_devices);
btrfs_mapping_tree_free(&fs_info->mapping_tree);
+ bdi_destroy(&fs_info->bdi);
+fail:
kfree(extent_root);
kfree(tree_root);
- bdi_destroy(&fs_info->bdi);
kfree(fs_info);
kfree(chunk_root);
kfree(dev_root);
@@ -1995,7 +2097,6 @@ static int write_dev_supers(struct btrfs_device *device,
int write_all_supers(struct btrfs_root *root, int max_mirrors)
{
- struct list_head *cur;
struct list_head *head = &root->fs_info->fs_devices->devices;
struct btrfs_device *dev;
struct btrfs_super_block *sb;
@@ -2011,8 +2112,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
sb = &root->fs_info->super_for_commit;
dev_item = &sb->dev_item;
- list_for_each(cur, head) {
- dev = list_entry(cur, struct btrfs_device, dev_list);
+ list_for_each_entry(dev, head, dev_list) {
if (!dev->bdev) {
total_errors++;
continue;
@@ -2045,8 +2145,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
}
total_errors = 0;
- list_for_each(cur, head) {
- dev = list_entry(cur, struct btrfs_device, dev_list);
+ list_for_each_entry(dev, head, dev_list) {
if (!dev->bdev)
continue;
if (!dev->in_fs_metadata || !dev->writeable)
@@ -2260,7 +2359,9 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
u64 transid = btrfs_header_generation(buf);
struct inode *btree_inode = root->fs_info->btree_inode;
- WARN_ON(!btrfs_tree_locked(buf));
+ btrfs_set_lock_blocking(buf);
+
+ btrfs_assert_tree_locked(buf);
if (transid != root->fs_info->generation) {
printk(KERN_CRIT "btrfs transid mismatch buffer %llu, "
"found %llu running %llu\n",
@@ -2302,14 +2403,13 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
int ret;
ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
if (ret == 0)
- buf->flags |= EXTENT_UPTODATE;
+ set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
return ret;
}
int btree_lock_page_hook(struct page *page)
{
struct inode *inode = page->mapping->host;
- struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_buffer *eb;
unsigned long len;
@@ -2324,9 +2424,7 @@ int btree_lock_page_hook(struct page *page)
goto out;
btrfs_tree_lock(eb);
- spin_lock(&root->fs_info->hash_lock);
btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
- spin_unlock(&root->fs_info->hash_lock);
btrfs_tree_unlock(eb);
free_extent_buffer(eb);
out: