summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c346
1 files changed, 207 insertions, 139 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index d0ed9e664f7..1ad0f47ac85 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -39,7 +39,6 @@
#include "btrfs_inode.h"
#include "volumes.h"
#include "print-tree.h"
-#include "async-thread.h"
#include "locking.h"
#include "tree-log.h"
#include "free-space-cache.h"
@@ -73,21 +72,41 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root);
static void btrfs_error_commit_super(struct btrfs_root *root);
/*
- * end_io_wq structs are used to do processing in task context when an IO is
- * complete. This is used during reads to verify checksums, and it is used
+ * btrfs_end_io_wq structs are used to do processing in task context when an IO
+ * is complete. This is used during reads to verify checksums, and it is used
* by writes to insert metadata for new file extents after IO is complete.
*/
-struct end_io_wq {
+struct btrfs_end_io_wq {
struct bio *bio;
bio_end_io_t *end_io;
void *private;
struct btrfs_fs_info *info;
int error;
- int metadata;
+ enum btrfs_wq_endio_type metadata;
struct list_head list;
struct btrfs_work work;
};
+static struct kmem_cache *btrfs_end_io_wq_cache;
+
+int __init btrfs_end_io_wq_init(void)
+{
+ btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq",
+ sizeof(struct btrfs_end_io_wq),
+ 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+ NULL);
+ if (!btrfs_end_io_wq_cache)
+ return -ENOMEM;
+ return 0;
+}
+
+void btrfs_end_io_wq_exit(void)
+{
+ if (btrfs_end_io_wq_cache)
+ kmem_cache_destroy(btrfs_end_io_wq_cache);
+}
+
/*
* async submit bios are used to offload expensive checksumming
* onto the worker threads. They checksum file and metadata bios
@@ -328,8 +347,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
{
struct extent_state *cached_state = NULL;
int ret;
- bool need_lock = (current->journal_info ==
- (void *)BTRFS_SEND_TRANS_STUB);
+ bool need_lock = (current->journal_info == BTRFS_SEND_TRANS_STUB);
if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
return 0;
@@ -349,9 +367,9 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
ret = 0;
goto out;
}
- printk_ratelimited("parent transid verify failed on %llu wanted %llu "
- "found %llu\n",
- eb->start, parent_transid, btrfs_header_generation(eb));
+ printk_ratelimited(KERN_INFO "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n",
+ eb->fs_info->sb->s_id, eb->start,
+ parent_transid, btrfs_header_generation(eb));
ret = 1;
/*
@@ -608,22 +626,22 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
goto err;
eb->read_mirror = mirror;
- if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
+ if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags)) {
ret = -EIO;
goto err;
}
found_start = btrfs_header_bytenr(eb);
if (found_start != eb->start) {
- printk_ratelimited(KERN_INFO "BTRFS: bad tree block start "
+ printk_ratelimited(KERN_INFO "BTRFS (device %s): bad tree block start "
"%llu %llu\n",
- found_start, eb->start);
+ eb->fs_info->sb->s_id, found_start, eb->start);
ret = -EIO;
goto err;
}
if (check_tree_block_fsid(root, eb)) {
- printk_ratelimited(KERN_INFO "BTRFS: bad fsid on block %llu\n",
- eb->start);
+ printk_ratelimited(KERN_INFO "BTRFS (device %s): bad fsid on block %llu\n",
+ eb->fs_info->sb->s_id, eb->start);
ret = -EIO;
goto err;
}
@@ -681,7 +699,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
eb = (struct extent_buffer *)page->private;
- set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
+ set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
eb->read_mirror = failed_mirror;
atomic_dec(&eb->io_pages);
if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
@@ -691,52 +709,55 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
static void end_workqueue_bio(struct bio *bio, int err)
{
- struct end_io_wq *end_io_wq = bio->bi_private;
+ struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
struct btrfs_fs_info *fs_info;
+ struct btrfs_workqueue *wq;
+ btrfs_work_func_t func;
fs_info = end_io_wq->info;
end_io_wq->error = err;
- btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
if (bio->bi_rw & REQ_WRITE) {
- if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
- btrfs_queue_work(fs_info->endio_meta_write_workers,
- &end_io_wq->work);
- else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
- btrfs_queue_work(fs_info->endio_freespace_worker,
- &end_io_wq->work);
- else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
- btrfs_queue_work(fs_info->endio_raid56_workers,
- &end_io_wq->work);
- else
- btrfs_queue_work(fs_info->endio_write_workers,
- &end_io_wq->work);
+ if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
+ wq = fs_info->endio_meta_write_workers;
+ func = btrfs_endio_meta_write_helper;
+ } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) {
+ wq = fs_info->endio_freespace_worker;
+ func = btrfs_freespace_write_helper;
+ } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
+ wq = fs_info->endio_raid56_workers;
+ func = btrfs_endio_raid56_helper;
+ } else {
+ wq = fs_info->endio_write_workers;
+ func = btrfs_endio_write_helper;
+ }
} else {
- if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
- btrfs_queue_work(fs_info->endio_raid56_workers,
- &end_io_wq->work);
- else if (end_io_wq->metadata)
- btrfs_queue_work(fs_info->endio_meta_workers,
- &end_io_wq->work);
- else
- btrfs_queue_work(fs_info->endio_workers,
- &end_io_wq->work);
+ if (unlikely(end_io_wq->metadata ==
+ BTRFS_WQ_ENDIO_DIO_REPAIR)) {
+ wq = fs_info->endio_repair_workers;
+ func = btrfs_endio_repair_helper;
+ } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
+ wq = fs_info->endio_raid56_workers;
+ func = btrfs_endio_raid56_helper;
+ } else if (end_io_wq->metadata) {
+ wq = fs_info->endio_meta_workers;
+ func = btrfs_endio_meta_helper;
+ } else {
+ wq = fs_info->endio_workers;
+ func = btrfs_endio_helper;
+ }
}
+
+ btrfs_init_work(&end_io_wq->work, func, end_workqueue_fn, NULL, NULL);
+ btrfs_queue_work(wq, &end_io_wq->work);
}
-/*
- * For the metadata arg you want
- *
- * 0 - if data
- * 1 - if normal metadta
- * 2 - if writing to the free space cache area
- * 3 - raid parity work
- */
int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
- int metadata)
+ enum btrfs_wq_endio_type metadata)
{
- struct end_io_wq *end_io_wq;
- end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS);
+ struct btrfs_end_io_wq *end_io_wq;
+
+ end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
if (!end_io_wq)
return -ENOMEM;
@@ -828,7 +849,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
async->submit_bio_start = submit_bio_start;
async->submit_bio_done = submit_bio_done;
- btrfs_init_work(&async->work, run_one_async_start,
+ btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start,
run_one_async_done, run_one_async_free);
async->bio_flags = bio_flags;
@@ -920,7 +941,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
* can happen in the async kernel threads
*/
ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
- bio, 1);
+ bio, BTRFS_WQ_ENDIO_METADATA);
if (ret)
goto out_w_error;
ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
@@ -1052,20 +1073,17 @@ static const struct address_space_operations btree_aops = {
.set_page_dirty = btree_set_page_dirty,
};
-int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
- u64 parent_transid)
+void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
{
struct extent_buffer *buf = NULL;
struct inode *btree_inode = root->fs_info->btree_inode;
- int ret = 0;
buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
if (!buf)
- return 0;
+ return;
read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
buf, 0, WAIT_NONE, btree_get_extent, 0);
free_extent_buffer(buf);
- return ret;
}
int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
@@ -1101,7 +1119,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
}
struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
- u64 bytenr, u32 blocksize)
+ u64 bytenr)
{
return find_extent_buffer(root->fs_info, bytenr);
}
@@ -1109,11 +1127,9 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
u64 bytenr, u32 blocksize)
{
-#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
- if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
+ if (btrfs_test_is_dummy_root(root))
return alloc_test_extent_buffer(root->fs_info, bytenr,
blocksize);
-#endif
return alloc_extent_buffer(root->fs_info, bytenr, blocksize);
}
@@ -1131,12 +1147,12 @@ int btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
}
struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
- u32 blocksize, u64 parent_transid)
+ u64 parent_transid)
{
struct extent_buffer *buf = NULL;
int ret;
- buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
+ buf = btrfs_find_create_tree_block(root, bytenr, root->nodesize);
if (!buf)
return NULL;
@@ -1178,7 +1194,7 @@ static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void)
if (!writers)
return ERR_PTR(-ENOMEM);
- ret = percpu_counter_init(&writers->counter, 0);
+ ret = percpu_counter_init(&writers->counter, 0, GFP_KERNEL);
if (ret < 0) {
kfree(writers);
return ERR_PTR(ret);
@@ -1195,16 +1211,14 @@ btrfs_free_subvolume_writers(struct btrfs_subvolume_writers *writers)
kfree(writers);
}
-static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
- u32 stripesize, struct btrfs_root *root,
- struct btrfs_fs_info *fs_info,
+static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
+ struct btrfs_root *root, struct btrfs_fs_info *fs_info,
u64 objectid)
{
root->node = NULL;
root->commit_root = NULL;
root->sectorsize = sectorsize;
root->nodesize = nodesize;
- root->leafsize = leafsize;
root->stripesize = stripesize;
root->state = 0;
root->orphan_cleanup_state = 0;
@@ -1290,7 +1304,7 @@ struct btrfs_root *btrfs_alloc_dummy_root(void)
root = btrfs_alloc_root(NULL);
if (!root)
return ERR_PTR(-ENOMEM);
- __setup_root(4096, 4096, 4096, 4096, root, NULL, 1);
+ __setup_root(4096, 4096, 4096, root, NULL, 1);
set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state);
root->alloc_bytenr = 0;
@@ -1313,15 +1327,13 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
if (!root)
return ERR_PTR(-ENOMEM);
- __setup_root(tree_root->nodesize, tree_root->leafsize,
- tree_root->sectorsize, tree_root->stripesize,
- root, fs_info, objectid);
+ __setup_root(tree_root->nodesize, tree_root->sectorsize,
+ tree_root->stripesize, root, fs_info, objectid);
root->root_key.objectid = objectid;
root->root_key.type = BTRFS_ROOT_ITEM_KEY;
root->root_key.offset = 0;
- leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
- 0, objectid, NULL, 0, 0, 0);
+ leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0);
if (IS_ERR(leaf)) {
ret = PTR_ERR(leaf);
leaf = NULL;
@@ -1391,9 +1403,9 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
if (!root)
return ERR_PTR(-ENOMEM);
- __setup_root(tree_root->nodesize, tree_root->leafsize,
- tree_root->sectorsize, tree_root->stripesize,
- root, fs_info, BTRFS_TREE_LOG_OBJECTID);
+ __setup_root(tree_root->nodesize, tree_root->sectorsize,
+ tree_root->stripesize, root, fs_info,
+ BTRFS_TREE_LOG_OBJECTID);
root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
root->root_key.type = BTRFS_ROOT_ITEM_KEY;
@@ -1408,9 +1420,8 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
* updated (along with back refs to the log tree).
*/
- leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
- BTRFS_TREE_LOG_OBJECTID, NULL,
- 0, 0, 0);
+ leaf = btrfs_alloc_tree_block(trans, root, 0, BTRFS_TREE_LOG_OBJECTID,
+ NULL, 0, 0, 0);
if (IS_ERR(leaf)) {
kfree(root);
return ERR_CAST(leaf);
@@ -1460,7 +1471,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
btrfs_set_stack_inode_generation(inode_item, 1);
btrfs_set_stack_inode_size(inode_item, 3);
btrfs_set_stack_inode_nlink(inode_item, 1);
- btrfs_set_stack_inode_nbytes(inode_item, root->leafsize);
+ btrfs_set_stack_inode_nbytes(inode_item, root->nodesize);
btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
btrfs_set_root_node(&log_root->root_item, log_root->node);
@@ -1480,7 +1491,6 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
struct btrfs_fs_info *fs_info = tree_root->fs_info;
struct btrfs_path *path;
u64 generation;
- u32 blocksize;
int ret;
path = btrfs_alloc_path();
@@ -1493,9 +1503,8 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
goto alloc_fail;
}
- __setup_root(tree_root->nodesize, tree_root->leafsize,
- tree_root->sectorsize, tree_root->stripesize,
- root, fs_info, key->objectid);
+ __setup_root(tree_root->nodesize, tree_root->sectorsize,
+ tree_root->stripesize, root, fs_info, key->objectid);
ret = btrfs_find_root(tree_root, key, path,
&root->root_item, &root->root_key);
@@ -1506,9 +1515,8 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
}
generation = btrfs_root_generation(&root->root_item);
- blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
- blocksize, generation);
+ generation);
if (!root->node) {
ret = -ENOMEM;
goto find_fail;
@@ -1568,8 +1576,8 @@ int btrfs_init_fs_root(struct btrfs_root *root)
root->subv_writers = writers;
btrfs_init_free_ino_ctl(root);
- spin_lock_init(&root->cache_lock);
- init_waitqueue_head(&root->cache_wait);
+ spin_lock_init(&root->ino_cache_lock);
+ init_waitqueue_head(&root->ino_cache_wait);
ret = get_anon_bdev(&root->anon_dev);
if (ret)
@@ -1694,7 +1702,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
if (!device->bdev)
continue;
bdi = blk_get_backing_dev_info(device->bdev);
- if (bdi && bdi_congested(bdi, bdi_bits)) {
+ if (bdi_congested(bdi, bdi_bits)) {
ret = 1;
break;
}
@@ -1703,10 +1711,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
return ret;
}
-/*
- * If this fails, caller must call bdi_destroy() to get rid of the
- * bdi again.
- */
static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
{
int err;
@@ -1729,16 +1733,16 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
static void end_workqueue_fn(struct btrfs_work *work)
{
struct bio *bio;
- struct end_io_wq *end_io_wq;
+ struct btrfs_end_io_wq *end_io_wq;
int error;
- end_io_wq = container_of(work, struct end_io_wq, work);
+ end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
bio = end_io_wq->bio;
error = end_io_wq->error;
bio->bi_private = end_io_wq->private;
bio->bi_end_io = end_io_wq->end_io;
- kfree(end_io_wq);
+ kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
bio_endio_nodec(bio, error);
}
@@ -1767,6 +1771,7 @@ static int cleaner_kthread(void *arg)
}
btrfs_run_delayed_iputs(root);
+ btrfs_delete_unused_bgs(root->fs_info);
again = btrfs_clean_one_deleted_snapshot(root);
mutex_unlock(&root->fs_info->cleaner_mutex);
@@ -2058,6 +2063,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
btrfs_destroy_workqueue(fs_info->endio_workers);
btrfs_destroy_workqueue(fs_info->endio_meta_workers);
btrfs_destroy_workqueue(fs_info->endio_raid56_workers);
+ btrfs_destroy_workqueue(fs_info->endio_repair_workers);
btrfs_destroy_workqueue(fs_info->rmw_workers);
btrfs_destroy_workqueue(fs_info->endio_meta_write_workers);
btrfs_destroy_workqueue(fs_info->endio_write_workers);
@@ -2138,8 +2144,6 @@ int open_ctree(struct super_block *sb,
{
u32 sectorsize;
u32 nodesize;
- u32 leafsize;
- u32 blocksize;
u32 stripesize;
u64 generation;
u64 features;
@@ -2183,7 +2187,7 @@ int open_ctree(struct super_block *sb,
goto fail_srcu;
}
- ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0);
+ ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
if (ret) {
err = ret;
goto fail_bdi;
@@ -2191,13 +2195,13 @@ int open_ctree(struct super_block *sb,
fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE *
(1 + ilog2(nr_cpu_ids));
- ret = percpu_counter_init(&fs_info->delalloc_bytes, 0);
+ ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL);
if (ret) {
err = ret;
goto fail_dirty_metadata_bytes;
}
- ret = percpu_counter_init(&fs_info->bio_counter, 0);
+ ret = percpu_counter_init(&fs_info->bio_counter, 0, GFP_KERNEL);
if (ret) {
err = ret;
goto fail_delalloc_bytes;
@@ -2228,6 +2232,7 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->super_lock);
spin_lock_init(&fs_info->qgroup_op_lock);
spin_lock_init(&fs_info->buffer_lock);
+ spin_lock_init(&fs_info->unused_bgs_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
mutex_init(&fs_info->reloc_mutex);
mutex_init(&fs_info->delalloc_root_mutex);
@@ -2237,6 +2242,7 @@ int open_ctree(struct super_block *sb,
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
INIT_LIST_HEAD(&fs_info->space_info);
INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
+ INIT_LIST_HEAD(&fs_info->unused_bgs);
btrfs_mapping_init(&fs_info->mapping_tree);
btrfs_init_block_rsv(&fs_info->global_block_rsv,
BTRFS_BLOCK_RSV_GLOBAL);
@@ -2255,7 +2261,7 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->qgroup_op_seq, 0);
atomic64_set(&fs_info->tree_mod_seq, 0);
fs_info->sb = sb;
- fs_info->max_inline = 8192 * 1024;
+ fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
fs_info->metadata_ratio = 0;
fs_info->defrag_inodes = RB_ROOT;
fs_info->free_chunk_space = 0;
@@ -2384,7 +2390,7 @@ int open_ctree(struct super_block *sb,
goto fail_alloc;
}
- __setup_root(4096, 4096, 4096, 4096, tree_root,
+ __setup_root(4096, 4096, 4096, tree_root,
fs_info, BTRFS_ROOT_TREE_OBJECTID);
invalidate_bdev(fs_devices->latest_bdev);
@@ -2464,19 +2470,22 @@ int open_ctree(struct super_block *sb,
goto fail_alloc;
}
- if (btrfs_super_leafsize(disk_super) !=
+ /*
+ * Leafsize and nodesize were always equal, this is only a sanity check.
+ */
+ if (le32_to_cpu(disk_super->__unused_leafsize) !=
btrfs_super_nodesize(disk_super)) {
printk(KERN_ERR "BTRFS: couldn't mount because metadata "
"blocksizes don't match. node %d leaf %d\n",
btrfs_super_nodesize(disk_super),
- btrfs_super_leafsize(disk_super));
+ le32_to_cpu(disk_super->__unused_leafsize));
err = -EINVAL;
goto fail_alloc;
}
- if (btrfs_super_leafsize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) {
+ if (btrfs_super_nodesize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) {
printk(KERN_ERR "BTRFS: couldn't mount because metadata "
"blocksize (%d) was too large\n",
- btrfs_super_leafsize(disk_super));
+ btrfs_super_nodesize(disk_super));
err = -EINVAL;
goto fail_alloc;
}
@@ -2493,17 +2502,16 @@ int open_ctree(struct super_block *sb,
* flag our filesystem as having big metadata blocks if
* they are bigger than the page size
*/
- if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) {
+ if (btrfs_super_nodesize(disk_super) > PAGE_CACHE_SIZE) {
if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n");
features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
}
nodesize = btrfs_super_nodesize(disk_super);
- leafsize = btrfs_super_leafsize(disk_super);
sectorsize = btrfs_super_sectorsize(disk_super);
stripesize = btrfs_super_stripesize(disk_super);
- fs_info->dirty_metadata_batch = leafsize * (1 + ilog2(nr_cpu_ids));
+ fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids));
fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
/*
@@ -2511,7 +2519,7 @@ int open_ctree(struct super_block *sb,
* extent buffers for the same range. It leads to corruptions
*/
if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
- (sectorsize != leafsize)) {
+ (sectorsize != nodesize)) {
printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes "
"are not allowed for mixed block groups on %s\n",
sb->s_id);
@@ -2574,6 +2582,8 @@ int open_ctree(struct super_block *sb,
btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2);
fs_info->endio_raid56_workers =
btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4);
+ fs_info->endio_repair_workers =
+ btrfs_alloc_workqueue("endio-repair", flags, 1, 0);
fs_info->rmw_workers =
btrfs_alloc_workqueue("rmw", flags, max_active, 2);
fs_info->endio_write_workers =
@@ -2595,11 +2605,12 @@ int open_ctree(struct super_block *sb,
fs_info->submit_workers && fs_info->flush_workers &&
fs_info->endio_workers && fs_info->endio_meta_workers &&
fs_info->endio_meta_write_workers &&
+ fs_info->endio_repair_workers &&
fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
fs_info->endio_freespace_worker && fs_info->rmw_workers &&
fs_info->caching_workers && fs_info->readahead_workers &&
fs_info->fixup_workers && fs_info->delayed_workers &&
- fs_info->fixup_workers && fs_info->extent_workers &&
+ fs_info->extent_workers &&
fs_info->qgroup_rescan_workers)) {
err = -ENOMEM;
goto fail_sb_buffer;
@@ -2610,7 +2621,6 @@ int open_ctree(struct super_block *sb,
4 * 1024 * 1024 / PAGE_CACHE_SIZE);
tree_root->nodesize = nodesize;
- tree_root->leafsize = leafsize;
tree_root->sectorsize = sectorsize;
tree_root->stripesize = stripesize;
@@ -2637,16 +2647,14 @@ int open_ctree(struct super_block *sb,
goto fail_sb_buffer;
}
- blocksize = btrfs_level_size(tree_root,
- btrfs_super_chunk_root_level(disk_super));
generation = btrfs_super_chunk_root_generation(disk_super);
- __setup_root(nodesize, leafsize, sectorsize, stripesize,
- chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);
+ __setup_root(nodesize, sectorsize, stripesize, chunk_root,
+ fs_info, BTRFS_CHUNK_TREE_OBJECTID);
chunk_root->node = read_tree_block(chunk_root,
btrfs_super_chunk_root(disk_super),
- blocksize, generation);
+ generation);
if (!chunk_root->node ||
!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n",
@@ -2679,13 +2687,11 @@ int open_ctree(struct super_block *sb,
}
retry_root_backup:
- blocksize = btrfs_level_size(tree_root,
- btrfs_super_root_level(disk_super));
generation = btrfs_super_generation(disk_super);
tree_root->node = read_tree_block(tree_root,
btrfs_super_root(disk_super),
- blocksize, generation);
+ generation);
if (!tree_root->node ||
!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
@@ -2854,9 +2860,6 @@ retry_root_backup:
err = -EIO;
goto fail_qgroup;
}
- blocksize =
- btrfs_level_size(tree_root,
- btrfs_super_log_root_level(disk_super));
log_tree_root = btrfs_alloc_root(fs_info);
if (!log_tree_root) {
@@ -2864,11 +2867,10 @@ retry_root_backup:
goto fail_qgroup;
}
- __setup_root(nodesize, leafsize, sectorsize, stripesize,
+ __setup_root(nodesize, sectorsize, stripesize,
log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
log_tree_root->node = read_tree_block(tree_root, bytenr,
- blocksize,
generation + 1);
if (!log_tree_root->node ||
!extent_buffer_uptodate(log_tree_root->node)) {
@@ -2975,6 +2977,8 @@ retry_root_backup:
fs_info->update_uuid_tree_gen = 1;
}
+ fs_info->open = 1;
+
return 0;
fail_qgroup:
@@ -3134,7 +3138,8 @@ static int write_dev_supers(struct btrfs_device *device,
for (i = 0; i < max_mirrors; i++) {
bytenr = btrfs_sb_offset(i);
- if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
+ if (bytenr + BTRFS_SUPER_INFO_SIZE >=
+ device->commit_total_bytes)
break;
if (wait) {
@@ -3450,8 +3455,10 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
btrfs_set_stack_device_generation(dev_item, 0);
btrfs_set_stack_device_type(dev_item, dev->type);
btrfs_set_stack_device_id(dev_item, dev->devid);
- btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes);
- btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used);
+ btrfs_set_stack_device_total_bytes(dev_item,
+ dev->commit_total_bytes);
+ btrfs_set_stack_device_bytes_used(dev_item,
+ dev->commit_bytes_used);
btrfs_set_stack_device_io_align(dev_item, dev->io_align);
btrfs_set_stack_device_io_width(dev_item, dev->io_width);
btrfs_set_stack_device_sector_size(dev_item, dev->sector_size);
@@ -3526,7 +3533,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
static void free_fs_root(struct btrfs_root *root)
{
- iput(root->cache_inode);
+ iput(root->ino_cache_inode);
WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
btrfs_free_block_rsv(root, root->orphan_block_rsv);
root->orphan_block_rsv = NULL;
@@ -3617,7 +3624,7 @@ int btrfs_commit_super(struct btrfs_root *root)
return btrfs_commit_transaction(trans, root);
}
-int close_ctree(struct btrfs_root *root)
+void close_ctree(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
@@ -3683,6 +3690,7 @@ int close_ctree(struct btrfs_root *root)
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
btrfs_stop_all_workers(fs_info);
+ fs_info->open = 0;
free_root_pointers(fs_info, 1);
iput(fs_info->btree_inode);
@@ -3705,8 +3713,6 @@ int close_ctree(struct btrfs_root *root)
btrfs_free_block_rsv(root, root->orphan_block_rsv);
root->orphan_block_rsv = NULL;
-
- return 0;
}
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
@@ -3808,10 +3814,73 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
int read_only)
{
+ struct btrfs_super_block *sb = fs_info->super_copy;
+ int ret = 0;
+
+ if (sb->root_level > BTRFS_MAX_LEVEL) {
+ printk(KERN_ERR "BTRFS: tree_root level too big: %d > %d\n",
+ sb->root_level, BTRFS_MAX_LEVEL);
+ ret = -EINVAL;
+ }
+ if (sb->chunk_root_level > BTRFS_MAX_LEVEL) {
+ printk(KERN_ERR "BTRFS: chunk_root level too big: %d > %d\n",
+ sb->chunk_root_level, BTRFS_MAX_LEVEL);
+ ret = -EINVAL;
+ }
+ if (sb->log_root_level > BTRFS_MAX_LEVEL) {
+ printk(KERN_ERR "BTRFS: log_root level too big: %d > %d\n",
+ sb->log_root_level, BTRFS_MAX_LEVEL);
+ ret = -EINVAL;
+ }
+
/*
- * Placeholder for checks
+ * The common minimum, we don't know if we can trust the nodesize/sectorsize
+ * items yet, they'll be verified later. Issue just a warning.
*/
- return 0;
+ if (!IS_ALIGNED(sb->root, 4096))
+ printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n",
+ sb->root);
+ if (!IS_ALIGNED(sb->chunk_root, 4096))
+ printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n",
+ sb->chunk_root);
+ if (!IS_ALIGNED(sb->log_root, 4096))
+ printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n",
+ sb->log_root);
+
+ if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) {
+ printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n",
+ fs_info->fsid, sb->dev_item.fsid);
+ ret = -EINVAL;
+ }
+
+ /*
+ * Hint to catch really bogus numbers, bitflips or so, more exact checks are
+ * done later
+ */
+ if (sb->num_devices > (1UL << 31))
+ printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n",
+ sb->num_devices);
+
+ if (sb->bytenr != BTRFS_SUPER_INFO_OFFSET) {
+ printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n",
+ sb->bytenr, BTRFS_SUPER_INFO_OFFSET);
+ ret = -EINVAL;
+ }
+
+ /*
+ * The generation is a global counter, we'll trust it more than the others
+ * but it's still possible that it's the one that's wrong.
+ */
+ if (sb->generation < sb->chunk_root_generation)
+ printk(KERN_WARNING
+ "BTRFS: suspicious: generation < chunk_root_generation: %llu < %llu\n",
+ sb->generation, sb->chunk_root_generation);
+ if (sb->generation < sb->cache_generation && sb->cache_generation != (u64)-1)
+ printk(KERN_WARNING
+ "BTRFS: suspicious: generation < cache_generation: %llu < %llu\n",
+ sb->generation, sb->cache_generation);
+
+ return ret;
}
static void btrfs_error_commit_super(struct btrfs_root *root)
@@ -4003,9 +4072,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
while (start <= end) {
- eb = btrfs_find_tree_block(root, start,
- root->leafsize);
- start += root->leafsize;
+ eb = btrfs_find_tree_block(root, start);
+ start += root->nodesize;
if (!eb)
continue;
wait_on_extent_buffer_writeback(eb);