diff options
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 315 |
1 files changed, 219 insertions, 96 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7ae51decf6d..22e98e04c2e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -44,6 +44,7 @@ #include "free-space-cache.h" #include "inode-map.h" #include "check-integrity.h" +#include "rcu-string.h" static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); @@ -376,9 +377,13 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, ret = read_extent_buffer_pages(io_tree, eb, start, WAIT_COMPLETE, btree_get_extent, mirror_num); - if (!ret && !verify_parent_transid(io_tree, eb, + if (!ret) { + if (!verify_parent_transid(io_tree, eb, parent_transid, 0)) - break; + break; + else + ret = -EIO; + } /* * This buffer's crc is fine, but its contents are corrupted, so @@ -406,7 +411,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, break; } - if (failed && !ret) + if (failed && !ret && failed_mirror) repair_eb_io_failure(root, eb, failed_mirror); return ret; @@ -753,9 +758,7 @@ static void run_one_async_done(struct btrfs_work *work) limit = btrfs_async_submit_limit(fs_info); limit = limit * 2 / 3; - atomic_dec(&fs_info->nr_async_submits); - - if (atomic_read(&fs_info->nr_async_submits) < limit && + if (atomic_dec_return(&fs_info->nr_async_submits) < limit && waitqueue_active(&fs_info->async_submit_wait)) wake_up(&fs_info->async_submit_wait); @@ -1113,7 +1116,7 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, spin_unlock(&root->fs_info->delalloc_lock); btrfs_panic(root->fs_info, -EOVERFLOW, "Can't clear %lu bytes from " - " dirty_mdatadata_bytes (%lu)", + " dirty_mdatadata_bytes (%llu)", buf->len, root->fs_info->dirty_metadata_bytes); } @@ -1181,6 +1184,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->defrag_running = 0; root->root_key.objectid = objectid; root->anon_dev = 0; + + spin_lock_init(&root->root_times_lock); } static int __must_check find_and_setup_root(struct btrfs_root *tree_root, @@ -1224,6 +1229,82 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) return root; } +struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 objectid) +{ + struct extent_buffer *leaf; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *root; + struct btrfs_key key; + int ret = 0; + u64 bytenr; + + root = btrfs_alloc_root(fs_info); + if (!root) + return ERR_PTR(-ENOMEM); + + __setup_root(tree_root->nodesize, tree_root->leafsize, + tree_root->sectorsize, tree_root->stripesize, + root, fs_info, objectid); + root->root_key.objectid = objectid; + root->root_key.type = BTRFS_ROOT_ITEM_KEY; + root->root_key.offset = 0; + + leaf = btrfs_alloc_free_block(trans, root, root->leafsize, + 0, objectid, NULL, 0, 0, 0); + if (IS_ERR(leaf)) { + ret = PTR_ERR(leaf); + goto fail; + } + + bytenr = leaf->start; + memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header)); + btrfs_set_header_bytenr(leaf, leaf->start); + btrfs_set_header_generation(leaf, trans->transid); + btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); + btrfs_set_header_owner(leaf, objectid); + root->node = leaf; + + write_extent_buffer(leaf, fs_info->fsid, + (unsigned long)btrfs_header_fsid(leaf), + BTRFS_FSID_SIZE); + write_extent_buffer(leaf, fs_info->chunk_tree_uuid, + (unsigned long)btrfs_header_chunk_tree_uuid(leaf), + BTRFS_UUID_SIZE); + btrfs_mark_buffer_dirty(leaf); + + root->commit_root = btrfs_root_node(root); + root->track_dirty = 1; + + + root->root_item.flags = 0; + root->root_item.byte_limit = 0; + btrfs_set_root_bytenr(&root->root_item, leaf->start); + btrfs_set_root_generation(&root->root_item, trans->transid); + btrfs_set_root_level(&root->root_item, 0); + btrfs_set_root_refs(&root->root_item, 1); + btrfs_set_root_used(&root->root_item, leaf->len); + btrfs_set_root_last_snapshot(&root->root_item, 0); + btrfs_set_root_dirid(&root->root_item, 0); + root->root_item.drop_level = 0; + + key.objectid = objectid; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = 0; + ret = btrfs_insert_root(trans, tree_root, &key, &root->root_item); + if (ret) + goto fail; + + btrfs_tree_unlock(leaf); + +fail: + if (ret) + return ERR_PTR(ret); + + return root; +} + static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) { @@ -1325,6 +1406,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, u64 generation; u32 blocksize; int ret = 0; + int slot; root = btrfs_alloc_root(fs_info); if (!root) @@ -1351,9 +1433,8 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); if (ret == 0) { l = path->nodes[0]; - read_extent_buffer(l, &root->root_item, - btrfs_item_ptr_offset(l, path->slots[0]), - sizeof(root->root_item)); + slot = path->slots[0]; + btrfs_read_root_item(tree_root, l, slot, &root->root_item); memcpy(&root->root_key, location, sizeof(*location)); } btrfs_free_path(path); @@ -1395,6 +1476,9 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, return fs_info->dev_root; if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) return fs_info->csum_root; + if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID) + return fs_info->quota_root ? fs_info->quota_root : + ERR_PTR(-ENOENT); again: spin_lock(&fs_info->fs_roots_radix_lock); root = radix_tree_lookup(&fs_info->fs_roots_radix, @@ -1532,8 +1616,6 @@ static int cleaner_kthread(void *arg) struct btrfs_root *root = arg; do { - vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); - if (!(root->fs_info->sb->s_flags & MS_RDONLY) && mutex_trylock(&root->fs_info->cleaner_mutex)) { btrfs_run_delayed_iputs(root); @@ -1565,7 +1647,6 @@ static int transaction_kthread(void *arg) do { cannot_commit = false; delay = HZ * 30; - vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); mutex_lock(&root->fs_info->transaction_kthread_mutex); spin_lock(&root->fs_info->trans_lock); @@ -1822,6 +1903,10 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) free_extent_buffer(info->extent_root->commit_root); free_extent_buffer(info->csum_root->node); free_extent_buffer(info->csum_root->commit_root); + if (info->quota_root) { + free_extent_buffer(info->quota_root->node); + free_extent_buffer(info->quota_root->commit_root); + } info->tree_root->node = NULL; info->tree_root->commit_root = NULL; @@ -1831,6 +1916,10 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) info->extent_root->commit_root = NULL; info->csum_root->node = NULL; info->csum_root->commit_root = NULL; + if (info->quota_root) { + info->quota_root->node = NULL; + info->quota_root->commit_root = NULL; + } if (chunk_root) { free_extent_buffer(info->chunk_root->node); @@ -1861,6 +1950,7 @@ int open_ctree(struct super_block *sb, struct btrfs_root *csum_root; struct btrfs_root *chunk_root; struct btrfs_root *dev_root; + struct btrfs_root *quota_root; struct btrfs_root *log_tree_root; int ret; int err = -EINVAL; @@ -1872,9 +1962,10 @@ int open_ctree(struct super_block *sb, csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info); chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info); + quota_root = fs_info->quota_root = btrfs_alloc_root(fs_info); if (!tree_root || !extent_root || !csum_root || - !chunk_root || !dev_root) { + !chunk_root || !dev_root || !quota_root) { err = -ENOMEM; goto fail; } @@ -2031,6 +2122,13 @@ int open_ctree(struct super_block *sb, init_rwsem(&fs_info->cleanup_work_sem); init_rwsem(&fs_info->subvol_sem); + spin_lock_init(&fs_info->qgroup_lock); + fs_info->qgroup_tree = RB_ROOT; + INIT_LIST_HEAD(&fs_info->dirty_qgroups); + fs_info->qgroup_seq = 1; + fs_info->quota_enabled = 0; + fs_info->pending_quota_state = 0; + btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); btrfs_init_free_cluster(&fs_info->data_alloc_cluster); @@ -2118,7 +2216,7 @@ int open_ctree(struct super_block *sb, features = btrfs_super_incompat_flags(disk_super); features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; - if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) + if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO) features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; /* @@ -2243,7 +2341,7 @@ int open_ctree(struct super_block *sb, ret |= btrfs_start_workers(&fs_info->caching_workers); ret |= btrfs_start_workers(&fs_info->readahead_workers); if (ret) { - ret = -ENOMEM; + err = -ENOMEM; goto fail_sb_buffer; } @@ -2353,12 +2451,28 @@ retry_root_backup: BTRFS_CSUM_TREE_OBJECTID, csum_root); if (ret) goto recovery_tree_root; - csum_root->track_dirty = 1; + ret = find_and_setup_root(tree_root, fs_info, + BTRFS_QUOTA_TREE_OBJECTID, quota_root); + if (ret) { + kfree(quota_root); + quota_root = fs_info->quota_root = NULL; + } else { + quota_root->track_dirty = 1; + fs_info->quota_enabled = 1; + fs_info->pending_quota_state = 1; + } + fs_info->generation = generation; fs_info->last_trans_committed = generation; + ret = btrfs_recover_balance(fs_info); + if (ret) { + printk(KERN_WARNING "btrfs: failed to recover balance\n"); + goto fail_block_groups; + } + ret = btrfs_init_dev_stats(fs_info); if (ret) { printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n", @@ -2409,17 +2523,19 @@ retry_root_backup: " integrity check module %s\n", sb->s_id); } #endif + ret = btrfs_read_qgroup_config(fs_info); + if (ret) + goto fail_trans_kthread; /* do not make disk changes in broken FS */ - if (btrfs_super_log_root(disk_super) != 0 && - !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) { + if (btrfs_super_log_root(disk_super) != 0) { u64 bytenr = btrfs_super_log_root(disk_super); if (fs_devices->rw_devices == 0) { printk(KERN_WARNING "Btrfs log replay required " "on RO media\n"); err = -EIO; - goto fail_trans_kthread; + goto fail_qgroup; } blocksize = btrfs_level_size(tree_root, @@ -2428,7 +2544,7 @@ retry_root_backup: log_tree_root = btrfs_alloc_root(fs_info); if (!log_tree_root) { err = -ENOMEM; - goto fail_trans_kthread; + goto fail_qgroup; } __setup_root(nodesize, leafsize, sectorsize, stripesize, @@ -2460,15 +2576,15 @@ retry_root_backup: if (!(sb->s_flags & MS_RDONLY)) { ret = btrfs_cleanup_fs_roots(fs_info); - if (ret) { - } + if (ret) + goto fail_trans_kthread; ret = btrfs_recover_relocation(tree_root); if (ret < 0) { printk(KERN_WARNING "btrfs: failed to recover relocation\n"); err = -EINVAL; - goto fail_trans_kthread; + goto fail_qgroup; } } @@ -2478,30 +2594,35 @@ retry_root_backup: fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); if (!fs_info->fs_root) - goto fail_trans_kthread; + goto fail_qgroup; if (IS_ERR(fs_info->fs_root)) { err = PTR_ERR(fs_info->fs_root); - goto fail_trans_kthread; + goto fail_qgroup; } - if (!(sb->s_flags & MS_RDONLY)) { - down_read(&fs_info->cleanup_work_sem); - err = btrfs_orphan_cleanup(fs_info->fs_root); - if (!err) - err = btrfs_orphan_cleanup(fs_info->tree_root); - up_read(&fs_info->cleanup_work_sem); + if (sb->s_flags & MS_RDONLY) + return 0; - if (!err) - err = btrfs_recover_balance(fs_info->tree_root); + down_read(&fs_info->cleanup_work_sem); + if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) || + (ret = btrfs_orphan_cleanup(fs_info->tree_root))) { + up_read(&fs_info->cleanup_work_sem); + close_ctree(tree_root); + return ret; + } + up_read(&fs_info->cleanup_work_sem); - if (err) { - close_ctree(tree_root); - return err; - } + ret = btrfs_resume_balance_async(fs_info); + if (ret) { + printk(KERN_WARNING "btrfs: failed to resume balance\n"); + close_ctree(tree_root); + return ret; } return 0; +fail_qgroup: + btrfs_free_qgroup_config(fs_info); fail_trans_kthread: kthread_stop(fs_info->transaction_kthread); fail_cleaner: @@ -2575,8 +2696,9 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) struct btrfs_device *device = (struct btrfs_device *) bh->b_private; - printk_ratelimited(KERN_WARNING "lost page write due to " - "I/O error on %s\n", device->name); + printk_ratelimited_in_rcu(KERN_WARNING "lost page write due to " + "I/O error on %s\n", + rcu_str_deref(device->name)); /* note, we dont' set_buffer_write_io_error because we have * our own ways of dealing with the IO errors */ @@ -2749,8 +2871,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait) wait_for_completion(&device->flush_wait); if (bio_flagged(bio, BIO_EOPNOTSUPP)) { - printk("btrfs: disabling barriers on dev %s\n", - device->name); + printk_in_rcu("btrfs: disabling barriers on dev %s\n", + rcu_str_deref(device->name)); device->nobarriers = 1; } if (!bio_flagged(bio, BIO_UPTODATE)) { @@ -3066,30 +3188,14 @@ int close_ctree(struct btrfs_root *root) /* clear out the rbtree of defraggable inodes */ btrfs_run_defrag_inodes(fs_info); - /* - * Here come 2 situations when btrfs is broken to flip readonly: - * - * 1. when btrfs flips readonly somewhere else before - * btrfs_commit_super, sb->s_flags has MS_RDONLY flag, - * and btrfs will skip to write sb directly to keep - * ERROR state on disk. - * - * 2. when btrfs flips readonly just in btrfs_commit_super, - * and in such case, btrfs cannot write sb via btrfs_commit_super, - * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag, - * btrfs will cleanup all FS resources first and write sb then. - */ if (!(fs_info->sb->s_flags & MS_RDONLY)) { ret = btrfs_commit_super(root); if (ret) printk(KERN_ERR "btrfs: commit super ret %d\n", ret); } - if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { - ret = btrfs_error_commit_super(root); - if (ret) - printk(KERN_ERR "btrfs: commit super ret %d\n", ret); - } + if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) + btrfs_error_commit_super(root); btrfs_put_block_group_cache(fs_info); @@ -3099,6 +3205,8 @@ int close_ctree(struct btrfs_root *root) fs_info->closing = 2; smp_mb(); + btrfs_free_qgroup_config(root->fs_info); + if (fs_info->delalloc_bytes) { printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", (unsigned long long)fs_info->delalloc_bytes); @@ -3118,6 +3226,10 @@ int close_ctree(struct btrfs_root *root) free_extent_buffer(fs_info->dev_root->commit_root); free_extent_buffer(fs_info->csum_root->node); free_extent_buffer(fs_info->csum_root->commit_root); + if (fs_info->quota_root) { + free_extent_buffer(fs_info->quota_root->node); + free_extent_buffer(fs_info->quota_root->commit_root); + } btrfs_free_block_groups(fs_info); @@ -3248,7 +3360,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) return btree_read_extent_buffer_pages(root, buf, 0, parent_transid); } -static int btree_lock_page_hook(struct page *page, void *data, +int btree_lock_page_hook(struct page *page, void *data, void (*flush_fn)(void *)) { struct inode *inode = page->mapping->host; @@ -3305,18 +3417,11 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, if (read_only) return 0; - if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { - printk(KERN_WARNING "warning: mount fs with errors, " - "running btrfsck is recommended\n"); - } - return 0; } -int btrfs_error_commit_super(struct btrfs_root *root) +void btrfs_error_commit_super(struct btrfs_root *root) { - int ret; - mutex_lock(&root->fs_info->cleaner_mutex); btrfs_run_delayed_iputs(root); mutex_unlock(&root->fs_info->cleaner_mutex); @@ -3326,10 +3431,6 @@ int btrfs_error_commit_super(struct btrfs_root *root) /* cleanup FS via transaction */ btrfs_cleanup_transaction(root); - - ret = write_ctree_super(NULL, root, 0); - - return ret; } static void btrfs_destroy_ordered_operations(struct btrfs_root *root) @@ -3400,7 +3501,6 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, delayed_refs = &trans->delayed_refs; -again: spin_lock(&delayed_refs->lock); if (delayed_refs->num_entries == 0) { spin_unlock(&delayed_refs->lock); @@ -3408,31 +3508,37 @@ again: return ret; } - node = rb_first(&delayed_refs->root); - while (node) { + while ((node = rb_first(&delayed_refs->root)) != NULL) { ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - node = rb_next(node); - - ref->in_tree = 0; - rb_erase(&ref->rb_node, &delayed_refs->root); - delayed_refs->num_entries--; atomic_set(&ref->refs, 1); if (btrfs_delayed_ref_is_head(ref)) { struct btrfs_delayed_ref_head *head; head = btrfs_delayed_node_to_head(ref); - spin_unlock(&delayed_refs->lock); - mutex_lock(&head->mutex); + if (!mutex_trylock(&head->mutex)) { + atomic_inc(&ref->refs); + spin_unlock(&delayed_refs->lock); + + /* Need to wait for the delayed ref to run */ + mutex_lock(&head->mutex); + mutex_unlock(&head->mutex); + btrfs_put_delayed_ref(ref); + + spin_lock(&delayed_refs->lock); + continue; + } + kfree(head->extent_op); delayed_refs->num_heads--; if (list_empty(&head->cluster)) delayed_refs->num_heads_ready--; list_del_init(&head->cluster); - mutex_unlock(&head->mutex); - btrfs_put_delayed_ref(ref); - goto again; } + ref->in_tree = 0; + rb_erase(&ref->rb_node, &delayed_refs->root); + delayed_refs->num_entries--; + spin_unlock(&delayed_refs->lock); btrfs_put_delayed_ref(ref); @@ -3520,11 +3626,9 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, &(&BTRFS_I(page->mapping->host)->io_tree)->buffer, offset >> PAGE_CACHE_SHIFT); spin_unlock(&dirty_pages->buffer_lock); - if (eb) { + if (eb) ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); - atomic_set(&eb->refs, 1); - } if (PageWriteback(page)) end_page_writeback(page); @@ -3538,8 +3642,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, spin_unlock_irq(&page->mapping->tree_lock); } - page->mapping->a_ops->invalidatepage(page, 0); unlock_page(page); + page_cache_release(page); } } @@ -3553,8 +3657,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root, u64 start; u64 end; int ret; + bool loop = true; unpin = pinned_extents; +again: while (1) { ret = find_first_extent_bit(unpin, 0, &start, &end, EXTENT_DIRTY); @@ -3572,6 +3678,15 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root, cond_resched(); } + if (loop) { + if (unpin == &root->fs_info->freed_extents[0]) + unpin = &root->fs_info->freed_extents[1]; + else + unpin = &root->fs_info->freed_extents[0]; + loop = false; + goto again; + } + return 0; } @@ -3585,21 +3700,23 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, /* FIXME: cleanup wait for commit */ cur_trans->in_commit = 1; cur_trans->blocked = 1; - if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) - wake_up(&root->fs_info->transaction_blocked_wait); + wake_up(&root->fs_info->transaction_blocked_wait); cur_trans->blocked = 0; - if (waitqueue_active(&root->fs_info->transaction_wait)) - wake_up(&root->fs_info->transaction_wait); + wake_up(&root->fs_info->transaction_wait); cur_trans->commit_done = 1; - if (waitqueue_active(&cur_trans->commit_wait)) - wake_up(&cur_trans->commit_wait); + wake_up(&cur_trans->commit_wait); + + btrfs_destroy_delayed_inodes(root); + btrfs_assert_delayed_root_empty(root); btrfs_destroy_pending_snapshots(cur_trans); btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages, EXTENT_DIRTY); + btrfs_destroy_pinned_extent(root, + root->fs_info->pinned_extents); /* memset(cur_trans, 0, sizeof(*cur_trans)); @@ -3637,17 +3754,23 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) /* FIXME: cleanup wait for commit */ t->in_commit = 1; t->blocked = 1; + smp_mb(); if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) wake_up(&root->fs_info->transaction_blocked_wait); t->blocked = 0; + smp_mb(); if (waitqueue_active(&root->fs_info->transaction_wait)) wake_up(&root->fs_info->transaction_wait); t->commit_done = 1; + smp_mb(); if (waitqueue_active(&t->commit_wait)) wake_up(&t->commit_wait); + btrfs_destroy_delayed_inodes(root); + btrfs_assert_delayed_root_empty(root); + btrfs_destroy_pending_snapshots(t); btrfs_destroy_delalloc_inodes(root); |