From 8d875f95da43c6a8f18f77869f2ef26e9594fecc Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Aug 2014 10:47:42 -0700 Subject: btrfs: disable strict file flushes for renames and truncates Truncates and renames are often used to replace old versions of a file with new versions. Applications often expect this to be an atomic replacement, even if they haven't done anything to make sure the new version is fully on disk. Btrfs has strict flushing in place to make sure that renaming over an old file with a new file will fully flush out the new file before allowing the transaction commit with the rename to complete. This ordering means the commit code needs to be able to lock file pages, and there are a few paths in the filesystem where we will try to end a transaction with the page lock held. It's rare, but these things can deadlock. This patch removes the ordered flushes and switches to a best effort filemap_flush like ext4 uses. It's not perfect, but it should fix the deadlocks. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 08e65e9cf2a..d0ed9e664f7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -60,8 +60,6 @@ static void end_workqueue_fn(struct btrfs_work *work); static void free_fs_root(struct btrfs_root *root); static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, int read_only); -static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, - struct btrfs_root *root); static void btrfs_destroy_ordered_extents(struct btrfs_root *root); static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, struct btrfs_root *root); @@ -3829,34 +3827,6 @@ static void btrfs_error_commit_super(struct btrfs_root *root) btrfs_cleanup_transaction(root); } -static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, - struct btrfs_root *root) -{ - 
struct btrfs_inode *btrfs_inode; - struct list_head splice; - - INIT_LIST_HEAD(&splice); - - mutex_lock(&root->fs_info->ordered_operations_mutex); - spin_lock(&root->fs_info->ordered_root_lock); - - list_splice_init(&t->ordered_operations, &splice); - while (!list_empty(&splice)) { - btrfs_inode = list_entry(splice.next, struct btrfs_inode, - ordered_operations); - - list_del_init(&btrfs_inode->ordered_operations); - spin_unlock(&root->fs_info->ordered_root_lock); - - btrfs_invalidate_inodes(btrfs_inode->root); - - spin_lock(&root->fs_info->ordered_root_lock); - } - - spin_unlock(&root->fs_info->ordered_root_lock); - mutex_unlock(&root->fs_info->ordered_operations_mutex); -} - static void btrfs_destroy_ordered_extents(struct btrfs_root *root) { struct btrfs_ordered_extent *ordered; @@ -4093,8 +4063,6 @@ again: void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, struct btrfs_root *root) { - btrfs_destroy_ordered_operations(cur_trans, root); - btrfs_destroy_delayed_refs(cur_trans, root); cur_trans->state = TRANS_STATE_COMMIT_START; -- cgit v1.2.3-70-g09d2 From 7df69d3e94d6de537fd1afb574c760d8dc83ab60 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 24 Jul 2014 11:37:13 +0800 Subject: Btrfs: Fix wrong device size when we are resizing the device total_bytes of device is just an in-memory variable which is used to record the size of the device, and it might be changed before we resize a device; if the resize operation fails, it will be rolled back. But some code used it to update on-disk metadata of the device, it would cause the problem that on-disk metadata of the devices was not consistent. We should use the other variable named disk_total_bytes to update the on-disk metadata of device, because that variable is updated only when the resize operation is successful. Fix it. 
Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 ++- fs/btrfs/volumes.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d0ed9e664f7..c99a414813c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3450,7 +3450,8 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) btrfs_set_stack_device_generation(dev_item, 0); btrfs_set_stack_device_type(dev_item, dev->type); btrfs_set_stack_device_id(dev_item, dev->devid); - btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes); + btrfs_set_stack_device_total_bytes(dev_item, + dev->disk_total_bytes); btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); btrfs_set_stack_device_io_align(dev_item, dev->io_align); btrfs_set_stack_device_io_width(dev_item, dev->io_width); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 00c8efdcd1e..9d4ce53d756 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1483,7 +1483,7 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans, btrfs_set_device_io_align(leaf, dev_item, device->io_align); btrfs_set_device_io_width(leaf, dev_item, device->io_width); btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); - btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); + btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes); btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); btrfs_set_device_group(leaf, dev_item, 0); btrfs_set_device_seek_speed(leaf, dev_item, 0); -- cgit v1.2.3-70-g09d2 From 9e0af23764344f7f1b68e4eefbe7dc865018b63d Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 15 Aug 2014 23:36:53 +0800 Subject: Btrfs: fix task hang under heavy compressed write This has been reported and discussed for a long time, and this hang occurs in both 3.15 and 3.16. Btrfs now migrates to use kernel workqueue, but it introduces this hang problem. 
Btrfs has a kind of work queued in an ordered way, which means that its ordered_func() must be processed in the way of FIFO, so it usually looks like -- normal_work_helper(arg) work = container_of(arg, struct btrfs_work, normal_work); work->func() <---- (we name it work X) for ordered_work in wq->ordered_list ordered_work->ordered_func() ordered_work->ordered_free() The hang is a rare case, first when we find free space, we get an uncached block group, then we go to read its free space cache inode for free space information, so it will file a readahead request btrfs_readpages() for page that is not in page cache __do_readpage() submit_extent_page() btrfs_submit_bio_hook() btrfs_bio_wq_end_io() submit_bio() end_workqueue_bio() <--(ret by the 1st endio) queue a work(named work Y) for the 2nd also the real endio() So the hang occurs when work Y's work_struct and work X's work_struct happen to share the same address. A bit more explanation, A,B,C -- struct btrfs_work arg -- struct work_struct kthread: worker_thread() pick up a work_struct from @worklist process_one_work(arg) worker->current_work = arg; <-- arg is A->normal_work worker->current_func(arg) normal_work_helper(arg) A = container_of(arg, struct btrfs_work, normal_work); A->func() A->ordered_func() A->ordered_free() <-- A gets freed B->ordered_func() submit_compressed_extents() find_free_extent() load_free_space_inode() ... <-- (the above readahead stack) end_workqueue_bio() btrfs_queue_work(work C) B->ordered_free() As if work A has a high priority in wq->ordered_list and there are more ordered works queued after it, such as B->ordered_func(), its memory could have been freed before normal_work_helper() returns, which means that kernel workqueue code worker_thread() still has worker->current_work pointer to be work A->normal_work's, ie. arg's address. 
Meanwhile, work C is allocated after work A is freed, work C->normal_work and work A->normal_work are likely to share the same address(I confirmed this with ftrace output, so I'm not just guessing, it's rare though). When another kthread picks up work C->normal_work to process, and finds our kthread is processing it(see find_worker_executing_work()), it'll treat work C as a collision and skip it, which ends up with nobody processing work C. So the situation is that our kthread is waiting forever on work C. Besides, there're other cases that can lead to deadlock, but the real problem is that all btrfs workqueues share one work->func, -- normal_work_helper, so this makes each workqueue have its own helper function, but only a wrapper of normal_work_helper. With this patch, I no longer hit the above hang. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/async-thread.c | 44 ++++++++++++++++++++++++++++++++-------- fs/btrfs/async-thread.h | 28 ++++++++++++++++++++++++- fs/btrfs/delayed-inode.c | 4 ++-- fs/btrfs/disk-io.c | 53 ++++++++++++++++++++++++++---------------------- fs/btrfs/extent-tree.c | 7 ++++--- fs/btrfs/inode.c | 35 +++++++++++++++++++++----------- fs/btrfs/ordered-data.c | 1 + fs/btrfs/qgroup.c | 1 + fs/btrfs/raid56.c | 9 +++++--- fs/btrfs/reada.c | 3 ++- fs/btrfs/scrub.c | 14 +++++++------ fs/btrfs/volumes.c | 3 ++- 12 files changed, 141 insertions(+), 61 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 5a201d81049..fbd76ded9a3 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -22,7 +22,6 @@ #include #include #include -#include #include "async-thread.h" #include "ctree.h" @@ -55,8 +54,39 @@ struct btrfs_workqueue { struct __btrfs_workqueue *high; }; -static inline struct __btrfs_workqueue -*__btrfs_alloc_workqueue(const char *name, int flags, int max_active, +static void normal_work_helper(struct btrfs_work *work); + +#define BTRFS_WORK_HELPER(name) \ +void 
btrfs_##name(struct work_struct *arg) \ +{ \ + struct btrfs_work *work = container_of(arg, struct btrfs_work, \ + normal_work); \ + normal_work_helper(work); \ +} + +BTRFS_WORK_HELPER(worker_helper); +BTRFS_WORK_HELPER(delalloc_helper); +BTRFS_WORK_HELPER(flush_delalloc_helper); +BTRFS_WORK_HELPER(cache_helper); +BTRFS_WORK_HELPER(submit_helper); +BTRFS_WORK_HELPER(fixup_helper); +BTRFS_WORK_HELPER(endio_helper); +BTRFS_WORK_HELPER(endio_meta_helper); +BTRFS_WORK_HELPER(endio_meta_write_helper); +BTRFS_WORK_HELPER(endio_raid56_helper); +BTRFS_WORK_HELPER(rmw_helper); +BTRFS_WORK_HELPER(endio_write_helper); +BTRFS_WORK_HELPER(freespace_write_helper); +BTRFS_WORK_HELPER(delayed_meta_helper); +BTRFS_WORK_HELPER(readahead_helper); +BTRFS_WORK_HELPER(qgroup_rescan_helper); +BTRFS_WORK_HELPER(extent_refs_helper); +BTRFS_WORK_HELPER(scrub_helper); +BTRFS_WORK_HELPER(scrubwrc_helper); +BTRFS_WORK_HELPER(scrubnc_helper); + +static struct __btrfs_workqueue * +__btrfs_alloc_workqueue(const char *name, int flags, int max_active, int thresh) { struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); @@ -232,13 +262,11 @@ static void run_ordered_work(struct __btrfs_workqueue *wq) spin_unlock_irqrestore(lock, flags); } -static void normal_work_helper(struct work_struct *arg) +static void normal_work_helper(struct btrfs_work *work) { - struct btrfs_work *work; struct __btrfs_workqueue *wq; int need_order = 0; - work = container_of(arg, struct btrfs_work, normal_work); /* * We should not touch things inside work in the following cases: * 1) after work->func() if it has no ordered_free @@ -262,7 +290,7 @@ static void normal_work_helper(struct work_struct *arg) trace_btrfs_all_work_done(work); } -void btrfs_init_work(struct btrfs_work *work, +void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func, btrfs_func_t func, btrfs_func_t ordered_func, btrfs_func_t ordered_free) @@ -270,7 +298,7 @@ void btrfs_init_work(struct btrfs_work *work, work->func = func; 
work->ordered_func = ordered_func; work->ordered_free = ordered_free; - INIT_WORK(&work->normal_work, normal_work_helper); + INIT_WORK(&work->normal_work, uniq_func); INIT_LIST_HEAD(&work->ordered_list); work->flags = 0; } diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 9c6b66d15fb..e9e31c94758 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -19,12 +19,14 @@ #ifndef __BTRFS_ASYNC_THREAD_ #define __BTRFS_ASYNC_THREAD_ +#include struct btrfs_workqueue; /* Internal use only */ struct __btrfs_workqueue; struct btrfs_work; typedef void (*btrfs_func_t)(struct btrfs_work *arg); +typedef void (*btrfs_work_func_t)(struct work_struct *arg); struct btrfs_work { btrfs_func_t func; @@ -38,11 +40,35 @@ struct btrfs_work { unsigned long flags; }; +#define BTRFS_WORK_HELPER_PROTO(name) \ +void btrfs_##name(struct work_struct *arg) + +BTRFS_WORK_HELPER_PROTO(worker_helper); +BTRFS_WORK_HELPER_PROTO(delalloc_helper); +BTRFS_WORK_HELPER_PROTO(flush_delalloc_helper); +BTRFS_WORK_HELPER_PROTO(cache_helper); +BTRFS_WORK_HELPER_PROTO(submit_helper); +BTRFS_WORK_HELPER_PROTO(fixup_helper); +BTRFS_WORK_HELPER_PROTO(endio_helper); +BTRFS_WORK_HELPER_PROTO(endio_meta_helper); +BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper); +BTRFS_WORK_HELPER_PROTO(endio_raid56_helper); +BTRFS_WORK_HELPER_PROTO(rmw_helper); +BTRFS_WORK_HELPER_PROTO(endio_write_helper); +BTRFS_WORK_HELPER_PROTO(freespace_write_helper); +BTRFS_WORK_HELPER_PROTO(delayed_meta_helper); +BTRFS_WORK_HELPER_PROTO(readahead_helper); +BTRFS_WORK_HELPER_PROTO(qgroup_rescan_helper); +BTRFS_WORK_HELPER_PROTO(extent_refs_helper); +BTRFS_WORK_HELPER_PROTO(scrub_helper); +BTRFS_WORK_HELPER_PROTO(scrubwrc_helper); +BTRFS_WORK_HELPER_PROTO(scrubnc_helper); + struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, int flags, int max_active, int thresh); -void btrfs_init_work(struct btrfs_work *work, +void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper, btrfs_func_t func, 
btrfs_func_t ordered_func, btrfs_func_t ordered_free); diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index da775bfdebc..a2e90f855d7 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1395,8 +1395,8 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, return -ENOMEM; async_work->delayed_root = delayed_root; - btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, - NULL, NULL); + btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper, + btrfs_async_run_delayed_root, NULL, NULL); async_work->nr = nr; btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c99a414813c..a1d36e62179 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -39,7 +39,6 @@ #include "btrfs_inode.h" #include "volumes.h" #include "print-tree.h" -#include "async-thread.h" #include "locking.h" #include "tree-log.h" #include "free-space-cache.h" @@ -693,35 +692,41 @@ static void end_workqueue_bio(struct bio *bio, int err) { struct end_io_wq *end_io_wq = bio->bi_private; struct btrfs_fs_info *fs_info; + struct btrfs_workqueue *wq; + btrfs_work_func_t func; fs_info = end_io_wq->info; end_io_wq->error = err; - btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL); if (bio->bi_rw & REQ_WRITE) { - if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) - btrfs_queue_work(fs_info->endio_meta_write_workers, - &end_io_wq->work); - else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) - btrfs_queue_work(fs_info->endio_freespace_worker, - &end_io_wq->work); - else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) - btrfs_queue_work(fs_info->endio_raid56_workers, - &end_io_wq->work); - else - btrfs_queue_work(fs_info->endio_write_workers, - &end_io_wq->work); + if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) { + wq = fs_info->endio_meta_write_workers; + func = btrfs_endio_meta_write_helper; + } else if (end_io_wq->metadata == 
BTRFS_WQ_ENDIO_FREE_SPACE) { + wq = fs_info->endio_freespace_worker; + func = btrfs_freespace_write_helper; + } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) { + wq = fs_info->endio_raid56_workers; + func = btrfs_endio_raid56_helper; + } else { + wq = fs_info->endio_write_workers; + func = btrfs_endio_write_helper; + } } else { - if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) - btrfs_queue_work(fs_info->endio_raid56_workers, - &end_io_wq->work); - else if (end_io_wq->metadata) - btrfs_queue_work(fs_info->endio_meta_workers, - &end_io_wq->work); - else - btrfs_queue_work(fs_info->endio_workers, - &end_io_wq->work); + if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) { + wq = fs_info->endio_raid56_workers; + func = btrfs_endio_raid56_helper; + } else if (end_io_wq->metadata) { + wq = fs_info->endio_meta_workers; + func = btrfs_endio_meta_helper; + } else { + wq = fs_info->endio_workers; + func = btrfs_endio_helper; + } } + + btrfs_init_work(&end_io_wq->work, func, end_workqueue_fn, NULL, NULL); + btrfs_queue_work(wq, &end_io_wq->work); } /* @@ -828,7 +833,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, async->submit_bio_start = submit_bio_start; async->submit_bio_done = submit_bio_done; - btrfs_init_work(&async->work, run_one_async_start, + btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start, run_one_async_done, run_one_async_free); async->bio_flags = bio_flags; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5524434da05..3efe1c3877b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -552,7 +552,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, caching_ctl->block_group = cache; caching_ctl->progress = cache->key.objectid; atomic_set(&caching_ctl->count, 1); - btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL); + btrfs_init_work(&caching_ctl->work, btrfs_cache_helper, + caching_thread, NULL, NULL); spin_lock(&cache->lock); /* @@ 
-2749,8 +2750,8 @@ int btrfs_async_run_delayed_refs(struct btrfs_root *root, async->sync = 0; init_completion(&async->wait); - btrfs_init_work(&async->work, delayed_ref_async_start, - NULL, NULL); + btrfs_init_work(&async->work, btrfs_extent_refs_helper, + delayed_ref_async_start, NULL, NULL); btrfs_queue_work(root->fs_info->extent_workers, &async->work); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ae98df67950..3d020d6d9ac 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1096,8 +1096,10 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, async_cow->end = cur_end; INIT_LIST_HEAD(&async_cow->extents); - btrfs_init_work(&async_cow->work, async_cow_start, - async_cow_submit, async_cow_free); + btrfs_init_work(&async_cow->work, + btrfs_delalloc_helper, + async_cow_start, async_cow_submit, + async_cow_free); nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> PAGE_CACHE_SHIFT; @@ -1881,7 +1883,8 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) SetPageChecked(page); page_cache_get(page); - btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL); + btrfs_init_work(&fixup->work, btrfs_fixup_helper, + btrfs_writepage_fixup_worker, NULL, NULL); fixup->page = page; btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work); return -EBUSY; @@ -2822,7 +2825,8 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, struct inode *inode = page->mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ordered_extent *ordered_extent = NULL; - struct btrfs_workqueue *workers; + struct btrfs_workqueue *wq; + btrfs_work_func_t func; trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); @@ -2831,13 +2835,17 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, end - start + 1, uptodate)) return 0; - btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL); + if (btrfs_is_free_space_inode(inode)) { + wq 
= root->fs_info->endio_freespace_worker; + func = btrfs_freespace_write_helper; + } else { + wq = root->fs_info->endio_write_workers; + func = btrfs_endio_write_helper; + } - if (btrfs_is_free_space_inode(inode)) - workers = root->fs_info->endio_freespace_worker; - else - workers = root->fs_info->endio_write_workers; - btrfs_queue_work(workers, &ordered_extent->work); + btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL, + NULL); + btrfs_queue_work(wq, &ordered_extent->work); return 0; } @@ -7208,7 +7216,8 @@ again: if (!ret) goto out_test; - btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL); + btrfs_init_work(&ordered->work, btrfs_endio_write_helper, + finish_ordered_fn, NULL, NULL); btrfs_queue_work(root->fs_info->endio_write_workers, &ordered->work); out_test: @@ -8535,7 +8544,9 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, work->inode = inode; work->wait = wait; work->delay_iput = delay_iput; - btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL); + WARN_ON_ONCE(!inode); + btrfs_init_work(&work->work, btrfs_flush_delalloc_helper, + btrfs_run_delalloc_work, NULL, NULL); return work; } diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 963895c1f80..ac734ec4cc2 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -615,6 +615,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) spin_unlock(&root->ordered_extent_lock); btrfs_init_work(&ordered->flush_work, + btrfs_flush_delalloc_helper, btrfs_run_ordered_extent_work, NULL, NULL); list_add_tail(&ordered->work_list, &works); btrfs_queue_work(root->fs_info->flush_workers, diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 8abe45524de..ded5c601d91 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2720,6 +2720,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, memset(&fs_info->qgroup_rescan_work, 0, sizeof(fs_info->qgroup_rescan_work)); 
btrfs_init_work(&fs_info->qgroup_rescan_work, + btrfs_qgroup_rescan_helper, btrfs_qgroup_rescan_worker, NULL, NULL); if (ret) { diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 4a88f073fdd..0a6b6e4bcbb 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1416,7 +1416,8 @@ cleanup: static void async_rmw_stripe(struct btrfs_raid_bio *rbio) { - btrfs_init_work(&rbio->work, rmw_work, NULL, NULL); + btrfs_init_work(&rbio->work, btrfs_rmw_helper, + rmw_work, NULL, NULL); btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); @@ -1424,7 +1425,8 @@ static void async_rmw_stripe(struct btrfs_raid_bio *rbio) static void async_read_rebuild(struct btrfs_raid_bio *rbio) { - btrfs_init_work(&rbio->work, read_rebuild_work, NULL, NULL); + btrfs_init_work(&rbio->work, btrfs_rmw_helper, + read_rebuild_work, NULL, NULL); btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); @@ -1665,7 +1667,8 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule) plug = container_of(cb, struct btrfs_plug_cb, cb); if (from_schedule) { - btrfs_init_work(&plug->work, unplug_work, NULL, NULL); + btrfs_init_work(&plug->work, btrfs_rmw_helper, + unplug_work, NULL, NULL); btrfs_queue_work(plug->info->rmw_workers, &plug->work); return; diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 09230cf3a24..20408c6b665 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -798,7 +798,8 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info) /* FIXME we cannot handle this properly right now */ BUG(); } - btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL); + btrfs_init_work(&rmw->work, btrfs_readahead_helper, + reada_start_machine_worker, NULL, NULL); rmw->fs_info = fs_info; btrfs_queue_work(fs_info->readahead_workers, &rmw->work); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 23d3f6e6a48..f4a41f37be2 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -428,8 +428,8 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, 
int is_dev_replace) sbio->index = i; sbio->sctx = sctx; sbio->page_count = 0; - btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, - NULL, NULL); + btrfs_init_work(&sbio->work, btrfs_scrub_helper, + scrub_bio_end_io_worker, NULL, NULL); if (i != SCRUB_BIOS_PER_SCTX - 1) sctx->bios[i]->next_free = i + 1; @@ -999,8 +999,8 @@ nodatasum_case: fixup_nodatasum->root = fs_info->extent_root; fixup_nodatasum->mirror_num = failed_mirror_index + 1; scrub_pending_trans_workers_inc(sctx); - btrfs_init_work(&fixup_nodatasum->work, scrub_fixup_nodatasum, - NULL, NULL); + btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper, + scrub_fixup_nodatasum, NULL, NULL); btrfs_queue_work(fs_info->scrub_workers, &fixup_nodatasum->work); goto out; @@ -1616,7 +1616,8 @@ static void scrub_wr_bio_end_io(struct bio *bio, int err) sbio->err = err; sbio->bio = bio; - btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL); + btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper, + scrub_wr_bio_end_io_worker, NULL, NULL); btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work); } @@ -3214,7 +3215,8 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, nocow_ctx->len = len; nocow_ctx->mirror_num = mirror_num; nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; - btrfs_init_work(&nocow_ctx->work, copy_nocow_pages_worker, NULL, NULL); + btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper, + copy_nocow_pages_worker, NULL, NULL); INIT_LIST_HEAD(&nocow_ctx->inodes); btrfs_queue_work(fs_info->scrub_nocow_workers, &nocow_ctx->work); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9d4ce53d756..340a92d08e8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5854,7 +5854,8 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, else generate_random_uuid(dev->uuid); - btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL); + btrfs_init_work(&dev->work, btrfs_submit_helper, + pending_bios_fn, NULL, 
NULL); return dev; } -- cgit v1.2.3-70-g09d2 From 908c7f1949cb7cc6e92ba8f18f2998e87e265b8e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 09:51:29 +0900 Subject: percpu_counter: add @gfp to percpu_counter_init() Percpu allocator now supports allocation mask. Add @gfp to percpu_counter_init() so that !GFP_KERNEL allocation masks can be used with percpu_counters too. We could have left percpu_counter_init() alone and added percpu_counter_init_gfp(); however, the number of users isn't that high and introducing _gfp variants to all percpu data structures would be quite ugly, so let's just do the conversion. This is the one with the most users. Other percpu data structures are a lot easier to convert. This patch doesn't make any functional difference. Signed-off-by: Tejun Heo Acked-by: Jan Kara Acked-by: "David S. Miller" Cc: x86@kernel.org Cc: Jens Axboe Cc: "Theodore Ts'o" Cc: Alexander Viro Cc: Andrew Morton --- arch/x86/kvm/mmu.c | 2 +- fs/btrfs/disk-io.c | 8 ++++---- fs/btrfs/extent-tree.c | 2 +- fs/ext2/super.c | 6 +++--- fs/ext3/super.c | 6 +++--- fs/ext4/super.c | 14 +++++++++----- fs/file_table.c | 2 +- fs/quota/dquot.c | 2 +- fs/super.c | 3 ++- include/linux/percpu_counter.h | 10 ++++++---- include/net/dst_ops.h | 2 +- include/net/inet_frag.h | 2 +- lib/flex_proportions.c | 4 ++-- lib/percpu_counter.c | 4 ++-- lib/proportions.c | 6 +++--- mm/backing-dev.c | 2 +- mm/mmap.c | 2 +- mm/nommu.c | 2 +- mm/shmem.c | 2 +- net/dccp/proto.c | 2 +- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_memcontrol.c | 2 +- net/sctp/protocol.c | 2 +- 23 files changed, 49 insertions(+), 42 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 931467881da..5bd53f206f4 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4534,7 +4534,7 @@ int kvm_mmu_module_init(void) if (!mmu_page_header_cache) goto nomem; - if (percpu_counter_init(&kvm_total_used_mmu_pages, 0)) + if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, 
GFP_KERNEL)) goto nomem; register_shrinker(&mmu_shrinker); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 08e65e9cf2a..61dae01788d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1180,7 +1180,7 @@ static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void) if (!writers) return ERR_PTR(-ENOMEM); - ret = percpu_counter_init(&writers->counter, 0); + ret = percpu_counter_init(&writers->counter, 0, GFP_KERNEL); if (ret < 0) { kfree(writers); return ERR_PTR(ret); @@ -2185,7 +2185,7 @@ int open_ctree(struct super_block *sb, goto fail_srcu; } - ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0); + ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL); if (ret) { err = ret; goto fail_bdi; @@ -2193,13 +2193,13 @@ int open_ctree(struct super_block *sb, fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE * (1 + ilog2(nr_cpu_ids)); - ret = percpu_counter_init(&fs_info->delalloc_bytes, 0); + ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL); if (ret) { err = ret; goto fail_dirty_metadata_bytes; } - ret = percpu_counter_init(&fs_info->bio_counter, 0); + ret = percpu_counter_init(&fs_info->bio_counter, 0, GFP_KERNEL); if (ret) { err = ret; goto fail_delalloc_bytes; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 813537f362f..94ec71eda86 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3493,7 +3493,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, if (!found) return -ENOMEM; - ret = percpu_counter_init(&found->total_bytes_pinned, 0); + ret = percpu_counter_init(&found->total_bytes_pinned, 0, GFP_KERNEL); if (ret) { kfree(found); return ret; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index b88edc05c23..170dc41e8bf 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1067,14 +1067,14 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) ext2_rsv_window_add(sb, &sbi->s_rsv_window_head); err = 
percpu_counter_init(&sbi->s_freeblocks_counter, - ext2_count_free_blocks(sb)); + ext2_count_free_blocks(sb), GFP_KERNEL); if (!err) { err = percpu_counter_init(&sbi->s_freeinodes_counter, - ext2_count_free_inodes(sb)); + ext2_count_free_inodes(sb), GFP_KERNEL); } if (!err) { err = percpu_counter_init(&sbi->s_dirs_counter, - ext2_count_dirs(sb)); + ext2_count_dirs(sb), GFP_KERNEL); } if (err) { ext2_msg(sb, KERN_ERR, "error: insufficient memory"); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 08cdfe5461e..eba021b8844 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2039,14 +2039,14 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount2; } err = percpu_counter_init(&sbi->s_freeblocks_counter, - ext3_count_free_blocks(sb)); + ext3_count_free_blocks(sb), GFP_KERNEL); if (!err) { err = percpu_counter_init(&sbi->s_freeinodes_counter, - ext3_count_free_inodes(sb)); + ext3_count_free_inodes(sb), GFP_KERNEL); } if (!err) { err = percpu_counter_init(&sbi->s_dirs_counter, - ext3_count_dirs(sb)); + ext3_count_dirs(sb), GFP_KERNEL); } if (err) { ext3_msg(sb, KERN_ERR, "error: insufficient memory"); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 32b43ad154b..e25ca8fdde7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3891,7 +3891,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* Register extent status tree shrinker */ ext4_es_register_shrinker(sbi); - if ((err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0)) != 0) { + err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0, GFP_KERNEL); + if (err) { ext4_msg(sb, KERN_ERR, "insufficient memory"); goto failed_mount3; } @@ -4105,17 +4106,20 @@ no_journal: block = ext4_count_free_clusters(sb); ext4_free_blocks_count_set(sbi->s_es, EXT4_C2B(sbi, block)); - err = percpu_counter_init(&sbi->s_freeclusters_counter, block); + err = percpu_counter_init(&sbi->s_freeclusters_counter, block, + GFP_KERNEL); if (!err) { unsigned long freei = 
ext4_count_free_inodes(sb); sbi->s_es->s_free_inodes_count = cpu_to_le32(freei); - err = percpu_counter_init(&sbi->s_freeinodes_counter, freei); + err = percpu_counter_init(&sbi->s_freeinodes_counter, freei, + GFP_KERNEL); } if (!err) err = percpu_counter_init(&sbi->s_dirs_counter, - ext4_count_dirs(sb)); + ext4_count_dirs(sb), GFP_KERNEL); if (!err) - err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0); + err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, + GFP_KERNEL); if (err) { ext4_msg(sb, KERN_ERR, "insufficient memory"); goto failed_mount6; diff --git a/fs/file_table.c b/fs/file_table.c index 385bfd31512..0bab12b2046 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -331,5 +331,5 @@ void __init files_init(unsigned long mempages) n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); - percpu_counter_init(&nr_files, 0); + percpu_counter_init(&nr_files, 0, GFP_KERNEL); } diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index f2d0eee9d1f..8b663b2d956 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2725,7 +2725,7 @@ static int __init dquot_init(void) panic("Cannot create dquot hash table"); for (i = 0; i < _DQST_DQSTAT_LAST; i++) { - ret = percpu_counter_init(&dqstats.counter[i], 0); + ret = percpu_counter_init(&dqstats.counter[i], 0, GFP_KERNEL); if (ret) panic("Cannot create dquot stat counters"); } diff --git a/fs/super.c b/fs/super.c index b9a214d2fe9..1b836107ace 100644 --- a/fs/super.c +++ b/fs/super.c @@ -175,7 +175,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) goto fail; for (i = 0; i < SB_FREEZE_LEVELS; i++) { - if (percpu_counter_init(&s->s_writers.counter[i], 0) < 0) + if (percpu_counter_init(&s->s_writers.counter[i], 0, + GFP_KERNEL) < 0) goto fail; lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], &type->s_writers_key[i], 0); diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 
d5dd4657c8d..50e50095c8d 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -12,6 +12,7 @@ #include #include #include +#include #ifdef CONFIG_SMP @@ -26,14 +27,14 @@ struct percpu_counter { extern int percpu_counter_batch; -int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, +int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, struct lock_class_key *key); -#define percpu_counter_init(fbc, value) \ +#define percpu_counter_init(fbc, value, gfp) \ ({ \ static struct lock_class_key __key; \ \ - __percpu_counter_init(fbc, value, &__key); \ + __percpu_counter_init(fbc, value, gfp, &__key); \ }) void percpu_counter_destroy(struct percpu_counter *fbc); @@ -89,7 +90,8 @@ struct percpu_counter { s64 count; }; -static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount) +static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount, + gfp_t gfp) { fbc->count = amount; return 0; diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 2f26dfb8450..1f99a1de0e4 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -63,7 +63,7 @@ static inline void dst_entries_add(struct dst_ops *dst, int val) static inline int dst_entries_init(struct dst_ops *dst) { - return percpu_counter_init(&dst->pcpuc_entries, 0); + return percpu_counter_init(&dst->pcpuc_entries, 0, GFP_KERNEL); } static inline void dst_entries_destroy(struct dst_ops *dst) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 65a8855e99f..8d1765577ac 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -151,7 +151,7 @@ static inline void add_frag_mem_limit(struct inet_frag_queue *q, int i) static inline void init_frag_mem_limit(struct netns_frags *nf) { - percpu_counter_init(&nf->mem, 0); + percpu_counter_init(&nf->mem, 0, GFP_KERNEL); } static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf) diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c 
index ebf3bac460b..b9d026bfcf3 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -40,7 +40,7 @@ int fprop_global_init(struct fprop_global *p) p->period = 0; /* Use 1 to avoid dealing with periods with 0 events... */ - err = percpu_counter_init(&p->events, 1); + err = percpu_counter_init(&p->events, 1, GFP_KERNEL); if (err) return err; seqcount_init(&p->sequence); @@ -172,7 +172,7 @@ int fprop_local_init_percpu(struct fprop_local_percpu *pl) { int err; - err = percpu_counter_init(&pl->events, 0); + err = percpu_counter_init(&pl->events, 0, GFP_KERNEL); if (err) return err; pl->period = 0; diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 3fde78275cd..48144cdae81 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -112,7 +112,7 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc) } EXPORT_SYMBOL(__percpu_counter_sum); -int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, +int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, struct lock_class_key *key) { unsigned long flags __maybe_unused; @@ -120,7 +120,7 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, raw_spin_lock_init(&fbc->lock); lockdep_set_class(&fbc->lock, key); fbc->count = amount; - fbc->counters = alloc_percpu(s32); + fbc->counters = alloc_percpu_gfp(s32, gfp); if (!fbc->counters) return -ENOMEM; diff --git a/lib/proportions.c b/lib/proportions.c index 05df84801b5..ca95f8d5438 100644 --- a/lib/proportions.c +++ b/lib/proportions.c @@ -83,11 +83,11 @@ int prop_descriptor_init(struct prop_descriptor *pd, int shift) pd->index = 0; pd->pg[0].shift = shift; mutex_init(&pd->mutex); - err = percpu_counter_init(&pd->pg[0].events, 0); + err = percpu_counter_init(&pd->pg[0].events, 0, GFP_KERNEL); if (err) goto out; - err = percpu_counter_init(&pd->pg[1].events, 0); + err = percpu_counter_init(&pd->pg[1].events, 0, GFP_KERNEL); if (err) percpu_counter_destroy(&pd->pg[0].events); @@ -193,7 +193,7 @@ int 
prop_local_init_percpu(struct prop_local_percpu *pl) raw_spin_lock_init(&pl->lock); pl->shift = 0; pl->period = 0; - return percpu_counter_init(&pl->events, 0); + return percpu_counter_init(&pl->events, 0, GFP_KERNEL); } void prop_local_destroy_percpu(struct prop_local_percpu *pl) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 1706cbbdf5f..f19a818be2d 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -455,7 +455,7 @@ int bdi_init(struct backing_dev_info *bdi) bdi_wb_init(&bdi->wb, bdi); for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { - err = percpu_counter_init(&bdi->bdi_stat[i], 0); + err = percpu_counter_init(&bdi->bdi_stat[i], 0, GFP_KERNEL); if (err) goto err; } diff --git a/mm/mmap.c b/mm/mmap.c index c1f2ea4a0b9..d7ec93e25fa 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3196,7 +3196,7 @@ void __init mmap_init(void) { int ret; - ret = percpu_counter_init(&vm_committed_as, 0); + ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL); VM_BUG_ON(ret); } diff --git a/mm/nommu.c b/mm/nommu.c index a881d9673c6..bd1808e194a 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -539,7 +539,7 @@ void __init mmap_init(void) { int ret; - ret = percpu_counter_init(&vm_committed_as, 0); + ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL); VM_BUG_ON(ret); vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC); } diff --git a/mm/shmem.c b/mm/shmem.c index 0e5fb225007..d4bc55d3f10 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2993,7 +2993,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) #endif spin_lock_init(&sbinfo->stat_lock); - if (percpu_counter_init(&sbinfo->used_blocks, 0)) + if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL)) goto failed; sbinfo->free_inodes = sbinfo->max_inodes; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index de2c1e71930..e421eddf67b 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1115,7 +1115,7 @@ static int __init dccp_init(void) BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > FIELD_SIZEOF(struct 
sk_buff, cb)); - rc = percpu_counter_init(&dccp_orphan_count, 0); + rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL); if (rc) goto out_fail; rc = -ENOBUFS; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 541f26a67ba..d59c2604c24 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3188,8 +3188,8 @@ void __init tcp_init(void) BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); - percpu_counter_init(&tcp_sockets_allocated, 0); - percpu_counter_init(&tcp_orphan_count, 0); + percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); + percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 3af522622fa..1d191357bf8 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -32,7 +32,7 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) res_parent = &parent_cg->memory_allocated; res_counter_init(&cg_proto->memory_allocated, res_parent); - percpu_counter_init(&cg_proto->sockets_allocated, 0); + percpu_counter_init(&cg_proto->sockets_allocated, 0, GFP_KERNEL); return 0; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 6240834f4b9..f00a85a3fdd 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1341,7 +1341,7 @@ static __init int sctp_init(void) if (!sctp_chunk_cachep) goto err_chunk_cachep; - status = percpu_counter_init(&sctp_sockets_allocated, 0); + status = percpu_counter_init(&sctp_sockets_allocated, 0, GFP_KERNEL); if (status) goto err_percpu_counter_init; -- cgit v1.2.3-70-g09d2 From ff9ea323816dc1c8ac7144afd4eab3ac97704430 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 08:03:56 +0900 Subject: block, bdi: an active gendisk always has a request_queue associated with it bdev_get_queue() returns the request_queue associated with the specified block_device. 
blk_get_backing_dev_info() makes use of bdev_get_queue() to determine the associated bdi given a block_device. All the callers of bdev_get_queue(), including blk_get_backing_dev_info(), assume that bdev_get_queue() may return NULL and implement NULL handling; however, bdev_get_queue() requires that the passed-in block_device be opened and attached to its gendisk. Because an active gendisk always has a valid request_queue associated with it, bdev_get_queue() can never return NULL and neither can blk_get_backing_dev_info(). Make it clear that neither of the two functions can return NULL and remove NULL handling from all the callers. Signed-off-by: Tejun Heo Cc: Chris Mason Cc: Dave Chinner Signed-off-by: Jens Axboe --- block/blk-core.c | 10 +++------- block/compat_ioctl.c | 4 ---- block/ioctl.c | 4 ---- fs/block_dev.c | 2 -- fs/btrfs/disk-io.c | 2 +- fs/xfs/xfs_buf.c | 2 -- include/linux/blkdev.h | 2 +- 7 files changed, 5 insertions(+), 21 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/block/blk-core.c b/block/blk-core.c index 93603e6ff47..81744617548 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -83,18 +83,14 @@ void blk_queue_congestion_threshold(struct request_queue *q) * @bdev: device * * Locates the passed device's request queue and returns the address of its - * backing_dev_info - * - * Will return NULL if the request queue cannot be located. + * backing_dev_info. This function can only be called if @bdev is opened + * and the return value is never NULL.
*/ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) { - struct backing_dev_info *ret = NULL; struct request_queue *q = bdev_get_queue(bdev); - if (q) - ret = &q->backing_dev_info; - return ret; + return &q->backing_dev_info; } EXPORT_SYMBOL(blk_get_backing_dev_info); diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 18b282ce361..f678c733df4 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -709,8 +709,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) if (!arg) return -EINVAL; bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - return -ENOTTY; return compat_put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512); case BLKROGET: /* compatible */ @@ -731,8 +729,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EACCES; bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - return -ENOTTY; bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; return 0; case BLKGETSIZE: diff --git a/block/ioctl.c b/block/ioctl.c index d6cda8147c9..6c7bf903742 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -356,8 +356,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, if (!arg) return -EINVAL; bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - return -ENOTTY; return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512); case BLKROGET: return put_int(arg, bdev_read_only(bdev) != 0); @@ -386,8 +384,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, if(!capable(CAP_SYS_ADMIN)) return -EACCES; bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - return -ENOTTY; bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; return 0; case BLKBSZSET: diff --git a/fs/block_dev.c b/fs/block_dev.c index 6d7274619bf..d3251eca642 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1173,8 +1173,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) if (!ret) { 
bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - bdi = &default_backing_dev_info; bdev_inode_switch_bdi(bdev->bd_inode, bdi); } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d0ed9e664f7..39ff591ae1b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1694,7 +1694,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) if (!device->bdev) continue; bdi = blk_get_backing_dev_info(device->bdev); - if (bdi && bdi_congested(bdi, bdi_bits)) { + if (bdi_congested(bdi, bdi_bits)) { ret = 1; break; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index cd7b8ca9b06..497fcde381d 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1678,8 +1678,6 @@ xfs_alloc_buftarg( btp->bt_dev = bdev->bd_dev; btp->bt_bdev = bdev; btp->bt_bdi = blk_get_backing_dev_info(bdev); - if (!btp->bt_bdi) - goto error; if (xfs_setsize_buftarg_early(btp, bdev)) goto error; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 518b46555b8..e267bf0db55 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -865,7 +865,7 @@ extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { - return bdev->bd_disk->queue; + return bdev->bd_disk->queue; /* this is never NULL */ } /* -- cgit v1.2.3-70-g09d2 From 57cdc8db21bf9cfa6b2e45310d56e74e263e8609 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 5 Feb 2014 02:37:48 +0100 Subject: btrfs: cleanup ino cache members of btrfs_root The naming is confusing, generic yet used for a specific cache. Add a prefix 'ino_' or rename appropriately. 
Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 10 +++---- fs/btrfs/disk-io.c | 6 ++-- fs/btrfs/free-space-cache.c | 14 +++++----- fs/btrfs/inode-map.c | 68 ++++++++++++++++++++++----------------------- fs/btrfs/ioctl.c | 6 ++-- 5 files changed, 52 insertions(+), 52 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8e29b614fe9..a835a548e47 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1776,12 +1776,12 @@ struct btrfs_root { /* free ino cache stuff */ struct btrfs_free_space_ctl *free_ino_ctl; - enum btrfs_caching_type cached; - spinlock_t cache_lock; - wait_queue_head_t cache_wait; + enum btrfs_caching_type ino_cache_state; + spinlock_t ino_cache_lock; + wait_queue_head_t ino_cache_wait; struct btrfs_free_space_ctl *free_ino_pinned; - u64 cache_progress; - struct inode *cache_inode; + u64 ino_cache_progress; + struct inode *ino_cache_inode; struct mutex log_mutex; wait_queue_head_t log_writer_wait; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a1d36e62179..354cc3f232b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1573,8 +1573,8 @@ int btrfs_init_fs_root(struct btrfs_root *root) root->subv_writers = writers; btrfs_init_free_ino_ctl(root); - spin_lock_init(&root->cache_lock); - init_waitqueue_head(&root->cache_wait); + spin_lock_init(&root->ino_cache_lock); + init_waitqueue_head(&root->ino_cache_wait); ret = get_anon_bdev(&root->anon_dev); if (ret) @@ -3532,7 +3532,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, static void free_fs_root(struct btrfs_root *root) { - iput(root->cache_inode); + iput(root->ino_cache_inode); WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); btrfs_free_block_rsv(root, root->orphan_block_rsv); root->orphan_block_rsv = NULL; diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 2b0a627cb5f..f181c9afe5f 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ 
-3033,10 +3033,10 @@ struct inode *lookup_free_ino_inode(struct btrfs_root *root, { struct inode *inode = NULL; - spin_lock(&root->cache_lock); - if (root->cache_inode) - inode = igrab(root->cache_inode); - spin_unlock(&root->cache_lock); + spin_lock(&root->ino_cache_lock); + if (root->ino_cache_inode) + inode = igrab(root->ino_cache_inode); + spin_unlock(&root->ino_cache_lock); if (inode) return inode; @@ -3044,10 +3044,10 @@ struct inode *lookup_free_ino_inode(struct btrfs_root *root, if (IS_ERR(inode)) return inode; - spin_lock(&root->cache_lock); + spin_lock(&root->ino_cache_lock); if (!btrfs_fs_closing(root->fs_info)) - root->cache_inode = igrab(inode); - spin_unlock(&root->cache_lock); + root->ino_cache_inode = igrab(inode); + spin_unlock(&root->ino_cache_lock); return inode; } diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 888fbe19079..83d646bd2e4 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -87,7 +87,7 @@ again: */ btrfs_item_key_to_cpu(leaf, &key, 0); btrfs_release_path(path); - root->cache_progress = last; + root->ino_cache_progress = last; up_read(&fs_info->commit_root_sem); schedule_timeout(1); goto again; @@ -106,7 +106,7 @@ again: if (last != (u64)-1 && last + 1 != key.objectid) { __btrfs_add_free_space(ctl, last + 1, key.objectid - last - 1); - wake_up(&root->cache_wait); + wake_up(&root->ino_cache_wait); } last = key.objectid; @@ -119,14 +119,14 @@ next: root->highest_objectid - last - 1); } - spin_lock(&root->cache_lock); - root->cached = BTRFS_CACHE_FINISHED; - spin_unlock(&root->cache_lock); + spin_lock(&root->ino_cache_lock); + root->ino_cache_state = BTRFS_CACHE_FINISHED; + spin_unlock(&root->ino_cache_lock); - root->cache_progress = (u64)-1; + root->ino_cache_progress = (u64)-1; btrfs_unpin_free_ino(root); out: - wake_up(&root->cache_wait); + wake_up(&root->ino_cache_wait); up_read(&fs_info->commit_root_sem); btrfs_free_path(path); @@ -144,20 +144,20 @@ static void start_caching(struct btrfs_root *root) if 
(!btrfs_test_opt(root, INODE_MAP_CACHE)) return; - spin_lock(&root->cache_lock); - if (root->cached != BTRFS_CACHE_NO) { - spin_unlock(&root->cache_lock); + spin_lock(&root->ino_cache_lock); + if (root->ino_cache_state != BTRFS_CACHE_NO) { + spin_unlock(&root->ino_cache_lock); return; } - root->cached = BTRFS_CACHE_STARTED; - spin_unlock(&root->cache_lock); + root->ino_cache_state = BTRFS_CACHE_STARTED; + spin_unlock(&root->ino_cache_lock); ret = load_free_ino_cache(root->fs_info, root); if (ret == 1) { - spin_lock(&root->cache_lock); - root->cached = BTRFS_CACHE_FINISHED; - spin_unlock(&root->cache_lock); + spin_lock(&root->ino_cache_lock); + root->ino_cache_state = BTRFS_CACHE_FINISHED; + spin_unlock(&root->ino_cache_lock); return; } @@ -196,11 +196,11 @@ again: start_caching(root); - wait_event(root->cache_wait, - root->cached == BTRFS_CACHE_FINISHED || + wait_event(root->ino_cache_wait, + root->ino_cache_state == BTRFS_CACHE_FINISHED || root->free_ino_ctl->free_space > 0); - if (root->cached == BTRFS_CACHE_FINISHED && + if (root->ino_cache_state == BTRFS_CACHE_FINISHED && root->free_ino_ctl->free_space == 0) return -ENOSPC; else @@ -214,17 +214,17 @@ void btrfs_return_ino(struct btrfs_root *root, u64 objectid) if (!btrfs_test_opt(root, INODE_MAP_CACHE)) return; again: - if (root->cached == BTRFS_CACHE_FINISHED) { + if (root->ino_cache_state == BTRFS_CACHE_FINISHED) { __btrfs_add_free_space(pinned, objectid, 1); } else { down_write(&root->fs_info->commit_root_sem); - spin_lock(&root->cache_lock); - if (root->cached == BTRFS_CACHE_FINISHED) { - spin_unlock(&root->cache_lock); + spin_lock(&root->ino_cache_lock); + if (root->ino_cache_state == BTRFS_CACHE_FINISHED) { + spin_unlock(&root->ino_cache_lock); up_write(&root->fs_info->commit_root_sem); goto again; } - spin_unlock(&root->cache_lock); + spin_unlock(&root->ino_cache_lock); start_caching(root); @@ -235,10 +235,10 @@ again: } /* - * When a transaction is committed, we'll move those inode numbers which - * are 
smaller than root->cache_progress from pinned tree to free_ino tree, - * and others will just be dropped, because the commit root we were - * searching has changed. + * When a transaction is committed, we'll move those inode numbers which are + * smaller than root->ino_cache_progress from pinned tree to free_ino tree, and + * others will just be dropped, because the commit root we were searching has + * changed. * * Must be called with root->fs_info->commit_root_sem held */ @@ -261,10 +261,10 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) info = rb_entry(n, struct btrfs_free_space, offset_index); BUG_ON(info->bitmap); /* Logic error */ - if (info->offset > root->cache_progress) + if (info->offset > root->ino_cache_progress) goto free; - else if (info->offset + info->bytes > root->cache_progress) - count = root->cache_progress - info->offset + 1; + else if (info->offset + info->bytes > root->ino_cache_progress) + count = root->ino_cache_progress - info->offset + 1; else count = info->bytes; @@ -462,13 +462,13 @@ again: } } - spin_lock(&root->cache_lock); - if (root->cached != BTRFS_CACHE_FINISHED) { + spin_lock(&root->ino_cache_lock); + if (root->ino_cache_state != BTRFS_CACHE_FINISHED) { ret = -1; - spin_unlock(&root->cache_lock); + spin_unlock(&root->ino_cache_lock); goto out_put; } - spin_unlock(&root->cache_lock); + spin_unlock(&root->ino_cache_lock); spin_lock(&ctl->tree_lock); prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8a8e29878c3..091c4d35671 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2526,9 +2526,9 @@ out_unlock: ASSERT(dest->send_in_progress == 0); /* the last ref */ - if (dest->cache_inode) { - iput(dest->cache_inode); - dest->cache_inode = NULL; + if (dest->ino_cache_inode) { + iput(dest->ino_cache_inode); + dest->ino_cache_inode = NULL; } } out_dput: -- cgit v1.2.3-70-g09d2 From 3abdbd780e9d75f0648b8a502c3789857b1e92ce Mon Sep 17 00:00:00 2001 From: David 
Sterba Date: Wed, 4 Jun 2014 18:10:45 +0200 Subject: btrfs: make close_ctree return void There's no user of the return value and we can get rid of the comment in put_super. Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 +--- fs/btrfs/disk-io.h | 2 +- fs/btrfs/super.c | 8 +------- 3 files changed, 3 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 354cc3f232b..ec32bead96a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3623,7 +3623,7 @@ int btrfs_commit_super(struct btrfs_root *root) return btrfs_commit_transaction(trans, root); } -int close_ctree(struct btrfs_root *root) +void close_ctree(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; int ret; @@ -3711,8 +3711,6 @@ int close_ctree(struct btrfs_root *root) btrfs_free_block_rsv(root, root->orphan_block_rsv); root->orphan_block_rsv = NULL; - - return 0; } int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 23ce3ceba0a..52a17db700f 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -56,7 +56,7 @@ void clean_tree_block(struct btrfs_trans_handle *trans, int open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices, char *options); -int close_ctree(struct btrfs_root *root); +void close_ctree(struct btrfs_root *root); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, int max_mirrors); struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c4124de4435..568ddc16119 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -307,13 +307,7 @@ void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function, static void btrfs_put_super(struct super_block *sb) { - (void)close_ctree(btrfs_sb(sb)->tree_root); - /* FIXME: need to fix VFS to return error? 
*/ - /* AV: return it _where_? ->put_super() can be triggered by any number - * of async events, up to and including delivery of SIGKILL to the - * last process that kept it busy. Or segfault in the aforementioned - * process... Whom would you report that to? - */ + close_ctree(btrfs_sb(sb)->tree_root); } enum { -- cgit v1.2.3-70-g09d2 From 707e8a071528385a87b63a72a37c2322e463c7b8 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 4 Jun 2014 19:22:26 +0200 Subject: btrfs: use nodesize everywhere, kill leafsize The nodesize and leafsize were never of different values. Unify the usage and make nodesize the one. Cleanup the redundant checks and helpers. Shaves a few bytes from .text: text data bss dec hex filename 852418 24560 23112 900090 dbbfa btrfs.ko.before 851074 24584 23112 898770 db6d2 btrfs.ko.after Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 8 ++--- fs/btrfs/check-integrity.c | 13 -------- fs/btrfs/ctree.c | 18 +++++------ fs/btrfs/ctree.h | 21 +++---------- fs/btrfs/disk-io.c | 74 +++++++++++++++++++++------------------------- fs/btrfs/extent-tree.c | 36 +++++++++++----------- fs/btrfs/file.c | 2 +- fs/btrfs/ioctl.c | 6 ++-- fs/btrfs/print-tree.c | 2 +- fs/btrfs/qgroup.c | 6 ++-- fs/btrfs/reada.c | 2 +- fs/btrfs/relocation.c | 21 +++++++------ fs/btrfs/scrub.c | 17 +---------- fs/btrfs/transaction.c | 2 +- fs/btrfs/tree-log.c | 2 +- 15 files changed, 89 insertions(+), 141 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index cfe8566e6e3..4de97926939 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -482,7 +482,7 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info, continue; BUG_ON(!ref->wanted_disk_byte); eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte, - fs_info->tree_root->leafsize, 0); + fs_info->tree_root->nodesize, 0); if (!eb || !extent_buffer_uptodate(eb)) { free_extent_buffer(eb); return -EIO; @@ -991,8 +991,8 @@ again: 
ref->level == 0) { u32 bsz; struct extent_buffer *eb; - bsz = btrfs_level_size(fs_info->extent_root, - ref->level); + + bsz = fs_info->extent_root->nodesize; eb = read_tree_block(fs_info->extent_root, ref->parent, bsz, 0); if (!eb || !extent_buffer_uptodate(eb)) { @@ -1366,7 +1366,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, } btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]); if (found_key->type == BTRFS_METADATA_ITEM_KEY) - size = fs_info->extent_root->leafsize; + size = fs_info->extent_root->nodesize; else if (found_key->type == BTRFS_EXTENT_ITEM_KEY) size = found_key->offset; diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index ce92ae30250..d0690da3b15 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -820,7 +820,6 @@ static int btrfsic_process_superblock_dev_mirror( btrfs_super_magic(super_tmp) != BTRFS_MAGIC || memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || btrfs_super_nodesize(super_tmp) != state->metablock_size || - btrfs_super_leafsize(super_tmp) != state->metablock_size || btrfs_super_sectorsize(super_tmp) != state->datablock_size) { brelse(bh); return 0; @@ -3120,24 +3119,12 @@ int btrfsic_mount(struct btrfs_root *root, struct list_head *dev_head = &fs_devices->devices; struct btrfs_device *device; - if (root->nodesize != root->leafsize) { - printk(KERN_INFO - "btrfsic: cannot handle nodesize %d != leafsize %d!\n", - root->nodesize, root->leafsize); - return -1; - } if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) { printk(KERN_INFO "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", root->nodesize, PAGE_CACHE_SIZE); return -1; } - if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) { - printk(KERN_INFO - "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", - root->leafsize, PAGE_CACHE_SIZE); - return -1; - } if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) { printk(KERN_INFO 
"btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 44ee5d2e52a..263145b2715 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1444,7 +1444,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq) if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) { btrfs_tree_read_unlock(eb_root); free_extent_buffer(eb_root); - blocksize = btrfs_level_size(root, old_root->level); + blocksize = root->nodesize; old = read_tree_block(root, logical, blocksize, 0); if (WARN_ON(!old || !extent_buffer_uptodate(old))) { free_extent_buffer(old); @@ -1651,7 +1651,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, WARN_ON(trans->transid != root->fs_info->generation); parent_nritems = btrfs_header_nritems(parent); - blocksize = btrfs_level_size(root, parent_level - 1); + blocksize = root->nodesize; end_slot = parent_nritems; if (parent_nritems == 1) @@ -1872,7 +1872,7 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, BUG_ON(level == 0); eb = read_tree_block(root, btrfs_node_blockptr(parent, slot), - btrfs_level_size(root, level - 1), + root->nodesize, btrfs_node_ptr_generation(parent, slot)); if (eb && !extent_buffer_uptodate(eb)) { free_extent_buffer(eb); @@ -2267,7 +2267,7 @@ static void reada_for_search(struct btrfs_root *root, node = path->nodes[level]; search = btrfs_node_blockptr(node, slot); - blocksize = btrfs_level_size(root, level - 1); + blocksize = root->nodesize; eb = btrfs_find_tree_block(root, search, blocksize); if (eb) { free_extent_buffer(eb); @@ -2325,7 +2325,7 @@ static noinline void reada_for_balance(struct btrfs_root *root, nritems = btrfs_header_nritems(parent); slot = path->slots[level + 1]; - blocksize = btrfs_level_size(root, level); + blocksize = root->nodesize; if (slot > 0) { block1 = btrfs_node_blockptr(parent, slot - 1); @@ -2461,7 +2461,7 @@ read_block_for_search(struct btrfs_trans_handle *trans, blocknr = 
btrfs_node_blockptr(b, slot); gen = btrfs_node_ptr_generation(b, slot); - blocksize = btrfs_level_size(root, level - 1); + blocksize = root->nodesize; tmp = btrfs_find_tree_block(root, blocknr, blocksize); if (tmp) { @@ -4282,13 +4282,13 @@ again: else btrfs_item_key(l, &disk_key, mid); - right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, + right = btrfs_alloc_free_block(trans, root, root->nodesize, 0, root->root_key.objectid, &disk_key, 0, l->start, 0); if (IS_ERR(right)) return PTR_ERR(right); - root_add_used(root, root->leafsize); + root_add_used(root, root->nodesize); memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); btrfs_set_header_bytenr(right, right->start); @@ -5375,7 +5375,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root, goto out; } - tmp_buf = kmalloc(left_root->leafsize, GFP_NOFS); + tmp_buf = kmalloc(left_root->nodesize, GFP_NOFS); if (!tmp_buf) { ret = -ENOMEM; goto out; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a835a548e47..6fc16d22d27 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -391,7 +391,7 @@ struct btrfs_header { sizeof(struct btrfs_header)) / \ sizeof(struct btrfs_key_ptr)) #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) -#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->leafsize)) +#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->nodesize)) #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) - \ sizeof(struct btrfs_file_extent_item)) @@ -474,7 +474,7 @@ struct btrfs_super_block { __le64 num_devices; __le32 sectorsize; __le32 nodesize; - __le32 leafsize; + __le32 __unused_leafsize; __le32 stripesize; __le32 sys_chunk_array_size; __le64 chunk_root_generation; @@ -1806,9 +1806,6 @@ struct btrfs_root { /* node allocations are done in nodesize units */ u32 nodesize; - /* leaf allocations are done in leafsize units */ - u32 leafsize; - u32 stripesize; u32 type; @@ -2995,8 +2992,6 @@ 
BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block, sectorsize, 32); BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, nodesize, 32); -BTRFS_SETGET_STACK_FUNCS(super_leafsize, struct btrfs_super_block, - leafsize, 32); BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block, stripesize, 32); BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block, @@ -3232,13 +3227,6 @@ static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) return sb->s_fs_info; } -static inline u32 btrfs_level_size(struct btrfs_root *root, int level) -{ - if (level == 0) - return root->leafsize; - return root->nodesize; -} - /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ @@ -3263,7 +3251,7 @@ static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping) static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, unsigned num_items) { - return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * + return (root->nodesize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * 2 * num_items; } @@ -3274,8 +3262,7 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root, unsigned num_items) { - return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * - num_items; + return root->nodesize * BTRFS_MAX_LEVEL * num_items; } int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ec32bead96a..508bbee320f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1200,16 +1200,14 @@ btrfs_free_subvolume_writers(struct btrfs_subvolume_writers *writers) kfree(writers); } -static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, - u32 stripesize, struct btrfs_root *root, - struct btrfs_fs_info *fs_info, +static void __setup_root(u32 
nodesize, u32 sectorsize, u32 stripesize, + struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) { root->node = NULL; root->commit_root = NULL; root->sectorsize = sectorsize; root->nodesize = nodesize; - root->leafsize = leafsize; root->stripesize = stripesize; root->state = 0; root->orphan_cleanup_state = 0; @@ -1295,7 +1293,7 @@ struct btrfs_root *btrfs_alloc_dummy_root(void) root = btrfs_alloc_root(NULL); if (!root) return ERR_PTR(-ENOMEM); - __setup_root(4096, 4096, 4096, 4096, root, NULL, 1); + __setup_root(4096, 4096, 4096, root, NULL, 1); set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state); root->alloc_bytenr = 0; @@ -1318,14 +1316,13 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, if (!root) return ERR_PTR(-ENOMEM); - __setup_root(tree_root->nodesize, tree_root->leafsize, - tree_root->sectorsize, tree_root->stripesize, - root, fs_info, objectid); + __setup_root(tree_root->nodesize, tree_root->sectorsize, + tree_root->stripesize, root, fs_info, objectid); root->root_key.objectid = objectid; root->root_key.type = BTRFS_ROOT_ITEM_KEY; root->root_key.offset = 0; - leaf = btrfs_alloc_free_block(trans, root, root->leafsize, + leaf = btrfs_alloc_free_block(trans, root, root->nodesize, 0, objectid, NULL, 0, 0, 0); if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); @@ -1396,9 +1393,9 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, if (!root) return ERR_PTR(-ENOMEM); - __setup_root(tree_root->nodesize, tree_root->leafsize, - tree_root->sectorsize, tree_root->stripesize, - root, fs_info, BTRFS_TREE_LOG_OBJECTID); + __setup_root(tree_root->nodesize, tree_root->sectorsize, + tree_root->stripesize, root, fs_info, + BTRFS_TREE_LOG_OBJECTID); root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; root->root_key.type = BTRFS_ROOT_ITEM_KEY; @@ -1413,7 +1410,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, * updated (along with back refs to the log tree). 
*/ - leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, + leaf = btrfs_alloc_free_block(trans, root, root->nodesize, 0, BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0); if (IS_ERR(leaf)) { @@ -1465,7 +1462,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, btrfs_set_stack_inode_generation(inode_item, 1); btrfs_set_stack_inode_size(inode_item, 3); btrfs_set_stack_inode_nlink(inode_item, 1); - btrfs_set_stack_inode_nbytes(inode_item, root->leafsize); + btrfs_set_stack_inode_nbytes(inode_item, root->nodesize); btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); btrfs_set_root_node(&log_root->root_item, log_root->node); @@ -1498,9 +1495,8 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, goto alloc_fail; } - __setup_root(tree_root->nodesize, tree_root->leafsize, - tree_root->sectorsize, tree_root->stripesize, - root, fs_info, key->objectid); + __setup_root(tree_root->nodesize, tree_root->sectorsize, + tree_root->stripesize, root, fs_info, key->objectid); ret = btrfs_find_root(tree_root, key, path, &root->root_item, &root->root_key); @@ -1511,7 +1507,7 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, } generation = btrfs_root_generation(&root->root_item); - blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); + blocksize = root->nodesize; root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), blocksize, generation); if (!root->node) { @@ -2143,7 +2139,6 @@ int open_ctree(struct super_block *sb, { u32 sectorsize; u32 nodesize; - u32 leafsize; u32 blocksize; u32 stripesize; u64 generation; @@ -2389,7 +2384,7 @@ int open_ctree(struct super_block *sb, goto fail_alloc; } - __setup_root(4096, 4096, 4096, 4096, tree_root, + __setup_root(4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); invalidate_bdev(fs_devices->latest_bdev); @@ -2469,19 +2464,22 @@ int open_ctree(struct super_block *sb, goto fail_alloc; } - if (btrfs_super_leafsize(disk_super) != 
+ /* + * Leafsize and nodesize were always equal, this is only a sanity check. + */ + if (le32_to_cpu(disk_super->__unused_leafsize) != btrfs_super_nodesize(disk_super)) { printk(KERN_ERR "BTRFS: couldn't mount because metadata " "blocksizes don't match. node %d leaf %d\n", btrfs_super_nodesize(disk_super), - btrfs_super_leafsize(disk_super)); + le32_to_cpu(disk_super->__unused_leafsize)); err = -EINVAL; goto fail_alloc; } - if (btrfs_super_leafsize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) { + if (btrfs_super_nodesize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) { printk(KERN_ERR "BTRFS: couldn't mount because metadata " "blocksize (%d) was too large\n", - btrfs_super_leafsize(disk_super)); + btrfs_super_nodesize(disk_super)); err = -EINVAL; goto fail_alloc; } @@ -2498,17 +2496,16 @@ int open_ctree(struct super_block *sb, * flag our filesystem as having big metadata blocks if * they are bigger than the page size */ - if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) { + if (btrfs_super_nodesize(disk_super) > PAGE_CACHE_SIZE) { if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA)) printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n"); features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; } nodesize = btrfs_super_nodesize(disk_super); - leafsize = btrfs_super_leafsize(disk_super); sectorsize = btrfs_super_sectorsize(disk_super); stripesize = btrfs_super_stripesize(disk_super); - fs_info->dirty_metadata_batch = leafsize * (1 + ilog2(nr_cpu_ids)); + fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids)); fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids)); /* @@ -2516,7 +2513,7 @@ int open_ctree(struct super_block *sb, * extent buffers for the same range. 
It leads to corruptions */ if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && - (sectorsize != leafsize)) { + (sectorsize != nodesize)) { printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes " "are not allowed for mixed block groups on %s\n", sb->s_id); @@ -2615,7 +2612,6 @@ int open_ctree(struct super_block *sb, 4 * 1024 * 1024 / PAGE_CACHE_SIZE); tree_root->nodesize = nodesize; - tree_root->leafsize = leafsize; tree_root->sectorsize = sectorsize; tree_root->stripesize = stripesize; @@ -2642,12 +2638,11 @@ int open_ctree(struct super_block *sb, goto fail_sb_buffer; } - blocksize = btrfs_level_size(tree_root, - btrfs_super_chunk_root_level(disk_super)); + blocksize = tree_root->nodesize; generation = btrfs_super_chunk_root_generation(disk_super); - __setup_root(nodesize, leafsize, sectorsize, stripesize, - chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); + __setup_root(nodesize, sectorsize, stripesize, chunk_root, + fs_info, BTRFS_CHUNK_TREE_OBJECTID); chunk_root->node = read_tree_block(chunk_root, btrfs_super_chunk_root(disk_super), @@ -2684,8 +2679,7 @@ int open_ctree(struct super_block *sb, } retry_root_backup: - blocksize = btrfs_level_size(tree_root, - btrfs_super_root_level(disk_super)); + blocksize = tree_root->nodesize; generation = btrfs_super_generation(disk_super); tree_root->node = read_tree_block(tree_root, @@ -2859,9 +2853,7 @@ retry_root_backup: err = -EIO; goto fail_qgroup; } - blocksize = - btrfs_level_size(tree_root, - btrfs_super_log_root_level(disk_super)); + blocksize = tree_root->nodesize; log_tree_root = btrfs_alloc_root(fs_info); if (!log_tree_root) { @@ -2869,7 +2861,7 @@ retry_root_backup: goto fail_qgroup; } - __setup_root(nodesize, leafsize, sectorsize, stripesize, + __setup_root(nodesize, sectorsize, stripesize, log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); log_tree_root->node = read_tree_block(tree_root, bytenr, @@ -4008,8 +4000,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, 
clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); while (start <= end) { eb = btrfs_find_tree_block(root, start, - root->leafsize); - start += root->leafsize; + root->nodesize); + start += root->nodesize; if (!eb) continue; wait_on_extent_buffer_writeback(eb); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4d1b50d4dc5..d52da9628f0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -491,7 +491,7 @@ next: key.objectid); if (key.type == BTRFS_METADATA_ITEM_KEY) last = key.objectid + - fs_info->tree_root->leafsize; + fs_info->tree_root->nodesize; else last = key.objectid + key.offset; @@ -765,7 +765,7 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, * different */ if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) { - offset = root->leafsize; + offset = root->nodesize; metadata = 0; } @@ -799,13 +799,13 @@ again: path->slots[0]); if (key.objectid == bytenr && key.type == BTRFS_EXTENT_ITEM_KEY && - key.offset == root->leafsize) + key.offset == root->nodesize) ret = 0; } if (ret) { key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = root->leafsize; + key.offset = root->nodesize; btrfs_release_path(path); goto again; } @@ -2651,7 +2651,7 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, num_bytes = btrfs_calc_trans_metadata_size(root, 1); num_heads = heads_to_leaves(root, num_heads); if (num_heads > 1) - num_bytes += (num_heads - 1) * root->leafsize; + num_bytes += (num_heads - 1) * root->nodesize; num_bytes <<= 1; global_rsv = &root->fs_info->global_block_rsv; @@ -3117,7 +3117,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, goto fail; } else { bytenr = btrfs_node_blockptr(buf, i); - num_bytes = btrfs_level_size(root, level - 1); + num_bytes = root->nodesize; ret = process_func(trans, root, bytenr, num_bytes, parent, ref_root, level - 1, 0, 1); @@ -4839,7 +4839,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info 
*fs_info) if (num_bytes * 3 > meta_used) num_bytes = div64_u64(meta_used, 3); - return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10); + return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10); } static void update_global_block_rsv(struct btrfs_fs_info *fs_info) @@ -4988,7 +4988,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, if (root->fs_info->quota_enabled) { /* One for parent inode, two for dir entries */ - num_bytes = 3 * root->leafsize; + num_bytes = 3 * root->nodesize; ret = btrfs_qgroup_reserve(root, num_bytes); if (ret) return ret; @@ -5176,7 +5176,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) if (root->fs_info->quota_enabled) { ret = btrfs_qgroup_reserve(root, num_bytes + - nr_extents * root->leafsize); + nr_extents * root->nodesize); if (ret) goto out_fail; } @@ -5185,7 +5185,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) if (unlikely(ret)) { if (root->fs_info->quota_enabled) btrfs_qgroup_free(root, num_bytes + - nr_extents * root->leafsize); + nr_extents * root->nodesize); goto out_fail; } @@ -5301,7 +5301,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) btrfs_ino(inode), to_free, 0); if (root->fs_info->quota_enabled) { btrfs_qgroup_free(root, num_bytes + - dropped * root->leafsize); + dropped * root->nodesize); } btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, @@ -7077,7 +7077,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) { btrfs_free_and_pin_reserved_extent(root, ins->objectid, - root->leafsize); + root->nodesize); return -ENOMEM; } @@ -7086,7 +7086,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, ins, size); if (ret) { btrfs_free_and_pin_reserved_extent(root, ins->objectid, - root->leafsize); + root->nodesize); btrfs_free_path(path); return ret; } @@ -7101,7 +7101,7 @@ static int alloc_reserved_tree_block(struct 
btrfs_trans_handle *trans, if (skinny_metadata) { iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); - num_bytes = root->leafsize; + num_bytes = root->nodesize; } else { block_info = (struct btrfs_tree_block_info *)(extent_item + 1); btrfs_set_tree_block_key(leaf, block_info, key); @@ -7131,14 +7131,14 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, return ret; } - ret = update_block_group(root, ins->objectid, root->leafsize, 1); + ret = update_block_group(root, ins->objectid, root->nodesize, 1); if (ret) { /* -ENOENT, logic error */ btrfs_err(fs_info, "update block group failed for %llu %llu", ins->objectid, ins->offset); BUG(); } - trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->leafsize); + trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->nodesize); return ret; } @@ -7417,7 +7417,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, eb = path->nodes[wc->level]; nritems = btrfs_header_nritems(eb); - blocksize = btrfs_level_size(root, wc->level - 1); + blocksize = root->nodesize; for (slot = path->slots[wc->level]; slot < nritems; slot++) { if (nread >= wc->reada_count) @@ -7806,7 +7806,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, } bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); - blocksize = btrfs_level_size(root, level - 1); + blocksize = root->nodesize; next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a9b56e32dd8..033f04bac85 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1653,7 +1653,7 @@ again: cond_resched(); balance_dirty_pages_ratelimited(inode->i_mapping); - if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) + if (dirty_pages < (root->nodesize >> PAGE_CACHE_SHIFT) + 1) btrfs_btree_balance_dirty(root); pos += copied; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b61801ac052..d6e10d60f8a 100644 --- a/fs/btrfs/ioctl.c +++ 
b/fs/btrfs/ioctl.c @@ -477,7 +477,7 @@ static noinline int create_subvol(struct inode *dir, if (ret) goto fail; - leaf = btrfs_alloc_free_block(trans, root, root->leafsize, + leaf = btrfs_alloc_free_block(trans, root, root->nodesize, 0, objectid, NULL, 0, 0, 0); if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); @@ -503,7 +503,7 @@ static noinline int create_subvol(struct inode *dir, btrfs_set_stack_inode_generation(inode_item, 1); btrfs_set_stack_inode_size(inode_item, 3); btrfs_set_stack_inode_nlink(inode_item, 1); - btrfs_set_stack_inode_nbytes(inode_item, root->leafsize); + btrfs_set_stack_inode_nbytes(inode_item, root->nodesize); btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); btrfs_set_root_flags(&root_item, 0); @@ -3199,7 +3199,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode, u64 last_dest_end = destoff; ret = -ENOMEM; - buf = vmalloc(btrfs_level_size(root, 0)); + buf = vmalloc(root->nodesize); if (!buf) return ret; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 1591620bee3..eb309855d5c 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -336,7 +336,7 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) for (i = 0; i < nr; i++) { struct extent_buffer *next = read_tree_block(root, btrfs_node_blockptr(c, i), - btrfs_level_size(root, level - 1), + root->nodesize, btrfs_node_ptr_generation(c, i)); if (btrfs_is_leaf(next) && level != 1) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index ded5c601d91..2ce4ce7b47d 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2237,7 +2237,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, if (srcid) { struct btrfs_root *srcroot; struct btrfs_key srckey; - int srcroot_level; srckey.objectid = srcid; srckey.type = BTRFS_ROOT_ITEM_KEY; @@ -2249,8 +2248,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, } rcu_read_lock(); - srcroot_level = btrfs_header_level(srcroot->node); - level_size = btrfs_level_size(srcroot, 
srcroot_level); + level_size = srcroot->nodesize; rcu_read_unlock(); } @@ -2566,7 +2564,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, found.type != BTRFS_METADATA_ITEM_KEY) continue; if (found.type == BTRFS_METADATA_ITEM_KEY) - num_bytes = fs_info->extent_root->leafsize; + num_bytes = fs_info->extent_root->nodesize; else num_bytes = found.offset; diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 20408c6b665..b63ae20618f 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -347,7 +347,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, if (!re) return NULL; - blocksize = btrfs_level_size(root, level); + blocksize = root->nodesize; re->logical = logical; re->blocksize = blocksize; re->top = *top; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b3329ad3452..2d221c46180 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1787,7 +1787,7 @@ again: btrfs_node_key_to_cpu(parent, next_key, slot + 1); old_bytenr = btrfs_node_blockptr(parent, slot); - blocksize = btrfs_level_size(dest, level - 1); + blocksize = dest->nodesize; old_ptr_gen = btrfs_node_ptr_generation(parent, slot); if (level <= max_level) { @@ -1970,7 +1970,7 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, } bytenr = btrfs_node_blockptr(eb, path->slots[i]); - blocksize = btrfs_level_size(root, i - 1); + blocksize = root->nodesize; eb = read_tree_block(root, bytenr, blocksize, ptr_gen); if (!eb || !extent_buffer_uptodate(eb)) { free_extent_buffer(eb); @@ -2544,8 +2544,7 @@ u64 calcu_metadata_size(struct reloc_control *rc, if (next->processed && (reserve || next != node)) break; - num_bytes += btrfs_level_size(rc->extent_root, - next->level); + num_bytes += rc->extent_root->nodesize; if (list_empty(&next->upper)) break; @@ -2679,7 +2678,7 @@ static int do_relocation(struct btrfs_trans_handle *trans, goto next; } - blocksize = btrfs_level_size(root, node->level); + blocksize = 
root->nodesize; generation = btrfs_node_ptr_generation(upper->eb, slot); eb = read_tree_block(root, bytenr, blocksize, generation); if (!eb || !extent_buffer_uptodate(eb)) { @@ -2789,7 +2788,7 @@ static void __mark_block_processed(struct reloc_control *rc, u32 blocksize; if (node->level == 0 || in_block_group(node->bytenr, rc->block_group)) { - blocksize = btrfs_level_size(rc->extent_root, node->level); + blocksize = rc->extent_root->nodesize; mark_block_processed(rc, node->bytenr, blocksize); } node->processed = 1; @@ -2865,7 +2864,7 @@ static int reada_tree_block(struct reloc_control *rc, if (block->key.type == BTRFS_METADATA_ITEM_KEY) readahead_tree_block(rc->extent_root, block->bytenr, block->key.objectid, - rc->extent_root->leafsize); + rc->extent_root->nodesize); else readahead_tree_block(rc->extent_root, block->bytenr, block->key.objectid, block->key.offset); @@ -3313,7 +3312,7 @@ static int add_tree_block(struct reloc_control *rc, return -ENOMEM; block->bytenr = extent_key->objectid; - block->key.objectid = rc->extent_root->leafsize; + block->key.objectid = rc->extent_root->nodesize; block->key.offset = generation; block->level = level; block->key_ready = 0; @@ -3640,7 +3639,7 @@ int add_data_references(struct reloc_control *rc, struct btrfs_extent_inline_ref *iref; unsigned long ptr; unsigned long end; - u32 blocksize = btrfs_level_size(rc->extent_root, 0); + u32 blocksize = rc->extent_root->nodesize; int ret = 0; int err = 0; @@ -3783,7 +3782,7 @@ next: } if (key.type == BTRFS_METADATA_ITEM_KEY && - key.objectid + rc->extent_root->leafsize <= + key.objectid + rc->extent_root->nodesize <= rc->search_start) { path->slots[0]++; goto next; @@ -3801,7 +3800,7 @@ next: rc->search_start = key.objectid + key.offset; else rc->search_start = key.objectid + - rc->extent_root->leafsize; + rc->extent_root->nodesize; memcpy(extent_key, &key, sizeof(key)); return 0; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 053dd000d4e..4ae1c5feccb 100644 --- 
a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -137,7 +137,6 @@ struct scrub_ctx { int pages_per_rd_bio; u32 sectorsize; u32 nodesize; - u32 leafsize; int is_dev_replace; struct scrub_wr_ctx wr_ctx; @@ -438,7 +437,6 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) } sctx->first_free = 0; sctx->nodesize = dev->dev_root->nodesize; - sctx->leafsize = dev->dev_root->leafsize; sctx->sectorsize = dev->dev_root->sectorsize; atomic_set(&sctx->bios_in_flight, 0); atomic_set(&sctx->workers_pending, 0); @@ -1758,7 +1756,6 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) BTRFS_UUID_SIZE)) ++fail; - WARN_ON(sctx->nodesize != sctx->leafsize); len = sctx->nodesize - BTRFS_CSUM_SIZE; mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE; @@ -2196,7 +2193,6 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len, sctx->stat.data_bytes_scrubbed += len; spin_unlock(&sctx->stat_lock); } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { - WARN_ON(sctx->nodesize != sctx->leafsize); blocksize = sctx->nodesize; spin_lock(&sctx->stat_lock); sctx->stat.tree_extents_scrubbed++; @@ -2487,7 +2483,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, btrfs_item_key_to_cpu(l, &key, slot); if (key.type == BTRFS_METADATA_ITEM_KEY) - bytes = root->leafsize; + bytes = root->nodesize; else bytes = key.offset; @@ -2910,17 +2906,6 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, if (btrfs_fs_closing(fs_info)) return -EINVAL; - /* - * check some assumptions - */ - if (fs_info->chunk_root->nodesize != fs_info->chunk_root->leafsize) { - btrfs_err(fs_info, - "scrub: size assumption nodesize == leafsize (%d == %d) fails", - fs_info->chunk_root->nodesize, - fs_info->chunk_root->leafsize); - return -EINVAL; - } - if (fs_info->chunk_root->nodesize > BTRFS_STRIPE_LEN) { /* * in this case scrub is unable to calculate the checksum diff --git a/fs/btrfs/transaction.c 
b/fs/btrfs/transaction.c index 977717b45bf..e336646508f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -408,7 +408,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, if (num_items > 0 && root != root->fs_info->chunk_root) { if (root->fs_info->quota_enabled && is_fstree(root->root_key.objectid)) { - qgroup_reserved = num_items * root->leafsize; + qgroup_reserved = num_items * root->nodesize; ret = btrfs_qgroup_reserve(root, qgroup_reserved); if (ret) return ERR_PTR(ret); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 2f5000c0a87..7b6d1428f03 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2157,7 +2157,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, bytenr = btrfs_node_blockptr(cur, path->slots[*level]); ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); - blocksize = btrfs_level_size(root, *level - 1); + blocksize = root->nodesize; parent = path->nodes[*level]; root_owner = btrfs_header_owner(parent); -- cgit v1.2.3-70-g09d2 From 29549aec76bd6f1fc8e1723ed5396d65073d6521 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Fri, 4 Jul 2014 17:59:06 +0800 Subject: Btrfs: print btrfs specific info for some fatal error cases Marc argued that when several btrfs filesystems are mounted, users cannot tell which filesystem hit a corruption error such as a generation verification failure. Since the @extent_buffer structure has a member @fs_info, let's output the btrfs device info.
Reported-by: Marc MERLIN Signed-off-by: Wang Shilong Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 508bbee320f..d14847d05f3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -348,9 +348,9 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, ret = 0; goto out; } - printk_ratelimited("parent transid verify failed on %llu wanted %llu " - "found %llu\n", - eb->start, parent_transid, btrfs_header_generation(eb)); + printk_ratelimited(KERN_INFO "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n", + eb->fs_info->sb->s_id, eb->start, + parent_transid, btrfs_header_generation(eb)); ret = 1; /* @@ -614,15 +614,15 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, found_start = btrfs_header_bytenr(eb); if (found_start != eb->start) { - printk_ratelimited(KERN_INFO "BTRFS: bad tree block start " + printk_ratelimited(KERN_INFO "BTRFS (device %s): bad tree block start " "%llu %llu\n", - found_start, eb->start); + eb->fs_info->sb->s_id, found_start, eb->start); ret = -EIO; goto err; } if (check_tree_block_fsid(root, eb)) { - printk_ratelimited(KERN_INFO "BTRFS: bad fsid on block %llu\n", - eb->start); + printk_ratelimited(KERN_INFO "BTRFS (device %s): bad fsid on block %llu\n", + eb->fs_info->sb->s_id, eb->start); ret = -EIO; goto err; } -- cgit v1.2.3-70-g09d2 From 56094eecd32cbb80d098eee5a7cbd60f39f4b764 Mon Sep 17 00:00:00 2001 From: Andrey Utkin Date: Sat, 9 Aug 2014 14:51:15 +0300 Subject: btrfs: Drop stray check of fixup_workers creation The issue was introduced in a79b7d4b3e8118f265dcb4bdf9a572c392f02708, adding allocation of extent_workers, so this stray check is surely not meant to be a check of something else. 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=82021 Reported-by: Maks Naumov Signed-off-by: Andrey Utkin Reviewed-by: Eric Sandeen Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d14847d05f3..38b29555354 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2601,7 +2601,7 @@ int open_ctree(struct super_block *sb, fs_info->endio_freespace_worker && fs_info->rmw_workers && fs_info->caching_workers && fs_info->readahead_workers && fs_info->fixup_workers && fs_info->delayed_workers && - fs_info->fixup_workers && fs_info->extent_workers && + fs_info->extent_workers && fs_info->qgroup_rescan_workers)) { err = -ENOMEM; goto fail_sb_buffer; -- cgit v1.2.3-70-g09d2 From 82f70d62f7923cc43128e75ae85366f137055b76 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Mon, 8 Sep 2014 20:41:09 +0800 Subject: btrfs: remove the wrong comments These comments became wrong after c3c532[bdi: add helper function for doing init and register of a bdi for a file system], so remove them. Signed-off-by: Li RongQing Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 38b29555354..dbd792754b2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1704,10 +1704,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) return ret; } -/* - * If this fails, caller must call bdi_destroy() to get rid of the - * bdi again.
- */ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) { int err; -- cgit v1.2.3-70-g09d2 From 935e5cc935bcbf9b3d0dd59fed7dbc0f2ebca6bc Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 3 Sep 2014 21:35:33 +0800 Subject: Btrfs: fix wrong disk size when writing super blocks total_size will be changed when resizing a device, and disk_total_size will be changed if resizing is successful. Meanwhile, the on-disk super blocks of the previous transaction might not be updated. Considering the consistency of the metadata in the previous transaction, We should use the size in the previous transaction to check if the super block is beyond the boundary of the device. Fix it. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/check-integrity.c | 2 +- fs/btrfs/dev-replace.c | 18 ++++++++++++++++++ fs/btrfs/disk-io.c | 5 +++-- fs/btrfs/scrub.c | 3 ++- fs/btrfs/transaction.c | 2 ++ fs/btrfs/volumes.c | 40 +++++++++++++++++++++++++++++++++++++++- fs/btrfs/volumes.h | 18 ++++++++++++++++++ 7 files changed, 83 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index e0033c843ce..cb7f3fe9c9f 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -807,7 +807,7 @@ static int btrfsic_process_superblock_dev_mirror( /* super block bytenr is always the unmapped device bytenr */ dev_bytenr = btrfs_sb_offset(superblock_mirror_num); - if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes) + if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->commit_total_bytes) return -1; bh = __bread(superblock_bdev, dev_bytenr / 4096, BTRFS_SUPER_INFO_SIZE); diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 72dc02e8294..7877b0fc6a8 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -168,6 +168,8 @@ no_valid_dev_replace_entry_found: dev_replace->srcdev->total_bytes; dev_replace->tgtdev->disk_total_bytes = 
dev_replace->srcdev->disk_total_bytes; + dev_replace->tgtdev->commit_total_bytes = + dev_replace->srcdev->commit_total_bytes; dev_replace->tgtdev->bytes_used = dev_replace->srcdev->bytes_used; } @@ -329,6 +331,20 @@ int btrfs_dev_replace_start(struct btrfs_root *root, args->start.tgtdev_name[0] == '\0') return -EINVAL; + /* + * Here we commit the transaction to make sure commit_total_bytes + * of all the devices are updated. + */ + trans = btrfs_attach_transaction(root); + if (!IS_ERR(trans)) { + ret = btrfs_commit_transaction(trans, root); + if (ret) + return ret; + } else if (PTR_ERR(trans) != -ENOENT) { + return PTR_ERR(trans); + } + + /* the disk copy procedure reuses the scrub code */ mutex_lock(&fs_info->volume_mutex); ret = btrfs_dev_replace_find_srcdev(root, args->start.srcdevid, args->start.srcdev_name, @@ -539,6 +555,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, memcpy(src_device->uuid, uuid_tmp, sizeof(src_device->uuid)); tgt_device->total_bytes = src_device->total_bytes; tgt_device->disk_total_bytes = src_device->disk_total_bytes; + ASSERT(list_empty(&src_device->resized_list)); + tgt_device->commit_total_bytes = src_device->commit_total_bytes; tgt_device->bytes_used = src_device->bytes_used; if (fs_info->sb->s_bdev == src_device->bdev) fs_info->sb->s_bdev = tgt_device->bdev; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dbd792754b2..0cd18b72555 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3127,7 +3127,8 @@ static int write_dev_supers(struct btrfs_device *device, for (i = 0; i < max_mirrors; i++) { bytenr = btrfs_sb_offset(i); - if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) + if (bytenr + BTRFS_SUPER_INFO_SIZE >= + device->commit_total_bytes) break; if (wait) { @@ -3444,7 +3445,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) btrfs_set_stack_device_type(dev_item, dev->type); btrfs_set_stack_device_id(dev_item, dev->devid); 
btrfs_set_stack_device_total_bytes(dev_item, - dev->disk_total_bytes); + dev->commit_total_bytes); btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); btrfs_set_stack_device_io_align(dev_item, dev->io_align); btrfs_set_stack_device_io_width(dev_item, dev->io_width); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 72c8981e7c0..9d80e37044d 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2840,7 +2840,8 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { bytenr = btrfs_sb_offset(i); - if (bytenr + BTRFS_SUPER_INFO_SIZE > scrub_dev->total_bytes) + if (bytenr + BTRFS_SUPER_INFO_SIZE > + scrub_dev->commit_total_bytes) break; ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e336646508f..2f7c0bef404 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1868,6 +1868,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy, sizeof(*root->fs_info->super_copy)); + btrfs_update_commit_device_size(root->fs_info); + spin_lock(&root->fs_info->trans_lock); cur_trans->state = TRANS_STATE_UNBLOCKED; root->fs_info->running_transaction = NULL; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1646659f280..7b5c04259a6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -74,6 +74,7 @@ static struct btrfs_fs_devices *__alloc_fs_devices(void) mutex_init(&fs_devs->device_list_mutex); INIT_LIST_HEAD(&fs_devs->devices); + INIT_LIST_HEAD(&fs_devs->resized_devices); INIT_LIST_HEAD(&fs_devs->alloc_list); INIT_LIST_HEAD(&fs_devs->list); @@ -154,6 +155,7 @@ static struct btrfs_device *__alloc_device(void) INIT_LIST_HEAD(&dev->dev_list); INIT_LIST_HEAD(&dev->dev_alloc_list); + INIT_LIST_HEAD(&dev->resized_list); spin_lock_init(&dev->io_lock); @@ -2168,6 +2170,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char 
*device_path) device->sector_size = root->sectorsize; device->total_bytes = i_size_read(bdev->bd_inode); device->disk_total_bytes = device->total_bytes; + device->commit_total_bytes = device->total_bytes; device->dev_root = root->fs_info->dev_root; device->bdev = bdev; device->in_fs_metadata = 1; @@ -2364,6 +2367,8 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, device->sector_size = root->sectorsize; device->total_bytes = srcdev->total_bytes; device->disk_total_bytes = srcdev->disk_total_bytes; + ASSERT(list_empty(&srcdev->resized_list)); + device->commit_total_bytes = srcdev->commit_total_bytes; device->bytes_used = srcdev->bytes_used; device->dev_root = fs_info->dev_root; device->bdev = bdev; @@ -2448,6 +2453,7 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans, { struct btrfs_super_block *super_copy = device->dev_root->fs_info->super_copy; + struct btrfs_fs_devices *fs_devices; u64 old_total = btrfs_super_total_bytes(super_copy); u64 diff = new_size - device->total_bytes; @@ -2457,12 +2463,17 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans, device->is_tgtdev_for_dev_replace) return -EINVAL; + fs_devices = device->dev_root->fs_info->fs_devices; + btrfs_set_super_total_bytes(super_copy, old_total + diff); device->fs_devices->total_rw_bytes += diff; device->total_bytes = new_size; device->disk_total_bytes = new_size; btrfs_clear_space_info_full(device->dev_root->fs_info); + if (list_empty(&device->resized_list)) + list_add_tail(&device->resized_list, + &fs_devices->resized_devices); return btrfs_update_device(trans, device); } @@ -4011,8 +4022,11 @@ again: } lock_chunks(root); - device->disk_total_bytes = new_size; + if (list_empty(&device->resized_list)) + list_add_tail(&device->resized_list, + &root->fs_info->fs_devices->resized_devices); + /* Now btrfs_update_device() will change the on-disk size. 
*/ ret = btrfs_update_device(trans, device); if (ret) { @@ -5993,6 +6007,7 @@ static void fill_device_from_item(struct extent_buffer *leaf, device->devid = btrfs_device_id(leaf, dev_item); device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item); device->total_bytes = device->disk_total_bytes; + device->commit_total_bytes = device->disk_total_bytes; device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); device->type = btrfs_device_type(leaf, dev_item); device->io_align = btrfs_device_io_align(leaf, dev_item); @@ -6520,3 +6535,26 @@ int btrfs_scratch_superblock(struct btrfs_device *device) return 0; } + +/* + * Update the size of all devices, which is used for writing out the + * super blocks. + */ +void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info) +{ + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; + struct btrfs_device *curr, *next; + + if (list_empty(&fs_devices->resized_devices)) + return; + + mutex_lock(&fs_devices->device_list_mutex); + lock_chunks(fs_info->dev_root); + list_for_each_entry_safe(curr, next, &fs_devices->resized_devices, + resized_list) { + list_del_init(&curr->resized_list); + curr->commit_total_bytes = curr->disk_total_bytes; + } + unlock_chunks(fs_info->dev_root); + mutex_unlock(&fs_devices->device_list_mutex); +} diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index e15f2886d33..b30d018fa35 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -87,6 +87,21 @@ struct btrfs_device { /* physical drive uuid (or lvm uuid) */ u8 uuid[BTRFS_UUID_SIZE]; + /* + * size of the device on the current transaction + * + * This variant is update when committing the transaction, + * and protected by device_list_mutex + */ + u64 commit_total_bytes; + + /* + * used to manage the device which is resized + * + * It is protected by chunk_lock. 
+ */ + struct list_head resized_list; + /* for sending down flush barriers */ int nobarriers; struct bio *flush_bio; @@ -136,6 +151,7 @@ struct btrfs_fs_devices { struct mutex device_list_mutex; struct list_head devices; + struct list_head resized_devices; /* devices not currently being allocated */ struct list_head alloc_list; struct list_head list; @@ -402,4 +418,6 @@ static inline void btrfs_dev_stat_reset(struct btrfs_device *dev, { btrfs_dev_stat_set(dev, index, 0); } + +void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info); #endif -- cgit v1.2.3-70-g09d2 From ce7213c70c37e3a66bc0b50c45edcbfea505f62f Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 3 Sep 2014 21:35:34 +0800 Subject: Btrfs: fix wrong device bytes_used in the super block device->bytes_used will be changed when allocating a new chunk, and disk_total_bytes will be changed if resizing is successful. Meanwhile, the on-disk super blocks of the previous transaction might not be updated. Considering the consistency of the metadata in the previous transaction, we should use the size in the previous transaction to check if the super block is beyond the boundary of the device. Though it is not a big problem because we don't use it now, it is better that we make it consistent with the common metadata; maybe we will use it in the future.
Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/dev-replace.c | 3 +++ fs/btrfs/disk-io.c | 3 ++- fs/btrfs/transaction.c | 1 + fs/btrfs/volumes.c | 27 +++++++++++++++++++++++++++ fs/btrfs/volumes.h | 4 ++++ 5 files changed, 37 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 7877b0fc6a8..1be03d85d26 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -172,6 +172,8 @@ no_valid_dev_replace_entry_found: dev_replace->srcdev->commit_total_bytes; dev_replace->tgtdev->bytes_used = dev_replace->srcdev->bytes_used; + dev_replace->tgtdev->commit_bytes_used = + dev_replace->srcdev->commit_bytes_used; } dev_replace->tgtdev->is_tgtdev_for_dev_replace = 1; btrfs_init_dev_replace_tgtdev_for_resume(fs_info, @@ -558,6 +560,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, ASSERT(list_empty(&src_device->resized_list)); tgt_device->commit_total_bytes = src_device->commit_total_bytes; tgt_device->bytes_used = src_device->bytes_used; + tgt_device->commit_bytes_used = src_device->bytes_used; if (fs_info->sb->s_bdev == src_device->bdev) fs_info->sb->s_bdev = tgt_device->bdev; if (fs_info->fs_devices->latest_bdev == src_device->bdev) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0cd18b72555..a224fb9b34a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3446,7 +3446,8 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) btrfs_set_stack_device_id(dev_item, dev->devid); btrfs_set_stack_device_total_bytes(dev_item, dev->commit_total_bytes); - btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); + btrfs_set_stack_device_bytes_used(dev_item, + dev->commit_bytes_used); btrfs_set_stack_device_io_align(dev_item, dev->io_align); btrfs_set_stack_device_io_width(dev_item, dev->io_width); btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c 
index 2f7c0bef404..16d0c1b62b3 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1869,6 +1869,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, sizeof(*root->fs_info->super_copy)); btrfs_update_commit_device_size(root->fs_info); + btrfs_update_commit_device_bytes_used(root, cur_trans); spin_lock(&root->fs_info->trans_lock); cur_trans->state = TRANS_STATE_UNBLOCKED; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7b5c04259a6..f8273bb53b3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2370,6 +2370,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, ASSERT(list_empty(&srcdev->resized_list)); device->commit_total_bytes = srcdev->commit_total_bytes; device->bytes_used = srcdev->bytes_used; + device->commit_bytes_used = device->bytes_used; device->dev_root = fs_info->dev_root; device->bdev = bdev; device->in_fs_metadata = 1; @@ -6009,6 +6010,7 @@ static void fill_device_from_item(struct extent_buffer *leaf, device->total_bytes = device->disk_total_bytes; device->commit_total_bytes = device->disk_total_bytes; device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); + device->commit_bytes_used = device->bytes_used; device->type = btrfs_device_type(leaf, dev_item); device->io_align = btrfs_device_io_align(leaf, dev_item); device->io_width = btrfs_device_io_width(leaf, dev_item); @@ -6558,3 +6560,28 @@ void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info) unlock_chunks(fs_info->dev_root); mutex_unlock(&fs_devices->device_list_mutex); } + +/* Must be invoked during the transaction commit */ +void btrfs_update_commit_device_bytes_used(struct btrfs_root *root, + struct btrfs_transaction *transaction) +{ + struct extent_map *em; + struct map_lookup *map; + struct btrfs_device *dev; + int i; + + if (list_empty(&transaction->pending_chunks)) + return; + + /* In order to kick the device replace finish process */ + lock_chunks(root); + list_for_each_entry(em, 
&transaction->pending_chunks, list) { + map = (struct map_lookup *)em->bdev; + + for (i = 0; i < map->num_stripes; i++) { + dev = map->stripes[i].dev; + dev->commit_bytes_used = dev->bytes_used; + } + } + unlock_chunks(root); +} diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index b30d018fa35..f79d532fedb 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -95,6 +95,8 @@ struct btrfs_device { */ u64 commit_total_bytes; + /* bytes used on the current transaction */ + u64 commit_bytes_used; /* * used to manage the device which is resized * @@ -420,4 +422,6 @@ static inline void btrfs_dev_stat_reset(struct btrfs_device *dev, } void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info); +void btrfs_update_commit_device_bytes_used(struct btrfs_root *root, + struct btrfs_transaction *transaction); #endif -- cgit v1.2.3-70-g09d2 From 8b110e393c5a6e72d50fcdf9fa7ed8b647cfdfc9 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Fri, 12 Sep 2014 18:44:03 +0800 Subject: Btrfs: implement repair function when direct read fails This patch implements a data repair function for when a direct read fails. The details of the implementation are: - When we find the data is not right, we try to read the data from the other mirror. - When the io on the mirror ends, we will insert the endio work into the dedicated btrfs workqueue, not the common read endio workqueue, because the original endio work is still blocked in the btrfs endio workqueue; if we insert the endio work of the io on the mirror into that workqueue, a deadlock would happen. - After we get the right data, we write it back to the corrupted mirror. - And if the data on the new mirror is still corrupted, we will try the next mirror until we read the right data or all the mirrors are traversed. - After the above work, we set the uptodate flag according to the result.
Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/async-thread.c | 1 + fs/btrfs/async-thread.h | 1 + fs/btrfs/btrfs_inode.h | 2 +- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 11 +- fs/btrfs/disk-io.h | 1 + fs/btrfs/extent_io.c | 12 ++- fs/btrfs/extent_io.h | 5 +- fs/btrfs/inode.c | 276 ++++++++++++++++++++++++++++++++++++++++++++---- 9 files changed, 281 insertions(+), 29 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index fbd76ded9a3..2da0a66790b 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -74,6 +74,7 @@ BTRFS_WORK_HELPER(endio_helper); BTRFS_WORK_HELPER(endio_meta_helper); BTRFS_WORK_HELPER(endio_meta_write_helper); BTRFS_WORK_HELPER(endio_raid56_helper); +BTRFS_WORK_HELPER(endio_repair_helper); BTRFS_WORK_HELPER(rmw_helper); BTRFS_WORK_HELPER(endio_write_helper); BTRFS_WORK_HELPER(freespace_write_helper); diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index e9e31c94758..e386c29ef1f 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -53,6 +53,7 @@ BTRFS_WORK_HELPER_PROTO(endio_helper); BTRFS_WORK_HELPER_PROTO(endio_meta_helper); BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper); BTRFS_WORK_HELPER_PROTO(endio_raid56_helper); +BTRFS_WORK_HELPER_PROTO(endio_repair_helper); BTRFS_WORK_HELPER_PROTO(rmw_helper); BTRFS_WORK_HELPER_PROTO(endio_write_helper); BTRFS_WORK_HELPER_PROTO(freespace_write_helper); diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 4d309471294..7a7521c87c8 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -271,7 +271,7 @@ struct btrfs_dio_private { * The original bio may be splited to several sub-bios, this is * done during endio of sub-bios */ - int (*subio_endio)(struct inode *, struct btrfs_io_bio *); + int (*subio_endio)(struct inode *, struct btrfs_io_bio *, int); }; /* diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0f3e4f7e454..51ff3f8dbab 100644 --- 
a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1538,6 +1538,7 @@ struct btrfs_fs_info { struct btrfs_workqueue *endio_workers; struct btrfs_workqueue *endio_meta_workers; struct btrfs_workqueue *endio_raid56_workers; + struct btrfs_workqueue *endio_repair_workers; struct btrfs_workqueue *rmw_workers; struct btrfs_workqueue *endio_meta_write_workers; struct btrfs_workqueue *endio_write_workers; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a224fb9b34a..48794f95142 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -713,7 +713,11 @@ static void end_workqueue_bio(struct bio *bio, int err) func = btrfs_endio_write_helper; } } else { - if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) { + if (unlikely(end_io_wq->metadata == + BTRFS_WQ_ENDIO_DIO_REPAIR)) { + wq = fs_info->endio_repair_workers; + func = btrfs_endio_repair_helper; + } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) { wq = fs_info->endio_raid56_workers; func = btrfs_endio_raid56_helper; } else if (end_io_wq->metadata) { @@ -741,6 +745,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, int metadata) { struct end_io_wq *end_io_wq; + end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS); if (!end_io_wq) return -ENOMEM; @@ -2055,6 +2060,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_destroy_workqueue(fs_info->endio_workers); btrfs_destroy_workqueue(fs_info->endio_meta_workers); btrfs_destroy_workqueue(fs_info->endio_raid56_workers); + btrfs_destroy_workqueue(fs_info->endio_repair_workers); btrfs_destroy_workqueue(fs_info->rmw_workers); btrfs_destroy_workqueue(fs_info->endio_meta_write_workers); btrfs_destroy_workqueue(fs_info->endio_write_workers); @@ -2572,6 +2578,8 @@ int open_ctree(struct super_block *sb, btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2); fs_info->endio_raid56_workers = btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4); + fs_info->endio_repair_workers = + btrfs_alloc_workqueue("endio-repair", 
flags, 1, 0); fs_info->rmw_workers = btrfs_alloc_workqueue("rmw", flags, max_active, 2); fs_info->endio_write_workers = @@ -2593,6 +2601,7 @@ int open_ctree(struct super_block *sb, fs_info->submit_workers && fs_info->flush_workers && fs_info->endio_workers && fs_info->endio_meta_workers && fs_info->endio_meta_write_workers && + fs_info->endio_repair_workers && fs_info->endio_write_workers && fs_info->endio_raid56_workers && fs_info->endio_freespace_worker && fs_info->rmw_workers && fs_info->caching_workers && fs_info->readahead_workers && diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 52a17db700f..14d06ee1e14 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -30,6 +30,7 @@ enum { BTRFS_WQ_ENDIO_METADATA = 1, BTRFS_WQ_ENDIO_FREE_SPACE = 2, BTRFS_WQ_ENDIO_RAID56 = 3, + BTRFS_WQ_ENDIO_DIO_REPAIR = 4, }; static inline u64 btrfs_sb_offset(int mirror) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 05533c99f89..9e2ef27672e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1962,7 +1962,7 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page) SetPageUptodate(page); } -static int free_io_failure(struct inode *inode, struct io_failure_record *rec) +int free_io_failure(struct inode *inode, struct io_failure_record *rec) { int ret; int err = 0; @@ -2081,8 +2081,8 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, * each time an IO finishes, we do a fast check in the IO failure tree * to see if we need to process or clean up an io_failure_record */ -static int clean_io_failure(struct inode *inode, u64 start, - struct page *page, unsigned int pg_offset) +int clean_io_failure(struct inode *inode, u64 start, struct page *page, + unsigned int pg_offset) { u64 private; u64 private_failure; @@ -2291,7 +2291,7 @@ int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio, struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, struct 
io_failure_record *failrec, struct page *page, int pg_offset, int icsum, - bio_end_io_t *endio_func) + bio_end_io_t *endio_func, void *data) { struct bio *bio; struct btrfs_io_bio *btrfs_failed_bio; @@ -2305,6 +2305,7 @@ struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, bio->bi_iter.bi_sector = failrec->logical >> 9; bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; bio->bi_iter.bi_size = 0; + bio->bi_private = data; btrfs_failed_bio = btrfs_io_bio(failed_bio); if (btrfs_failed_bio->csum) { @@ -2362,7 +2363,8 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, phy_offset >>= inode->i_sb->s_blocksize_bits; bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, start - page_offset(page), - (int)phy_offset, failed_bio->bi_end_io); + (int)phy_offset, failed_bio->bi_end_io, + NULL); if (!bio) { free_io_failure(inode, failrec); return -EIO; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index bf0597f3a9e..176a4b1ed52 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -341,6 +341,8 @@ struct btrfs_fs_info; int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, struct page *page, unsigned int pg_offset, int mirror_num); +int clean_io_failure(struct inode *inode, u64 start, struct page *page, + unsigned int pg_offset); int end_extent_writepage(struct page *page, int err, u64 start, u64 end); int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, int mirror_num); @@ -371,7 +373,8 @@ int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio, struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, struct io_failure_record *failrec, struct page *page, int pg_offset, int icsum, - bio_end_io_t *endio_func); + bio_end_io_t *endio_func, void *data); +int free_io_failure(struct inode *inode, struct io_failure_record *rec); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS noinline u64 
find_lock_delalloc_range(struct inode *inode, struct extent_io_tree *tree, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 09d8c5ee886..c3c3269a9e0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7242,30 +7242,267 @@ unlock_err: return ret; } -static int btrfs_subio_endio_read(struct inode *inode, - struct btrfs_io_bio *io_bio) +static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio, + int rw, int mirror_num) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + + BUG_ON(rw & REQ_WRITE); + + bio_get(bio); + + ret = btrfs_bio_wq_end_io(root->fs_info, bio, + BTRFS_WQ_ENDIO_DIO_REPAIR); + if (ret) + goto err; + + ret = btrfs_map_bio(root, rw, bio, mirror_num, 0); +err: + bio_put(bio); + return ret; +} + +static int btrfs_check_dio_repairable(struct inode *inode, + struct bio *failed_bio, + struct io_failure_record *failrec, + int failed_mirror) +{ + int num_copies; + + num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info, + failrec->logical, failrec->len); + if (num_copies == 1) { + /* + * we only have a single copy of the data, so don't bother with + * all the retry and error correction code that follows. no + * matter what the error is, it is very likely to persist. 
+ */ + pr_debug("Check DIO Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n", + num_copies, failrec->this_mirror, failed_mirror); + return 0; + } + + failrec->failed_mirror = failed_mirror; + failrec->this_mirror++; + if (failrec->this_mirror == failed_mirror) + failrec->this_mirror++; + + if (failrec->this_mirror > num_copies) { + pr_debug("Check DIO Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n", + num_copies, failrec->this_mirror, failed_mirror); + return 0; + } + + return 1; +} + +static int dio_read_error(struct inode *inode, struct bio *failed_bio, + struct page *page, u64 start, u64 end, + int failed_mirror, bio_end_io_t *repair_endio, + void *repair_arg) +{ + struct io_failure_record *failrec; + struct bio *bio; + int isector; + int read_mode; + int ret; + + BUG_ON(failed_bio->bi_rw & REQ_WRITE); + + ret = btrfs_get_io_failure_record(inode, start, end, &failrec); + if (ret) + return ret; + + ret = btrfs_check_dio_repairable(inode, failed_bio, failrec, + failed_mirror); + if (!ret) { + free_io_failure(inode, failrec); + return -EIO; + } + + if (failed_bio->bi_vcnt > 1) + read_mode = READ_SYNC | REQ_FAILFAST_DEV; + else + read_mode = READ_SYNC; + + isector = start - btrfs_io_bio(failed_bio)->logical; + isector >>= inode->i_sb->s_blocksize_bits; + bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, + 0, isector, repair_endio, repair_arg); + if (!bio) { + free_io_failure(inode, failrec); + return -EIO; + } + + btrfs_debug(BTRFS_I(inode)->root->fs_info, + "Repair DIO Read Error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d\n", + read_mode, failrec->this_mirror, failrec->in_validation); + + ret = submit_dio_repair_bio(inode, bio, read_mode, + failrec->this_mirror); + if (ret) { + free_io_failure(inode, failrec); + bio_put(bio); + } + + return ret; +} + +struct btrfs_retry_complete { + struct completion done; + struct inode *inode; + u64 start; + int uptodate; +}; + +static void 
btrfs_retry_endio_nocsum(struct bio *bio, int err) +{ + struct btrfs_retry_complete *done = bio->bi_private; + struct bio_vec *bvec; + int i; + + if (err) + goto end; + + done->uptodate = 1; + bio_for_each_segment_all(bvec, bio, i) + clean_io_failure(done->inode, done->start, bvec->bv_page, 0); +end: + complete(&done->done); + bio_put(bio); +} + +static int __btrfs_correct_data_nocsum(struct inode *inode, + struct btrfs_io_bio *io_bio) { struct bio_vec *bvec; + struct btrfs_retry_complete done; u64 start; int i; int ret; - int err = 0; - if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) - return 0; + start = io_bio->logical; + done.inode = inode; + + bio_for_each_segment_all(bvec, &io_bio->bio, i) { +try_again: + done.uptodate = 0; + done.start = start; + init_completion(&done.done); + + ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start, + start + bvec->bv_len - 1, + io_bio->mirror_num, + btrfs_retry_endio_nocsum, &done); + if (ret) + return ret; + + wait_for_completion(&done.done); + + if (!done.uptodate) { + /* We might have another mirror, so try again */ + goto try_again; + } + + start += bvec->bv_len; + } + + return 0; +} + +static void btrfs_retry_endio(struct bio *bio, int err) +{ + struct btrfs_retry_complete *done = bio->bi_private; + struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); + struct bio_vec *bvec; + int uptodate; + int ret; + int i; + + if (err) + goto end; + + uptodate = 1; + bio_for_each_segment_all(bvec, bio, i) { + ret = __readpage_endio_check(done->inode, io_bio, i, + bvec->bv_page, 0, + done->start, bvec->bv_len); + if (!ret) + clean_io_failure(done->inode, done->start, + bvec->bv_page, 0); + else + uptodate = 0; + } + + done->uptodate = uptodate; +end: + complete(&done->done); + bio_put(bio); +} +static int __btrfs_subio_endio_read(struct inode *inode, + struct btrfs_io_bio *io_bio, int err) +{ + struct bio_vec *bvec; + struct btrfs_retry_complete done; + u64 start; + u64 offset = 0; + int i; + int ret; + + err = 0; start = 
io_bio->logical; + done.inode = inode; + bio_for_each_segment_all(bvec, &io_bio->bio, i) { ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page, 0, start, bvec->bv_len); - if (ret) - err = -EIO; + if (likely(!ret)) + goto next; +try_again: + done.uptodate = 0; + done.start = start; + init_completion(&done.done); + + ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start, + start + bvec->bv_len - 1, + io_bio->mirror_num, + btrfs_retry_endio, &done); + if (ret) { + err = ret; + goto next; + } + + wait_for_completion(&done.done); + + if (!done.uptodate) { + /* We might have another mirror, so try again */ + goto try_again; + } +next: + offset += bvec->bv_len; start += bvec->bv_len; } return err; } +static int btrfs_subio_endio_read(struct inode *inode, + struct btrfs_io_bio *io_bio, int err) +{ + bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; + + if (skip_csum) { + if (unlikely(err)) + return __btrfs_correct_data_nocsum(inode, io_bio); + else + return 0; + } else { + return __btrfs_subio_endio_read(inode, io_bio, err); + } +} + static void btrfs_endio_direct_read(struct bio *bio, int err) { struct btrfs_dio_private *dip = bio->bi_private; @@ -7273,8 +7510,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) struct bio *dio_bio; struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); - if (!err && (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)) - err = btrfs_subio_endio_read(inode, io_bio); + if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) + err = btrfs_subio_endio_read(inode, io_bio, err); unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, dip->logical_offset + dip->bytes - 1); @@ -7353,19 +7590,16 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, static void btrfs_end_dio_bio(struct bio *bio, int err) { struct btrfs_dio_private *dip = bio->bi_private; - int ret; - if (err) { - btrfs_err(BTRFS_I(dip->inode)->root->fs_info, - "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d", - 
btrfs_ino(dip->inode), bio->bi_rw, - (unsigned long long)bio->bi_iter.bi_sector, - bio->bi_iter.bi_size, err); - } else if (dip->subio_endio) { - ret = dip->subio_endio(dip->inode, btrfs_io_bio(bio)); - if (ret) - err = ret; - } + if (err) + btrfs_warn(BTRFS_I(dip->inode)->root->fs_info, + "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d", + btrfs_ino(dip->inode), bio->bi_rw, + (unsigned long long)bio->bi_iter.bi_sector, + bio->bi_iter.bi_size, err); + + if (dip->subio_endio) + err = dip->subio_endio(dip->inode, btrfs_io_bio(bio), err); if (err) { dip->errors = 1; -- cgit v1.2.3-70-g09d2 From 47ab2a6c689913db23ccae38349714edf8365e0a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 18 Sep 2014 11:20:02 -0400 Subject: Btrfs: remove empty block groups automatically One problem that has plagued us is that a user will use up all of his space with data, remove a bunch of that data, and then try to create a bunch of small files and run out of space. This happens because all the chunks were allocated for data since the metadata requirements were so low. But now there's a bunch of empty data block groups and not enough metadata space to do anything. This patch solves this problem by automatically deleting empty block groups. If we notice the used count go down to 0 when deleting or on mount notice that a block group has a used count of 0 then we will queue it to be deleted. When the cleaner thread runs we will double check to make sure the block group is still empty and then we will delete it. This patch has the side effect of no longer having a bunch of BUG_ON()'s in the chunk delete code, which will be helpful for both this and relocate. 
Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 9 ++- fs/btrfs/disk-io.c | 6 ++ fs/btrfs/extent-tree.c | 141 ++++++++++++++++++++++++++++++++++++-- fs/btrfs/tests/free-space-tests.c | 2 +- fs/btrfs/volumes.c | 115 ++++++++++++++++++++----------- fs/btrfs/volumes.h | 2 + 6 files changed, 226 insertions(+), 49 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 51ff3f8dbab..089f6da0941 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1298,8 +1298,8 @@ struct btrfs_block_group_cache { */ struct list_head cluster_list; - /* For delayed block group creation */ - struct list_head new_bg_list; + /* For delayed block group creation or deletion of empty block groups */ + struct list_head bg_list; }; /* delayed seq elem */ @@ -1568,6 +1568,7 @@ struct btrfs_fs_info { int do_barriers; int closing; int log_root_recovering; + int open; u64 total_pinned; @@ -1717,6 +1718,9 @@ struct btrfs_fs_info { /* Used to reclaim the metadata space in the background. 
*/ struct work_struct async_reclaim_work; + + spinlock_t unused_bgs_lock; + struct list_head unused_bgs; }; struct btrfs_subvolume_writers { @@ -3344,6 +3348,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 size); int btrfs_remove_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 group_start); +void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info); void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 48794f95142..4780e6623c7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1769,6 +1769,7 @@ static int cleaner_kthread(void *arg) } btrfs_run_delayed_iputs(root); + btrfs_delete_unused_bgs(root->fs_info); again = btrfs_clean_one_deleted_snapshot(root); mutex_unlock(&root->fs_info->cleaner_mutex); @@ -2230,6 +2231,7 @@ int open_ctree(struct super_block *sb, spin_lock_init(&fs_info->super_lock); spin_lock_init(&fs_info->qgroup_op_lock); spin_lock_init(&fs_info->buffer_lock); + spin_lock_init(&fs_info->unused_bgs_lock); rwlock_init(&fs_info->tree_mod_log_lock); mutex_init(&fs_info->reloc_mutex); mutex_init(&fs_info->delalloc_root_mutex); @@ -2239,6 +2241,7 @@ int open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); INIT_LIST_HEAD(&fs_info->space_info); INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); + INIT_LIST_HEAD(&fs_info->unused_bgs); btrfs_mapping_init(&fs_info->mapping_tree); btrfs_init_block_rsv(&fs_info->global_block_rsv, BTRFS_BLOCK_RSV_GLOBAL); @@ -2977,6 +2980,8 @@ retry_root_backup: fs_info->update_uuid_tree_gen = 1; } + fs_info->open = 1; + return 0; fail_qgroup: @@ -3688,6 +3693,7 @@ void close_ctree(struct btrfs_root *root) invalidate_inode_pages2(fs_info->btree_inode->i_mapping); btrfs_stop_all_workers(fs_info); + fs_info->open = 0; free_root_pointers(fs_info, 1); iput(fs_info->btree_inode); diff --git 
a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b30ddb49cfa..28a27d5f02d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5433,6 +5433,20 @@ static int update_block_group(struct btrfs_root *root, spin_unlock(&cache->space_info->lock); } else { old_val -= num_bytes; + + /* + * No longer have used bytes in this block group, queue + * it for deletion. + */ + if (old_val == 0) { + spin_lock(&info->unused_bgs_lock); + if (list_empty(&cache->bg_list)) { + btrfs_get_block_group(cache); + list_add_tail(&cache->bg_list, + &info->unused_bgs); + } + spin_unlock(&info->unused_bgs_lock); + } btrfs_set_block_group_used(&cache->item, old_val); cache->pinned += num_bytes; cache->space_info->bytes_pinned += num_bytes; @@ -8855,6 +8869,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) } up_write(&info->commit_root_sem); + spin_lock(&info->unused_bgs_lock); + while (!list_empty(&info->unused_bgs)) { + block_group = list_first_entry(&info->unused_bgs, + struct btrfs_block_group_cache, + bg_list); + list_del_init(&block_group->bg_list); + btrfs_put_block_group(block_group); + } + spin_unlock(&info->unused_bgs_lock); + spin_lock(&info->block_group_cache_lock); while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { block_group = rb_entry(n, struct btrfs_block_group_cache, @@ -8989,7 +9013,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size) init_rwsem(&cache->data_rwsem); INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); - INIT_LIST_HEAD(&cache->new_bg_list); + INIT_LIST_HEAD(&cache->bg_list); btrfs_init_free_space_ctl(cache); return cache; @@ -9130,8 +9154,18 @@ int btrfs_read_block_groups(struct btrfs_root *root) __link_block_group(space_info, cache); set_avail_alloc_bits(root->fs_info, cache->flags); - if (btrfs_chunk_readonly(root, cache->key.objectid)) + if (btrfs_chunk_readonly(root, cache->key.objectid)) { set_block_group_ro(cache, 1); + } else if (btrfs_block_group_used(&cache->item) == 
0) { + spin_lock(&info->unused_bgs_lock); + /* Should always be true but just in case. */ + if (list_empty(&cache->bg_list)) { + btrfs_get_block_group(cache); + list_add_tail(&cache->bg_list, + &info->unused_bgs); + } + spin_unlock(&info->unused_bgs_lock); + } } list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { @@ -9172,10 +9206,8 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, struct btrfs_key key; int ret = 0; - list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, - new_bg_list) { - list_del_init(&block_group->new_bg_list); - + list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) { + list_del_init(&block_group->bg_list); if (ret) continue; @@ -9261,7 +9293,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, __link_block_group(cache->space_info, cache); - list_add_tail(&cache->new_bg_list, &trans->new_bgs); + list_add_tail(&cache->bg_list, &trans->new_bgs); set_avail_alloc_bits(extent_root->fs_info, type); @@ -9430,6 +9462,101 @@ out: return ret; } +/* + * Process the unused_bgs list and remove any that don't have any allocated + * space inside of them. 
+ */ +void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) +{ + struct btrfs_block_group_cache *block_group; + struct btrfs_space_info *space_info; + struct btrfs_root *root = fs_info->extent_root; + struct btrfs_trans_handle *trans; + int ret = 0; + + if (!fs_info->open) + return; + + spin_lock(&fs_info->unused_bgs_lock); + while (!list_empty(&fs_info->unused_bgs)) { + u64 start, end; + + block_group = list_first_entry(&fs_info->unused_bgs, + struct btrfs_block_group_cache, + bg_list); + space_info = block_group->space_info; + list_del_init(&block_group->bg_list); + if (ret || btrfs_mixed_space_info(space_info)) { + btrfs_put_block_group(block_group); + continue; + } + spin_unlock(&fs_info->unused_bgs_lock); + + /* Don't want to race with allocators so take the groups_sem */ + down_write(&space_info->groups_sem); + spin_lock(&block_group->lock); + if (block_group->reserved || + btrfs_block_group_used(&block_group->item) || + block_group->ro) { + /* + * We want to bail if we made new allocations or have + * outstanding allocations in this block group. We do + * the ro check in case balance is currently acting on + * this block group. + */ + spin_unlock(&block_group->lock); + up_write(&space_info->groups_sem); + goto next; + } + spin_unlock(&block_group->lock); + + /* We don't want to force the issue, only flip if it's ok. */ + ret = set_block_group_ro(block_group, 0); + up_write(&space_info->groups_sem); + if (ret < 0) { + ret = 0; + goto next; + } + + /* + * Want to do this before we do anything else so we can recover + * properly if we fail to join the transaction. + */ + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + btrfs_set_block_group_rw(root, block_group); + ret = PTR_ERR(trans); + goto next; + } + + /* + * We could have pending pinned extents for this block group, + * just delete them, we don't care about them anymore. 
+ */ + start = block_group->key.objectid; + end = start + block_group->key.offset - 1; + clear_extent_bits(&fs_info->freed_extents[0], start, end, + EXTENT_DIRTY, GFP_NOFS); + clear_extent_bits(&fs_info->freed_extents[1], start, end, + EXTENT_DIRTY, GFP_NOFS); + + /* Reset pinned so btrfs_put_block_group doesn't complain */ + block_group->pinned = 0; + + /* + * Btrfs_remove_chunk will abort the transaction if things go + * horribly wrong. + */ + ret = btrfs_remove_chunk(trans, root, + block_group->key.objectid); + btrfs_end_transaction(trans, root); +next: + btrfs_put_block_group(block_group); + spin_lock(&fs_info->unused_bgs_lock); + } + spin_unlock(&fs_info->unused_bgs_lock); +} + int btrfs_init_space_info(struct btrfs_fs_info *fs_info) { struct btrfs_space_info *space_info; diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index d78ae10d044..2299bfde39e 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -45,7 +45,7 @@ static struct btrfs_block_group_cache *init_test_block_group(void) spin_lock_init(&cache->lock); INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); - INIT_LIST_HEAD(&cache->new_bg_list); + INIT_LIST_HEAD(&cache->bg_list); btrfs_init_free_space_ctl(cache); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 63e632746d8..f27c0f7c387 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2568,58 +2568,49 @@ static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 return ret; } -static int btrfs_relocate_chunk(struct btrfs_root *root, - u64 chunk_tree, u64 chunk_objectid, - u64 chunk_offset) +int btrfs_remove_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 chunk_offset) { struct extent_map_tree *em_tree; - struct btrfs_root *extent_root; - struct btrfs_trans_handle *trans; - struct btrfs_device *device; struct extent_map *em; + struct btrfs_root *extent_root = root->fs_info->extent_root; struct map_lookup 
*map; u64 dev_extent_len = 0; - int ret; - int i; + u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + u64 chunk_tree = root->fs_info->chunk_root->objectid; + int i, ret = 0; + /* Just in case */ root = root->fs_info->chunk_root; - extent_root = root->fs_info->extent_root; em_tree = &root->fs_info->mapping_tree.map_tree; - ret = btrfs_can_relocate(extent_root, chunk_offset); - if (ret) - return -ENOSPC; - - /* step one, relocate all the extents inside this chunk */ - ret = btrfs_relocate_block_group(extent_root, chunk_offset); - if (ret) - return ret; - - trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - btrfs_std_error(root->fs_info, ret); - return ret; - } - - /* - * step two, delete the device extents and the - * chunk tree entries - */ read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, chunk_offset, 1); read_unlock(&em_tree->lock); - BUG_ON(!em || em->start > chunk_offset || - em->start + em->len < chunk_offset); + if (!em || em->start > chunk_offset || + em->start + em->len < chunk_offset) { + /* + * This is a logic error, but we don't want to just rely on the + * user having built with ASSERT enabled, so if ASSERT doesn't + * do anything we still error out. 
+ */ + ASSERT(0); + if (em) + free_extent_map(em); + return -EINVAL; + } map = (struct map_lookup *)em->bdev; for (i = 0; i < map->num_stripes; i++) { - device = map->stripes[i].dev; + struct btrfs_device *device = map->stripes[i].dev; ret = btrfs_free_dev_extent(trans, device, map->stripes[i].physical, &dev_extent_len); - BUG_ON(ret); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto out; + } if (device->bytes_used > 0) { lock_chunks(root); @@ -2634,23 +2625,34 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, if (map->stripes[i].dev) { ret = btrfs_update_device(trans, map->stripes[i].dev); - BUG_ON(ret); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto out; + } } } ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid, chunk_offset); - - BUG_ON(ret); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto out; + } trace_btrfs_chunk_free(root, map, chunk_offset, em->len); if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); - BUG_ON(ret); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto out; + } } ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); - BUG_ON(ret); + if (ret) { + btrfs_abort_transaction(trans, extent_root, ret); + goto out; + } write_lock(&em_tree->lock); remove_extent_mapping(em_tree, em); @@ -2658,11 +2660,46 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, /* once for the tree */ free_extent_map(em); +out: /* once for us */ free_extent_map(em); + return ret; +} + +static int btrfs_relocate_chunk(struct btrfs_root *root, + u64 chunk_tree, u64 chunk_objectid, + u64 chunk_offset) +{ + struct btrfs_root *extent_root; + struct btrfs_trans_handle *trans; + int ret; + + root = root->fs_info->chunk_root; + extent_root = root->fs_info->extent_root; + + ret = btrfs_can_relocate(extent_root, chunk_offset); + if (ret) + return -ENOSPC; + + /* step one, relocate all the extents inside this chunk */ + ret = 
btrfs_relocate_block_group(extent_root, chunk_offset); + if (ret) + return ret; + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + btrfs_std_error(root->fs_info, ret); + return ret; + } + + /* + * step two, delete the device extents and the + * chunk tree entries + */ + ret = btrfs_remove_chunk(trans, root, chunk_offset); btrfs_end_transaction(trans, root); - return 0; + return ret; } static int btrfs_relocate_sys_chunks(struct btrfs_root *root) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 91998bc0b4c..08980fa2303 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -463,6 +463,8 @@ unsigned long btrfs_full_stripe_len(struct btrfs_root *root, int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 chunk_offset, u64 chunk_size); +int btrfs_remove_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 chunk_offset); static inline int btrfs_dev_stats_dirty(struct btrfs_device *dev) { -- cgit v1.2.3-70-g09d2 From 58dc4ce4325108b35425ffd30e6acfad9644d49d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Sun, 15 Jun 2014 00:29:04 +0200 Subject: btrfs: remove unused parameter from readahead_tree_block The parent_transid parameter has been unused since its introduction in ca7a79ad8dbe2466 ("Pass down the expected generation number when reading tree blocks"). In reada_tree_block, it was even wrongly set to leafsize. Transid check is done in the proper read and readahead ignores errors. 
Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 6 +++--- fs/btrfs/disk-io.c | 3 +-- fs/btrfs/disk-io.h | 3 +-- fs/btrfs/extent-tree.c | 3 +-- fs/btrfs/relocation.c | 9 ++------- 5 files changed, 8 insertions(+), 16 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 39021bf2df9..1b7e3545a59 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2298,7 +2298,7 @@ static void reada_for_search(struct btrfs_root *root, if ((search <= target && target - search <= 65536) || (search > target && search - target <= 65536)) { gen = btrfs_node_ptr_generation(node, nr); - readahead_tree_block(root, search, blocksize, gen); + readahead_tree_block(root, search, blocksize); nread += blocksize; } nscan++; @@ -2350,9 +2350,9 @@ static noinline void reada_for_balance(struct btrfs_root *root, } if (block1) - readahead_tree_block(root, block1, blocksize, 0); + readahead_tree_block(root, block1, blocksize); if (block2) - readahead_tree_block(root, block2, blocksize, 0); + readahead_tree_block(root, block2, blocksize); } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4780e6623c7..ff83748d39d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1062,8 +1062,7 @@ static const struct address_space_operations btree_aops = { .set_page_dirty = btree_set_page_dirty, }; -int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, - u64 parent_transid) +int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 14d06ee1e14..8cd6a53db62 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -46,8 +46,7 @@ struct btrfs_fs_devices; struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, u64 parent_transid); -int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, - u64 
parent_transid); +int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, int mirror_num, struct extent_buffer **eb); struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 44d04979f07..058abd088f0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7486,8 +7486,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, continue; } reada: - ret = readahead_tree_block(root, bytenr, blocksize, - generation); + ret = readahead_tree_block(root, bytenr, blocksize); if (ret) break; nread++; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 2d221c46180..16cb2b4a962 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2861,13 +2861,8 @@ static int reada_tree_block(struct reloc_control *rc, struct tree_block *block) { BUG_ON(block->key_ready); - if (block->key.type == BTRFS_METADATA_ITEM_KEY) - readahead_tree_block(rc->extent_root, block->bytenr, - block->key.objectid, - rc->extent_root->nodesize); - else - readahead_tree_block(rc->extent_root, block->bytenr, - block->key.objectid, block->key.offset); + readahead_tree_block(rc->extent_root, block->bytenr, + block->key.objectid); return 0; } -- cgit v1.2.3-70-g09d2 From 6197d86eabb844c1a9c99956d4e6b0f8eb548ad3 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Sun, 15 Jun 2014 00:49:36 +0200 Subject: btrfs: return void from readahead_tree_block Errors in readahead are not fatal and ignored elsewhere in the code. 
Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 6 ++---- fs/btrfs/disk-io.h | 2 +- fs/btrfs/extent-tree.c | 4 +--- 3 files changed, 4 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ff83748d39d..332f6351815 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1062,19 +1062,17 @@ static const struct address_space_operations btree_aops = { .set_page_dirty = btree_set_page_dirty, }; -int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) +void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; - int ret = 0; buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) - return 0; + return; read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, WAIT_NONE, btree_get_extent, 0); free_extent_buffer(buf); - return ret; } int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 8cd6a53db62..0d9793f6b59 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -46,7 +46,7 @@ struct btrfs_fs_devices; struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, u64 parent_transid); -int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); +void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, int mirror_num, struct extent_buffer **eb); struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 058abd088f0..e0468a9789a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7486,9 +7486,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, continue; } reada: - ret = 
readahead_tree_block(root, bytenr, blocksize); - if (ret) - break; + readahead_tree_block(root, bytenr, blocksize); nread++; } wc->reada_slot = slot; -- cgit v1.2.3-70-g09d2 From ce86cd59179279a6fe673d2a105d24fb7e70aef3 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Sun, 15 Jun 2014 01:07:32 +0200 Subject: btrfs: remove parameter blocksize from read_tree_block We know the tree block size, no need to pass it around. Signed-off-by: David Sterba --- fs/btrfs/backref.c | 6 ++---- fs/btrfs/ctree.c | 10 +++------- fs/btrfs/disk-io.c | 17 +++++------------ fs/btrfs/disk-io.h | 2 +- fs/btrfs/extent-tree.c | 8 +++----- fs/btrfs/print-tree.c | 1 - fs/btrfs/relocation.c | 11 ++++------- 7 files changed, 18 insertions(+), 37 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 6829dc5aa65..2d3e32ebfd1 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -490,7 +490,7 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info, continue; BUG_ON(!ref->wanted_disk_byte); eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte, - fs_info->tree_root->nodesize, 0); + 0); if (!eb || !extent_buffer_uptodate(eb)) { free_extent_buffer(eb); return -EIO; @@ -1028,12 +1028,10 @@ again: if (ref->count && ref->parent) { if (extent_item_pos && !ref->inode_list && ref->level == 0) { - u32 bsz; struct extent_buffer *eb; - bsz = fs_info->extent_root->nodesize; eb = read_tree_block(fs_info->extent_root, - ref->parent, bsz, 0); + ref->parent, 0); if (!eb || !extent_buffer_uptodate(eb)) { free_extent_buffer(eb); ret = -EIO; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1b7e3545a59..302c3f95570 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1425,7 +1425,6 @@ get_old_root(struct btrfs_root *root, u64 time_seq) struct tree_mod_root *old_root = NULL; u64 old_generation = 0; u64 logical; - u32 blocksize; eb_root = btrfs_read_lock_root_node(root); tm = __tree_mod_log_oldest_root(root->fs_info, eb_root, time_seq); 
@@ -1444,8 +1443,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq) if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) { btrfs_tree_read_unlock(eb_root); free_extent_buffer(eb_root); - blocksize = root->nodesize; - old = read_tree_block(root, logical, blocksize, 0); + old = read_tree_block(root, logical, 0); if (WARN_ON(!old || !extent_buffer_uptodate(old))) { free_extent_buffer(old); btrfs_warn(root->fs_info, @@ -1692,8 +1690,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, uptodate = 0; if (!cur || !uptodate) { if (!cur) { - cur = read_tree_block(root, blocknr, - blocksize, gen); + cur = read_tree_block(root, blocknr, gen); if (!cur || !extent_buffer_uptodate(cur)) { free_extent_buffer(cur); return -EIO; @@ -1872,7 +1869,6 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, BUG_ON(level == 0); eb = read_tree_block(root, btrfs_node_blockptr(parent, slot), - root->nodesize, btrfs_node_ptr_generation(parent, slot)); if (eb && !extent_buffer_uptodate(eb)) { free_extent_buffer(eb); @@ -2507,7 +2503,7 @@ read_block_for_search(struct btrfs_trans_handle *trans, btrfs_release_path(p); ret = -EAGAIN; - tmp = read_tree_block(root, blocknr, blocksize, 0); + tmp = read_tree_block(root, blocknr, 0); if (tmp) { /* * If the read above didn't mark this buffer up to date, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 332f6351815..03c0973568e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1138,12 +1138,12 @@ int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) } struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, - u32 blocksize, u64 parent_transid) + u64 parent_transid) { struct extent_buffer *buf = NULL; int ret; - buf = btrfs_find_create_tree_block(root, bytenr, blocksize); + buf = btrfs_find_create_tree_block(root, bytenr, root->nodesize); if (!buf) return NULL; @@ -1484,7 +1484,6 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root 
*tree_root, struct btrfs_fs_info *fs_info = tree_root->fs_info; struct btrfs_path *path; u64 generation; - u32 blocksize; int ret; path = btrfs_alloc_path(); @@ -1509,9 +1508,8 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, } generation = btrfs_root_generation(&root->root_item); - blocksize = root->nodesize; root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), - blocksize, generation); + generation); if (!root->node) { ret = -ENOMEM; goto find_fail; @@ -2139,7 +2137,6 @@ int open_ctree(struct super_block *sb, { u32 sectorsize; u32 nodesize; - u32 blocksize; u32 stripesize; u64 generation; u64 features; @@ -2643,7 +2640,6 @@ int open_ctree(struct super_block *sb, goto fail_sb_buffer; } - blocksize = tree_root->nodesize; generation = btrfs_super_chunk_root_generation(disk_super); __setup_root(nodesize, sectorsize, stripesize, chunk_root, @@ -2651,7 +2647,7 @@ int open_ctree(struct super_block *sb, chunk_root->node = read_tree_block(chunk_root, btrfs_super_chunk_root(disk_super), - blocksize, generation); + generation); if (!chunk_root->node || !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n", @@ -2684,12 +2680,11 @@ int open_ctree(struct super_block *sb, } retry_root_backup: - blocksize = tree_root->nodesize; generation = btrfs_super_generation(disk_super); tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super), - blocksize, generation); + generation); if (!tree_root->node || !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n", @@ -2858,7 +2853,6 @@ retry_root_backup: err = -EIO; goto fail_qgroup; } - blocksize = tree_root->nodesize; log_tree_root = btrfs_alloc_root(fs_info); if (!log_tree_root) { @@ -2870,7 +2864,6 @@ retry_root_backup: log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); log_tree_root->node = read_tree_block(tree_root, bytenr, - 
blocksize, generation + 1); if (!log_tree_root->node || !extent_buffer_uptodate(log_tree_root->node)) { diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 0d9793f6b59..03f396144fe 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -45,7 +45,7 @@ struct btrfs_device; struct btrfs_fs_devices; struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, - u32 blocksize, u64 parent_transid); + u64 parent_transid); void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, int mirror_num, struct extent_buffer **eb); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e0468a9789a..178f6dbf2d7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7645,7 +7645,6 @@ walk_down: level = root_level; while (level >= 0) { if (path->nodes[level] == NULL) { - int child_bsize = root->nodesize; int parent_slot; u64 child_gen; u64 child_bytenr; @@ -7657,8 +7656,7 @@ walk_down: child_bytenr = btrfs_node_blockptr(eb, parent_slot); child_gen = btrfs_node_ptr_generation(eb, parent_slot); - eb = read_tree_block(root, child_bytenr, child_bsize, - child_gen); + eb = read_tree_block(root, child_bytenr, child_gen); if (!eb || !extent_buffer_uptodate(eb)) { ret = -EIO; goto out; @@ -7674,7 +7672,7 @@ walk_down: ret = btrfs_qgroup_record_ref(trans, root->fs_info, root->objectid, child_bytenr, - child_bsize, + root->nodesize, BTRFS_QGROUP_OPER_SUB_SUBTREE, 0); if (ret) @@ -7889,7 +7887,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, if (!next) { if (reada && level == 1) reada_walk_down(trans, root, wc, path); - next = read_tree_block(root, bytenr, blocksize, generation); + next = read_tree_block(root, bytenr, generation); if (!next || !extent_buffer_uptodate(next)) { free_extent_buffer(next); return -EIO; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index eb309855d5c..647ab12fdf5 100644 --- 
a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -336,7 +336,6 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) for (i = 0; i < nr; i++) { struct extent_buffer *next = read_tree_block(root, btrfs_node_blockptr(c, i), - root->nodesize, btrfs_node_ptr_generation(c, i)); if (btrfs_is_leaf(next) && level != 1) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index d7506325b02..95bc40ae358 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1813,8 +1813,7 @@ again: break; } - eb = read_tree_block(dest, old_bytenr, blocksize, - old_ptr_gen); + eb = read_tree_block(dest, old_bytenr, old_ptr_gen); if (!eb || !extent_buffer_uptodate(eb)) { ret = (!eb) ? -ENOMEM : -EIO; free_extent_buffer(eb); @@ -1944,7 +1943,6 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, u64 bytenr; u64 ptr_gen = 0; u64 last_snapshot; - u32 blocksize; u32 nritems; last_snapshot = btrfs_root_last_snapshot(&root->root_item); @@ -1970,8 +1968,7 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, } bytenr = btrfs_node_blockptr(eb, path->slots[i]); - blocksize = root->nodesize; - eb = read_tree_block(root, bytenr, blocksize, ptr_gen); + eb = read_tree_block(root, bytenr, ptr_gen); if (!eb || !extent_buffer_uptodate(eb)) { free_extent_buffer(eb); return -EIO; @@ -2680,7 +2677,7 @@ static int do_relocation(struct btrfs_trans_handle *trans, blocksize = root->nodesize; generation = btrfs_node_ptr_generation(upper->eb, slot); - eb = read_tree_block(root, bytenr, blocksize, generation); + eb = read_tree_block(root, bytenr, generation); if (!eb || !extent_buffer_uptodate(eb)) { free_extent_buffer(eb); err = -EIO; @@ -2842,7 +2839,7 @@ static int get_tree_block_key(struct reloc_control *rc, BUG_ON(block->key_ready); eb = read_tree_block(rc->extent_root, block->bytenr, - block->key.objectid, block->key.offset); + block->key.offset); if (!eb || !extent_buffer_uptodate(eb)) { free_extent_buffer(eb); return 
-EIO; -- cgit v1.2.3-70-g09d2 From 0308af4465897c889e32754ef37bb465a1b2b872 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Sun, 15 Jun 2014 01:43:40 +0200 Subject: btrfs: remove unused parameter blocksize from btrfs_find_tree_block Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 12 +++++------- fs/btrfs/disk-io.c | 5 ++--- fs/btrfs/disk-io.h | 2 +- fs/btrfs/extent-tree.c | 2 +- 4 files changed, 9 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 302c3f95570..2fb4ab659a0 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1683,7 +1683,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, continue; } - cur = btrfs_find_tree_block(root, blocknr, blocksize); + cur = btrfs_find_tree_block(root, blocknr); if (cur) uptodate = btrfs_buffer_uptodate(cur, gen, 0); else @@ -2264,7 +2264,7 @@ static void reada_for_search(struct btrfs_root *root, search = btrfs_node_blockptr(node, slot); blocksize = root->nodesize; - eb = btrfs_find_tree_block(root, search, blocksize); + eb = btrfs_find_tree_block(root, search); if (eb) { free_extent_buffer(eb); return; @@ -2326,7 +2326,7 @@ static noinline void reada_for_balance(struct btrfs_root *root, if (slot > 0) { block1 = btrfs_node_blockptr(parent, slot - 1); gen = btrfs_node_ptr_generation(parent, slot - 1); - eb = btrfs_find_tree_block(root, block1, blocksize); + eb = btrfs_find_tree_block(root, block1); /* * if we get -eagain from btrfs_buffer_uptodate, we * don't want to return eagain here. 
That will loop @@ -2339,7 +2339,7 @@ static noinline void reada_for_balance(struct btrfs_root *root, if (slot + 1 < nritems) { block2 = btrfs_node_blockptr(parent, slot + 1); gen = btrfs_node_ptr_generation(parent, slot + 1); - eb = btrfs_find_tree_block(root, block2, blocksize); + eb = btrfs_find_tree_block(root, block2); if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0) block2 = 0; free_extent_buffer(eb); @@ -2450,16 +2450,14 @@ read_block_for_search(struct btrfs_trans_handle *trans, { u64 blocknr; u64 gen; - u32 blocksize; struct extent_buffer *b = *eb_ret; struct extent_buffer *tmp; int ret; blocknr = btrfs_node_blockptr(b, slot); gen = btrfs_node_ptr_generation(b, slot); - blocksize = root->nodesize; - tmp = btrfs_find_tree_block(root, blocknr, blocksize); + tmp = btrfs_find_tree_block(root, blocknr); if (tmp) { /* first we do an atomic uptodate check */ if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 03c0973568e..e0293d2fbb3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1108,7 +1108,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, } struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, - u64 bytenr, u32 blocksize) + u64 bytenr) { return find_extent_buffer(root->fs_info, bytenr); } @@ -4002,8 +4002,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); while (start <= end) { - eb = btrfs_find_tree_block(root, start, - root->nodesize); + eb = btrfs_find_tree_block(root, start); start += root->nodesize; if (!eb) continue; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 03f396144fe..ae04daef608 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -62,7 +62,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); int btrfs_commit_super(struct btrfs_root *root); struct 
extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, - u64 bytenr, u32 blocksize); + u64 bytenr); struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, struct btrfs_key *location); int btrfs_init_fs_root(struct btrfs_root *root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 178f6dbf2d7..d9a90da9330 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7825,7 +7825,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); blocksize = root->nodesize; - next = btrfs_find_tree_block(root, bytenr, blocksize); + next = btrfs_find_tree_block(root, bytenr); if (!next) { next = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!next) -- cgit v1.2.3-70-g09d2 From 4d75f8a9c87b843c8ded15b82b8d137b9724cccc Mon Sep 17 00:00:00 2001 From: David Sterba Date: Sun, 15 Jun 2014 01:54:12 +0200 Subject: btrfs: remove blocksize from btrfs_alloc_free_block and rename Rename to btrfs_alloc_tree_block as it fits to the alloc/find/free + _tree_block family. The parameter blocksize was set to the metadata block size, directly or indirectly. 
Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 26 +++++++++++--------------- fs/btrfs/ctree.h | 6 +++--- fs/btrfs/disk-io.c | 8 +++----- fs/btrfs/extent-tree.c | 5 +++-- fs/btrfs/ioctl.c | 3 +-- 5 files changed, 21 insertions(+), 27 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 2fb4ab659a0..d498982bd20 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -258,9 +258,8 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, else btrfs_node_key(buf, &disk_key, 0); - cow = btrfs_alloc_free_block(trans, root, buf->len, 0, - new_root_objectid, &disk_key, level, - buf->start, 0); + cow = btrfs_alloc_tree_block(trans, root, 0, new_root_objectid, + &disk_key, level, buf->start, 0); if (IS_ERR(cow)) return PTR_ERR(cow); @@ -1133,9 +1132,9 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, } else parent_start = 0; - cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, - root->root_key.objectid, &disk_key, - level, search_start, empty_size); + cow = btrfs_alloc_tree_block(trans, root, parent_start, + root->root_key.objectid, &disk_key, level, + search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); @@ -3355,9 +3354,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, else btrfs_node_key(lower, &lower_key, 0); - c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, - root->root_key.objectid, &lower_key, - level, root->node->start, 0); + c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, + &lower_key, level, root->node->start, 0); if (IS_ERR(c)) return PTR_ERR(c); @@ -3495,9 +3493,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans, mid = (c_nritems + 1) / 2; btrfs_node_key(c, &disk_key, mid); - split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, - root->root_key.objectid, - &disk_key, level, c->start, 0); + split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, + &disk_key, level, 
c->start, 0); if (IS_ERR(split)) return PTR_ERR(split); @@ -4275,9 +4272,8 @@ again: else btrfs_item_key(l, &disk_key, mid); - right = btrfs_alloc_free_block(trans, root, root->nodesize, 0, - root->root_key.objectid, - &disk_key, 0, l->start, 0); + right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, + &disk_key, 0, l->start, 0); if (IS_ERR(right)) return PTR_ERR(right); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 089f6da0941..3073b8876bc 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3290,9 +3290,9 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group( u64 bytenr); void btrfs_put_block_group(struct btrfs_block_group_cache *cache); int get_block_group_index(struct btrfs_block_group_cache *cache); -struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u32 blocksize, - u64 parent, u64 root_objectid, +struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 parent, + u64 root_objectid, struct btrfs_disk_key *key, int level, u64 hint, u64 empty_size); void btrfs_free_tree_block(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e0293d2fbb3..2e5d460d4e0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1324,8 +1324,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, root->root_key.type = BTRFS_ROOT_ITEM_KEY; root->root_key.offset = 0; - leaf = btrfs_alloc_free_block(trans, root, root->nodesize, - 0, objectid, NULL, 0, 0, 0); + leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0); if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); leaf = NULL; @@ -1412,9 +1411,8 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, * updated (along with back refs to the log tree). 
*/ - leaf = btrfs_alloc_free_block(trans, root, root->nodesize, 0, - BTRFS_TREE_LOG_OBJECTID, NULL, - 0, 0, 0); + leaf = btrfs_alloc_tree_block(trans, root, 0, BTRFS_TREE_LOG_OBJECTID, + NULL, 0, 0, 0); if (IS_ERR(leaf)) { kfree(root); return ERR_CAST(leaf); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d9a90da9330..0ba42eb9677 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7322,8 +7322,8 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info, * * returns the tree buffer or NULL. */ -struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u32 blocksize, +struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 parent, u64 root_objectid, struct btrfs_disk_key *key, int level, u64 hint, u64 empty_size) @@ -7333,6 +7333,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct extent_buffer *buf; u64 flags = 0; int ret; + u32 blocksize = root->nodesize; bool skinny_metadata = btrfs_fs_incompat(root->fs_info, SKINNY_METADATA); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0ff212757b9..2fc48905ccf 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -480,8 +480,7 @@ static noinline int create_subvol(struct inode *dir, if (ret) goto fail; - leaf = btrfs_alloc_free_block(trans, root, root->nodesize, - 0, objectid, NULL, 0, 0, 0); + leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0); if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); goto fail; -- cgit v1.2.3-70-g09d2 From 95ac567af212db3293af3897ccb521efdf1dd7ff Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Thu, 8 Aug 2013 22:45:48 +0100 Subject: Btrfs: set default max_inline to 8KiB instead of 8MiB 8MiB is way too large and likely set by mistake. This is not a significant issue as in practice the max amount of data added to an inline extent is also limited by the page cache and btree leaf sizes. 
Signed-off-by: Filipe David Borba Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 2 +- fs/btrfs/super.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 089f6da0941..dd79ba7ee3e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2089,6 +2089,7 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_MOUNT_CHANGE_INODE_CACHE (1 << 24) #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) +#define BTRFS_DEFAULT_MAX_INLINE (8192) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4780e6623c7..9b2a741370b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2260,7 +2260,7 @@ int open_ctree(struct super_block *sb, atomic_set(&fs_info->qgroup_op_seq, 0); atomic64_set(&fs_info->tree_mod_seq, 0); fs_info->sb = sb; - fs_info->max_inline = 8192 * 1024; + fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; fs_info->metadata_ratio = 0; fs_info->defrag_inodes = RB_ROOT; fs_info->free_chunk_space = 0; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 1c6da8e00c1..b1d2a42f379 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1014,7 +1014,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ",nodatacow"); if (btrfs_test_opt(root, NOBARRIER)) seq_puts(seq, ",nobarrier"); - if (info->max_inline != 8192 * 1024) + if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE) seq_printf(seq, ",max_inline=%llu", info->max_inline); if (info->alloc_start != 0) seq_printf(seq, ",alloc_start=%llu", info->alloc_start); -- cgit v1.2.3-70-g09d2 From bfebd8b5441755f228ad02273682d675d3335123 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 30 Jul 2014 00:25:45 +0200 Subject: btrfs: use enum for wq endio metadata type The enum exists but is not consistently used. 
Signed-off-by: David Sterba --- fs/btrfs/compression.c | 11 +++++++---- fs/btrfs/disk-io.c | 14 +++----------- fs/btrfs/disk-io.h | 4 ++-- fs/btrfs/inode.c | 3 ++- 4 files changed, 14 insertions(+), 18 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index eeee13842cd..d3220d31d3c 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -388,7 +388,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, * freed before we're done setting it up */ atomic_inc(&cb->pending_bios); - ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); + ret = btrfs_bio_wq_end_io(root->fs_info, bio, + BTRFS_WQ_ENDIO_DATA); BUG_ON(ret); /* -ENOMEM */ if (!skip_sum) { @@ -419,7 +420,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, } bio_get(bio); - ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); + ret = btrfs_bio_wq_end_io(root->fs_info, bio, BTRFS_WQ_ENDIO_DATA); BUG_ON(ret); /* -ENOMEM */ if (!skip_sum) { @@ -668,7 +669,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, PAGE_CACHE_SIZE) { bio_get(comp_bio); - ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); + ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, + BTRFS_WQ_ENDIO_DATA); BUG_ON(ret); /* -ENOMEM */ /* @@ -706,7 +708,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, } bio_get(comp_bio); - ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); + ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, + BTRFS_WQ_ENDIO_DATA); BUG_ON(ret); /* -ENOMEM */ if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9b2a741370b..d7cb58ed294 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -82,7 +82,7 @@ struct end_io_wq { void *private; struct btrfs_fs_info *info; int error; - int metadata; + enum btrfs_wq_endio_type metadata; struct list_head list; struct btrfs_work work; }; @@ -733,16 +733,8 @@ static void 
end_workqueue_bio(struct bio *bio, int err) btrfs_queue_work(wq, &end_io_wq->work); } -/* - * For the metadata arg you want - * - * 0 - if data - * 1 - if normal metadta - * 2 - if writing to the free space cache area - * 3 - raid parity work - */ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, - int metadata) + enum btrfs_wq_endio_type metadata) { struct end_io_wq *end_io_wq; @@ -930,7 +922,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, * can happen in the async kernel threads */ ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, - bio, 1); + bio, BTRFS_WQ_ENDIO_METADATA); if (ret) goto out_w_error; ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 14d06ee1e14..84da438fd9a 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -25,7 +25,7 @@ #define BTRFS_SUPER_MIRROR_MAX 3 #define BTRFS_SUPER_MIRROR_SHIFT 12 -enum { +enum btrfs_wq_endio_type { BTRFS_WQ_ENDIO_DATA = 0, BTRFS_WQ_ENDIO_METADATA = 1, BTRFS_WQ_ENDIO_FREE_SPACE = 2, @@ -120,7 +120,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); u32 btrfs_csum_data(char *data, u32 seed, size_t len); void btrfs_csum_final(u32 crc, char *result); int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, - int metadata); + enum btrfs_wq_endio_type metadata); int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, int rw, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 344a322eb38..b1e388dea7b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7721,7 +7721,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, bio_get(bio); if (!write) { - ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); + ret = btrfs_bio_wq_end_io(root->fs_info, bio, + BTRFS_WQ_ENDIO_DATA); if (ret) goto err; } -- cgit v1.2.3-70-g09d2 From 
97eb6b69d1e856cb5e1cf2c3d94afab643e93128 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 30 Jul 2014 00:55:42 +0200 Subject: btrfs: use slab for end_io_wq structures The structure is frequently reused. Rename it according to the slab name. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 38 +++++++++++++++++++++++++++++--------- fs/btrfs/disk-io.h | 2 ++ fs/btrfs/super.c | 8 +++++++- 3 files changed, 38 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d7cb58ed294..2f075ef2005 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -72,11 +72,11 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root); static void btrfs_error_commit_super(struct btrfs_root *root); /* - * end_io_wq structs are used to do processing in task context when an IO is - * complete. This is used during reads to verify checksums, and it is used + * btrfs_end_io_wq structs are used to do processing in task context when an IO + * is complete. This is used during reads to verify checksums, and it is used * by writes to insert metadata for new file extents after IO is complete. */ -struct end_io_wq { +struct btrfs_end_io_wq { struct bio *bio; bio_end_io_t *end_io; void *private; @@ -87,6 +87,26 @@ struct end_io_wq { struct btrfs_work work; }; +static struct kmem_cache *btrfs_end_io_wq_cache; + +int __init btrfs_end_io_wq_init(void) +{ + btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq", + sizeof(struct btrfs_end_io_wq), + 0, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, + NULL); + if (!btrfs_end_io_wq_cache) + return -ENOMEM; + return 0; +} + +void btrfs_end_io_wq_exit(void) +{ + if (btrfs_end_io_wq_cache) + kmem_cache_destroy(btrfs_end_io_wq_cache); +} + /* * async submit bios are used to offload expensive checksumming * onto the worker threads. 
They checksum file and metadata bios @@ -690,7 +710,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror) static void end_workqueue_bio(struct bio *bio, int err) { - struct end_io_wq *end_io_wq = bio->bi_private; + struct btrfs_end_io_wq *end_io_wq = bio->bi_private; struct btrfs_fs_info *fs_info; struct btrfs_workqueue *wq; btrfs_work_func_t func; @@ -736,9 +756,9 @@ static void end_workqueue_bio(struct bio *bio, int err) int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, enum btrfs_wq_endio_type metadata) { - struct end_io_wq *end_io_wq; + struct btrfs_end_io_wq *end_io_wq; - end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS); + end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS); if (!end_io_wq) return -ENOMEM; @@ -1723,16 +1743,16 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) static void end_workqueue_fn(struct btrfs_work *work) { struct bio *bio; - struct end_io_wq *end_io_wq; + struct btrfs_end_io_wq *end_io_wq; int error; - end_io_wq = container_of(work, struct end_io_wq, work); + end_io_wq = container_of(work, struct btrfs_end_io_wq, work); bio = end_io_wq->bio; error = end_io_wq->error; bio->bi_private = end_io_wq->private; bio->bi_end_io = end_io_wq->end_io; - kfree(end_io_wq); + kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq); bio_endio_nodec(bio, error); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 84da438fd9a..9ac233923ca 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -142,6 +142,8 @@ int btree_lock_page_hook(struct page *page, void *data, void (*flush_fn)(void *)); int btrfs_calc_num_tolerated_disk_barrier_failures( struct btrfs_fs_info *fs_info); +int __init btrfs_end_io_wq_init(void); +void btrfs_end_io_wq_exit(void); #ifdef CONFIG_DEBUG_LOCK_ALLOC void btrfs_init_lockdep(void); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b915d7704f1..4685b9704f1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2001,10 +2001,14 @@ 
static int __init init_btrfs_fs(void) if (err) goto free_delayed_ref; - err = btrfs_interface_init(); + err = btrfs_end_io_wq_init(); if (err) goto free_prelim_ref; + err = btrfs_interface_init(); + if (err) + goto free_end_io_wq; + btrfs_init_lockdep(); btrfs_print_info(); @@ -2021,6 +2025,8 @@ static int __init init_btrfs_fs(void) unregister_ioctl: btrfs_interface_exit(); +free_end_io_wq: + btrfs_end_io_wq_exit(); free_prelim_ref: btrfs_prelim_ref_exit(); free_delayed_ref: -- cgit v1.2.3-70-g09d2 From 2755a0de64693501741fb3603cd8ca928b0b7e81 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 31 Jul 2014 00:43:18 +0200 Subject: btrfs: hide typecast to definition of BTRFS_SEND_TRANS_STUB Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 3 +-- fs/btrfs/send.c | 2 +- fs/btrfs/transaction.c | 2 +- fs/btrfs/transaction.h | 2 +- 4 files changed, 4 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2f075ef2005..0abf4b0a901 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -347,8 +347,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, { struct extent_state *cached_state = NULL; int ret; - bool need_lock = (current->journal_info == - (void *)BTRFS_SEND_TRANS_STUB); + bool need_lock = (current->journal_info == BTRFS_SEND_TRANS_STUB); if (!parent_transid || btrfs_header_generation(eb) == parent_transid) return 0; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 7edfc7cebda..8b44630f4ab 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -5728,7 +5728,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) NULL); sort_clone_roots = 1; - current->journal_info = (void *)BTRFS_SEND_TRANS_STUB; + current->journal_info = BTRFS_SEND_TRANS_STUB; ret = send_subvol(sctx); current->journal_info = NULL; if (ret < 0) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 16d0c1b62b3..f4c194b160b 100644 --- a/fs/btrfs/transaction.c +++ 
b/fs/btrfs/transaction.c @@ -386,7 +386,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, int ret; /* Send isn't supposed to start transactions. */ - ASSERT(current->journal_info != (void *)BTRFS_SEND_TRANS_STUB); + ASSERT(current->journal_info != BTRFS_SEND_TRANS_STUB); if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) return ERR_PTR(-EROFS); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 579be51b27e..d8f40e1a5d2 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -79,7 +79,7 @@ struct btrfs_transaction { #define TRANS_EXTWRITERS (__TRANS_USERSPACE | __TRANS_START | \ __TRANS_ATTACH) -#define BTRFS_SEND_TRANS_STUB 1 +#define BTRFS_SEND_TRANS_STUB ((void *)1) struct btrfs_trans_handle { u64 transid; -- cgit v1.2.3-70-g09d2 From fccb84c94a9755f48668e43d0a44d6ecc750900f Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 29 Sep 2014 23:53:21 +0200 Subject: btrfs: move checks for DUMMY_ROOT into a helper Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 5 ++--- fs/btrfs/ctree.h | 9 +++++++++ fs/btrfs/disk-io.c | 4 +--- fs/btrfs/extent-tree.c | 16 +++++++--------- fs/btrfs/qgroup.c | 10 ++++------ 5 files changed, 23 insertions(+), 21 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 533657c508e..ce1d71d171b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1506,10 +1506,9 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf) { -#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS - if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) + if (btrfs_test_is_dummy_root(root)) return 0; -#endif + /* ensure we can see the force_cow */ smp_rmb(); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index bae025a20e6..557fd952060 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -4131,4 +4131,13 @@ int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 
qgroupid, u64 rfer, u64 excl); #endif +static inline int btrfs_test_is_dummy_root(struct btrfs_root *root) +{ +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS + if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) + return 1; +#endif + return 0; +} + #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0abf4b0a901..14117f85b54 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1130,11 +1130,9 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { -#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS - if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) + if (btrfs_test_is_dummy_root(root)) return alloc_test_extent_buffer(root->fs_info, bytenr, blocksize); -#endif return alloc_extent_buffer(root->fs_info, bytenr, blocksize); } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 44d04979f07..7895db9c6bd 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3073,10 +3073,10 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, u64, u64, u64, u64, u64, u64, int); -#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS - if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) + + if (btrfs_test_is_dummy_root(root)) return 0; -#endif + ref_root = btrfs_header_owner(buf); nritems = btrfs_header_nritems(buf); level = btrfs_header_level(buf); @@ -6264,10 +6264,9 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, int ret; struct btrfs_fs_info *fs_info = root->fs_info; -#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS - if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) + if (btrfs_test_is_dummy_root(root)) return 0; -#endif + add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid); /* @@ -7336,15 +7335,14 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, bool 
skinny_metadata = btrfs_fs_incompat(root->fs_info, SKINNY_METADATA); -#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS - if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) { + if (btrfs_test_is_dummy_root(root)) { buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr, blocksize, level); if (!IS_ERR(buf)) root->alloc_bytenr += blocksize; return buf; } -#endif + block_rsv = use_block_rsv(trans, root, blocksize); if (IS_ERR(block_rsv)) return ERR_CAST(block_rsv); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index cd9717ea8c9..48b60dbf807 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -539,10 +539,9 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; struct btrfs_key key; -#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS - if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &quota_root->state))) + if (btrfs_test_is_dummy_root(quota_root)) return 0; -#endif + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -698,10 +697,9 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans, int ret; int slot; -#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS - if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) + if (btrfs_test_is_dummy_root(root)) return 0; -#endif + key.objectid = 0; key.type = BTRFS_QGROUP_INFO_KEY; key.offset = qgroup->qgroupid; -- cgit v1.2.3-70-g09d2 From 656f30dba7ab8179c9a2e04293b0c7b383fa9ce9 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 26 Sep 2014 12:25:56 +0100 Subject: Btrfs: be aware of btree inode write errors to avoid fs corruption While we have a transaction ongoing, the VM might decide at any time to call btree_inode->i_mapping->a_ops->writepages(), which will start writeback of dirty pages belonging to btree nodes/leafs. This call might return an error or the writeback might finish with an error before we attempt to commit the running transaction.
If this happens, we might have no way of knowing that such an error happened when we are committing the transaction - because the pages might no longer be marked dirty nor tagged for writeback (if a subsequent modification to the extent buffer didn't happen before the transaction commit) which makes filemap_fdata[write|wait]_range unable to find such pages (even if they're marked with SetPageError). So if this happens we must abort the transaction, otherwise we commit a super block with btree roots that point to btree nodes/leafs whose content on disk is invalid - either garbage or the content of some node/leaf from a past generation that got cowed or deleted and is no longer valid (for this latter case we end up getting error messages like "parent transid verify failed on 10826481664 wanted 25748 found 29562" when reading btree nodes/leafs from disk). Note that setting and checking AS_EIO/AS_ENOSPC in the btree inode's i_mapping would not be enough because we need to distinguish between log tree extents (not fatal) vs non-log tree extents (fatal) and because the next call to filemap_fdatawait_range() will catch and clear such errors in the mapping - and that call might be from a log sync and not from a transaction commit, which means we would not know about the error at transaction commit time. Also, checking for the eb flag EXTENT_BUFFER_IOERR at transaction commit time isn't done and would not be completely reliable, as the eb might be removed from memory and read back when trying to get it, which clears that flag right before reading the eb's pages from disk, making us not know about the previous write error.
Using the new 3 flags for the btree inode also makes us achieve the goal of AS_EIO/AS_ENOSPC when writepages() returns success, started writeback for all dirty pages and before filemap_fdatawait_range() is called, the writeback for all dirty pages had already finished with errors - because we were not using AS_EIO/AS_ENOSPC, filemap_fdatawait_range() would return success, as it could not know that writeback errors happened (the pages were no longer tagged for writeback). Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 11 ++++++++ fs/btrfs/disk-io.c | 4 +-- fs/btrfs/extent-tree.c | 4 ++- fs/btrfs/extent_io.c | 74 +++++++++++++++++++++++++++++++++++++++++++++----- fs/btrfs/extent_io.h | 7 +++-- fs/btrfs/transaction.c | 26 ++++++++++++++++++ 6 files changed, 114 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 7a7521c87c8..8a42adb4e5e 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -44,6 +44,17 @@ #define BTRFS_INODE_IN_DELALLOC_LIST 9 #define BTRFS_INODE_READDIO_NEED_LOCK 10 #define BTRFS_INODE_HAS_PROPS 11 +/* + * The following 3 bits are meant only for the btree inode. 
+ * When any of them is set, it means an error happened while writing an + * extent buffer belonging to: + * 1) a non-log btree + * 2) a log btree and first log sub-transaction + * 3) a log btree and second log sub-transaction + */ +#define BTRFS_INODE_BTREE_ERR 12 +#define BTRFS_INODE_BTREE_LOG1_ERR 13 +#define BTRFS_INODE_BTREE_LOG2_ERR 14 /* in memory btrfs inode */ struct btrfs_inode { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4780e6623c7..09b3c8a0c79 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -607,7 +607,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, goto err; eb->read_mirror = mirror; - if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) { + if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags)) { ret = -EIO; goto err; } @@ -680,7 +680,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror) struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; eb = (struct extent_buffer *)page->private; - set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); + set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = failed_mirror; atomic_dec(&eb->io_pages); if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 44d04979f07..8ebe6bf66e7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7235,17 +7235,19 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, btrfs_set_buffer_uptodate(buf); if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { + buf->log_index = root->log_transid % 2; /* * we allow two log transactions at a time, use different * EXENT bit to differentiate dirty pages. 
*/ - if (root->log_transid % 2 == 0) + if (buf->log_index == 0) set_extent_dirty(&root->dirty_log_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); else set_extent_new(&root->dirty_log_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); } else { + buf->log_index = -1; set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4267a054b9c..215603b911f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3601,6 +3601,68 @@ static void end_extent_buffer_writeback(struct extent_buffer *eb) wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK); } +static void set_btree_ioerr(struct page *page) +{ + struct extent_buffer *eb = (struct extent_buffer *)page->private; + struct btrfs_inode *btree_ino = BTRFS_I(eb->fs_info->btree_inode); + + SetPageError(page); + if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) + return; + + /* + * If writeback for a btree extent that doesn't belong to a log tree + * failed, increment the counter transaction->eb_write_errors. + * We do this because while the transaction is running and before it's + * committing (when we call filemap_fdata[write|wait]_range against + * the btree inode), we might have + * btree_inode->i_mapping->a_ops->writepages() called by the VM - if it + * returns an error or an error happens during writeback, when we're + * committing the transaction we wouldn't know about it, since the pages + * can be no longer dirty nor marked anymore for writeback (if a + * subsequent modification to the extent buffer didn't happen before the + * transaction commit), which makes filemap_fdata[write|wait]_range not + * able to find the pages tagged with SetPageError at transaction + * commit time. 
So if this happens we must abort the transaction, + * otherwise we commit a super block with btree roots that point to + * btree nodes/leafs whose content on disk is invalid - either garbage + * or the content of some node/leaf from a past generation that got + * cowed or deleted and is no longer valid. + * + * Note: setting AS_EIO/AS_ENOSPC in the btree inode's i_mapping would + * not be enough - we need to distinguish between log tree extents vs + * non-log tree extents, and the next filemap_fdatawait_range() call + * will catch and clear such errors in the mapping - and that call might + * be from a log sync and not from a transaction commit. Also, checking + * for the eb flag EXTENT_BUFFER_WRITE_ERR at transaction commit time is + * not done and would not be reliable - the eb might have been released + * from memory and reading it back again means that flag would not be + * set (since it's a runtime flag, not persisted on disk). + * + * Using the flags below in the btree inode also makes us achieve the + * goal of AS_EIO/AS_ENOSPC when writepages() returns success, started + * writeback for all dirty pages and before filemap_fdatawait_range() + * is called, the writeback for all dirty pages had already finished + * with errors - because we were not using AS_EIO/AS_ENOSPC, + * filemap_fdatawait_range() would return success, as it could not know + * that writeback errors happened (the pages were no longer tagged for + * writeback). 
+ */ + switch (eb->log_index) { + case -1: + set_bit(BTRFS_INODE_BTREE_ERR, &btree_ino->runtime_flags); + break; + case 0: + set_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags); + break; + case 1: + set_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags); + break; + default: + BUG(); /* unexpected, logic error */ + } +} + static void end_bio_extent_buffer_writepage(struct bio *bio, int err) { struct bio_vec *bvec; @@ -3614,10 +3676,9 @@ static void end_bio_extent_buffer_writepage(struct bio *bio, int err) BUG_ON(!eb); done = atomic_dec_and_test(&eb->io_pages); - if (err || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) { - set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); + if (err || test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) { ClearPageUptodate(page); - SetPageError(page); + set_btree_ioerr(page); } end_page_writeback(page); @@ -3644,7 +3705,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META; int ret = 0; - clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); + clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags); num_pages = num_extent_pages(eb->start, eb->len); atomic_set(&eb->io_pages, num_pages); if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID) @@ -3661,8 +3722,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, 0, epd->bio_flags, bio_flags); epd->bio_flags = bio_flags; if (ret) { - set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); - SetPageError(p); + set_btree_ioerr(p); end_page_writeback(p); if (atomic_sub_and_test(num_pages - i, &eb->io_pages)) end_extent_buffer_writeback(eb); @@ -5055,7 +5115,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, goto unlock_exit; } - clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); + clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = 0; atomic_set(&eb->io_pages, num_reads); for (i = start_i; i < num_pages; i++) { diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 5e91fb9d176..06f030c0084 
100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -41,9 +41,10 @@ #define EXTENT_BUFFER_TREE_REF 5 #define EXTENT_BUFFER_STALE 6 #define EXTENT_BUFFER_WRITEBACK 7 -#define EXTENT_BUFFER_IOERR 8 +#define EXTENT_BUFFER_READ_ERR 8 /* read IO error */ #define EXTENT_BUFFER_DUMMY 9 #define EXTENT_BUFFER_IN_TREE 10 +#define EXTENT_BUFFER_WRITE_ERR 11 /* write IO error */ /* these are flags for extent_clear_unlock_delalloc */ #define PAGE_UNLOCK (1 << 0) @@ -141,7 +142,9 @@ struct extent_buffer { atomic_t blocking_readers; atomic_t spinning_readers; atomic_t spinning_writers; - int lock_nested; + short lock_nested; + /* >= 0 if eb belongs to a log tree, -1 otherwise */ + short log_index; /* protects write locks */ rwlock_t lock; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 16d0c1b62b3..a47b1000a6e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -851,6 +851,8 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, struct extent_state *cached_state = NULL; u64 start = 0; u64 end; + struct btrfs_inode *btree_ino = BTRFS_I(root->fs_info->btree_inode); + bool errors = false; while (!find_first_extent_bit(dirty_pages, start, &start, &end, EXTENT_NEED_WAIT, &cached_state)) { @@ -864,6 +866,26 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, } if (err) werr = err; + + if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { + if ((mark & EXTENT_DIRTY) && + test_and_clear_bit(BTRFS_INODE_BTREE_LOG1_ERR, + &btree_ino->runtime_flags)) + errors = true; + + if ((mark & EXTENT_NEW) && + test_and_clear_bit(BTRFS_INODE_BTREE_LOG2_ERR, + &btree_ino->runtime_flags)) + errors = true; + } else { + if (test_and_clear_bit(BTRFS_INODE_BTREE_ERR, + &btree_ino->runtime_flags)) + errors = true; + } + + if (errors && !werr) + werr = -EIO; + return werr; } @@ -1629,6 +1651,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, { struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_transaction 
*prev_trans = NULL; + struct btrfs_inode *btree_ino = BTRFS_I(root->fs_info->btree_inode); int ret; /* Stop the commit early if ->aborted is set */ @@ -1871,6 +1894,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_update_commit_device_size(root->fs_info); btrfs_update_commit_device_bytes_used(root, cur_trans); + clear_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags); + clear_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags); + spin_lock(&root->fs_info->trans_lock); cur_trans->state = TRANS_STATE_UNBLOCKED; root->fs_info->running_transaction = NULL; -- cgit v1.2.3-70-g09d2 From c926093ec516f5d316ecdf8c1be11f577ac71b85 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 30 Sep 2014 19:16:47 +0200 Subject: btrfs: add more superblock checks Populate btrfs_check_super_valid() with checks that try to verify the consistency of the superblock by additional conditions that may arise from corrupted devices or bitflips. Some of the tests are only hints and issue warnings instead of failing the mount, basically when the checks are derived from the data found in the superblock. Tested on a broken image provided by Qu.
Reported-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 09b3c8a0c79..fc8dfaa2796 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3817,10 +3817,73 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, int read_only) { + struct btrfs_super_block *sb = fs_info->super_copy; + int ret = 0; + + if (sb->root_level > BTRFS_MAX_LEVEL) { + printk(KERN_ERR "BTRFS: tree_root level too big: %d > %d\n", + sb->root_level, BTRFS_MAX_LEVEL); + ret = -EINVAL; + } + if (sb->chunk_root_level > BTRFS_MAX_LEVEL) { + printk(KERN_ERR "BTRFS: chunk_root level too big: %d > %d\n", + sb->chunk_root_level, BTRFS_MAX_LEVEL); + ret = -EINVAL; + } + if (sb->log_root_level > BTRFS_MAX_LEVEL) { + printk(KERN_ERR "BTRFS: log_root level too big: %d > %d\n", + sb->log_root_level, BTRFS_MAX_LEVEL); + ret = -EINVAL; + } + /* - * Placeholder for checks + * The common minimum, we don't know if we can trust the nodesize/sectorsize + * items yet, they'll be verified later. Issue just a warning. 
*/ - return 0; + if (!IS_ALIGNED(sb->root, 4096)) + printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", + sb->root); + if (!IS_ALIGNED(sb->chunk_root, 4096)) + printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", + sb->chunk_root); + if (!IS_ALIGNED(sb->log_root, 4096)) + printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", + sb->log_root); + + if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { + printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n", + fs_info->fsid, sb->dev_item.fsid); + ret = -EINVAL; + } + + /* + * Hint to catch really bogus numbers, bitflips or so, more exact checks are + * done later + */ + if (sb->num_devices > (1UL << 31)) + printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n", + sb->num_devices); + + if (sb->bytenr != BTRFS_SUPER_INFO_OFFSET) { + printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n", + sb->bytenr, BTRFS_SUPER_INFO_OFFSET); + ret = -EINVAL; + } + + /* + * The generation is a global counter, we'll trust it more than the others + * but it's still possible that it's the one that's wrong. 
+ */ + if (sb->generation < sb->chunk_root_generation) + printk(KERN_WARNING + "BTRFS: suspicious: generation < chunk_root_generation: %llu < %llu\n", + sb->generation, sb->chunk_root_generation); + if (sb->generation < sb->cache_generation && sb->cache_generation != (u64)-1) + printk(KERN_WARNING + "BTRFS: suspicious: generation < cache_generation: %llu < %llu\n", + sb->generation, sb->cache_generation); + + return ret; } static void btrfs_error_commit_super(struct btrfs_root *root) -- cgit v1.2.3-70-g09d2 From 21e7626b12f25770e2975bc7c7b2e1d5b1d58a57 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 27 Oct 2014 13:52:21 +0100 Subject: btrfs: use macro accessors in superblock validation checks The initial patch c926093ec516f5d316 (btrfs: add more superblock checks) did not properly use the macro accessors that wrap endianness and the code would not work correctly on big endian machines. Reported-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2409718e3f2..1ae1661ba14 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3817,19 +3817,19 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, struct btrfs_super_block *sb = fs_info->super_copy; int ret = 0; - if (sb->root_level > BTRFS_MAX_LEVEL) { - printk(KERN_ERR "BTRFS: tree_root level too big: %d > %d\n", - sb->root_level, BTRFS_MAX_LEVEL); + if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) { + printk(KERN_ERR "BTRFS: tree_root level too big: %d >= %d\n", + btrfs_super_root_level(sb), BTRFS_MAX_LEVEL); ret = -EINVAL; } - if (sb->chunk_root_level > BTRFS_MAX_LEVEL) { - printk(KERN_ERR "BTRFS: chunk_root level too big: %d > %d\n", - sb->chunk_root_level, BTRFS_MAX_LEVEL); + if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) { + printk(KERN_ERR 
"BTRFS: chunk_root level too big: %d >= %d\n", + btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL); ret = -EINVAL; } - if (sb->log_root_level > BTRFS_MAX_LEVEL) { - printk(KERN_ERR "BTRFS: log_root level too big: %d > %d\n", - sb->log_root_level, BTRFS_MAX_LEVEL); + if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) { + printk(KERN_ERR "BTRFS: log_root level too big: %d >= %d\n", + btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL); ret = -EINVAL; } @@ -3837,15 +3837,15 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, * The common minimum, we don't know if we can trust the nodesize/sectorsize * items yet, they'll be verified later. Issue just a warning. */ - if (!IS_ALIGNED(sb->root, 4096)) + if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", sb->root); - if (!IS_ALIGNED(sb->chunk_root, 4096)) + if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", sb->chunk_root); - if (!IS_ALIGNED(sb->log_root, 4096)) + if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", - sb->log_root); + btrfs_super_log_root(sb)); if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n", @@ -3857,13 +3857,13 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, * Hint to catch really bogus numbers, bitflips or so, more exact checks are * done later */ - if (sb->num_devices > (1UL << 31)) + if (btrfs_super_num_devices(sb) > (1UL << 31)) printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n", - sb->num_devices); + btrfs_super_num_devices(sb)); - if (sb->bytenr != BTRFS_SUPER_INFO_OFFSET) { + if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) { printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n", - sb->bytenr, BTRFS_SUPER_INFO_OFFSET); + btrfs_super_bytenr(sb), 
BTRFS_SUPER_INFO_OFFSET); ret = -EINVAL; } @@ -3871,14 +3871,15 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, * The generation is a global counter, we'll trust it more than the others * but it's still possible that it's the one that's wrong. */ - if (sb->generation < sb->chunk_root_generation) + if (btrfs_super_generation(sb) < btrfs_super_chunk_root_generation(sb)) printk(KERN_WARNING "BTRFS: suspicious: generation < chunk_root_generation: %llu < %llu\n", - sb->generation, sb->chunk_root_generation); - if (sb->generation < sb->cache_generation && sb->cache_generation != (u64)-1) + btrfs_super_generation(sb), btrfs_super_chunk_root_generation(sb)); + if (btrfs_super_generation(sb) < btrfs_super_cache_generation(sb) + && btrfs_super_cache_generation(sb) != (u64)-1) printk(KERN_WARNING "BTRFS: suspicious: generation < cache_generation: %llu < %llu\n", - sb->generation, sb->cache_generation); + btrfs_super_generation(sb), btrfs_super_cache_generation(sb)); return ret; } -- cgit v1.2.3-70-g09d2