From 920bbbfb05c9fce22e088d20eb9dcb8f96342de9 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 12 Nov 2009 09:34:08 +0000 Subject: Btrfs: Rewrite btrfs_drop_extents Rewrite btrfs_drop_extents by using btrfs_duplicate_item, so we can avoid calling lock_extent within transaction. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b3ad168a0bf..ef250be49cd 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -230,8 +230,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, return 1; } - ret = btrfs_drop_extents(trans, root, inode, start, - aligned_end, aligned_end, start, + ret = btrfs_drop_extents(trans, inode, start, aligned_end, &hint_byte, 1); BUG_ON(ret); @@ -1596,7 +1595,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, struct inode *inode, u64 file_pos, u64 disk_bytenr, u64 disk_num_bytes, u64 num_bytes, u64 ram_bytes, - u64 locked_end, u8 compression, u8 encryption, u16 other_encoding, int extent_type) { @@ -1622,9 +1620,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, * the caller is expected to unpin it and allow it to be merged * with the others. */ - ret = btrfs_drop_extents(trans, root, inode, file_pos, - file_pos + num_bytes, locked_end, - file_pos, &hint, 0); + ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes, + &hint, 0); BUG_ON(ret); ins.objectid = inode->i_ino; @@ -1746,7 +1743,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) compressed = 1; if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { BUG_ON(compressed); - ret = btrfs_mark_extent_written(trans, root, inode, + ret = btrfs_mark_extent_written(trans, inode, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len); @@ -1758,8 +1755,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ordered_extent->disk_len, ordered_extent->len, ordered_extent->len, - ordered_extent->file_offset + - ordered_extent->len, compressed, 0, 0, BTRFS_FILE_EXTENT_REG); unpin_extent_cache(&BTRFS_I(inode)->extent_tree, @@ -3209,11 +3204,9 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { u64 hint_byte = 0; hole_size = last_byte - cur_offset; - err = btrfs_drop_extents(trans, root, inode, - cur_offset, + err = btrfs_drop_extents(trans, inode, cur_offset, cur_offset + hole_size, - block_end, - cur_offset, &hint_byte, 1); + &hint_byte, 1); if (err) break; @@ -5643,7 +5636,7 @@ out_fail: static int prealloc_file_range(struct btrfs_trans_handle *trans, struct inode *inode, u64 start, u64 end, - u64 locked_end, u64 alloc_hint, int mode) + u64 alloc_hint, int mode) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key ins; @@ -5669,8 +5662,7 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, ret = insert_reserved_file_extent(trans, inode, cur_offset, ins.objectid, ins.offset, ins.offset, - ins.offset, locked_end, - 0, 0, 0, + ins.offset, 0, 0, 0, BTRFS_FILE_EXTENT_PREALLOC); BUG_ON(ret); btrfs_drop_extent_cache(inode, cur_offset, @@ -5779,8 +5771,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, last_byte = (last_byte + mask) & ~mask; if (em->block_start == EXTENT_MAP_HOLE) { ret = prealloc_file_range(trans, inode, cur_offset, - last_byte, locked_end + 1, - alloc_hint, mode); + last_byte, alloc_hint, mode); if (ret < 0) { free_extent_map(em); break; -- cgit v1.2.3-70-g09d2 From c216775458a2ee345d9412a2770c2916acfb5d30 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 12 Nov 2009 09:34:21 +0000 Subject: Btrfs: Fix disk_i_size update corner case There are some cases file extents are inserted without involving ordered struct. In these cases, we update disk_i_size directly, without checking pending ordered extent and DELALLOC bit. This patch extends btrfs_ordered_update_i_size() to handle these cases. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 5 +-- fs/btrfs/inode.c | 71 +++++++++++++++++++------------- fs/btrfs/ordered-data.c | 105 +++++++++++++++++++++++++++++++++++++----------- fs/btrfs/ordered-data.h | 2 +- 4 files changed, 127 insertions(+), 56 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index f6783a42f01..3f1f50d9d91 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -44,9 +44,6 @@ struct btrfs_inode { */ struct extent_io_tree io_failure_tree; - /* held while inesrting or deleting extents from files */ - struct mutex extent_mutex; - /* held while logging the inode in tree-log.c */ struct mutex log_mutex; @@ -166,7 +163,7 @@ static inline struct btrfs_inode *BTRFS_I(struct inode *inode) static inline void btrfs_i_size_write(struct inode *inode, u64 size) { - inode->i_size = size; + i_size_write(inode, size); BTRFS_I(inode)->disk_i_size = size; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ef250be49cd..fa57247887e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -188,8 +188,18 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); btrfs_free_path(path); + /* + * we're an inline extent, so nobody can + * extend the file past i_size without locking + * a page we already have locked. + * + * We must do any isize and inode updates + * before we unlock the pages. Otherwise we + * could end up racing with unlink. + */ BTRFS_I(inode)->disk_i_size = inode->i_size; btrfs_update_inode(trans, root, inode); + return 0; fail: btrfs_free_path(path); @@ -415,7 +425,6 @@ again: start, end, total_compressed, pages); } - btrfs_end_transaction(trans, root); if (ret == 0) { /* * inline extent creation worked, we don't need @@ -429,9 +438,11 @@ again: EXTENT_CLEAR_DELALLOC | EXTENT_CLEAR_ACCOUNTING | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); - ret = 0; + + btrfs_end_transaction(trans, root); goto free_pages_out; } + btrfs_end_transaction(trans, root); } if (will_compress) { @@ -542,7 +553,6 @@ static noinline int submit_compressed_extents(struct inode *inode, if (list_empty(&async_cow->extents)) return 0; - trans = btrfs_join_transaction(root, 1); while (!list_empty(&async_cow->extents)) { async_extent = list_entry(async_cow->extents.next, @@ -589,19 +599,15 @@ retry: lock_extent(io_tree, async_extent->start, async_extent->start + async_extent->ram_size - 1, GFP_NOFS); - /* - * here we're doing allocation and writeback of the - * compressed pages - */ - btrfs_drop_extent_cache(inode, async_extent->start, - async_extent->start + - async_extent->ram_size - 1, 0); + trans = btrfs_join_transaction(root, 1); ret = btrfs_reserve_extent(trans, root, async_extent->compressed_size, async_extent->compressed_size, 0, alloc_hint, (u64)-1, &ins, 1); + btrfs_end_transaction(trans, root); + if (ret) { int i; for (i = 0; i < async_extent->nr_pages; i++) { @@ -617,6 +623,14 @@ retry: goto retry; } + /* + * here we're doing allocation and writeback of the + * compressed pages + */ + btrfs_drop_extent_cache(inode, async_extent->start, + async_extent->start + + async_extent->ram_size - 1, 0); + em = alloc_extent_map(GFP_NOFS); em->start = async_extent->start; em->len = async_extent->ram_size; @@ -648,8 +662,6 @@ retry: BTRFS_ORDERED_COMPRESSED); BUG_ON(ret); - btrfs_end_transaction(trans, root); - /* * clear dirty, set writeback and unlock the pages. */ @@ -671,13 +683,11 @@ retry: async_extent->nr_pages); BUG_ON(ret); - trans = btrfs_join_transaction(root, 1); alloc_hint = ins.objectid + ins.offset; kfree(async_extent); cond_resched(); } - btrfs_end_transaction(trans, root); return 0; } @@ -741,6 +751,7 @@ static noinline int cow_file_range(struct inode *inode, EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); + *nr_written = *nr_written + (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; *page_started = 1; @@ -1727,18 +1738,27 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) } } - trans = btrfs_join_transaction(root, 1); - if (!ordered_extent) ordered_extent = btrfs_lookup_ordered_extent(inode, start); BUG_ON(!ordered_extent); - if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) - goto nocow; + if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { + BUG_ON(!list_empty(&ordered_extent->list)); + ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); + if (!ret) { + trans = btrfs_join_transaction(root, 1); + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); + btrfs_end_transaction(trans, root); + } + goto out; + } lock_extent(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, GFP_NOFS); + trans = btrfs_join_transaction(root, 1); + if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) compressed = 1; if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { @@ -1765,22 +1785,20 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) unlock_extent(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, GFP_NOFS); -nocow: add_pending_csums(trans, inode, ordered_extent->file_offset, &ordered_extent->list); - mutex_lock(&BTRFS_I(inode)->extent_mutex); - btrfs_ordered_update_i_size(inode, ordered_extent); - btrfs_update_inode(trans, root, inode); - btrfs_remove_ordered_extent(inode, ordered_extent); - mutex_unlock(&BTRFS_I(inode)->extent_mutex); - + /* this also removes the ordered extent from the tree */ + btrfs_ordered_update_i_size(inode, 0, ordered_extent); + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); + btrfs_end_transaction(trans, root); +out: /* once for us */ btrfs_put_ordered_extent(ordered_extent); /* once for the tree */ btrfs_put_ordered_extent(ordered_extent); - btrfs_end_transaction(trans, root); return 0; } @@ -3562,7 +3580,6 @@ static noinline void init_btrfs_i(struct inode *inode) INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); - mutex_init(&BTRFS_I(inode)->extent_mutex); mutex_init(&BTRFS_I(inode)->log_mutex); } diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 5799bc46a30..9b16073bb87 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -291,16 +291,16 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) /* * remove an ordered extent from the tree. No references are dropped - * but, anyone waiting on this extent is woken up. + * and you must wake_up entry->wait. You must hold the tree mutex + * while you call this function. */ -int btrfs_remove_ordered_extent(struct inode *inode, +static int __btrfs_remove_ordered_extent(struct inode *inode, struct btrfs_ordered_extent *entry) { struct btrfs_ordered_inode_tree *tree; struct rb_node *node; tree = &BTRFS_I(inode)->ordered_tree; - mutex_lock(&tree->mutex); node = &entry->rb_node; rb_erase(node, &tree->tree); tree->last = NULL; @@ -326,9 +326,26 @@ int btrfs_remove_ordered_extent(struct inode *inode, } spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); + return 0; +} + +/* + * remove an ordered extent from the tree. No references are dropped + * but any waiters are woken. + */ +int btrfs_remove_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry) +{ + struct btrfs_ordered_inode_tree *tree; + int ret; + + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + ret = __btrfs_remove_ordered_extent(inode, entry); mutex_unlock(&tree->mutex); wake_up(&entry->wait); - return 0; + + return ret; } /* @@ -589,7 +606,7 @@ out: * After an extent is done, call this to conditionally update the on disk * i_size. i_size is updated to cover any fully written part of the file. */ -int btrfs_ordered_update_i_size(struct inode *inode, +int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, struct btrfs_ordered_extent *ordered) { struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; @@ -597,18 +614,30 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 disk_i_size; u64 new_i_size; u64 i_size_test; + u64 i_size = i_size_read(inode); struct rb_node *node; + struct rb_node *prev = NULL; struct btrfs_ordered_extent *test; + int ret = 1; + + if (ordered) + offset = entry_end(ordered); mutex_lock(&tree->mutex); disk_i_size = BTRFS_I(inode)->disk_i_size; + /* truncate file */ + if (disk_i_size > i_size) { + BTRFS_I(inode)->disk_i_size = i_size; + ret = 0; + goto out; + } + /* * if the disk i_size is already at the inode->i_size, or * this ordered extent is inside the disk i_size, we're done */ - if (disk_i_size >= inode->i_size || - ordered->file_offset + ordered->len <= disk_i_size) { + if (disk_i_size == i_size || offset <= disk_i_size) { goto out; } @@ -616,8 +645,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, * we can't update the disk_isize if there are delalloc bytes * between disk_i_size and this ordered extent */ - if (test_range_bit(io_tree, disk_i_size, - ordered->file_offset + ordered->len - 1, + if (test_range_bit(io_tree, disk_i_size, offset - 1, EXTENT_DELALLOC, 0, NULL)) { goto out; } @@ -626,20 +654,32 @@ int btrfs_ordered_update_i_size(struct inode *inode, * if we find an ordered extent then we can't update disk i_size * yet */ - node = &ordered->rb_node; - while (1) { - node = rb_prev(node); - if (!node) - break; + if (ordered) { + node = rb_prev(&ordered->rb_node); + } else { + prev = tree_search(tree, offset); + /* + * we insert file extents without involving ordered struct, + * so there should be no ordered struct cover this offset + */ + if (prev) { + test = rb_entry(prev, struct btrfs_ordered_extent, + rb_node); + BUG_ON(offset_in_entry(test, offset)); + } + node = prev; + } + while (node) { test = rb_entry(node, struct btrfs_ordered_extent, rb_node); if (test->file_offset + test->len <= disk_i_size) break; - if (test->file_offset >= inode->i_size) + if (test->file_offset >= i_size) break; if (test->file_offset >= disk_i_size) goto out; + node = rb_prev(node); } - new_i_size = min_t(u64, entry_end(ordered), i_size_read(inode)); + new_i_size = min_t(u64, offset, i_size); /* * at this point, we know we can safely update i_size to at least @@ -647,7 +687,14 @@ int btrfs_ordered_update_i_size(struct inode *inode, * walk forward and see if ios from higher up in the file have * finished. */ - node = rb_next(&ordered->rb_node); + if (ordered) { + node = rb_next(&ordered->rb_node); + } else { + if (prev) + node = rb_next(prev); + else + node = rb_first(&tree->tree); + } i_size_test = 0; if (node) { /* @@ -655,10 +702,10 @@ int btrfs_ordered_update_i_size(struct inode *inode, * between our ordered extent and the next one. */ test = rb_entry(node, struct btrfs_ordered_extent, rb_node); - if (test->file_offset > entry_end(ordered)) + if (test->file_offset > offset) i_size_test = test->file_offset; } else { - i_size_test = i_size_read(inode); + i_size_test = i_size; } /* @@ -667,15 +714,25 @@ int btrfs_ordered_update_i_size(struct inode *inode, * are no delalloc bytes in this area, it is safe to update * disk_i_size to the end of the region. */ - if (i_size_test > entry_end(ordered) && - !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, - EXTENT_DELALLOC, 0, NULL)) { - new_i_size = min_t(u64, i_size_test, i_size_read(inode)); + if (i_size_test > offset && + !test_range_bit(io_tree, offset, i_size_test - 1, + EXTENT_DELALLOC, 0, NULL)) { + new_i_size = min_t(u64, i_size_test, i_size); } BTRFS_I(inode)->disk_i_size = new_i_size; + ret = 0; out: + /* + * we need to remove the ordered extent with the tree lock held + * so that other people calling this function don't find our fully + * processed ordered entry and skip updating the i_size + */ + if (ordered) + __btrfs_remove_ordered_extent(inode, ordered); mutex_unlock(&tree->mutex); - return 0; + if (ordered) + wake_up(&ordered->wait); + return ret; } /* diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index f82e87488ca..4fa20398aec 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -150,7 +150,7 @@ void btrfs_start_ordered_extent(struct inode *inode, int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); struct btrfs_ordered_extent * btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); -int btrfs_ordered_update_i_size(struct inode *inode, +int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, struct btrfs_ordered_extent *ordered); int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); -- cgit v1.2.3-70-g09d2 From c71bf099abddf3e0fdc27f251ba76fca1461d49a Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 12 Nov 2009 09:34:40 +0000 Subject: Btrfs: Avoid orphan inodes cleanup while replaying log We do log replay in a single transaction, so it's not good to do unbound operations. This patch cleans up orphan inodes cleanup after replaying the log. It also avoids doing other unbound operations such as truncating a file during replaying log. These unbound operations are postponed to the orphan inode cleanup stage. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 5 +++-- fs/btrfs/disk-io.c | 17 +++++++++++------ fs/btrfs/inode.c | 19 ++++++++++++++++--- fs/btrfs/relocation.c | 1 + fs/btrfs/tree-log.c | 49 ++++++++++++++++++++++++------------------------- 5 files changed, 55 insertions(+), 36 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ae5b0aaa938..fcfbefbbb68 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -859,8 +859,9 @@ struct btrfs_fs_info { struct mutex ordered_operations_mutex; struct rw_semaphore extent_commit_sem; - struct rw_semaphore subvol_sem; + struct rw_semaphore cleanup_work_sem; + struct rw_semaphore subvol_sem; struct srcu_struct subvol_srcu; struct list_head trans_list; @@ -1034,12 +1035,12 @@ struct btrfs_root { int ref_cows; int track_dirty; int in_radix; + int clean_orphans; u64 defrag_trans_start; struct btrfs_key defrag_progress; struct btrfs_key defrag_max; int defrag_running; - int defrag_level; char *name; int in_sysfs; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 101940fab9b..c1e59e33f02 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -892,6 +892,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->stripesize = stripesize; root->ref_cows = 0; root->track_dirty = 0; + root->in_radix = 0; + root->clean_orphans = 0; root->fs_info = fs_info; root->objectid = objectid; @@ -928,7 +930,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->defrag_trans_start = fs_info->generation; init_completion(&root->kobj_unregister); root->defrag_running = 0; - root->defrag_level = 0; root->root_key.objectid = objectid; root->anon_super.s_root = NULL; root->anon_super.s_dev = 0; @@ -1210,8 +1211,10 @@ again: ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, root); - if (ret == 0) + if (ret == 0) { root->in_radix = 1; + root->clean_orphans = 1; + } spin_unlock(&fs_info->fs_roots_radix_lock); radix_tree_preload_end(); if (ret) { @@ -1225,10 +1228,6 @@ again: ret = btrfs_find_dead_roots(fs_info->tree_root, root->root_key.objectid); WARN_ON(ret); - - if (!(fs_info->sb->s_flags & MS_RDONLY)) - btrfs_orphan_cleanup(root); - return root; fail: free_fs_root(root); @@ -1689,6 +1688,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->volume_mutex); init_rwsem(&fs_info->extent_commit_sem); + init_rwsem(&fs_info->cleanup_work_sem); init_rwsem(&fs_info->subvol_sem); btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); @@ -2388,6 +2388,11 @@ int btrfs_commit_super(struct btrfs_root *root) mutex_lock(&root->fs_info->cleaner_mutex); btrfs_clean_old_snapshots(root); mutex_unlock(&root->fs_info->cleaner_mutex); + + /* wait until ongoing cleanup work done */ + down_write(&root->fs_info->cleanup_work_sem); + up_write(&root->fs_info->cleanup_work_sem); + trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); BUG_ON(ret); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fa57247887e..eb2db3bde23 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2093,16 +2093,17 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) struct inode *inode; int ret = 0, nr_unlink = 0, nr_truncate = 0; - path = btrfs_alloc_path(); - if (!path) + if (!xchg(&root->clean_orphans, 0)) return; + + path = btrfs_alloc_path(); + BUG_ON(!path); path->reada = -1; key.objectid = BTRFS_ORPHAN_OBJECTID; btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); key.offset = (u64)-1; - while (1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) { @@ -3298,6 +3299,11 @@ void btrfs_delete_inode(struct inode *inode) } btrfs_wait_ordered_range(inode, 0, (u64)-1); + if (root->fs_info->log_root_recovering) { + BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan)); + goto no_delete; + } + if (inode->i_nlink > 0) { BUG_ON(btrfs_root_refs(&root->root_item) != 0); goto no_delete; @@ -3705,6 +3711,13 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) } srcu_read_unlock(&root->fs_info->subvol_srcu, index); + if (root != sub_root) { + down_read(&root->fs_info->cleanup_work_sem); + if (!(inode->i_sb->s_flags & MS_RDONLY)) + btrfs_orphan_cleanup(sub_root); + up_read(&root->fs_info->cleanup_work_sem); + } + return inode; } diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index cfcc93c93a7..975fdd33ac4 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3755,6 +3755,7 @@ out: BTRFS_DATA_RELOC_TREE_OBJECTID); if (IS_ERR(fs_root)) err = PTR_ERR(fs_root); + btrfs_orphan_cleanup(fs_root); } return err; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 6bbaa10bb67..4a9434b622e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -930,6 +930,17 @@ out_nowrite: return 0; } +static int insert_orphan_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 offset) +{ + int ret; + ret = btrfs_find_orphan_item(root, offset); + if (ret > 0) + ret = btrfs_insert_orphan_item(trans, root, offset); + return ret; +} + + /* * There are a few corners where the link count of the file can't * be properly maintained during replay. So, instead of adding @@ -997,9 +1008,13 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, } BTRFS_I(inode)->index_cnt = (u64)-1; - if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) { - ret = replay_dir_deletes(trans, root, NULL, path, - inode->i_ino, 1); + if (inode->i_nlink == 0) { + if (S_ISDIR(inode->i_mode)) { + ret = replay_dir_deletes(trans, root, NULL, path, + inode->i_ino, 1); + BUG_ON(ret); + } + ret = insert_orphan_item(trans, root, inode->i_ino); BUG_ON(ret); } btrfs_free_path(path); @@ -1587,7 +1602,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, /* inode keys are done during the first stage */ if (key.type == BTRFS_INODE_ITEM_KEY && wc->stage == LOG_WALK_REPLAY_INODES) { - struct inode *inode; struct btrfs_inode_item *inode_item; u32 mode; @@ -1603,31 +1617,16 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, eb, i, &key); BUG_ON(ret); - /* for regular files, truncate away - * extents past the new EOF + /* for regular files, make sure corresponding + * orhpan item exist. extents past the new EOF + * will be truncated later by orphan cleanup. */ if (S_ISREG(mode)) { - inode = read_one_inode(root, - key.objectid); - BUG_ON(!inode); - - ret = btrfs_truncate_inode_items(wc->trans, - root, inode, inode->i_size, - BTRFS_EXTENT_DATA_KEY); + ret = insert_orphan_item(wc->trans, root, + key.objectid); BUG_ON(ret); - - /* if the nlink count is zero here, the iput - * will free the inode. We bump it to make - * sure it doesn't get freed until the link - * count fixup is done - */ - if (inode->i_nlink == 0) { - btrfs_inc_nlink(inode); - btrfs_update_inode(wc->trans, - root, inode); - } - iput(inode); } + ret = link_to_fixup_dir(wc->trans, root, path, key.objectid); BUG_ON(ret); -- cgit v1.2.3-70-g09d2 From 5a303d5d4b8055d2e5a03e92d04745bfc5881a22 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 12 Nov 2009 09:34:52 +0000 Subject: Btrfs: Make fallocate(2) more ENOSPC friendly fallocate(2) may allocate large number of file extents, so it's not good to do it in a single transaction. This patch make fallocate(2) start a new transaction for each file extents it allocates. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 65 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 32 insertions(+), 33 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index eb2db3bde23..8d8baaa6150 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5664,10 +5664,10 @@ out_fail: return err; } -static int prealloc_file_range(struct btrfs_trans_handle *trans, - struct inode *inode, u64 start, u64 end, +static int prealloc_file_range(struct inode *inode, u64 start, u64 end, u64 alloc_hint, int mode) { + struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key ins; u64 alloc_size; @@ -5678,17 +5678,23 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, while (num_bytes > 0) { alloc_size = min(num_bytes, root->fs_info->max_extent); - ret = btrfs_reserve_metadata_space(root, 1); - if (ret) - goto out; - ret = btrfs_reserve_extent(trans, root, alloc_size, root->sectorsize, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { WARN_ON(1); - goto out; + break; + } + + ret = btrfs_reserve_metadata_space(root, 3); + if (ret) { + btrfs_free_reserved_extent(root, ins.objectid, + ins.offset); + break; } + + trans = btrfs_start_transaction(root, 1); + ret = insert_reserved_file_extent(trans, inode, cur_offset, ins.objectid, ins.offset, ins.offset, @@ -5697,22 +5703,25 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, BUG_ON(ret); btrfs_drop_extent_cache(inode, cur_offset, cur_offset + ins.offset -1, 0); + num_bytes -= ins.offset; cur_offset += ins.offset; alloc_hint = ins.objectid + ins.offset; - btrfs_unreserve_metadata_space(root, 1); - } -out: - if (cur_offset > start) { + inode->i_ctime = CURRENT_TIME; BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; if (!(mode & FALLOC_FL_KEEP_SIZE) && - cur_offset > i_size_read(inode)) - btrfs_i_size_write(inode, cur_offset); + cur_offset > inode->i_size) { + i_size_write(inode, cur_offset); + btrfs_ordered_update_i_size(inode, cur_offset, NULL); + } + ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); - } + btrfs_end_transaction(trans, root); + btrfs_unreserve_metadata_space(root, 3); + } return ret; } @@ -5727,8 +5736,6 @@ static long btrfs_fallocate(struct inode *inode, int mode, u64 locked_end; u64 mask = BTRFS_I(inode)->root->sectorsize - 1; struct extent_map *em; - struct btrfs_trans_handle *trans; - struct btrfs_root *root; int ret; alloc_start = offset & ~mask; @@ -5747,9 +5754,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, goto out; } - root = BTRFS_I(inode)->root; - - ret = btrfs_check_data_free_space(root, inode, + ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode, alloc_end - alloc_start); if (ret) goto out; @@ -5758,12 +5763,6 @@ static long btrfs_fallocate(struct inode *inode, int mode, while (1) { struct btrfs_ordered_extent *ordered; - trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); - if (!trans) { - ret = -EIO; - goto out_free; - } - /* the extent lock is ordered inside the running * transaction */ @@ -5777,8 +5776,6 @@ static long btrfs_fallocate(struct inode *inode, int mode, btrfs_put_ordered_extent(ordered); unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, GFP_NOFS); - btrfs_end_transaction(trans, BTRFS_I(inode)->root); - /* * we can't wait on the range with the transaction * running or with the extent lock held @@ -5799,9 +5796,12 @@ static long btrfs_fallocate(struct inode *inode, int mode, BUG_ON(IS_ERR(em) || !em); last_byte = min(extent_map_end(em), alloc_end); last_byte = (last_byte + mask) & ~mask; - if (em->block_start == EXTENT_MAP_HOLE) { - ret = prealloc_file_range(trans, inode, cur_offset, - last_byte, alloc_hint, mode); + if (em->block_start == EXTENT_MAP_HOLE || + (cur_offset >= inode->i_size && + !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { + ret = prealloc_file_range(inode, + cur_offset, last_byte, + alloc_hint, mode); if (ret < 0) { free_extent_map(em); break; @@ -5820,9 +5820,8 @@ static long btrfs_fallocate(struct inode *inode, int mode, unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, GFP_NOFS); - btrfs_end_transaction(trans, BTRFS_I(inode)->root); -out_free: - btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start); + btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode, + alloc_end - alloc_start); out: mutex_unlock(&inode->i_mutex); return ret; -- cgit v1.2.3-70-g09d2 From 8082510e7124cc50d728f1b875639cb4e22312cc Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 12 Nov 2009 09:35:36 +0000 Subject: Btrfs: Make truncate(2) more ENOSPC friendly truncating and deleting regular files are unbound operations, so it's not good to do them in a single transaction. This patch makes btrfs_truncate and btrfs_delete_inode start a new transaction after all items in a tree leaf are deleted. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 316 ++++++++++++++++++++++++++++++-------------------- fs/btrfs/relocation.c | 33 +++--- 2 files changed, 212 insertions(+), 137 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8d8baaa6150..dcec42ee8cf 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2848,37 +2848,40 @@ out: * min_type is the minimum key type to truncate down to. If set to 0, this * will kill all the items on this inode, including the INODE_ITEM_KEY. */ -noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode, - u64 new_size, u32 min_type) +int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + u64 new_size, u32 min_type) { - int ret; struct btrfs_path *path; - struct btrfs_key key; - struct btrfs_key found_key; - u32 found_type = (u8)-1; struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; + struct btrfs_key key; + struct btrfs_key found_key; u64 extent_start = 0; u64 extent_num_bytes = 0; u64 extent_offset = 0; u64 item_end = 0; + u64 mask = root->sectorsize - 1; + u32 found_type = (u8)-1; int found_extent; int del_item; int pending_del_nr = 0; int pending_del_slot = 0; int extent_type = -1; int encoding; - u64 mask = root->sectorsize - 1; + int ret; + int err = 0; + + BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); if (root->ref_cows) btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); + path = btrfs_alloc_path(); BUG_ON(!path); path->reada = -1; - /* FIXME, add redo link to tree so we don't leak on crash */ key.objectid = inode->i_ino; key.offset = (u64)-1; key.type = (u8)-1; @@ -2886,17 +2889,17 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, search_again: path->leave_spinning = 1; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) - goto error; + if (ret < 0) { + err = ret; + goto out; + } if (ret > 0) { /* there are no items in the tree for us to truncate, we're * done */ - if (path->slots[0] == 0) { - ret = 0; - goto error; - } + if (path->slots[0] == 0) + goto out; path->slots[0]--; } @@ -2931,28 +2934,17 @@ search_again: } item_end--; } - if (item_end < new_size) { - if (found_type == BTRFS_DIR_ITEM_KEY) - found_type = BTRFS_INODE_ITEM_KEY; - else if (found_type == BTRFS_EXTENT_ITEM_KEY) - found_type = BTRFS_EXTENT_DATA_KEY; - else if (found_type == BTRFS_EXTENT_DATA_KEY) - found_type = BTRFS_XATTR_ITEM_KEY; - else if (found_type == BTRFS_XATTR_ITEM_KEY) - found_type = BTRFS_INODE_REF_KEY; - else if (found_type) - found_type--; - else + if (found_type > min_type) { + del_item = 1; + } else { + if (item_end < new_size) break; - btrfs_set_key_type(&key, found_type); - goto next; + if (found_key.offset >= new_size) + del_item = 1; + else + del_item = 0; } - if (found_key.offset >= new_size) - del_item = 1; - else - del_item = 0; found_extent = 0; - /* FIXME, shrink the extent if the ref count is only 1 */ if (found_type != BTRFS_EXTENT_DATA_KEY) goto delete; @@ -3039,42 +3031,36 @@ delete: inode->i_ino, extent_offset); BUG_ON(ret); } -next: - if (path->slots[0] == 0) { - if (pending_del_nr) - goto del_pending; - btrfs_release_path(root, path); - if (found_type == BTRFS_INODE_ITEM_KEY) - break; - goto search_again; - } - path->slots[0]--; - if (pending_del_nr && - path->slots[0] + 1 != pending_del_slot) { - struct btrfs_key debug; -del_pending: - btrfs_item_key_to_cpu(path->nodes[0], &debug, - pending_del_slot); - ret = btrfs_del_items(trans, root, path, - pending_del_slot, - pending_del_nr); - BUG_ON(ret); - pending_del_nr = 0; + if (found_type == BTRFS_INODE_ITEM_KEY) + break; + + if (path->slots[0] == 0 || + path->slots[0] != pending_del_slot) { + if (root->ref_cows) { + err = -EAGAIN; + goto out; + } + if (pending_del_nr) { + ret = btrfs_del_items(trans, root, path, + pending_del_slot, + pending_del_nr); + BUG_ON(ret); + pending_del_nr = 0; + } btrfs_release_path(root, path); - if (found_type == BTRFS_INODE_ITEM_KEY) - break; goto search_again; + } else { + path->slots[0]--; } } - ret = 0; -error: +out: if (pending_del_nr) { ret = btrfs_del_items(trans, root, path, pending_del_slot, pending_del_nr); } btrfs_free_path(path); - return ret; + return err; } /* @@ -3194,10 +3180,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) if (size <= hole_start) return 0; - err = btrfs_truncate_page(inode->i_mapping, inode->i_size); - if (err) - return err; - while (1) { struct btrfs_ordered_extent *ordered; btrfs_wait_ordered_range(inode, hole_start, @@ -3210,9 +3192,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) btrfs_put_ordered_extent(ordered); } - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - cur_offset = hole_start; while (1) { em = btrfs_get_extent(inode, NULL, 0, cur_offset, @@ -3220,38 +3199,120 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) BUG_ON(IS_ERR(em) || !em); last_byte = min(extent_map_end(em), block_end); last_byte = (last_byte + mask) & ~mask; - if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { + if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { u64 hint_byte = 0; hole_size = last_byte - cur_offset; - err = btrfs_drop_extents(trans, inode, cur_offset, - cur_offset + hole_size, - &hint_byte, 1); - if (err) - break; - err = btrfs_reserve_metadata_space(root, 1); + err = btrfs_reserve_metadata_space(root, 2); if (err) break; + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + err = btrfs_drop_extents(trans, inode, cur_offset, + cur_offset + hole_size, + &hint_byte, 1); + BUG_ON(err); + err = btrfs_insert_file_extent(trans, root, inode->i_ino, cur_offset, 0, 0, hole_size, 0, hole_size, 0, 0, 0); + BUG_ON(err); + btrfs_drop_extent_cache(inode, hole_start, last_byte - 1, 0); - btrfs_unreserve_metadata_space(root, 1); + + btrfs_end_transaction(trans, root); + btrfs_unreserve_metadata_space(root, 2); } free_extent_map(em); cur_offset = last_byte; - if (err || cur_offset >= block_end) + if (cur_offset >= block_end) break; } - btrfs_end_transaction(trans, root); unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); return err; } +static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + unsigned long nr; + int ret; + + if (attr->ia_size == inode->i_size) + return 0; + + if (attr->ia_size > inode->i_size) { + unsigned long limit; + limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + if (attr->ia_size > inode->i_sb->s_maxbytes) + return -EFBIG; + if (limit != RLIM_INFINITY && attr->ia_size > limit) { + send_sig(SIGXFSZ, current, 0); + return -EFBIG; + } + } + + ret = btrfs_reserve_metadata_space(root, 1); + if (ret) + return ret; + + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + ret = btrfs_orphan_add(trans, inode); + BUG_ON(ret); + + nr = trans->blocks_used; + btrfs_end_transaction(trans, root); + btrfs_unreserve_metadata_space(root, 1); + btrfs_btree_balance_dirty(root, nr); + + if (attr->ia_size > inode->i_size) { + ret = btrfs_cont_expand(inode, attr->ia_size); + if (ret) { + btrfs_truncate(inode); + return ret; + } + + i_size_write(inode, attr->ia_size); + btrfs_ordered_update_i_size(inode, inode->i_size, NULL); + + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); + if (inode->i_nlink > 0) { + ret = btrfs_orphan_del(trans, inode); + BUG_ON(ret); + } + nr = trans->blocks_used; + btrfs_end_transaction(trans, root); + btrfs_btree_balance_dirty(root, nr); + return 0; + } + + /* + * We're truncating a file that used to have good data down to + * zero. Make sure it gets into the ordered flush list so that + * any new writes get down to disk quickly. + */ + if (attr->ia_size == 0) + BTRFS_I(inode)->ordered_data_close = 1; + + /* we don't support swapfiles, so vmtruncate shouldn't fail */ + ret = vmtruncate(inode, attr->ia_size); + BUG_ON(ret); + + return 0; +} + static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; @@ -3262,23 +3323,14 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) return err; if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { - if (attr->ia_size > inode->i_size) { - err = btrfs_cont_expand(inode, attr->ia_size); - if (err) - return err; - } else if (inode->i_size > 0 && - attr->ia_size == 0) { - - /* we're truncating a file that used to have good - * data down to zero. Make sure it gets into - * the ordered flush list so that any new writes - * get down to disk quickly. - */ - BTRFS_I(inode)->ordered_data_close = 1; - } + err = btrfs_setattr_size(inode, attr); + if (err) + return err; } + attr->ia_valid &= ~ATTR_SIZE; - err = inode_setattr(inode, attr); + if (attr->ia_valid) + err = inode_setattr(inode, attr); if (!err && ((attr->ia_valid & ATTR_MODE))) err = btrfs_acl_chmod(inode); @@ -3310,30 +3362,32 @@ void btrfs_delete_inode(struct inode *inode) } btrfs_i_size_write(inode, 0); - trans = btrfs_join_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0); - if (ret) { - btrfs_orphan_del(NULL, inode); - goto no_delete_lock; - } + while (1) { + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); - btrfs_orphan_del(trans, inode); + if (ret != -EAGAIN) + break; - nr = trans->blocks_used; - clear_inode(inode); + nr = trans->blocks_used; + btrfs_end_transaction(trans, root); + trans = NULL; + btrfs_btree_balance_dirty(root, nr); + } - btrfs_end_transaction(trans, root); - btrfs_btree_balance_dirty(root, nr); - return; + if (ret == 0) { + ret = btrfs_orphan_del(trans, inode); + BUG_ON(ret); + } -no_delete_lock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root, nr); no_delete: clear_inode(inode); + return; } /* @@ -5097,17 +5151,20 @@ static void btrfs_truncate(struct inode *inode) unsigned long nr; u64 mask = root->sectorsize - 1; - if (!S_ISREG(inode->i_mode)) - return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + if (!S_ISREG(inode->i_mode)) { + WARN_ON(1); return; + } ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); if (ret) return; + btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); + btrfs_ordered_update_i_size(inode, inode->i_size, NULL); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); /* * setattr is responsible for setting the ordered_data_close flag, @@ -5129,21 +5186,32 @@ static void btrfs_truncate(struct inode *inode) if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) btrfs_add_ordered_operation(trans, root, inode); - btrfs_set_trans_block_group(trans, inode); - btrfs_i_size_write(inode, inode->i_size); + while (1) { + ret = btrfs_truncate_inode_items(trans, root, inode, + inode->i_size, + BTRFS_EXTENT_DATA_KEY); + if (ret != -EAGAIN) + break; - ret = btrfs_orphan_add(trans, inode); - if (ret) - goto out; - /* FIXME, add redo link to tree so we don't leak on crash */ - ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, - BTRFS_EXTENT_DATA_KEY); - btrfs_update_inode(trans, root, inode); + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); + + nr = trans->blocks_used; + btrfs_end_transaction(trans, root); + btrfs_btree_balance_dirty(root, nr); + + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + } - ret = btrfs_orphan_del(trans, inode); + if (ret == 0 && inode->i_nlink > 0) { + ret = btrfs_orphan_del(trans, inode); + BUG_ON(ret); + } + + ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); -out: nr = trans->blocks_used; ret = btrfs_end_transaction_throttle(trans, root); BUG_ON(ret); @@ -5240,9 +5308,9 @@ void btrfs_destroy_inode(struct inode *inode) spin_lock(&root->list_lock); if (!list_empty(&BTRFS_I(inode)->i_orphan)) { - printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" - " list\n", inode->i_ino); - dump_stack(); + printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", + inode->i_ino); + list_del_init(&BTRFS_I(inode)->i_orphan); } spin_unlock(&root->list_lock); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 975fdd33ac4..f2aa53d2f94 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1561,6 +1561,20 @@ static int invalidate_extent_cache(struct btrfs_root *root, return 0; } +static void put_inodes(struct list_head *list) +{ + struct inodevec *ivec; + while (!list_empty(list)) { + ivec = list_entry(list->next, struct inodevec, list); + list_del(&ivec->list); + while (ivec->nr > 0) { + ivec->nr--; + iput(ivec->inode[ivec->nr]); + } + kfree(ivec); + } +} + static int find_next_key(struct btrfs_path *path, int level, struct btrfs_key *key) @@ -1723,6 +1737,11 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, btrfs_btree_balance_dirty(root, nr); + /* + * put inodes outside transaction, otherwise we may deadlock. + */ + put_inodes(&inode_list); + if (replaced && rc->stage == UPDATE_DATA_PTRS) invalidate_extent_cache(root, &key, &next_key); } @@ -1752,19 +1771,7 @@ out: btrfs_btree_balance_dirty(root, nr); - /* - * put inodes while we aren't holding the tree locks - */ - while (!list_empty(&inode_list)) { - struct inodevec *ivec; - ivec = list_entry(inode_list.next, struct inodevec, list); - list_del(&ivec->list); - while (ivec->nr > 0) { - ivec->nr--; - iput(ivec->inode[ivec->nr]); - } - kfree(ivec); - } + put_inodes(&inode_list); if (replaced && rc->stage == UPDATE_DATA_PTRS) invalidate_extent_cache(root, &key, &next_key); -- cgit v1.2.3-70-g09d2 From f34f57a3ab4e73304d78c125682f1a53cd3975f2 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 12 Nov 2009 09:35:27 +0000 Subject: Btrfs: Pass transaction handle to security and ACL initialization functions Pass transaction handle down to security and ACL initialization functions, so we can avoid starting nested transactions Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/acl.c | 23 ++++++++------- fs/btrfs/ctree.h | 13 ++++++--- fs/btrfs/dir-item.c | 19 +++++-------- fs/btrfs/inode.c | 15 +++++----- fs/btrfs/xattr.c | 80 ++++++++++++++++++++++++++++++++++++----------------- fs/btrfs/xattr.h | 9 +++--- 6 files changed, 96 insertions(+), 63 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 36160424427..1898f8555f0 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -94,7 +94,8 @@ static int btrfs_xattr_get_acl(struct inode *inode, int type, /* * Needs to be called with fs_mutex held */ -static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) +static int btrfs_set_acl(struct btrfs_trans_handle *trans, + struct inode *inode, struct posix_acl *acl, int type) { int ret, size = 0; const char *name; @@ -140,8 +141,7 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) goto out; } - ret = __btrfs_setxattr(inode, name, value, size, 0); - + ret = __btrfs_setxattr(trans, inode, name, value, size, 0); out: kfree(value); @@ -154,7 +154,7 @@ out: static int btrfs_xattr_set_acl(struct inode *inode, int type, const void *value, size_t size) { - int ret = 0; + int ret; struct posix_acl *acl = NULL; if (value) { @@ -167,7 +167,7 @@ static int btrfs_xattr_set_acl(struct inode *inode, int type, } } - ret = btrfs_set_acl(inode, acl, type); + ret = btrfs_set_acl(NULL, inode, acl, type); posix_acl_release(acl); @@ -221,7 +221,8 @@ int btrfs_check_acl(struct inode *inode, int mask) * stuff has been fixed to work with that. If the locking stuff changes, we * need to re-evaluate the acl locking stuff. */ -int btrfs_init_acl(struct inode *inode, struct inode *dir) +int btrfs_init_acl(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *dir) { struct posix_acl *acl = NULL; int ret = 0; @@ -246,7 +247,8 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir) mode_t mode; if (S_ISDIR(inode->i_mode)) { - ret = btrfs_set_acl(inode, acl, ACL_TYPE_DEFAULT); + ret = btrfs_set_acl(trans, inode, acl, + ACL_TYPE_DEFAULT); if (ret) goto failed; } @@ -261,7 +263,7 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir) inode->i_mode = mode; if (ret > 0) { /* we need an acl */ - ret = btrfs_set_acl(inode, clone, + ret = btrfs_set_acl(trans, inode, clone, ACL_TYPE_ACCESS); } } @@ -294,7 +296,7 @@ int btrfs_acl_chmod(struct inode *inode) ret = posix_acl_chmod_masq(clone, inode->i_mode); if (!ret) - ret = btrfs_set_acl(inode, clone, ACL_TYPE_ACCESS); + ret = btrfs_set_acl(NULL, inode, clone, ACL_TYPE_ACCESS); posix_acl_release(clone); @@ -320,7 +322,8 @@ int btrfs_acl_chmod(struct inode *inode) return 0; } -int btrfs_init_acl(struct inode *inode, struct inode *dir) +int btrfs_init_acl(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *dir) { return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index fcfbefbbb68..a7cac2148c7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -310,6 +310,9 @@ struct btrfs_header { #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) - \ sizeof(struct btrfs_file_extent_item)) +#define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ + sizeof(struct btrfs_item) -\ + sizeof(struct btrfs_dir_item)) /* @@ -2201,9 +2204,10 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, struct btrfs_path *path, struct btrfs_dir_item *di); int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, const char *name, - u16 name_len, const void *data, u16 data_len, - u64 dir); + struct btrfs_root *root, + struct btrfs_path *path, u64 objectid, + const char *name, u16 name_len, + const void *data, u16 data_len); struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, @@ -2382,7 +2386,8 @@ int btrfs_check_acl(struct inode *inode, int mask); #else #define btrfs_check_acl NULL #endif -int btrfs_init_acl(struct inode *inode, struct inode *dir); +int btrfs_init_acl(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *dir); int btrfs_acl_chmod(struct inode *inode); /* relocation.c */ diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index f3a6075519c..e9103b3baa4 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -68,12 +68,12 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle * into the tree */ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, const char *name, - u16 name_len, const void *data, u16 data_len, - u64 dir) + struct btrfs_root *root, + struct btrfs_path *path, u64 objectid, + const char *name, u16 name_len, + const void *data, u16 data_len) { int ret = 0; - struct btrfs_path *path; struct btrfs_dir_item *dir_item; unsigned long name_ptr, data_ptr; struct btrfs_key key, location; @@ -81,15 +81,11 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; u32 data_size; - key.objectid = dir; + BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root)); + + key.objectid = objectid; btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); key.offset = btrfs_name_hash(name, name_len); - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - if (name_len + data_len + sizeof(struct btrfs_dir_item) > - BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item)) - return -ENOSPC; data_size = sizeof(*dir_item) + name_len + data_len; dir_item = insert_with_overflow(trans, root, path, &key, data_size, @@ -117,7 +113,6 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, write_extent_buffer(leaf, data, data_ptr, data_len); btrfs_mark_buffer_dirty(path->nodes[0]); - btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dcec42ee8cf..82740a3c628 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -88,13 +88,14 @@ static noinline int cow_file_range(struct inode *inode, u64 start, u64 end, int *page_started, unsigned long *nr_written, int unlock); -static int btrfs_init_inode_security(struct inode *inode, struct inode *dir) +static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *dir) { int err; - err = btrfs_init_acl(inode, dir); + err = btrfs_init_acl(trans, inode, dir); if (!err) - err = btrfs_xattr_security_init(inode, dir); + err = btrfs_xattr_security_init(trans, inode, dir); return err; } @@ -4296,7 +4297,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_inode_security(trans, inode, dir); if (err) { drop_inode = 1; goto out_unlock; @@ -4367,7 +4368,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_inode_security(trans, inode, dir); if (err) { drop_inode = 1; goto out_unlock; @@ -4500,7 +4501,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) drop_on_err = 1; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_inode_security(trans, inode, dir); if (err) goto out_fail; @@ -5660,7 +5661,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_inode_security(trans, inode, dir); if (err) { drop_inode = 1; goto out_unlock; diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index b6dd5967c48..193b58f7d3f 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -85,22 +85,23 @@ out: return ret; } -int __btrfs_setxattr(struct inode *inode, const char *name, - const void *value, size_t size, int flags) +static int do_setxattr(struct btrfs_trans_handle *trans, + struct inode *inode, const char *name, + const void *value, size_t size, int flags) { struct btrfs_dir_item *di; struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; struct btrfs_path *path; - int ret = 0, mod = 0; + size_t name_len = strlen(name); + int ret = 0; + + if (name_len + size > BTRFS_MAX_XATTR_SIZE(root)) + return -ENOSPC; path = btrfs_alloc_path(); if (!path) return -ENOMEM; - trans = btrfs_join_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - /* first lets see if we already have this xattr */ di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name, strlen(name), -1); @@ -118,15 +119,12 @@ int __btrfs_setxattr(struct inode *inode, const char *name, } ret = btrfs_delete_one_dir_name(trans, root, path, di); - if (ret) - goto out; + BUG_ON(ret); btrfs_release_path(root, path); /* if we don't have a value then we are removing the xattr */ - if (!value) { - mod = 1; + if (!value) goto out; - } } else { btrfs_release_path(root, path); @@ -138,20 +136,45 @@ int __btrfs_setxattr(struct inode *inode, const char *name, } /* ok we have to create a completely new xattr */ - ret = btrfs_insert_xattr_item(trans, root, name, strlen(name), - value, size, inode->i_ino); + ret = btrfs_insert_xattr_item(trans, root, path, inode->i_ino, + name, name_len, value, size); + BUG_ON(ret); +out: + btrfs_free_path(path); + return ret; +} + +int __btrfs_setxattr(struct btrfs_trans_handle *trans, + struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + + if (trans) + return do_setxattr(trans, inode, name, value, size, flags); + + ret = btrfs_reserve_metadata_space(root, 2); if (ret) - goto out; - mod = 1; + return ret; -out: - if (mod) { - inode->i_ctime = CURRENT_TIME; - ret = btrfs_update_inode(trans, root, inode); + trans = btrfs_start_transaction(root, 1); + if (!trans) { + ret = -ENOMEM; + goto out; } + btrfs_set_trans_block_group(trans, inode); - btrfs_end_transaction(trans, root); - btrfs_free_path(path); + ret = do_setxattr(trans, inode, name, value, size, flags); + if (ret) + goto out; + + inode->i_ctime = CURRENT_TIME; + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); +out: + btrfs_end_transaction_throttle(trans, root); + btrfs_unreserve_metadata_space(root, 2); return ret; } @@ -314,7 +337,9 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, if (size == 0) value = ""; /* empty EA, do not remove */ - return __btrfs_setxattr(dentry->d_inode, name, value, size, flags); + + return __btrfs_setxattr(NULL, dentry->d_inode, name, value, size, + flags); } int btrfs_removexattr(struct dentry *dentry, const char *name) @@ -329,10 +354,13 @@ int btrfs_removexattr(struct dentry *dentry, const char *name) if (!btrfs_is_valid_xattr(name)) return -EOPNOTSUPP; - return __btrfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE); + + return __btrfs_setxattr(NULL, dentry->d_inode, name, NULL, 0, + XATTR_REPLACE); } -int btrfs_xattr_security_init(struct inode *inode, struct inode *dir) +int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *dir) { int err; size_t len; @@ -354,7 +382,7 @@ int btrfs_xattr_security_init(struct inode *inode, struct inode *dir) } else { strcpy(name, XATTR_SECURITY_PREFIX); strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix); - err = __btrfs_setxattr(inode, name, value, len, 0); + err = __btrfs_setxattr(trans, inode, name, value, len, 0); kfree(name); } diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h index c71e9c3cf3f..721efa0346e 100644 --- a/fs/btrfs/xattr.h +++ b/fs/btrfs/xattr.h @@ -27,15 +27,16 @@ extern struct xattr_handler *btrfs_xattr_handlers[]; extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name, void *buffer, size_t size); -extern int __btrfs_setxattr(struct inode *inode, const char *name, - const void *value, size_t size, int flags); - +extern int __btrfs_setxattr(struct btrfs_trans_handle *trans, + struct inode *inode, const char *name, + const void *value, size_t size, int flags); extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size); extern int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); extern int btrfs_removexattr(struct dentry *dentry, const char *name); -extern int btrfs_xattr_security_init(struct inode *inode, struct inode *dir); +extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *dir); #endif /* __XATTR__ */ -- cgit v1.2.3-70-g09d2 From 24bbcf0442ee04660a5a030efdbb6d03f1c275cb Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 12 Nov 2009 09:36:34 +0000 Subject: Btrfs: Add delayed iput iput() can trigger new transactions if we are dropping the final reference, so calling it in btrfs_commit_transaction may end up deadlock. This patch adds delayed iput to avoid the issue. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 7 ++++++- fs/btrfs/disk-io.c | 4 ++++ fs/btrfs/extent-tree.c | 8 +++---- fs/btrfs/inode.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/ordered-data.c | 10 ++++++--- fs/btrfs/ordered-data.h | 3 ++- fs/btrfs/relocation.c | 4 ++-- fs/btrfs/super.c | 4 ++-- fs/btrfs/transaction.c | 13 +++++++++--- 9 files changed, 90 insertions(+), 18 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a7cac2148c7..1983c889bb1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -872,6 +872,9 @@ struct btrfs_fs_info { struct list_head dead_roots; struct list_head caching_block_groups; + spinlock_t delayed_iput_lock; + struct list_head delayed_iputs; + atomic_t nr_async_submits; atomic_t async_submit_draining; atomic_t nr_async_bios; @@ -2301,7 +2304,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, struct inode *inode, u64 new_size, u32 min_type); -int btrfs_start_delalloc_inodes(struct btrfs_root *root); +int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc); @@ -2341,6 +2344,8 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); void btrfs_orphan_cleanup(struct btrfs_root *root); int btrfs_cont_expand(struct inode *inode, loff_t size); int btrfs_invalidate_inodes(struct btrfs_root *root); +void btrfs_add_delayed_iput(struct inode *inode); +void btrfs_run_delayed_iputs(struct btrfs_root *root); extern const struct dentry_operations btrfs_dentry_operations; /* ioctl.c */ diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c1e59e33f02..009e3bd18f2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1476,6 +1476,7 @@ static int cleaner_kthread(void *arg) if (!(root->fs_info->sb->s_flags & MS_RDONLY) && mutex_trylock(&root->fs_info->cleaner_mutex)) { + btrfs_run_delayed_iputs(root); btrfs_clean_old_snapshots(root); mutex_unlock(&root->fs_info->cleaner_mutex); } @@ -1605,6 +1606,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); + INIT_LIST_HEAD(&fs_info->delayed_iputs); INIT_LIST_HEAD(&fs_info->hashers); INIT_LIST_HEAD(&fs_info->delalloc_inodes); INIT_LIST_HEAD(&fs_info->ordered_operations); @@ -1613,6 +1615,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, spin_lock_init(&fs_info->new_trans_lock); spin_lock_init(&fs_info->ref_cache_lock); spin_lock_init(&fs_info->fs_roots_radix_lock); + spin_lock_init(&fs_info->delayed_iput_lock); init_completion(&fs_info->kobj_unregister); fs_info->tree_root = tree_root; @@ -2386,6 +2389,7 @@ int btrfs_commit_super(struct btrfs_root *root) int ret; mutex_lock(&root->fs_info->cleaner_mutex); + btrfs_run_delayed_iputs(root); btrfs_clean_old_snapshots(root); mutex_unlock(&root->fs_info->cleaner_mutex); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4a86508ce47..fcdccfa4600 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2880,9 +2880,9 @@ static noinline void flush_delalloc_async(struct btrfs_work *work) root = async->root; info = async->info; - btrfs_start_delalloc_inodes(root); + btrfs_start_delalloc_inodes(root, 0); wake_up(&info->flush_wait); - btrfs_wait_ordered_extents(root, 0); + btrfs_wait_ordered_extents(root, 0, 0); spin_lock(&info->lock); info->flushing = 0; @@ -2956,8 +2956,8 @@ static void flush_delalloc(struct btrfs_root *root, return; flush: - btrfs_start_delalloc_inodes(root); - btrfs_wait_ordered_extents(root, 0); + btrfs_start_delalloc_inodes(root, 0); + btrfs_wait_ordered_extents(root, 0, 0); spin_lock(&info->lock); info->flushing = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 82740a3c628..168e8c040aa 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2022,6 +2022,54 @@ zeroit: return -EIO; } +struct delayed_iput { + struct list_head list; + struct inode *inode; +}; + +void btrfs_add_delayed_iput(struct inode *inode) +{ + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + struct delayed_iput *delayed; + + if (atomic_add_unless(&inode->i_count, -1, 1)) + return; + + delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL); + delayed->inode = inode; + + spin_lock(&fs_info->delayed_iput_lock); + list_add_tail(&delayed->list, &fs_info->delayed_iputs); + spin_unlock(&fs_info->delayed_iput_lock); +} + +void btrfs_run_delayed_iputs(struct btrfs_root *root) +{ + LIST_HEAD(list); + struct btrfs_fs_info *fs_info = root->fs_info; + struct delayed_iput *delayed; + int empty; + + spin_lock(&fs_info->delayed_iput_lock); + empty = list_empty(&fs_info->delayed_iputs); + spin_unlock(&fs_info->delayed_iput_lock); + if (empty) + return; + + down_read(&root->fs_info->cleanup_work_sem); + spin_lock(&fs_info->delayed_iput_lock); + list_splice_init(&fs_info->delayed_iputs, &list); + spin_unlock(&fs_info->delayed_iput_lock); + + while (!list_empty(&list)) { + delayed = list_entry(list.next, struct delayed_iput, list); + list_del(&delayed->list); + iput(delayed->inode); + kfree(delayed); + } + up_read(&root->fs_info->cleanup_work_sem); +} + /* * This creates an orphan entry for the given inode in case something goes * wrong in the middle of an unlink/truncate. @@ -5568,7 +5616,7 @@ out_fail: * some fairly slow code that needs optimization. This walks the list * of all the inodes with pending delalloc and forces them to disk. */ -int btrfs_start_delalloc_inodes(struct btrfs_root *root) +int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) { struct list_head *head = &root->fs_info->delalloc_inodes; struct btrfs_inode *binode; @@ -5587,7 +5635,10 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root) spin_unlock(&root->fs_info->delalloc_lock); if (inode) { filemap_flush(inode->i_mapping); - iput(inode); + if (delay_iput) + btrfs_add_delayed_iput(inode); + else + iput(inode); } cond_resched(); spin_lock(&root->fs_info->delalloc_lock); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 9b16073bb87..b10a49d4bc6 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -352,7 +352,8 @@ int btrfs_remove_ordered_extent(struct inode *inode, * wait for all the ordered extents in a root. This is done when balancing * space between drives. */ -int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) +int btrfs_wait_ordered_extents(struct btrfs_root *root, + int nocow_only, int delay_iput) { struct list_head splice; struct list_head *cur; @@ -389,7 +390,10 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) if (inode) { btrfs_start_ordered_extent(inode, ordered, 1); btrfs_put_ordered_extent(ordered); - iput(inode); + if (delay_iput) + btrfs_add_delayed_iput(inode); + else + iput(inode); } else { btrfs_put_ordered_extent(ordered); } @@ -447,7 +451,7 @@ again: btrfs_wait_ordered_range(inode, 0, (u64)-1); else filemap_flush(inode->i_mapping); - iput(inode); + btrfs_add_delayed_iput(inode); } cond_resched(); diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 4fa20398aec..1fe1282ef47 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -153,9 +153,10 @@ btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, struct btrfs_ordered_extent *ordered); int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); -int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); +int btrfs_wait_ordered_extents(struct btrfs_root *root, + int nocow_only, int delay_iput); #endif diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f2aa53d2f94..a9728680eca 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3541,8 +3541,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) (unsigned long long)rc->block_group->key.objectid, (unsigned long long)rc->block_group->flags); - btrfs_start_delalloc_inodes(fs_info->tree_root); - btrfs_wait_ordered_extents(fs_info->tree_root, 0); + btrfs_start_delalloc_inodes(fs_info->tree_root, 0); + btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); while (1) { rc->extents_found = 0; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 752a5463bf5..270cc96b9a4 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -405,8 +405,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait) return 0; } - btrfs_start_delalloc_inodes(root); - btrfs_wait_ordered_extents(root, 0); + btrfs_start_delalloc_inodes(root, 0); + btrfs_wait_ordered_extents(root, 0, 0); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 728e8fe5d2c..75b31caade2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -333,6 +333,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, memset(trans, 0, sizeof(*trans)); kmem_cache_free(btrfs_trans_handle_cachep, trans); + if (throttle) + btrfs_run_delayed_iputs(root); + return 0; } @@ -991,11 +994,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->trans_mutex); if (flush_on_commit) { - btrfs_start_delalloc_inodes(root); - ret = btrfs_wait_ordered_extents(root, 0); + btrfs_start_delalloc_inodes(root, 1); + ret = btrfs_wait_ordered_extents(root, 0, 1); BUG_ON(ret); } else if (snap_pending) { - ret = btrfs_wait_ordered_extents(root, 1); + ret = btrfs_wait_ordered_extents(root, 0, 1); BUG_ON(ret); } @@ -1113,6 +1116,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, current->journal_info = NULL; kmem_cache_free(btrfs_trans_handle_cachep, trans); + + if (current != root->fs_info->transaction_kthread) + btrfs_run_delayed_iputs(root); + return ret; } -- cgit v1.2.3-70-g09d2 From 4a8be425a8fb8fbb5d881eb55fa6634c3463b9c9 Mon Sep 17 00:00:00 2001 From: TARUISI Hiroaki Date: Thu, 12 Nov 2009 07:14:26 +0000 Subject: Btrfs: deny sys_link across subvolumes. I rebased Christian Parpart's patch to deny hard link across subvolumes. Original patch modifies also btrfs_rename, but I excluded it because we can move across subvolumes now and it make no problem. ----------------- Hard link across subvolumes should not allowed in Btrfs. btrfs_link checks root of 'to' directory is same as root of 'from' file. If not same, btrfs_link returns -EPERM. Signed-off-by: TARUISI Hiroaki Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 168e8c040aa..da76cad92ec 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4462,6 +4462,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink == 0) return -ENOENT; + /* do not allow sys_link's with other subvols of the same device */ + if (root->objectid != BTRFS_I(inode)->root->objectid) + return -EPERM; + /* * 1 item for inode ref * 2 items for dir items -- cgit v1.2.3-70-g09d2 From 3a1abec9f6880cf406593c392636199ea1c6c917 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Dec 2009 15:47:17 -0500 Subject: Btrfs: make sure fallocate properly starts a transaction The recent patch to make fallocate enospc friendly would send down a NULL trans handle to the allocator. This moves the transaction start to properly fix things. Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index da76cad92ec..5440bab2363 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5802,23 +5802,23 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, while (num_bytes > 0) { alloc_size = min(num_bytes, root->fs_info->max_extent); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_reserve_extent(trans, root, alloc_size, root->sectorsize, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { WARN_ON(1); - break; + goto stop_trans; } ret = btrfs_reserve_metadata_space(root, 3); if (ret) { btrfs_free_reserved_extent(root, ins.objectid, ins.offset); - break; + goto stop_trans; } - trans = btrfs_start_transaction(root, 1); - ret = insert_reserved_file_extent(trans, inode, cur_offset, ins.objectid, ins.offset, ins.offset, @@ -5847,6 +5847,11 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, btrfs_unreserve_metadata_space(root, 3); } return ret; + +stop_trans: + btrfs_end_transaction(trans, root); + return ret; + } static long btrfs_fallocate(struct inode *inode, int mode, -- cgit v1.2.3-70-g09d2 From 406266ab9ac8ed8b085c58aacd9e3161480dc5d5 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 9 Dec 2009 22:00:38 +0000 Subject: btrfs: fix missing last-entry in readdir(3) parent 49313cdac7b34c9f7ecbb1780cfc648b1c082cd7 (v2.6.32-1-g49313cd) commit ff48c08e1c05c67e8348ab6f8a24de8034e0e34d Author: Jan Engelhardt Date: Wed Dec 9 22:57:36 2009 +0100 Btrfs: fix missing last-entry in readdir(3) When one does a 32-bit readdir(3), the last entry of a directory is missing. This is however not due to passing a large value to filldir, but it seems to have to do with glibc doing telldir or something quirky. In any case, this patch fixes it in practice. Signed-off-by: Jan Engelhardt Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5440bab2363..d5aa9731094 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3995,7 +3995,11 @@ skip: /* Reached end of directory/root. Bump pos past the last item. */ if (key_type == BTRFS_DIR_INDEX_KEY) - filp->f_pos = INT_LIMIT(off_t); + /* + * 32-bit glibc will use getdents64, but then strtol - + * so the last number we can serve is this. + */ + filp->f_pos = 0x7fffffff; else filp->f_pos++; nopos: -- cgit v1.2.3-70-g09d2 From 6c090a11e1c403b727a6a8eff0b97d5fb9e95cb5 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 15 Jan 2010 20:08:22 +0000 Subject: Btrfs: fix regression in orphan cleanup Currently orphan cleanup only ever gets triggered if we cross subvolumes during a lookup, which means that if we just mount a plain jane fs that has orphans in it, they will never get cleaned up. This results in panic's like these http://www.kerneloops.org/oops.php?number=1109085 where adding an orphan entry results in -EEXIST being returned and we panic. In order to fix this, we check to see on lookup if our root has had the orphan cleanup done, and if not go ahead and do it. This is easily reproduceable by running this testcase #include #include #include #include #include #include int main(int argc, char **argv) { char data[4096]; char newdata[4096]; int fd1, fd2; memset(data, 'a', 4096); memset(newdata, 'b', 4096); while (1) { int i; fd1 = creat("file1", 0666); if (fd1 < 0) break; for (i = 0; i < 512; i++) write(fd1, data, 4096); fsync(fd1); close(fd1); fd2 = creat("file2", 0666); if (fd2 < 0) break; ftruncate(fd2, 4096 * 512); for (i = 0; i < 512; i++) write(fd2, newdata, 4096); close(fd2); i = rename("file2", "file1"); unlink("file1"); } return 0; } and then pulling the power on the box, and then trying to run that test again when the box comes back up. I've tested this locally and it fixes the problem. Thanks to Tomas Carnecky for helping me track this down initially. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d5aa9731094..b330e27c2d8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3796,6 +3796,12 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) if (location.type == BTRFS_INODE_ITEM_KEY) { inode = btrfs_iget(dir->i_sb, &location, root); + if (unlikely(root->clean_orphans) && + !(inode->i_sb->s_flags & MS_RDONLY)) { + down_read(&root->fs_info->cleanup_work_sem); + btrfs_orphan_cleanup(root); + up_read(&root->fs_info->cleanup_work_sem); + } return inode; } -- cgit v1.2.3-70-g09d2