diff options
author | Chris Mason <chris.mason@oracle.com> | 2009-04-21 11:53:38 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2009-04-21 12:45:12 -0400 |
commit | 546888da82082555a56528730a83f0afd12f33bf (patch) | |
tree | 98ee868d1b8a4bd390a980fed707f91419b79fb5 /fs | |
parent | 8c594ea81d7abbbffdda447b127f8ba8d76f319d (diff) |
Btrfs: fix btrfs fallocate oops and deadlock
Btrfs fallocate was incorrectly starting a transaction with a lock held
on the extent_io tree for the file, which could deadlock. Strictly
speaking, it was using join_transaction, which would be safe, but it is better
to move the transaction outside of the lock.
When preallocated extents are overwritten, btrfs_mark_buffer_dirty was
being called on an unlocked buffer. This was triggering an assertion and
oops because the lock is supposed to be held.
The bug was calling btrfs_mark_buffer_dirty on a leaf after btrfs_del_item had
been run. btrfs_del_item takes care of dirtying things, so the solution is
to skip the btrfs_mark_buffer_dirty call in this case.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/file.c | 4 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 36 |
2 files changed, 31 insertions, 9 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e21c0060ee7..482f8db2cfd 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -830,7 +830,7 @@ again: ret = btrfs_del_items(trans, root, path, del_slot, del_nr); BUG_ON(ret); - goto done; + goto release; } else if (split == start) { if (locked_end < extent_end) { ret = try_lock_extent(&BTRFS_I(inode)->io_tree, @@ -926,6 +926,8 @@ again: } done: btrfs_mark_buffer_dirty(leaf); + +release: btrfs_release_path(root, path); if (split_end && split == start) { split = end; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a0d1dd492a5..65219f6a16a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4970,10 +4970,10 @@ out_fail: return err; } -static int prealloc_file_range(struct inode *inode, u64 start, u64 end, +static int prealloc_file_range(struct btrfs_trans_handle *trans, + struct inode *inode, u64 start, u64 end, u64 alloc_hint, int mode) { - struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key ins; u64 alloc_size; @@ -4981,10 +4981,6 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, u64 num_bytes = end - start; int ret = 0; - trans = btrfs_join_transaction(root, 1); - BUG_ON(!trans); - btrfs_set_trans_block_group(trans, inode); - while (num_bytes > 0) { alloc_size = min(num_bytes, root->fs_info->max_extent); ret = btrfs_reserve_extent(trans, root, alloc_size, @@ -5015,7 +5011,6 @@ out: BUG_ON(ret); } - btrfs_end_transaction(trans, root); return ret; } @@ -5029,11 +5024,18 @@ static long btrfs_fallocate(struct inode *inode, int mode, u64 alloc_hint = 0; u64 mask = BTRFS_I(inode)->root->sectorsize - 1; struct extent_map *em; + struct btrfs_trans_handle *trans; int ret; alloc_start = offset & ~mask; alloc_end = (offset + len + mask) & ~mask; + /* + * wait for ordered IO before we have any locks. We'll loop again + * below with the locks held. 
+ */ + btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); + mutex_lock(&inode->i_mutex); if (alloc_start > inode->i_size) { ret = btrfs_cont_expand(inode, alloc_start); @@ -5043,6 +5045,16 @@ static long btrfs_fallocate(struct inode *inode, int mode, while (1) { struct btrfs_ordered_extent *ordered; + + trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); + if (!trans) { + ret = -EIO; + goto out; + } + + /* the extent lock is ordered inside the running + * transaction + */ lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1, GFP_NOFS); ordered = btrfs_lookup_first_ordered_extent(inode, @@ -5053,6 +5065,12 @@ static long btrfs_fallocate(struct inode *inode, int mode, btrfs_put_ordered_extent(ordered); unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1, GFP_NOFS); + btrfs_end_transaction(trans, BTRFS_I(inode)->root); + + /* + * we can't wait on the range with the transaction + * running or with the extent lock held + */ btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); } else { @@ -5070,7 +5088,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, last_byte = min(extent_map_end(em), alloc_end); last_byte = (last_byte + mask) & ~mask; if (em->block_start == EXTENT_MAP_HOLE) { - ret = prealloc_file_range(inode, cur_offset, + ret = prealloc_file_range(trans, inode, cur_offset, last_byte, alloc_hint, mode); if (ret < 0) { free_extent_map(em); @@ -5089,6 +5107,8 @@ static long btrfs_fallocate(struct inode *inode, int mode, } unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1, GFP_NOFS); + + btrfs_end_transaction(trans, BTRFS_I(inode)->root); out: mutex_unlock(&inode->i_mutex); return ret; |