diff options
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 906 |
1 files changed, 678 insertions, 228 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 03708ef3dee..d23362f4464 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -153,7 +153,7 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, key.objectid = btrfs_ino(inode); key.offset = start; - btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + key.type = BTRFS_EXTENT_DATA_KEY; datasize = btrfs_file_extent_calc_inline_size(cur_size); path->leave_spinning = 1; @@ -249,8 +249,8 @@ static noinline int cow_file_range_inline(struct btrfs_root *root, data_len = compressed_size; if (start > 0 || - actual_end >= PAGE_CACHE_SIZE || - data_len >= BTRFS_MAX_INLINE_DATA_SIZE(root) || + actual_end > PAGE_CACHE_SIZE || + data_len > BTRFS_MAX_INLINE_DATA_SIZE(root) || (!compressed_size && (actual_end & (root->sectorsize - 1)) == 0) || end + 1 < isize || @@ -348,6 +348,23 @@ static noinline int add_async_extent(struct async_cow *cow, return 0; } +static inline int inode_need_compress(struct inode *inode) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + + /* force compress */ + if (btrfs_test_opt(root, FORCE_COMPRESS)) + return 1; + /* bad compression ratios */ + if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) + return 0; + if (btrfs_test_opt(root, COMPRESS) || + BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS || + BTRFS_I(inode)->force_compress) + return 1; + return 0; +} + /* * we create compressed extents in two phases. The first * phase compresses a range of pages that have already been @@ -444,10 +461,7 @@ again: * inode has not been flagged as nocompress. This flag can * change at any time if we discover bad compression ratios. */ - if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && - (btrfs_test_opt(root, COMPRESS) || - (BTRFS_I(inode)->force_compress) || - (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) { + if (inode_need_compress(inode)) { WARN_ON(pages); pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); if (!pages) { @@ -778,8 +792,12 @@ retry: ins.offset, BTRFS_ORDERED_COMPRESSED, async_extent->compress_type); - if (ret) + if (ret) { + btrfs_drop_extent_cache(inode, async_extent->start, + async_extent->start + + async_extent->ram_size - 1, 0); goto out_free_reserve; + } /* * clear dirty, set writeback and unlock the pages. @@ -971,14 +989,14 @@ static noinline int cow_file_range(struct inode *inode, ret = btrfs_add_ordered_extent(inode, start, ins.objectid, ram_size, cur_alloc_size, 0); if (ret) - goto out_reserve; + goto out_drop_extent_cache; if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) { ret = btrfs_reloc_clone_csums(inode, start, cur_alloc_size); if (ret) - goto out_reserve; + goto out_drop_extent_cache; } if (disk_num_bytes < cur_alloc_size) @@ -1006,6 +1024,8 @@ static noinline int cow_file_range(struct inode *inode, out: return ret; +out_drop_extent_cache: + btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0); out_reserve: btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); out_unlock: @@ -1088,7 +1108,8 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, async_cow->locked_page = locked_page; async_cow->start = start; - if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) + if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS && + !btrfs_test_opt(root, FORCE_COMPRESS)) cur_end = end; else cur_end = min(end, start + 512 * 1024 - 1); @@ -1096,8 +1117,10 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, async_cow->end = cur_end; INIT_LIST_HEAD(&async_cow->extents); - btrfs_init_work(&async_cow->work, async_cow_start, - async_cow_submit, async_cow_free); + btrfs_init_work(&async_cow->work, + btrfs_delalloc_helper, + async_cow_start, async_cow_submit, + async_cow_free); nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> PAGE_CACHE_SHIFT; @@ -1437,6 +1460,26 @@ error: return ret; } +static inline int need_force_cow(struct inode *inode, u64 start, u64 end) +{ + + if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && + !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)) + return 0; + + /* + * @defrag_bytes is a hint value, no spinlock held here, + * if is not zero, it means the file is defragging. + * Force cow if given extent needs to be defragged. + */ + if (BTRFS_I(inode)->defrag_bytes && + test_range_bit(&BTRFS_I(inode)->io_tree, start, end, + EXTENT_DEFRAG, 0, NULL)) + return 1; + + return 0; +} + /* * extent_io.c call back to do delayed allocation processing */ @@ -1445,17 +1488,15 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, unsigned long *nr_written) { int ret; - struct btrfs_root *root = BTRFS_I(inode)->root; + int force_cow = need_force_cow(inode, start, end); - if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) { + if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) { ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 1, nr_written); - } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) { + } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) { ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 0, nr_written); - } else if (!btrfs_test_opt(root, COMPRESS) && - !(BTRFS_I(inode)->force_compress) && - !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) { + } else if (!inode_need_compress(inode)) { ret = cow_file_range(inode, locked_page, start, end, page_started, nr_written, 1); } else { @@ -1547,6 +1588,8 @@ static void btrfs_set_bit_hook(struct inode *inode, struct extent_state *state, unsigned long *bits) { + if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC)) + WARN_ON(1); /* * set_bit and clear bit hooks normally require _irqsave/restore * but in this case, we are only testing for the DELALLOC @@ -1569,6 +1612,8 @@ static void btrfs_set_bit_hook(struct inode *inode, root->fs_info->delalloc_batch); spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->delalloc_bytes += len; + if (*bits & EXTENT_DEFRAG) + BTRFS_I(inode)->defrag_bytes += len; if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST, &BTRFS_I(inode)->runtime_flags)) btrfs_add_delalloc_inodes(root, inode); @@ -1583,6 +1628,13 @@ static void btrfs_clear_bit_hook(struct inode *inode, struct extent_state *state, unsigned long *bits) { + u64 len = state->end + 1 - state->start; + + spin_lock(&BTRFS_I(inode)->lock); + if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) + BTRFS_I(inode)->defrag_bytes -= len; + spin_unlock(&BTRFS_I(inode)->lock); + /* * set_bit and clear bit hooks normally require _irqsave/restore * but in this case, we are only testing for the DELALLOC @@ -1590,7 +1642,6 @@ static void btrfs_clear_bit_hook(struct inode *inode, */ if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; - u64 len = state->end + 1 - state->start; bool do_list = !btrfs_is_free_space_inode(inode); if (*bits & EXTENT_FIRST_DELALLOC) { @@ -1881,7 +1932,8 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) SetPageChecked(page); page_cache_get(page); - btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL); + btrfs_init_work(&fixup->work, btrfs_fixup_helper, + btrfs_writepage_fixup_worker, NULL, NULL); fixup->page = page; btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work); return -EBUSY; @@ -2651,6 +2703,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) goto out; } + btrfs_free_io_failure_record(inode, ordered_extent->file_offset, + ordered_extent->file_offset + + ordered_extent->len - 1); + if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) { truncated = true; logical_len = ordered_extent->truncated_len; @@ -2822,7 +2878,8 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, struct inode *inode = page->mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ordered_extent *ordered_extent = NULL; - struct btrfs_workqueue *workers; + struct btrfs_workqueue *wq; + btrfs_work_func_t func; trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); @@ -2831,17 +2888,55 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, end - start + 1, uptodate)) return 0; - btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL); + if (btrfs_is_free_space_inode(inode)) { + wq = root->fs_info->endio_freespace_worker; + func = btrfs_freespace_write_helper; + } else { + wq = root->fs_info->endio_write_workers; + func = btrfs_endio_write_helper; + } - if (btrfs_is_free_space_inode(inode)) - workers = root->fs_info->endio_freespace_worker; - else - workers = root->fs_info->endio_write_workers; - btrfs_queue_work(workers, &ordered_extent->work); + btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL, + NULL); + btrfs_queue_work(wq, &ordered_extent->work); return 0; } +static int __readpage_endio_check(struct inode *inode, + struct btrfs_io_bio *io_bio, + int icsum, struct page *page, + int pgoff, u64 start, size_t len) +{ + char *kaddr; + u32 csum_expected; + u32 csum = ~(u32)0; + static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + + csum_expected = *(((u32 *)io_bio->csum) + icsum); + + kaddr = kmap_atomic(page); + csum = btrfs_csum_data(kaddr + pgoff, csum, len); + btrfs_csum_final(csum, (char *)&csum); + if (csum != csum_expected) + goto zeroit; + + kunmap_atomic(kaddr); + return 0; +zeroit: + if (__ratelimit(&_rs)) + btrfs_info(BTRFS_I(inode)->root->fs_info, + "csum failed ino %llu off %llu csum %u expected csum %u", + btrfs_ino(inode), start, csum, csum_expected); + memset(kaddr + pgoff, 1, len); + flush_dcache_page(page); + kunmap_atomic(kaddr); + if (csum_expected == 0) + return 0; + return -EIO; +} + /* * when reads are done, we need to check csums to verify the data is correct * if there's a match, we allow the bio to finish. If not, the code in @@ -2854,20 +2949,15 @@ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio, size_t offset = start - page_offset(page); struct inode *inode = page->mapping->host; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - char *kaddr; struct btrfs_root *root = BTRFS_I(inode)->root; - u32 csum_expected; - u32 csum = ~(u32)0; - static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, - DEFAULT_RATELIMIT_BURST); if (PageChecked(page)) { ClearPageChecked(page); - goto good; + return 0; } if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) - goto good; + return 0; if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { @@ -2877,28 +2967,8 @@ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio, } phy_offset >>= inode->i_sb->s_blocksize_bits; - csum_expected = *(((u32 *)io_bio->csum) + phy_offset); - - kaddr = kmap_atomic(page); - csum = btrfs_csum_data(kaddr + offset, csum, end - start + 1); - btrfs_csum_final(csum, (char *)&csum); - if (csum != csum_expected) - goto zeroit; - - kunmap_atomic(kaddr); -good: - return 0; - -zeroit: - if (__ratelimit(&_rs)) - btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u", - btrfs_ino(page->mapping->host), start, csum, csum_expected); - memset(kaddr + offset, 1, end - start + 1); - flush_dcache_page(page); - kunmap_atomic(kaddr); - if (csum_expected == 0) - return 0; - return -EIO; + return __readpage_endio_check(inode, io_bio, phy_offset, page, offset, + start, (size_t)(end - start + 1)); } struct delayed_iput { @@ -3145,7 +3215,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) path->reada = -1; key.objectid = BTRFS_ORPHAN_OBJECTID; - btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); + key.type = BTRFS_ORPHAN_ITEM_KEY; key.offset = (u64)-1; while (1) { @@ -3172,7 +3242,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) /* make sure the item matches what we want */ if (found_key.objectid != BTRFS_ORPHAN_OBJECTID) break; - if (btrfs_key_type(&found_key) != BTRFS_ORPHAN_ITEM_KEY) + if (found_key.type != BTRFS_ORPHAN_ITEM_KEY) break; /* release the path since we're done with it */ @@ -3648,7 +3718,8 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, * without delay */ if (!btrfs_is_free_space_inode(inode) - && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { + && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID + && !root->fs_info->log_root_recovering) { btrfs_update_root_times(trans, root); ret = btrfs_delayed_update_inode(trans, root, inode); @@ -4071,7 +4142,7 @@ search_again: fi = NULL; leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - found_type = btrfs_key_type(&found_key); + found_type = found_key.type; if (found_key.objectid != ino) break; @@ -4234,7 +4305,8 @@ out: btrfs_abort_transaction(trans, root, ret); } error: - if (last_size != (u64)-1) + if (last_size != (u64)-1 && + root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) btrfs_ordered_update_i_size(inode, last_size, NULL); btrfs_free_path(path); return err; @@ -4674,6 +4746,11 @@ static void evict_inode_truncate_pages(struct inode *inode) clear_bit(EXTENT_FLAG_LOGGING, &em->flags); remove_extent_mapping(map_tree, em); free_extent_map(em); + if (need_resched()) { + write_unlock(&map_tree->lock); + cond_resched(); + write_lock(&map_tree->lock); + } } write_unlock(&map_tree->lock); @@ -4696,6 +4773,7 @@ static void evict_inode_truncate_pages(struct inode *inode) &cached_state, GFP_NOFS); free_extent_state(state); + cond_resched(); spin_lock(&io_tree->lock); } spin_unlock(&io_tree->lock); @@ -4726,6 +4804,8 @@ void btrfs_evict_inode(struct inode *inode) /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */ btrfs_wait_ordered_range(inode, 0, (u64)-1); + btrfs_free_io_failure_record(inode, 0, (u64)-1); + if (root->fs_info->log_root_recovering) { BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, &BTRFS_I(inode)->runtime_flags)); @@ -5274,7 +5354,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) btrfs_get_delayed_items(inode, &ins_list, &del_list); } - btrfs_set_key_type(&key, key_type); + key.type = key_type; key.offset = ctx->pos; key.objectid = btrfs_ino(inode); @@ -5299,7 +5379,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) if (found_key.objectid != key.objectid) break; - if (btrfs_key_type(&found_key) != key_type) + if (found_key.type != key_type) break; if (found_key.offset < ctx->pos) goto next; @@ -5511,7 +5591,7 @@ static int btrfs_set_inode_index_count(struct inode *inode) int ret; key.objectid = btrfs_ino(inode); - btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); + key.type = BTRFS_DIR_INDEX_KEY; key.offset = (u64)-1; path = btrfs_alloc_path(); @@ -5543,7 +5623,7 @@ static int btrfs_set_inode_index_count(struct inode *inode) btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.objectid != btrfs_ino(inode) || - btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) { + found_key.type != BTRFS_DIR_INDEX_KEY) { BTRFS_I(inode)->index_cnt = 2; goto out; } @@ -5577,6 +5657,17 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index) return ret; } +static int btrfs_insert_inode_locked(struct inode *inode) +{ + struct btrfs_iget_args args; + args.location = &BTRFS_I(inode)->location; + args.root = BTRFS_I(inode)->root; + + return insert_inode_locked4(inode, + btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root), + btrfs_find_actor, &args); +} + static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *dir, @@ -5606,6 +5697,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, } /* + * O_TMPFILE, set link count to 0, so that after this point, + * we fill in an inode item with the correct link count. + */ + if (!name) + set_nlink(inode, 0); + + /* * we have to initialize this early, so we can reclaim the inode * number if we fail afterwards in this function. */ @@ -5643,7 +5741,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); key[0].objectid = objectid; - btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); + key[0].type = BTRFS_INODE_ITEM_KEY; key[0].offset = 0; sizes[0] = sizeof(struct btrfs_inode_item); @@ -5656,16 +5754,25 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, * add more hard links than can fit in the ref item. */ key[1].objectid = objectid; - btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY); + key[1].type = BTRFS_INODE_REF_KEY; key[1].offset = ref_objectid; sizes[1] = name_len + sizeof(*ref); } + location = &BTRFS_I(inode)->location; + location->objectid = objectid; + location->offset = 0; + location->type = BTRFS_INODE_ITEM_KEY; + + ret = btrfs_insert_inode_locked(inode); + if (ret < 0) + goto fail; + path->leave_spinning = 1; ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems); if (ret != 0) - goto fail; + goto fail_unlock; inode_init_owner(inode, dir, mode); inode_set_bytes(inode, 0); @@ -5688,11 +5795,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); - location = &BTRFS_I(inode)->location; - location->objectid = objectid; - location->offset = 0; - btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); - btrfs_inherit_iflags(inode, dir); if (S_ISREG(mode)) { @@ -5703,7 +5805,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, BTRFS_INODE_NODATASUM; } - btrfs_insert_inode_hash(inode); inode_tree_add(inode); trace_btrfs_inode_new(inode); @@ -5718,6 +5819,9 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, btrfs_ino(inode), root->root_key.objectid, ret); return inode; + +fail_unlock: + unlock_new_inode(inode); fail: if (dir && name) BTRFS_I(dir)->index_cnt--; @@ -5751,7 +5855,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); } else { key.objectid = ino; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; } @@ -5852,28 +5956,28 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, goto out_unlock; } - err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); - if (err) { - drop_inode = 1; - goto out_unlock; - } - /* * If the active LSM wants to access the inode during * d_instantiate it needs these. Smack checks to see * if the filesystem supports xattrs by looking at the * ops vector. */ - inode->i_op = &btrfs_special_inode_operations; - err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); + init_special_inode(inode, inode->i_mode, rdev); + + err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); if (err) - drop_inode = 1; - else { - init_special_inode(inode, inode->i_mode, rdev); + goto out_unlock_inode; + + err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); + if (err) { + goto out_unlock_inode; + } else { btrfs_update_inode(trans, root, inode); + unlock_new_inode(inode); d_instantiate(dentry, inode); } + out_unlock: btrfs_end_transaction(trans, root); btrfs_balance_delayed_items(root); @@ -5883,6 +5987,12 @@ out_unlock: iput(inode); } return err; + +out_unlock_inode: + drop_inode = 1; + unlock_new_inode(inode); + goto out_unlock; + } static int btrfs_create(struct inode *dir, struct dentry *dentry, @@ -5917,15 +6027,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, goto out_unlock; } drop_inode_on_err = 1; - - err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); - if (err) - goto out_unlock; - - err = btrfs_update_inode(trans, root, inode); - if (err) - goto out_unlock; - /* * If the active LSM wants to access the inode during * d_instantiate it needs these. Smack checks to see @@ -5934,14 +6035,23 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, */ inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; + inode->i_mapping->a_ops = &btrfs_aops; + inode->i_mapping->backing_dev_info = &root->fs_info->bdi; + + err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); + if (err) + goto out_unlock_inode; + + err = btrfs_update_inode(trans, root, inode); + if (err) + goto out_unlock_inode; err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) - goto out_unlock; + goto out_unlock_inode; - inode->i_mapping->a_ops = &btrfs_aops; - inode->i_mapping->backing_dev_info = &root->fs_info->bdi; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; + unlock_new_inode(inode); d_instantiate(dentry, inode); out_unlock: @@ -5953,6 +6063,11 @@ out_unlock: btrfs_balance_delayed_items(root); btrfs_btree_balance_dirty(root); return err; + +out_unlock_inode: + unlock_new_inode(inode); + goto out_unlock; + } static int btrfs_link(struct dentry *old_dentry, struct inode *dir, @@ -6060,25 +6175,30 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) } drop_on_err = 1; + /* these must be set before we unlock the inode */ + inode->i_op = &btrfs_dir_inode_operations; + inode->i_fop = &btrfs_dir_file_operations; err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); if (err) - goto out_fail; - - inode->i_op = &btrfs_dir_inode_operations; - inode->i_fop = &btrfs_dir_file_operations; + goto out_fail_inode; btrfs_i_size_write(inode, 0); err = btrfs_update_inode(trans, root, inode); if (err) - goto out_fail; + goto out_fail_inode; err = btrfs_add_link(trans, dir, inode, dentry->d_name.name, dentry->d_name.len, 0, index); if (err) - goto out_fail; + goto out_fail_inode; d_instantiate(dentry, inode); + /* + * mkdir is special. We're unlocking after we call d_instantiate + * to avoid a race with nfsd calling d_instantiate. + */ + unlock_new_inode(inode); drop_on_err = 0; out_fail: @@ -6088,23 +6208,66 @@ out_fail: btrfs_balance_delayed_items(root); btrfs_btree_balance_dirty(root); return err; + +out_fail_inode: + unlock_new_inode(inode); + goto out_fail; +} + +/* Find next extent map of a given extent map, caller needs to ensure locks */ +static struct extent_map *next_extent_map(struct extent_map *em) +{ + struct rb_node *next; + + next = rb_next(&em->rb_node); + if (!next) + return NULL; + return container_of(next, struct extent_map, rb_node); +} + +static struct extent_map *prev_extent_map(struct extent_map *em) +{ + struct rb_node *prev; + + prev = rb_prev(&em->rb_node); + if (!prev) + return NULL; + return container_of(prev, struct extent_map, rb_node); } /* helper for btfs_get_extent. Given an existing extent in the tree, + * the existing extent is the nearest extent to map_start, * and an extent that you want to insert, deal with overlap and insert - * the new extent into the tree. + * the best fitted new extent into the tree. */ static int merge_extent_mapping(struct extent_map_tree *em_tree, struct extent_map *existing, struct extent_map *em, - u64 map_start, u64 map_len) + u64 map_start) { + struct extent_map *prev; + struct extent_map *next; + u64 start; + u64 end; u64 start_diff; BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); - start_diff = map_start - em->start; - em->start = map_start; - em->len = map_len; + + if (existing->start > map_start) { + next = existing; + prev = prev_extent_map(next); + } else { + prev = existing; + next = next_extent_map(prev); + } + + start = prev ? extent_map_end(prev) : em->start; + start = max_t(u64, start, em->start); + end = next ? next->start : extent_map_end(em); + end = min_t(u64, end, extent_map_end(em)); + start_diff = start - em->start; + em->start = start; + em->len = end - start; if (em->block_start < EXTENT_MAP_LAST_BYTE && !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { em->block_start += start_diff; @@ -6232,7 +6395,7 @@ again: struct btrfs_file_extent_item); /* are we inside the extent that was found? */ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - found_type = btrfs_key_type(&found_key); + found_type = found_key.type; if (found_key.objectid != objectid || found_type != BTRFS_EXTENT_DATA_KEY) { /* @@ -6275,6 +6438,8 @@ next: goto not_found; if (start + len <= found_key.offset) goto not_found; + if (start > found_key.offset) + goto next; em->start = start; em->orig_start = start; em->len = found_key.offset - start; @@ -6379,26 +6544,21 @@ insert: ret = 0; - existing = lookup_extent_mapping(em_tree, start, len); - if (existing && (existing->start > start || - existing->start + existing->len <= start)) { + existing = search_extent_mapping(em_tree, start, len); + /* + * existing will always be non-NULL, since there must be + * extent causing the -EEXIST. + */ + if (start >= extent_map_end(existing) || + start <= existing->start) { + /* + * The existing extent map is the one nearest to + * the [start, start + len) range which overlaps + */ + err = merge_extent_mapping(em_tree, existing, + em, start); free_extent_map(existing); - existing = NULL; - } - if (!existing) { - existing = lookup_extent_mapping(em_tree, em->start, - em->len); - if (existing) { - err = merge_extent_mapping(em_tree, existing, - em, start, - root->sectorsize); - free_extent_map(existing); - if (err) { - free_extent_map(em); - em = NULL; - } - } else { - err = -EIO; + if (err) { free_extent_map(em); em = NULL; } @@ -7010,8 +7170,10 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, block_start, len, orig_block_len, ram_bytes, type); - if (IS_ERR(em)) + if (IS_ERR(em)) { + ret = PTR_ERR(em); goto unlock_err; + } } ret = btrfs_add_ordered_extent_dio(inode, start, @@ -7086,45 +7248,277 @@ unlock_err: return ret; } -static void btrfs_endio_direct_read(struct bio *bio, int err) +static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio, + int rw, int mirror_num) { - struct btrfs_dio_private *dip = bio->bi_private; - struct bio_vec *bvec; - struct inode *inode = dip->inode; struct btrfs_root *root = BTRFS_I(inode)->root; - struct bio *dio_bio; - u32 *csums = (u32 *)dip->csum; + int ret; + + BUG_ON(rw & REQ_WRITE); + + bio_get(bio); + + ret = btrfs_bio_wq_end_io(root->fs_info, bio, + BTRFS_WQ_ENDIO_DIO_REPAIR); + if (ret) + goto err; + + ret = btrfs_map_bio(root, rw, bio, mirror_num, 0); +err: + bio_put(bio); + return ret; +} + +static int btrfs_check_dio_repairable(struct inode *inode, + struct bio *failed_bio, + struct io_failure_record *failrec, + int failed_mirror) +{ + int num_copies; + + num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info, + failrec->logical, failrec->len); + if (num_copies == 1) { + /* + * we only have a single copy of the data, so don't bother with + * all the retry and error correction code that follows. no + * matter what the error is, it is very likely to persist. + */ + pr_debug("Check DIO Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n", + num_copies, failrec->this_mirror, failed_mirror); + return 0; + } + + failrec->failed_mirror = failed_mirror; + failrec->this_mirror++; + if (failrec->this_mirror == failed_mirror) + failrec->this_mirror++; + + if (failrec->this_mirror > num_copies) { + pr_debug("Check DIO Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n", + num_copies, failrec->this_mirror, failed_mirror); + return 0; + } + + return 1; +} + +static int dio_read_error(struct inode *inode, struct bio *failed_bio, + struct page *page, u64 start, u64 end, + int failed_mirror, bio_end_io_t *repair_endio, + void *repair_arg) +{ + struct io_failure_record *failrec; + struct bio *bio; + int isector; + int read_mode; + int ret; + + BUG_ON(failed_bio->bi_rw & REQ_WRITE); + + ret = btrfs_get_io_failure_record(inode, start, end, &failrec); + if (ret) + return ret; + + ret = btrfs_check_dio_repairable(inode, failed_bio, failrec, + failed_mirror); + if (!ret) { + free_io_failure(inode, failrec); + return -EIO; + } + + if (failed_bio->bi_vcnt > 1) + read_mode = READ_SYNC | REQ_FAILFAST_DEV; + else + read_mode = READ_SYNC; + + isector = start - btrfs_io_bio(failed_bio)->logical; + isector >>= inode->i_sb->s_blocksize_bits; + bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, + 0, isector, repair_endio, repair_arg); + if (!bio) { + free_io_failure(inode, failrec); + return -EIO; + } + + btrfs_debug(BTRFS_I(inode)->root->fs_info, + "Repair DIO Read Error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d\n", + read_mode, failrec->this_mirror, failrec->in_validation); + + ret = submit_dio_repair_bio(inode, bio, read_mode, + failrec->this_mirror); + if (ret) { + free_io_failure(inode, failrec); + bio_put(bio); + } + + return ret; +} + +struct btrfs_retry_complete { + struct completion done; + struct inode *inode; u64 start; + int uptodate; +}; + +static void btrfs_retry_endio_nocsum(struct bio *bio, int err) +{ + struct btrfs_retry_complete *done = bio->bi_private; + struct bio_vec *bvec; int i; - start = dip->logical_offset; + if (err) + goto end; + + done->uptodate = 1; + bio_for_each_segment_all(bvec, bio, i) + clean_io_failure(done->inode, done->start, bvec->bv_page, 0); +end: + complete(&done->done); + bio_put(bio); +} + +static int __btrfs_correct_data_nocsum(struct inode *inode, + struct btrfs_io_bio *io_bio) +{ + struct bio_vec *bvec; + struct btrfs_retry_complete done; + u64 start; + int i; + int ret; + + start = io_bio->logical; + done.inode = inode; + + bio_for_each_segment_all(bvec, &io_bio->bio, i) { +try_again: + done.uptodate = 0; + done.start = start; + init_completion(&done.done); + + ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start, + start + bvec->bv_len - 1, + io_bio->mirror_num, + btrfs_retry_endio_nocsum, &done); + if (ret) + return ret; + + wait_for_completion(&done.done); + + if (!done.uptodate) { + /* We might have another mirror, so try again */ + goto try_again; + } + + start += bvec->bv_len; + } + + return 0; +} + +static void btrfs_retry_endio(struct bio *bio, int err) +{ + struct btrfs_retry_complete *done = bio->bi_private; + struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); + struct bio_vec *bvec; + int uptodate; + int ret; + int i; + + if (err) + goto end; + + uptodate = 1; bio_for_each_segment_all(bvec, bio, i) { - if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { - struct page *page = bvec->bv_page; - char *kaddr; - u32 csum = ~(u32)0; - unsigned long flags; - - local_irq_save(flags); - kaddr = kmap_atomic(page); - csum = btrfs_csum_data(kaddr + bvec->bv_offset, - csum, bvec->bv_len); - btrfs_csum_final(csum, (char *)&csum); - kunmap_atomic(kaddr); - local_irq_restore(flags); - - flush_dcache_page(bvec->bv_page); - if (csum != csums[i]) { - btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u", - btrfs_ino(inode), start, csum, - csums[i]); - err = -EIO; - } + ret = __readpage_endio_check(done->inode, io_bio, i, + bvec->bv_page, 0, + done->start, bvec->bv_len); + if (!ret) + clean_io_failure(done->inode, done->start, + bvec->bv_page, 0); + else + uptodate = 0; + } + + done->uptodate = uptodate; +end: + complete(&done->done); + bio_put(bio); +} + +static int __btrfs_subio_endio_read(struct inode *inode, + struct btrfs_io_bio *io_bio, int err) +{ + struct bio_vec *bvec; + struct btrfs_retry_complete done; + u64 start; + u64 offset = 0; + int i; + int ret; + + err = 0; + start = io_bio->logical; + done.inode = inode; + + bio_for_each_segment_all(bvec, &io_bio->bio, i) { + ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page, + 0, start, bvec->bv_len); + if (likely(!ret)) + goto next; +try_again: + done.uptodate = 0; + done.start = start; + init_completion(&done.done); + + ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start, + start + bvec->bv_len - 1, + io_bio->mirror_num, + btrfs_retry_endio, &done); + if (ret) { + err = ret; + goto next; } + wait_for_completion(&done.done); + + if (!done.uptodate) { + /* We might have another mirror, so try again */ + goto try_again; + } +next: + offset += bvec->bv_len; start += bvec->bv_len; } + return err; +} + +static int btrfs_subio_endio_read(struct inode *inode, + struct btrfs_io_bio *io_bio, int err) +{ + bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; + + if (skip_csum) { + if (unlikely(err)) + return __btrfs_correct_data_nocsum(inode, io_bio); + else + return 0; + } else { + return __btrfs_subio_endio_read(inode, io_bio, err); + } +} + +static void btrfs_endio_direct_read(struct bio *bio, int err) +{ + struct btrfs_dio_private *dip = bio->bi_private; + struct inode *inode = dip->inode; + struct bio *dio_bio; + struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); + + if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) + err = btrfs_subio_endio_read(inode, io_bio, err); + unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, dip->logical_offset + dip->bytes - 1); dio_bio = dip->dio_bio; @@ -7135,6 +7529,9 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) if (err) clear_bit(BIO_UPTODATE, &dio_bio->bi_flags); dio_end_io(dio_bio, err); + + if (io_bio->end_io) + io_bio->end_io(io_bio, err); bio_put(bio); } @@ -7158,7 +7555,8 @@ again: if (!ret) goto out_test; - btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL); + btrfs_init_work(&ordered->work, btrfs_endio_write_helper, + finish_ordered_fn, NULL, NULL); btrfs_queue_work(root->fs_info->endio_write_workers, &ordered->work); out_test: @@ -7199,12 +7597,17 @@ static void btrfs_end_dio_bio(struct bio *bio, int err) { struct btrfs_dio_private *dip = bio->bi_private; + if (err) + btrfs_warn(BTRFS_I(dip->inode)->root->fs_info, + "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d", + btrfs_ino(dip->inode), bio->bi_rw, + (unsigned long long)bio->bi_iter.bi_sector, + bio->bi_iter.bi_size, err); + + if (dip->subio_endio) + err = dip->subio_endio(dip->inode, btrfs_io_bio(bio), err); + if (err) { - btrfs_err(BTRFS_I(dip->inode)->root->fs_info, - "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d", - btrfs_ino(dip->inode), bio->bi_rw, - (unsigned long long)bio->bi_iter.bi_sector, - bio->bi_iter.bi_size, err); dip->errors = 1; /* @@ -7235,6 +7638,38 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev, return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags); } +static inline int btrfs_lookup_and_bind_dio_csum(struct btrfs_root *root, + struct inode *inode, + struct btrfs_dio_private *dip, + struct bio *bio, + u64 file_offset) +{ + struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); + struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio); + int ret; + + /* + * We load all the csum data we need when we submit + * the first bio to reduce the csum tree search and + * contention. + */ + if (dip->logical_offset == file_offset) { + ret = btrfs_lookup_bio_sums_dio(root, inode, dip->orig_bio, + file_offset); + if (ret) + return ret; + } + + if (bio == dip->orig_bio) + return 0; + + file_offset -= dip->logical_offset; + file_offset >>= inode->i_sb->s_blocksize_bits; + io_bio->csum = (u8 *)(((u32 *)orig_io_bio->csum) + file_offset); + + return 0; +} + static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, int rw, u64 file_offset, int skip_sum, int async_submit) @@ -7250,7 +7685,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, bio_get(bio); if (!write) { - ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); + ret = btrfs_bio_wq_end_io(root->fs_info, bio, + BTRFS_WQ_ENDIO_DATA); if (ret) goto err; } @@ -7273,13 +7709,12 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1); if (ret) goto err; - } else if (!skip_sum) { - ret = btrfs_lookup_bio_sums_dio(root, inode, dip, bio, - file_offset); + } else { + ret = btrfs_lookup_and_bind_dio_csum(root, inode, dip, bio, + file_offset); if (ret) goto err; } - map: ret = btrfs_map_bio(root, rw, bio, 0, async_submit); err: @@ -7300,19 +7735,18 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, u64 submit_len = 0; u64 map_length; int nr_pages = 0; - int ret = 0; + int ret; int async_submit = 0; map_length = orig_bio->bi_iter.bi_size; ret = btrfs_map_block(root->fs_info, rw, start_sector << 9, &map_length, NULL, 0); - if (ret) { - bio_put(orig_bio); + if (ret) return -EIO; - } if (map_length >= orig_bio->bi_iter.bi_size) { bio = orig_bio; + dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED; goto submit; } @@ -7326,14 +7760,16 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); if (!bio) return -ENOMEM; + bio->bi_private = dip; bio->bi_end_io = btrfs_end_dio_bio; + btrfs_io_bio(bio)->logical = file_offset; atomic_inc(&dip->pending_bios); while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { - if (unlikely(map_length < submit_len + bvec->bv_len || + if (map_length < submit_len + bvec->bv_len || bio_add_page(bio, bvec->bv_page, bvec->bv_len, - bvec->bv_offset) < bvec->bv_len)) { + bvec->bv_offset) < bvec->bv_len) { /* * inc the count before we submit the bio so * we know the end IO handler won't happen before @@ -7362,6 +7798,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, goto out_err; bio->bi_private = dip; bio->bi_end_io = btrfs_end_dio_bio; + btrfs_io_bio(bio)->logical = file_offset; map_length = orig_bio->bi_iter.bi_size; ret = btrfs_map_block(root->fs_info, rw, @@ -7405,11 +7842,10 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_dio_private *dip; struct bio *io_bio; + struct btrfs_io_bio *btrfs_bio; int skip_sum; - int sum_len; int write = rw & REQ_WRITE; int ret = 0; - u16 csum_size; skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; @@ -7419,16 +7855,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, goto free_ordered; } - if (!skip_sum && !write) { - csum_size = btrfs_super_csum_size(root->fs_info->super_copy); - sum_len = dio_bio->bi_iter.bi_size >> - inode->i_sb->s_blocksize_bits; - sum_len *= csum_size; - } else { - sum_len = 0; - } - - dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS); + dip = kzalloc(sizeof(*dip), GFP_NOFS); if (!dip) { ret = -ENOMEM; goto free_io_bio; @@ -7440,20 +7867,25 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, dip->bytes = dio_bio->bi_iter.bi_size; dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9; io_bio->bi_private = dip; - dip->errors = 0; dip->orig_bio = io_bio; dip->dio_bio = dio_bio; atomic_set(&dip->pending_bios, 0); + btrfs_bio = btrfs_io_bio(io_bio); + btrfs_bio->logical = file_offset; - if (write) + if (write) { io_bio->bi_end_io = btrfs_endio_direct_write; - else + } else { io_bio->bi_end_io = btrfs_endio_direct_read; + dip->subio_endio = btrfs_subio_endio_read; + } ret = btrfs_submit_direct_hook(rw, dip, skip_sum); if (!ret) return; + if (btrfs_bio->end_io) + btrfs_bio->end_io(btrfs_bio, ret); free_io_bio: bio_put(io_bio); @@ -7534,7 +7966,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, count = iov_iter_count(iter); if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &BTRFS_I(inode)->runtime_flags)) - filemap_fdatawrite_range(inode->i_mapping, offset, count); + filemap_fdatawrite_range(inode->i_mapping, offset, + offset + count - 1); if (rw & WRITE) { /* @@ -7549,8 +7982,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, ret = btrfs_delalloc_reserve_space(inode, count); if (ret) goto out; - } else if (unlikely(test_bit(BTRFS_INODE_READDIO_NEED_LOCK, - &BTRFS_I(inode)->runtime_flags))) { + } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, + &BTRFS_I(inode)->runtime_flags)) { inode_dio_done(inode); flags = DIO_LOCKING | DIO_SKIP_HOLES; wakeup = false; @@ -8041,6 +8474,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, set_nlink(inode, 1); btrfs_i_size_write(inode, 0); + unlock_new_inode(inode); err = btrfs_subvol_inherit_props(trans, new_root, parent_root); if (err) @@ -8069,6 +8503,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->last_sub_trans = 0; ei->logged_trans = 0; ei->delalloc_bytes = 0; + ei->defrag_bytes = 0; ei->disk_i_size = 0; ei->flags = 0; ei->csum_bytes = 0; @@ -8127,6 +8562,7 @@ void btrfs_destroy_inode(struct inode *inode) WARN_ON(BTRFS_I(inode)->reserved_extents); WARN_ON(BTRFS_I(inode)->delalloc_bytes); WARN_ON(BTRFS_I(inode)->csum_bytes); + WARN_ON(BTRFS_I(inode)->defrag_bytes); /* * This can happen where we create an inode, but somebody else also @@ -8495,7 +8931,9 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, work->inode = inode; work->wait = wait; work->delay_iput = delay_iput; - btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL); + WARN_ON_ONCE(!inode); + btrfs_init_work(&work->work, btrfs_flush_delalloc_helper, + btrfs_run_delalloc_work, NULL, NULL); return work; } @@ -8540,7 +8978,7 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput, spin_unlock(&root->delalloc_lock); work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); - if (unlikely(!work)) { + if (!work) { if (delay_iput) btrfs_add_delayed_iput(inode); else @@ -8699,12 +9137,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, goto out_unlock; } - err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); - if (err) { - drop_inode = 1; - goto out_unlock; - } - /* * If the active LSM wants to access the inode during * d_instantiate it needs these. Smack checks to see @@ -8713,34 +9145,32 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, */ inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; + inode->i_mapping->a_ops = &btrfs_aops; + inode->i_mapping->backing_dev_info = &root->fs_info->bdi; + BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; + + err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); + if (err) + goto out_unlock_inode; err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) - drop_inode = 1; - else { - inode->i_mapping->a_ops = &btrfs_aops; - inode->i_mapping->backing_dev_info = &root->fs_info->bdi; - BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; - } - if (drop_inode) - goto out_unlock; + goto out_unlock_inode; path = btrfs_alloc_path(); if (!path) { err = -ENOMEM; - drop_inode = 1; - goto out_unlock; + goto out_unlock_inode; } key.objectid = btrfs_ino(inode); key.offset = 0; - btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + key.type = BTRFS_EXTENT_DATA_KEY; datasize = btrfs_file_extent_calc_inline_size(name_len); err = btrfs_insert_empty_item(trans, root, path, &key, datasize); if (err) { - drop_inode = 1; btrfs_free_path(path); - goto out_unlock; + goto out_unlock_inode; } leaf = path->nodes[0]; ei = btrfs_item_ptr(leaf, path->slots[0], @@ -8764,12 +9194,15 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode_set_bytes(inode, name_len); btrfs_i_size_write(inode, name_len); err = btrfs_update_inode(trans, root, inode); - if (err) + if (err) { drop_inode = 1; + goto out_unlock_inode; + } + + unlock_new_inode(inode); + d_instantiate(dentry, inode); out_unlock: - if (!err) - d_instantiate(dentry, inode); btrfs_end_transaction(trans, root); if (drop_inode) { inode_dec_link_count(inode); @@ -8777,6 +9210,11 @@ out_unlock: } btrfs_btree_balance_dirty(root); return err; + +out_unlock_inode: + drop_inode = 1; + unlock_new_inode(inode); + goto out_unlock; } static int __btrfs_prealloc_file_range(struct inode *inode, int mode, @@ -8960,14 +9398,6 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) goto out; } - ret = btrfs_init_inode_security(trans, inode, dir, NULL); - if (ret) - goto out; - - ret = btrfs_update_inode(trans, root, inode); - if (ret) - goto out; - inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; @@ -8975,10 +9405,26 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_mapping->backing_dev_info = &root->fs_info->bdi; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; + ret = btrfs_init_inode_security(trans, inode, dir, NULL); + if (ret) + goto out_inode; + + ret = btrfs_update_inode(trans, root, inode); + if (ret) + goto out_inode; ret = btrfs_orphan_add(trans, inode); if (ret) - goto out; + goto out_inode; + /* + * We set number of links to 0 in btrfs_new_inode(), and here we set + * it to 1 because d_tmpfile() will issue a warning if the count is 0, + * through: + * + * d_tmpfile() -> inode_dec_link_count() -> drop_nlink() + */ + set_nlink(inode, 1); + unlock_new_inode(inode); d_tmpfile(dentry, inode); mark_inode_dirty(inode); @@ -8988,8 +9434,12 @@ out: iput(inode); btrfs_balance_delayed_items(root); btrfs_btree_balance_dirty(root); - return ret; + +out_inode: + unlock_new_inode(inode); + goto out; + } static const struct inode_operations btrfs_dir_inode_operations = { |