Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r-- | fs/btrfs/file.c | 225
1 file changed, 193 insertions, 32 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e354c33df08..c800d58f301 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -24,6 +24,7 @@
 #include <linux/string.h>
 #include <linux/backing-dev.h>
 #include <linux/mpage.h>
+#include <linux/falloc.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/statfs.h>
@@ -48,30 +49,34 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
                                          struct page **prepared_pages,
                                          struct iov_iter *i)
 {
-        size_t copied;
+        size_t copied = 0;
         int pg = 0;
         int offset = pos & (PAGE_CACHE_SIZE - 1);
+        int total_copied = 0;
 
         while (write_bytes > 0) {
                 size_t count = min_t(size_t,
                                      PAGE_CACHE_SIZE - offset, write_bytes);
                 struct page *page = prepared_pages[pg];
-again:
-                if (unlikely(iov_iter_fault_in_readable(i, count)))
-                        return -EFAULT;
-
-                /* Copy data from userspace to the current page */
-                copied = iov_iter_copy_from_user(page, i, offset, count);
+                /*
+                 * Copy data from userspace to the current page
+                 *
+                 * Disable pagefault to avoid recursive lock since
+                 * the pages are already locked
+                 */
+                pagefault_disable();
+                copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
+                pagefault_enable();
 
                 /* Flush processor's dcache for this page */
                 flush_dcache_page(page);
                 iov_iter_advance(i, copied);
                 write_bytes -= copied;
+                total_copied += copied;
 
+                /* Return to btrfs_file_aio_write to fault page */
                 if (unlikely(copied == 0)) {
-                        count = min_t(size_t, PAGE_CACHE_SIZE - offset,
-                                      iov_iter_single_seg_count(i));
-                        goto again;
+                        break;
                 }
 
                 if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
@@ -81,7 +86,7 @@ again:
                         offset = 0;
                 }
         }
-        return 0;
+        return total_copied;
 }
 
 /*
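The rewritten btrfs_copy_from_user() above copies with pagefaults disabled because the destination pages are already locked: if the fault handler ran and needed one of those same pages, it would recurse on the page lock. A short copy (copied == 0) now breaks out so btrfs_file_aio_write() can fault the source in and retry. One way to provoke exactly that situation from userspace is a pwrite() whose source buffer is an mmap'ed view of the range being written; a minimal sketch of the scenario the patched copy path must survive (file path and sizes are illustrative, error handling trimmed):

/* Sketch: pwrite() whose source buffer is an mmap'ed view of the
 * destination file -- the source fault can arrive while the
 * destination page is locked. Hypothetical path and sizes.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/mnt/btrfs/testfile", O_RDWR | O_CREAT, 0644);
        if (fd < 0 || ftruncate(fd, 4096) < 0)
                return 1;

        char *map = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                         MAP_SHARED, fd, 0);
        if (map == MAP_FAILED)
                return 1;

        /* Source and destination are the same page of the same file:
         * reading from 'map' faults while the write path holds the
         * page lock, which the atomic copy + retry loop tolerates. */
        if (pwrite(fd, map, 4096, 0) != 4096)
                perror("pwrite");

        munmap(map, 4096);
        close(fd);
        return 0;
}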
@@ -220,6 +225,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 
                         split->bdev = em->bdev;
                         split->flags = flags;
+                        split->compress_type = em->compress_type;
                         ret = add_extent_mapping(em_tree, split);
                         BUG_ON(ret);
                         free_extent_map(split);
@@ -234,6 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                         split->len = em->start + em->len - (start + len);
                         split->bdev = em->bdev;
                         split->flags = flags;
+                        split->compress_type = em->compress_type;
 
                         if (compressed) {
                                 split->block_len = em->block_len;
@@ -854,6 +861,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
         unsigned long last_index;
         int will_write;
         int buffered = 0;
+        int copied = 0;
+        int dirty_pages = 0;
 
         will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
                       (file->f_flags & O_DIRECT));
@@ -884,6 +893,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
         if (err)
                 goto out;
 
+        /*
+         * If BTRFS flips readonly due to some impossible error
+         * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
+         * although we have opened a file as writable, we have
+         * to stop this write operation to ensure FS consistency.
+         */
+        if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+                err = -EROFS;
+                goto out;
+        }
+
         file_update_time(file);
         BTRFS_I(inode)->sequence++;
 
@@ -970,7 +990,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                 WARN_ON(num_pages > nrptrs);
                 memset(pages, 0, sizeof(struct page *) * nrptrs);
 
-                ret = btrfs_delalloc_reserve_space(inode, write_bytes);
+                /*
+                 * Fault pages before locking them in prepare_pages
+                 * to avoid recursive lock
+                 */
+                if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) {
+                        ret = -EFAULT;
+                        goto out;
+                }
+
+                ret = btrfs_delalloc_reserve_space(inode,
+                                        num_pages << PAGE_CACHE_SHIFT);
                 if (ret)
                         goto out;
 
@@ -978,37 +1008,49 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                                     pos, first_index, last_index,
                                     write_bytes);
                 if (ret) {
-                        btrfs_delalloc_release_space(inode, write_bytes);
+                        btrfs_delalloc_release_space(inode,
+                                        num_pages << PAGE_CACHE_SHIFT);
                         goto out;
                 }
 
-                ret = btrfs_copy_from_user(pos, num_pages,
+                copied = btrfs_copy_from_user(pos, num_pages,
                                            write_bytes, pages, &i);
-                if (ret == 0) {
+                dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >>
+                                PAGE_CACHE_SHIFT;
+
+                if (num_pages > dirty_pages) {
+                        if (copied > 0)
+                                atomic_inc(
+                                        &BTRFS_I(inode)->outstanding_extents);
+                        btrfs_delalloc_release_space(inode,
+                                        (num_pages - dirty_pages) <<
+                                        PAGE_CACHE_SHIFT);
+                }
+
+                if (copied > 0) {
                         dirty_and_release_pages(NULL, root, file, pages,
-                                                num_pages, pos, write_bytes);
+                                                dirty_pages, pos, copied);
                 }
 
                 btrfs_drop_pages(pages, num_pages);
-                if (ret) {
-                        btrfs_delalloc_release_space(inode, write_bytes);
-                        goto out;
-                }
 
-                if (will_write) {
-                        filemap_fdatawrite_range(inode->i_mapping, pos,
-                                                 pos + write_bytes - 1);
-                } else {
-                        balance_dirty_pages_ratelimited_nr(inode->i_mapping,
-                                                           num_pages);
-                        if (num_pages <
-                            (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
-                                btrfs_btree_balance_dirty(root, 1);
-                        btrfs_throttle(root);
+                if (copied > 0) {
+                        if (will_write) {
+                                filemap_fdatawrite_range(inode->i_mapping, pos,
+                                                         pos + copied - 1);
+                        } else {
+                                balance_dirty_pages_ratelimited_nr(
+                                                        inode->i_mapping,
+                                                        dirty_pages);
+                                if (dirty_pages <
+                                    (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
+                                        btrfs_btree_balance_dirty(root, 1);
+                                btrfs_throttle(root);
+                        }
                 }
 
-                pos += write_bytes;
-                num_written += write_bytes;
+                pos += copied;
+                num_written += copied;
 
                 cond_resched();
         }
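Space reservation in the write loop above is now taken in whole pages (num_pages << PAGE_CACHE_SHIFT) before the copy, and whatever a short copy did not dirty is handed back, so the byte-to-page round-up decides how much reservation is kept. A standalone sketch of that arithmetic (assumes 4 KiB pages purely for the demo; DEMO_* names are illustrative):

/* Standalone illustration of the dirty_pages round-up used above.
 * Assumes 4 KiB pages (shift of 12) purely for the demo.
 */
#include <assert.h>
#include <stdio.h>

#define DEMO_PAGE_SHIFT 12
#define DEMO_PAGE_SIZE  (1UL << DEMO_PAGE_SHIFT)

static unsigned long dirty_pages(unsigned long copied)
{
        /* Same expression as the patch: round the copied byte count
         * up to the number of pages it dirtied. */
        return (copied + DEMO_PAGE_SIZE - 1) >> DEMO_PAGE_SHIFT;
}

int main(void)
{
        assert(dirty_pages(0) == 0);    /* nothing copied, nothing dirty */
        assert(dirty_pages(1) == 1);    /* one byte dirties a whole page */
        assert(dirty_pages(4096) == 1);
        assert(dirty_pages(4097) == 2); /* one byte into the next page */

        /* A short copy of 5000 bytes against a 3-page reservation
         * releases (3 - 2) pages' worth of delalloc space. */
        printf("5000 bytes -> %lu dirty pages\n", dirty_pages(5000));
        return 0;
}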
@@ -1047,8 +1089,14 @@ out:
 
                 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
                         trans = btrfs_start_transaction(root, 0);
+                        if (IS_ERR(trans)) {
+                                num_written = PTR_ERR(trans);
+                                goto done;
+                        }
+                        mutex_lock(&inode->i_mutex);
                         ret = btrfs_log_dentry_safe(trans, root,
                                                     file->f_dentry);
+                        mutex_unlock(&inode->i_mutex);
                         if (ret == 0) {
                                 ret = btrfs_sync_log(trans, root);
                                 if (ret == 0)
@@ -1067,6 +1115,7 @@ out:
                         (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
                 }
         }
+done:
         current->backing_dev_info = NULL;
         return num_written ? num_written : err;
 }
@@ -1202,6 +1251,117 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
         return 0;
 }
 
+static long btrfs_fallocate(struct file *file, int mode,
+                            loff_t offset, loff_t len)
+{
+        struct inode *inode = file->f_path.dentry->d_inode;
+        struct extent_state *cached_state = NULL;
+        u64 cur_offset;
+        u64 last_byte;
+        u64 alloc_start;
+        u64 alloc_end;
+        u64 alloc_hint = 0;
+        u64 locked_end;
+        u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
+        struct extent_map *em;
+        int ret;
+
+        alloc_start = offset & ~mask;
+        alloc_end = (offset + len + mask) & ~mask;
+
+        /* We only support the FALLOC_FL_KEEP_SIZE mode */
+        if (mode & ~FALLOC_FL_KEEP_SIZE)
+                return -EOPNOTSUPP;
+
+        /*
+         * wait for ordered IO before we have any locks.  We'll loop again
+         * below with the locks held.
+         */
+        btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
+
+        mutex_lock(&inode->i_mutex);
+        ret = inode_newsize_ok(inode, alloc_end);
+        if (ret)
+                goto out;
+
+        if (alloc_start > inode->i_size) {
+                ret = btrfs_cont_expand(inode, alloc_start);
+                if (ret)
+                        goto out;
+        }
+
+        ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
+        if (ret)
+                goto out;
+
+        locked_end = alloc_end - 1;
+        while (1) {
+                struct btrfs_ordered_extent *ordered;
+
+                /* the extent lock is ordered inside the running
+                 * transaction
+                 */
+                lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
+                                 locked_end, 0, &cached_state, GFP_NOFS);
+                ordered = btrfs_lookup_first_ordered_extent(inode,
+                                                            alloc_end - 1);
+                if (ordered &&
+                    ordered->file_offset + ordered->len > alloc_start &&
+                    ordered->file_offset < alloc_end) {
+                        btrfs_put_ordered_extent(ordered);
+                        unlock_extent_cached(&BTRFS_I(inode)->io_tree,
+                                             alloc_start, locked_end,
+                                             &cached_state, GFP_NOFS);
+                        /*
+                         * we can't wait on the range with the transaction
+                         * running or with the extent lock held
+                         */
+                        btrfs_wait_ordered_range(inode, alloc_start,
+                                                 alloc_end - alloc_start);
+                } else {
+                        if (ordered)
+                                btrfs_put_ordered_extent(ordered);
+                        break;
+                }
+        }
+
+        cur_offset = alloc_start;
+        while (1) {
+                em = btrfs_get_extent(inode, NULL, 0, cur_offset,
+                                      alloc_end - cur_offset, 0);
+                BUG_ON(IS_ERR(em) || !em);
+                last_byte = min(extent_map_end(em), alloc_end);
+                last_byte = (last_byte + mask) & ~mask;
+                if (em->block_start == EXTENT_MAP_HOLE ||
+                    (cur_offset >= inode->i_size &&
+                     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
+                        ret = btrfs_prealloc_file_range(inode, mode,
+                                                        cur_offset,
+                                                        last_byte - cur_offset,
+                                                        1 << inode->i_blkbits,
+                                                        offset + len,
+                                                        &alloc_hint);
+                        if (ret < 0) {
+                                free_extent_map(em);
+                                break;
+                        }
+                }
+                free_extent_map(em);
+
+                cur_offset = last_byte;
+                if (cur_offset >= alloc_end) {
+                        ret = 0;
+                        break;
+                }
+        }
+        unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
+                             &cached_state, GFP_NOFS);
+
+        btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
+out:
+        mutex_unlock(&inode->i_mutex);
+        return ret;
+}
+
 const struct file_operations btrfs_file_operations = {
         .llseek         = generic_file_llseek,
         .read           = do_sync_read,
@@ -1213,6 +1373,7 @@ const struct file_operations btrfs_file_operations = {
         .open           = generic_file_open,
         .release        = btrfs_release_file,
         .fsync          = btrfs_sync_file,
+        .fallocate      = btrfs_fallocate,
         .unlocked_ioctl = btrfs_ioctl,
 #ifdef CONFIG_COMPAT
         .compat_ioctl   = btrfs_ioctl,
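The new btrfs_fallocate() above rounds the request out to sector boundaries (offset & ~mask, (offset + len + mask) & ~mask), reserves data space, and preallocates extents hole by hole; wiring it into .fallocate of btrfs_file_operations makes it reachable through the plain fallocate(2) system call. A minimal usage sketch (hypothetical mount point; mode bits other than FALLOC_FL_KEEP_SIZE are rejected with EOPNOTSUPP by this version):

/* Usage sketch for the new .fallocate hook: reserve 1 MiB of space
 * without changing the file size. Hypothetical path.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/mnt/btrfs/prealloc", O_RDWR | O_CREAT, 0644);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* KEEP_SIZE: extents are preallocated past EOF but i_size
         * stays put, matching the only flag the patch accepts. */
        if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1024 * 1024) != 0)
                perror("fallocate");

        close(fd);
        return 0;
}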