summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r--fs/btrfs/file.c225
1 files changed, 193 insertions, 32 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e354c33df08..c800d58f301 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -24,6 +24,7 @@
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
+#include <linux/falloc.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/statfs.h>
@@ -48,30 +49,34 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
struct page **prepared_pages,
struct iov_iter *i)
{
- size_t copied;
+ size_t copied = 0;
int pg = 0;
int offset = pos & (PAGE_CACHE_SIZE - 1);
+ int total_copied = 0;
while (write_bytes > 0) {
size_t count = min_t(size_t,
PAGE_CACHE_SIZE - offset, write_bytes);
struct page *page = prepared_pages[pg];
-again:
- if (unlikely(iov_iter_fault_in_readable(i, count)))
- return -EFAULT;
-
- /* Copy data from userspace to the current page */
- copied = iov_iter_copy_from_user(page, i, offset, count);
+ /*
+ * Copy data from userspace to the current page
+ *
+ * Disable pagefault to avoid recursive lock since
+ * the pages are already locked
+ */
+ pagefault_disable();
+ copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
+ pagefault_enable();
/* Flush processor's dcache for this page */
flush_dcache_page(page);
iov_iter_advance(i, copied);
write_bytes -= copied;
+ total_copied += copied;
+ /* Return to btrfs_file_aio_write to fault page */
if (unlikely(copied == 0)) {
- count = min_t(size_t, PAGE_CACHE_SIZE - offset,
- iov_iter_single_seg_count(i));
- goto again;
+ break;
}
if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
@@ -81,7 +86,7 @@ again:
offset = 0;
}
}
- return 0;
+ return total_copied;
}
/*
@@ -220,6 +225,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
split->bdev = em->bdev;
split->flags = flags;
+ split->compress_type = em->compress_type;
ret = add_extent_mapping(em_tree, split);
BUG_ON(ret);
free_extent_map(split);
@@ -234,6 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
split->len = em->start + em->len - (start + len);
split->bdev = em->bdev;
split->flags = flags;
+ split->compress_type = em->compress_type;
if (compressed) {
split->block_len = em->block_len;
@@ -854,6 +861,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
unsigned long last_index;
int will_write;
int buffered = 0;
+ int copied = 0;
+ int dirty_pages = 0;
will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
(file->f_flags & O_DIRECT));
@@ -884,6 +893,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
if (err)
goto out;
+ /*
+ * If BTRFS flips readonly due to some impossible error
+ * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
+ * although we have opened a file as writable, we have
+ * to stop this write operation to ensure FS consistency.
+ */
+ if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+ err = -EROFS;
+ goto out;
+ }
+
file_update_time(file);
BTRFS_I(inode)->sequence++;
@@ -970,7 +990,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
WARN_ON(num_pages > nrptrs);
memset(pages, 0, sizeof(struct page *) * nrptrs);
- ret = btrfs_delalloc_reserve_space(inode, write_bytes);
+ /*
+ * Fault pages before locking them in prepare_pages
+ * to avoid recursive lock
+ */
+ if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ ret = btrfs_delalloc_reserve_space(inode,
+ num_pages << PAGE_CACHE_SHIFT);
if (ret)
goto out;
@@ -978,37 +1008,49 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
pos, first_index, last_index,
write_bytes);
if (ret) {
- btrfs_delalloc_release_space(inode, write_bytes);
+ btrfs_delalloc_release_space(inode,
+ num_pages << PAGE_CACHE_SHIFT);
goto out;
}
- ret = btrfs_copy_from_user(pos, num_pages,
+ copied = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, &i);
- if (ret == 0) {
+ dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT;
+
+ if (num_pages > dirty_pages) {
+ if (copied > 0)
+ atomic_inc(
+ &BTRFS_I(inode)->outstanding_extents);
+ btrfs_delalloc_release_space(inode,
+ (num_pages - dirty_pages) <<
+ PAGE_CACHE_SHIFT);
+ }
+
+ if (copied > 0) {
dirty_and_release_pages(NULL, root, file, pages,
- num_pages, pos, write_bytes);
+ dirty_pages, pos, copied);
}
btrfs_drop_pages(pages, num_pages);
- if (ret) {
- btrfs_delalloc_release_space(inode, write_bytes);
- goto out;
- }
- if (will_write) {
- filemap_fdatawrite_range(inode->i_mapping, pos,
- pos + write_bytes - 1);
- } else {
- balance_dirty_pages_ratelimited_nr(inode->i_mapping,
- num_pages);
- if (num_pages <
- (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
- btrfs_btree_balance_dirty(root, 1);
- btrfs_throttle(root);
+ if (copied > 0) {
+ if (will_write) {
+ filemap_fdatawrite_range(inode->i_mapping, pos,
+ pos + copied - 1);
+ } else {
+ balance_dirty_pages_ratelimited_nr(
+ inode->i_mapping,
+ dirty_pages);
+ if (dirty_pages <
+ (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
+ btrfs_btree_balance_dirty(root, 1);
+ btrfs_throttle(root);
+ }
}
- pos += write_bytes;
- num_written += write_bytes;
+ pos += copied;
+ num_written += copied;
cond_resched();
}
@@ -1047,8 +1089,14 @@ out:
if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ num_written = PTR_ERR(trans);
+ goto done;
+ }
+ mutex_lock(&inode->i_mutex);
ret = btrfs_log_dentry_safe(trans, root,
file->f_dentry);
+ mutex_unlock(&inode->i_mutex);
if (ret == 0) {
ret = btrfs_sync_log(trans, root);
if (ret == 0)
@@ -1067,6 +1115,7 @@ out:
(start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
}
}
+done:
current->backing_dev_info = NULL;
return num_written ? num_written : err;
}
@@ -1202,6 +1251,117 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
return 0;
}
+static long btrfs_fallocate(struct file *file, int mode,
+ loff_t offset, loff_t len)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct extent_state *cached_state = NULL;
+ u64 cur_offset;
+ u64 last_byte;
+ u64 alloc_start;
+ u64 alloc_end;
+ u64 alloc_hint = 0;
+ u64 locked_end;
+ u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
+ struct extent_map *em;
+ int ret;
+
+ alloc_start = offset & ~mask;
+ alloc_end = (offset + len + mask) & ~mask;
+
+ /* We only support the FALLOC_FL_KEEP_SIZE mode */
+ if (mode & ~FALLOC_FL_KEEP_SIZE)
+ return -EOPNOTSUPP;
+
+ /*
+ * wait for ordered IO before we have any locks. We'll loop again
+ * below with the locks held.
+ */
+ btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
+
+ mutex_lock(&inode->i_mutex);
+ ret = inode_newsize_ok(inode, alloc_end);
+ if (ret)
+ goto out;
+
+ if (alloc_start > inode->i_size) {
+ ret = btrfs_cont_expand(inode, alloc_start);
+ if (ret)
+ goto out;
+ }
+
+ ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
+ if (ret)
+ goto out;
+
+ locked_end = alloc_end - 1;
+ while (1) {
+ struct btrfs_ordered_extent *ordered;
+
+ /* the extent lock is ordered inside the running
+ * transaction
+ */
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
+ locked_end, 0, &cached_state, GFP_NOFS);
+ ordered = btrfs_lookup_first_ordered_extent(inode,
+ alloc_end - 1);
+ if (ordered &&
+ ordered->file_offset + ordered->len > alloc_start &&
+ ordered->file_offset < alloc_end) {
+ btrfs_put_ordered_extent(ordered);
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree,
+ alloc_start, locked_end,
+ &cached_state, GFP_NOFS);
+ /*
+ * we can't wait on the range with the transaction
+ * running or with the extent lock held
+ */
+ btrfs_wait_ordered_range(inode, alloc_start,
+ alloc_end - alloc_start);
+ } else {
+ if (ordered)
+ btrfs_put_ordered_extent(ordered);
+ break;
+ }
+ }
+
+ cur_offset = alloc_start;
+ while (1) {
+ em = btrfs_get_extent(inode, NULL, 0, cur_offset,
+ alloc_end - cur_offset, 0);
+ BUG_ON(IS_ERR(em) || !em);
+ last_byte = min(extent_map_end(em), alloc_end);
+ last_byte = (last_byte + mask) & ~mask;
+ if (em->block_start == EXTENT_MAP_HOLE ||
+ (cur_offset >= inode->i_size &&
+ !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
+ ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
+ last_byte - cur_offset,
+ 1 << inode->i_blkbits,
+ offset + len,
+ &alloc_hint);
+ if (ret < 0) {
+ free_extent_map(em);
+ break;
+ }
+ }
+ free_extent_map(em);
+
+ cur_offset = last_byte;
+ if (cur_offset >= alloc_end) {
+ ret = 0;
+ break;
+ }
+ }
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
+ &cached_state, GFP_NOFS);
+
+ btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
+out:
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+}
+
const struct file_operations btrfs_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
@@ -1213,6 +1373,7 @@ const struct file_operations btrfs_file_operations = {
.open = generic_file_open,
.release = btrfs_release_file,
.fsync = btrfs_sync_file,
+ .fallocate = btrfs_fallocate,
.unlocked_ioctl = btrfs_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = btrfs_ioctl,