diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-04 15:31:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-04 15:31:36 -0700 |
commit | 53c566625fb872e7826a237f0f5c21458028e94a (patch) | |
tree | 8ef9990ed2124f085442bc5a44c3f5212bf4002d /fs/btrfs/ioctl.c | |
parent | 34917f9713905a937816ebb7ee5f25bef7a6441c (diff) | |
parent | 00fdf13a2e9f313a044288aa59d3b8ec29ff904a (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs changes from Chris Mason:
"This is a pretty long stream of bug fixes and performance fixes.
Qu Wenruo has replaced the btrfs async threads with regular kernel
workqueues. We'll keep an eye out for performance differences, but
it's nice to be using more generic code for this.
We still have some corruption fixes and other patches coming in for
the merge window, but this batch is tested and ready to go"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (108 commits)
Btrfs: fix a crash of clone with inline extents's split
btrfs: fix uninit variable warning
Btrfs: take into account total references when doing backref lookup
Btrfs: part 2, fix incremental send's decision to delay a dir move/rename
Btrfs: fix incremental send's decision to delay a dir move/rename
Btrfs: remove unnecessary inode generation lookup in send
Btrfs: fix race when updating existing ref head
btrfs: Add trace for btrfs_workqueue alloc/destroy
Btrfs: less fs tree lock contention when using autodefrag
Btrfs: return EPERM when deleting a default subvolume
Btrfs: add missing kfree in btrfs_destroy_workqueue
Btrfs: cache extent states in defrag code path
Btrfs: fix deadlock with nested trans handles
Btrfs: fix possible empty list access when flushing the delalloc inodes
Btrfs: split the global ordered extents mutex
Btrfs: don't flush all delalloc inodes when we doesn't get s_umount lock
Btrfs: reclaim delalloc metadata more aggressively
Btrfs: remove unnecessary lock in may_commit_transaction()
Btrfs: remove the unnecessary flush when preparing the pages
Btrfs: just do dirty page flush for the inode with compression before direct IO
...
Diffstat (limited to 'fs/btrfs/ioctl.c')
-rw-r--r-- | fs/btrfs/ioctl.c | 210 |
1 files changed, 178 insertions, 32 deletions
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a6d8efa46bf..0401397b5c9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -59,6 +59,32 @@ #include "props.h" #include "sysfs.h" +#ifdef CONFIG_64BIT +/* If we have a 32-bit userspace and 64-bit kernel, then the UAPI + * structures are incorrect, as the timespec structure from userspace + * is 4 bytes too small. We define these alternatives here to teach + * the kernel about the 32-bit struct packing. + */ +struct btrfs_ioctl_timespec_32 { + __u64 sec; + __u32 nsec; +} __attribute__ ((__packed__)); + +struct btrfs_ioctl_received_subvol_args_32 { + char uuid[BTRFS_UUID_SIZE]; /* in */ + __u64 stransid; /* in */ + __u64 rtransid; /* out */ + struct btrfs_ioctl_timespec_32 stime; /* in */ + struct btrfs_ioctl_timespec_32 rtime; /* out */ + __u64 flags; /* in */ + __u64 reserved[16]; /* in */ +} __attribute__ ((__packed__)); + +#define BTRFS_IOC_SET_RECEIVED_SUBVOL_32 _IOWR(BTRFS_IOCTL_MAGIC, 37, \ + struct btrfs_ioctl_received_subvol_args_32) +#endif + + static int btrfs_clone(struct inode *src, struct inode *inode, u64 off, u64 olen, u64 olen_aligned, u64 destoff); @@ -585,6 +611,23 @@ fail: return ret; } +static void btrfs_wait_nocow_write(struct btrfs_root *root) +{ + s64 writers; + DEFINE_WAIT(wait); + + do { + prepare_to_wait(&root->subv_writers->wait, &wait, + TASK_UNINTERRUPTIBLE); + + writers = percpu_counter_sum(&root->subv_writers->counter); + if (writers) + schedule(); + + finish_wait(&root->subv_writers->wait, &wait); + } while (writers); +} + static int create_snapshot(struct btrfs_root *root, struct inode *dir, struct dentry *dentry, char *name, int namelen, u64 *async_transid, bool readonly, @@ -598,15 +641,21 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, if (!root->ref_cows) return -EINVAL; + atomic_inc(&root->will_be_snapshoted); + smp_mb__after_atomic_inc(); + btrfs_wait_nocow_write(root); + ret = btrfs_start_delalloc_inodes(root, 0); if (ret) - return ret; + goto out; btrfs_wait_ordered_extents(root, -1); pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); - if (!pending_snapshot) - return -ENOMEM; + if (!pending_snapshot) { + ret = -ENOMEM; + goto out; + } btrfs_init_block_rsv(&pending_snapshot->block_rsv, BTRFS_BLOCK_RSV_TEMP); @@ -623,7 +672,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, &pending_snapshot->qgroup_reserved, false); if (ret) - goto out; + goto free; pending_snapshot->dentry = dentry; pending_snapshot->root = root; @@ -674,8 +723,10 @@ fail: btrfs_subvolume_release_metadata(BTRFS_I(dir)->root, &pending_snapshot->block_rsv, pending_snapshot->qgroup_reserved); -out: +free: kfree(pending_snapshot); +out: + atomic_dec(&root->will_be_snapshoted); return ret; } @@ -884,12 +935,14 @@ static int find_new_extents(struct btrfs_root *root, min_key.type = BTRFS_EXTENT_DATA_KEY; min_key.offset = *off; - path->keep_locks = 1; - while (1) { + path->keep_locks = 1; ret = btrfs_search_forward(root, &min_key, path, newer_than); if (ret != 0) goto none; + path->keep_locks = 0; + btrfs_unlock_up_safe(path, 1); +process_slot: if (min_key.objectid != ino) goto none; if (min_key.type != BTRFS_EXTENT_DATA_KEY) @@ -908,6 +961,12 @@ static int find_new_extents(struct btrfs_root *root, return 0; } + path->slots[0]++; + if (path->slots[0] < btrfs_header_nritems(leaf)) { + btrfs_item_key_to_cpu(leaf, &min_key, path->slots[0]); + goto process_slot; + } + if (min_key.offset == (u64)-1) goto none; @@ -935,10 +994,13 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) read_unlock(&em_tree->lock); if (!em) { + struct extent_state *cached = NULL; + u64 end = start + len - 1; + /* get the big lock and read metadata off disk */ - lock_extent(io_tree, start, start + len - 1); + lock_extent_bits(io_tree, start, end, 0, &cached); em = btrfs_get_extent(inode, NULL, 0, start, len, 0); - unlock_extent(io_tree, start, start + len - 1); + unlock_extent_cached(io_tree, start, end, &cached, GFP_NOFS); if (IS_ERR(em)) return NULL; @@ -957,7 +1019,8 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) return false; next = defrag_lookup_extent(inode, em->start + em->len); - if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) + if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE || + (em->block_start + em->block_len == next->block_start)) ret = false; free_extent_map(next); @@ -1076,10 +1139,12 @@ again: page_start = page_offset(page); page_end = page_start + PAGE_CACHE_SIZE - 1; while (1) { - lock_extent(tree, page_start, page_end); + lock_extent_bits(tree, page_start, page_end, + 0, &cached_state); ordered = btrfs_lookup_ordered_extent(inode, page_start); - unlock_extent(tree, page_start, page_end); + unlock_extent_cached(tree, page_start, page_end, + &cached_state, GFP_NOFS); if (!ordered) break; @@ -1356,8 +1421,12 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, } } - if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) + if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) { filemap_flush(inode->i_mapping); + if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, + &BTRFS_I(inode)->runtime_flags)) + filemap_flush(inode->i_mapping); + } if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { /* the filemap_flush will queue IO into the worker threads, but @@ -1573,7 +1642,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, if (src_inode->i_sb != file_inode(file)->i_sb) { btrfs_info(BTRFS_I(src_inode)->root->fs_info, "Snapshot src from another FS"); - ret = -EINVAL; + ret = -EXDEV; } else if (!inode_owner_or_capable(src_inode)) { /* * Subvolume creation is not restricted, but snapshots @@ -1797,7 +1866,9 @@ static noinline int may_destroy_subvol(struct btrfs_root *root) if (di && !IS_ERR(di)) { btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); if (key.objectid == root->root_key.objectid) { - ret = -ENOTEMPTY; + ret = -EPERM; + btrfs_err(root->fs_info, "deleting default subvolume " + "%llu is not allowed", key.objectid); goto out; } btrfs_release_path(path); @@ -2994,8 +3065,9 @@ process_slot: new_key.offset + datal, 1); if (ret) { - btrfs_abort_transaction(trans, root, - ret); + if (ret != -EINVAL) + btrfs_abort_transaction(trans, + root, ret); btrfs_end_transaction(trans, root); goto out; } @@ -3153,8 +3225,9 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, * decompress into destination's address_space (the file offset * may change, so source mapping won't do), then recompress (or * otherwise reinsert) a subrange. - * - allow ranges within the same file to be cloned (provided - * they don't overlap)? + * + * - split destination inode's inline extents. The inline extents can + * be either compressed or non-compressed. */ /* the destination must be opened for writing */ @@ -4353,10 +4426,9 @@ static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg) return btrfs_qgroup_wait_for_completion(root->fs_info); } -static long btrfs_ioctl_set_received_subvol(struct file *file, - void __user *arg) +static long _btrfs_ioctl_set_received_subvol(struct file *file, + struct btrfs_ioctl_received_subvol_args *sa) { - struct btrfs_ioctl_received_subvol_args *sa = NULL; struct inode *inode = file_inode(file); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root_item *root_item = &root->root_item; @@ -4384,13 +4456,6 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, goto out; } - sa = memdup_user(arg, sizeof(*sa)); - if (IS_ERR(sa)) { - ret = PTR_ERR(sa); - sa = NULL; - goto out; - } - /* * 1 - root item * 2 - uuid items (received uuid + subvol uuid) @@ -4444,14 +4509,91 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, goto out; } +out: + up_write(&root->fs_info->subvol_sem); + mnt_drop_write_file(file); + return ret; +} + +#ifdef CONFIG_64BIT +static long btrfs_ioctl_set_received_subvol_32(struct file *file, + void __user *arg) +{ + struct btrfs_ioctl_received_subvol_args_32 *args32 = NULL; + struct btrfs_ioctl_received_subvol_args *args64 = NULL; + int ret = 0; + + args32 = memdup_user(arg, sizeof(*args32)); + if (IS_ERR(args32)) { + ret = PTR_ERR(args32); + args32 = NULL; + goto out; + } + + args64 = kmalloc(sizeof(*args64), GFP_NOFS); + if (IS_ERR(args64)) { + ret = PTR_ERR(args64); + args64 = NULL; + goto out; + } + + memcpy(args64->uuid, args32->uuid, BTRFS_UUID_SIZE); + args64->stransid = args32->stransid; + args64->rtransid = args32->rtransid; + args64->stime.sec = args32->stime.sec; + args64->stime.nsec = args32->stime.nsec; + args64->rtime.sec = args32->rtime.sec; + args64->rtime.nsec = args32->rtime.nsec; + args64->flags = args32->flags; + + ret = _btrfs_ioctl_set_received_subvol(file, args64); + if (ret) + goto out; + + memcpy(args32->uuid, args64->uuid, BTRFS_UUID_SIZE); + args32->stransid = args64->stransid; + args32->rtransid = args64->rtransid; + args32->stime.sec = args64->stime.sec; + args32->stime.nsec = args64->stime.nsec; + args32->rtime.sec = args64->rtime.sec; + args32->rtime.nsec = args64->rtime.nsec; + args32->flags = args64->flags; + + ret = copy_to_user(arg, args32, sizeof(*args32)); + if (ret) + ret = -EFAULT; + +out: + kfree(args32); + kfree(args64); + return ret; +} +#endif + +static long btrfs_ioctl_set_received_subvol(struct file *file, + void __user *arg) +{ + struct btrfs_ioctl_received_subvol_args *sa = NULL; + int ret = 0; + + sa = memdup_user(arg, sizeof(*sa)); + if (IS_ERR(sa)) { + ret = PTR_ERR(sa); + sa = NULL; + goto out; + } + + ret = _btrfs_ioctl_set_received_subvol(file, sa); + + if (ret) + goto out; + ret = copy_to_user(arg, sa, sizeof(*sa)); if (ret) ret = -EFAULT; out: kfree(sa); - up_write(&root->fs_info->subvol_sem); - mnt_drop_write_file(file); return ret; } @@ -4746,7 +4888,7 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_SYNC: { int ret; - ret = btrfs_start_delalloc_roots(root->fs_info, 0); + ret = btrfs_start_delalloc_roots(root->fs_info, 0, -1); if (ret) return ret; ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); @@ -4770,6 +4912,10 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_balance_progress(root, argp); case BTRFS_IOC_SET_RECEIVED_SUBVOL: return btrfs_ioctl_set_received_subvol(file, argp); +#ifdef CONFIG_64BIT + case BTRFS_IOC_SET_RECEIVED_SUBVOL_32: + return btrfs_ioctl_set_received_subvol_32(file, argp); +#endif case BTRFS_IOC_SEND: return btrfs_ioctl_send(file, argp); case BTRFS_IOC_GET_DEV_STATS: |