From 5d4f98a28c7d334091c1b7744f48a1acdd2a4ae0 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Wed, 10 Jun 2009 10:45:14 -0400 Subject: Btrfs: Mixed back reference (FORWARD ROLLING FORMAT CHANGE) This commit introduces a new kind of back reference for btrfs metadata. Once a filesystem has been mounted with this commit, IT WILL NO LONGER BE MOUNTABLE BY OLDER KERNELS. When a tree block in subvolume tree is cow'd, the reference counts of all extents it points to are increased by one. At transaction commit time, the old root of the subvolume is recorded in a "dead root" data structure, and the btree it points to is later walked, dropping reference counts and freeing any blocks where the reference count goes to 0. The increments done during cow and decrements done after commit cancel out, and the walk is a very expensive way to go about freeing the blocks that are no longer referenced by the new btree root. This commit reduces the transaction overhead by avoiding the need for dead root records. When a non-shared tree block is cow'd, we free the old block at once, and the new block inherits old block's references. When a tree block with reference count > 1 is cow'd, we increase the reference counts of all extents the new block points to by one, and decrease the old block's reference count by one. This dead tree avoidance code removes the need to modify the reference counts of lower level extents when a non-shared tree block is cow'd. But we still need to update back ref for all pointers in the block. This is because the location of the block is recorded in the back ref item. We can solve this by introducing a new type of back ref. The new back ref provides information about pointer's key, level and in which tree the pointer lives. This information allow us to find the pointer by searching the tree. The shortcoming of the new back ref is that it only works for pointers in tree blocks referenced by their owner trees. This is mostly a problem for snapshots, where resolving one of these fuzzy back references would be O(number_of_snapshots) and quite slow. The solution used here is to use the fuzzy back references in the common case where a given tree block is only referenced by one root, and use the full back references when multiple roots have a reference on a given block. This commit adds per subvolume red-black tree to keep trace of cached inodes. The red-black tree helps the balancing code to find cached inodes whose inode numbers within a given range. This commit improves the balancing code by introducing several data structures to keep the state of balancing. The most important one is the back ref cache. It caches how the upper level tree blocks are referenced. This greatly reduce the overhead of checking back ref. The improved balancing code scales significantly better with a large number of snapshots. This is a very large commit and was written in a number of pieces. But, they depend heavily on the disk format change and were squashed together to make sure git bisect didn't end up in a bad state wrt space balancing or the format change. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 132 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 73 insertions(+), 59 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1c8b0190d03..917bf10597c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -48,7 +48,6 @@ #include "ordered-data.h" #include "xattr.h" #include "tree-log.h" -#include "ref-cache.h" #include "compression.h" #include "locking.h" @@ -944,6 +943,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, u64 cow_start; u64 cur_offset; u64 extent_end; + u64 extent_offset; u64 disk_bytenr; u64 num_bytes; int extent_type; @@ -1005,6 +1005,7 @@ next_slot: if (extent_type == BTRFS_FILE_EXTENT_REG || extent_type == BTRFS_FILE_EXTENT_PREALLOC) { disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + extent_offset = btrfs_file_extent_offset(leaf, fi); extent_end = found_key.offset + btrfs_file_extent_num_bytes(leaf, fi); if (extent_end <= start) { @@ -1022,9 +1023,10 @@ next_slot: if (btrfs_extent_readonly(root, disk_bytenr)) goto out_check; if (btrfs_cross_ref_exist(trans, root, inode->i_ino, - disk_bytenr)) + found_key.offset - + extent_offset, disk_bytenr)) goto out_check; - disk_bytenr += btrfs_file_extent_offset(leaf, fi); + disk_bytenr += extent_offset; disk_bytenr += cur_offset - found_key.offset; num_bytes = min(end + 1, extent_end) - cur_offset; /* @@ -1489,9 +1491,9 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, ins.objectid = disk_bytenr; ins.offset = disk_num_bytes; ins.type = BTRFS_EXTENT_ITEM_KEY; - ret = btrfs_alloc_reserved_extent(trans, root, leaf->start, - root->root_key.objectid, - trans->transid, inode->i_ino, &ins); + ret = btrfs_alloc_reserved_file_extent(trans, root, + root->root_key.objectid, + inode->i_ino, file_pos, &ins); BUG_ON(ret); btrfs_free_path(path); @@ -1956,23 +1958,13 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) * crossing root thing. we store the inode number in the * offset of the orphan item. */ - inode = btrfs_iget_locked(root->fs_info->sb, - found_key.offset, root); - if (!inode) + found_key.objectid = found_key.offset; + found_key.type = BTRFS_INODE_ITEM_KEY; + found_key.offset = 0; + inode = btrfs_iget(root->fs_info->sb, &found_key, root); + if (IS_ERR(inode)) break; - if (inode->i_state & I_NEW) { - BTRFS_I(inode)->root = root; - - /* have to set the location manually */ - BTRFS_I(inode)->location.objectid = inode->i_ino; - BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; - BTRFS_I(inode)->location.offset = 0; - - btrfs_read_locked_inode(inode); - unlock_new_inode(inode); - } - /* * add this inode to the orphan list so btrfs_orphan_del does * the proper thing when we hit it @@ -2069,7 +2061,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf, /* * read an inode from the btree into the in-memory inode */ -void btrfs_read_locked_inode(struct inode *inode) +static void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; struct extent_buffer *leaf; @@ -2599,9 +2591,8 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, struct btrfs_file_extent_item *fi; u64 extent_start = 0; u64 extent_num_bytes = 0; + u64 extent_offset = 0; u64 item_end = 0; - u64 root_gen = 0; - u64 root_owner = 0; int found_extent; int del_item; int pending_del_nr = 0; @@ -2716,6 +2707,9 @@ search_again: extent_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); + extent_offset = found_key.offset - + btrfs_file_extent_offset(leaf, fi); + /* FIXME blocksize != 4096 */ num_dec = btrfs_file_extent_num_bytes(leaf, fi); if (extent_start != 0) { @@ -2723,8 +2717,6 @@ search_again: if (root->ref_cows) inode_sub_bytes(inode, num_dec); } - root_gen = btrfs_header_generation(leaf); - root_owner = btrfs_header_owner(leaf); } } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { /* @@ -2768,12 +2760,12 @@ delete: } else { break; } - if (found_extent) { + if (found_extent && root->ref_cows) { btrfs_set_path_blocking(path); ret = btrfs_free_extent(trans, root, extent_start, - extent_num_bytes, - leaf->start, root_owner, - root_gen, inode->i_ino, 0); + extent_num_bytes, 0, + btrfs_header_owner(leaf), + inode->i_ino, extent_offset); BUG_ON(ret); } next: @@ -3105,6 +3097,45 @@ static int fixup_tree_root_location(struct btrfs_root *root, return 0; } +static void inode_tree_add(struct inode *inode) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_inode *entry; + struct rb_node **p = &root->inode_tree.rb_node; + struct rb_node *parent = NULL; + + spin_lock(&root->inode_lock); + while (*p) { + parent = *p; + entry = rb_entry(parent, struct btrfs_inode, rb_node); + + if (inode->i_ino < entry->vfs_inode.i_ino) + p = &(*p)->rb_left; + else if (inode->i_ino > entry->vfs_inode.i_ino) + p = &(*p)->rb_right; + else { + WARN_ON(!(entry->vfs_inode.i_state & + (I_WILL_FREE | I_FREEING | I_CLEAR))); + break; + } + } + rb_link_node(&BTRFS_I(inode)->rb_node, parent, p); + rb_insert_color(&BTRFS_I(inode)->rb_node, &root->inode_tree); + spin_unlock(&root->inode_lock); +} + +static void inode_tree_del(struct inode *inode) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + + if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { + spin_lock(&root->inode_lock); + rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); + spin_unlock(&root->inode_lock); + RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); + } +} + static noinline void init_btrfs_i(struct inode *inode) { struct btrfs_inode *bi = BTRFS_I(inode); @@ -3130,6 +3161,7 @@ static noinline void init_btrfs_i(struct inode *inode) inode->i_mapping, GFP_NOFS); INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); + RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); mutex_init(&BTRFS_I(inode)->extent_mutex); mutex_init(&BTRFS_I(inode)->log_mutex); @@ -3152,26 +3184,9 @@ static int btrfs_find_actor(struct inode *inode, void *opaque) args->root == BTRFS_I(inode)->root; } -struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, - struct btrfs_root *root, int wait) -{ - struct inode *inode; - struct btrfs_iget_args args; - args.ino = objectid; - args.root = root; - - if (wait) { - inode = ilookup5(s, objectid, btrfs_find_actor, - (void *)&args); - } else { - inode = ilookup5_nowait(s, objectid, btrfs_find_actor, - (void *)&args); - } - return inode; -} - -struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, - struct btrfs_root *root) +static struct inode *btrfs_iget_locked(struct super_block *s, + u64 objectid, + struct btrfs_root *root) { struct inode *inode; struct btrfs_iget_args args; @@ -3188,24 +3203,21 @@ struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, * Returns in *is_new if the inode was read from disk */ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, - struct btrfs_root *root, int *is_new) + struct btrfs_root *root) { struct inode *inode; inode = btrfs_iget_locked(s, location->objectid, root); if (!inode) - return ERR_PTR(-EACCES); + return ERR_PTR(-ENOMEM); if (inode->i_state & I_NEW) { BTRFS_I(inode)->root = root; memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); btrfs_read_locked_inode(inode); + + inode_tree_add(inode); unlock_new_inode(inode); - if (is_new) - *is_new = 1; - } else { - if (is_new) - *is_new = 0; } return inode; @@ -3218,7 +3230,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) struct btrfs_root *root = bi->root; struct btrfs_root *sub_root = root; struct btrfs_key location; - int ret, new; + int ret; if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); @@ -3236,7 +3248,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) return ERR_PTR(ret); if (ret > 0) return ERR_PTR(-ENOENT); - inode = btrfs_iget(dir->i_sb, &location, sub_root, &new); + inode = btrfs_iget(dir->i_sb, &location, sub_root); if (IS_ERR(inode)) return ERR_CAST(inode); } @@ -3631,6 +3643,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); insert_inode_hash(inode); + inode_tree_add(inode); return inode; fail: if (dir) @@ -4683,6 +4696,7 @@ void btrfs_destroy_inode(struct inode *inode) btrfs_put_ordered_extent(ordered); } } + inode_tree_del(inode); btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); } -- cgit v1.2.3-70-g09d2 From 6cbff00f4632c8060b06bfc9585805217f11e12e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Apr 2009 10:37:41 +0200 Subject: Btrfs: implement FS_IOC_GETFLAGS/SETFLAGS/GETVERSION Add support for the standard attributes set via chattr and read via lsattr. Currently we store the attributes in the flags value in the btrfs inode, but I wonder whether we should split it into two so that we don't have to keep converting between the two formats. Remove the btrfs_clear_flag/btrfs_set_flag/btrfs_test_flag macros as they were confusing the existing code and got in the way of the new additions. Also add the FS_IOC_GETVERSION ioctl for getting i_generation as it's trivial. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 1 - fs/btrfs/compression.c | 6 +- fs/btrfs/ctree.h | 16 +++-- fs/btrfs/inode.c | 27 ++++---- fs/btrfs/ioctl.c | 171 +++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 200 insertions(+), 21 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index ecf5f7d8166..acb4f351758 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -157,5 +157,4 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size) BTRFS_I(inode)->disk_i_size = size; } - #endif diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index ab07627084f..de1e2fd3208 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -123,7 +123,7 @@ static int check_compressed_csum(struct inode *inode, u32 csum; u32 *cb_sum = &cb->sums; - if (btrfs_test_flag(inode, NODATASUM)) + if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) return 0; for (i = 0; i < cb->nr_pages; i++) { @@ -670,7 +670,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, */ atomic_inc(&cb->pending_bios); - if (!btrfs_test_flag(inode, NODATASUM)) { + if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { btrfs_lookup_bio_sums(root, inode, comp_bio, sums); } @@ -697,7 +697,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); BUG_ON(ret); - if (!btrfs_test_flag(inode, NODATASUM)) + if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) btrfs_lookup_bio_sums(root, inode, comp_bio, sums); ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5fa7d7d287a..4d6e0b6f21e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1115,12 +1115,14 @@ struct btrfs_root { #define BTRFS_INODE_READONLY (1 << 2) #define BTRFS_INODE_NOCOMPRESS (1 << 3) #define BTRFS_INODE_PREALLOC (1 << 4) -#define btrfs_clear_flag(inode, flag) (BTRFS_I(inode)->flags &= \ - ~BTRFS_INODE_##flag) -#define btrfs_set_flag(inode, flag) (BTRFS_I(inode)->flags |= \ - BTRFS_INODE_##flag) -#define btrfs_test_flag(inode, flag) (BTRFS_I(inode)->flags & \ - BTRFS_INODE_##flag) +#define BTRFS_INODE_SYNC (1 << 5) +#define BTRFS_INODE_IMMUTABLE (1 << 6) +#define BTRFS_INODE_APPEND (1 << 7) +#define BTRFS_INODE_NODUMP (1 << 8) +#define BTRFS_INODE_NOATIME (1 << 9) +#define BTRFS_INODE_DIRSYNC (1 << 10) + + /* some macros to generate set/get funcs for the struct fields. This * assumes there is a lefoo_to_cpu for every type, so lets make a simple * one for u8: @@ -2260,6 +2262,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t size); /* ioctl.c */ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +void btrfs_update_iflags(struct inode *inode); +void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); /* file.c */ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 917bf10597c..5b68330f858 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -368,7 +368,7 @@ again: * inode has not been flagged as nocompress. This flag can * change at any time if we discover bad compression ratios. */ - if (!btrfs_test_flag(inode, NOCOMPRESS) && + if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && btrfs_test_opt(root, COMPRESS)) { WARN_ON(pages); pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); @@ -469,7 +469,7 @@ again: nr_pages_ret = 0; /* flag the file so we don't compress in the future */ - btrfs_set_flag(inode, NOCOMPRESS); + BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; } if (will_compress) { *num_added += 1; @@ -862,7 +862,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, async_cow->locked_page = locked_page; async_cow->start = start; - if (btrfs_test_flag(inode, NOCOMPRESS)) + if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) cur_end = end; else cur_end = min(end, start + 512 * 1024 - 1); @@ -1133,10 +1133,10 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, int ret; struct btrfs_root *root = BTRFS_I(inode)->root; - if (btrfs_test_flag(inode, NODATACOW)) + if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 1, nr_written); - else if (btrfs_test_flag(inode, PREALLOC)) + else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 0, nr_written); else if (!btrfs_test_opt(root, COMPRESS)) @@ -1290,7 +1290,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, int ret = 0; int skip_sum; - skip_sum = btrfs_test_flag(inode, NODATASUM); + skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); BUG_ON(ret); @@ -1790,7 +1790,8 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, ClearPageChecked(page); goto good; } - if (btrfs_test_flag(inode, NODATASUM)) + + if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) return 0; if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && @@ -2156,6 +2157,8 @@ static void btrfs_read_locked_inode(struct inode *inode) init_special_inode(inode, inode->i_mode, rdev); break; } + + btrfs_update_iflags(inode); return; make_bad: @@ -3586,9 +3589,9 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, btrfs_find_block_group(root, 0, alloc_hint, owner); if ((mode & S_IFREG)) { if (btrfs_test_opt(root, NODATASUM)) - btrfs_set_flag(inode, NODATASUM); + BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; if (btrfs_test_opt(root, NODATACOW)) - btrfs_set_flag(inode, NODATACOW); + BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; } key[0].objectid = objectid; @@ -3642,6 +3645,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, location->offset = 0; btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); + btrfs_inherit_iflags(inode, dir); + insert_inode_hash(inode); inode_tree_add(inode); return inode; @@ -5075,7 +5080,7 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, out: if (cur_offset > start) { inode->i_ctime = CURRENT_TIME; - btrfs_set_flag(inode, PREALLOC); + BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; if (!(mode & FALLOC_FL_KEEP_SIZE) && cur_offset > i_size_read(inode)) btrfs_i_size_write(inode, cur_offset); @@ -5196,7 +5201,7 @@ static int btrfs_set_page_dirty(struct page *page) static int btrfs_permission(struct inode *inode, int mask) { - if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE)) + if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) return -EACCES; return generic_permission(inode, mask, btrfs_check_acl); } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 54dfd45cc59..926332a73cd 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -50,7 +50,172 @@ #include "volumes.h" #include "locking.h" +/* Mask out flags that are inappropriate for the given type of inode. */ +static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) +{ + if (S_ISDIR(mode)) + return flags; + else if (S_ISREG(mode)) + return flags & ~FS_DIRSYNC_FL; + else + return flags & (FS_NODUMP_FL | FS_NOATIME_FL); +} + +/* + * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl. + */ +static unsigned int btrfs_flags_to_ioctl(unsigned int flags) +{ + unsigned int iflags = 0; + + if (flags & BTRFS_INODE_SYNC) + iflags |= FS_SYNC_FL; + if (flags & BTRFS_INODE_IMMUTABLE) + iflags |= FS_IMMUTABLE_FL; + if (flags & BTRFS_INODE_APPEND) + iflags |= FS_APPEND_FL; + if (flags & BTRFS_INODE_NODUMP) + iflags |= FS_NODUMP_FL; + if (flags & BTRFS_INODE_NOATIME) + iflags |= FS_NOATIME_FL; + if (flags & BTRFS_INODE_DIRSYNC) + iflags |= FS_DIRSYNC_FL; + + return iflags; +} + +/* + * Update inode->i_flags based on the btrfs internal flags. + */ +void btrfs_update_iflags(struct inode *inode) +{ + struct btrfs_inode *ip = BTRFS_I(inode); + + inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + + if (ip->flags & BTRFS_INODE_SYNC) + inode->i_flags |= S_SYNC; + if (ip->flags & BTRFS_INODE_IMMUTABLE) + inode->i_flags |= S_IMMUTABLE; + if (ip->flags & BTRFS_INODE_APPEND) + inode->i_flags |= S_APPEND; + if (ip->flags & BTRFS_INODE_NOATIME) + inode->i_flags |= S_NOATIME; + if (ip->flags & BTRFS_INODE_DIRSYNC) + inode->i_flags |= S_DIRSYNC; +} + +/* + * Inherit flags from the parent inode. + * + * Unlike extN we don't have any flags we don't want to inherit currently. + */ +void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) +{ + unsigned int flags = BTRFS_I(dir)->flags; + + if (S_ISREG(inode->i_mode)) + flags &= ~BTRFS_INODE_DIRSYNC; + else if (!S_ISDIR(inode->i_mode)) + flags &= (BTRFS_INODE_NODUMP | BTRFS_INODE_NOATIME); + + BTRFS_I(inode)->flags = flags; + btrfs_update_iflags(inode); +} + +static int btrfs_ioctl_getflags(struct file *file, void __user *arg) +{ + struct btrfs_inode *ip = BTRFS_I(file->f_path.dentry->d_inode); + unsigned int flags = btrfs_flags_to_ioctl(ip->flags); + + if (copy_to_user(arg, &flags, sizeof(flags))) + return -EFAULT; + return 0; +} + +static int btrfs_ioctl_setflags(struct file *file, void __user *arg) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct btrfs_inode *ip = BTRFS_I(inode); + struct btrfs_root *root = ip->root; + struct btrfs_trans_handle *trans; + unsigned int flags, oldflags; + int ret; + + if (copy_from_user(&flags, arg, sizeof(flags))) + return -EFAULT; + + if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ + FS_NOATIME_FL | FS_NODUMP_FL | \ + FS_SYNC_FL | FS_DIRSYNC_FL)) + return -EOPNOTSUPP; + if (!is_owner_or_cap(inode)) + return -EACCES; + + mutex_lock(&inode->i_mutex); + + flags = btrfs_mask_flags(inode->i_mode, flags); + oldflags = btrfs_flags_to_ioctl(ip->flags); + if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { + if (!capable(CAP_LINUX_IMMUTABLE)) { + ret = -EPERM; + goto out_unlock; + } + } + + ret = mnt_want_write(file->f_path.mnt); + if (ret) + goto out_unlock; + + if (flags & FS_SYNC_FL) + ip->flags |= BTRFS_INODE_SYNC; + else + ip->flags &= ~BTRFS_INODE_SYNC; + if (flags & FS_IMMUTABLE_FL) + ip->flags |= BTRFS_INODE_IMMUTABLE; + else + ip->flags &= ~BTRFS_INODE_IMMUTABLE; + if (flags & FS_APPEND_FL) + ip->flags |= BTRFS_INODE_APPEND; + else + ip->flags &= ~BTRFS_INODE_APPEND; + if (flags & FS_NODUMP_FL) + ip->flags |= BTRFS_INODE_NODUMP; + else + ip->flags &= ~BTRFS_INODE_NODUMP; + if (flags & FS_NOATIME_FL) + ip->flags |= BTRFS_INODE_NOATIME; + else + ip->flags &= ~BTRFS_INODE_NOATIME; + if (flags & FS_DIRSYNC_FL) + ip->flags |= BTRFS_INODE_DIRSYNC; + else + ip->flags &= ~BTRFS_INODE_DIRSYNC; + + + trans = btrfs_join_transaction(root, 1); + BUG_ON(!trans); + + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); + + btrfs_update_iflags(inode); + inode->i_ctime = CURRENT_TIME; + btrfs_end_transaction(trans, root); + + mnt_drop_write(file->f_path.mnt); + out_unlock: + mutex_unlock(&inode->i_mutex); + return 0; +} + +static int btrfs_ioctl_getversion(struct file *file, int __user *arg) +{ + struct inode *inode = file->f_path.dentry->d_inode; + + return put_user(inode->i_generation, arg); +} static noinline int create_subvol(struct btrfs_root *root, struct dentry *dentry, @@ -1077,6 +1242,12 @@ long btrfs_ioctl(struct file *file, unsigned int void __user *argp = (void __user *)arg; switch (cmd) { + case FS_IOC_GETFLAGS: + return btrfs_ioctl_getflags(file, argp); + case FS_IOC_SETFLAGS: + return btrfs_ioctl_setflags(file, argp); + case FS_IOC_GETVERSION: + return btrfs_ioctl_getversion(file, argp); case BTRFS_IOC_SNAP_CREATE: return btrfs_ioctl_snap_create(file, argp, 0); case BTRFS_IOC_SUBVOL_CREATE: -- cgit v1.2.3-70-g09d2 From 59d697b70285c348c01cfc2695c3469ba71d7539 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 27 Apr 2009 09:46:41 -0400 Subject: btrfs: remove ->write_super and stop maintaining ->s_dirt Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/btrfs/inode.c | 7 ------- fs/btrfs/super.c | 8 -------- 2 files changed, 15 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5b68330f858..8612b3a0981 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2322,7 +2322,6 @@ err: btrfs_update_inode(trans, root, dir); btrfs_drop_nlink(inode); ret = btrfs_update_inode(trans, root, inode); - dir->i_sb->s_dirt = 1; out: return ret; } @@ -2806,7 +2805,6 @@ error: pending_del_nr); } btrfs_free_path(path); - inode->i_sb->s_dirt = 1; return ret; } @@ -3768,7 +3766,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, init_special_inode(inode, inode->i_mode, rdev); btrfs_update_inode(trans, root, inode); } - dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); btrfs_update_inode_block_group(trans, dir); out_unlock: @@ -3833,7 +3830,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } - dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); btrfs_update_inode_block_group(trans, dir); out_unlock: @@ -3880,7 +3876,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (err) drop_inode = 1; - dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, dir); err = btrfs_update_inode(trans, root, inode); @@ -3962,7 +3957,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) d_instantiate(dentry, inode); drop_on_err = 0; - dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); btrfs_update_inode_block_group(trans, dir); @@ -4991,7 +4985,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } - dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); btrfs_update_inode_block_group(trans, dir); if (drop_inode) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 708ac06b953..52d84522c2c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -397,7 +397,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait) if (sb->s_flags & MS_RDONLY) return 0; - sb->s_dirt = 0; if (!wait) { filemap_flush(root->fs_info->btree_inode->i_mapping); return 0; @@ -408,7 +407,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait) trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); - sb->s_dirt = 0; return ret; } @@ -454,11 +452,6 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) return 0; } -static void btrfs_write_super(struct super_block *sb) -{ - sb->s_dirt = 0; -} - static int btrfs_test_super(struct super_block *s, void *data) { struct btrfs_fs_devices *test_fs_devices = data; @@ -689,7 +682,6 @@ static int btrfs_unfreeze(struct super_block *sb) static struct super_operations btrfs_super_ops = { .delete_inode = btrfs_delete_inode, .put_super = btrfs_put_super, - .write_super = btrfs_write_super, .sync_fs = btrfs_sync_fs, .show_options = btrfs_show_options, .write_inode = btrfs_write_inode, -- cgit v1.2.3-70-g09d2 From 5affd88a104af43f0063a12ad1ee4c7a587945dc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 8 Jun 2009 19:55:32 -0400 Subject: switch btrfs to inode->i_acl Signed-off-by: Al Viro --- fs/btrfs/acl.c | 14 +++++++------- fs/btrfs/btrfs_inode.h | 4 ---- fs/btrfs/ctree.h | 2 -- fs/btrfs/inode.c | 16 ++-------------- 4 files changed, 9 insertions(+), 27 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 603972576f0..6db8a42a3e5 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -34,7 +34,7 @@ static void btrfs_update_cached_acl(struct inode *inode, struct posix_acl *acl) { spin_lock(&inode->i_lock); - if (*p_acl && *p_acl != BTRFS_ACL_NOT_CACHED) + if (*p_acl && *p_acl != ACL_NOT_CACHED) posix_acl_release(*p_acl); *p_acl = posix_acl_dup(acl); spin_unlock(&inode->i_lock); @@ -50,11 +50,11 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) switch (type) { case ACL_TYPE_ACCESS: name = POSIX_ACL_XATTR_ACCESS; - p_acl = &BTRFS_I(inode)->i_acl; + p_acl = &inode->i_acl; break; case ACL_TYPE_DEFAULT: name = POSIX_ACL_XATTR_DEFAULT; - p_acl = &BTRFS_I(inode)->i_default_acl; + p_acl = &inode->i_default_acl; break; default: return ERR_PTR(-EINVAL); @@ -67,11 +67,11 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) spin_lock(&inode->i_lock); acl = *p_acl; - if (acl != BTRFS_ACL_NOT_CACHED) + if (acl != ACL_NOT_CACHED) acl = posix_acl_dup(acl); spin_unlock(&inode->i_lock); - if (acl != BTRFS_ACL_NOT_CACHED) + if (acl != ACL_NOT_CACHED) return acl; size = __btrfs_getxattr(inode, name, "", 0); @@ -141,13 +141,13 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) ret = 0; inode->i_mode = mode; name = POSIX_ACL_XATTR_ACCESS; - p_acl = &BTRFS_I(inode)->i_acl; + p_acl = &inode->i_acl; break; case ACL_TYPE_DEFAULT: if (!S_ISDIR(inode->i_mode)) return acl ? -EINVAL : 0; name = POSIX_ACL_XATTR_DEFAULT; - p_acl = &BTRFS_I(inode)->i_default_acl; + p_acl = &inode->i_default_acl; break; default: return -EINVAL; diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index acb4f351758..ea1ea0af8c0 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -53,10 +53,6 @@ struct btrfs_inode { /* used to order data wrt metadata */ struct btrfs_ordered_inode_tree ordered_tree; - /* standard acl pointers */ - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; - /* for keeping track of orphaned inodes */ struct list_head i_orphan; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 03441a99ea3..2779c2f5360 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -41,8 +41,6 @@ struct btrfs_ordered_sum; #define BTRFS_MAGIC "_BHRfS_M" -#define BTRFS_ACL_NOT_CACHED ((void *)-1) - #define BTRFS_MAX_LEVEL 8 #define BTRFS_COMPAT_EXTENT_TREE_V0 diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8612b3a0981..78ad38ddd01 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2123,8 +2123,8 @@ static void btrfs_read_locked_inode(struct inode *inode) */ maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino); if (!maybe_acls) { - BTRFS_I(inode)->i_acl = NULL; - BTRFS_I(inode)->i_default_acl = NULL; + inode->i_acl = NULL; + inode->i_default_acl = NULL; } BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, @@ -3141,9 +3141,6 @@ static noinline void init_btrfs_i(struct inode *inode) { struct btrfs_inode *bi = BTRFS_I(inode); - bi->i_acl = BTRFS_ACL_NOT_CACHED; - bi->i_default_acl = BTRFS_ACL_NOT_CACHED; - bi->generation = 0; bi->sequence = 0; bi->last_trans = 0; @@ -4640,8 +4637,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->last_trans = 0; ei->logged_trans = 0; btrfs_ordered_inode_tree_init(&ei->ordered_tree); - ei->i_acl = BTRFS_ACL_NOT_CACHED; - ei->i_default_acl = BTRFS_ACL_NOT_CACHED; INIT_LIST_HEAD(&ei->i_orphan); INIT_LIST_HEAD(&ei->ordered_operations); return &ei->vfs_inode; @@ -4655,13 +4650,6 @@ void btrfs_destroy_inode(struct inode *inode) WARN_ON(!list_empty(&inode->i_dentry)); WARN_ON(inode->i_data.nrpages); - if (BTRFS_I(inode)->i_acl && - BTRFS_I(inode)->i_acl != BTRFS_ACL_NOT_CACHED) - posix_acl_release(BTRFS_I(inode)->i_acl); - if (BTRFS_I(inode)->i_default_acl && - BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED) - posix_acl_release(BTRFS_I(inode)->i_default_acl); - /* * Make sure we're properly removed from the ordered operation * lists. -- cgit v1.2.3-70-g09d2 From 72c04902d1e27c8a324014cff1d4475c11b1cecd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 24 Jun 2009 16:58:48 -0400 Subject: Get "no acls for this inode" right, fix shmem breakage Signed-off-by: Al Viro --- fs/btrfs/inode.c | 6 ++---- fs/jffs2/acl.c | 3 +-- include/linux/posix_acl.h | 9 +++++++++ mm/shmem.c | 5 +---- 4 files changed, 13 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 78ad38ddd01..dbe1aabf96c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2122,10 +2122,8 @@ static void btrfs_read_locked_inode(struct inode *inode) * any xattrs or acls */ maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino); - if (!maybe_acls) { - inode->i_acl = NULL; - inode->i_default_acl = NULL; - } + if (!maybe_acls) + cache_no_acl(inode); BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, alloc_group_block, 0); diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index edd2ad6416d..8fcb6239218 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -284,8 +284,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) struct posix_acl *acl, *clone; int rc; - inode->i_default_acl = NULL; - inode->i_acl = NULL; + cache_no_acl(inode); if (S_ISLNK(*i_mode)) return 0; /* Symlink always has no-ACL */ diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index c513466c7dc..065a3652a3e 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -148,4 +148,13 @@ static inline void forget_cached_acl(struct inode *inode, int type) posix_acl_release(old); } #endif + +static inline void cache_no_acl(struct inode *inode) +{ +#ifdef CONFIG_FS_POSIX_ACL + inode->i_acl = NULL; + inode->i_default_acl = NULL; +#endif +} + #endif /* __LINUX_POSIX_ACL_H */ diff --git a/mm/shmem.c b/mm/shmem.c index 5f2019fc789..d713239ce2c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1558,6 +1558,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, int mode, spin_lock_init(&info->lock); info->flags = flags & VM_NORESERVE; INIT_LIST_HEAD(&info->swaplist); + cache_no_acl(inode); switch (mode & S_IFMT) { default: @@ -2379,10 +2380,6 @@ static struct inode *shmem_alloc_inode(struct super_block *sb) p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); if (!p) return NULL; -#ifdef CONFIG_TMPFS_POSIX_ACL - p->vfs_inode.i_acl = NULL; - p->vfs_inode.i_default_acl = NULL; -#endif return &p->vfs_inode; } -- cgit v1.2.3-70-g09d2