From c5739bba5260a59cebd20a51a55080592c8d3b07 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 10 Apr 2007 09:27:04 -0400 Subject: Btrfs: snapshot progress Signed-off-by: Chris Mason --- fs/btrfs/ioctl.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 fs/btrfs/ioctl.h (limited to 'fs/btrfs/ioctl.h') diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h new file mode 100644 index 00000000000..201fb327072 --- /dev/null +++ b/fs/btrfs/ioctl.h @@ -0,0 +1,13 @@ +#ifndef __IOCTL_ +#define __IOCTL_ +#include + +#define BTRFS_IOCTL_MAGIC 0x94 +#define BTRFS_VOL_NAME_MAX 255 +struct btrfs_ioctl_vol_args { + char name[BTRFS_VOL_NAME_MAX + 1]; +}; + +#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ + struct btrfs_ioctl_vol_args) +#endif -- cgit v1.2.3-70-g09d2 From 8352d8a473ac84bf7a1c69690b626946d744ca58 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 12 Apr 2007 10:43:05 -0400 Subject: Btrfs: add disk ioctl, mostly working Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 128 ++++++++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/disk-io.h | 6 +++ fs/btrfs/ioctl.h | 2 + fs/btrfs/super.c | 96 +++++++++++++++++++++++++++++++++---- fs/btrfs/transaction.c | 6 +++ 5 files changed, 221 insertions(+), 17 deletions(-) (limited to 'fs/btrfs/ioctl.h') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c872a7e67ab..3ba4df20c91 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -16,6 +16,28 @@ struct dev_lookup { struct block_device *bdev; }; +int btrfs_insert_dev_radix(struct btrfs_root *root, + struct block_device *bdev, + u64 block_start, + u64 num_blocks) +{ + struct dev_lookup *lookup; + char b[BDEVNAME_SIZE]; + int ret; + + lookup = kmalloc(sizeof(*lookup), GFP_NOFS); + if (!lookup) + return -ENOMEM; + lookup->block_start = block_start; + lookup->num_blocks = num_blocks; + lookup->bdev = bdev; +printk("inserting %s into dev radix %Lu %Lu\n", bdevname(bdev, b), block_start, num_blocks); + + ret = 
radix_tree_insert(&root->fs_info->dev_radix, block_start + + num_blocks - 1, lookup); + return ret; +} + u64 bh_blocknr(struct buffer_head *bh) { int blkbits = bh->b_page->mapping->host->i_blkbits; @@ -34,6 +56,8 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { struct btrfs_node *node = btrfs_buffer_node(buf); if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) { + printk(KERN_CRIT "bh_blocknr(buf) is %Lu, header is %Lu\n", + bh_blocknr(buf), btrfs_header_blocknr(&node->header)); BUG(); } return 0; @@ -76,11 +100,10 @@ out_unlock: return ret; } -static int map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, +int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, u64 logical) { struct dev_lookup *lookup[2]; - char b[BDEVNAME_SIZE]; int ret; @@ -96,7 +119,6 @@ static int map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, } bh->b_bdev = lookup[0]->bdev; bh->b_blocknr = logical - lookup[0]->block_start; -printk("logical mapping %Lu to %lu bdev %s\n", logical, bh->b_blocknr, bdevname(bh->b_bdev, b)); set_buffer_mapped(bh); ret = 0; out: @@ -126,7 +148,7 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, bh = head; do { if (!buffer_mapped(bh)) { - err = map_bh_to_logical(root, bh, first_block); + err = btrfs_map_bh_to_logical(root, bh, first_block); BUG_ON(err); } if (bh_blocknr(bh) == blocknr) { @@ -150,7 +172,7 @@ static int btree_get_block(struct inode *inode, sector_t iblock, { int err; struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; - err = map_bh_to_logical(root, bh, iblock); + err = btrfs_map_bh_to_logical(root, bh, iblock); return err; } @@ -396,6 +418,89 @@ printk("all worked\n"); return root; } +int btrfs_open_disk(struct btrfs_root *root, u64 block_start, u64 num_blocks, + char *filename, int name_len) +{ + char *null_filename; + struct block_device *bdev; + int ret; + + if (block_start == 0) { +printk("skipping disk with 
block_start == 0\n"); +return 0; + } + null_filename = kmalloc(name_len + 1, GFP_NOFS); + if (!null_filename) + return -ENOMEM; + memcpy(null_filename, filename, name_len); + null_filename[name_len] = '\0'; + + bdev = open_bdev_excl(null_filename, O_RDWR, root->fs_info->sb); + if (IS_ERR(bdev)) { + ret = PTR_ERR(bdev); + goto out; + } + set_blocksize(bdev, root->fs_info->sb->s_blocksize); + ret = btrfs_insert_dev_radix(root, bdev, block_start, num_blocks); + BUG_ON(ret); + ret = 0; +out: + kfree(null_filename); + return ret; +} + +static int read_device_info(struct btrfs_root *root) +{ + struct btrfs_path *path; + int ret; + struct btrfs_key key; + struct btrfs_leaf *leaf; + struct btrfs_device_item *dev_item; + int nritems; + int slot; + + root = root->fs_info->dev_root; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + key.objectid = 0; + key.offset = 0; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY); + + mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + leaf = btrfs_buffer_leaf(path->nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + while(1) { + slot = path->slots[0]; + if (slot >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret) + break; + leaf = btrfs_buffer_leaf(path->nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + slot = path->slots[0]; + } + btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + if (btrfs_key_type(&key) != BTRFS_DEV_ITEM_KEY) { + path->slots[0]++; + continue; + } + dev_item = btrfs_item_ptr(leaf, slot, struct btrfs_device_item); +printk("found key %Lu %Lu\n", key.objectid, key.offset); + ret = btrfs_open_disk(root, key.objectid, key.offset, + (char *)(dev_item + 1), + btrfs_device_pathlen(dev_item)); + BUG_ON(ret); + path->slots[0]++; + } + btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); + return 0; +} + struct btrfs_root *open_ctree(struct super_block *sb) { struct btrfs_root *extent_root = 
kmalloc(sizeof(struct btrfs_root), @@ -463,17 +568,26 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!btrfs_super_root(disk_super)) return NULL; + i_size_write(fs_info->btree_inode, + btrfs_super_total_blocks(disk_super) << + fs_info->btree_inode->i_blkbits); + radix_tree_delete(&fs_info->dev_radix, (u32)-2); dev_lookup->block_start = btrfs_super_device_block_start(disk_super); dev_lookup->num_blocks = btrfs_super_device_num_blocks(disk_super); ret = radix_tree_insert(&fs_info->dev_radix, dev_lookup->block_start + - dev_lookup->num_blocks, dev_lookup); + dev_lookup->num_blocks - 1, dev_lookup); BUG_ON(ret); fs_info->disk_super = disk_super; + dev_root->node = read_tree_block(tree_root, btrfs_super_device_root(disk_super)); + + ret = read_device_info(dev_root); + BUG_ON(ret); + tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); @@ -562,7 +676,7 @@ static int free_dev_radix(struct btrfs_fs_info *fs_info) close_bdev_excl(lookup[i]->bdev); radix_tree_delete(&fs_info->dev_radix, lookup[i]->block_start + - lookup[i]->num_blocks); + lookup[i]->num_blocks - 1); kfree(lookup[i]); } } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 46cc5dc9d57..aa9f56c3841 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -42,4 +42,10 @@ int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location); u64 bh_blocknr(struct buffer_head *bh); +int btrfs_insert_dev_radix(struct btrfs_root *root, + struct block_device *bdev, + u64 block_start, + u64 num_blocks); +int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, + u64 logical); #endif diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 201fb327072..497fadd98c3 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -10,4 +10,6 @@ struct btrfs_ioctl_vol_args { #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct 
btrfs_ioctl_vol_args) +#define BTRFS_IOC_ADD_DISK _IOW(BTRFS_IOCTL_MAGIC, 2, \ + struct btrfs_ioctl_vol_args) #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4ff0cc1efb1..c46d7eafcf6 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1073,6 +1073,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, } map_bh(result, inode->i_sb, blocknr); + btrfs_map_bh_to_logical(root, result, blocknr); goto out; } @@ -1092,7 +1093,8 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, extent_end = extent_start + btrfs_file_extent_num_blocks(item); if (iblock >= extent_start && iblock < extent_end) { err = 0; - map_bh(result, inode->i_sb, blocknr + iblock - extent_start); + btrfs_map_bh_to_logical(root, result, blocknr + iblock - + extent_start); goto out; } allocate: @@ -1112,6 +1114,7 @@ allocate: set_buffer_new(result); map_bh(result, inode->i_sb, blocknr); + btrfs_map_bh_to_logical(root, result, blocknr); out: btrfs_release_path(root, path); btrfs_free_path(path); @@ -1153,12 +1156,6 @@ static int btrfs_readpage(struct file *file, struct page *page) return mpage_readpage(page, btrfs_get_block); } -static int btrfs_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) -{ - return mpage_readpages(mapping, pages, nr_pages, btrfs_get_block); -} - static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { return nobh_writepage(page, btrfs_get_block, wbc); @@ -1831,12 +1828,81 @@ printk("adding snapshot name %.*s root %Lu %Lu %u\n", namelen, name, key.objecti return 0; } +static int add_disk(struct btrfs_root *root, char *name, int namelen) +{ + struct block_device *bdev; + struct btrfs_path *path; + struct super_block *sb = root->fs_info->sb; + struct btrfs_root *dev_root = root->fs_info->dev_root; + struct btrfs_trans_handle *trans; + struct btrfs_device_item *dev_item; + struct btrfs_key key; + u16 item_size; + u64 num_blocks; + u64 new_blocks; + 
int ret; +printk("adding disk %s\n", name); + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + num_blocks = btrfs_super_total_blocks(root->fs_info->disk_super); + bdev = open_bdev_excl(name, O_RDWR, sb); + if (IS_ERR(bdev)) { + ret = PTR_ERR(bdev); +printk("open bdev excl failed ret %d\n", ret); + goto out_nolock; + } + set_blocksize(bdev, sb->s_blocksize); + new_blocks = bdev->bd_inode->i_size >> sb->s_blocksize_bits; + key.objectid = num_blocks; + key.offset = new_blocks; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY); + + mutex_lock(&dev_root->fs_info->fs_mutex); + trans = btrfs_start_transaction(dev_root, 1); + item_size = sizeof(*dev_item) + namelen; +printk("insert empty on %Lu %Lu %u size %d\n", num_blocks, new_blocks, key.flags, item_size); + ret = btrfs_insert_empty_item(trans, dev_root, path, &key, item_size); + if (ret) { +printk("insert failed %d\n", ret); + close_bdev_excl(bdev); + if (ret > 0) + ret = -EEXIST; + goto out; + } + dev_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_device_item); + btrfs_set_device_pathlen(dev_item, namelen); + memcpy(dev_item + 1, name, namelen); + mark_buffer_dirty(path->nodes[0]); + + ret = btrfs_insert_dev_radix(root, bdev, num_blocks, new_blocks); + + if (!ret) { + btrfs_set_super_total_blocks(root->fs_info->disk_super, + num_blocks + new_blocks); + i_size_write(root->fs_info->btree_inode, + (num_blocks + new_blocks) << + root->fs_info->btree_inode->i_blkbits); + } + +out: + ret = btrfs_commit_transaction(trans, dev_root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); +out_nolock: + btrfs_free_path(path); + + return ret; +} + static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_vol_args vol_args; - int ret; + int ret = 0; int namelen; struct btrfs_path *path; u64 root_dirid; @@ -1869,10 +1935,21 @@ static int 
btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int ret = create_snapshot(root, vol_args.name, namelen); WARN_ON(ret); break; + case BTRFS_IOC_ADD_DISK: + if (copy_from_user(&vol_args, + (struct btrfs_ioctl_vol_args __user *)arg, + sizeof(vol_args))) + return -EFAULT; + namelen = strlen(vol_args.name); + if (namelen > BTRFS_VOL_NAME_MAX) + return -EINVAL; + vol_args.name[namelen] = '\0'; + ret = add_disk(root, vol_args.name, namelen); + break; default: return -ENOTTY; } - return 0; + return ret; } static struct kmem_cache *btrfs_inode_cachep; @@ -2004,7 +2081,6 @@ static struct file_operations btrfs_dir_file_operations = { static struct address_space_operations btrfs_aops = { .readpage = btrfs_readpage, - .readpages = btrfs_readpages, .writepage = btrfs_writepage, .sync_page = block_sync_page, .prepare_write = btrfs_prepare_write, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 3a15943ea8e..8740752f384 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -116,7 +116,13 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_root *dev_root = fs_info->dev_root; + if (btrfs_super_device_root(fs_info->disk_super) != + bh_blocknr(dev_root->node)) { + btrfs_set_super_device_root(fs_info->disk_super, + bh_blocknr(dev_root->node)); + } while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); if (old_extent_block == bh_blocknr(extent_root->node)) -- cgit v1.2.3-70-g09d2 From 6cbd55707802b98843f953d1ae6d8f5bcd9a76c0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Jun 2007 09:07:21 -0400 Subject: Btrfs: add GPLv2 Signed-off-by: Chris Mason --- fs/btrfs/COPYING | 356 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/TODO | 1 - fs/btrfs/bit-radix.c | 18 +++ fs/btrfs/bit-radix.h | 18 +++ fs/btrfs/btrfs_inode.h | 18 +++ 
fs/btrfs/ctree.c | 18 +++ fs/btrfs/ctree.h | 18 +++ fs/btrfs/dir-item.c | 18 +++ fs/btrfs/disk-io.c | 18 +++ fs/btrfs/disk-io.h | 18 +++ fs/btrfs/extent-tree.c | 18 +++ fs/btrfs/file-item.c | 18 +++ fs/btrfs/file.c | 18 +++ fs/btrfs/hash.c | 18 +++ fs/btrfs/hash.h | 18 +++ fs/btrfs/inode-item.c | 18 +++ fs/btrfs/inode-map.c | 18 +++ fs/btrfs/inode.c | 18 +++ fs/btrfs/ioctl.h | 18 +++ fs/btrfs/print-tree.c | 18 +++ fs/btrfs/print-tree.h | 18 +++ fs/btrfs/root-tree.c | 18 +++ fs/btrfs/super.c | 18 +++ fs/btrfs/sysfs.c | 18 +++ fs/btrfs/transaction.c | 18 +++ fs/btrfs/transaction.h | 18 +++ 26 files changed, 788 insertions(+), 1 deletion(-) create mode 100644 fs/btrfs/COPYING (limited to 'fs/btrfs/ioctl.h') diff --git a/fs/btrfs/COPYING b/fs/btrfs/COPYING new file mode 100644 index 00000000000..ca442d313d8 --- /dev/null +++ b/fs/btrfs/COPYING @@ -0,0 +1,356 @@ + + NOTE! This copyright does *not* cover user programs that use kernel + services by normal system calls - this is merely considered normal use + of the kernel, and does *not* fall under the heading of "derived work". + Also note that the GPL below is copyrighted by the Free Software + Foundation, but the instance of code that it refers to (the Linux + kernel) is copyrighted by me and others who actually wrote it. + + Also note that the only valid version of the GPL as far as the kernel + is concerned is _this_ particular version of the license (ie v2, not + v2.2 or v3.x or whatever), unless explicitly otherwise stated. + + Linus Torvalds + +---------------------------------------- + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. 
By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. 
If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. 
You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. 
If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. 
(This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. 
Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. 
If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. 
Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 4a48a2365d5..d9b6d38c603 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -1,4 +1,3 @@ -* fix printk warnings * cleanup, add more error checking, get rid of BUG_ONs * Fix ENOSPC handling * Make allocator smarter diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index 8f9cd427723..a4ddf4de979 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details.
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "bit-radix.h" diff --git a/fs/btrfs/bit-radix.h b/fs/btrfs/bit-radix.h index 4e717e30db4..c100f54d5c3 100644 --- a/fs/btrfs/bit-radix.h +++ b/fs/btrfs/bit-radix.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __BIT_RADIX__ #define __BIT_RADIX__ #include diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 6b50076b02d..783f06b6325 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __BTRFS_I__ #define __BTRFS_I__ diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 20e01f95ce6..4736155af41 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 24410d42e3c..4bd648d68e8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __BTRFS__ #define __BTRFS__ diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index b408a3d20c7..0d179a44ebf 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 87810117254..8c68a64c0ce 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 822ccb8aa4a..7b76ccc4875 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __DISKIO__ #define __DISKIO__ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fe02fbfa6d5..975a0eb9241 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index d5a98827e38..37dd3801a10 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0325dc03859..566e526b60f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index 32de1ea1b64..e5c76903d72 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + /* * Original copy from: * linux/fs/ext3/hash.c diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h index e56c4670093..d3be0267058 100644 --- a/fs/btrfs/hash.h +++ b/fs/btrfs/hash.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __HASH__ #define __HASH__ int btrfs_name_hash(const char *name, int len, u64 *hash_result); diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index c3b990b661c..449e9dbdf95 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 318e27a6378..58930fc48be 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1cabc1951b0..46f869d6d0f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 497fadd98c3..23bed48c51a 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __IOCTL_ #define __IOCTL_ #include diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 21791f03756..1fa70568300 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 396041a05cf..205cd03601a 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __PRINT_TREE_ #define __PRINT_TREE_ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 13c30284fd8..7a2d63c1ee8 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 94b3535a2a1..ef8ad18b275 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index ed8b7e20cfb..ead9dc8051b 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4f3c8ddcb40..30914521cf0 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include "ctree.h" diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index f25b4900db4..5fb1d322aa1 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __TRANSACTION__ #define __TRANSACTION__ #include "btrfs_inode.h" -- cgit v1.2.3-70-g09d2 From 6702ed490ca0bb44e17131818a5a18b773957c5a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 7 Aug 2007 16:15:09 -0400 Subject: Btrfs: Add run time btree defrag, and an ioctl to force btree defrag This adds two types of btree defrag, a run time form that tries to defrag recently allocated blocks in the btree when they are still in ram, and an ioctl that forces defrag of all btree blocks. File data blocks are not defragged yet, but this can make a huge difference in sequential btree reads. 
Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.c | 195 +++++++++++++++++++++++++++++++++---------- fs/btrfs/ctree.h | 19 ++++- fs/btrfs/disk-io.c | 8 ++ fs/btrfs/extent-tree.c | 53 +++++++----- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 33 +++++++- fs/btrfs/ioctl.h | 2 +- fs/btrfs/transaction.c | 105 +++++++++++++++++++---- fs/btrfs/transaction.h | 1 + fs/btrfs/tree-defrag.c | 222 +++++++++++++++++++++++++++++++++++++++++++++++++ 11 files changed, 553 insertions(+), 89 deletions(-) create mode 100644 fs/btrfs/tree-defrag.c (limited to 'fs/btrfs/ioctl.h') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index a4e2df66bdb..9321438c5ee 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -4,7 +4,7 @@ ifneq ($(KERNELRELEASE),) obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ - transaction.o bit-radix.o inode.o file.o + transaction.o bit-radix.o inode.o file.o tree-defrag.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7a08491e208..c7e47e77723 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -65,44 +65,44 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) memset(p, 0, sizeof(*p)); } -static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root +static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf, struct buffer_head *parent, int parent_slot, struct buffer_head - **cow_ret) + **cow_ret, u64 search_start, u64 empty_size) { struct buffer_head *cow; struct btrfs_node *cow_node; - int ret; + int ret = 0; + int different_trans = 0; + WARN_ON(root->ref_cows && trans->transid != root->last_trans); WARN_ON(!buffer_uptodate(buf)); - if (trans->transaction != root->fs_info->running_transaction) { - 
printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, - root->fs_info->running_transaction->transid); - WARN_ON(1); - } - if (trans->transid != root->fs_info->generation) { - printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, - root->fs_info->generation); - WARN_ON(1); - } - if (btrfs_header_generation(btrfs_buffer_header(buf)) == - trans->transid) { - *cow_ret = buf; - return 0; - } - cow = btrfs_alloc_free_block(trans, root, buf->b_blocknr); + cow = btrfs_alloc_free_block(trans, root, search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); + cow_node = btrfs_buffer_node(cow); if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) WARN_ON(1); + memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); btrfs_set_header_generation(&cow_node->header, trans->transid); btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); - ret = btrfs_inc_ref(trans, root, buf); - if (ret) - return ret; + + WARN_ON(btrfs_header_generation(btrfs_buffer_header(buf)) > + trans->transid); + if (btrfs_header_generation(btrfs_buffer_header(buf)) != + trans->transid) { + different_trans = 1; + ret = btrfs_inc_ref(trans, root, buf); + if (ret) + return ret; + } else { + WARN_ON(!root->ref_cows); + clean_tree_block(trans, root, buf); + } + if (buf == root->node) { root->node = cow; get_bh(cow); @@ -114,6 +114,8 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, bh_blocknr(cow)); btrfs_mark_buffer_dirty(parent); + WARN_ON(btrfs_header_generation(btrfs_buffer_header(parent)) != + trans->transid); btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); } btrfs_block_release(root, buf); @@ -122,6 +124,115 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } +int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct 
buffer_head *buf, struct buffer_head + *parent, int parent_slot, struct buffer_head + **cow_ret) +{ + u64 search_start; + if (trans->transaction != root->fs_info->running_transaction) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->running_transaction->transid); + WARN_ON(1); + } + if (trans->transid != root->fs_info->generation) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->generation); + WARN_ON(1); + } + if (btrfs_header_generation(btrfs_buffer_header(buf)) == + trans->transid) { + *cow_ret = buf; + return 0; + } + + search_start = bh_blocknr(buf) & ~((u64)65535); + return __btrfs_cow_block(trans, root, buf, parent, + parent_slot, cow_ret, search_start, 0); +} + +static int close_blocks(u64 blocknr, u64 other) +{ + if (blocknr < other && other - blocknr < 8) + return 1; + if (blocknr > other && blocknr - other < 8) + return 1; + return 0; +} + +int btrfs_realloc_node(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct buffer_head *parent, + int cache_only) +{ + struct btrfs_node *parent_node; + struct buffer_head *cur_bh; + struct buffer_head *tmp_bh; + u64 blocknr; + u64 search_start = 0; + u64 other; + u32 parent_nritems; + int start_slot; + int end_slot; + int i; + int err = 0; + + if (trans->transaction != root->fs_info->running_transaction) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->running_transaction->transid); + WARN_ON(1); + } + if (trans->transid != root->fs_info->generation) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->generation); + WARN_ON(1); + } + parent_node = btrfs_buffer_node(parent); + parent_nritems = btrfs_header_nritems(&parent_node->header); + + start_slot = 0; + end_slot = parent_nritems; + + if (parent_nritems == 1) + return 0; + + for (i = start_slot; i < end_slot; i++) { + int close = 1; + blocknr = btrfs_node_blockptr(parent_node, i); + if (i > 0) { + other = 
btrfs_node_blockptr(parent_node, i - 1); + close = close_blocks(blocknr, other); + } + if (close && i < end_slot - 1) { + other = btrfs_node_blockptr(parent_node, i + 1); + close = close_blocks(blocknr, other); + } + if (close) + continue; + + cur_bh = btrfs_find_tree_block(root, blocknr); + if (!cur_bh || !buffer_uptodate(cur_bh) || + buffer_locked(cur_bh)) { + if (cache_only) { + brelse(cur_bh); + continue; + } + brelse(cur_bh); + cur_bh = read_tree_block(root, blocknr); + } + if (search_start == 0) { + search_start = bh_blocknr(cur_bh) & ~((u64)65535); + } + err = __btrfs_cow_block(trans, root, cur_bh, parent, i, + &tmp_bh, search_start, + min(8, end_slot - i)); + if (err) + break; + search_start = bh_blocknr(tmp_bh); + brelse(tmp_bh); + } + return err; +} + /* * The leaf data grows from end-to-front in the node. * this returns the address of the start of the last item, @@ -221,6 +332,7 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, parent_slot = path->slots[level + 1]; parent_key = &parent->ptrs[parent_slot].key; + BUG_ON(memcmp(parent_key, &leaf->items[0].key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != @@ -643,7 +755,7 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, * readahead one full node of leaves */ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, - int slot) + int level, int slot) { struct btrfs_node *node; int i; @@ -659,10 +771,13 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, unsigned long gang[8]; struct buffer_head *bh; - if (!path->nodes[1]) + if (level == 0) + return; + + if (!path->nodes[level]) return; - node = btrfs_buffer_node(path->nodes[1]); + node = btrfs_buffer_node(path->nodes[level]); search = btrfs_node_blockptr(node, slot); bh = btrfs_find_tree_block(root, search); if (bh) { @@ -690,7 +805,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, for (i = 
0; i < ret; i++) { blocknr = gang[i]; clear_radix_bit(&found, blocknr); - if (nread > 64) + if (nread > 32) continue; if (direction > 0 && cluster_start <= blocknr && cluster_start + 8 > blocknr) { @@ -726,7 +841,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root struct buffer_head *b; struct buffer_head *cow_buf; struct btrfs_node *c; - struct btrfs_root_item *root_item = &root->root_item; u64 blocknr; int slot; int ret; @@ -734,11 +848,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root int should_reada = p->reada; u8 lowest_level = 0; - if (btrfs_root_refs(root_item) == 0 && root->ref_cows) { - lowest_level = root_item->drop_level; - WARN_ON(ins_len || cow); - } - + lowest_level = p->lowest_level; + WARN_ON(lowest_level && ins_len); WARN_ON(p->nodes[0] != NULL); WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); again: @@ -798,8 +909,8 @@ again: if (level == lowest_level) break; blocknr = btrfs_node_blockptr(c, slot); - if (level == 1 && should_reada) - reada_for_search(root, p, slot); + if (should_reada) + reada_for_search(root, p, level, slot); b = read_tree_block(root, btrfs_node_blockptr(c, slot)); } else { @@ -960,7 +1071,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr); + t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr, 0); if (IS_ERR(t)) return PTR_ERR(t); c = btrfs_buffer_node(t); @@ -1070,7 +1181,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root } c_nritems = btrfs_header_nritems(&c->header); - split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr); + split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr, 0); if (IS_ERR(split_buffer)) return PTR_ERR(split_buffer); @@ -1461,7 +1572,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root nritems 
= btrfs_header_nritems(&l->header); mid = (nritems + 1)/ 2; - right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); + right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); if (IS_ERR(right_buffer)) return PTR_ERR(right_buffer); @@ -1560,7 +1671,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (!double_split) return ret; - right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); + right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); if (IS_ERR(right_buffer)) return PTR_ERR(right_buffer); @@ -1988,8 +2099,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) blocknr = btrfs_node_blockptr(c_node, slot); if (next) btrfs_block_release(root, next); - if (level == 1 && path->reada) - reada_for_search(root, path, slot); + if (path->reada) + reada_for_search(root, path, level, slot); next = read_tree_block(root, blocknr); break; } @@ -2002,8 +2113,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) path->slots[level] = 0; if (!level) break; - if (level == 1 && path->reada) - reada_for_search(root, path, slot); + if (path->reada) + reada_for_search(root, path, level, slot); next = read_tree_block(root, btrfs_node_blockptr(btrfs_buffer_node(next), 0)); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c5a18d5d7f7..42aa20301bc 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -178,6 +178,7 @@ struct btrfs_path { struct buffer_head *nodes[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL]; int reada; + int lowest_level; }; /* @@ -338,6 +339,9 @@ struct btrfs_root { u64 highest_inode; u64 last_inode_alloc; int ref_cows; + struct btrfs_key defrag_progress; + int defrag_running; + int defrag_level; }; /* the lower bits in the key flags defines the item type */ @@ -1031,10 +1035,11 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct 
btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint); + struct btrfs_root *root, u64 hint, + u64 empty_size); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u64 num_blocks, u64 search_start, + u64 num_blocks, u64 empty_size, u64 search_start, u64 search_end, struct btrfs_key *ins, int data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); @@ -1051,6 +1056,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); /* ctree.c */ +int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct buffer_head *buf, struct buffer_head + *parent, int parent_slot, struct buffer_head + **cow_ret); int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 data_size); int btrfs_truncate_item(struct btrfs_trans_handle *trans, @@ -1060,6 +1069,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); +int btrfs_realloc_node(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct buffer_head *parent, + int cache_only); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); @@ -1171,4 +1183,7 @@ extern struct file_operations btrfs_file_operations; int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, u64 *hint_block); +/* tree-defrag.c */ +int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int cache_only); #endif diff --git 
a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 60db85bc4ba..c948416fea3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -273,7 +273,9 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf) { WARN_ON(atomic_read(&buf->b_count) == 0); + lock_buffer(buf); clear_buffer_dirty(buf); + unlock_buffer(buf); return 0; } @@ -294,6 +296,9 @@ static int __setup_root(int blocksize, root->last_inode_alloc = 0; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); + memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); + root->defrag_running = 0; + root->defrag_level = 0; root->root_key.objectid = objectid; return 0; } @@ -585,6 +590,7 @@ int close_ctree(struct btrfs_root *root) fs_info->closing = 1; btrfs_transaction_flush_work(root); mutex_lock(&fs_info->fs_mutex); + btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); /* run commit again to drop the original snapshot */ @@ -616,7 +622,9 @@ void btrfs_mark_buffer_dirty(struct buffer_head *bh) { struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; u64 transid = btrfs_header_generation(btrfs_buffer_header(bh)); + WARN_ON(!atomic_read(&bh->b_count)); + if (transid != root->fs_info->generation) { printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", (unsigned long long)bh->b_blocknr, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5d4d5d8db8e..26b8d340649 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -23,7 +23,8 @@ #include "transaction.h" static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *orig_root, u64 num_blocks, u64 search_start, + *orig_root, u64 num_blocks, u64 empty_size, + u64 search_start, u64 search_end, u64 hint_block, struct btrfs_key *ins, u64 exclude_start, u64 exclude_nr, int data); @@ -379,7 +380,7 @@ int 
btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) return -ENOMEM; - ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, + ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); if (ret) { btrfs_free_path(path); @@ -533,7 +534,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, struct btrfs_block_group_item *bi; struct btrfs_key ins; - ret = find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, + ret = find_free_extent(trans, extent_root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); /* FIXME, set bit to recalc cache groups on next mount */ if (ret) @@ -708,6 +709,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, static int try_remove_page(struct address_space *mapping, unsigned long index) { int ret; + return 0; ret = invalidate_mapping_pages(mapping, index, index); return ret; } @@ -866,7 +868,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (!path) return -ENOMEM; - ret = find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); + ret = find_free_extent(trans, root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); if (ret) { btrfs_free_path(path); return ret; @@ -983,8 +985,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root * Any available blocks before search_start are skipped. 
*/ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *orig_root, u64 num_blocks, u64 search_start, u64 - search_end, u64 hint_block, + *orig_root, u64 num_blocks, u64 empty_size, + u64 search_start, u64 search_end, u64 hint_block, struct btrfs_key *ins, u64 exclude_start, u64 exclude_nr, int data) { @@ -1042,6 +1044,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root data, 1); } + total_needed += empty_size; path = btrfs_alloc_path(); check_failed: @@ -1157,9 +1160,11 @@ check_pending: goto error; } search_start = orig_search_start; - if (wrapped) + if (wrapped) { + if (!full_scan) + total_needed -= empty_size; full_scan = 1; - else + } else wrapped = 1; goto new_group; } @@ -1238,9 +1243,11 @@ new_group: ret = -ENOSPC; goto error; } - if (wrapped) + if (wrapped) { + if (!full_scan) + total_needed -= empty_size; full_scan = 1; - else + } else wrapped = 1; } block_group = btrfs_lookup_block_group(info, search_start); @@ -1264,7 +1271,7 @@ error: */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u64 num_blocks, u64 hint_block, + u64 num_blocks, u64 empty_size, u64 hint_block, u64 search_end, struct btrfs_key *ins, int data) { int ret; @@ -1303,7 +1310,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, * in the correct block group. 
*/ if (data) { - ret = find_free_extent(trans, root, 0, 0, + ret = find_free_extent(trans, root, 0, 0, 0, search_end, 0, &prealloc_key, 0, 0, 0); BUG_ON(ret); if (ret) @@ -1313,8 +1320,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, } /* do the real allocation */ - ret = find_free_extent(trans, root, num_blocks, search_start, - search_end, hint_block, ins, + ret = find_free_extent(trans, root, num_blocks, empty_size, + search_start, search_end, hint_block, ins, exclude_start, exclude_nr, data); BUG_ON(ret); if (ret) @@ -1333,7 +1340,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, exclude_start = ins->objectid; exclude_nr = ins->offset; hint_block = exclude_start + exclude_nr; - ret = find_free_extent(trans, root, 0, search_start, + ret = find_free_extent(trans, root, 0, 0, search_start, search_end, hint_block, &prealloc_key, exclude_start, exclude_nr, 0); @@ -1368,14 +1375,16 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, * returns the tree buffer or NULL. 
*/ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint) + struct btrfs_root *root, u64 hint, + u64 empty_size) { struct btrfs_key ins; int ret; struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, hint, (unsigned long)-1, &ins, 0); + 1, empty_size, hint, + (unsigned long)-1, &ins, 0); if (ret) { BUG_ON(ret > 0); return ERR_PTR(ret); @@ -1385,6 +1394,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, btrfs_free_extent(trans, root, ins.objectid, 1, 0); return ERR_PTR(-ENOMEM); } + WARN_ON(buffer_dirty(buf)); set_buffer_uptodate(buf); set_buffer_checked(buf); set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); @@ -1591,13 +1601,15 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key key; struct btrfs_disk_key *found_key; struct btrfs_node *node; + btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); + level = root_item->drop_level; + path->lowest_level = level; wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) { + if (wret < 0) { ret = wret; goto out; } - level = root_item->drop_level; node = btrfs_buffer_node(path->nodes[level]); found_key = &node->ptrs[path->slots[level]].key; WARN_ON(memcmp(found_key, &root_item->drop_progress, @@ -1617,8 +1629,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; num_walks++; if (num_walks > 10) { - struct btrfs_key key; - btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); ret = -EAGAIN; get_bh(root->node); break; @@ -1627,6 +1637,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { btrfs_block_release(root, path->nodes[i]); + path->nodes[i] = 0; } } out: diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1fe38fe8415..00b118a2db6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -512,7 
+512,7 @@ static int prepare_pages(struct btrfs_root *root, if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { err = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, hint_block, (u64)-1, + num_blocks, 0, hint_block, (u64)-1, &ins, 1); if (err) goto failed_truncate; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3889032fc44..12aa043b9f6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -554,7 +554,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) &alloc_hint); if (ret) goto out; - ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, + ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) goto out; @@ -1360,7 +1360,7 @@ not_found: if (create & BTRFS_GET_BLOCK_CREATE) { struct btrfs_key ins; ret = btrfs_alloc_extent(trans, root, inode->i_ino, - 1, alloc_hint, (u64)-1, + 1, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { err = ret; @@ -1998,7 +1998,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - subvol = btrfs_alloc_free_block(trans, root, 0); + subvol = btrfs_alloc_free_block(trans, root, 0, 0); if (IS_ERR(subvol)) return PTR_ERR(subvol); leaf = btrfs_buffer_leaf(subvol); @@ -2159,7 +2159,9 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_vol_args vol_args; + struct btrfs_trans_handle *trans; int ret = 0; + int err; struct btrfs_dir_item *di; int namelen; struct btrfs_path *path; @@ -2196,6 +2198,31 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int else ret = create_snapshot(root, vol_args.name, namelen); break; + + case BTRFS_IOC_DEFRAG: + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + memset(&root->defrag_progress, 0, + 
sizeof(root->defrag_progress)); + while (1) { + root->defrag_running = 1; + err = btrfs_defrag_leaves(trans, root, 0); + + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + btrfs_btree_balance_dirty(root); + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (err != -EAGAIN) + break; + } + root->defrag_running = 0; + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + ret = 0; + break; default: return -ENOTTY; } diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 23bed48c51a..8bc47dec286 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -28,6 +28,6 @@ struct btrfs_ioctl_vol_args { #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_ADD_DISK _IOW(BTRFS_IOCTL_MAGIC, 2, \ +#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ struct btrfs_ioctl_vol_args) #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 498626470a0..338a7199363 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -29,6 +29,7 @@ extern struct kmem_cache *btrfs_transaction_cachep; static struct workqueue_struct *trans_wq; #define BTRFS_ROOT_TRANS_TAG 0 +#define BTRFS_ROOT_DEFRAG_TAG 1 static void put_transaction(struct btrfs_transaction *transaction) { @@ -69,35 +70,41 @@ static int join_transaction(struct btrfs_root *root) return 0; } +static int record_root_in_trans(struct btrfs_root *root) +{ + u64 running_trans_id = root->fs_info->running_transaction->transid; + if (root->ref_cows && root->last_trans < running_trans_id) { + WARN_ON(root == root->fs_info->extent_root); + if (root->root_item.refs != 0) { + radix_tree_tag_set(&root->fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_TRANS_TAG); + radix_tree_tag_set(&root->fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_DEFRAG_TAG); + root->commit_root = root->node; + get_bh(root->node); + } else 
{ + WARN_ON(1); + } + root->last_trans = running_trans_id; + } + return 0; +} + struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, int num_blocks) { struct btrfs_trans_handle *h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); int ret; - u64 running_trans_id; mutex_lock(&root->fs_info->trans_mutex); ret = join_transaction(root); BUG_ON(ret); - running_trans_id = root->fs_info->running_transaction->transid; - if (root != root->fs_info->tree_root && root->last_trans < - running_trans_id) { - WARN_ON(root == root->fs_info->extent_root); - WARN_ON(root->ref_cows != 1); - if (root->root_item.refs != 0) { - radix_tree_tag_set(&root->fs_info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_TRANS_TAG); - root->commit_root = root->node; - get_bh(root->node); - } else { - WARN_ON(1); - } - } - root->last_trans = running_trans_id; - h->transid = running_trans_id; + record_root_in_trans(root); + h->transid = root->fs_info->running_transaction->transid; h->transaction = root->fs_info->running_transaction; h->blocks_reserved = num_blocks; h->blocks_used = 0; @@ -155,6 +162,15 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, gang[i]); if (!page) continue; + if (PageWriteback(page)) { + if (PageDirty(page)) + wait_on_page_writeback(page); + else { + unlock_page(page); + page_cache_release(page); + continue; + } + } err = write_one_page(page, 0); if (err) werr = err; @@ -299,6 +315,58 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, return err; } +int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) +{ + struct btrfs_root *gang[1]; + struct btrfs_root *root; + struct btrfs_root *tree_root = info->tree_root; + struct btrfs_trans_handle *trans; + int i; + int ret; + int err = 0; + u64 last = 0; + + trans = btrfs_start_transaction(tree_root, 1); + while(1) { + ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix, + (void **)gang, last, + ARRAY_SIZE(gang), + BTRFS_ROOT_DEFRAG_TAG); + 
if (ret == 0) + break; + for (i = 0; i < ret; i++) { + root = gang[i]; + last = root->root_key.objectid + 1; + radix_tree_tag_clear(&info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_DEFRAG_TAG); + if (root->defrag_running) + continue; + + while (1) { + mutex_lock(&root->fs_info->trans_mutex); + record_root_in_trans(root); + mutex_unlock(&root->fs_info->trans_mutex); + + root->defrag_running = 1; + err = btrfs_defrag_leaves(trans, root, 1); + btrfs_end_transaction(trans, tree_root); + mutex_unlock(&info->fs_mutex); + + btrfs_btree_balance_dirty(root); + + mutex_lock(&info->fs_mutex); + trans = btrfs_start_transaction(tree_root, 1); + if (err != -EAGAIN) + break; + } + root->defrag_running = 0; + } + } + btrfs_end_transaction(trans, tree_root); + return err; +} + static int drop_dirty_roots(struct btrfs_root *tree_root, struct list_head *list) { @@ -475,6 +543,7 @@ void btrfs_transaction_cleaner(struct work_struct *work) goto out; } mutex_unlock(&root->fs_info->trans_mutex); + btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); out: diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index ebf44f3e111..8b2714e6556 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -70,5 +70,6 @@ void btrfs_transaction_queue_work(struct btrfs_root *root, int delay); void btrfs_init_transaction_sys(void); void btrfs_exit_transaction_sys(void); int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list); +int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); #endif diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c new file mode 100644 index 00000000000..15d0a486fb5 --- /dev/null +++ b/fs/btrfs/tree-defrag.c @@ -0,0 +1,222 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include "ctree.h" +#include "disk-io.h" +#include "print-tree.h" +#include "transaction.h" + +static void reada_defrag(struct btrfs_root *root, + struct btrfs_node *node) +{ + int i; + u32 nritems; + u64 blocknr; + int ret; + + nritems = btrfs_header_nritems(&node->header); + for (i = 0; i < nritems; i++) { + blocknr = btrfs_node_blockptr(node, i); + ret = readahead_tree_block(root, blocknr); + if (ret) + break; + } +} + +static int defrag_walk_down(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level, + int cache_only) +{ + struct buffer_head *next; + struct buffer_head *cur; + u64 blocknr; + int ret = 0; + + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + + while(*level > 0) { + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + cur = path->nodes[*level]; + + if (!cache_only && *level > 1 && path->slots[*level] == 0) + reada_defrag(root, btrfs_buffer_node(cur)); + + if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) + WARN_ON(1); + + if (path->slots[*level] >= + btrfs_header_nritems(btrfs_buffer_header(cur))) + break; + + if (*level == 1) { + ret = btrfs_realloc_node(trans, root, + path->nodes[*level], + cache_only); + break; + } + blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), + path->slots[*level]); + + if 
(cache_only) { + next = btrfs_find_tree_block(root, blocknr); + if (!next || !buffer_uptodate(next) || + buffer_locked(next)) { + brelse(next); + path->slots[*level]++; + continue; + } + } else { + next = read_tree_block(root, blocknr); + } + ret = btrfs_cow_block(trans, root, next, path->nodes[*level], + path->slots[*level], &next); + BUG_ON(ret); + ret = btrfs_realloc_node(trans, root, next, cache_only); + BUG_ON(ret); + WARN_ON(*level <= 0); + if (path->nodes[*level-1]) + btrfs_block_release(root, path->nodes[*level-1]); + path->nodes[*level-1] = next; + *level = btrfs_header_level(btrfs_buffer_header(next)); + path->slots[*level] = 0; + } + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + btrfs_block_release(root, path->nodes[*level]); + path->nodes[*level] = NULL; + *level += 1; + WARN_ON(ret); + return 0; +} + +static int defrag_walk_up(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level, + int cache_only) +{ + int i; + int slot; + struct btrfs_node *node; + + for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { + slot = path->slots[i]; + if (slot < btrfs_header_nritems( + btrfs_buffer_header(path->nodes[i])) - 1) { + path->slots[i]++; + *level = i; + node = btrfs_buffer_node(path->nodes[i]); + WARN_ON(i == 0); + btrfs_disk_key_to_cpu(&root->defrag_progress, + &node->ptrs[path->slots[i]].key); + root->defrag_level = i; + return 0; + } else { + btrfs_block_release(root, path->nodes[*level]); + path->nodes[*level] = NULL; + *level = i + 1; + } + } + return 1; +} + +int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int cache_only) +{ + struct btrfs_path *path = NULL; + struct buffer_head *tmp; + int ret = 0; + int wret; + int level; + int orig_level; + int i; + int num_runs = 0; + + if (root->ref_cows == 0) { + goto out; + } + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + level = btrfs_header_level(btrfs_buffer_header(root->node)); + 
orig_level = level; + if (level == 0) { + goto out; + } + if (root->defrag_progress.objectid == 0) { + get_bh(root->node); + ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); + BUG_ON(ret); + ret = btrfs_realloc_node(trans, root, root->node, cache_only); + BUG_ON(ret); + path->nodes[level] = root->node; + path->slots[level] = 0; + } else { + level = root->defrag_level; + path->lowest_level = level; + wret = btrfs_search_slot(trans, root, &root->defrag_progress, + path, 0, 1); + + if (wret < 0) { + ret = wret; + goto out; + } + while(level > 0 && !path->nodes[level]) + level--; + if (!path->nodes[level]) { + ret = 0; + goto out; + } + } + + while(1) { + wret = defrag_walk_down(trans, root, path, &level, cache_only); + if (wret > 0) + break; + if (wret < 0) + ret = wret; + + wret = defrag_walk_up(trans, root, path, &level, cache_only); + if (wret > 0) + break; + if (wret < 0) + ret = wret; + if (num_runs++ > 8) { + ret = -EAGAIN; + break; + } + } + for (i = 0; i <= orig_level; i++) { + if (path->nodes[i]) { + btrfs_block_release(root, path->nodes[i]); + path->nodes[i] = 0; + } + } +out: + if (path) + btrfs_free_path(path); + if (ret != -EAGAIN) { + memset(&root->defrag_progress, 0, + sizeof(root->defrag_progress)); + } + return ret; +} -- cgit v1.2.3-70-g09d2 From edbd8d4efe4ddaf29a175ae504e2c9a05a96ebee Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 21 Dec 2007 16:27:24 -0500 Subject: Btrfs: Support for online FS resize (grow and shrink) Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 10 ++ fs/btrfs/disk-io.c | 32 +++- fs/btrfs/disk-io.h | 2 + fs/btrfs/extent-tree.c | 480 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/inode.c | 105 ++++++++++- fs/btrfs/ioctl.h | 2 + fs/btrfs/root-tree.c | 5 + fs/btrfs/super.c | 7 +- 8 files changed, 629 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/ioctl.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 10129cc6656..b83a1bec346 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ 
-508,6 +508,8 @@ BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, used, 64); BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item, used, 64); +BTRFS_SETGET_FUNCS(disk_block_group_flags, struct btrfs_block_group_item, + flags, 8); /* struct btrfs_inode_ref */ BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); @@ -960,6 +962,9 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, int level, u64 hint, u64 empty_size); +int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 new_size); +int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size); int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 bytenr, @@ -1117,6 +1122,9 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 isize); /* inode.c */ +unsigned long btrfs_force_ra(struct address_space *mapping, + struct file_ra_state *ra, struct file *file, + pgoff_t offset, pgoff_t last_index); int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, int for_del); int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); @@ -1162,4 +1170,6 @@ void btrfs_sysfs_del_super(struct btrfs_fs_info *root); ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); int btrfs_delete_xattrs(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); +/* super.c */ +u64 btrfs_parse_size(char *str); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index eebb4fb65c6..de026d9d9b2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -471,13 +471,17 @@ insert: return root; } -struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, - struct btrfs_key *location, - const char *name, int namelen) +struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, + struct 
btrfs_key *location) { struct btrfs_root *root; int ret; + if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) + return fs_info->tree_root; + if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID) + return fs_info->extent_root; + root = radix_tree_lookup(&fs_info->fs_roots_radix, (unsigned long)location->objectid); if (root) @@ -494,6 +498,23 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } + ret = btrfs_find_dead_roots(fs_info->tree_root, + root->root_key.objectid, root); + BUG_ON(ret); + + return root; +} + +struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *location, + const char *name, int namelen) +{ + struct btrfs_root *root; + int ret; + + root = btrfs_read_fs_root_no_name(fs_info, location); + if (!root) + return NULL; ret = btrfs_set_root_name(root, name, namelen); if (ret) { @@ -509,11 +530,6 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } - - ret = btrfs_find_dead_roots(fs_info->tree_root, - root->root_key.objectid, root); - BUG_ON(ret); - return root; } #if 0 diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 33d2ccfd74d..8c3cfd02901 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -39,6 +39,8 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, const char *name, int namelen); struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_key *location); +struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, + struct btrfs_key *location); int btrfs_insert_dev_radix(struct btrfs_root *root, struct block_device *bdev, u64 device_id, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1412d556313..de0fb0743cf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -18,6 +18,7 @@ #include #include +#include #include "hash.h" #include "ctree.h" #include "disk-io.h" @@ -1622,6 +1623,7 @@ int 
btrfs_alloc_extent(struct btrfs_trans_handle *trans, int pending_ret; u64 super_used, root_used; u64 search_start = 0; + u64 new_hint; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; struct btrfs_extent_item extent_item; @@ -1629,6 +1631,10 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, btrfs_set_stack_extent_refs(&extent_item, 1); + new_hint = max(hint_byte, 16ULL * 1024 * 1024 * 1024); + if (new_hint < btrfs_super_total_bytes(&info->super_copy)) + hint_byte = new_hint; + WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, search_start, search_end, hint_byte, ins, @@ -2100,6 +2106,480 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return 0; } +static int relocate_inode_pages(struct inode *inode, u64 start, u64 len) +{ + u64 page_start; + u64 page_end; + u64 delalloc_start; + u64 existing_delalloc; + unsigned long last_index; + unsigned long first_index; + unsigned long i; + struct page *page; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct file_ra_state ra; + + mutex_lock(&inode->i_mutex); + first_index = start >> PAGE_CACHE_SHIFT; + last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; + + memset(&ra, 0, sizeof(ra)); + file_ra_state_init(&ra, inode->i_mapping); + btrfs_force_ra(inode->i_mapping, &ra, NULL, first_index, last_index); + + for (i = first_index; i <= last_index; i++) { + page = grab_cache_page(inode->i_mapping, i); + if (!page) + goto out_unlock; + if (!PageUptodate(page)) { + btrfs_readpage(NULL, page); + lock_page(page); + if (!PageUptodate(page)) { + unlock_page(page); + page_cache_release(page); + goto out_unlock; + } + } + page_start = (u64)page->index << PAGE_CACHE_SHIFT; + page_end = page_start + PAGE_CACHE_SIZE - 1; + + lock_extent(em_tree, page_start, page_end, GFP_NOFS); + + delalloc_start = page_start; + existing_delalloc = + 
count_range_bits(&BTRFS_I(inode)->extent_tree, + &delalloc_start, page_end, + PAGE_CACHE_SIZE, EXTENT_DELALLOC); + + set_extent_delalloc(em_tree, page_start, + page_end, GFP_NOFS); + + spin_lock(&root->fs_info->delalloc_lock); + root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE - + existing_delalloc; + spin_unlock(&root->fs_info->delalloc_lock); + + unlock_extent(em_tree, page_start, page_end, GFP_NOFS); + set_page_dirty(page); + unlock_page(page); + page_cache_release(page); + } + +out_unlock: + mutex_unlock(&inode->i_mutex); + return 0; +} + +static int relocate_one_reference(struct btrfs_root *extent_root, + struct btrfs_path *path, + struct btrfs_key *extent_key, + u64 ref_root, u64 ref_gen, u64 ref_objectid, + u64 ref_offset) +{ + struct inode *inode; + struct btrfs_root *found_root; + struct btrfs_key root_location; + int ret; + + root_location.objectid = ref_root; + if (ref_gen == 0) + root_location.offset = 0; + else + root_location.offset = (u64)-1; + root_location.type = BTRFS_ROOT_ITEM_KEY; + + found_root = btrfs_read_fs_root_no_name(extent_root->fs_info, + &root_location); + BUG_ON(!found_root); + + if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) { + mutex_unlock(&extent_root->fs_info->fs_mutex); + inode = btrfs_iget_locked(extent_root->fs_info->sb, + ref_objectid, found_root); + if (inode->i_state & I_NEW) { + /* the inode and parent dir are two different roots */ + BTRFS_I(inode)->root = found_root; + BTRFS_I(inode)->location.objectid = ref_objectid; + BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; + BTRFS_I(inode)->location.offset = 0; + btrfs_read_locked_inode(inode); + unlock_new_inode(inode); + + } + /* this can happen if the reference is not against + * the latest version of the tree root + */ + if (is_bad_inode(inode)) { + mutex_lock(&extent_root->fs_info->fs_mutex); + goto out; + } + relocate_inode_pages(inode, ref_offset, extent_key->offset); + /* FIXME, data=ordered will help get rid of this */ + filemap_fdatawrite(inode->i_mapping); + 
iput(inode); + mutex_lock(&extent_root->fs_info->fs_mutex); + } else { + struct btrfs_trans_handle *trans; + struct btrfs_key found_key; + struct extent_buffer *eb; + int level; + int i; + + trans = btrfs_start_transaction(found_root, 1); + eb = read_tree_block(found_root, extent_key->objectid, + extent_key->offset); + level = btrfs_header_level(eb); + + if (level == 0) + btrfs_item_key_to_cpu(eb, &found_key, 0); + else + btrfs_node_key_to_cpu(eb, &found_key, 0); + + free_extent_buffer(eb); + + path->lowest_level = level; + path->reada = 0; + ret = btrfs_search_slot(trans, found_root, &found_key, path, + 0, 1); + path->lowest_level = 0; + for (i = level; i < BTRFS_MAX_LEVEL; i++) { + if (!path->nodes[i]) + break; + free_extent_buffer(path->nodes[i]); + path->nodes[i] = NULL; + } + btrfs_release_path(found_root, path); + btrfs_end_transaction(trans, found_root); + } + +out: + return 0; +} + +static int relocate_one_extent(struct btrfs_root *extent_root, + struct btrfs_path *path, + struct btrfs_key *extent_key) +{ + struct btrfs_key key; + struct btrfs_key found_key; + struct btrfs_extent_ref *ref; + struct extent_buffer *leaf; + u64 ref_root; + u64 ref_gen; + u64 ref_objectid; + u64 ref_offset; + u32 nritems; + u32 item_size; + int ret = 0; + + key.objectid = extent_key->objectid; + key.type = BTRFS_EXTENT_REF_KEY; + key.offset = 0; + + while(1) { + ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); + + BUG_ON(ret == 0); + + if (ret < 0) + goto out; + + ret = 0; + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + if (path->slots[0] == nritems) + goto out; + + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.objectid != extent_key->objectid) + break; + + if (found_key.type != BTRFS_EXTENT_REF_KEY) + break; + + key.offset = found_key.offset + 1; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_ref); + ref_root = btrfs_ref_root(leaf, ref); + 
ref_gen = btrfs_ref_generation(leaf, ref); + ref_objectid = btrfs_ref_objectid(leaf, ref); + ref_offset = btrfs_ref_offset(leaf, ref); + btrfs_release_path(extent_root, path); + + ret = relocate_one_reference(extent_root, path, + extent_key, ref_root, ref_gen, + ref_objectid, ref_offset); + if (ret) + goto out; + } + ret = 0; +out: + btrfs_release_path(extent_root, path); + return ret; +} + +static int find_overlapping_extent(struct btrfs_root *root, + struct btrfs_path *path, u64 new_size) +{ + struct btrfs_key found_key; + struct extent_buffer *leaf; + int ret; + + while(1) { + if (path->slots[0] == 0) { + ret = btrfs_prev_leaf(root, path); + if (ret == 1) { + return 1; + } + if (ret < 0) + return ret; + } else { + path->slots[0]--; + } + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.type == BTRFS_EXTENT_ITEM_KEY) { + if (found_key.objectid + found_key.offset > new_size) + return 0; + else + return 1; + } + } + return 1; +} + +int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *tree_root = root->fs_info->tree_root; + struct btrfs_path *path; + u64 cur_byte; + u64 total_found; + u64 ptr; + struct btrfs_fs_info *info = root->fs_info; + struct extent_map_tree *block_group_cache; + struct btrfs_key key; + struct btrfs_key found_key = { 0, 0, 0 }; + struct extent_buffer *leaf; + u32 nritems; + int ret; + int slot; + + btrfs_set_super_total_bytes(&info->super_copy, new_size); + block_group_cache = &info->block_group_cache; + path = btrfs_alloc_path(); + root = root->fs_info->extent_root; + +again: + total_found = 0; + key.objectid = new_size; + cur_byte = key.objectid; + key.offset = 0; + key.type = 0; + while(1) { + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; +next: + leaf = path->nodes[0]; + if (key.objectid == new_size - 1) { + ret = find_overlapping_extent(root, path, new_size); + if (ret != 0) { + 
btrfs_release_path(root, path); + ret = btrfs_search_slot(NULL, root, &key, + path, 0, 0); + if (ret < 0) + goto out; + } + } + nritems = btrfs_header_nritems(leaf); + ret = 0; + slot = path->slots[0]; + if (slot < nritems) + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (slot == nritems || + btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY) { + path->slots[0]++; + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + if (ret == 1) { + ret = 0; + break; + } + } + goto next; + } + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (found_key.objectid + found_key.offset <= cur_byte) + continue; + total_found++; + cur_byte = found_key.objectid + found_key.offset; + key.objectid = cur_byte; + btrfs_release_path(root, path); + ret = relocate_one_extent(root, path, &found_key); + } + + btrfs_release_path(root, path); + + if (total_found > 0) { + trans = btrfs_start_transaction(tree_root, 1); + btrfs_commit_transaction(trans, tree_root); + + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_clean_old_snapshots(tree_root); + mutex_lock(&root->fs_info->fs_mutex); + + trans = btrfs_start_transaction(tree_root, 1); + btrfs_commit_transaction(trans, tree_root); + goto again; + } + + trans = btrfs_start_transaction(root, 1); + key.objectid = new_size; + key.offset = 0; + key.type = 0; + while(1) { + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) + goto out; +bg_next: + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + ret = 0; + slot = path->slots[0]; + if (slot < nritems) + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (slot == nritems || + btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) { + if (slot < nritems) { + printk("shrinker found key %Lu %u %Lu\n", + found_key.objectid, found_key.type, + found_key.offset); + path->slots[0]++; + } + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + break; + if (ret == 1) { + ret = 0; + break; 
+ } + } + goto bg_next; + } + btrfs_item_key_to_cpu(leaf, &found_key, slot); + ret = get_state_private(&info->block_group_cache, + found_key.objectid, &ptr); + if (!ret) + kfree((void *)(unsigned long)ptr); + + clear_extent_bits(&info->block_group_cache, found_key.objectid, + found_key.objectid + found_key.offset - 1, + (unsigned int)-1, GFP_NOFS); + + key.objectid = found_key.objectid + 1; + btrfs_del_item(trans, root, path); + btrfs_release_path(root, path); + } + clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1, + GFP_NOFS); + btrfs_commit_transaction(trans, root); +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 new_size) +{ + struct btrfs_path *path; + u64 nr = 0; + u64 cur_byte; + u64 old_size; + struct btrfs_block_group_cache *cache; + struct btrfs_block_group_item *item; + struct btrfs_fs_info *info = root->fs_info; + struct extent_map_tree *block_group_cache; + struct btrfs_key key; + struct extent_buffer *leaf; + int ret; + int bit; + + old_size = btrfs_super_total_bytes(&info->super_copy); + block_group_cache = &info->block_group_cache; + + root = info->extent_root; + + cache = btrfs_lookup_block_group(root->fs_info, old_size - 1); + + cur_byte = cache->key.objectid + cache->key.offset; + if (cur_byte >= new_size) + goto set_size; + + key.offset = BTRFS_BLOCK_GROUP_SIZE; + btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + while(cur_byte < new_size) { + key.objectid = cur_byte; + ret = btrfs_insert_empty_item(trans, root, path, &key, + sizeof(struct btrfs_block_group_item)); + BUG_ON(ret); + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_block_group_item); + + btrfs_set_disk_block_group_used(leaf, item, 0); + if (nr % 3) { + btrfs_set_disk_block_group_flags(leaf, item, + BTRFS_BLOCK_GROUP_DATA); + } else { + 
btrfs_set_disk_block_group_flags(leaf, item, 0); + } + nr++; + + cache = kmalloc(sizeof(*cache), GFP_NOFS); + BUG_ON(!cache); + + read_extent_buffer(leaf, &cache->item, (unsigned long)item, + sizeof(cache->item)); + + memcpy(&cache->key, &key, sizeof(key)); + cache->cached = 0; + cache->pinned = 0; + cur_byte = key.objectid + key.offset; + btrfs_release_path(root, path); + + if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { + bit = BLOCK_GROUP_DATA; + cache->data = BTRFS_BLOCK_GROUP_DATA; + } else { + bit = BLOCK_GROUP_METADATA; + cache->data = 0; + } + + /* use EXTENT_LOCKED to prevent merging */ + set_extent_bits(block_group_cache, key.objectid, + key.objectid + key.offset - 1, + bit | EXTENT_LOCKED, GFP_NOFS); + set_state_private(block_group_cache, key.objectid, + (unsigned long)cache); + } + btrfs_free_path(path); +set_size: + btrfs_set_super_total_bytes(&info->super_copy, new_size); + return 0; +} + int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5f5b7b89b14..f6a20112d9e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2346,7 +2346,7 @@ fail_unlock: return ret; } -static unsigned long force_ra(struct address_space *mapping, +unsigned long btrfs_force_ra(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t offset, pgoff_t last_index) { @@ -2372,6 +2372,8 @@ int btrfs_defrag_file(struct file *file) { unsigned long ra_index = 0; u64 page_start; u64 page_end; + u64 delalloc_start; + u64 existing_delalloc; unsigned long i; int ret; @@ -2385,8 +2387,9 @@ int btrfs_defrag_file(struct file *file) { last_index = inode->i_size >> PAGE_CACHE_SHIFT; for (i = 0; i <= last_index; i++) { if (i == ra_index) { - ra_index = force_ra(inode->i_mapping, &file->f_ra, - file, ra_index, last_index); + ra_index = btrfs_force_ra(inode->i_mapping, + &file->f_ra, + file, ra_index, last_index); } page = grab_cache_page(inode->i_mapping, i); if (!page) @@ -2404,8 
+2407,19 @@ int btrfs_defrag_file(struct file *file) { page_end = page_start + PAGE_CACHE_SIZE - 1; lock_extent(em_tree, page_start, page_end, GFP_NOFS); + delalloc_start = page_start; + existing_delalloc = + count_range_bits(&BTRFS_I(inode)->extent_tree, + &delalloc_start, page_end, + PAGE_CACHE_SIZE, EXTENT_DELALLOC); set_extent_delalloc(em_tree, page_start, page_end, GFP_NOFS); + + spin_lock(&root->fs_info->delalloc_lock); + root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE - + existing_delalloc; + spin_unlock(&root->fs_info->delalloc_lock); + unlock_extent(em_tree, page_start, page_end, GFP_NOFS); set_page_dirty(page); unlock_page(page); @@ -2418,6 +2432,89 @@ out_unlock: return 0; } +static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) +{ + u64 new_size; + u64 old_size; + struct btrfs_ioctl_vol_args *vol_args; + struct btrfs_trans_handle *trans; + char *sizestr; + int ret = 0; + int namelen; + int mod = 0; + + vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); + + if (!vol_args) + return -ENOMEM; + + if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { + ret = -EFAULT; + goto out; + } + namelen = strlen(vol_args->name); + if (namelen > BTRFS_VOL_NAME_MAX) { + ret = -EINVAL; + goto out; + } + + sizestr = vol_args->name; + if (!strcmp(sizestr, "max")) + new_size = root->fs_info->sb->s_bdev->bd_inode->i_size; + else { + if (sizestr[0] == '-') { + mod = -1; + sizestr++; + } else if (sizestr[0] == '+') { + mod = 1; + sizestr++; + } + new_size = btrfs_parse_size(sizestr); + if (new_size == 0) { + ret = -EINVAL; + goto out; + } + } + + mutex_lock(&root->fs_info->fs_mutex); + old_size = btrfs_super_total_bytes(&root->fs_info->super_copy); + + if (mod < 0) { + if (new_size > old_size) { + ret = -EINVAL; + goto out_unlock; + } + new_size = old_size - new_size; + } else if (mod > 0) { + new_size = old_size + new_size; + } + + if (new_size < 256 * 1024 * 1024) { + ret = -EINVAL; + goto out_unlock; + } + if (new_size > 
root->fs_info->sb->s_bdev->bd_inode->i_size) { + ret = -EFBIG; + goto out_unlock; + } + new_size = (new_size / root->sectorsize) * root->sectorsize; + +printk("new size is %Lu\n", new_size); + if (new_size > old_size) { + trans = btrfs_start_transaction(root, 1); + ret = btrfs_grow_extent_tree(trans, root, new_size); + btrfs_commit_transaction(trans, root); + } else { + ret = btrfs_shrink_extent_tree(root, new_size); + } + +out_unlock: + mutex_unlock(&root->fs_info->fs_mutex); +out: + kfree(vol_args); + return ret; +} + static int btrfs_ioctl_snap_create(struct btrfs_root *root, void __user *arg) { struct btrfs_ioctl_vol_args *vol_args; @@ -2510,6 +2607,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_snap_create(root, (void __user *)arg); case BTRFS_IOC_DEFRAG: return btrfs_ioctl_defrag(file); + case BTRFS_IOC_RESIZE: + return btrfs_ioctl_resize(root, (void __user *)arg); } return -ENOTTY; diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 8bc47dec286..8c6290665d4 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -30,4 +30,6 @@ struct btrfs_ioctl_vol_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \ + struct btrfs_ioctl_vol_args) #endif diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 88bcdd33f56..e2b9b86af69 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -179,6 +179,11 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = btrfs_search_slot(trans, root, key, path, -1, 1); if (ret < 0) goto out; + if (ret) { +btrfs_print_leaf(root, path->nodes[0]); +printk("failed to del %Lu %u %Lu\n", key->objectid, key->type, key->offset); + + } BUG_ON(ret != 0); leaf = path->nodes[0]; ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7cab680418a..3bbd7f66569 100644 --- a/fs/btrfs/super.c 
+++ b/fs/btrfs/super.c @@ -74,9 +74,9 @@ static match_table_t tokens = { {Opt_err, NULL} }; -static unsigned long parse_size(char *str) +u64 btrfs_parse_size(char *str) { - unsigned long res; + u64 res; int mult = 1; char *end; char last; @@ -150,7 +150,8 @@ static int parse_options (char * options, if (info) { char *num = match_strdup(&args[0]); if (num) { - info->max_extent = parse_size(num); + info->max_extent = + btrfs_parse_size(num); kfree(num); info->max_extent = max_t(u64, -- cgit v1.2.3-70-g09d2 From 8a4b83cc8bd75fca29ac68615896d9e92820e7c2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:02:07 -0400 Subject: Btrfs: Add support for device scanning and detection ioctls Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 21 ++++- fs/btrfs/disk-io.c | 24 ++++-- fs/btrfs/disk-io.h | 4 +- fs/btrfs/ioctl.h | 6 +- fs/btrfs/super.c | 61 ++++++++++---- fs/btrfs/volumes.c | 236 +++++++++++++++++++++++++++++++++++++++++++++++++---- fs/btrfs/volumes.h | 25 ++++++ 7 files changed, 333 insertions(+), 44 deletions(-) (limited to 'fs/btrfs/ioctl.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index acf22ad6115..7556f8319c6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -37,7 +37,7 @@ extern struct kmem_cache *btrfs_transaction_cachep; extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; -#define BTRFS_MAGIC "_B4RfS_M" +#define BTRFS_MAGIC "_B5RfS_M" #define BTRFS_MAX_LEVEL 8 @@ -238,6 +238,7 @@ struct btrfs_super_block { __le64 total_bytes; __le64 bytes_used; __le64 root_dir_objectid; + __le64 num_devices; __le32 sectorsize; __le32 nodesize; __le32 leafsize; @@ -440,6 +441,7 @@ struct btrfs_block_group_cache { }; struct btrfs_device; +struct btrfs_fs_devices; struct btrfs_fs_info { u8 fsid[BTRFS_FSID_SIZE]; struct btrfs_root *extent_root; @@ -489,7 +491,7 @@ struct btrfs_fs_info { u64 total_pinned; struct list_head dirty_cowonly_roots; - struct list_head devices; + struct btrfs_fs_devices 
*fs_devices; struct list_head space_info; spinlock_t delalloc_lock; spinlock_t new_trans_lock; @@ -677,6 +679,19 @@ BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32); BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32); BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_type, struct btrfs_dev_item, type, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item, + total_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_bytes_used, struct btrfs_dev_item, + bytes_used, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_io_align, struct btrfs_dev_item, + io_align, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_io_width, struct btrfs_dev_item, + io_width, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_sector_size, struct btrfs_dev_item, + sector_size, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_id, struct btrfs_dev_item, devid, 64); + static inline char *btrfs_device_uuid(struct btrfs_dev_item *d) { return (char *)d + offsetof(struct btrfs_dev_item, uuid); @@ -1106,6 +1121,8 @@ BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block, stripesize, 32); BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block, root_dir_objectid, 64); +BTRFS_SETGET_STACK_FUNCS(super_num_devices, struct btrfs_super_block, + num_devices, 64); static inline unsigned long btrfs_leaf_data(struct extent_buffer *l) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4890151cd68..f971a29e4f2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -365,12 +365,12 @@ static int close_all_devices(struct btrfs_fs_info *fs_info) struct list_head *next; struct btrfs_device *device; - list = &fs_info->devices; - while(!list_empty(list)) { - next = list->next; - list_del(next); + list = &fs_info->fs_devices->devices; + list_for_each(next, list) { device = list_entry(next, struct btrfs_device, dev_list); - kfree(device); + if (device->bdev && device->bdev != 
fs_info->sb->s_bdev) + close_bdev_excl(device->bdev); + device->bdev = NULL; } return 0; } @@ -655,7 +655,8 @@ static int add_hasher(struct btrfs_fs_info *info, char *type) { return 0; } #endif -struct btrfs_root *open_ctree(struct super_block *sb) +struct btrfs_root *open_ctree(struct super_block *sb, + struct btrfs_fs_devices *fs_devices) { u32 sectorsize; u32 nodesize; @@ -697,8 +698,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->extent_root = extent_root; fs_info->chunk_root = chunk_root; fs_info->dev_root = dev_root; + fs_info->fs_devices = fs_devices; INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); - INIT_LIST_HEAD(&fs_info->devices); INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); fs_info->sb = sb; @@ -779,6 +780,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; + if (btrfs_super_num_devices(disk_super) != fs_devices->num_devices) { + printk("Btrfs: wanted %llu devices, but found %llu\n", + (unsigned long long)btrfs_super_num_devices(disk_super), + (unsigned long long)fs_devices->num_devices); + goto fail_sb_buffer; + } nodesize = btrfs_super_nodesize(disk_super); leafsize = btrfs_super_leafsize(disk_super); sectorsize = btrfs_super_sectorsize(disk_super); @@ -799,8 +806,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) } mutex_lock(&fs_info->fs_mutex); - ret = btrfs_read_super_device(tree_root, fs_info->sb_buffer); - BUG_ON(ret); ret = btrfs_read_sys_array(tree_root); BUG_ON(ret); @@ -859,6 +864,7 @@ fail_sb_buffer: fail_iput: iput(fs_info->btree_inode); fail: + close_all_devices(fs_info); kfree(extent_root); kfree(tree_root); kfree(fs_info); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 206cb48638f..b7cbc58a555 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -21,6 +21,7 @@ #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) struct btrfs_device; +struct btrfs_fs_devices; struct extent_buffer 
*read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); @@ -29,7 +30,8 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); -struct btrfs_root *open_ctree(struct super_block *sb); +struct btrfs_root *open_ctree(struct super_block *sb, + struct btrfs_fs_devices *fs_devices); int close_ctree(struct btrfs_root *root); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 8c6290665d4..4551e82013c 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -22,8 +22,10 @@ #define BTRFS_IOCTL_MAGIC 0x94 #define BTRFS_VOL_NAME_MAX 255 +#define BTRFS_PATH_NAME_MAX 4095 + struct btrfs_ioctl_vol_args { - char name[BTRFS_VOL_NAME_MAX + 1]; + char name[BTRFS_PATH_NAME_MAX + 1]; }; #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ @@ -32,4 +34,6 @@ struct btrfs_ioctl_vol_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \ struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ + struct btrfs_ioctl_vol_args) #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 67ed216df47..9624923a33d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -44,6 +44,7 @@ #include "ioctl.h" #include "print-tree.h" #include "xattr.h" +#include "volumes.h" #define BTRFS_SUPER_MAGIC 0x9123683E @@ -216,7 +217,9 @@ static int parse_options (char * options, return 1; } -static int btrfs_fill_super(struct super_block * sb, void * data, int silent) +static int btrfs_fill_super(struct super_block * sb, + struct btrfs_fs_devices *fs_devices, + void * data, int silent) { struct inode * inode; struct dentry * root_dentry; @@ -231,7 +234,7 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) sb->s_xattr = btrfs_xattr_handlers; sb->s_time_gran = 1; 
- tree_root = open_ctree(sb); + tree_root = open_ctree(sb, fs_devices); if (!tree_root || IS_ERR(tree_root)) { printk("btrfs: open_ctree failed\n"); @@ -334,18 +337,23 @@ static int test_bdev_super(struct super_block *s, void *data) int btrfs_get_sb_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int), struct vfsmount *mnt, const char *subvol) { struct block_device *bdev = NULL; struct super_block *s; struct dentry *root; + struct btrfs_fs_devices *fs_devices = NULL; int error = 0; - bdev = open_bdev_excl(dev_name, flags, fs_type); - if (IS_ERR(bdev)) - return PTR_ERR(bdev); + error = btrfs_scan_one_device(dev_name, flags, fs_type, &fs_devices); + if (error) + return error; + error = btrfs_open_devices(fs_devices, flags, fs_type); + if (error) + return error; + + bdev = fs_devices->lowest_bdev; /* * once the super is inserted into the list by sget, s_umount * will protect the lockfs code from trying to start a snapshot @@ -372,7 +380,8 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type, s->s_flags = flags; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(bdev)); - error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); + error = btrfs_fill_super(s, fs_devices, data, + flags & MS_SILENT ? 1 : 0); if (error) { up_write(&s->s_umount); deactivate_super(s); @@ -408,7 +417,7 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type, error_s: error = PTR_ERR(s); error_bdev: - close_bdev_excl(bdev); + btrfs_close_devices(fs_devices); error: return error; } @@ -421,8 +430,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, char *subvol_name = NULL; parse_options((char *)data, NULL, &subvol_name); - ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, - btrfs_fill_super, mnt, + ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, mnt, subvol_name ? 
subvol_name : "default"); if (subvol_name) kfree(subvol_name); @@ -445,13 +453,6 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static long btrfs_control_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - printk("btrfs control ioctl %d\n", cmd); - return 0; -} - static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", @@ -460,6 +461,31 @@ static struct file_system_type btrfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; +static long btrfs_control_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct btrfs_ioctl_vol_args *vol; + struct btrfs_fs_devices *fs_devices; + int ret; + int len; + + vol = kmalloc(sizeof(*vol), GFP_KERNEL); + if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) { + ret = -EFAULT; + goto out; + } + len = strnlen(vol->name, BTRFS_PATH_NAME_MAX); + switch (cmd) { + case BTRFS_IOC_SCAN_DEV: + ret = btrfs_scan_one_device(vol->name, MS_RDONLY, + &btrfs_fs_type, &fs_devices); + break; + } +out: + kfree(vol); + return 0; +} + static void btrfs_write_super_lockfs(struct super_block *sb) { struct btrfs_root *root = btrfs_sb(sb); @@ -567,6 +593,7 @@ static void __exit exit_btrfs_fs(void) btrfs_interface_exit(); unregister_filesystem(&btrfs_fs_type); btrfs_exit_sysfs(); + btrfs_cleanup_fs_uuids(); } module_init(init_btrfs_fs) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 16fb6bbe6e2..263f01cc3db 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -17,6 +17,7 @@ */ #include #include +#include #include "ctree.h" #include "extent_map.h" #include "disk-io.h" @@ -28,6 +29,215 @@ struct map_lookup { struct btrfs_device *dev; u64 physical; }; +static DEFINE_MUTEX(uuid_mutex); +static LIST_HEAD(fs_uuids); + +int btrfs_cleanup_fs_uuids(void) +{ + struct btrfs_fs_devices *fs_devices; + struct list_head *uuid_cur; + struct list_head *devices_cur; + struct btrfs_device *dev; + + list_for_each(uuid_cur, &fs_uuids) { + fs_devices = 
list_entry(uuid_cur, struct btrfs_fs_devices, + list); + while(!list_empty(&fs_devices->devices)) { + devices_cur = fs_devices->devices.next; + dev = list_entry(devices_cur, struct btrfs_device, + dev_list); + printk("uuid cleanup finds %s\n", dev->name); + if (dev->bdev) { + printk("closing\n"); + close_bdev_excl(dev->bdev); + } + list_del(&dev->dev_list); + kfree(dev); + } + } + return 0; +} + +static struct btrfs_device *__find_device(struct list_head *head, u64 devid) +{ + struct btrfs_device *dev; + struct list_head *cur; + + list_for_each(cur, head) { + dev = list_entry(cur, struct btrfs_device, dev_list); + if (dev->devid == devid) + return dev; + } + return NULL; +} + +static struct btrfs_fs_devices *find_fsid(u8 *fsid) +{ + struct list_head *cur; + struct btrfs_fs_devices *fs_devices; + + list_for_each(cur, &fs_uuids) { + fs_devices = list_entry(cur, struct btrfs_fs_devices, list); + if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0) + return fs_devices; + } + return NULL; +} + +static int device_list_add(const char *path, + struct btrfs_super_block *disk_super, + u64 devid, struct btrfs_fs_devices **fs_devices_ret) +{ + struct btrfs_device *device; + struct btrfs_fs_devices *fs_devices; + u64 found_transid = btrfs_super_generation(disk_super); + + fs_devices = find_fsid(disk_super->fsid); + if (!fs_devices) { + fs_devices = kmalloc(sizeof(*fs_devices), GFP_NOFS); + if (!fs_devices) + return -ENOMEM; + INIT_LIST_HEAD(&fs_devices->devices); + list_add(&fs_devices->list, &fs_uuids); + memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); + fs_devices->latest_devid = devid; + fs_devices->latest_trans = found_transid; + fs_devices->lowest_devid = (u64)-1; + fs_devices->num_devices = 0; + device = NULL; + } else { + device = __find_device(&fs_devices->devices, devid); + } + if (!device) { + device = kzalloc(sizeof(*device), GFP_NOFS); + if (!device) { + /* we can safely leave the fs_devices entry around */ + return -ENOMEM; + } + device->devid = 
devid; + device->name = kstrdup(path, GFP_NOFS); + if (!device->name) { + kfree(device); + return -ENOMEM; + } + list_add(&device->dev_list, &fs_devices->devices); + fs_devices->num_devices++; + } + + if (found_transid > fs_devices->latest_trans) { + fs_devices->latest_devid = devid; + fs_devices->latest_trans = found_transid; + } + if (fs_devices->lowest_devid > devid) { + fs_devices->lowest_devid = devid; + printk("lowest devid now %Lu\n", devid); + } + *fs_devices_ret = fs_devices; + return 0; +} + +int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) +{ + struct list_head *head = &fs_devices->devices; + struct list_head *cur; + struct btrfs_device *device; + + mutex_lock(&uuid_mutex); + list_for_each(cur, head) { + device = list_entry(cur, struct btrfs_device, dev_list); + if (device->bdev) { + close_bdev_excl(device->bdev); + printk("close devices closes %s\n", device->name); + } + device->bdev = NULL; + } + mutex_unlock(&uuid_mutex); + return 0; +} + +int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, + int flags, void *holder) +{ + struct block_device *bdev; + struct list_head *head = &fs_devices->devices; + struct list_head *cur; + struct btrfs_device *device; + int ret; + + mutex_lock(&uuid_mutex); + list_for_each(cur, head) { + device = list_entry(cur, struct btrfs_device, dev_list); + bdev = open_bdev_excl(device->name, flags, holder); +printk("opening %s devid %Lu\n", device->name, device->devid); + if (IS_ERR(bdev)) { + printk("open %s failed\n", device->name); + ret = PTR_ERR(bdev); + goto fail; + } + if (device->devid == fs_devices->latest_devid) + fs_devices->latest_bdev = bdev; + if (device->devid == fs_devices->lowest_devid) { + fs_devices->lowest_bdev = bdev; +printk("lowest bdev %s\n", device->name); + } + device->bdev = bdev; + } + mutex_unlock(&uuid_mutex); + return 0; +fail: + mutex_unlock(&uuid_mutex); + btrfs_close_devices(fs_devices); + return ret; +} + +int btrfs_scan_one_device(const char *path, int flags, void *holder, 
+ struct btrfs_fs_devices **fs_devices_ret) +{ + struct btrfs_super_block *disk_super; + struct block_device *bdev; + struct buffer_head *bh; + int ret; + u64 devid; + + mutex_lock(&uuid_mutex); + + printk("scan one opens %s\n", path); + bdev = open_bdev_excl(path, flags, holder); + + if (IS_ERR(bdev)) { + printk("open failed\n"); + ret = PTR_ERR(bdev); + goto error; + } + + ret = set_blocksize(bdev, 4096); + if (ret) + goto error_close; + bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096); + if (!bh) { + ret = -EIO; + goto error_close; + } + disk_super = (struct btrfs_super_block *)bh->b_data; + if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, + sizeof(disk_super->magic))) { + printk("no btrfs found on %s\n", path); + ret = -ENOENT; + goto error_brelse; + } + devid = le64_to_cpu(disk_super->dev_item.devid); + printk("found device %Lu on %s\n", devid, path); + ret = device_list_add(path, disk_super, devid, fs_devices_ret); + +error_brelse: + brelse(bh); +error_close: + close_bdev_excl(bdev); + printk("scan one closes bdev %s\n", path); +error: + mutex_unlock(&uuid_mutex); + return ret; +} /* * this uses a pretty simple search, the expectation is that it is @@ -56,6 +266,10 @@ static int find_free_dev_extent(struct btrfs_trans_handle *trans, /* FIXME use last free of some kind */ + /* we don't want to overwrite the superblock on the drive, + * so we make sure to start at an offset of at least 1MB + */ + search_start = max((u64)1024 * 1024, search_start); key.objectid = device->devid; key.offset = search_start; key.type = BTRFS_DEV_EXTENT_KEY; @@ -285,6 +499,7 @@ int btrfs_add_device(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); + device->devid = free_devid; btrfs_set_device_id(leaf, dev_item, device->devid); btrfs_set_device_type(leaf, dev_item, device->type); btrfs_set_device_io_align(leaf, dev_item, device->io_align); @@ -382,7 +597,7 @@ int btrfs_alloc_chunk(struct 
btrfs_trans_handle *trans, struct btrfs_device *device = NULL; struct btrfs_chunk *chunk; struct list_head private_devs; - struct list_head *dev_list = &extent_root->fs_info->devices; + struct list_head *dev_list = &extent_root->fs_info->fs_devices->devices; struct list_head *cur; struct extent_map_tree *em_tree; struct map_lookup *map; @@ -449,7 +664,7 @@ again: key.objectid, calc_size, &dev_offset); BUG_ON(ret); - +printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); device->bytes_used += calc_size; ret = btrfs_update_device(trans, device); BUG_ON(ret); @@ -592,17 +807,9 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio) struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid) { - struct btrfs_device *dev; - struct list_head *cur = root->fs_info->devices.next; - struct list_head *head = &root->fs_info->devices; + struct list_head *head = &root->fs_info->fs_devices->devices; - while(cur != head) { - dev = list_entry(cur, struct btrfs_device, dev_list); - if (dev->devid == devid) - return dev; - cur = cur->next; - } - return NULL; + return __find_device(head, devid); } static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, @@ -699,15 +906,16 @@ static int read_one_dev(struct btrfs_root *root, devid = btrfs_device_id(leaf, dev_item); device = btrfs_find_device(root, devid); if (!device) { + printk("warning devid %Lu not found already\n", devid); device = kmalloc(sizeof(*device), GFP_NOFS); if (!device) return -ENOMEM; - list_add(&device->dev_list, &root->fs_info->devices); + list_add(&device->dev_list, + &root->fs_info->fs_devices->devices); } fill_device_from_item(leaf, dev_item, device); device->dev_root = root->fs_info->dev_root; - device->bdev = root->fs_info->sb->s_bdev; ret = 0; #if 0 ret = btrfs_open_device(device); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 20259128152..12f297eb055 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -24,6 +24,8 @@ struct 
btrfs_device { struct block_device *bdev; + char *name; + /* the internal btrfs device id */ u64 devid; @@ -49,6 +51,20 @@ struct btrfs_device { u8 uuid[BTRFS_DEV_UUID_SIZE]; }; +struct btrfs_fs_devices { + u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ + + /* the device with this id has the most recent coyp of the super */ + u64 latest_devid; + u64 latest_trans; + u64 lowest_devid; + u64 num_devices; + struct block_device *latest_bdev; + struct block_device *lowest_bdev; + struct list_head devices; + struct list_head list; +}; + int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 owner, u64 num_bytes, u64 *start); @@ -67,4 +83,13 @@ int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf); int btrfs_map_block(struct btrfs_mapping_tree *map_tree, u64 logical, u64 *phys, u64 *length, struct btrfs_device **dev); +int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, + int flags, void *holder); +int btrfs_scan_one_device(const char *path, int flags, void *holder, + struct btrfs_fs_devices **fs_devices_ret); +int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); +int btrfs_add_device(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_device *device); +int btrfs_cleanup_fs_uuids(void); #endif -- cgit v1.2.3-70-g09d2 From 788f20eb5affef584e75ea84bb80a4c3352a2c0e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 28 Apr 2008 15:29:42 -0400 Subject: Btrfs: Add new ioctl to add devices Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/inode.c | 23 +++++++++++++++++ fs/btrfs/ioctl.h | 6 +++++ fs/btrfs/super.c | 1 + fs/btrfs/volumes.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/volumes.h | 1 + 6 files changed, 108 insertions(+) (limited to 'fs/btrfs/ioctl.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ac7106ec535..b9257b37bb9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -551,6 +551,8 @@ struct btrfs_fs_info 
{ u64 data_alloc_profile; u64 metadata_alloc_profile; u64 system_alloc_profile; + + void *bdev_holder; }; /* diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ab707c0930d..1a74b501869 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3070,6 +3070,27 @@ static int btrfs_ioctl_defrag(struct file *file) return 0; } +long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) +{ + struct btrfs_ioctl_vol_args *vol_args; + int ret; + + vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); + + if (!vol_args) + return -ENOMEM; + + if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { + ret = -EFAULT; + goto out; + } + ret = btrfs_init_new_device(root, vol_args->name); + +out: + kfree(vol_args); + return ret; +} + long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -3082,6 +3103,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_defrag(file); case BTRFS_IOC_RESIZE: return btrfs_ioctl_resize(root, (void __user *)arg); + case BTRFS_IOC_ADD_DEV: + return btrfs_ioctl_add_dev(root, (void __user *)arg); } return -ENOTTY; diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 4551e82013c..8ad35fc4ba5 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -36,4 +36,10 @@ struct btrfs_ioctl_vol_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ + struct btrfs_ioctl_vol_args) #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 89286490688..7153dfaa340 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -388,6 +388,7 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type, goto error; } + btrfs_sb(s)->fs_info->bdev_holder = fs_type; s->s_flags |= MS_ACTIVE; } diff --git a/fs/btrfs/volumes.c 
b/fs/btrfs/volumes.c index a2c56de1548..b93c15aa17d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "ctree.h" #include "extent_map.h" @@ -592,6 +593,80 @@ out: return ret; } +int btrfs_init_new_device(struct btrfs_root *root, char *device_path) +{ + struct btrfs_trans_handle *trans; + struct btrfs_device *device; + struct block_device *bdev; + struct list_head *cur; + struct list_head *devices; + u64 total_bytes; + int ret = 0; + + + bdev = open_bdev_excl(device_path, 0, root->fs_info->bdev_holder); + if (!bdev) { + return -EIO; + } + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + devices = &root->fs_info->fs_devices->devices; + list_for_each(cur, devices) { + device = list_entry(cur, struct btrfs_device, dev_list); + if (device->bdev == bdev) { + ret = -EEXIST; + goto out; + } + } + + device = kzalloc(sizeof(*device), GFP_NOFS); + if (!device) { + /* we can safely leave the fs_devices entry around */ + ret = -ENOMEM; + goto out_close_bdev; + } + + device->barriers = 1; + generate_random_uuid(device->uuid); + spin_lock_init(&device->io_lock); + device->name = kstrdup(device_path, GFP_NOFS); + if (!device->name) { + kfree(device); + goto out_close_bdev; + } + device->io_width = root->sectorsize; + device->io_align = root->sectorsize; + device->sector_size = root->sectorsize; + device->total_bytes = i_size_read(bdev->bd_inode); + device->dev_root = root->fs_info->dev_root; + device->bdev = bdev; + + ret = btrfs_add_device(trans, root, device); + if (ret) + goto out_close_bdev; + + total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); + btrfs_set_super_total_bytes(&root->fs_info->super_copy, + total_bytes + device->total_bytes); + + total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); + btrfs_set_super_num_devices(&root->fs_info->super_copy, + total_bytes + 1); + + list_add(&device->dev_list, 
&root->fs_info->fs_devices->devices); + list_add(&device->dev_alloc_list, + &root->fs_info->fs_devices->alloc_list); + root->fs_info->fs_devices->num_devices++; +out: + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; + +out_close_bdev: + close_bdev_excl(bdev); + goto out; +} + int btrfs_update_device(struct btrfs_trans_handle *trans, struct btrfs_device *device) { diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 6fe8440b37a..6f173450378 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -133,4 +133,5 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, u8 *uuid); int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); +int btrfs_init_new_device(struct btrfs_root *root, char *path); #endif -- cgit v1.2.3-70-g09d2 From f2eb0a241f0e5c135d93243b0236cb1f14c305e0 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 2 May 2008 14:43:14 -0400 Subject: Btrfs: Clone file data ioctl Add a new ioctl to clone file data Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 +- fs/btrfs/file-item.c | 12 ++-- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 171 ++++++++++++++++++++++++++++++++++++++++++++++++++- fs/btrfs/ioctl.h | 1 + 5 files changed, 179 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/ioctl.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 73b92dd150f..3b6f8524a4a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1516,9 +1516,9 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root /* file-item.c */ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 objectid, u64 pos, u64 offset, + u64 objectid, u64 pos, u64 disk_offset, u64 disk_num_bytes, - u64 num_bytes); + u64 num_bytes, u64 offset); int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, diff --git 
a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 9259aece6ed..f537eb43c2c 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -28,10 +28,10 @@ sizeof(struct btrfs_item) * 2) / \ BTRFS_CRC32_SIZE) - 1)) int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 objectid, u64 pos, - u64 offset, u64 disk_num_bytes, - u64 num_bytes) + struct btrfs_root *root, + u64 objectid, u64 pos, + u64 disk_offset, u64 disk_num_bytes, + u64 num_bytes, u64 offset) { int ret = 0; struct btrfs_file_extent_item *item; @@ -53,9 +53,9 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_bytenr(leaf, item, offset); + btrfs_set_file_extent_disk_bytenr(leaf, item, disk_offset); btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes); - btrfs_set_file_extent_offset(leaf, item, 0); + btrfs_set_file_extent_offset(leaf, item, offset); btrfs_set_file_extent_num_bytes(leaf, item, num_bytes); btrfs_set_file_extent_generation(leaf, item, trans->transid); btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8effdf4f5d6..a50507f3056 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -285,7 +285,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, err = btrfs_insert_file_extent(trans, root, inode->i_ino, last_pos_in_file, - 0, 0, hole_size); + 0, 0, hole_size, 0); btrfs_drop_extent_cache(inode, last_pos_in_file, last_pos_in_file + hole_size -1); btrfs_check_file(root, inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 99483447459..c6fae29c0b9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -141,7 +142,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) cur_alloc_size = ins.offset; ret = 
btrfs_insert_file_extent(trans, root, inode->i_ino, start, ins.objectid, ins.offset, - ins.offset); + ins.offset, 0); inode->i_blocks += ins.offset >> 9; btrfs_check_file(root, inode); if (num_bytes < cur_alloc_size) { @@ -1227,7 +1228,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) err = btrfs_insert_file_extent(trans, root, inode->i_ino, hole_start, 0, 0, - hole_size); + hole_size, 0); btrfs_drop_extent_cache(inode, hole_start, (u64)-1); btrfs_check_file(root, inode); @@ -3100,6 +3101,170 @@ out: return ret; } +void dup_item_to_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *leaf, + int slot, + struct btrfs_key *key, + u64 destino) +{ + struct btrfs_path *cpath = btrfs_alloc_path(); + int len = btrfs_item_size_nr(leaf, slot); + int dstoff; + struct btrfs_key ckey = *key; + int ret; + + ckey.objectid = destino; + ret = btrfs_insert_empty_item(trans, root, cpath, &ckey, len); + dstoff = btrfs_item_ptr_offset(cpath->nodes[0], cpath->slots[0]); + copy_extent_buffer(cpath->nodes[0], leaf, dstoff, + btrfs_item_ptr_offset(leaf, slot), + len); + btrfs_release_path(root, cpath); +} + +long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct file *src_file; + struct inode *src; + struct btrfs_trans_handle *trans; + int ret; + u64 pos; + struct btrfs_path *path; + struct btrfs_key key; + struct extent_buffer *leaf; + u32 nritems; + int nextret; + int slot; + + src_file = fget(src_fd); + if (!src_file) + return -EBADF; + src = src_file->f_dentry->d_inode; + + ret = -EXDEV; + if (src->i_sb != inode->i_sb) + goto out_fput; + + if (inode < src) { + mutex_lock(&inode->i_mutex); + mutex_lock(&src->i_mutex); + } else { + mutex_lock(&src->i_mutex); + mutex_lock(&inode->i_mutex); + } + + ret = -ENOTEMPTY; + if (inode->i_size) + goto out_unlock; + + /* do any pending 
delalloc/csum calc on src, one way or + another, and lock file content */ + while (1) { + filemap_write_and_wait(src->i_mapping); + lock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); + if (BTRFS_I(src)->delalloc_bytes == 0) + break; + unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); + } + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 0); + path = btrfs_alloc_path(); + pos = 0; + while (1) { + ret = btrfs_lookup_file_extent(trans, root, path, src->i_ino, + pos, 0); + if (ret < 0) + goto out; + if (ret > 0) { + if (path->slots[0] == 0) { + ret = 0; + goto out; + } + path->slots[0]--; + } +next_slot: + leaf = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(leaf, &key, slot); + nritems = btrfs_header_nritems(leaf); + + if (btrfs_key_type(&key) > BTRFS_CSUM_ITEM_KEY || + key.objectid != src->i_ino) + goto out; + if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { + struct btrfs_file_extent_item *extent; + int found_type; + pos = key.offset; + extent = btrfs_item_ptr(leaf, slot, + struct btrfs_file_extent_item); + found_type = btrfs_file_extent_type(leaf, extent); + if (found_type == BTRFS_FILE_EXTENT_REG) { + u64 len = btrfs_file_extent_num_bytes(leaf, + extent); + u64 ds = btrfs_file_extent_disk_bytenr(leaf, + extent); + u64 dl = btrfs_file_extent_disk_num_bytes(leaf, + extent); + u64 off = btrfs_file_extent_offset(leaf, + extent); + btrfs_insert_file_extent(trans, root, + inode->i_ino, pos, + ds, dl, len, off); + /* ds == 0 means there's a hole */ + if (ds != 0) { + btrfs_inc_extent_ref(trans, root, + ds, dl, + root->root_key.objectid, + trans->transid, + inode->i_ino, pos); + } + pos = key.offset + len; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + dup_item_to_inode(trans, root, path, leaf, slot, + &key, inode->i_ino); + pos = key.offset + btrfs_item_size_nr(leaf, + slot); + } + } else if (btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) + dup_item_to_inode(trans, root, path, leaf, slot, 
&key, + inode->i_ino); + + if (slot >= nritems - 1) { + nextret = btrfs_next_leaf(root, path); + if (nextret) + goto out; + } else { + path->slots[0]++; + } + goto next_slot; + } + +out: + btrfs_free_path(path); + ret = 0; + + inode->i_blocks = src->i_blocks; + i_size_write(inode, src->i_size); + btrfs_update_inode(trans, root, inode); + + unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); + + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + +out_unlock: + mutex_unlock(&src->i_mutex); + mutex_unlock(&inode->i_mutex); +out_fput: + fput(src_file); + return ret; +} + long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -3116,6 +3281,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_add_dev(root, (void __user *)arg); case BTRFS_IOC_BALANCE: return btrfs_balance(root->fs_info->dev_root); + case BTRFS_IOC_CLONE: + return btrfs_ioctl_clone(file, arg); } return -ENOTTY; diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 8ad35fc4ba5..b0e73f51d63 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -36,6 +36,7 @@ struct btrfs_ioctl_vol_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int) #define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ struct btrfs_ioctl_vol_args) #define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \ -- cgit v1.2.3-70-g09d2 From 6bf13c0cc833bf5ba013d6aa60379484bf48c4e6 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 10 Jun 2008 10:07:39 -0400 Subject: Btrfs: transaction ioctls These ioctls let a user application hold a transaction open while it performs a series of operations. A final ioctl does a sync on the fs (closing the current transaction). This is the main requirement for Ceph's OSD to be able to keep the data it's storing in a btrfs volume consistent, and AFAICS it works just fine. 
The application would do something like fd = ::open("some/file", O_RDONLY); ::ioctl(fd, BTRFS_IOC_TRANS_START); /* do a bunch of stuff */ ::ioctl(fd, BTRFS_IOC_TRANS_END); or just ::close(fd); And to ensure it commits to disk, ::ioctl(fd, BTRFS_IOC_SYNC); When a transaction is held open, the trans_handle is attached to the struct file (via private_data) so that it will get cleaned up if the process dies unexpectedly. A held transaction is also ended on fsync() to avoid a deadlock. A misbehaving application could also deliberately hold a transaction open, effectively locking up the FS, so it may make sense to restrict something like this to root or something. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++++ fs/btrfs/file.c | 7 ++++++- fs/btrfs/inode.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/ioctl.h | 9 ++++++++ fs/btrfs/super.c | 2 +- 5 files changed, 83 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/ioctl.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ad4eacca7f5..1dcf4fb5b68 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1575,6 +1575,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); int btrfs_init_cachep(void); void btrfs_destroy_cachep(void); +long btrfs_ioctl_trans_end(struct file *file); long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, struct btrfs_root *root); @@ -1595,6 +1596,8 @@ extern struct file_operations btrfs_file_operations; int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, u64 inline_limit, u64 *hint_block); +int btrfs_release_file(struct inode *inode, struct file *file); + /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only); @@ -1615,4 +1618,5 @@ int btrfs_delete_xattrs(struct btrfs_trans_handle 
*trans, u64 btrfs_parse_size(char *str); int btrfs_parse_options(char *options, struct btrfs_root *root, char **subvol_name); +int btrfs_sync_fs(struct super_block *sb, int wait); #endif diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c4fa4664a45..73c6d085bd9 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -978,9 +978,11 @@ out_nolock: return num_written ? num_written : err; } -static int btrfs_release_file (struct inode * inode, struct file * filp) +int btrfs_release_file(struct inode * inode, struct file * filp) { btrfs_del_ordered_inode(inode); + if (filp->private_data) + btrfs_ioctl_trans_end(filp); return 0; } @@ -1011,6 +1013,9 @@ static int btrfs_sync_file(struct file *file, /* * ok we haven't committed the transaction yet, lets do a commit */ + if (file->private_data) + btrfs_ioctl_trans_end(file); + trans = btrfs_start_transaction(root, 1); if (!trans) { ret = -ENOMEM; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 31aa4ba06fc..0f14697bece 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3336,6 +3336,61 @@ out_fput: return ret; } +/* + * there are many ways the trans_start and trans_end ioctls can lead + * to deadlocks. They should only be used by applications that + * basically own the machine, and have a very in depth understanding + * of all the possible deadlocks and enospc problems. + */ +long btrfs_ioctl_trans_start(struct file *file) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + int ret = 0; + + mutex_lock(&root->fs_info->fs_mutex); + if (file->private_data) { + ret = -EINPROGRESS; + goto out; + } + trans = btrfs_start_transaction(root, 0); + if (trans) + file->private_data = trans; + else + ret = -ENOMEM; + /*printk(KERN_INFO "btrfs_ioctl_trans_start on %p\n", file);*/ +out: + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + +/* + * there are many ways the trans_start and trans_end ioctls can lead + * to deadlocks. 
They should only be used by applications that + * basically own the machine, and have a very in depth understanding + * of all the possible deadlocks and enospc problems. + */ +long btrfs_ioctl_trans_end(struct file *file) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + int ret = 0; + + mutex_lock(&root->fs_info->fs_mutex); + trans = file->private_data; + if (!trans) { + ret = -EINVAL; + goto out; + } + btrfs_end_transaction(trans, root); + file->private_data = 0; +out: + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -3356,6 +3411,13 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_balance(root->fs_info->dev_root); case BTRFS_IOC_CLONE: return btrfs_ioctl_clone(file, arg); + case BTRFS_IOC_TRANS_START: + return btrfs_ioctl_trans_start(file); + case BTRFS_IOC_TRANS_END: + return btrfs_ioctl_trans_end(file); + case BTRFS_IOC_SYNC: + btrfs_sync_fs(file->f_dentry->d_sb, 1); + return 0; } return -ENOTTY; @@ -3679,6 +3741,7 @@ static struct file_operations btrfs_dir_file_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = btrfs_ioctl, #endif + .release = btrfs_release_file, }; static struct extent_io_ops btrfs_extent_io_ops = { diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index b0e73f51d63..85ed35a775b 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -36,6 +36,14 @@ struct btrfs_ioctl_vol_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ struct btrfs_ioctl_vol_args) +/* trans start and trans end are dangerous, and only for + * use by applications that know how to avoid the + * resulting deadlocks + */ +#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6) +#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7) +#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8) + #define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int) #define 
BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ struct btrfs_ioctl_vol_args) @@ -43,4 +51,5 @@ struct btrfs_ioctl_vol_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ struct btrfs_ioctl_vol_args) + #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 77f44494e22..39bb86945ed 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -293,7 +293,7 @@ fail_close: return err; } -static int btrfs_sync_fs(struct super_block *sb, int wait) +int btrfs_sync_fs(struct super_block *sb, int wait) { struct btrfs_trans_handle *trans; struct btrfs_root *root; -- cgit v1.2.3-70-g09d2 From c5c9cd4d1b827fe545ed2a945e91e3a6909f3886 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 12 Nov 2008 14:32:25 -0500 Subject: Btrfs: allow clone of an arbitrary file range This patch adds an additional CLONE_RANGE ioctl to clone an arbitrary (block-aligned) file range to another file. The original CLONE ioctl becomes a special case of cloning the entire file range. The logic is a bit more complex now since ranges may be cloned to different offsets, and because we may only be cloning the beginning or end of a particular extent or checksum item. An additional sanity check ensures the source and destination files aren't the same (which would previously deadlock), although eventually this could be extended to allow the duplication of file data at a different offset within the same file. Any extents within the destination range in the target file are dropped. We currently do not cope with the case where a compressed inline extent needs to be split. This will probably require decompressing the extent into a temporary address_space, and inserting just the cloned portion as a new compressed inline extent. For now, just return -EINVAL in this case. Note that this never comes up in the more common case of cloning an entire file. 
Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 253 ++++++++++++++++++++++++++++++++++++++++++++++--------- fs/btrfs/ioctl.h | 9 ++ 2 files changed, 221 insertions(+), 41 deletions(-) (limited to 'fs/btrfs/ioctl.h') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9ff2b4e0e92..4d7cc7c504d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -592,7 +592,8 @@ out: return ret; } -long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) +long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, u64 off, + u64 olen, u64 destoff) { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -606,12 +607,29 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) u32 nritems; int slot; int ret; + u64 len = olen; + u64 bs = root->fs_info->sb->s_blocksize; + u64 hint_byte; - src_file = fget(src_fd); + /* + * TODO: + * - split compressed inline extents. annoying: we need to + * decompress into destination's address_space (the file offset + * may change, so source mapping won't do), then recompress (or + * otherwise reinsert) a subrange. + * - allow ranges within the same file to be cloned (provided + * they don't overlap)? 
+ */ + + src_file = fget(srcfd); if (!src_file) return -EBADF; src = src_file->f_dentry->d_inode; + ret = -EINVAL; + if (src == inode) + goto out_fput; + ret = -EISDIR; if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) goto out_fput; @@ -640,27 +658,46 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) mutex_lock(&inode->i_mutex); } - ret = -ENOTEMPTY; - if (inode->i_size) + /* determine range to clone */ + ret = -EINVAL; + if (off >= src->i_size || off + len > src->i_size) goto out_unlock; + if (len == 0) + olen = len = src->i_size - off; + /* if we extend to eof, continue to block boundary */ + if (off + len == src->i_size) + len = ((src->i_size + bs-1) & ~(bs-1)) + - off; + + /* verify the end result is block aligned */ + if ((off & (bs-1)) || + ((off + len) & (bs-1))) + goto out_unlock; + + printk("final src extent is %llu~%llu\n", off, len); + printk("final dst extent is %llu~%llu\n", destoff, len); /* do any pending delalloc/csum calc on src, one way or another, and lock file content */ while (1) { struct btrfs_ordered_extent *ordered; - lock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); - ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); + lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); + ordered = btrfs_lookup_first_ordered_extent(inode, off+len); if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered) break; - unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); + unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); if (ordered) btrfs_put_ordered_extent(ordered); - btrfs_wait_ordered_range(src, 0, (u64)-1); + btrfs_wait_ordered_range(src, off, off+len); } trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); + /* punch hole in destination first */ + btrfs_drop_extents(trans, root, inode, off, off+len, 0, &hint_byte); + + /* clone data */ key.objectid = src->i_ino; key.type = BTRFS_EXTENT_DATA_KEY; key.offset = 0; @@ -691,56 +728,178 @@ long btrfs_ioctl_clone(struct file *file, unsigned long 
src_fd) key.objectid != src->i_ino) break; - if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY || - btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) { + if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { + struct btrfs_file_extent_item *extent; + int type; u32 size; struct btrfs_key new_key; + u64 disko = 0, diskl = 0; + u64 datao = 0, datal = 0; + u8 comp; size = btrfs_item_size_nr(leaf, slot); read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, slot), size); + + extent = btrfs_item_ptr(leaf, slot, + struct btrfs_file_extent_item); + comp = btrfs_file_extent_compression(leaf, extent); + type = btrfs_file_extent_type(leaf, extent); + if (type == BTRFS_FILE_EXTENT_REG) { + disko = btrfs_file_extent_disk_bytenr(leaf, extent); + diskl = btrfs_file_extent_disk_num_bytes(leaf, extent); + datao = btrfs_file_extent_offset(leaf, extent); + datal = btrfs_file_extent_num_bytes(leaf, extent); + } else if (type == BTRFS_FILE_EXTENT_INLINE) { + /* take upper bound, may be compressed */ + datal = btrfs_file_extent_ram_bytes(leaf, + extent); + } btrfs_release_path(root, path); + if (key.offset + datal < off || + key.offset >= off+len) + goto next; + memcpy(&new_key, &key, sizeof(new_key)); new_key.objectid = inode->i_ino; - ret = btrfs_insert_empty_item(trans, root, path, - &new_key, size); - if (ret) - goto out; + new_key.offset = key.offset + destoff - off; - leaf = path->nodes[0]; - slot = path->slots[0]; - write_extent_buffer(leaf, buf, + if (type == BTRFS_FILE_EXTENT_REG) { + ret = btrfs_insert_empty_item(trans, root, path, + &new_key, size); + if (ret) + goto out; + + leaf = path->nodes[0]; + slot = path->slots[0]; + write_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, slot), size); - btrfs_mark_buffer_dirty(leaf); - } - - if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { - struct btrfs_file_extent_item *extent; - int found_type; - extent = btrfs_item_ptr(leaf, slot, + extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); - found_type = 
btrfs_file_extent_type(leaf, extent); - if (found_type == BTRFS_FILE_EXTENT_REG || - found_type == BTRFS_FILE_EXTENT_PREALLOC) { - u64 ds = btrfs_file_extent_disk_bytenr(leaf, - extent); - u64 dl = btrfs_file_extent_disk_num_bytes(leaf, - extent); - /* ds == 0 means there's a hole */ - if (ds != 0) { + printk(" orig disk %llu~%llu data %llu~%llu\n", + disko, diskl, datao, datal); + + if (off > key.offset) { + datao += off - key.offset; + datal -= off - key.offset; + } + if (key.offset + datao + datal + key.offset > + off + len) + datal = off + len - key.offset - datao; + /* disko == 0 means it's a hole */ + if (!disko) + datao = 0; + printk(" final disk %llu~%llu data %llu~%llu\n", + disko, diskl, datao, datal); + + btrfs_set_file_extent_offset(leaf, extent, + datao); + btrfs_set_file_extent_num_bytes(leaf, extent, + datal); + if (disko) { + inode_add_bytes(inode, datal); ret = btrfs_inc_extent_ref(trans, root, - ds, dl, leaf->start, - root->root_key.objectid, - trans->transid, - inode->i_ino); + disko, diskl, leaf->start, + root->root_key.objectid, + trans->transid, + inode->i_ino); BUG_ON(ret); } + } else if (type == BTRFS_FILE_EXTENT_INLINE) { + u64 skip = 0; + u64 trim = 0; + if (off > key.offset) { + skip = off - key.offset; + new_key.offset += skip; + } + if (key.offset + datal > off+len) + trim = key.offset + datal - (off+len); + printk("len %lld skip %lld trim %lld\n", + datal, skip, trim); + if (comp && (skip || trim)) { + printk("btrfs clone_range can't split compressed inline extents yet\n"); + ret = -EINVAL; + goto out; + } + size -= skip + trim; + datal -= skip + trim; + ret = btrfs_insert_empty_item(trans, root, path, + &new_key, size); + if (ret) + goto out; + + if (skip) { + u32 start = btrfs_file_extent_calc_inline_size(0); + memmove(buf+start, buf+start+skip, + datal); + } + + leaf = path->nodes[0]; + slot = path->slots[0]; + write_extent_buffer(leaf, buf, + btrfs_item_ptr_offset(leaf, slot), + size); + inode_add_bytes(inode, datal); } + + 
btrfs_mark_buffer_dirty(leaf); } + + if (btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) { + u32 size; + struct btrfs_key new_key; + u64 coverslen; + int coff, clen; + + size = btrfs_item_size_nr(leaf, slot); + coverslen = (size / BTRFS_CRC32_SIZE) << + root->fs_info->sb->s_blocksize_bits; + printk("csums for %llu~%llu\n", + key.offset, coverslen); + if (key.offset + coverslen < off || + key.offset >= off+len) + goto next; + + read_extent_buffer(leaf, buf, + btrfs_item_ptr_offset(leaf, slot), + size); + btrfs_release_path(root, path); + + coff = 0; + if (off > key.offset) + coff = ((off - key.offset) >> + root->fs_info->sb->s_blocksize_bits) * + BTRFS_CRC32_SIZE; + clen = size - coff; + if (key.offset + coverslen > off+len) + clen -= ((key.offset+coverslen-off-len) >> + root->fs_info->sb->s_blocksize_bits) * + BTRFS_CRC32_SIZE; + printk(" will dup %d~%d of %d\n", + coff, clen, size); + + memcpy(&new_key, &key, sizeof(new_key)); + new_key.objectid = inode->i_ino; + new_key.offset = key.offset + destoff - off; + + ret = btrfs_insert_empty_item(trans, root, path, + &new_key, clen); + if (ret) + goto out; + + leaf = path->nodes[0]; + slot = path->slots[0]; + write_extent_buffer(leaf, buf + coff, + btrfs_item_ptr_offset(leaf, slot), + clen); + btrfs_mark_buffer_dirty(leaf); + } + + next: btrfs_release_path(root, path); key.offset++; } @@ -749,13 +908,13 @@ out: btrfs_release_path(root, path); if (ret == 0) { inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode_set_bytes(inode, inode_get_bytes(src)); - btrfs_i_size_write(inode, src->i_size); + if (destoff + olen > inode->i_size) + btrfs_i_size_write(inode, destoff + olen); BTRFS_I(inode)->flags = BTRFS_I(src)->flags; ret = btrfs_update_inode(trans, root, inode); } btrfs_end_transaction(trans, root); - unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); + unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); if (ret) vmtruncate(inode, 0); out_unlock: @@ -768,6 +927,16 @@ out_fput: return ret; } +long 
btrfs_ioctl_clone_range(struct file *file, unsigned long argptr) +{ + struct btrfs_ioctl_clone_range_args args; + + if (copy_from_user(&args, (void *)argptr, sizeof(args))) + return -EFAULT; + return btrfs_ioctl_clone(file, args.src_fd, args.src_offset, + args.src_length, args.dest_offset); +} + /* * there are many ways the trans_start and trans_end ioctls can lead * to deadlocks. They should only be used by applications that @@ -851,7 +1020,9 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_BALANCE: return btrfs_balance(root->fs_info->dev_root); case BTRFS_IOC_CLONE: - return btrfs_ioctl_clone(file, arg); + return btrfs_ioctl_clone(file, arg, 0, 0, 0); + case BTRFS_IOC_CLONE_RANGE: + return btrfs_ioctl_clone_range(file, arg); case BTRFS_IOC_TRANS_START: return btrfs_ioctl_trans_start(file); case BTRFS_IOC_TRANS_END: diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 85ed35a775b..989ba8a0121 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -52,4 +52,13 @@ struct btrfs_ioctl_vol_args { #define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ struct btrfs_ioctl_vol_args) +struct btrfs_ioctl_clone_range_args { + __s64 src_fd; + __u64 src_offset, src_length; + __u64 dest_offset; +}; + +#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \ + struct btrfs_ioctl_clone_range_args) + #endif -- cgit v1.2.3-70-g09d2 From 3de4586c5278a28107030c336956381f69ff7a9d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 17 Nov 2008 21:02:50 -0500 Subject: Btrfs: Allow subvolumes and snapshots anywhere in the directory tree Before, all snapshots and subvolumes lived in a single flat directory. This was awkward and confusing because the single flat directory was only writable with the ioctls. This commit changes the ioctls to create subvols and snapshots at any point in the directory tree. This requires making separate ioctls for snapshot and subvol creation instead of combining them into one.
The subvol ioctl does: btrfsctl -S subvol_name parent_dir After the ioctl is done subvol_name lives inside parent_dir. The snapshot ioctl does: btrfsctl -s path_for_snapshot root_to_snapshot path_for_snapshot can be an absolute or relative path. btrfsctl breaks it up into directory and basename components. root_to_snapshot can be any file or directory in the FS. The snapshot is taken of the entire root where that file lives. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 7 +++-- fs/btrfs/disk-io.c | 14 +++++++--- fs/btrfs/inode.c | 50 ++++++++++++++++++++++------------- fs/btrfs/ioctl.c | 71 ++++++++++++++++++++++++++++++++++++-------------- fs/btrfs/ioctl.h | 7 +++-- fs/btrfs/super.c | 10 +++---- fs/btrfs/transaction.c | 66 +++++++++++++++++++++++++++++++++++++--------- fs/btrfs/transaction.h | 2 ++ 8 files changed, 162 insertions(+), 65 deletions(-) (limited to 'fs/btrfs/ioctl.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5ff74282a62..5611f8e035a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -606,6 +606,7 @@ struct btrfs_fs_info { struct btrfs_root *tree_root; struct btrfs_root *chunk_root; struct btrfs_root *dev_root; + struct btrfs_root *fs_root; /* the log root tree is a directory of all the other log roots */ struct btrfs_root *log_root_tree; @@ -758,7 +759,6 @@ struct btrfs_root { struct btrfs_root_item root_item; struct btrfs_key root_key; struct btrfs_fs_info *fs_info; - struct inode *inode; struct extent_io_tree dirty_log_pages; struct kobject root_kobj; @@ -1876,6 +1876,8 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, #define PageChecked PageFsMisc #endif +struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); +int btrfs_set_inode_index(struct inode *dir, u64 *index); int btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *dir, struct inode *inode, @@ -1896,9 +1898,6 @@ int btrfs_create_subvol_root(struct btrfs_root *new_root, struct dentry *dentry, 
struct btrfs_trans_handle *trans, u64 new_dirid, struct btrfs_block_group_cache *block_group); -void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name, - int namelen); - int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 82833e5d84b..0a5350573f6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -838,7 +838,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, u64 objectid) { root->node = NULL; - root->inode = NULL; root->commit_root = NULL; root->ref_tree = NULL; root->sectorsize = sectorsize; @@ -1430,6 +1429,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, u32 blocksize; u32 stripesize; u64 generation; + struct btrfs_key location; struct buffer_head *bh; struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); @@ -1729,7 +1729,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, goto fail_cleaner; if (sb->s_flags & MS_RDONLY) - return tree_root; + goto read_fs_root; if (btrfs_super_log_root(disk_super) != 0) { u32 blocksize; @@ -1755,6 +1755,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, ret = btrfs_cleanup_reloc_trees(tree_root); BUG_ON(ret); + location.objectid = BTRFS_FS_TREE_OBJECTID; + location.type = BTRFS_ROOT_ITEM_KEY; + location.offset = (u64)-1; + +read_fs_root: + fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); + if (!fs_info->fs_root) + goto fail_cleaner; return tree_root; fail_cleaner: @@ -1944,8 +1952,6 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) (unsigned long)root->root_key.objectid); if (root->in_sysfs) btrfs_sysfs_del_root(root); - if (root->inode) - iput(root->inode); if (root->node) free_extent_buffer(root->node); if (root->commit_root) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3e3620e69bb..e163b1b7470 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c 
@@ -3038,8 +3038,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, return inode; } -static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) +struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) { struct inode * inode; struct btrfs_inode *bi = BTRFS_I(dir); @@ -3067,13 +3066,21 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, inode = btrfs_iget(dir->i_sb, &location, sub_root, &new); if (IS_ERR(inode)) return ERR_CAST(inode); - - /* the inode and parent dir are two different roots */ - if (new && root != sub_root) { - igrab(inode); - sub_root->inode = inode; - } } + return inode; +} + +static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct inode *inode; + + if (dentry->d_name.len > BTRFS_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + + inode = btrfs_lookup_dentry(dir, dentry); + if (IS_ERR(inode)) + return ERR_CAST(inode); return d_splice_alias(inode, dentry); } @@ -3129,7 +3136,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, return 0; filp->f_pos = 2; } - path = btrfs_alloc_path(); path->reada = 2; @@ -3159,6 +3165,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, path->slots[0]++; } } + advance = 1; item = btrfs_item_nr(leaf, slot); btrfs_item_key_to_cpu(leaf, &found_key, slot); @@ -3194,16 +3201,25 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; btrfs_dir_item_key_to_cpu(leaf, di, &location); + + /* is this a reference to our own snapshot? 
If so + * skip it + */ + if (location.type == BTRFS_ROOT_ITEM_KEY && + location.objectid == root->root_key.objectid) { + over = 0; + goto skip; + } over = filldir(dirent, name_ptr, name_len, found_key.offset, location.objectid, d_type); +skip: if (name_ptr != tmp_name) kfree(name_ptr); if (over) goto nopos; - di_len = btrfs_dir_name_len(leaf, di) + btrfs_dir_data_len(leaf, di) + sizeof(*di); di_cur += di_len; @@ -3318,8 +3334,7 @@ out: * helper to find a free sequence number in a given directory. This current * code is very simple, later versions will do smarter things in the btree */ -static int btrfs_set_inode_index(struct inode *dir, struct inode *inode, - u64 *index) +int btrfs_set_inode_index(struct inode *dir, u64 *index) { int ret = 0; @@ -3365,7 +3380,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); if (dir) { - ret = btrfs_set_inode_index(dir, inode, index); + ret = btrfs_set_inode_index(dir, index); if (ret) return ERR_PTR(ret); } @@ -3651,7 +3666,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, err = btrfs_check_free_space(root, 1, 0); if (err) goto fail; - err = btrfs_set_inode_index(dir, inode, &index); + err = btrfs_set_inode_index(dir, &index); if (err) goto fail; @@ -4349,13 +4364,13 @@ out: * Invalidate a single dcache entry at the root of the filesystem. * Needed after creation of snapshot or subvolume. 
*/ -void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name, +void btrfs_invalidate_dcache_root(struct inode *dir, char *name, int namelen) { struct dentry *alias, *entry; struct qstr qstr; - alias = d_find_alias(root->fs_info->sb->s_root->d_inode); + alias = d_find_alias(dir); if (alias) { qstr.name = name; qstr.len = namelen; @@ -4387,7 +4402,6 @@ int btrfs_create_subvol_root(struct btrfs_root *new_root, struct dentry *dentry, return PTR_ERR(inode); inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; - new_root->inode = inode; inode->i_nlink = 1; btrfs_i_size_write(inode, 0); @@ -4590,7 +4604,7 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, } } - ret = btrfs_set_inode_index(new_dir, old_inode, &index); + ret = btrfs_set_inode_index(new_dir, &index); if (ret) goto out_fail; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f43df72b0e1..ec45b308613 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -67,6 +67,7 @@ static noinline int create_subvol(struct btrfs_root *root, int err; u64 objectid; u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; + u64 index = 0; unsigned long nr = 1; ret = btrfs_check_free_space(root, 1, 0); @@ -126,6 +127,7 @@ static noinline int create_subvol(struct btrfs_root *root, key.objectid = objectid; key.offset = 1; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); +printk("inserting root objectid %Lu\n", objectid); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &root_item); if (ret) @@ -135,24 +137,27 @@ static noinline int create_subvol(struct btrfs_root *root, * insert the directory item */ key.offset = (u64)-1; - dir = root->fs_info->sb->s_root->d_inode; - ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, + dir = dentry->d_parent->d_inode; + ret = btrfs_set_inode_index(dir, &index); + BUG_ON(ret); + + ret = btrfs_insert_dir_item(trans, root, name, namelen, dir->i_ino, &key, - BTRFS_FT_DIR, 0); + BTRFS_FT_DIR, index); if (ret) goto 
fail; - +#if 0 ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root, name, namelen, objectid, root->fs_info->sb->s_root->d_inode->i_ino, 0); if (ret) goto fail; - +#endif ret = btrfs_commit_transaction(trans, root); if (ret) goto fail_commit; - new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen); + new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); BUG_ON(!new_root); trans = btrfs_start_transaction(new_root, 1); @@ -170,14 +175,16 @@ fail: ret = err; fail_commit: btrfs_btree_balance_dirty(root, nr); +printk("all done ret %d\n", ret); return ret; } -static int create_snapshot(struct btrfs_root *root, char *name, int namelen) +static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, + char *name, int namelen) { struct btrfs_pending_snapshot *pending_snapshot; struct btrfs_trans_handle *trans; - int ret; + int ret = 0; int err; unsigned long nr = 0; @@ -188,7 +195,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) if (ret) goto fail_unlock; - pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS); + pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); if (!pending_snapshot) { ret = -ENOMEM; goto fail_unlock; @@ -201,12 +208,12 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) } memcpy(pending_snapshot->name, name, namelen); pending_snapshot->name[namelen] = '\0'; + pending_snapshot->dentry = dentry; trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); pending_snapshot->root = root; list_add(&pending_snapshot->list, &trans->transaction->pending_snapshots); - ret = btrfs_update_inode(trans, root, root->inode); err = btrfs_commit_transaction(trans, root); fail_unlock: @@ -230,7 +237,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) * inside this filesystem so it's quite a bit simpler. 
*/ static noinline int btrfs_mksubvol(struct path *parent, char *name, - int mode, int namelen) + int mode, int namelen, + struct btrfs_root *snap_src) { struct dentry *dentry; int error; @@ -248,6 +256,7 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, if (!IS_POSIXACL(parent->dentry->d_inode)) mode &= ~current->fs->umask; + error = mnt_want_write(parent->mnt); if (error) goto out_dput; @@ -266,8 +275,12 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, * Also we should pass on the mode eventually to allow creating new * subvolume with specific mode bits. */ - error = create_subvol(BTRFS_I(parent->dentry->d_inode)->root, dentry, - name, namelen); + if (snap_src) { + error = create_snapshot(snap_src, dentry, name, namelen); + } else { + error = create_subvol(BTRFS_I(parent->dentry->d_inode)->root, + dentry, name, namelen); + } if (error) goto out_drop_write; @@ -471,15 +484,16 @@ out: } static noinline int btrfs_ioctl_snap_create(struct file *file, - void __user *arg) + void __user *arg, int subvol) { struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct btrfs_ioctl_vol_args *vol_args; struct btrfs_dir_item *di; struct btrfs_path *path; + struct file *src_file; u64 root_dirid; int namelen; - int ret; + int ret = 0; if (root->fs_info->sb->s_flags & MS_RDONLY) return -EROFS; @@ -523,12 +537,29 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, goto out; } - if (root == root->fs_info->tree_root) { + if (subvol) { ret = btrfs_mksubvol(&file->f_path, vol_args->name, file->f_path.dentry->d_inode->i_mode, - namelen); + namelen, NULL); } else { - ret = create_snapshot(root, vol_args->name, namelen); + struct inode *src_inode; + src_file = fget(vol_args->fd); + if (!src_file) { + ret = -EINVAL; + goto out; + } + + src_inode = src_file->f_path.dentry->d_inode; + if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) { + printk("btrfs: Snapshot src from another FS\n"); + ret = -EINVAL; + 
fput(src_file); + goto out; + } + ret = btrfs_mksubvol(&file->f_path, vol_args->name, + file->f_path.dentry->d_inode->i_mode, + namelen, BTRFS_I(src_inode)->root); + fput(src_file); } out: @@ -1030,7 +1061,9 @@ long btrfs_ioctl(struct file *file, unsigned int switch (cmd) { case BTRFS_IOC_SNAP_CREATE: - return btrfs_ioctl_snap_create(file, (void __user *)arg); + return btrfs_ioctl_snap_create(file, (void __user *)arg, 0); + case BTRFS_IOC_SUBVOL_CREATE: + return btrfs_ioctl_snap_create(file, (void __user *)arg, 1); case BTRFS_IOC_DEFRAG: return btrfs_ioctl_defrag(file); case BTRFS_IOC_RESIZE: diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 989ba8a0121..78049ea208d 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -22,9 +22,10 @@ #define BTRFS_IOCTL_MAGIC 0x94 #define BTRFS_VOL_NAME_MAX 255 -#define BTRFS_PATH_NAME_MAX 4095 +#define BTRFS_PATH_NAME_MAX 3072 struct btrfs_ioctl_vol_args { + __s64 fd; char name[BTRFS_PATH_NAME_MAX + 1]; }; @@ -51,7 +52,6 @@ struct btrfs_ioctl_vol_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ struct btrfs_ioctl_vol_args) - struct btrfs_ioctl_clone_range_args { __s64 src_fd; __u64 src_offset, src_length; @@ -61,4 +61,7 @@ struct btrfs_ioctl_clone_range_args { #define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \ struct btrfs_ioctl_clone_range_args) +#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ + struct btrfs_ioctl_vol_args) + #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 92393cc60d0..77c5eff3e20 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -285,11 +285,11 @@ static int btrfs_parse_early_options(const char *options, int flags, out: /* * If no subvolume name is specified we use the default one. Allocate - * a copy of the string "default" here so that code later in the + * a copy of the string "." here so that code later in the * mount path doesn't care if it's the default volume or another one. 
*/ if (!*subvol_name) { - *subvol_name = kstrdup("default", GFP_KERNEL); + *subvol_name = kstrdup(".", GFP_KERNEL); if (!*subvol_name) return -ENOMEM; } @@ -323,12 +323,12 @@ static int btrfs_fill_super(struct super_block * sb, } sb->s_fs_info = tree_root; disk_super = &tree_root->fs_info->super_copy; - inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super), - tree_root); + inode = btrfs_iget_locked(sb, BTRFS_FIRST_FREE_OBJECTID, + tree_root->fs_info->fs_root); bi = BTRFS_I(inode); bi->location.objectid = inode->i_ino; bi->location.offset = 0; - bi->root = tree_root; + bi->root = tree_root->fs_info->fs_root; btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 202c1b6df4a..eec8b246503 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -779,7 +779,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct extent_buffer *tmp; struct extent_buffer *old; int ret; - int namelen; u64 objectid; new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); @@ -816,28 +815,48 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, if (ret) goto fail; + key.offset = (u64)-1; + memcpy(&pending->root_key, &key, sizeof(key)); +fail: + kfree(new_root_item); + return ret; +} + +static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info, + struct btrfs_pending_snapshot *pending) +{ + int ret; + int namelen; + u64 index = 0; + struct btrfs_trans_handle *trans; + struct inode *parent_inode; + struct inode *inode; + + trans = btrfs_start_transaction(fs_info->fs_root, 1); + /* * insert the directory item */ - key.offset = (u64)-1; namelen = strlen(pending->name); - ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, - pending->name, namelen, - root->fs_info->sb->s_root->d_inode->i_ino, - &key, BTRFS_FT_DIR, 0); + parent_inode = pending->dentry->d_parent->d_inode; + ret = btrfs_set_inode_index(parent_inode, &index); 
+ ret = btrfs_insert_dir_item(trans, + BTRFS_I(parent_inode)->root, + pending->name, namelen, + parent_inode->i_ino, + &pending->root_key, BTRFS_FT_DIR, index); if (ret) goto fail; - +#if 0 ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root, pending->name, strlen(pending->name), objectid, root->fs_info->sb->s_root->d_inode->i_ino, 0); - - /* Invalidate existing dcache entry for new snapshot. */ - btrfs_invalidate_dcache_root(root, pending->name, namelen); - +#endif + inode = btrfs_lookup_dentry(parent_inode, pending->dentry); + d_instantiate(pending->dentry, inode); fail: - kfree(new_root_item); + btrfs_end_transaction(trans, fs_info->fs_root); return ret; } @@ -846,6 +865,22 @@ fail: */ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) +{ + struct btrfs_pending_snapshot *pending; + struct list_head *head = &trans->transaction->pending_snapshots; + struct list_head *cur; + int ret; + + list_for_each(cur, head) { + pending = list_entry(cur, struct btrfs_pending_snapshot, list); + ret = create_pending_snapshot(trans, fs_info, pending); + BUG_ON(ret); + } + return 0; +} + +static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) { struct btrfs_pending_snapshot *pending; struct list_head *head = &trans->transaction->pending_snapshots; @@ -854,7 +889,7 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, while(!list_empty(head)) { pending = list_entry(head->next, struct btrfs_pending_snapshot, list); - ret = create_pending_snapshot(trans, fs_info, pending); + ret = finish_pending_snapshot(fs_info, pending); BUG_ON(ret); list_del(&pending->list); kfree(pending->name); @@ -1033,11 +1068,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_drop_dead_reloc_roots(root); mutex_unlock(&root->fs_info->tree_reloc_mutex); + /* do the directory inserts of any pending snapshot creations */ + 
finish_pending_snapshots(trans, root->fs_info); + mutex_lock(&root->fs_info->trans_mutex); cur_trans->commit_done = 1; root->fs_info->last_trans_committed = cur_trans->transid; wake_up(&cur_trans->commit_wait); + put_transaction(cur_trans); put_transaction(cur_trans); @@ -1046,6 +1085,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots); mutex_unlock(&root->fs_info->trans_mutex); + kmem_cache_free(btrfs_trans_handle_cachep, trans); if (root->fs_info->closing) { diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index eef2cb7d7e7..202c8be6c05 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -47,8 +47,10 @@ struct btrfs_trans_handle { }; struct btrfs_pending_snapshot { + struct dentry *dentry; struct btrfs_root *root; char *name; + struct btrfs_key root_key; struct list_head list; }; -- cgit v1.2.3-70-g09d2