summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r--fs/btrfs/file.c306
1 files changed, 281 insertions, 25 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 75899a01dde..fa4ef18b66b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -40,6 +40,263 @@
#include "locking.h"
#include "compat.h"
+/*
+ * when auto defrag is enabled we
+ * queue up these defrag structs to remember which
+ * inodes need defragging passes
+ */
+struct inode_defrag {
+ struct rb_node rb_node;
+ /* objectid */
+ u64 ino;
+ /*
+ * transid where the defrag was added, we search for
+ * extents newer than this
+ */
+ u64 transid;
+
+ /* root objectid */
+ u64 root;
+
+ /* last offset we were able to defrag */
+ u64 last_offset;
+
+ /* if we've wrapped around back to zero once already */
+ int cycled;
+};
+
+/* pop a record for an inode into the defrag tree. The lock
+ * must be held already
+ *
+ * If you're inserting a record for an older transid than an
+ * existing record, the transid already in the tree is lowered
+ *
+ * If an existing record is found the defrag item you
+ * pass in is freed
+ */
+static int __btrfs_add_inode_defrag(struct inode *inode,
+ struct inode_defrag *defrag)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct inode_defrag *entry;
+ struct rb_node **p;
+ struct rb_node *parent = NULL;
+
+ p = &root->fs_info->defrag_inodes.rb_node;
+ while (*p) {
+ parent = *p;
+ entry = rb_entry(parent, struct inode_defrag, rb_node);
+
+ if (defrag->ino < entry->ino)
+ p = &parent->rb_left;
+ else if (defrag->ino > entry->ino)
+ p = &parent->rb_right;
+ else {
+ /* if we're reinserting an entry for
+ * an old defrag run, make sure to
+ * lower the transid of our existing record
+ */
+ if (defrag->transid < entry->transid)
+ entry->transid = defrag->transid;
+ if (defrag->last_offset > entry->last_offset)
+ entry->last_offset = defrag->last_offset;
+ goto exists;
+ }
+ }
+ BTRFS_I(inode)->in_defrag = 1;
+ rb_link_node(&defrag->rb_node, parent, p);
+ rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
+ return 0;
+
+exists:
+ kfree(defrag);
+ return 0;
+
+}
+
+/*
+ * insert a defrag record for this inode if auto defrag is
+ * enabled
+ */
+int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
+ struct inode *inode)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct inode_defrag *defrag;
+ int ret = 0;
+ u64 transid;
+
+ if (!btrfs_test_opt(root, AUTO_DEFRAG))
+ return 0;
+
+ if (btrfs_fs_closing(root->fs_info))
+ return 0;
+
+ if (BTRFS_I(inode)->in_defrag)
+ return 0;
+
+ if (trans)
+ transid = trans->transid;
+ else
+ transid = BTRFS_I(inode)->root->last_trans;
+
+ defrag = kzalloc(sizeof(*defrag), GFP_NOFS);
+ if (!defrag)
+ return -ENOMEM;
+
+ defrag->ino = btrfs_ino(inode);
+ defrag->transid = transid;
+ defrag->root = root->root_key.objectid;
+
+ spin_lock(&root->fs_info->defrag_inodes_lock);
+ if (!BTRFS_I(inode)->in_defrag)
+ ret = __btrfs_add_inode_defrag(inode, defrag);
+ spin_unlock(&root->fs_info->defrag_inodes_lock);
+ return ret;
+}
+
+/*
+ * must be called with the defrag_inodes lock held
+ */
+struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, u64 ino,
+ struct rb_node **next)
+{
+ struct inode_defrag *entry = NULL;
+ struct rb_node *p;
+ struct rb_node *parent = NULL;
+
+ p = info->defrag_inodes.rb_node;
+ while (p) {
+ parent = p;
+ entry = rb_entry(parent, struct inode_defrag, rb_node);
+
+ if (ino < entry->ino)
+ p = parent->rb_left;
+ else if (ino > entry->ino)
+ p = parent->rb_right;
+ else
+ return entry;
+ }
+
+ if (next) {
+ while (parent && ino > entry->ino) {
+ parent = rb_next(parent);
+ entry = rb_entry(parent, struct inode_defrag, rb_node);
+ }
+ *next = parent;
+ }
+ return NULL;
+}
+
+/*
+ * run through the list of inodes in the FS that need
+ * defragging
+ */
+int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
+{
+ struct inode_defrag *defrag;
+ struct btrfs_root *inode_root;
+ struct inode *inode;
+ struct rb_node *n;
+ struct btrfs_key key;
+ struct btrfs_ioctl_defrag_range_args range;
+ u64 first_ino = 0;
+ int num_defrag;
+ int defrag_batch = 1024;
+
+ memset(&range, 0, sizeof(range));
+ range.len = (u64)-1;
+
+ atomic_inc(&fs_info->defrag_running);
+ spin_lock(&fs_info->defrag_inodes_lock);
+ while(1) {
+ n = NULL;
+
+ /* find an inode to defrag */
+ defrag = btrfs_find_defrag_inode(fs_info, first_ino, &n);
+ if (!defrag) {
+ if (n)
+ defrag = rb_entry(n, struct inode_defrag, rb_node);
+ else if (first_ino) {
+ first_ino = 0;
+ continue;
+ } else {
+ break;
+ }
+ }
+
+ /* remove it from the rbtree */
+ first_ino = defrag->ino + 1;
+ rb_erase(&defrag->rb_node, &fs_info->defrag_inodes);
+
+ if (btrfs_fs_closing(fs_info))
+ goto next_free;
+
+ spin_unlock(&fs_info->defrag_inodes_lock);
+
+ /* get the inode */
+ key.objectid = defrag->root;
+ btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
+ key.offset = (u64)-1;
+ inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
+ if (IS_ERR(inode_root))
+ goto next;
+
+ key.objectid = defrag->ino;
+ btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+ key.offset = 0;
+
+ inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
+ if (IS_ERR(inode))
+ goto next;
+
+ /* do a chunk of defrag */
+ BTRFS_I(inode)->in_defrag = 0;
+ range.start = defrag->last_offset;
+ num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
+ defrag_batch);
+ /*
+ * if we filled the whole defrag batch, there
+ * must be more work to do. Queue this defrag
+ * again
+ */
+ if (num_defrag == defrag_batch) {
+ defrag->last_offset = range.start;
+ __btrfs_add_inode_defrag(inode, defrag);
+ /*
+ * we don't want to kfree defrag, we added it back to
+ * the rbtree
+ */
+ defrag = NULL;
+ } else if (defrag->last_offset && !defrag->cycled) {
+ /*
+ * we didn't fill our defrag batch, but
+ * we didn't start at zero. Make sure we loop
+ * around to the start of the file.
+ */
+ defrag->last_offset = 0;
+ defrag->cycled = 1;
+ __btrfs_add_inode_defrag(inode, defrag);
+ defrag = NULL;
+ }
+
+ iput(inode);
+next:
+ spin_lock(&fs_info->defrag_inodes_lock);
+next_free:
+ kfree(defrag);
+ }
+ spin_unlock(&fs_info->defrag_inodes_lock);
+
+ atomic_dec(&fs_info->defrag_running);
+
+ /*
+ * during unmount, we use the transaction_wait queue to
+ * wait for the defragger to stop
+ */
+ wake_up(&fs_info->transaction_wait);
+ return 0;
+}
/* simple helper to fault in pages and copy. This should go away
* and be replaced with calls into generic code.
@@ -191,9 +448,9 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
}
while (1) {
if (!split)
- split = alloc_extent_map(GFP_NOFS);
+ split = alloc_extent_map();
if (!split2)
- split2 = alloc_extent_map(GFP_NOFS);
+ split2 = alloc_extent_map();
BUG_ON(!split || !split2);
write_lock(&em_tree->lock);
@@ -298,6 +555,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
struct btrfs_path *path;
struct btrfs_key key;
struct btrfs_key new_key;
+ u64 ino = btrfs_ino(inode);
u64 search_start = start;
u64 disk_bytenr = 0;
u64 num_bytes = 0;
@@ -318,14 +576,14 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
while (1) {
recow = 0;
- ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
+ ret = btrfs_lookup_file_extent(trans, root, path, ino,
search_start, -1);
if (ret < 0)
break;
if (ret > 0 && path->slots[0] > 0 && search_start == start) {
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
- if (key.objectid == inode->i_ino &&
+ if (key.objectid == ino &&
key.type == BTRFS_EXTENT_DATA_KEY)
path->slots[0]--;
}
@@ -346,7 +604,7 @@ next_slot:
}
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- if (key.objectid > inode->i_ino ||
+ if (key.objectid > ino ||
key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
break;
@@ -376,7 +634,7 @@ next_slot:
search_start = max(key.offset, start);
if (recow) {
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
continue;
}
@@ -393,7 +651,7 @@ next_slot:
ret = btrfs_duplicate_item(trans, root, path,
&new_key);
if (ret == -EAGAIN) {
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
continue;
}
if (ret < 0)
@@ -516,7 +774,7 @@ next_slot:
del_nr = 0;
del_slot = 0;
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
continue;
}
@@ -592,6 +850,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
int del_slot = 0;
int recow;
int ret;
+ u64 ino = btrfs_ino(inode);
btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -600,7 +859,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
again:
recow = 0;
split = start;
- key.objectid = inode->i_ino;
+ key.objectid = ino;
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = split;
@@ -612,8 +871,7 @@ again:
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- BUG_ON(key.objectid != inode->i_ino ||
- key.type != BTRFS_EXTENT_DATA_KEY);
+ BUG_ON(key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY);
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
BUG_ON(btrfs_file_extent_type(leaf, fi) !=
@@ -630,7 +888,7 @@ again:
other_start = 0;
other_end = start;
if (extent_mergeable(leaf, path->slots[0] - 1,
- inode->i_ino, bytenr, orig_offset,
+ ino, bytenr, orig_offset,
&other_start, &other_end)) {
new_key.offset = end;
btrfs_set_item_key_safe(trans, root, path, &new_key);
@@ -653,7 +911,7 @@ again:
other_start = end;
other_end = 0;
if (extent_mergeable(leaf, path->slots[0] + 1,
- inode->i_ino, bytenr, orig_offset,
+ ino, bytenr, orig_offset,
&other_start, &other_end)) {
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
@@ -681,7 +939,7 @@ again:
new_key.offset = split;
ret = btrfs_duplicate_item(trans, root, path, &new_key);
if (ret == -EAGAIN) {
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
goto again;
}
BUG_ON(ret < 0);
@@ -702,7 +960,7 @@ again:
ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
root->root_key.objectid,
- inode->i_ino, orig_offset);
+ ino, orig_offset);
BUG_ON(ret);
if (split == start) {
@@ -718,10 +976,10 @@ again:
other_start = end;
other_end = 0;
if (extent_mergeable(leaf, path->slots[0] + 1,
- inode->i_ino, bytenr, orig_offset,
+ ino, bytenr, orig_offset,
&other_start, &other_end)) {
if (recow) {
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
goto again;
}
extent_end = other_end;
@@ -729,16 +987,16 @@ again:
del_nr++;
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
- inode->i_ino, orig_offset);
+ ino, orig_offset);
BUG_ON(ret);
}
other_start = 0;
other_end = start;
if (extent_mergeable(leaf, path->slots[0] - 1,
- inode->i_ino, bytenr, orig_offset,
+ ino, bytenr, orig_offset,
&other_start, &other_end)) {
if (recow) {
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
goto again;
}
key.offset = other_start;
@@ -746,7 +1004,7 @@ again:
del_nr++;
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
- inode->i_ino, orig_offset);
+ ino, orig_offset);
BUG_ON(ret);
}
if (del_nr == 0) {
@@ -1222,14 +1480,12 @@ int btrfs_sync_file(struct file *file, int datasync)
* the current transaction, we can bail out now without any
* syncing
*/
- mutex_lock(&root->fs_info->trans_mutex);
+ smp_mb();
if (BTRFS_I(inode)->last_trans <=
root->fs_info->last_trans_committed) {
BTRFS_I(inode)->last_trans = 0;
- mutex_unlock(&root->fs_info->trans_mutex);
goto out;
}
- mutex_unlock(&root->fs_info->trans_mutex);
/*
* ok we haven't committed the transaction yet, lets do a commit
@@ -1375,7 +1631,7 @@ static long btrfs_fallocate(struct file *file, int mode,
while (1) {
em = btrfs_get_extent(inode, NULL, 0, cur_offset,
alloc_end - cur_offset, 0);
- BUG_ON(IS_ERR(em) || !em);
+ BUG_ON(IS_ERR_OR_NULL(em));
last_byte = min(extent_map_end(em), alloc_end);
last_byte = (last_byte + mask) & ~mask;
if (em->block_start == EXTENT_MAP_HOLE ||