summaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/acl.c21
-rw-r--r--fs/btrfs/compression.c17
-rw-r--r--fs/btrfs/ctree.c57
-rw-r--r--fs/btrfs/ctree.h102
-rw-r--r--fs/btrfs/dir-item.c2
-rw-r--r--fs/btrfs/disk-io.c73
-rw-r--r--fs/btrfs/export.c82
-rw-r--r--fs/btrfs/extent-tree.c749
-rw-r--r--fs/btrfs/extent_io.c245
-rw-r--r--fs/btrfs/extent_io.h7
-rw-r--r--fs/btrfs/extent_map.c4
-rw-r--r--fs/btrfs/file.c99
-rw-r--r--fs/btrfs/free-space-cache.c753
-rw-r--r--fs/btrfs/free-space-cache.h18
-rw-r--r--fs/btrfs/inode.c518
-rw-r--r--fs/btrfs/ioctl.c441
-rw-r--r--fs/btrfs/ioctl.h17
-rw-r--r--fs/btrfs/ordered-data.c69
-rw-r--r--fs/btrfs/ordered-data.h3
-rw-r--r--fs/btrfs/orphan.c6
-rw-r--r--fs/btrfs/relocation.c109
-rw-r--r--fs/btrfs/root-tree.c2
-rw-r--r--fs/btrfs/super.c100
-rw-r--r--fs/btrfs/transaction.c239
-rw-r--r--fs/btrfs/transaction.h8
-rw-r--r--fs/btrfs/tree-defrag.c2
-rw-r--r--fs/btrfs/tree-log.c38
-rw-r--r--fs/btrfs/volumes.c27
-rw-r--r--fs/btrfs/volumes.h2
-rw-r--r--fs/btrfs/xattr.c2
-rw-r--r--fs/btrfs/zlib.c5
31 files changed, 3124 insertions, 693 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 2222d161c7b..6ae2c8cac9d 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -185,18 +185,23 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
return ret;
}
-int btrfs_check_acl(struct inode *inode, int mask)
+int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags)
{
- struct posix_acl *acl;
int error = -EAGAIN;
- acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
+ if (flags & IPERM_FLAG_RCU) {
+ if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
+ error = -ECHILD;
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl) {
- error = posix_acl_permission(inode, acl, mask);
- posix_acl_release(acl);
+ } else {
+ struct posix_acl *acl;
+ acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (acl) {
+ error = posix_acl_permission(inode, acl, mask);
+ posix_acl_release(acl);
+ }
}
return error;
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 396039b3a8a..b50bc4bd5c5 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -91,23 +91,10 @@ static inline int compressed_bio_size(struct btrfs_root *root,
static struct bio *compressed_bio_alloc(struct block_device *bdev,
u64 first_byte, gfp_t gfp_flags)
{
- struct bio *bio;
int nr_vecs;
nr_vecs = bio_get_nr_vecs(bdev);
- bio = bio_alloc(gfp_flags, nr_vecs);
-
- if (bio == NULL && (current->flags & PF_MEMALLOC)) {
- while (!bio && (nr_vecs /= 2))
- bio = bio_alloc(gfp_flags, nr_vecs);
- }
-
- if (bio) {
- bio->bi_size = 0;
- bio->bi_bdev = bdev;
- bio->bi_sector = first_byte >> 9;
- }
- return bio;
+ return btrfs_bio_alloc(bdev, first_byte >> 9, nr_vecs, gfp_flags);
}
static int check_compressed_csum(struct inode *inode,
@@ -163,7 +150,6 @@ fail:
*/
static void end_compressed_bio_read(struct bio *bio, int err)
{
- struct extent_io_tree *tree;
struct compressed_bio *cb = bio->bi_private;
struct inode *inode;
struct page *page;
@@ -187,7 +173,6 @@ static void end_compressed_bio_read(struct bio *bio, int err)
/* ok, we're the last bio for this extent, lets start
* the decompression.
*/
- tree = &BTRFS_I(inode)->io_tree;
ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
cb->start,
cb->orig_bio->bi_io_vec,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c3df14ce2cc..9ac17159925 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -200,7 +200,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
struct extent_buffer **cow_ret, u64 new_root_objectid)
{
struct extent_buffer *cow;
- u32 nritems;
int ret = 0;
int level;
struct btrfs_disk_key disk_key;
@@ -210,7 +209,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
WARN_ON(root->ref_cows && trans->transid != root->last_trans);
level = btrfs_header_level(buf);
- nritems = btrfs_header_nritems(buf);
if (level == 0)
btrfs_item_key(buf, &disk_key, 0);
else
@@ -1008,7 +1006,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
int wret;
int pslot;
int orig_slot = path->slots[level];
- int err_on_enospc = 0;
u64 orig_ptr;
if (level == 0)
@@ -1071,8 +1068,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
return 0;
- if (btrfs_header_nritems(mid) < 2)
- err_on_enospc = 1;
+ btrfs_header_nritems(mid);
left = read_node_slot(root, parent, pslot - 1);
if (left) {
@@ -1103,8 +1099,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
wret = push_node_left(trans, root, left, mid, 1);
if (wret < 0)
ret = wret;
- if (btrfs_header_nritems(mid) < 2)
- err_on_enospc = 1;
+ btrfs_header_nritems(mid);
}
/*
@@ -1224,14 +1219,12 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
int wret;
int pslot;
int orig_slot = path->slots[level];
- u64 orig_ptr;
if (level == 0)
return 1;
mid = path->nodes[level];
WARN_ON(btrfs_header_generation(mid) != trans->transid);
- orig_ptr = btrfs_node_blockptr(mid, orig_slot);
if (level < BTRFS_MAX_LEVEL - 1)
parent = path->nodes[level + 1];
@@ -1577,13 +1570,33 @@ read_block_for_search(struct btrfs_trans_handle *trans,
blocksize = btrfs_level_size(root, level - 1);
tmp = btrfs_find_tree_block(root, blocknr, blocksize);
- if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
- /*
- * we found an up to date block without sleeping, return
- * right away
- */
- *eb_ret = tmp;
- return 0;
+ if (tmp) {
+ if (btrfs_buffer_uptodate(tmp, 0)) {
+ if (btrfs_buffer_uptodate(tmp, gen)) {
+ /*
+ * we found an up to date block without
+ * sleeping, return
+ * right away
+ */
+ *eb_ret = tmp;
+ return 0;
+ }
+ /* the pages were up to date, but we failed
+ * the generation number check. Do a full
+ * read for the generation number that is correct.
+ * We must do this without dropping locks so
+ * we can trust our generation number
+ */
+ free_extent_buffer(tmp);
+ tmp = read_tree_block(root, blocknr, blocksize, gen);
+ if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+ *eb_ret = tmp;
+ return 0;
+ }
+ free_extent_buffer(tmp);
+ btrfs_release_path(NULL, p);
+ return -EIO;
+ }
}
/*
@@ -1596,8 +1609,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
btrfs_unlock_up_safe(p, level + 1);
btrfs_set_path_blocking(p);
- if (tmp)
- free_extent_buffer(tmp);
+ free_extent_buffer(tmp);
if (p->reada)
reada_for_search(root, p, level, slot, key->objectid);
@@ -2548,7 +2560,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
{
struct btrfs_disk_key disk_key;
struct extent_buffer *right = path->nodes[0];
- int slot;
int i;
int push_space = 0;
int push_items = 0;
@@ -2560,8 +2571,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
u32 this_item_size;
u32 old_left_item_size;
- slot = path->slots[1];
-
if (empty)
nr = min(right_nritems, max_slot);
else
@@ -3330,7 +3339,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
{
int ret = 0;
int slot;
- int slot_orig;
struct extent_buffer *leaf;
struct btrfs_item *item;
u32 nritems;
@@ -3340,7 +3348,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
unsigned int size_diff;
int i;
- slot_orig = path->slots[0];
leaf = path->nodes[0];
slot = path->slots[0];
@@ -3445,7 +3452,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
{
int ret = 0;
int slot;
- int slot_orig;
struct extent_buffer *leaf;
struct btrfs_item *item;
u32 nritems;
@@ -3454,7 +3460,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
unsigned int old_size;
int i;
- slot_orig = path->slots[0];
leaf = path->nodes[0];
nritems = btrfs_header_nritems(leaf);
@@ -3787,7 +3792,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
struct btrfs_key *cpu_key, u32 *data_size,
int nr)
{
- struct extent_buffer *leaf;
int ret = 0;
int slot;
int i;
@@ -3804,7 +3808,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
if (ret < 0)
goto out;
- leaf = path->nodes[0];
slot = path->slots[0];
BUG_ON(slot < 0);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index eaf286abad1..a142d204b52 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -99,6 +99,9 @@ struct btrfs_ordered_sum;
*/
#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
+/* For storing free space cache */
+#define BTRFS_FREE_SPACE_OBJECTID -11ULL
+
/* dummy objectid represents multiple objectids */
#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
@@ -265,6 +268,22 @@ struct btrfs_chunk {
/* additional stripes go here */
} __attribute__ ((__packed__));
+#define BTRFS_FREE_SPACE_EXTENT 1
+#define BTRFS_FREE_SPACE_BITMAP 2
+
+struct btrfs_free_space_entry {
+ __le64 offset;
+ __le64 bytes;
+ u8 type;
+} __attribute__ ((__packed__));
+
+struct btrfs_free_space_header {
+ struct btrfs_disk_key location;
+ __le64 generation;
+ __le64 num_entries;
+ __le64 num_bitmaps;
+} __attribute__ ((__packed__));
+
static inline unsigned long btrfs_chunk_item_size(int num_stripes)
{
BUG_ON(num_stripes == 0);
@@ -365,8 +384,10 @@ struct btrfs_super_block {
char label[BTRFS_LABEL_SIZE];
+ __le64 cache_generation;
+
/* future expansion */
- __le64 reserved[32];
+ __le64 reserved[31];
u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
} __attribute__ ((__packed__));
@@ -375,13 +396,15 @@ struct btrfs_super_block {
* ones specified below then we will fail to mount
*/
#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
-#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (2ULL << 0)
+#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
+#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
-#define BTRFS_FEATURE_INCOMPAT_SUPP \
- (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
- BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)
+#define BTRFS_FEATURE_INCOMPAT_SUPP \
+ (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
+ BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
+ BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
/*
* A leaf is full of items. offset and size tell us where to find
@@ -675,7 +698,8 @@ struct btrfs_block_group_item {
struct btrfs_space_info {
u64 flags;
- u64 total_bytes; /* total bytes in the space */
+ u64 total_bytes; /* total bytes in the space,
+ this doesn't take mirrors into account */
u64 bytes_used; /* total bytes used,
this does't take mirrors into account */
u64 bytes_pinned; /* total bytes pinned, will be freed when the
@@ -687,6 +711,8 @@ struct btrfs_space_info {
u64 bytes_may_use; /* number of bytes that may be used for
delalloc/allocations */
u64 disk_used; /* total bytes used on disk */
+ u64 disk_total; /* total bytes on disk, takes mirrors into
+ account */
int full; /* indicates that we cannot allocate any more
chunks for this space */
@@ -750,6 +776,14 @@ enum btrfs_caching_type {
BTRFS_CACHE_FINISHED = 2,
};
+enum btrfs_disk_cache_state {
+ BTRFS_DC_WRITTEN = 0,
+ BTRFS_DC_ERROR = 1,
+ BTRFS_DC_CLEAR = 2,
+ BTRFS_DC_SETUP = 3,
+ BTRFS_DC_NEED_WRITE = 4,
+};
+
struct btrfs_caching_control {
struct list_head list;
struct mutex mutex;
@@ -763,6 +797,7 @@ struct btrfs_block_group_cache {
struct btrfs_key key;
struct btrfs_block_group_item item;
struct btrfs_fs_info *fs_info;
+ struct inode *inode;
spinlock_t lock;
u64 pinned;
u64 reserved;
@@ -773,8 +808,11 @@ struct btrfs_block_group_cache {
int extents_thresh;
int free_extents;
int total_bitmaps;
- int ro;
- int dirty;
+ unsigned int ro:1;
+ unsigned int dirty:1;
+ unsigned int iref:1;
+
+ int disk_cache_state;
/* cache tracking stuff */
int cached;
@@ -863,6 +901,7 @@ struct btrfs_fs_info {
struct btrfs_transaction *running_transaction;
wait_queue_head_t transaction_throttle;
wait_queue_head_t transaction_wait;
+ wait_queue_head_t transaction_blocked_wait;
wait_queue_head_t async_submit_wait;
struct btrfs_super_block super_copy;
@@ -949,6 +988,7 @@ struct btrfs_fs_info {
struct btrfs_workers endio_meta_workers;
struct btrfs_workers endio_meta_write_workers;
struct btrfs_workers endio_write_workers;
+ struct btrfs_workers endio_freespace_worker;
struct btrfs_workers submit_workers;
/*
* fixup workers take dirty pages that didn't properly go through
@@ -1192,6 +1232,9 @@ struct btrfs_root {
#define BTRFS_MOUNT_NOSSD (1 << 9)
#define BTRFS_MOUNT_DISCARD (1 << 10)
#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11)
+#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
+#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
+#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -1665,6 +1708,27 @@ static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
write_eb_member(eb, item, struct btrfs_dir_item, location, key);
}
+BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header,
+ num_entries, 64);
+BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header,
+ num_bitmaps, 64);
+BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header,
+ generation, 64);
+
+static inline void btrfs_free_space_key(struct extent_buffer *eb,
+ struct btrfs_free_space_header *h,
+ struct btrfs_disk_key *key)
+{
+ read_eb_member(eb, h, struct btrfs_free_space_header, location, key);
+}
+
+static inline void btrfs_set_free_space_key(struct extent_buffer *eb,
+ struct btrfs_free_space_header *h,
+ struct btrfs_disk_key *key)
+{
+ write_eb_member(eb, h, struct btrfs_free_space_header, location, key);
+}
+
/* struct btrfs_disk_key */
BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
objectid, 64);
@@ -1876,6 +1940,8 @@ BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block,
incompat_flags, 64);
BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
csum_type, 16);
+BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
+ cache_generation, 64);
static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
{
@@ -1988,6 +2054,12 @@ static inline struct dentry *fdentry(struct file *file)
return file->f_path.dentry;
}
+static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
+{
+ return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
+ (space_info->flags & BTRFS_BLOCK_GROUP_DATA));
+}
+
/* extent-tree.c */
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
@@ -2079,7 +2151,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- int num_items, int *retries);
+ int num_items);
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
@@ -2100,7 +2172,7 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
- u64 num_bytes, int *retries);
+ u64 num_bytes);
int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
@@ -2115,6 +2187,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
int btrfs_set_block_group_rw(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
+void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
@@ -2373,7 +2446,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
u32 min_type);
int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
-int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput);
+int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
+ int sync);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
struct extent_state **cached_state);
int btrfs_writepages(struct address_space *mapping,
@@ -2426,6 +2500,10 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root);
int btrfs_prealloc_file_range(struct inode *inode, int mode,
u64 start, u64 num_bytes, u64 min_size,
loff_t actual_len, u64 *alloc_hint);
+int btrfs_prealloc_file_range_trans(struct inode *inode,
+ struct btrfs_trans_handle *trans, int mode,
+ u64 start, u64 num_bytes, u64 min_size,
+ loff_t actual_len, u64 *alloc_hint);
extern const struct dentry_operations btrfs_dentry_operations;
/* ioctl.c */
@@ -2466,7 +2544,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait);
/* acl.c */
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
-int btrfs_check_acl(struct inode *inode, int mask);
+int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags);
#else
#define btrfs_check_acl NULL
#endif
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index e9103b3baa4..f0cad5ae5be 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -427,5 +427,5 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
ret = btrfs_truncate_item(trans, root, path,
item_len - sub_item_len, 1);
}
- return 0;
+ return ret;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5e789f4a3ed..51d2e4de34e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -28,6 +28,7 @@
#include <linux/freezer.h>
#include <linux/crc32c.h>
#include <linux/slab.h>
+#include <linux/migrate.h>
#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
@@ -338,7 +339,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
struct extent_io_tree *tree;
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 found_start;
- int found_level;
unsigned long len;
struct extent_buffer *eb;
int ret;
@@ -356,6 +356,8 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
btrfs_header_generation(eb));
BUG_ON(ret);
+ WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN));
+
found_start = btrfs_header_bytenr(eb);
if (found_start != start) {
WARN_ON(1);
@@ -369,8 +371,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
WARN_ON(1);
goto err;
}
- found_level = btrfs_header_level(eb);
-
csum_tree_block(root, eb, 0);
err:
free_extent_buffer(eb);
@@ -481,9 +481,12 @@ static void end_workqueue_bio(struct bio *bio, int err)
end_io_wq->work.flags = 0;
if (bio->bi_rw & REQ_WRITE) {
- if (end_io_wq->metadata)
+ if (end_io_wq->metadata == 1)
btrfs_queue_worker(&fs_info->endio_meta_write_workers,
&end_io_wq->work);
+ else if (end_io_wq->metadata == 2)
+ btrfs_queue_worker(&fs_info->endio_freespace_worker,
+ &end_io_wq->work);
else
btrfs_queue_worker(&fs_info->endio_write_workers,
&end_io_wq->work);
@@ -497,6 +500,13 @@ static void end_workqueue_bio(struct bio *bio, int err)
}
}
+/*
+ * For the metadata arg you want
+ *
+ * 0 - if data
+ * 1 - if normal metadta
+ * 2 - if writing to the free space cache area
+ */
int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
int metadata)
{
@@ -533,11 +543,9 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
static void run_one_async_start(struct btrfs_work *work)
{
- struct btrfs_fs_info *fs_info;
struct async_submit_bio *async;
async = container_of(work, struct async_submit_bio, work);
- fs_info = BTRFS_I(async->inode)->root->fs_info;
async->submit_bio_start(async->inode, async->rw, async->bio,
async->mirror_num, async->bio_flags,
async->bio_offset);
@@ -688,6 +696,27 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
__btree_submit_bio_done);
}
+#ifdef CONFIG_MIGRATION
+static int btree_migratepage(struct address_space *mapping,
+ struct page *newpage, struct page *page)
+{
+ /*
+ * we can't safely write a btree page from here,
+ * we haven't done the locking hook
+ */
+ if (PageDirty(page))
+ return -EAGAIN;
+ /*
+ * Buffers may be managed in a filesystem specific way.
+ * We must have no buffers or drop them.
+ */
+ if (page_has_private(page) &&
+ !try_to_release_page(page, GFP_KERNEL))
+ return -EAGAIN;
+ return migrate_page(mapping, newpage, page);
+}
+#endif
+
static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
struct extent_io_tree *tree;
@@ -702,8 +731,7 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc)
}
redirty_page_for_writepage(wbc, page);
- eb = btrfs_find_tree_block(root, page_offset(page),
- PAGE_CACHE_SIZE);
+ eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE);
WARN_ON(!eb);
was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
@@ -794,6 +822,9 @@ static const struct address_space_operations btree_aops = {
.releasepage = btree_releasepage,
.invalidatepage = btree_invalidatepage,
.sync_page = block_sync_page,
+#ifdef CONFIG_MIGRATION
+ .migratepage = btree_migratepage,
+#endif
};
int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
@@ -850,12 +881,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
u32 blocksize, u64 parent_transid)
{
struct extent_buffer *buf = NULL;
- struct inode *btree_inode = root->fs_info->btree_inode;
- struct extent_io_tree *io_tree;
int ret;
- io_tree = &BTRFS_I(btree_inode)->io_tree;
-
buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
if (!buf)
return NULL;
@@ -980,7 +1007,10 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
blocksize, generation);
- BUG_ON(!root->node);
+ if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) {
+ free_extent_buffer(root->node);
+ return -EIO;
+ }
root->commit_root = btrfs_root_node(root);
return 0;
}
@@ -1377,7 +1407,6 @@ static int bio_ready_for_csum(struct bio *bio)
u64 start = 0;
struct page *page;
struct extent_io_tree *io_tree = NULL;
- struct btrfs_fs_info *info = NULL;
struct bio_vec *bvec;
int i;
int ret;
@@ -1396,7 +1425,6 @@ static int bio_ready_for_csum(struct bio *bio)
buf_len = page->private >> 2;
start = page_offset(page) + bvec->bv_offset;
io_tree = &BTRFS_I(page->mapping->host)->io_tree;
- info = BTRFS_I(page->mapping->host)->root->fs_info;
}
/* are we fully contained in this bio? */
if (buf_len <= length)
@@ -1539,10 +1567,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
GFP_NOFS);
struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
GFP_NOFS);
- struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root),
- GFP_NOFS);
- struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info),
- GFP_NOFS);
+ struct btrfs_root *tree_root = btrfs_sb(sb);
+ struct btrfs_fs_info *fs_info = tree_root->fs_info;
struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root),
GFP_NOFS);
struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root),
@@ -1680,12 +1706,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
init_waitqueue_head(&fs_info->transaction_throttle);
init_waitqueue_head(&fs_info->transaction_wait);
+ init_waitqueue_head(&fs_info->transaction_blocked_wait);
init_waitqueue_head(&fs_info->async_submit_wait);
__setup_root(4096, 4096, 4096, 4096, tree_root,
fs_info, BTRFS_ROOT_TREE_OBJECTID);
-
bh = btrfs_read_dev_super(fs_devices->latest_bdev);
if (!bh)
goto fail_iput;
@@ -1775,6 +1801,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
fs_info->thread_pool_size,
&fs_info->generic_worker);
+ btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
+ 1, &fs_info->generic_worker);
/*
* endios are largely parallel and should have a very
@@ -1795,6 +1823,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_start_workers(&fs_info->endio_meta_workers, 1);
btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
btrfs_start_workers(&fs_info->endio_write_workers, 1);
+ btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -1993,6 +2022,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
if (!(sb->s_flags & MS_RDONLY)) {
down_read(&fs_info->cleanup_work_sem);
btrfs_orphan_cleanup(fs_info->fs_root);
+ btrfs_orphan_cleanup(fs_info->tree_root);
up_read(&fs_info->cleanup_work_sem);
}
@@ -2035,6 +2065,7 @@ fail_sb_buffer:
btrfs_stop_workers(&fs_info->endio_meta_workers);
btrfs_stop_workers(&fs_info->endio_meta_write_workers);
btrfs_stop_workers(&fs_info->endio_write_workers);
+ btrfs_stop_workers(&fs_info->endio_freespace_worker);
btrfs_stop_workers(&fs_info->submit_workers);
fail_iput:
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
@@ -2410,6 +2441,7 @@ int close_ctree(struct btrfs_root *root)
fs_info->closing = 1;
smp_mb();
+ btrfs_put_block_group_cache(fs_info);
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
ret = btrfs_commit_super(root);
if (ret)
@@ -2456,6 +2488,7 @@ int close_ctree(struct btrfs_root *root)
btrfs_stop_workers(&fs_info->endio_meta_workers);
btrfs_stop_workers(&fs_info->endio_meta_write_workers);
btrfs_stop_workers(&fs_info->endio_write_workers);
+ btrfs_stop_workers(&fs_info->endio_freespace_worker);
btrfs_stop_workers(&fs_info->submit_workers);
btrfs_close_devices(fs_info->fs_devices);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 951ef09b82f..0ccf9a8afcd 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -110,7 +110,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
dentry = d_obtain_alias(inode);
if (!IS_ERR(dentry))
- dentry->d_op = &btrfs_dentry_operations;
+ d_set_d_op(dentry, &btrfs_dentry_operations);
return dentry;
fail:
srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -166,7 +166,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
static struct dentry *btrfs_get_parent(struct dentry *child)
{
struct inode *dir = child->d_inode;
- static struct dentry *dentry;
+ struct dentry *dentry;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_path *path;
struct extent_buffer *leaf;
@@ -225,16 +225,92 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
key.offset = 0;
dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL));
if (!IS_ERR(dentry))
- dentry->d_op = &btrfs_dentry_operations;
+ d_set_d_op(dentry, &btrfs_dentry_operations);
return dentry;
fail:
btrfs_free_path(path);
return ERR_PTR(ret);
}
+static int btrfs_get_name(struct dentry *parent, char *name,
+ struct dentry *child)
+{
+ struct inode *inode = child->d_inode;
+ struct inode *dir = parent->d_inode;
+ struct btrfs_path *path;
+ struct btrfs_root *root = BTRFS_I(dir)->root;
+ struct btrfs_inode_ref *iref;
+ struct btrfs_root_ref *rref;
+ struct extent_buffer *leaf;
+ unsigned long name_ptr;
+ struct btrfs_key key;
+ int name_len;
+ int ret;
+
+ if (!dir || !inode)
+ return -EINVAL;
+
+ if (!S_ISDIR(dir->i_mode))
+ return -EINVAL;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ path->leave_spinning = 1;
+
+ if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
+ key.objectid = BTRFS_I(inode)->root->root_key.objectid;
+ key.type = BTRFS_ROOT_BACKREF_KEY;
+ key.offset = (u64)-1;
+ root = root->fs_info->tree_root;
+ } else {
+ key.objectid = inode->i_ino;
+ key.offset = dir->i_ino;
+ key.type = BTRFS_INODE_REF_KEY;
+ }
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ btrfs_free_path(path);
+ return ret;
+ } else if (ret > 0) {
+ if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
+ path->slots[0]--;
+ } else {
+ btrfs_free_path(path);
+ return -ENOENT;
+ }
+ }
+ leaf = path->nodes[0];
+
+ if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
+ rref = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_root_ref);
+ name_ptr = (unsigned long)(rref + 1);
+ name_len = btrfs_root_ref_name_len(leaf, rref);
+ } else {
+ iref = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_inode_ref);
+ name_ptr = (unsigned long)(iref + 1);
+ name_len = btrfs_inode_ref_name_len(leaf, iref);
+ }
+
+ read_extent_buffer(leaf, name, name_ptr, name_len);
+ btrfs_free_path(path);
+
+ /*
+ * have to add the null termination to make sure that reconnect_path
+ * gets the right len for strlen
+ */
+ name[name_len] = '\0';
+
+ return 0;
+}
+
const struct export_operations btrfs_export_ops = {
.encode_fh = btrfs_encode_fh,
.fh_to_dentry = btrfs_fh_to_dentry,
.fh_to_parent = btrfs_fh_to_parent,
.get_parent = btrfs_get_parent,
+ .get_name = btrfs_get_name,
};
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0b81ecdb101..227e5815d83 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -242,6 +242,12 @@ get_caching_control(struct btrfs_block_group_cache *cache)
return NULL;
}
+ /* We're loading it the fast way, so we don't have a caching_ctl. */
+ if (!cache->caching_ctl) {
+ spin_unlock(&cache->lock);
+ return NULL;
+ }
+
ctl = cache->caching_ctl;
atomic_inc(&ctl->count);
spin_unlock(&cache->lock);
@@ -421,7 +427,10 @@ err:
return 0;
}
-static int cache_block_group(struct btrfs_block_group_cache *cache)
+static int cache_block_group(struct btrfs_block_group_cache *cache,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ int load_cache_only)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_caching_control *caching_ctl;
@@ -432,6 +441,39 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
if (cache->cached != BTRFS_CACHE_NO)
return 0;
+ /*
+ * We can't do the read from on-disk cache during a commit since we need
+ * to have the normal tree locking. Also if we are currently trying to
+ * allocate blocks for the tree root we can't do the fast caching since
+ * we likely hold important locks.
+ */
+ if (!trans->transaction->in_commit &&
+ (root && root != root->fs_info->tree_root)) {
+ spin_lock(&cache->lock);
+ if (cache->cached != BTRFS_CACHE_NO) {
+ spin_unlock(&cache->lock);
+ return 0;
+ }
+ cache->cached = BTRFS_CACHE_STARTED;
+ spin_unlock(&cache->lock);
+
+ ret = load_free_space_cache(fs_info, cache);
+
+ spin_lock(&cache->lock);
+ if (ret == 1) {
+ cache->cached = BTRFS_CACHE_FINISHED;
+ cache->last_byte_to_unpin = (u64)-1;
+ } else {
+ cache->cached = BTRFS_CACHE_NO;
+ }
+ spin_unlock(&cache->lock);
+ if (ret == 1)
+ return 0;
+ }
+
+ if (load_cache_only)
+ return 0;
+
caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
BUG_ON(!caching_ctl);
@@ -509,7 +551,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
rcu_read_lock();
list_for_each_entry_rcu(found, head, list) {
- if (found->flags == flags) {
+ if (found->flags & flags) {
rcu_read_unlock();
return found;
}
@@ -542,6 +584,15 @@ static u64 div_factor(u64 num, int factor)
return num;
}
+static u64 div_factor_fine(u64 num, int factor)
+{
+ if (factor == 100)
+ return num;
+ num *= factor;
+ do_div(num, 100);
+ return num;
+}
+
u64 btrfs_find_block_group(struct btrfs_root *root,
u64 search_start, u64 search_hint, int owner)
{
@@ -2687,6 +2738,111 @@ next_block_group(struct btrfs_root *root,
return cache;
}
+static int cache_save_setup(struct btrfs_block_group_cache *block_group,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_path *path)
+{
+ struct btrfs_root *root = block_group->fs_info->tree_root;
+ struct inode *inode = NULL;
+ u64 alloc_hint = 0;
+ int dcs = BTRFS_DC_ERROR;
+ int num_pages = 0;
+ int retries = 0;
+ int ret = 0;
+
+ /*
+ * If this block group is smaller than 100 megs don't bother caching the
+ * block group.
+ */
+ if (block_group->key.offset < (100 * 1024 * 1024)) {
+ spin_lock(&block_group->lock);
+ block_group->disk_cache_state = BTRFS_DC_WRITTEN;
+ spin_unlock(&block_group->lock);
+ return 0;
+ }
+
+again:
+ inode = lookup_free_space_inode(root, block_group, path);
+ if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
+ ret = PTR_ERR(inode);
+ btrfs_release_path(root, path);
+ goto out;
+ }
+
+ if (IS_ERR(inode)) {
+ BUG_ON(retries);
+ retries++;
+
+ if (block_group->ro)
+ goto out_free;
+
+ ret = create_free_space_inode(root, trans, block_group, path);
+ if (ret)
+ goto out_free;
+ goto again;
+ }
+
+ /*
+ * We want to set the generation to 0, that way if anything goes wrong
+ * from here on out we know not to trust this cache when we load up next
+ * time.
+ */
+ BTRFS_I(inode)->generation = 0;
+ ret = btrfs_update_inode(trans, root, inode);
+ WARN_ON(ret);
+
+ if (i_size_read(inode) > 0) {
+ ret = btrfs_truncate_free_space_cache(root, trans, path,
+ inode);
+ if (ret)
+ goto out_put;
+ }
+
+ spin_lock(&block_group->lock);
+ if (block_group->cached != BTRFS_CACHE_FINISHED) {
+ /* We're not cached, don't bother trying to write stuff out */
+ dcs = BTRFS_DC_WRITTEN;
+ spin_unlock(&block_group->lock);
+ goto out_put;
+ }
+ spin_unlock(&block_group->lock);
+
+ num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
+ if (!num_pages)
+ num_pages = 1;
+
+ /*
+ * Just to make absolutely sure we have enough space, we're going to
+ * preallocate 12 pages worth of space for each block group. In
+ * practice we ought to use at most 8, but we need extra space so we can
+ * add our header and have a terminator between the extents and the
+ * bitmaps.
+ */
+ num_pages *= 16;
+ num_pages *= PAGE_CACHE_SIZE;
+
+ ret = btrfs_check_data_free_space(inode, num_pages);
+ if (ret)
+ goto out_put;
+
+ ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
+ num_pages, num_pages,
+ &alloc_hint);
+ if (!ret)
+ dcs = BTRFS_DC_SETUP;
+ btrfs_free_reserved_data_space(inode, num_pages);
+out_put:
+ iput(inode);
+out_free:
+ btrfs_release_path(root, path);
+out:
+ spin_lock(&block_group->lock);
+ block_group->disk_cache_state = dcs;
+ spin_unlock(&block_group->lock);
+
+ return ret;
+}
+
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
@@ -2699,6 +2855,25 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
+again:
+ while (1) {
+ cache = btrfs_lookup_first_block_group(root->fs_info, last);
+ while (cache) {
+ if (cache->disk_cache_state == BTRFS_DC_CLEAR)
+ break;
+ cache = next_block_group(root, cache);
+ }
+ if (!cache) {
+ if (last == 0)
+ break;
+ last = 0;
+ continue;
+ }
+ err = cache_save_setup(cache, trans, path);
+ last = cache->key.objectid + cache->key.offset;
+ btrfs_put_block_group(cache);
+ }
+
while (1) {
if (last == 0) {
err = btrfs_run_delayed_refs(trans, root,
@@ -2708,6 +2883,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
cache = btrfs_lookup_first_block_group(root->fs_info, last);
while (cache) {
+ if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
+ btrfs_put_block_group(cache);
+ goto again;
+ }
+
if (cache->dirty)
break;
cache = next_block_group(root, cache);
@@ -2719,6 +2899,8 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
continue;
}
+ if (cache->disk_cache_state == BTRFS_DC_SETUP)
+ cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
cache->dirty = 0;
last = cache->key.objectid + cache->key.offset;
@@ -2727,6 +2909,52 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
btrfs_put_block_group(cache);
}
+ while (1) {
+ /*
+ * I don't think this is needed since we're just marking our
+ * preallocated extent as written, but just in case it can't
+ * hurt.
+ */
+ if (last == 0) {
+ err = btrfs_run_delayed_refs(trans, root,
+ (unsigned long)-1);
+ BUG_ON(err);
+ }
+
+ cache = btrfs_lookup_first_block_group(root->fs_info, last);
+ while (cache) {
+ /*
+ * Really this shouldn't happen, but it could if we
+ * couldn't write the entire preallocated extent and
+ * splitting the extent resulted in a new block.
+ */
+ if (cache->dirty) {
+ btrfs_put_block_group(cache);
+ goto again;
+ }
+ if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+ break;
+ cache = next_block_group(root, cache);
+ }
+ if (!cache) {
+ if (last == 0)
+ break;
+ last = 0;
+ continue;
+ }
+
+ btrfs_write_out_cache(root, trans, cache, path);
+
+ /*
+ * If we didn't have an error then the cache state is still
+ * NEED_WRITE, so we can set it to WRITTEN.
+ */
+ if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+ cache->disk_cache_state = BTRFS_DC_WRITTEN;
+ last = cache->key.objectid + cache->key.offset;
+ btrfs_put_block_group(cache);
+ }
+
btrfs_free_path(path);
return 0;
}
@@ -2762,6 +2990,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
if (found) {
spin_lock(&found->lock);
found->total_bytes += total_bytes;
+ found->disk_total += total_bytes * factor;
found->bytes_used += bytes_used;
found->disk_used += bytes_used * factor;
found->full = 0;
@@ -2781,6 +3010,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
BTRFS_BLOCK_GROUP_SYSTEM |
BTRFS_BLOCK_GROUP_METADATA);
found->total_bytes = total_bytes;
+ found->disk_total = total_bytes * factor;
found->bytes_used = bytes_used;
found->disk_used = bytes_used * factor;
found->bytes_pinned = 0;
@@ -2813,7 +3043,13 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
{
- u64 num_devices = root->fs_info->fs_devices->rw_devices;
+ /*
+ * we add in the count of missing devices because we want
+ * to make sure that any RAID levels on a degraded FS
+ * continue to be honored.
+ */
+ u64 num_devices = root->fs_info->fs_devices->rw_devices +
+ root->fs_info->fs_devices->missing_devices;
if (num_devices == 1)
flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
@@ -2882,11 +3118,16 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
struct btrfs_space_info *data_sinfo;
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 used;
- int ret = 0, committed = 0;
+ int ret = 0, committed = 0, alloc_chunk = 1;
/* make sure bytes are sectorsize aligned */
bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
+ if (root == root->fs_info->tree_root) {
+ alloc_chunk = 0;
+ committed = 1;
+ }
+
data_sinfo = BTRFS_I(inode)->space_info;
if (!data_sinfo)
goto alloc;
@@ -2905,7 +3146,7 @@ again:
* if we don't have enough free bytes in this space then we need
* to alloc a new chunk.
*/
- if (!data_sinfo->full) {
+ if (!data_sinfo->full && alloc_chunk) {
u64 alloc_target;
data_sinfo->force_alloc = 1;
@@ -2997,10 +3238,11 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
rcu_read_unlock();
}
-static int should_alloc_chunk(struct btrfs_space_info *sinfo,
- u64 alloc_bytes)
+static int should_alloc_chunk(struct btrfs_root *root,
+ struct btrfs_space_info *sinfo, u64 alloc_bytes)
{
u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+ u64 thresh;
if (sinfo->bytes_used + sinfo->bytes_reserved +
alloc_bytes + 256 * 1024 * 1024 < num_bytes)
@@ -3010,6 +3252,12 @@ static int should_alloc_chunk(struct btrfs_space_info *sinfo,
alloc_bytes < div_factor(num_bytes, 8))
return 0;
+ thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+ thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
+
+ if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
+ return 0;
+
return 1;
}
@@ -3041,13 +3289,21 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
goto out;
}
- if (!force && !should_alloc_chunk(space_info, alloc_bytes)) {
+ if (!force && !should_alloc_chunk(extent_root, space_info,
+ alloc_bytes)) {
spin_unlock(&space_info->lock);
goto out;
}
spin_unlock(&space_info->lock);
/*
+ * If we have mixed data/metadata chunks we want to make sure we keep
+ * allocating mixed chunks instead of individual chunks.
+ */
+ if (btrfs_mixed_space_info(space_info))
+ flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
+
+ /*
* if we're doing a data chunk, go ahead and make sure that
* we keep a reasonable number of metadata chunks allocated in the
* FS as well.
@@ -3072,55 +3328,25 @@ out:
return ret;
}
-static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_space_info *sinfo, u64 num_bytes)
-{
- int ret;
- int end_trans = 0;
-
- if (sinfo->full)
- return 0;
-
- spin_lock(&sinfo->lock);
- ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
- spin_unlock(&sinfo->lock);
- if (!ret)
- return 0;
-
- if (!trans) {
- trans = btrfs_join_transaction(root, 1);
- BUG_ON(IS_ERR(trans));
- end_trans = 1;
- }
-
- ret = do_chunk_alloc(trans, root->fs_info->extent_root,
- num_bytes + 2 * 1024 * 1024,
- get_alloc_profile(root, sinfo->flags), 0);
-
- if (end_trans)
- btrfs_end_transaction(trans, root);
-
- return ret == 1 ? 1 : 0;
-}
-
/*
* shrink metadata reservation for delalloc
*/
static int shrink_delalloc(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 to_reclaim)
+ struct btrfs_root *root, u64 to_reclaim, int sync)
{
struct btrfs_block_rsv *block_rsv;
+ struct btrfs_space_info *space_info;
u64 reserved;
u64 max_reclaim;
u64 reclaimed = 0;
int pause = 1;
- int ret;
+ int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
block_rsv = &root->fs_info->delalloc_block_rsv;
- spin_lock(&block_rsv->lock);
- reserved = block_rsv->reserved;
- spin_unlock(&block_rsv->lock);
+ space_info = block_rsv->space_info;
+
+ smp_mb();
+ reserved = space_info->bytes_reserved;
if (reserved == 0)
return 0;
@@ -3128,104 +3354,169 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
max_reclaim = min(reserved, to_reclaim);
while (1) {
- ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0);
- if (!ret) {
- __set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(pause);
- pause <<= 1;
- if (pause > HZ / 10)
- pause = HZ / 10;
- } else {
- pause = 1;
- }
+ /* have the flusher threads jump in and do some IO */
+ smp_mb();
+ nr_pages = min_t(unsigned long, nr_pages,
+ root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
+ writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
- spin_lock(&block_rsv->lock);
- if (reserved > block_rsv->reserved)
- reclaimed = reserved - block_rsv->reserved;
- reserved = block_rsv->reserved;
- spin_unlock(&block_rsv->lock);
+ spin_lock(&space_info->lock);
+ if (reserved > space_info->bytes_reserved)
+ reclaimed += reserved - space_info->bytes_reserved;
+ reserved = space_info->bytes_reserved;
+ spin_unlock(&space_info->lock);
if (reserved == 0 || reclaimed >= max_reclaim)
break;
if (trans && trans->transaction->blocked)
return -EAGAIN;
+
+ __set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(pause);
+ pause <<= 1;
+ if (pause > HZ / 10)
+ pause = HZ / 10;
+
}
return reclaimed >= to_reclaim;
}
-static int should_retry_reserve(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 num_bytes, int *retries)
+/*
+ * Retries tells us how many times we've called reserve_metadata_bytes. The
+ * idea is if this is the first call (retries == 0) then we will add to our
+ * reserved count if we can't make the allocation in order to hold our place
+ * while we go and try and free up space. That way for retries > 1 we don't try
+ * and add space, we just check to see if the amount of unused space is >= the
+ * total space, meaning that our reservation is valid.
+ *
+ * However if we don't intend to retry this reservation, pass -1 as retries so
+ * that it short circuits this logic.
+ */
+static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 orig_bytes, int flush)
{
struct btrfs_space_info *space_info = block_rsv->space_info;
- int ret;
+ u64 unused;
+ u64 num_bytes = orig_bytes;
+ int retries = 0;
+ int ret = 0;
+ bool reserved = false;
+ bool committed = false;
- if ((*retries) > 2)
- return -ENOSPC;
+again:
+ ret = -ENOSPC;
+ if (reserved)
+ num_bytes = 0;
- ret = maybe_allocate_chunk(trans, root, space_info, num_bytes);
- if (ret)
- return 1;
+ spin_lock(&space_info->lock);
+ unused = space_info->bytes_used + space_info->bytes_reserved +
+ space_info->bytes_pinned + space_info->bytes_readonly +
+ space_info->bytes_may_use;
- if (trans && trans->transaction->in_commit)
- return -ENOSPC;
+ /*
+ * The idea here is that we've not already over-reserved the block group
+ * then we can go ahead and save our reservation first and then start
+ * flushing if we need to. Otherwise if we've already overcommitted
+ * lets start flushing stuff first and then come back and try to make
+ * our reservation.
+ */
+ if (unused <= space_info->total_bytes) {
+ unused = space_info->total_bytes - unused;
+ if (unused >= num_bytes) {
+ if (!reserved)
+ space_info->bytes_reserved += orig_bytes;
+ ret = 0;
+ } else {
+ /*
+ * Ok set num_bytes to orig_bytes since we aren't
+ * overocmmitted, this way we only try and reclaim what
+ * we need.
+ */
+ num_bytes = orig_bytes;
+ }
+ } else {
+ /*
+ * Ok we're over committed, set num_bytes to the overcommitted
+ * amount plus the amount of bytes that we need for this
+ * reservation.
+ */
+ num_bytes = unused - space_info->total_bytes +
+ (orig_bytes * (retries + 1));
+ }
- ret = shrink_delalloc(trans, root, num_bytes);
- if (ret)
- return ret;
+ /*
+ * Couldn't make our reservation, save our place so while we're trying
+ * to reclaim space we can actually use it instead of somebody else
+ * stealing it from us.
+ */
+ if (ret && !reserved) {
+ space_info->bytes_reserved += orig_bytes;
+ reserved = true;
+ }
- spin_lock(&space_info->lock);
- if (space_info->bytes_pinned < num_bytes)
- ret = 1;
spin_unlock(&space_info->lock);
- if (ret)
- return -ENOSPC;
- (*retries)++;
-
- if (trans)
- return -EAGAIN;
+ if (!ret)
+ return 0;
- trans = btrfs_join_transaction(root, 1);
- BUG_ON(IS_ERR(trans));
- ret = btrfs_commit_transaction(trans, root);
- BUG_ON(ret);
+ if (!flush)
+ goto out;
- return 1;
-}
+ /*
+ * We do synchronous shrinking since we don't actually unreserve
+ * metadata until after the IO is completed.
+ */
+ ret = shrink_delalloc(trans, root, num_bytes, 1);
+ if (ret > 0)
+ return 0;
+ else if (ret < 0)
+ goto out;
-static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv,
- u64 num_bytes)
-{
- struct btrfs_space_info *space_info = block_rsv->space_info;
- u64 unused;
- int ret = -ENOSPC;
+ /*
+ * So if we were overcommitted it's possible that somebody else flushed
+ * out enough space and we simply didn't have enough space to reclaim,
+ * so go back around and try again.
+ */
+ if (retries < 2) {
+ retries++;
+ goto again;
+ }
spin_lock(&space_info->lock);
- unused = space_info->bytes_used + space_info->bytes_reserved +
- space_info->bytes_pinned + space_info->bytes_readonly;
+ /*
+ * Not enough space to be reclaimed, don't bother committing the
+ * transaction.
+ */
+ if (space_info->bytes_pinned < orig_bytes)
+ ret = -ENOSPC;
+ spin_unlock(&space_info->lock);
+ if (ret)
+ goto out;
- if (unused < space_info->total_bytes)
- unused = space_info->total_bytes - unused;
- else
- unused = 0;
+ ret = -EAGAIN;
+ if (trans || committed)
+ goto out;
- if (unused >= num_bytes) {
- if (block_rsv->priority >= 10) {
- space_info->bytes_reserved += num_bytes;
- ret = 0;
- } else {
- if ((unused + block_rsv->reserved) *
- block_rsv->priority >=
- (num_bytes + block_rsv->reserved) * 10) {
- space_info->bytes_reserved += num_bytes;
- ret = 0;
- }
- }
+ ret = -ENOSPC;
+ trans = btrfs_join_transaction(root, 1);
+ if (IS_ERR(trans))
+ goto out;
+ ret = btrfs_commit_transaction(trans, root);
+ if (!ret) {
+ trans = NULL;
+ committed = true;
+ goto again;
+ }
+
+out:
+ if (reserved) {
+ spin_lock(&space_info->lock);
+ space_info->bytes_reserved -= orig_bytes;
+ spin_unlock(&space_info->lock);
}
- spin_unlock(&space_info->lock);
return ret;
}
@@ -3327,18 +3618,14 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
{
struct btrfs_block_rsv *block_rsv;
struct btrfs_fs_info *fs_info = root->fs_info;
- u64 alloc_target;
block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
if (!block_rsv)
return NULL;
btrfs_init_block_rsv(block_rsv);
-
- alloc_target = btrfs_get_alloc_profile(root, 0);
block_rsv->space_info = __find_space_info(fs_info,
BTRFS_BLOCK_GROUP_METADATA);
-
return block_rsv;
}
@@ -3369,23 +3656,19 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
- u64 num_bytes, int *retries)
+ u64 num_bytes)
{
int ret;
if (num_bytes == 0)
return 0;
-again:
- ret = reserve_metadata_bytes(block_rsv, num_bytes);
+
+ ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1);
if (!ret) {
block_rsv_add_bytes(block_rsv, num_bytes, 1);
return 0;
}
- ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries);
- if (ret > 0)
- goto again;
-
return ret;
}
@@ -3420,7 +3703,8 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
return 0;
if (block_rsv->refill_used) {
- ret = reserve_metadata_bytes(block_rsv, num_bytes);
+ ret = reserve_metadata_bytes(trans, root, block_rsv,
+ num_bytes, 0);
if (!ret) {
block_rsv_add_bytes(block_rsv, num_bytes, 0);
return 0;
@@ -3499,6 +3783,8 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
spin_lock(&sinfo->lock);
+ if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
+ data_used = 0;
meta_used = sinfo->bytes_used;
spin_unlock(&sinfo->lock);
@@ -3526,7 +3812,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
block_rsv->size = num_bytes;
num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
- sinfo->bytes_reserved + sinfo->bytes_readonly;
+ sinfo->bytes_reserved + sinfo->bytes_readonly +
+ sinfo->bytes_may_use;
if (sinfo->total_bytes > num_bytes) {
num_bytes = sinfo->total_bytes - num_bytes;
@@ -3597,7 +3884,7 @@ static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- int num_items, int *retries)
+ int num_items)
{
u64 num_bytes;
int ret;
@@ -3607,7 +3894,7 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
num_bytes = calc_trans_metadata_size(root, num_items);
ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
- num_bytes, retries);
+ num_bytes);
if (!ret) {
trans->bytes_reserved += num_bytes;
trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -3681,14 +3968,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
u64 to_reserve;
int nr_extents;
- int retries = 0;
int ret;
if (btrfs_transaction_in_commit(root->fs_info))
schedule_timeout(1);
num_bytes = ALIGN(num_bytes, root->sectorsize);
-again:
+
spin_lock(&BTRFS_I(inode)->accounting_lock);
nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
if (nr_extents > BTRFS_I(inode)->reserved_extents) {
@@ -3698,18 +3984,14 @@ again:
nr_extents = 0;
to_reserve = 0;
}
+ spin_unlock(&BTRFS_I(inode)->accounting_lock);
to_reserve += calc_csum_metadata_size(inode, num_bytes);
- ret = reserve_metadata_bytes(block_rsv, to_reserve);
- if (ret) {
- spin_unlock(&BTRFS_I(inode)->accounting_lock);
- ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
- &retries);
- if (ret > 0)
- goto again;
+ ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
+ if (ret)
return ret;
- }
+ spin_lock(&BTRFS_I(inode)->accounting_lock);
BTRFS_I(inode)->reserved_extents += nr_extents;
atomic_inc(&BTRFS_I(inode)->outstanding_extents);
spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -3717,7 +3999,7 @@ again:
block_rsv_add_bytes(block_rsv, to_reserve, 1);
if (block_rsv->size > 512 * 1024 * 1024)
- shrink_delalloc(NULL, root, to_reserve);
+ shrink_delalloc(NULL, root, to_reserve, 0);
return 0;
}
@@ -3776,12 +4058,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc)
{
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group_cache *cache = NULL;
struct btrfs_fs_info *info = root->fs_info;
- int factor;
u64 total = num_bytes;
u64 old_val;
u64 byte_in_group;
+ int factor;
/* block accounting for super block */
spin_lock(&info->delalloc_lock);
@@ -3803,11 +4085,25 @@ static int update_block_group(struct btrfs_trans_handle *trans,
factor = 2;
else
factor = 1;
+ /*
+ * If this block group has free space cache written out, we
+ * need to make sure to load it if we are removing space. This
+ * is because we need the unpinning stage to actually add the
+ * space back to the block group, otherwise we will leak space.
+ */
+ if (!alloc && cache->cached == BTRFS_CACHE_NO)
+ cache_block_group(cache, trans, NULL, 1);
+
byte_in_group = bytenr - cache->key.objectid;
WARN_ON(byte_in_group > cache->key.offset);
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
+
+ if (btrfs_super_cache_generation(&info->super_copy) != 0 &&
+ cache->disk_cache_state < BTRFS_DC_CLEAR)
+ cache->disk_cache_state = BTRFS_DC_CLEAR;
+
cache->dirty = 1;
old_val = btrfs_block_group_used(&cache->item);
num_bytes = min(total, cache->key.offset - byte_in_group);
@@ -4554,6 +4850,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
bool found_uncached_bg = false;
bool failed_cluster_refill = false;
bool failed_alloc = false;
+ bool use_cluster = true;
u64 ideal_cache_percent = 0;
u64 ideal_cache_offset = 0;
@@ -4568,16 +4865,24 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
return -ENOSPC;
}
+ /*
+ * If the space info is for both data and metadata it means we have a
+ * small filesystem and we can't use the clustering stuff.
+ */
+ if (btrfs_mixed_space_info(space_info))
+ use_cluster = false;
+
if (orig_root->ref_cows || empty_size)
allowed_chunk_alloc = 1;
- if (data & BTRFS_BLOCK_GROUP_METADATA) {
+ if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
last_ptr = &root->fs_info->meta_alloc_cluster;
if (!btrfs_test_opt(root, SSD))
empty_cluster = 64 * 1024;
}
- if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
+ if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
+ btrfs_test_opt(root, SSD)) {
last_ptr = &root->fs_info->data_alloc_cluster;
}
@@ -4637,10 +4942,34 @@ search:
btrfs_get_block_group(block_group);
search_start = block_group->key.objectid;
+ /*
+ * this can happen if we end up cycling through all the
+ * raid types, but we want to make sure we only allocate
+ * for the proper type.
+ */
+ if (!block_group_bits(block_group, data)) {
+ u64 extra = BTRFS_BLOCK_GROUP_DUP |
+ BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10;
+
+ /*
+ * if they asked for extra copies and this block group
+ * doesn't provide them, bail. This does allow us to
+ * fill raid0 from raid1.
+ */
+ if ((data & extra) && !(block_group->flags & extra))
+ goto loop;
+ }
+
have_block_group:
if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
u64 free_percent;
+ ret = cache_block_group(block_group, trans,
+ orig_root, 1);
+ if (block_group->cached == BTRFS_CACHE_FINISHED)
+ goto have_block_group;
+
free_percent = btrfs_block_group_used(&block_group->item);
free_percent *= 100;
free_percent = div64_u64(free_percent,
@@ -4661,7 +4990,8 @@ have_block_group:
if (loop > LOOP_CACHING_NOWAIT ||
(loop > LOOP_FIND_IDEAL &&
atomic_read(&space_info->caching_threads) < 2)) {
- ret = cache_block_group(block_group);
+ ret = cache_block_group(block_group, trans,
+ orig_root, 0);
BUG_ON(ret);
}
found_uncached_bg = true;
@@ -5218,7 +5548,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
u64 num_bytes = ins->offset;
block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
- cache_block_group(block_group);
+ cache_block_group(block_group, trans, NULL, 0);
caching_ctl = get_caching_control(block_group);
if (!caching_ctl) {
@@ -5308,7 +5638,8 @@ use_block_rsv(struct btrfs_trans_handle *trans,
block_rsv = get_block_rsv(trans, root);
if (block_rsv->size == 0) {
- ret = reserve_metadata_bytes(block_rsv, blocksize);
+ ret = reserve_metadata_bytes(trans, root, block_rsv,
+ blocksize, 0);
if (ret)
return ERR_PTR(ret);
return block_rsv;
@@ -5318,11 +5649,6 @@ use_block_rsv(struct btrfs_trans_handle *trans,
if (!ret)
return block_rsv;
- WARN_ON(1);
- printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
- block_rsv->size, block_rsv->reserved,
- block_rsv->freed[0], block_rsv->freed[1]);
-
return ERR_PTR(-ENOSPC);
}
@@ -5421,7 +5747,6 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
u64 generation;
u64 refs;
u64 flags;
- u64 last = 0;
u32 nritems;
u32 blocksize;
struct btrfs_key key;
@@ -5489,7 +5814,6 @@ reada:
generation);
if (ret)
break;
- last = bytenr + blocksize;
nread++;
}
wc->reada_slot = slot;
@@ -6009,9 +6333,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
NULL, NULL);
BUG_ON(ret < 0);
if (ret > 0) {
- ret = btrfs_del_orphan_item(trans, tree_root,
- root->root_key.objectid);
- BUG_ON(ret);
+ /* if we fail to delete the orphan item this time
+ * around, it'll get picked up the next time.
+ *
+ * The most common failure here is just -ENOENT.
+ */
+ btrfs_del_orphan_item(trans, tree_root,
+ root->root_key.objectid);
}
}
@@ -7587,7 +7915,14 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
- num_devices = root->fs_info->fs_devices->rw_devices;
+ /*
+ * we add in the count of missing devices because we want
+ * to make sure that any RAID levels on a degraded FS
+ * continue to be honored.
+ */
+ num_devices = root->fs_info->fs_devices->rw_devices +
+ root->fs_info->fs_devices->missing_devices;
+
if (num_devices == 1) {
stripped |= BTRFS_BLOCK_GROUP_DUP;
stripped = flags & ~stripped;
@@ -7813,6 +8148,40 @@ out:
return ret;
}
+void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
+{
+ struct btrfs_block_group_cache *block_group;
+ u64 last = 0;
+
+ while (1) {
+ struct inode *inode;
+
+ block_group = btrfs_lookup_first_block_group(info, last);
+ while (block_group) {
+ spin_lock(&block_group->lock);
+ if (block_group->iref)
+ break;
+ spin_unlock(&block_group->lock);
+ block_group = next_block_group(info->tree_root,
+ block_group);
+ }
+ if (!block_group) {
+ if (last == 0)
+ break;
+ last = 0;
+ continue;
+ }
+
+ inode = block_group->inode;
+ block_group->iref = 0;
+ block_group->inode = NULL;
+ spin_unlock(&block_group->lock);
+ iput(inode);
+ last = block_group->key.objectid + block_group->key.offset;
+ btrfs_put_block_group(block_group);
+ }
+}
+
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
struct btrfs_block_group_cache *block_group;
@@ -7896,6 +8265,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
struct btrfs_key key;
struct btrfs_key found_key;
struct extent_buffer *leaf;
+ int need_clear = 0;
+ u64 cache_gen;
root = info->extent_root;
key.objectid = 0;
@@ -7905,13 +8276,21 @@ int btrfs_read_block_groups(struct btrfs_root *root)
if (!path)
return -ENOMEM;
+ cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
+ if (cache_gen != 0 &&
+ btrfs_super_generation(&root->fs_info->super_copy) != cache_gen)
+ need_clear = 1;
+ if (btrfs_test_opt(root, CLEAR_CACHE))
+ need_clear = 1;
+ if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen)
+ printk(KERN_INFO "btrfs: disk space caching is enabled\n");
+
while (1) {
ret = find_first_block_group(root, path, &key);
if (ret > 0)
break;
if (ret != 0)
goto error;
-
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
cache = kzalloc(sizeof(*cache), GFP_NOFS);
@@ -7927,6 +8306,9 @@ int btrfs_read_block_groups(struct btrfs_root *root)
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
+ if (need_clear)
+ cache->disk_cache_state = BTRFS_DC_CLEAR;
+
/*
* we only want to have 32k of ram per block group for keeping
* track of free space, and if we pass 1/2 of that we want to
@@ -8031,6 +8413,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
cache->key.offset = size;
cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
cache->sectorsize = root->sectorsize;
+ cache->fs_info = root->fs_info;
/*
* we only want to have 32k of ram per block group for keeping track
@@ -8087,8 +8470,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_path *path;
struct btrfs_block_group_cache *block_group;
struct btrfs_free_cluster *cluster;
+ struct btrfs_root *tree_root = root->fs_info->tree_root;
struct btrfs_key key;
+ struct inode *inode;
int ret;
+ int factor;
root = root->fs_info->extent_root;
@@ -8097,6 +8483,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
BUG_ON(!block_group->ro);
memcpy(&key, &block_group->key, sizeof(key));
+ if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
+ BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10))
+ factor = 2;
+ else
+ factor = 1;
/* make sure this block group isn't part of an allocation cluster */
cluster = &root->fs_info->data_alloc_cluster;
@@ -8116,6 +8508,40 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
BUG_ON(!path);
+ inode = lookup_free_space_inode(root, block_group, path);
+ if (!IS_ERR(inode)) {
+ btrfs_orphan_add(trans, inode);
+ clear_nlink(inode);
+ /* One for the block groups ref */
+ spin_lock(&block_group->lock);
+ if (block_group->iref) {
+ block_group->iref = 0;
+ block_group->inode = NULL;
+ spin_unlock(&block_group->lock);
+ iput(inode);
+ } else {
+ spin_unlock(&block_group->lock);
+ }
+ /* One for our lookup ref */
+ iput(inode);
+ }
+
+ key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+ key.offset = block_group->key.objectid;
+ key.type = 0;
+
+ ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
+ if (ret < 0)
+ goto out;
+ if (ret > 0)
+ btrfs_release_path(tree_root, path);
+ if (ret == 0) {
+ ret = btrfs_del_item(trans, tree_root, path);
+ if (ret)
+ goto out;
+ btrfs_release_path(tree_root, path);
+ }
+
spin_lock(&root->fs_info->block_group_cache_lock);
rb_erase(&block_group->cache_node,
&root->fs_info->block_group_cache_tree);
@@ -8137,8 +8563,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_lock(&block_group->space_info->lock);
block_group->space_info->total_bytes -= block_group->key.offset;
block_group->space_info->bytes_readonly -= block_group->key.offset;
+ block_group->space_info->disk_total -= block_group->key.offset * factor;
spin_unlock(&block_group->space_info->lock);
+ memcpy(&key, &block_group->key, sizeof(key));
+
btrfs_clear_space_info_full(root->fs_info);
btrfs_put_block_group(block_group);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d74e6af9b53..3e86b9f3650 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -104,7 +104,7 @@ void extent_io_tree_init(struct extent_io_tree *tree,
struct address_space *mapping, gfp_t mask)
{
tree->state = RB_ROOT;
- tree->buffer = RB_ROOT;
+ INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
tree->ops = NULL;
tree->dirty_bytes = 0;
spin_lock_init(&tree->lock);
@@ -235,50 +235,6 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree,
return ret;
}
-static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
- u64 offset, struct rb_node *node)
-{
- struct rb_root *root = &tree->buffer;
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent = NULL;
- struct extent_buffer *eb;
-
- while (*p) {
- parent = *p;
- eb = rb_entry(parent, struct extent_buffer, rb_node);
-
- if (offset < eb->start)
- p = &(*p)->rb_left;
- else if (offset > eb->start)
- p = &(*p)->rb_right;
- else
- return eb;
- }
-
- rb_link_node(node, parent, p);
- rb_insert_color(node, root);
- return NULL;
-}
-
-static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
- u64 offset)
-{
- struct rb_root *root = &tree->buffer;
- struct rb_node *n = root->rb_node;
- struct extent_buffer *eb;
-
- while (n) {
- eb = rb_entry(n, struct extent_buffer, rb_node);
- if (offset < eb->start)
- n = n->rb_left;
- else if (offset > eb->start)
- n = n->rb_right;
- else
- return eb;
- }
- return NULL;
-}
-
static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
struct extent_state *other)
{
@@ -1872,9 +1828,9 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
bio_put(bio);
}
-static struct bio *
-extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
- gfp_t gfp_flags)
+struct bio *
+btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
+ gfp_t gfp_flags)
{
struct bio *bio;
@@ -1901,10 +1857,8 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
struct page *page = bvec->bv_page;
struct extent_io_tree *tree = bio->bi_private;
u64 start;
- u64 end;
start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
- end = start + bvec->bv_len - 1;
bio->bi_private = NULL;
@@ -1965,7 +1919,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
else
nr = bio_get_nr_vecs(bdev);
- bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
+ bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
bio_add_page(bio, page, page_size, offset);
bio->bi_end_io = end_io_func;
@@ -2204,7 +2158,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
u64 last_byte = i_size_read(inode);
u64 block_start;
u64 iosize;
- u64 unlock_start;
sector_t sector;
struct extent_state *cached_state = NULL;
struct extent_map *em;
@@ -2329,7 +2282,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, start,
page_end, NULL, 1);
- unlock_start = page_end + 1;
goto done;
}
@@ -2340,7 +2292,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, cur,
page_end, NULL, 1);
- unlock_start = page_end + 1;
break;
}
em = epd->get_extent(inode, page, pg_offset, cur,
@@ -2387,7 +2338,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
cur += iosize;
pg_offset += iosize;
- unlock_start = cur;
continue;
}
/* leave this out until we have a page_mkwrite call */
@@ -2473,7 +2423,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
pgoff_t index;
pgoff_t end; /* Inclusive */
int scanned = 0;
- int range_whole = 0;
pagevec_init(&pvec, 0);
if (wbc->range_cyclic) {
@@ -2482,8 +2431,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
} else {
index = wbc->range_start >> PAGE_CACHE_SHIFT;
end = wbc->range_end >> PAGE_CACHE_SHIFT;
- if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
- range_whole = 1;
scanned = 1;
}
retry:
@@ -2823,6 +2770,8 @@ int extent_prepare_write(struct extent_io_tree *tree,
NULL, 1,
end_bio_extent_preparewrite, 0,
0, 0);
+ if (ret && !err)
+ err = ret;
iocount++;
block_start = block_start + iosize;
} else {
@@ -2952,21 +2901,53 @@ out:
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent)
{
- int ret;
+ int ret = 0;
u64 off = start;
u64 max = start + len;
u32 flags = 0;
+ u32 found_type;
+ u64 last;
u64 disko = 0;
+ struct btrfs_key found_key;
struct extent_map *em = NULL;
struct extent_state *cached_state = NULL;
+ struct btrfs_path *path;
+ struct btrfs_file_extent_item *item;
int end = 0;
u64 em_start = 0, em_len = 0;
unsigned long emflags;
- ret = 0;
+ int hole = 0;
if (len == 0)
return -EINVAL;
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ path->leave_spinning = 1;
+
+ ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
+ path, inode->i_ino, -1, 0);
+ if (ret < 0) {
+ btrfs_free_path(path);
+ return ret;
+ }
+ WARN_ON(!ret);
+ path->slots[0]--;
+ item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_file_extent_item);
+ btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
+ found_type = btrfs_key_type(&found_key);
+
+ /* No extents, just return */
+ if (found_key.objectid != inode->i_ino ||
+ found_type != BTRFS_EXTENT_DATA_KEY) {
+ btrfs_free_path(path);
+ return 0;
+ }
+ last = found_key.offset;
+ btrfs_free_path(path);
+
lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
&cached_state, GFP_NOFS);
em = get_extent(inode, NULL, 0, off, max - off, 0);
@@ -2976,11 +2957,18 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
ret = PTR_ERR(em);
goto out;
}
+
while (!end) {
+ hole = 0;
off = em->start + em->len;
if (off >= max)
end = 1;
+ if (em->block_start == EXTENT_MAP_HOLE) {
+ hole = 1;
+ goto next;
+ }
+
em_start = em->start;
em_len = em->len;
@@ -2990,8 +2978,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
if (em->block_start == EXTENT_MAP_LAST_BYTE) {
end = 1;
flags |= FIEMAP_EXTENT_LAST;
- } else if (em->block_start == EXTENT_MAP_HOLE) {
- flags |= FIEMAP_EXTENT_UNWRITTEN;
} else if (em->block_start == EXTENT_MAP_INLINE) {
flags |= (FIEMAP_EXTENT_DATA_INLINE |
FIEMAP_EXTENT_NOT_ALIGNED);
@@ -3004,10 +2990,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
flags |= FIEMAP_EXTENT_ENCODED;
+next:
emflags = em->flags;
free_extent_map(em);
em = NULL;
-
if (!end) {
em = get_extent(inode, NULL, 0, off, max - off, 0);
if (!em)
@@ -3018,15 +3004,23 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
emflags = em->flags;
}
+
if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
flags |= FIEMAP_EXTENT_LAST;
end = 1;
}
- ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
- em_len, flags);
- if (ret)
- goto out_free;
+ if (em_start == last) {
+ flags |= FIEMAP_EXTENT_LAST;
+ end = 1;
+ }
+
+ if (!hole) {
+ ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
+ em_len, flags);
+ if (ret)
+ goto out_free;
+ }
}
out_free:
free_extent_map(em);
@@ -3104,6 +3098,39 @@ static void __free_extent_buffer(struct extent_buffer *eb)
kmem_cache_free(extent_buffer_cache, eb);
}
+/*
+ * Helper for releasing extent buffer page.
+ */
+static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
+ unsigned long start_idx)
+{
+ unsigned long index;
+ struct page *page;
+
+ if (!eb->first_page)
+ return;
+
+ index = num_extent_pages(eb->start, eb->len);
+ if (start_idx >= index)
+ return;
+
+ do {
+ index--;
+ page = extent_buffer_page(eb, index);
+ if (page)
+ page_cache_release(page);
+ } while (index != start_idx);
+}
+
+/*
+ * Helper for releasing the extent buffer.
+ */
+static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
+{
+ btrfs_release_extent_buffer_page(eb, 0);
+ __free_extent_buffer(eb);
+}
+
struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
u64 start, unsigned long len,
struct page *page0,
@@ -3117,16 +3144,16 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
struct page *p;
struct address_space *mapping = tree->mapping;
int uptodate = 1;
+ int ret;
- spin_lock(&tree->buffer_lock);
- eb = buffer_search(tree, start);
- if (eb) {
- atomic_inc(&eb->refs);
- spin_unlock(&tree->buffer_lock);
+ rcu_read_lock();
+ eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+ if (eb && atomic_inc_not_zero(&eb->refs)) {
+ rcu_read_unlock();
mark_page_accessed(eb->first_page);
return eb;
}
- spin_unlock(&tree->buffer_lock);
+ rcu_read_unlock();
eb = __alloc_extent_buffer(tree, start, len, mask);
if (!eb)
@@ -3165,26 +3192,31 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
if (uptodate)
set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+ ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+ if (ret)
+ goto free_eb;
+
spin_lock(&tree->buffer_lock);
- exists = buffer_tree_insert(tree, start, &eb->rb_node);
- if (exists) {
+ ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
+ if (ret == -EEXIST) {
+ exists = radix_tree_lookup(&tree->buffer,
+ start >> PAGE_CACHE_SHIFT);
/* add one reference for the caller */
atomic_inc(&exists->refs);
spin_unlock(&tree->buffer_lock);
+ radix_tree_preload_end();
goto free_eb;
}
/* add one reference for the tree */
atomic_inc(&eb->refs);
spin_unlock(&tree->buffer_lock);
+ radix_tree_preload_end();
return eb;
free_eb:
if (!atomic_dec_and_test(&eb->refs))
return exists;
- for (index = 1; index < i; index++)
- page_cache_release(extent_buffer_page(eb, index));
- page_cache_release(extent_buffer_page(eb, 0));
- __free_extent_buffer(eb);
+ btrfs_release_extent_buffer(eb);
return exists;
}
@@ -3194,16 +3226,16 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
{
struct extent_buffer *eb;
- spin_lock(&tree->buffer_lock);
- eb = buffer_search(tree, start);
- if (eb)
- atomic_inc(&eb->refs);
- spin_unlock(&tree->buffer_lock);
-
- if (eb)
+ rcu_read_lock();
+ eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+ if (eb && atomic_inc_not_zero(&eb->refs)) {
+ rcu_read_unlock();
mark_page_accessed(eb->first_page);
+ return eb;
+ }
+ rcu_read_unlock();
- return eb;
+ return NULL;
}
void free_extent_buffer(struct extent_buffer *eb)
@@ -3833,34 +3865,47 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
}
}
+static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
+{
+ struct extent_buffer *eb =
+ container_of(head, struct extent_buffer, rcu_head);
+
+ btrfs_release_extent_buffer(eb);
+}
+
int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
{
u64 start = page_offset(page);
struct extent_buffer *eb;
int ret = 1;
- unsigned long i;
- unsigned long num_pages;
spin_lock(&tree->buffer_lock);
- eb = buffer_search(tree, start);
- if (!eb)
- goto out;
+ eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+ if (!eb) {
+ spin_unlock(&tree->buffer_lock);
+ return ret;
+ }
- if (atomic_read(&eb->refs) > 1) {
+ if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
ret = 0;
goto out;
}
- if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+
+ /*
+ * set @eb->refs to 0 if it is already 1, and then release the @eb.
+ * Or go back.
+ */
+ if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) {
ret = 0;
goto out;
}
- /* at this point we can safely release the extent buffer */
- num_pages = num_extent_pages(eb->start, eb->len);
- for (i = 0; i < num_pages; i++)
- page_cache_release(extent_buffer_page(eb, i));
- rb_erase(&eb->rb_node, &tree->buffer);
- __free_extent_buffer(eb);
+
+ radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
out:
spin_unlock(&tree->buffer_lock);
+
+ /* at this point we can safely release the extent buffer */
+ if (atomic_read(&eb->refs) == 0)
+ call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
return ret;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5691c7b590d..4183c8178f0 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -85,7 +85,7 @@ struct extent_io_ops {
struct extent_io_tree {
struct rb_root state;
- struct rb_root buffer;
+ struct radix_tree_root buffer;
struct address_space *mapping;
u64 dirty_bytes;
spinlock_t lock;
@@ -123,7 +123,7 @@ struct extent_buffer {
unsigned long bflags;
atomic_t refs;
struct list_head leak_list;
- struct rb_node rb_node;
+ struct rcu_head rcu_head;
/* the spinlock is used to protect most operations */
spinlock_t lock;
@@ -310,4 +310,7 @@ int extent_clear_unlock_delalloc(struct inode *inode,
struct extent_io_tree *tree,
u64 start, u64 end, struct page *locked_page,
unsigned long op);
+struct bio *
+btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
+ gfp_t gfp_flags);
#endif
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 454ca52d645..23cb8da3ff6 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -335,7 +335,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
goto out;
}
if (IS_ERR(rb_node)) {
- em = ERR_PTR(PTR_ERR(rb_node));
+ em = ERR_CAST(rb_node);
goto out;
}
em = rb_entry(rb_node, struct extent_map, rb_node);
@@ -384,7 +384,7 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
goto out;
}
if (IS_ERR(rb_node)) {
- em = ERR_PTR(PTR_ERR(rb_node));
+ em = ERR_CAST(rb_node);
goto out;
}
em = rb_entry(rb_node, struct extent_map, rb_node);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e354c33df08..66836d85763 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -48,30 +48,34 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
struct page **prepared_pages,
struct iov_iter *i)
{
- size_t copied;
+ size_t copied = 0;
int pg = 0;
int offset = pos & (PAGE_CACHE_SIZE - 1);
+ int total_copied = 0;
while (write_bytes > 0) {
size_t count = min_t(size_t,
PAGE_CACHE_SIZE - offset, write_bytes);
struct page *page = prepared_pages[pg];
-again:
- if (unlikely(iov_iter_fault_in_readable(i, count)))
- return -EFAULT;
-
- /* Copy data from userspace to the current page */
- copied = iov_iter_copy_from_user(page, i, offset, count);
+ /*
+ * Copy data from userspace to the current page
+ *
+ * Disable pagefault to avoid recursive lock since
+ * the pages are already locked
+ */
+ pagefault_disable();
+ copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
+ pagefault_enable();
/* Flush processor's dcache for this page */
flush_dcache_page(page);
iov_iter_advance(i, copied);
write_bytes -= copied;
+ total_copied += copied;
+ /* Return to btrfs_file_aio_write to fault page */
if (unlikely(copied == 0)) {
- count = min_t(size_t, PAGE_CACHE_SIZE - offset,
- iov_iter_single_seg_count(i));
- goto again;
+ break;
}
if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
@@ -81,7 +85,7 @@ again:
offset = 0;
}
}
- return 0;
+ return total_copied;
}
/*
@@ -854,6 +858,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
unsigned long last_index;
int will_write;
int buffered = 0;
+ int copied = 0;
+ int dirty_pages = 0;
will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
(file->f_flags & O_DIRECT));
@@ -970,7 +976,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
WARN_ON(num_pages > nrptrs);
memset(pages, 0, sizeof(struct page *) * nrptrs);
- ret = btrfs_delalloc_reserve_space(inode, write_bytes);
+ /*
+ * Fault pages before locking them in prepare_pages
+ * to avoid recursive lock
+ */
+ if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ ret = btrfs_delalloc_reserve_space(inode,
+ num_pages << PAGE_CACHE_SHIFT);
if (ret)
goto out;
@@ -978,37 +994,49 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
pos, first_index, last_index,
write_bytes);
if (ret) {
- btrfs_delalloc_release_space(inode, write_bytes);
+ btrfs_delalloc_release_space(inode,
+ num_pages << PAGE_CACHE_SHIFT);
goto out;
}
- ret = btrfs_copy_from_user(pos, num_pages,
+ copied = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, &i);
- if (ret == 0) {
+ dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT;
+
+ if (num_pages > dirty_pages) {
+ if (copied > 0)
+ atomic_inc(
+ &BTRFS_I(inode)->outstanding_extents);
+ btrfs_delalloc_release_space(inode,
+ (num_pages - dirty_pages) <<
+ PAGE_CACHE_SHIFT);
+ }
+
+ if (copied > 0) {
dirty_and_release_pages(NULL, root, file, pages,
- num_pages, pos, write_bytes);
+ dirty_pages, pos, copied);
}
btrfs_drop_pages(pages, num_pages);
- if (ret) {
- btrfs_delalloc_release_space(inode, write_bytes);
- goto out;
- }
- if (will_write) {
- filemap_fdatawrite_range(inode->i_mapping, pos,
- pos + write_bytes - 1);
- } else {
- balance_dirty_pages_ratelimited_nr(inode->i_mapping,
- num_pages);
- if (num_pages <
- (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
- btrfs_btree_balance_dirty(root, 1);
- btrfs_throttle(root);
+ if (copied > 0) {
+ if (will_write) {
+ filemap_fdatawrite_range(inode->i_mapping, pos,
+ pos + copied - 1);
+ } else {
+ balance_dirty_pages_ratelimited_nr(
+ inode->i_mapping,
+ dirty_pages);
+ if (dirty_pages <
+ (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
+ btrfs_btree_balance_dirty(root, 1);
+ btrfs_throttle(root);
+ }
}
- pos += write_bytes;
- num_written += write_bytes;
+ pos += copied;
+ num_written += copied;
cond_resched();
}
@@ -1047,8 +1075,14 @@ out:
if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ num_written = PTR_ERR(trans);
+ goto done;
+ }
+ mutex_lock(&inode->i_mutex);
ret = btrfs_log_dentry_safe(trans, root,
file->f_dentry);
+ mutex_unlock(&inode->i_mutex);
if (ret == 0) {
ret = btrfs_sync_log(trans, root);
if (ret == 0)
@@ -1067,6 +1101,7 @@ out:
(start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
}
}
+done:
current->backing_dev_info = NULL;
return num_written ? num_written : err;
}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index f488fac04d9..60d68426695 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -23,10 +23,763 @@
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
+#include "disk-io.h"
#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
+static void recalculate_thresholds(struct btrfs_block_group_cache
+ *block_group);
+static int link_free_space(struct btrfs_block_group_cache *block_group,
+ struct btrfs_free_space *info);
+
+struct inode *lookup_free_space_inode(struct btrfs_root *root,
+ struct btrfs_block_group_cache
+ *block_group, struct btrfs_path *path)
+{
+ struct btrfs_key key;
+ struct btrfs_key location;
+ struct btrfs_disk_key disk_key;
+ struct btrfs_free_space_header *header;
+ struct extent_buffer *leaf;
+ struct inode *inode = NULL;
+ int ret;
+
+ spin_lock(&block_group->lock);
+ if (block_group->inode)
+ inode = igrab(block_group->inode);
+ spin_unlock(&block_group->lock);
+ if (inode)
+ return inode;
+
+ key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+ key.offset = block_group->key.objectid;
+ key.type = 0;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ if (ret > 0) {
+ btrfs_release_path(root, path);
+ return ERR_PTR(-ENOENT);
+ }
+
+ leaf = path->nodes[0];
+ header = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_free_space_header);
+ btrfs_free_space_key(leaf, header, &disk_key);
+ btrfs_disk_key_to_cpu(&location, &disk_key);
+ btrfs_release_path(root, path);
+
+ inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
+ if (!inode)
+ return ERR_PTR(-ENOENT);
+ if (IS_ERR(inode))
+ return inode;
+ if (is_bad_inode(inode)) {
+ iput(inode);
+ return ERR_PTR(-ENOENT);
+ }
+
+ spin_lock(&block_group->lock);
+ if (!root->fs_info->closing) {
+ block_group->inode = igrab(inode);
+ block_group->iref = 1;
+ }
+ spin_unlock(&block_group->lock);
+
+ return inode;
+}
+
+int create_free_space_inode(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path)
+{
+ struct btrfs_key key;
+ struct btrfs_disk_key disk_key;
+ struct btrfs_free_space_header *header;
+ struct btrfs_inode_item *inode_item;
+ struct extent_buffer *leaf;
+ u64 objectid;
+ int ret;
+
+ ret = btrfs_find_free_objectid(trans, root, 0, &objectid);
+ if (ret < 0)
+ return ret;
+
+ ret = btrfs_insert_empty_inode(trans, root, path, objectid);
+ if (ret)
+ return ret;
+
+ leaf = path->nodes[0];
+ inode_item = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_inode_item);
+ btrfs_item_key(leaf, &disk_key, path->slots[0]);
+ memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
+ sizeof(*inode_item));
+ btrfs_set_inode_generation(leaf, inode_item, trans->transid);
+ btrfs_set_inode_size(leaf, inode_item, 0);
+ btrfs_set_inode_nbytes(leaf, inode_item, 0);
+ btrfs_set_inode_uid(leaf, inode_item, 0);
+ btrfs_set_inode_gid(leaf, inode_item, 0);
+ btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
+ btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
+ BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
+ btrfs_set_inode_nlink(leaf, inode_item, 1);
+ btrfs_set_inode_transid(leaf, inode_item, trans->transid);
+ btrfs_set_inode_block_group(leaf, inode_item,
+ block_group->key.objectid);
+ btrfs_mark_buffer_dirty(leaf);
+ btrfs_release_path(root, path);
+
+ key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+ key.offset = block_group->key.objectid;
+ key.type = 0;
+
+ ret = btrfs_insert_empty_item(trans, root, path, &key,
+ sizeof(struct btrfs_free_space_header));
+ if (ret < 0) {
+ btrfs_release_path(root, path);
+ return ret;
+ }
+ leaf = path->nodes[0];
+ header = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_free_space_header);
+ memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
+ btrfs_set_free_space_key(leaf, header, &disk_key);
+ btrfs_mark_buffer_dirty(leaf);
+ btrfs_release_path(root, path);
+
+ return 0;
+}
+
+int btrfs_truncate_free_space_cache(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct inode *inode)
+{
+ loff_t oldsize;
+ int ret = 0;
+
+ trans->block_rsv = root->orphan_block_rsv;
+ ret = btrfs_block_rsv_check(trans, root,
+ root->orphan_block_rsv,
+ 0, 5);
+ if (ret)
+ return ret;
+
+ oldsize = i_size_read(inode);
+ btrfs_i_size_write(inode, 0);
+ truncate_pagecache(inode, oldsize, 0);
+
+ /*
+ * We don't need an orphan item because truncating the free space cache
+ * will never be split across transactions.
+ */
+ ret = btrfs_truncate_inode_items(trans, root, inode,
+ 0, BTRFS_EXTENT_DATA_KEY);
+ if (ret) {
+ WARN_ON(1);
+ return ret;
+ }
+
+ return btrfs_update_inode(trans, root, inode);
+}
+
+static int readahead_cache(struct inode *inode)
+{
+ struct file_ra_state *ra;
+ unsigned long last_index;
+
+ ra = kzalloc(sizeof(*ra), GFP_NOFS);
+ if (!ra)
+ return -ENOMEM;
+
+ file_ra_state_init(ra, inode->i_mapping);
+ last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+
+ page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
+
+ kfree(ra);
+
+ return 0;
+}
+
+int load_free_space_cache(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group)
+{
+ struct btrfs_root *root = fs_info->tree_root;
+ struct inode *inode;
+ struct btrfs_free_space_header *header;
+ struct extent_buffer *leaf;
+ struct page *page;
+ struct btrfs_path *path;
+ u32 *checksums = NULL, *crc;
+ char *disk_crcs = NULL;
+ struct btrfs_key key;
+ struct list_head bitmaps;
+ u64 num_entries;
+ u64 num_bitmaps;
+ u64 generation;
+ u32 cur_crc = ~(u32)0;
+ pgoff_t index = 0;
+ unsigned long first_page_offset;
+ int num_checksums;
+ int ret = 0;
+
+ /*
+ * If we're unmounting then just return, since this does a search on the
+ * normal root and not the commit root and we could deadlock.
+ */
+ smp_mb();
+ if (fs_info->closing)
+ return 0;
+
+ /*
+ * If this block group has been marked to be cleared for one reason or
+ * another then we can't trust the on disk cache, so just return.
+ */
+ spin_lock(&block_group->lock);
+ if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
+ spin_unlock(&block_group->lock);
+ return 0;
+ }
+ spin_unlock(&block_group->lock);
+
+ INIT_LIST_HEAD(&bitmaps);
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return 0;
+
+ inode = lookup_free_space_inode(root, block_group, path);
+ if (IS_ERR(inode)) {
+ btrfs_free_path(path);
+ return 0;
+ }
+
+ /* Nothing in the space cache, goodbye */
+ if (!i_size_read(inode)) {
+ btrfs_free_path(path);
+ goto out;
+ }
+
+ key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+ key.offset = block_group->key.objectid;
+ key.type = 0;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret) {
+ btrfs_free_path(path);
+ goto out;
+ }
+
+ leaf = path->nodes[0];
+ header = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_free_space_header);
+ num_entries = btrfs_free_space_entries(leaf, header);
+ num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
+ generation = btrfs_free_space_generation(leaf, header);
+ btrfs_free_path(path);
+
+ if (BTRFS_I(inode)->generation != generation) {
+ printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
+ " not match free space cache generation (%llu) for "
+ "block group %llu\n",
+ (unsigned long long)BTRFS_I(inode)->generation,
+ (unsigned long long)generation,
+ (unsigned long long)block_group->key.objectid);
+ goto free_cache;
+ }
+
+ if (!num_entries)
+ goto out;
+
+ /* Setup everything for doing checksumming */
+ num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
+ checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
+ if (!checksums)
+ goto out;
+ first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
+ disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
+ if (!disk_crcs)
+ goto out;
+
+ ret = readahead_cache(inode);
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+
+ while (1) {
+ struct btrfs_free_space_entry *entry;
+ struct btrfs_free_space *e;
+ void *addr;
+ unsigned long offset = 0;
+ unsigned long start_offset = 0;
+ int need_loop = 0;
+
+ if (!num_entries && !num_bitmaps)
+ break;
+
+ if (index == 0) {
+ start_offset = first_page_offset;
+ offset = start_offset;
+ }
+
+ page = grab_cache_page(inode->i_mapping, index);
+ if (!page) {
+ ret = 0;
+ goto free_cache;
+ }
+
+ if (!PageUptodate(page)) {
+ btrfs_readpage(NULL, page);
+ lock_page(page);
+ if (!PageUptodate(page)) {
+ unlock_page(page);
+ page_cache_release(page);
+ printk(KERN_ERR "btrfs: error reading free "
+ "space cache: %llu\n",
+ (unsigned long long)
+ block_group->key.objectid);
+ goto free_cache;
+ }
+ }
+ addr = kmap(page);
+
+ if (index == 0) {
+ u64 *gen;
+
+ memcpy(disk_crcs, addr, first_page_offset);
+ gen = addr + (sizeof(u32) * num_checksums);
+ if (*gen != BTRFS_I(inode)->generation) {
+ printk(KERN_ERR "btrfs: space cache generation"
+ " (%llu) does not match inode (%llu) "
+ "for block group %llu\n",
+ (unsigned long long)*gen,
+ (unsigned long long)
+ BTRFS_I(inode)->generation,
+ (unsigned long long)
+ block_group->key.objectid);
+ kunmap(page);
+ unlock_page(page);
+ page_cache_release(page);
+ goto free_cache;
+ }
+ crc = (u32 *)disk_crcs;
+ }
+ entry = addr + start_offset;
+
+ /* First lets check our crc before we do anything fun */
+ cur_crc = ~(u32)0;
+ cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
+ PAGE_CACHE_SIZE - start_offset);
+ btrfs_csum_final(cur_crc, (char *)&cur_crc);
+ if (cur_crc != *crc) {
+ printk(KERN_ERR "btrfs: crc mismatch for page %lu in "
+ "block group %llu\n", index,
+ (unsigned long long)block_group->key.objectid);
+ kunmap(page);
+ unlock_page(page);
+ page_cache_release(page);
+ goto free_cache;
+ }
+ crc++;
+
+ while (1) {
+ if (!num_entries)
+ break;
+
+ need_loop = 1;
+ e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
+ if (!e) {
+ kunmap(page);
+ unlock_page(page);
+ page_cache_release(page);
+ goto free_cache;
+ }
+
+ e->offset = le64_to_cpu(entry->offset);
+ e->bytes = le64_to_cpu(entry->bytes);
+ if (!e->bytes) {
+ kunmap(page);
+ kfree(e);
+ unlock_page(page);
+ page_cache_release(page);
+ goto free_cache;
+ }
+
+ if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
+ spin_lock(&block_group->tree_lock);
+ ret = link_free_space(block_group, e);
+ spin_unlock(&block_group->tree_lock);
+ BUG_ON(ret);
+ } else {
+ e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
+ if (!e->bitmap) {
+ kunmap(page);
+ kfree(e);
+ unlock_page(page);
+ page_cache_release(page);
+ goto free_cache;
+ }
+ spin_lock(&block_group->tree_lock);
+ ret = link_free_space(block_group, e);
+ block_group->total_bitmaps++;
+ recalculate_thresholds(block_group);
+ spin_unlock(&block_group->tree_lock);
+ list_add_tail(&e->list, &bitmaps);
+ }
+
+ num_entries--;
+ offset += sizeof(struct btrfs_free_space_entry);
+ if (offset + sizeof(struct btrfs_free_space_entry) >=
+ PAGE_CACHE_SIZE)
+ break;
+ entry++;
+ }
+
+ /*
+ * We read an entry out of this page, we need to move on to the
+ * next page.
+ */
+ if (need_loop) {
+ kunmap(page);
+ goto next;
+ }
+
+ /*
+ * We add the bitmaps at the end of the entries in order that
+ * the bitmap entries are added to the cache.
+ */
+ e = list_entry(bitmaps.next, struct btrfs_free_space, list);
+ list_del_init(&e->list);
+ memcpy(e->bitmap, addr, PAGE_CACHE_SIZE);
+ kunmap(page);
+ num_bitmaps--;
+next:
+ unlock_page(page);
+ page_cache_release(page);
+ index++;
+ }
+
+ ret = 1;
+out:
+ kfree(checksums);
+ kfree(disk_crcs);
+ iput(inode);
+ return ret;
+
+free_cache:
+ /* This cache is bogus, make sure it gets cleared */
+ spin_lock(&block_group->lock);
+ block_group->disk_cache_state = BTRFS_DC_CLEAR;
+ spin_unlock(&block_group->lock);
+ btrfs_remove_free_space_cache(block_group);
+ goto out;
+}
+
+int btrfs_write_out_cache(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path)
+{
+ struct btrfs_free_space_header *header;
+ struct extent_buffer *leaf;
+ struct inode *inode;
+ struct rb_node *node;
+ struct list_head *pos, *n;
+ struct page *page;
+ struct extent_state *cached_state = NULL;
+ struct list_head bitmap_list;
+ struct btrfs_key key;
+ u64 bytes = 0;
+ u32 *crc, *checksums;
+ pgoff_t index = 0, last_index = 0;
+ unsigned long first_page_offset;
+ int num_checksums;
+ int entries = 0;
+ int bitmaps = 0;
+ int ret = 0;
+
+ root = root->fs_info->tree_root;
+
+ INIT_LIST_HEAD(&bitmap_list);
+
+ spin_lock(&block_group->lock);
+ if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
+ spin_unlock(&block_group->lock);
+ return 0;
+ }
+ spin_unlock(&block_group->lock);
+
+ inode = lookup_free_space_inode(root, block_group, path);
+ if (IS_ERR(inode))
+ return 0;
+
+ if (!i_size_read(inode)) {
+ iput(inode);
+ return 0;
+ }
+
+ node = rb_first(&block_group->free_space_offset);
+ if (!node) {
+ iput(inode);
+ return 0;
+ }
+
+ last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+ filemap_write_and_wait(inode->i_mapping);
+ btrfs_wait_ordered_range(inode, inode->i_size &
+ ~(root->sectorsize - 1), (u64)-1);
+
+ /* We need a checksum per page. */
+ num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
+ crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
+ if (!crc) {
+ iput(inode);
+ return 0;
+ }
+
+ /* Since the first page has all of our checksums and our generation we
+ * need to calculate the offset into the page that we can start writing
+ * our entries.
+ */
+ first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
+
+ /*
+ * Lock all pages first so we can lock the extent safely.
+ *
+ * NOTE: Because we hold the ref the entire time we're going to write to
+ * the page find_get_page should never fail, so we don't do a check
+ * after find_get_page at this point. Just putting this here so people
+ * know and don't freak out.
+ */
+ while (index <= last_index) {
+ page = grab_cache_page(inode->i_mapping, index);
+ if (!page) {
+ pgoff_t i = 0;
+
+ while (i < index) {
+ page = find_get_page(inode->i_mapping, i);
+ unlock_page(page);
+ page_cache_release(page);
+ page_cache_release(page);
+ i++;
+ }
+ goto out_free;
+ }
+ index++;
+ }
+
+ index = 0;
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
+ 0, &cached_state, GFP_NOFS);
+
+ /* Write out the extent entries */
+ do {
+ struct btrfs_free_space_entry *entry;
+ void *addr;
+ unsigned long offset = 0;
+ unsigned long start_offset = 0;
+
+ if (index == 0) {
+ start_offset = first_page_offset;
+ offset = start_offset;
+ }
+
+ page = find_get_page(inode->i_mapping, index);
+
+ addr = kmap(page);
+ entry = addr + start_offset;
+
+ memset(addr, 0, PAGE_CACHE_SIZE);
+ while (1) {
+ struct btrfs_free_space *e;
+
+ e = rb_entry(node, struct btrfs_free_space, offset_index);
+ entries++;
+
+ entry->offset = cpu_to_le64(e->offset);
+ entry->bytes = cpu_to_le64(e->bytes);
+ if (e->bitmap) {
+ entry->type = BTRFS_FREE_SPACE_BITMAP;
+ list_add_tail(&e->list, &bitmap_list);
+ bitmaps++;
+ } else {
+ entry->type = BTRFS_FREE_SPACE_EXTENT;
+ }
+ node = rb_next(node);
+ if (!node)
+ break;
+ offset += sizeof(struct btrfs_free_space_entry);
+ if (offset + sizeof(struct btrfs_free_space_entry) >=
+ PAGE_CACHE_SIZE)
+ break;
+ entry++;
+ }
+ *crc = ~(u32)0;
+ *crc = btrfs_csum_data(root, addr + start_offset, *crc,
+ PAGE_CACHE_SIZE - start_offset);
+ kunmap(page);
+
+ btrfs_csum_final(*crc, (char *)crc);
+ crc++;
+
+ bytes += PAGE_CACHE_SIZE;
+
+ ClearPageChecked(page);
+ set_page_extent_mapped(page);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+
+ /*
+ * We need to release our reference we got for grab_cache_page,
+ * except for the first page which will hold our checksums, we
+ * do that below.
+ */
+ if (index != 0) {
+ unlock_page(page);
+ page_cache_release(page);
+ }
+
+ page_cache_release(page);
+
+ index++;
+ } while (node);
+
+ /* Write out the bitmaps */
+ list_for_each_safe(pos, n, &bitmap_list) {
+ void *addr;
+ struct btrfs_free_space *entry =
+ list_entry(pos, struct btrfs_free_space, list);
+
+ page = find_get_page(inode->i_mapping, index);
+
+ addr = kmap(page);
+ memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
+ *crc = ~(u32)0;
+ *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
+ kunmap(page);
+ btrfs_csum_final(*crc, (char *)crc);
+ crc++;
+ bytes += PAGE_CACHE_SIZE;
+
+ ClearPageChecked(page);
+ set_page_extent_mapped(page);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+ page_cache_release(page);
+ list_del_init(&entry->list);
+ index++;
+ }
+
+ /* Zero out the rest of the pages just to make sure */
+ while (index <= last_index) {
+ void *addr;
+
+ page = find_get_page(inode->i_mapping, index);
+
+ addr = kmap(page);
+ memset(addr, 0, PAGE_CACHE_SIZE);
+ kunmap(page);
+ ClearPageChecked(page);
+ set_page_extent_mapped(page);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+ page_cache_release(page);
+ bytes += PAGE_CACHE_SIZE;
+ index++;
+ }
+
+ btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
+
+ /* Write the checksums and trans id to the first page */
+ {
+ void *addr;
+ u64 *gen;
+
+ page = find_get_page(inode->i_mapping, 0);
+
+ addr = kmap(page);
+ memcpy(addr, checksums, sizeof(u32) * num_checksums);
+ gen = addr + (sizeof(u32) * num_checksums);
+ *gen = trans->transid;
+ kunmap(page);
+ ClearPageChecked(page);
+ set_page_extent_mapped(page);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+ page_cache_release(page);
+ }
+ BTRFS_I(inode)->generation = trans->transid;
+
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
+ i_size_read(inode) - 1, &cached_state, GFP_NOFS);
+
+ filemap_write_and_wait(inode->i_mapping);
+
+ key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+ key.offset = block_group->key.objectid;
+ key.type = 0;
+
+ ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+ if (ret < 0) {
+ ret = 0;
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
+ goto out_free;
+ }
+ leaf = path->nodes[0];
+ if (ret > 0) {
+ struct btrfs_key found_key;
+ BUG_ON(!path->slots[0]);
+ path->slots[0]--;
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+ if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
+ found_key.offset != block_group->key.objectid) {
+ ret = 0;
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING, 0, 0, NULL,
+ GFP_NOFS);
+ btrfs_release_path(root, path);
+ goto out_free;
+ }
+ }
+ header = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_free_space_header);
+ btrfs_set_free_space_entries(leaf, header, entries);
+ btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
+ btrfs_set_free_space_generation(leaf, header, trans->transid);
+ btrfs_mark_buffer_dirty(leaf);
+ btrfs_release_path(root, path);
+
+ ret = 1;
+
+out_free:
+ if (ret == 0) {
+ invalidate_inode_pages2_range(inode->i_mapping, 0, index);
+ spin_lock(&block_group->lock);
+ block_group->disk_cache_state = BTRFS_DC_ERROR;
+ spin_unlock(&block_group->lock);
+ BTRFS_I(inode)->generation = 0;
+ }
+ kfree(checksums);
+ btrfs_update_inode(trans, root, inode);
+ iput(inode);
+ return ret;
+}
+
static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
u64 offset)
{
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 890a8e79011..e49ca5c321b 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -27,6 +27,24 @@ struct btrfs_free_space {
struct list_head list;
};
+struct inode *lookup_free_space_inode(struct btrfs_root *root,
+ struct btrfs_block_group_cache
+ *block_group, struct btrfs_path *path);
+int create_free_space_inode(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path);
+
+int btrfs_truncate_free_space_cache(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct inode *inode);
+int load_free_space_cache(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group);
+int btrfs_write_out_cache(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path);
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
u64 bytenr, u64 size);
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 64f99cf69ce..a0ff46a4789 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -319,8 +319,6 @@ static noinline int compress_file_range(struct inode *inode,
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
u64 num_bytes;
- u64 orig_start;
- u64 disk_num_bytes;
u64 blocksize = root->sectorsize;
u64 actual_end;
u64 isize = i_size_read(inode);
@@ -335,8 +333,6 @@ static noinline int compress_file_range(struct inode *inode,
int i;
int will_compress;
- orig_start = start;
-
actual_end = min_t(u64, isize, end + 1);
again:
will_compress = 0;
@@ -371,7 +367,6 @@ again:
total_compressed = min(total_compressed, max_uncompressed);
num_bytes = (end - start + blocksize) & ~(blocksize - 1);
num_bytes = max(blocksize, num_bytes);
- disk_num_bytes = num_bytes;
total_in = 0;
ret = 0;
@@ -467,7 +462,6 @@ again:
if (total_compressed >= total_in) {
will_compress = 0;
} else {
- disk_num_bytes = total_compressed;
num_bytes = total_in;
}
}
@@ -501,7 +495,7 @@ again:
add_async_extent(async_cow, start, num_bytes,
total_compressed, pages, nr_pages_ret);
- if (start + num_bytes < end && start + num_bytes < actual_end) {
+ if (start + num_bytes < end) {
start += num_bytes;
pages = NULL;
cond_resched();
@@ -757,20 +751,17 @@ static noinline int cow_file_range(struct inode *inode,
u64 disk_num_bytes;
u64 cur_alloc_size;
u64 blocksize = root->sectorsize;
- u64 actual_end;
- u64 isize = i_size_read(inode);
struct btrfs_key ins;
struct extent_map *em;
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
int ret = 0;
+ BUG_ON(root == root->fs_info->tree_root);
trans = btrfs_join_transaction(root, 1);
BUG_ON(!trans);
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
- actual_end = min_t(u64, isize, end + 1);
-
num_bytes = (end - start + blocksize) & ~(blocksize - 1);
num_bytes = max(blocksize, num_bytes);
disk_num_bytes = num_bytes;
@@ -1035,10 +1026,16 @@ static noinline int run_delalloc_nocow(struct inode *inode,
int type;
int nocow;
int check_prev = 1;
+ bool nolock = false;
path = btrfs_alloc_path();
BUG_ON(!path);
- trans = btrfs_join_transaction(root, 1);
+ if (root == root->fs_info->tree_root) {
+ nolock = true;
+ trans = btrfs_join_transaction_nolock(root, 1);
+ } else {
+ trans = btrfs_join_transaction(root, 1);
+ }
BUG_ON(!trans);
cow_start = (u64)-1;
@@ -1211,8 +1208,13 @@ out_check:
BUG_ON(ret);
}
- ret = btrfs_end_transaction(trans, root);
- BUG_ON(ret);
+ if (nolock) {
+ ret = btrfs_end_transaction_nolock(trans, root);
+ BUG_ON(ret);
+ } else {
+ ret = btrfs_end_transaction(trans, root);
+ BUG_ON(ret);
+ }
btrfs_free_path(path);
return 0;
}
@@ -1289,6 +1291,8 @@ static int btrfs_set_bit_hook(struct inode *inode,
if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
+ int do_list = (root->root_key.objectid !=
+ BTRFS_ROOT_TREE_OBJECTID);
if (*bits & EXTENT_FIRST_DELALLOC)
*bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1298,7 +1302,7 @@ static int btrfs_set_bit_hook(struct inode *inode,
spin_lock(&root->fs_info->delalloc_lock);
BTRFS_I(inode)->delalloc_bytes += len;
root->fs_info->delalloc_bytes += len;
- if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
+ if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
&root->fs_info->delalloc_inodes);
}
@@ -1321,6 +1325,8 @@ static int btrfs_clear_bit_hook(struct inode *inode,
if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
+ int do_list = (root->root_key.objectid !=
+ BTRFS_ROOT_TREE_OBJECTID);
if (*bits & EXTENT_FIRST_DELALLOC)
*bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1330,14 +1336,15 @@ static int btrfs_clear_bit_hook(struct inode *inode,
if (*bits & EXTENT_DO_ACCOUNTING)
btrfs_delalloc_release_metadata(inode, len);
- if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
+ if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
+ && do_list)
btrfs_free_reserved_data_space(inode, len);
spin_lock(&root->fs_info->delalloc_lock);
root->fs_info->delalloc_bytes -= len;
BTRFS_I(inode)->delalloc_bytes -= len;
- if (BTRFS_I(inode)->delalloc_bytes == 0 &&
+ if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
list_del_init(&BTRFS_I(inode)->delalloc_inodes);
}
@@ -1372,7 +1379,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
if (map_length < length + size)
return 1;
- return 0;
+ return ret;
}
/*
@@ -1426,7 +1433,10 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
- ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+ if (root == root->fs_info->tree_root)
+ ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
+ else
+ ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
BUG_ON(ret);
if (!(rw & REQ_WRITE)) {
@@ -1662,6 +1672,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
struct extent_state *cached_state = NULL;
int compressed = 0;
int ret;
+ bool nolock = false;
ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
end - start + 1);
@@ -1669,11 +1680,17 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
return 0;
BUG_ON(!ordered_extent);
+ nolock = (root == root->fs_info->tree_root);
+
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
BUG_ON(!list_empty(&ordered_extent->list));
ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
if (!ret) {
- trans = btrfs_join_transaction(root, 1);
+ if (nolock)
+ trans = btrfs_join_transaction_nolock(root, 1);
+ else
+ trans = btrfs_join_transaction(root, 1);
+ BUG_ON(!trans);
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
ret = btrfs_update_inode(trans, root, inode);
@@ -1686,7 +1703,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ordered_extent->file_offset + ordered_extent->len - 1,
0, &cached_state, GFP_NOFS);
- trans = btrfs_join_transaction(root, 1);
+ if (nolock)
+ trans = btrfs_join_transaction_nolock(root, 1);
+ else
+ trans = btrfs_join_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -1700,6 +1720,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ordered_extent->len);
BUG_ON(ret);
} else {
+ BUG_ON(root == root->fs_info->tree_root);
ret = insert_reserved_file_extent(trans, inode,
ordered_extent->file_offset,
ordered_extent->start,
@@ -1724,9 +1745,15 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
out:
- btrfs_delalloc_release_metadata(inode, ordered_extent->len);
- if (trans)
- btrfs_end_transaction(trans, root);
+ if (nolock) {
+ if (trans)
+ btrfs_end_transaction_nolock(trans, root);
+ } else {
+ btrfs_delalloc_release_metadata(inode, ordered_extent->len);
+ if (trans)
+ btrfs_end_transaction(trans, root);
+ }
+
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
/* once for the tree */
@@ -2237,7 +2264,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
{
struct btrfs_path *path;
struct extent_buffer *leaf;
- struct btrfs_item *item;
struct btrfs_key key, found_key;
struct btrfs_trans_handle *trans;
struct inode *inode;
@@ -2275,7 +2301,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
/* pull out the item */
leaf = path->nodes[0];
- item = btrfs_item_nr(leaf, path->slots[0]);
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/* make sure the item matches what we want */
@@ -2651,7 +2676,8 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
dir, index);
- BUG_ON(ret);
+ if (ret == -ENOENT)
+ ret = 0;
err:
btrfs_free_path(path);
if (ret)
@@ -2672,8 +2698,8 @@ static int check_path_shared(struct btrfs_root *root,
{
struct extent_buffer *eb;
int level;
- int ret;
u64 refs = 1;
+ int uninitialized_var(ret);
for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
if (!path->nodes[level])
@@ -2686,7 +2712,7 @@ static int check_path_shared(struct btrfs_root *root,
if (refs > 1)
return 1;
}
- return 0;
+ return ret; /* XXX callers? */
}
/*
@@ -3196,7 +3222,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
- if (root->ref_cows)
+ if (root->ref_cows || root == root->fs_info->tree_root)
btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
path = btrfs_alloc_path();
@@ -3344,7 +3370,8 @@ delete:
} else {
break;
}
- if (found_extent && root->ref_cows) {
+ if (found_extent && (root->ref_cows ||
+ root == root->fs_info->tree_root)) {
btrfs_set_path_blocking(path);
ret = btrfs_free_extent(trans, root, extent_start,
extent_num_bytes, 0,
@@ -3675,7 +3702,8 @@ void btrfs_evict_inode(struct inode *inode)
int ret;
truncate_inode_pages(&inode->i_data, 0);
- if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0)
+ if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
+ root == root->fs_info->tree_root))
goto no_delete;
if (is_bad_inode(inode)) {
@@ -3888,7 +3916,14 @@ static void inode_tree_del(struct inode *inode)
}
spin_unlock(&root->inode_lock);
- if (empty && btrfs_root_refs(&root->root_item) == 0) {
+ /*
+ * Free space cache has inodes in the tree root, but the tree root has a
+ * root_refs of 0, so this could end up dropping the tree root as a
+ * snapshot, so we need the extra !root->fs_info->tree_root check to
+ * make sure we don't drop it.
+ */
+ if (empty && btrfs_root_refs(&root->root_item) == 0 &&
+ root != root->fs_info->tree_root) {
synchronize_srcu(&root->fs_info->subvol_srcu);
spin_lock(&root->inode_lock);
empty = RB_EMPTY_ROOT(&root->inode_tree);
@@ -4049,7 +4084,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
int index;
int ret;
- dentry->d_op = &btrfs_dentry_operations;
+ d_set_d_op(dentry, &btrfs_dentry_operations);
if (dentry->d_name.len > BTRFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
@@ -4092,7 +4127,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
return inode;
}
-static int btrfs_dentry_delete(struct dentry *dentry)
+static int btrfs_dentry_delete(const struct dentry *dentry)
{
struct btrfs_root *root;
@@ -4282,14 +4317,24 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
int ret = 0;
+ bool nolock = false;
if (BTRFS_I(inode)->dummy_inode)
return 0;
+ smp_mb();
+ nolock = (root->fs_info->closing && root == root->fs_info->tree_root);
+
if (wbc->sync_mode == WB_SYNC_ALL) {
- trans = btrfs_join_transaction(root, 1);
+ if (nolock)
+ trans = btrfs_join_transaction_nolock(root, 1);
+ else
+ trans = btrfs_join_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
- ret = btrfs_commit_transaction(trans, root);
+ if (nolock)
+ ret = btrfs_end_transaction_nolock(trans, root);
+ else
+ ret = btrfs_commit_transaction(trans, root);
}
return ret;
}
@@ -4456,6 +4501,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
BTRFS_I(inode)->index_cnt = 2;
BTRFS_I(inode)->root = root;
BTRFS_I(inode)->generation = trans->transid;
+ inode->i_generation = BTRFS_I(inode)->generation;
btrfs_set_inode_space_info(root, inode);
if (mode & S_IFDIR)
@@ -4577,12 +4623,12 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
}
static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
- struct dentry *dentry, struct inode *inode,
- int backref, u64 index)
+ struct inode *dir, struct dentry *dentry,
+ struct inode *inode, int backref, u64 index)
{
- int err = btrfs_add_link(trans, dentry->d_parent->d_inode,
- inode, dentry->d_name.name,
- dentry->d_name.len, backref, index);
+ int err = btrfs_add_link(trans, dir, inode,
+ dentry->d_name.name, dentry->d_name.len,
+ backref, index);
if (!err) {
d_instantiate(dentry, inode);
return 0;
@@ -4623,8 +4669,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
btrfs_set_trans_block_group(trans, dir);
inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len,
- dentry->d_parent->d_inode->i_ino, objectid,
+ dentry->d_name.len, dir->i_ino, objectid,
BTRFS_I(dir)->block_group, mode, &index);
err = PTR_ERR(inode);
if (IS_ERR(inode))
@@ -4637,7 +4682,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
}
btrfs_set_trans_block_group(trans, inode);
- err = btrfs_add_nondir(trans, dentry, inode, 0, index);
+ err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
if (err)
drop_inode = 1;
else {
@@ -4685,10 +4730,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
btrfs_set_trans_block_group(trans, dir);
inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len,
- dentry->d_parent->d_inode->i_ino,
- objectid, BTRFS_I(dir)->block_group, mode,
- &index);
+ dentry->d_name.len, dir->i_ino, objectid,
+ BTRFS_I(dir)->block_group, mode, &index);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_unlock;
@@ -4700,7 +4743,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
}
btrfs_set_trans_block_group(trans, inode);
- err = btrfs_add_nondir(trans, dentry, inode, 0, index);
+ err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
if (err)
drop_inode = 1;
else {
@@ -4742,6 +4785,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
return -EPERM;
btrfs_inc_nlink(inode);
+ inode->i_ctime = CURRENT_TIME;
err = btrfs_set_inode_index(dir, &index);
if (err)
@@ -4760,15 +4804,17 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
btrfs_set_trans_block_group(trans, dir);
ihold(inode);
- err = btrfs_add_nondir(trans, dentry, inode, 1, index);
+ err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
if (err) {
drop_inode = 1;
} else {
+ struct dentry *parent = dget_parent(dentry);
btrfs_update_inode_block_group(trans, dir);
err = btrfs_update_inode(trans, root, inode);
BUG_ON(err);
- btrfs_log_new_name(trans, inode, NULL, dentry->d_parent);
+ btrfs_log_new_name(trans, inode, NULL, parent);
+ dput(parent);
}
nr = trans->blocks_used;
@@ -4808,8 +4854,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
btrfs_set_trans_block_group(trans, dir);
inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len,
- dentry->d_parent->d_inode->i_ino, objectid,
+ dentry->d_name.len, dir->i_ino, objectid,
BTRFS_I(dir)->block_group, S_IFDIR | mode,
&index);
if (IS_ERR(inode)) {
@@ -4832,9 +4877,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
if (err)
goto out_fail;
- err = btrfs_add_link(trans, dentry->d_parent->d_inode,
- inode, dentry->d_name.name,
- dentry->d_name.len, 0, index);
+ err = btrfs_add_link(trans, dir, inode, dentry->d_name.name,
+ dentry->d_name.len, 0, index);
if (err)
goto out_fail;
@@ -5490,13 +5534,21 @@ struct btrfs_dio_private {
u64 bytes;
u32 *csums;
void *private;
+
+ /* number of bios pending for this dio */
+ atomic_t pending_bios;
+
+ /* IO errors */
+ int errors;
+
+ struct bio *orig_bio;
};
static void btrfs_endio_direct_read(struct bio *bio, int err)
{
+ struct btrfs_dio_private *dip = bio->bi_private;
struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
struct bio_vec *bvec = bio->bi_io_vec;
- struct btrfs_dio_private *dip = bio->bi_private;
struct inode *inode = dip->inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 start;
@@ -5550,15 +5602,18 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
struct btrfs_trans_handle *trans;
struct btrfs_ordered_extent *ordered = NULL;
struct extent_state *cached_state = NULL;
+ u64 ordered_offset = dip->logical_offset;
+ u64 ordered_bytes = dip->bytes;
int ret;
if (err)
goto out_done;
-
- ret = btrfs_dec_test_ordered_pending(inode, &ordered,
- dip->logical_offset, dip->bytes);
+again:
+ ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
+ &ordered_offset,
+ ordered_bytes);
if (!ret)
- goto out_done;
+ goto out_test;
BUG_ON(!ordered);
@@ -5618,8 +5673,20 @@ out_unlock:
out:
btrfs_delalloc_release_metadata(inode, ordered->len);
btrfs_end_transaction(trans, root);
+ ordered_offset = ordered->file_offset + ordered->len;
btrfs_put_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
+
+out_test:
+ /*
+ * our bio might span multiple ordered extents. If we haven't
+ * completed the accounting for the whole dio, go back and try again
+ */
+ if (ordered_offset < dip->logical_offset + dip->bytes) {
+ ordered_bytes = dip->logical_offset + dip->bytes -
+ ordered_offset;
+ goto again;
+ }
out_done:
bio->bi_private = dip->private;
@@ -5639,13 +5706,182 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
return 0;
}
+static void btrfs_end_dio_bio(struct bio *bio, int err)
+{
+ struct btrfs_dio_private *dip = bio->bi_private;
+
+ if (err) {
+ printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu "
+ "sector %#Lx len %u err no %d\n",
+ dip->inode->i_ino, bio->bi_rw,
+ (unsigned long long)bio->bi_sector, bio->bi_size, err);
+ dip->errors = 1;
+
+ /*
+ * before atomic variable goto zero, we must make sure
+ * dip->errors is perceived to be set.
+ */
+ smp_mb__before_atomic_dec();
+ }
+
+ /* if there are more bios still pending for this dio, just exit */
+ if (!atomic_dec_and_test(&dip->pending_bios))
+ goto out;
+
+ if (dip->errors)
+ bio_io_error(dip->orig_bio);
+ else {
+ set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags);
+ bio_endio(dip->orig_bio, 0);
+ }
+out:
+ bio_put(bio);
+}
+
+static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
+ u64 first_sector, gfp_t gfp_flags)
+{
+ int nr_vecs = bio_get_nr_vecs(bdev);
+ return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags);
+}
+
+static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
+ int rw, u64 file_offset, int skip_sum,
+ u32 *csums)
+{
+ int write = rw & REQ_WRITE;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ int ret;
+
+ bio_get(bio);
+ ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+ if (ret)
+ goto err;
+
+ if (write && !skip_sum) {
+ ret = btrfs_wq_submit_bio(root->fs_info,
+ inode, rw, bio, 0, 0,
+ file_offset,
+ __btrfs_submit_bio_start_direct_io,
+ __btrfs_submit_bio_done);
+ goto err;
+ } else if (!skip_sum)
+ btrfs_lookup_bio_sums_dio(root, inode, bio,
+ file_offset, csums);
+
+ ret = btrfs_map_bio(root, rw, bio, 0, 1);
+err:
+ bio_put(bio);
+ return ret;
+}
+
+static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
+ int skip_sum)
+{
+ struct inode *inode = dip->inode;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
+ struct bio *bio;
+ struct bio *orig_bio = dip->orig_bio;
+ struct bio_vec *bvec = orig_bio->bi_io_vec;
+ u64 start_sector = orig_bio->bi_sector;
+ u64 file_offset = dip->logical_offset;
+ u64 submit_len = 0;
+ u64 map_length;
+ int nr_pages = 0;
+ u32 *csums = dip->csums;
+ int ret = 0;
+
+ bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
+ if (!bio)
+ return -ENOMEM;
+ bio->bi_private = dip;
+ bio->bi_end_io = btrfs_end_dio_bio;
+ atomic_inc(&dip->pending_bios);
+
+ map_length = orig_bio->bi_size;
+ ret = btrfs_map_block(map_tree, READ, start_sector << 9,
+ &map_length, NULL, 0);
+ if (ret) {
+ bio_put(bio);
+ return -EIO;
+ }
+
+ while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
+ if (unlikely(map_length < submit_len + bvec->bv_len ||
+ bio_add_page(bio, bvec->bv_page, bvec->bv_len,
+ bvec->bv_offset) < bvec->bv_len)) {
+ /*
+ * inc the count before we submit the bio so
+ * we know the end IO handler won't happen before
+ * we inc the count. Otherwise, the dip might get freed
+ * before we're done setting it up
+ */
+ atomic_inc(&dip->pending_bios);
+ ret = __btrfs_submit_dio_bio(bio, inode, rw,
+ file_offset, skip_sum,
+ csums);
+ if (ret) {
+ bio_put(bio);
+ atomic_dec(&dip->pending_bios);
+ goto out_err;
+ }
+
+ if (!skip_sum)
+ csums = csums + nr_pages;
+ start_sector += submit_len >> 9;
+ file_offset += submit_len;
+
+ submit_len = 0;
+ nr_pages = 0;
+
+ bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
+ start_sector, GFP_NOFS);
+ if (!bio)
+ goto out_err;
+ bio->bi_private = dip;
+ bio->bi_end_io = btrfs_end_dio_bio;
+
+ map_length = orig_bio->bi_size;
+ ret = btrfs_map_block(map_tree, READ, start_sector << 9,
+ &map_length, NULL, 0);
+ if (ret) {
+ bio_put(bio);
+ goto out_err;
+ }
+ } else {
+ submit_len += bvec->bv_len;
+ nr_pages ++;
+ bvec++;
+ }
+ }
+
+ ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
+ csums);
+ if (!ret)
+ return 0;
+
+ bio_put(bio);
+out_err:
+ dip->errors = 1;
+ /*
+ * before atomic variable goto zero, we must
+ * make sure dip->errors is perceived to be set.
+ */
+ smp_mb__before_atomic_dec();
+ if (atomic_dec_and_test(&dip->pending_bios))
+ bio_io_error(dip->orig_bio);
+
+ /* bio_end_io() will handle error, so we needn't return it */
+ return 0;
+}
+
static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
loff_t file_offset)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_dio_private *dip;
struct bio_vec *bvec = bio->bi_io_vec;
- u64 start;
int skip_sum;
int write = rw & REQ_WRITE;
int ret = 0;
@@ -5671,7 +5907,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
dip->inode = inode;
dip->logical_offset = file_offset;
- start = dip->logical_offset;
dip->bytes = 0;
do {
dip->bytes += bvec->bv_len;
@@ -5680,36 +5915,18 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
dip->disk_bytenr = (u64)bio->bi_sector << 9;
bio->bi_private = dip;
+ dip->errors = 0;
+ dip->orig_bio = bio;
+ atomic_set(&dip->pending_bios, 0);
if (write)
bio->bi_end_io = btrfs_endio_direct_write;
else
bio->bi_end_io = btrfs_endio_direct_read;
- ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
- if (ret)
- goto out_err;
-
- if (write && !skip_sum) {
- ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
- inode, rw, bio, 0, 0,
- dip->logical_offset,
- __btrfs_submit_bio_start_direct_io,
- __btrfs_submit_bio_done);
- if (ret)
- goto out_err;
+ ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
+ if (!ret)
return;
- } else if (!skip_sum)
- btrfs_lookup_bio_sums_dio(root, inode, bio,
- dip->logical_offset, dip->csums);
-
- ret = btrfs_map_bio(root, rw, bio, 0, 1);
- if (ret)
- goto out_err;
- return;
-out_err:
- kfree(dip->csums);
- kfree(dip);
free_ordered:
/*
* If this is a write, we need to clean up the reserved space and kill
@@ -5717,8 +5934,7 @@ free_ordered:
*/
if (write) {
struct btrfs_ordered_extent *ordered;
- ordered = btrfs_lookup_ordered_extent(inode,
- dip->logical_offset);
+ ordered = btrfs_lookup_ordered_extent(inode, file_offset);
if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
btrfs_free_reserved_extent(root, ordered->start,
@@ -6279,6 +6495,13 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
return inode;
}
+static void btrfs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
+}
+
void btrfs_destroy_inode(struct inode *inode)
{
struct btrfs_ordered_extent *ordered;
@@ -6308,6 +6531,21 @@ void btrfs_destroy_inode(struct inode *inode)
spin_unlock(&root->fs_info->ordered_extent_lock);
}
+ if (root == root->fs_info->tree_root) {
+ struct btrfs_block_group_cache *block_group;
+
+ block_group = btrfs_lookup_block_group(root->fs_info,
+ BTRFS_I(inode)->block_group);
+ if (block_group && block_group->inode == inode) {
+ spin_lock(&block_group->lock);
+ block_group->inode = NULL;
+ spin_unlock(&block_group->lock);
+ btrfs_put_block_group(block_group);
+ } else if (block_group) {
+ btrfs_put_block_group(block_group);
+ }
+ }
+
spin_lock(&root->orphan_lock);
if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
@@ -6333,14 +6571,15 @@ void btrfs_destroy_inode(struct inode *inode)
inode_tree_del(inode);
btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
free:
- kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
+ call_rcu(&inode->i_rcu, btrfs_i_callback);
}
int btrfs_drop_inode(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- if (btrfs_root_refs(&root->root_item) == 0)
+ if (btrfs_root_refs(&root->root_item) == 0 &&
+ root != root->fs_info->tree_root)
return 1;
else
return generic_drop_inode(inode);
@@ -6548,8 +6787,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
BUG_ON(ret);
if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
- btrfs_log_new_name(trans, old_inode, old_dir,
- new_dentry->d_parent);
+ struct dentry *parent = dget_parent(new_dentry);
+ btrfs_log_new_name(trans, old_inode, old_dir, parent);
+ dput(parent);
btrfs_end_log_trans(root);
}
out_fail:
@@ -6609,7 +6849,8 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
return 0;
}
-int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
+int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
+ int sync)
{
struct btrfs_inode *binode;
struct inode *inode = NULL;
@@ -6631,7 +6872,26 @@ int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
spin_unlock(&root->fs_info->delalloc_lock);
if (inode) {
- write_inode_now(inode, 0);
+ if (sync) {
+ filemap_write_and_wait(inode->i_mapping);
+ /*
+ * We have to do this because compression doesn't
+ * actually set PG_writeback until it submits the pages
+ * for IO, which happens in an async thread, so we could
+ * race and not actually wait for any writeback pages
+ * because they've not been submitted yet. Technically
+ * this could still be the case for the ordered stuff
+ * since the async thread may not have started to do its
+ * work yet. If this becomes the case then we need to
+ * figure out a way to make sure that in writepage we
+ * wait for any async pages to be submitted before
+ * returning so that fdatawait does what its supposed to
+ * do.
+ */
+ btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ } else {
+ filemap_flush(inode->i_mapping);
+ }
if (delay_iput)
btrfs_add_delayed_iput(inode);
else
@@ -6679,8 +6939,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
btrfs_set_trans_block_group(trans, dir);
inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len,
- dentry->d_parent->d_inode->i_ino, objectid,
+ dentry->d_name.len, dir->i_ino, objectid,
BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO,
&index);
err = PTR_ERR(inode);
@@ -6694,7 +6953,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
}
btrfs_set_trans_block_group(trans, inode);
- err = btrfs_add_nondir(trans, dentry, inode, 0, index);
+ err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
if (err)
drop_inode = 1;
else {
@@ -6757,27 +7016,34 @@ out_unlock:
return err;
}
-int btrfs_prealloc_file_range(struct inode *inode, int mode,
- u64 start, u64 num_bytes, u64 min_size,
- loff_t actual_len, u64 *alloc_hint)
+static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
+ u64 start, u64 num_bytes, u64 min_size,
+ loff_t actual_len, u64 *alloc_hint,
+ struct btrfs_trans_handle *trans)
{
- struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key ins;
u64 cur_offset = start;
+ u64 i_size;
int ret = 0;
+ bool own_trans = true;
+ if (trans)
+ own_trans = false;
while (num_bytes > 0) {
- trans = btrfs_start_transaction(root, 3);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- break;
+ if (own_trans) {
+ trans = btrfs_start_transaction(root, 3);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ break;
+ }
}
ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
0, *alloc_hint, (u64)-1, &ins, 1);
if (ret) {
- btrfs_end_transaction(trans, root);
+ if (own_trans)
+ btrfs_end_transaction(trans, root);
break;
}
@@ -6800,21 +7066,40 @@ int btrfs_prealloc_file_range(struct inode *inode, int mode,
(actual_len > inode->i_size) &&
(cur_offset > inode->i_size)) {
if (cur_offset > actual_len)
- i_size_write(inode, actual_len);
+ i_size = actual_len;
else
- i_size_write(inode, cur_offset);
- i_size_write(inode, cur_offset);
- btrfs_ordered_update_i_size(inode, cur_offset, NULL);
+ i_size = cur_offset;
+ i_size_write(inode, i_size);
+ btrfs_ordered_update_i_size(inode, i_size, NULL);
}
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
- btrfs_end_transaction(trans, root);
+ if (own_trans)
+ btrfs_end_transaction(trans, root);
}
return ret;
}
+int btrfs_prealloc_file_range(struct inode *inode, int mode,
+ u64 start, u64 num_bytes, u64 min_size,
+ loff_t actual_len, u64 *alloc_hint)
+{
+ return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
+ min_size, actual_len, alloc_hint,
+ NULL);
+}
+
+int btrfs_prealloc_file_range_trans(struct inode *inode,
+ struct btrfs_trans_handle *trans, int mode,
+ u64 start, u64 num_bytes, u64 min_size,
+ loff_t actual_len, u64 *alloc_hint)
+{
+ return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
+ min_size, actual_len, alloc_hint, trans);
+}
+
static long btrfs_fallocate(struct inode *inode, int mode,
loff_t offset, loff_t len)
{
@@ -6839,6 +7124,10 @@ static long btrfs_fallocate(struct inode *inode, int mode,
btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
mutex_lock(&inode->i_mutex);
+ ret = inode_newsize_ok(inode, alloc_end);
+ if (ret)
+ goto out;
+
if (alloc_start > inode->i_size) {
ret = btrfs_cont_expand(inode, alloc_start);
if (ret)
@@ -6922,11 +7211,11 @@ static int btrfs_set_page_dirty(struct page *page)
return __set_page_dirty_nobuffers(page);
}
-static int btrfs_permission(struct inode *inode, int mask)
+static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
{
if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
return -EACCES;
- return generic_permission(inode, mask, btrfs_check_acl);
+ return generic_permission(inode, mask, flags, btrfs_check_acl);
}
static const struct inode_operations btrfs_dir_inode_operations = {
@@ -7035,6 +7324,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
.put_link = page_put_link,
+ .getattr = btrfs_getattr,
.permission = btrfs_permission,
.setxattr = btrfs_setxattr,
.getxattr = btrfs_getxattr,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9254b3d58db..f87552a1d7e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -224,7 +224,8 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
static noinline int create_subvol(struct btrfs_root *root,
struct dentry *dentry,
- char *name, int namelen)
+ char *name, int namelen,
+ u64 *async_transid)
{
struct btrfs_trans_handle *trans;
struct btrfs_key key;
@@ -232,7 +233,8 @@ static noinline int create_subvol(struct btrfs_root *root,
struct btrfs_inode_item *inode_item;
struct extent_buffer *leaf;
struct btrfs_root *new_root;
- struct inode *dir = dentry->d_parent->d_inode;
+ struct dentry *parent = dget_parent(dentry);
+ struct inode *dir;
int ret;
int err;
u64 objectid;
@@ -241,8 +243,13 @@ static noinline int create_subvol(struct btrfs_root *root,
ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root,
0, &objectid);
- if (ret)
+ if (ret) {
+ dput(parent);
return ret;
+ }
+
+ dir = parent->d_inode;
+
/*
* 1 - inode item
* 2 - refs
@@ -250,8 +257,10 @@ static noinline int create_subvol(struct btrfs_root *root,
* 2 - dir items
*/
trans = btrfs_start_transaction(root, 6);
- if (IS_ERR(trans))
+ if (IS_ERR(trans)) {
+ dput(parent);
return PTR_ERR(trans);
+ }
leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
0, objectid, NULL, 0, 0, 0);
@@ -338,15 +347,23 @@ static noinline int create_subvol(struct btrfs_root *root,
d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
fail:
- err = btrfs_commit_transaction(trans, root);
+ dput(parent);
+ if (async_transid) {
+ *async_transid = trans->transid;
+ err = btrfs_commit_transaction_async(trans, root, 1);
+ } else {
+ err = btrfs_commit_transaction(trans, root);
+ }
if (err && !ret)
ret = err;
return ret;
}
-static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
+static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
+ char *name, int namelen, u64 *async_transid)
{
struct inode *inode;
+ struct dentry *parent;
struct btrfs_pending_snapshot *pending_snapshot;
struct btrfs_trans_handle *trans;
int ret;
@@ -373,7 +390,14 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
list_add(&pending_snapshot->list,
&trans->transaction->pending_snapshots);
- ret = btrfs_commit_transaction(trans, root->fs_info->extent_root);
+ if (async_transid) {
+ *async_transid = trans->transid;
+ ret = btrfs_commit_transaction_async(trans,
+ root->fs_info->extent_root, 1);
+ } else {
+ ret = btrfs_commit_transaction(trans,
+ root->fs_info->extent_root);
+ }
BUG_ON(ret);
ret = pending_snapshot->error;
@@ -382,7 +406,9 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
btrfs_orphan_cleanup(pending_snapshot->snap);
- inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
+ parent = dget_parent(dentry);
+ inode = btrfs_lookup_dentry(parent->d_inode, dentry);
+ dput(parent);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
goto fail;
@@ -395,6 +421,76 @@ fail:
return ret;
}
+/* copy of check_sticky in fs/namei.c()
+* It's inline, so penalty for filesystems that don't use sticky bit is
+* minimal.
+*/
+static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
+{
+ uid_t fsuid = current_fsuid();
+
+ if (!(dir->i_mode & S_ISVTX))
+ return 0;
+ if (inode->i_uid == fsuid)
+ return 0;
+ if (dir->i_uid == fsuid)
+ return 0;
+ return !capable(CAP_FOWNER);
+}
+
+/* copy of may_delete in fs/namei.c()
+ * Check whether we can remove a link victim from directory dir, check
+ * whether the type of victim is right.
+ * 1. We can't do it if dir is read-only (done in permission())
+ * 2. We should have write and exec permissions on dir
+ * 3. We can't remove anything from append-only dir
+ * 4. We can't do anything with immutable dir (done in permission())
+ * 5. If the sticky bit on dir is set we should either
+ * a. be owner of dir, or
+ * b. be owner of victim, or
+ * c. have CAP_FOWNER capability
+ * 6. If the victim is append-only or immutable we can't do antyhing with
+ * links pointing to it.
+ * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
+ * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
+ * 9. We can't remove a root or mountpoint.
+ * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
+ * nfs_async_unlink().
+ */
+
+static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir)
+{
+ int error;
+
+ if (!victim->d_inode)
+ return -ENOENT;
+
+ BUG_ON(victim->d_parent->d_inode != dir);
+ audit_inode_child(victim, dir);
+
+ error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+ if (error)
+ return error;
+ if (IS_APPEND(dir))
+ return -EPERM;
+ if (btrfs_check_sticky(dir, victim->d_inode)||
+ IS_APPEND(victim->d_inode)||
+ IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
+ return -EPERM;
+ if (isdir) {
+ if (!S_ISDIR(victim->d_inode->i_mode))
+ return -ENOTDIR;
+ if (IS_ROOT(victim))
+ return -EBUSY;
+ } else if (S_ISDIR(victim->d_inode->i_mode))
+ return -EISDIR;
+ if (IS_DEADDIR(dir))
+ return -ENOENT;
+ if (victim->d_flags & DCACHE_NFSFS_RENAMED)
+ return -EBUSY;
+ return 0;
+}
+
/* copy of may_create in fs/namei.c() */
static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
{
@@ -412,7 +508,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
*/
static noinline int btrfs_mksubvol(struct path *parent,
char *name, int namelen,
- struct btrfs_root *snap_src)
+ struct btrfs_root *snap_src,
+ u64 *async_transid)
{
struct inode *dir = parent->dentry->d_inode;
struct dentry *dentry;
@@ -443,10 +540,11 @@ static noinline int btrfs_mksubvol(struct path *parent,
goto out_up_read;
if (snap_src) {
- error = create_snapshot(snap_src, dentry);
+ error = create_snapshot(snap_src, dentry,
+ name, namelen, async_transid);
} else {
error = create_subvol(BTRFS_I(dir)->root, dentry,
- name, namelen);
+ name, namelen, async_transid);
}
if (!error)
fsnotify_mkdir(dir, dentry);
@@ -708,7 +806,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
char *sizestr;
char *devstr = NULL;
int ret = 0;
- int namelen;
int mod = 0;
if (root->fs_info->sb->s_flags & MS_RDONLY)
@@ -722,7 +819,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
return PTR_ERR(vol_args);
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
- namelen = strlen(vol_args->name);
mutex_lock(&root->fs_info->volume_mutex);
sizestr = vol_args->name;
@@ -801,11 +897,13 @@ out_unlock:
return ret;
}
-static noinline int btrfs_ioctl_snap_create(struct file *file,
- void __user *arg, int subvol)
+static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
+ char *name,
+ unsigned long fd,
+ int subvol,
+ u64 *transid)
{
struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
- struct btrfs_ioctl_vol_args *vol_args;
struct file *src_file;
int namelen;
int ret = 0;
@@ -813,23 +911,18 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
- vol_args = memdup_user(arg, sizeof(*vol_args));
- if (IS_ERR(vol_args))
- return PTR_ERR(vol_args);
-
- vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
- namelen = strlen(vol_args->name);
- if (strchr(vol_args->name, '/')) {
+ namelen = strlen(name);
+ if (strchr(name, '/')) {
ret = -EINVAL;
goto out;
}
if (subvol) {
- ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
- NULL);
+ ret = btrfs_mksubvol(&file->f_path, name, namelen,
+ NULL, transid);
} else {
struct inode *src_inode;
- src_file = fget(vol_args->fd);
+ src_file = fget(fd);
if (!src_file) {
ret = -EINVAL;
goto out;
@@ -843,12 +936,68 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
fput(src_file);
goto out;
}
- ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
- BTRFS_I(src_inode)->root);
+ ret = btrfs_mksubvol(&file->f_path, name, namelen,
+ BTRFS_I(src_inode)->root,
+ transid);
fput(src_file);
}
out:
+ return ret;
+}
+
+static noinline int btrfs_ioctl_snap_create(struct file *file,
+ void __user *arg, int subvol,
+ int v2)
+{
+ struct btrfs_ioctl_vol_args *vol_args = NULL;
+ struct btrfs_ioctl_vol_args_v2 *vol_args_v2 = NULL;
+ char *name;
+ u64 fd;
+ int ret;
+
+ if (v2) {
+ u64 transid = 0;
+ u64 *ptr = NULL;
+
+ vol_args_v2 = memdup_user(arg, sizeof(*vol_args_v2));
+ if (IS_ERR(vol_args_v2))
+ return PTR_ERR(vol_args_v2);
+
+ if (vol_args_v2->flags & ~BTRFS_SUBVOL_CREATE_ASYNC) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ name = vol_args_v2->name;
+ fd = vol_args_v2->fd;
+ vol_args_v2->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
+
+ if (vol_args_v2->flags & BTRFS_SUBVOL_CREATE_ASYNC)
+ ptr = &transid;
+
+ ret = btrfs_ioctl_snap_create_transid(file, name, fd,
+ subvol, ptr);
+
+ if (ret == 0 && ptr &&
+ copy_to_user(arg +
+ offsetof(struct btrfs_ioctl_vol_args_v2,
+ transid), ptr, sizeof(*ptr)))
+ ret = -EFAULT;
+ } else {
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
+ name = vol_args->name;
+ fd = vol_args->fd;
+ vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+
+ ret = btrfs_ioctl_snap_create_transid(file, name, fd,
+ subvol, NULL);
+ }
+out:
kfree(vol_args);
+ kfree(vol_args_v2);
+
return ret;
}
@@ -1073,14 +1222,10 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- args = kmalloc(sizeof(*args), GFP_KERNEL);
- if (!args)
- return -ENOMEM;
+ args = memdup_user(argp, sizeof(*args));
+ if (IS_ERR(args))
+ return PTR_ERR(args);
- if (copy_from_user(args, argp, sizeof(*args))) {
- kfree(args);
- return -EFAULT;
- }
inode = fdentry(file)->d_inode;
ret = search_ioctl(inode, args);
if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
@@ -1188,14 +1333,10 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- args = kmalloc(sizeof(*args), GFP_KERNEL);
- if (!args)
- return -ENOMEM;
+ args = memdup_user(argp, sizeof(*args));
+ if (IS_ERR(args))
+ return PTR_ERR(args);
- if (copy_from_user(args, argp, sizeof(*args))) {
- kfree(args);
- return -EFAULT;
- }
inode = fdentry(file)->d_inode;
if (args->treeid == 0)
@@ -1227,9 +1368,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
int ret;
int err = 0;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
@@ -1259,13 +1397,51 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
}
inode = dentry->d_inode;
+ dest = BTRFS_I(inode)->root;
+ if (!capable(CAP_SYS_ADMIN)){
+ /*
+ * Regular user. Only allow this with a special mount
+ * option, when the user has write+exec access to the
+ * subvol root, and when rmdir(2) would have been
+ * allowed.
+ *
+ * Note that this is _not_ check that the subvol is
+ * empty or doesn't contain data that we wouldn't
+ * otherwise be able to delete.
+ *
+ * Users who want to delete empty subvols should try
+ * rmdir(2).
+ */
+ err = -EPERM;
+ if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
+ goto out_dput;
+
+ /*
+ * Do not allow deletion if the parent dir is the same
+ * as the dir to be deleted. That means the ioctl
+ * must be called on the dentry referencing the root
+ * of the subvol, not a random directory contained
+ * within it.
+ */
+ err = -EINVAL;
+ if (root == dest)
+ goto out_dput;
+
+ err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
+ if (err)
+ goto out_dput;
+
+ /* check if subvolume may be deleted by a non-root user */
+ err = btrfs_may_delete(dir, dentry, 1);
+ if (err)
+ goto out_dput;
+ }
+
if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
err = -EINVAL;
goto out_dput;
}
- dest = BTRFS_I(inode)->root;
-
mutex_lock(&inode->i_mutex);
err = d_invalidate(dentry);
if (err)
@@ -1304,7 +1480,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
BUG_ON(ret);
}
- ret = btrfs_commit_transaction(trans, root);
+ ret = btrfs_end_transaction(trans, root);
BUG_ON(ret);
inode->i_flags |= S_DEAD;
out_up_write:
@@ -1502,11 +1678,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
path->reada = 2;
if (inode < src) {
- mutex_lock(&inode->i_mutex);
- mutex_lock(&src->i_mutex);
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
} else {
- mutex_lock(&src->i_mutex);
- mutex_lock(&inode->i_mutex);
+ mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
}
/* determine range to clone */
@@ -1517,12 +1693,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
olen = len = src->i_size - off;
/* if we extend to eof, continue to block boundary */
if (off + len == src->i_size)
- len = ((src->i_size + bs-1) & ~(bs-1))
- - off;
+ len = ALIGN(src->i_size, bs) - off;
/* verify the end result is block aligned */
- if ((off & (bs-1)) ||
- ((off + len) & (bs-1)))
+ if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
+ !IS_ALIGNED(destoff, bs))
goto out_unlock;
/* do any pending delalloc/csum calc on src, one way or
@@ -1530,13 +1705,15 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
while (1) {
struct btrfs_ordered_extent *ordered;
lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
- ordered = btrfs_lookup_first_ordered_extent(inode, off+len);
- if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered)
+ ordered = btrfs_lookup_first_ordered_extent(src, off+len);
+ if (!ordered &&
+ !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len,
+ EXTENT_DELALLOC, 0, NULL))
break;
unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
if (ordered)
btrfs_put_ordered_extent(ordered);
- btrfs_wait_ordered_range(src, off, off+len);
+ btrfs_wait_ordered_range(src, off, len);
}
/* clone data */
@@ -1605,7 +1782,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
}
btrfs_release_path(root, path);
- if (key.offset + datal < off ||
+ if (key.offset + datal <= off ||
key.offset >= off+len)
goto next;
@@ -1720,8 +1897,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
* but shouldn't round up the file size
*/
endoff = new_key.offset + datal;
- if (endoff > off+olen)
- endoff = off+olen;
+ if (endoff > destoff+olen)
+ endoff = destoff+olen;
if (endoff > inode->i_size)
btrfs_i_size_write(inode, endoff);
@@ -1879,6 +2056,22 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
return 0;
}
+static void get_block_group_info(struct list_head *groups_list,
+ struct btrfs_ioctl_space_info *space)
+{
+ struct btrfs_block_group_cache *block_group;
+
+ space->total_bytes = 0;
+ space->used_bytes = 0;
+ space->flags = 0;
+ list_for_each_entry(block_group, groups_list, list) {
+ space->flags = block_group->flags;
+ space->total_bytes += block_group->key.offset;
+ space->used_bytes +=
+ btrfs_block_group_used(&block_group->item);
+ }
+}
+
long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
{
struct btrfs_ioctl_space_args space_args;
@@ -1887,27 +2080,56 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
struct btrfs_ioctl_space_info *dest_orig;
struct btrfs_ioctl_space_info *user_dest;
struct btrfs_space_info *info;
+ u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
+ BTRFS_BLOCK_GROUP_SYSTEM,
+ BTRFS_BLOCK_GROUP_METADATA,
+ BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
+ int num_types = 4;
int alloc_size;
int ret = 0;
int slot_count = 0;
+ int i, c;
if (copy_from_user(&space_args,
(struct btrfs_ioctl_space_args __user *)arg,
sizeof(space_args)))
return -EFAULT;
- /* first we count slots */
- rcu_read_lock();
- list_for_each_entry_rcu(info, &root->fs_info->space_info, list)
- slot_count++;
- rcu_read_unlock();
+ for (i = 0; i < num_types; i++) {
+ struct btrfs_space_info *tmp;
+
+ info = NULL;
+ rcu_read_lock();
+ list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
+ list) {
+ if (tmp->flags == types[i]) {
+ info = tmp;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ if (!info)
+ continue;
+
+ down_read(&info->groups_sem);
+ for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
+ if (!list_empty(&info->block_groups[c]))
+ slot_count++;
+ }
+ up_read(&info->groups_sem);
+ }
/* space_slots == 0 means they are asking for a count */
if (space_args.space_slots == 0) {
space_args.total_spaces = slot_count;
goto out;
}
+
+ slot_count = min_t(int, space_args.space_slots, slot_count);
+
alloc_size = sizeof(*dest) * slot_count;
+
/* we generally have at most 6 or so space infos, one for each raid
* level. So, a whole page should be more than enough for everyone
*/
@@ -1921,27 +2143,34 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
dest_orig = dest;
/* now we have a buffer to copy into */
- rcu_read_lock();
- list_for_each_entry_rcu(info, &root->fs_info->space_info, list) {
- /* make sure we don't copy more than we allocated
- * in our buffer
- */
- if (slot_count == 0)
- break;
- slot_count--;
-
- /* make sure userland has enough room in their buffer */
- if (space_args.total_spaces >= space_args.space_slots)
- break;
+ for (i = 0; i < num_types; i++) {
+ struct btrfs_space_info *tmp;
+
+ info = NULL;
+ rcu_read_lock();
+ list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
+ list) {
+ if (tmp->flags == types[i]) {
+ info = tmp;
+ break;
+ }
+ }
+ rcu_read_unlock();
- space.flags = info->flags;
- space.total_bytes = info->total_bytes;
- space.used_bytes = info->bytes_used;
- memcpy(dest, &space, sizeof(space));
- dest++;
- space_args.total_spaces++;
+ if (!info)
+ continue;
+ down_read(&info->groups_sem);
+ for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
+ if (!list_empty(&info->block_groups[c])) {
+ get_block_group_info(&info->block_groups[c],
+ &space);
+ memcpy(dest, &space, sizeof(space));
+ dest++;
+ space_args.total_spaces++;
+ }
+ }
+ up_read(&info->groups_sem);
}
- rcu_read_unlock();
user_dest = (struct btrfs_ioctl_space_info *)
(arg + sizeof(struct btrfs_ioctl_space_args));
@@ -1984,6 +2213,36 @@ long btrfs_ioctl_trans_end(struct file *file)
return 0;
}
+static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp)
+{
+ struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
+ struct btrfs_trans_handle *trans;
+ u64 transid;
+
+ trans = btrfs_start_transaction(root, 0);
+ transid = trans->transid;
+ btrfs_commit_transaction_async(trans, root, 0);
+
+ if (argp)
+ if (copy_to_user(argp, &transid, sizeof(transid)))
+ return -EFAULT;
+ return 0;
+}
+
+static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp)
+{
+ struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
+ u64 transid;
+
+ if (argp) {
+ if (copy_from_user(&transid, argp, sizeof(transid)))
+ return -EFAULT;
+ } else {
+ transid = 0; /* current trans */
+ }
+ return btrfs_wait_for_commit(root, transid);
+}
+
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
@@ -1998,9 +2257,11 @@ long btrfs_ioctl(struct file *file, unsigned int
case FS_IOC_GETVERSION:
return btrfs_ioctl_getversion(file, argp);
case BTRFS_IOC_SNAP_CREATE:
- return btrfs_ioctl_snap_create(file, argp, 0);
+ return btrfs_ioctl_snap_create(file, argp, 0, 0);
+ case BTRFS_IOC_SNAP_CREATE_V2:
+ return btrfs_ioctl_snap_create(file, argp, 0, 1);
case BTRFS_IOC_SUBVOL_CREATE:
- return btrfs_ioctl_snap_create(file, argp, 1);
+ return btrfs_ioctl_snap_create(file, argp, 1, 0);
case BTRFS_IOC_SNAP_DESTROY:
return btrfs_ioctl_snap_destroy(file, argp);
case BTRFS_IOC_DEFAULT_SUBVOL:
@@ -2034,6 +2295,10 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_SYNC:
btrfs_sync_fs(file->f_dentry->d_sb, 1);
return 0;
+ case BTRFS_IOC_START_SYNC:
+ return btrfs_ioctl_start_sync(file, argp);
+ case BTRFS_IOC_WAIT_SYNC:
+ return btrfs_ioctl_wait_sync(file, argp);
}
return -ENOTTY;
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 424694aa517..c344d12c646 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -22,14 +22,25 @@
#define BTRFS_IOCTL_MAGIC 0x94
#define BTRFS_VOL_NAME_MAX 255
-#define BTRFS_PATH_NAME_MAX 4087
/* this should be 4k */
+#define BTRFS_PATH_NAME_MAX 4087
struct btrfs_ioctl_vol_args {
__s64 fd;
char name[BTRFS_PATH_NAME_MAX + 1];
};
+#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
+
+#define BTRFS_SUBVOL_NAME_MAX 4039
+struct btrfs_ioctl_vol_args_v2 {
+ __s64 fd;
+ __u64 transid;
+ __u64 flags;
+ __u64 unused[4];
+ char name[BTRFS_SUBVOL_NAME_MAX + 1];
+};
+
#define BTRFS_INO_LOOKUP_PATH_MAX 4080
struct btrfs_ioctl_ino_lookup_args {
__u64 treeid;
@@ -178,4 +189,8 @@ struct btrfs_ioctl_space_args {
#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64)
#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
struct btrfs_ioctl_space_args)
+#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
+#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
+#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
+ struct btrfs_ioctl_vol_args_v2)
#endif
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index e56c72bc5ad..ae7737e352c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -250,6 +250,73 @@ int btrfs_add_ordered_sum(struct inode *inode,
/*
* this is used to account for finished IO across a given range
+ * of the file. The IO may span ordered extents. If
+ * a given ordered_extent is completely done, 1 is returned, otherwise
+ * 0.
+ *
+ * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
+ * to make sure this function only returns 1 once for a given ordered extent.
+ *
+ * file_offset is updated to one byte past the range that is recorded as
+ * complete. This allows you to walk forward in the file.
+ */
+int btrfs_dec_test_first_ordered_pending(struct inode *inode,
+ struct btrfs_ordered_extent **cached,
+ u64 *file_offset, u64 io_size)
+{
+ struct btrfs_ordered_inode_tree *tree;
+ struct rb_node *node;
+ struct btrfs_ordered_extent *entry = NULL;
+ int ret;
+ u64 dec_end;
+ u64 dec_start;
+ u64 to_dec;
+
+ tree = &BTRFS_I(inode)->ordered_tree;
+ spin_lock(&tree->lock);
+ node = tree_search(tree, *file_offset);
+ if (!node) {
+ ret = 1;
+ goto out;
+ }
+
+ entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
+ if (!offset_in_entry(entry, *file_offset)) {
+ ret = 1;
+ goto out;
+ }
+
+ dec_start = max(*file_offset, entry->file_offset);
+ dec_end = min(*file_offset + io_size, entry->file_offset +
+ entry->len);
+ *file_offset = dec_end;
+ if (dec_start > dec_end) {
+ printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n",
+ (unsigned long long)dec_start,
+ (unsigned long long)dec_end);
+ }
+ to_dec = dec_end - dec_start;
+ if (to_dec > entry->bytes_left) {
+ printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
+ (unsigned long long)entry->bytes_left,
+ (unsigned long long)to_dec);
+ }
+ entry->bytes_left -= to_dec;
+ if (entry->bytes_left == 0)
+ ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
+ else
+ ret = 1;
+out:
+ if (!ret && cached && entry) {
+ *cached = entry;
+ atomic_inc(&entry->refs);
+ }
+ spin_unlock(&tree->lock);
+ return ret == 0;
+}
+
+/*
+ * this is used to account for finished IO across a given range
* of the file. The IO should not span ordered extents. If
* a given ordered_extent is completely done, 1 is returned, otherwise
* 0.
@@ -526,7 +593,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
{
u64 end;
u64 orig_end;
- u64 wait_end;
struct btrfs_ordered_extent *ordered;
int found;
@@ -537,7 +603,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
if (orig_end > INT_LIMIT(loff_t))
orig_end = INT_LIMIT(loff_t);
}
- wait_end = orig_end;
again:
/* start IO across the range first to instantiate any delalloc
* extents
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 8ac365492a3..61dca83119d 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -141,6 +141,9 @@ int btrfs_remove_ordered_extent(struct inode *inode,
int btrfs_dec_test_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent **cached,
u64 file_offset, u64 io_size);
+int btrfs_dec_test_first_ordered_pending(struct inode *inode,
+ struct btrfs_ordered_extent **cached,
+ u64 *file_offset, u64 io_size);
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type);
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c
index 79cba5fbc28..f8be250963a 100644
--- a/fs/btrfs/orphan.c
+++ b/fs/btrfs/orphan.c
@@ -56,8 +56,12 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
return -ENOMEM;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret)
+ if (ret < 0)
goto out;
+ if (ret) {
+ ret = -ENOENT;
+ goto out;
+ }
ret = btrfs_del_item(trans, root, path);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b37d723b9d4..045c9c2b2d7 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -29,6 +29,7 @@
#include "locking.h"
#include "btrfs_inode.h"
#include "async-thread.h"
+#include "free-space-cache.h"
/*
* backref_node, mapping_node and tree_block start with this
@@ -178,8 +179,6 @@ struct reloc_control {
u64 search_start;
u64 extents_found;
- int block_rsv_retries;
-
unsigned int stage:8;
unsigned int create_reloc_tree:1;
unsigned int merge_reloc_tree:1;
@@ -2133,7 +2132,6 @@ int prepare_to_merge(struct reloc_control *rc, int err)
LIST_HEAD(reloc_roots);
u64 num_bytes = 0;
int ret;
- int retries = 0;
mutex_lock(&root->fs_info->trans_mutex);
rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
@@ -2143,7 +2141,7 @@ again:
if (!err) {
num_bytes = rc->merging_rsv_size;
ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv,
- num_bytes, &retries);
+ num_bytes);
if (ret)
err = ret;
}
@@ -2155,7 +2153,6 @@ again:
btrfs_end_transaction(trans, rc->extent_root);
btrfs_block_rsv_release(rc->extent_root,
rc->block_rsv, num_bytes);
- retries = 0;
goto again;
}
}
@@ -2405,15 +2402,13 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
num_bytes = calcu_metadata_size(rc, node, 1) * 2;
trans->block_rsv = rc->block_rsv;
- ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes,
- &rc->block_rsv_retries);
+ ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes);
if (ret) {
if (ret == -EAGAIN)
rc->commit_transaction = 1;
return ret;
}
- rc->block_rsv_retries = 0;
return 0;
}
@@ -3099,6 +3094,8 @@ static int add_tree_block(struct reloc_control *rc,
BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
ret = get_ref_objectid_v0(rc, path, extent_key,
&ref_owner, NULL);
+ if (ret < 0)
+ return ret;
BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
level = (int)ref_owner;
/* FIXME: get real generation */
@@ -3191,6 +3188,54 @@ static int block_use_full_backref(struct reloc_control *rc,
return ret;
}
+static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
+ struct inode *inode, u64 ino)
+{
+ struct btrfs_key key;
+ struct btrfs_path *path;
+ struct btrfs_root *root = fs_info->tree_root;
+ struct btrfs_trans_handle *trans;
+ unsigned long nr;
+ int ret = 0;
+
+ if (inode)
+ goto truncate;
+
+ key.objectid = ino;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+
+ inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+ if (!inode || IS_ERR(inode) || is_bad_inode(inode)) {
+ if (inode && !IS_ERR(inode))
+ iput(inode);
+ return -ENOENT;
+ }
+
+truncate:
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ trans = btrfs_join_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ btrfs_free_path(path);
+ goto out;
+ }
+
+ ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
+
+ btrfs_free_path(path);
+ nr = trans->blocks_used;
+ btrfs_end_transaction(trans, root);
+ btrfs_btree_balance_dirty(root, nr);
+out:
+ iput(inode);
+ return ret;
+}
+
/*
* helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
* this function scans fs tree to find blocks reference the data extent
@@ -3217,15 +3262,27 @@ static int find_data_references(struct reloc_control *rc,
int counted;
int ret;
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
ref_root = btrfs_extent_data_ref_root(leaf, ref);
ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
ref_count = btrfs_extent_data_ref_count(leaf, ref);
+ /*
+ * This is an extent belonging to the free space cache, lets just delete
+ * it and redo the search.
+ */
+ if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
+ ret = delete_block_group_cache(rc->extent_root->fs_info,
+ NULL, ref_objectid);
+ if (ret != -ENOENT)
+ return ret;
+ ret = 0;
+ }
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
root = read_fs_root(rc->extent_root->fs_info, ref_root);
if (IS_ERR(root)) {
err = PTR_ERR(root);
@@ -3554,8 +3611,7 @@ int prepare_to_relocate(struct reloc_control *rc)
* is no reservation in transaction handle.
*/
ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv,
- rc->extent_root->nodesize * 256,
- &rc->block_rsv_retries);
+ rc->extent_root->nodesize * 256);
if (ret)
return ret;
@@ -3567,7 +3623,6 @@ int prepare_to_relocate(struct reloc_control *rc)
rc->extents_found = 0;
rc->nodes_relocated = 0;
rc->merging_rsv_size = 0;
- rc->block_rsv_retries = 0;
rc->create_reloc_tree = 1;
set_reloc_control(rc);
@@ -3860,6 +3915,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
{
struct btrfs_fs_info *fs_info = extent_root->fs_info;
struct reloc_control *rc;
+ struct inode *inode;
+ struct btrfs_path *path;
int ret;
int rw = 0;
int err = 0;
@@ -3882,6 +3939,26 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
rw = 1;
}
+ path = btrfs_alloc_path();
+ if (!path) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group,
+ path);
+ btrfs_free_path(path);
+
+ if (!IS_ERR(inode))
+ ret = delete_block_group_cache(fs_info, inode, 0);
+ else
+ ret = PTR_ERR(inode);
+
+ if (ret && ret != -ENOENT) {
+ err = ret;
+ goto out;
+ }
+
rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
if (IS_ERR(rc->data_inode)) {
err = PTR_ERR(rc->data_inode);
@@ -4143,7 +4220,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
btrfs_add_ordered_sum(inode, ordered, sums);
}
btrfs_put_ordered_extent(ordered);
- return 0;
+ return ret;
}
void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 2d958be761c..6a1086e83ff 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -181,7 +181,6 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid)
{
struct btrfs_root *dead_root;
- struct btrfs_item *item;
struct btrfs_root_item *ri;
struct btrfs_key key;
struct btrfs_key found_key;
@@ -214,7 +213,6 @@ again:
nritems = btrfs_header_nritems(leaf);
slot = path->slots[0];
}
- item = btrfs_item_nr(leaf, slot);
btrfs_item_key_to_cpu(leaf, &key, slot);
if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY)
goto next;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 144f8a5730f..883c6fa1367 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -61,6 +61,8 @@ static void btrfs_put_super(struct super_block *sb)
ret = close_ctree(root);
sb->s_fs_info = NULL;
+
+ (void)ret; /* FIXME: need to fix VFS to return error? */
}
enum {
@@ -68,7 +70,8 @@ enum {
Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
- Opt_discard, Opt_err,
+ Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err,
+ Opt_user_subvol_rm_allowed,
};
static match_table_t tokens = {
@@ -92,6 +95,9 @@ static match_table_t tokens = {
{Opt_flushoncommit, "flushoncommit"},
{Opt_ratio, "metadata_ratio=%d"},
{Opt_discard, "discard"},
+ {Opt_space_cache, "space_cache"},
+ {Opt_clear_cache, "clear_cache"},
+ {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
{Opt_err, NULL},
};
@@ -235,6 +241,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
case Opt_discard:
btrfs_set_opt(info->mount_opt, DISCARD);
break;
+ case Opt_space_cache:
+ printk(KERN_INFO "btrfs: enabling disk space caching\n");
+ btrfs_set_opt(info->mount_opt, SPACE_CACHE);
+ break;
+ case Opt_clear_cache:
+ printk(KERN_INFO "btrfs: force clearing of disk cache\n");
+ btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
+ break;
+ case Opt_user_subvol_rm_allowed:
+ btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
+ break;
case Opt_err:
printk(KERN_INFO "btrfs: unrecognized mount option "
"'%s'\n", p);
@@ -380,7 +397,7 @@ static struct dentry *get_default_root(struct super_block *sb,
find_root:
new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
if (IS_ERR(new_root))
- return ERR_PTR(PTR_ERR(new_root));
+ return ERR_CAST(new_root);
if (btrfs_root_refs(&new_root->root_item) == 0)
return ERR_PTR(-ENOENT);
@@ -436,7 +453,6 @@ static int btrfs_fill_super(struct super_block *sb,
{
struct inode *inode;
struct dentry *root_dentry;
- struct btrfs_super_block *disk_super;
struct btrfs_root *tree_root;
struct btrfs_key key;
int err;
@@ -458,7 +474,6 @@ static int btrfs_fill_super(struct super_block *sb,
return PTR_ERR(tree_root);
}
sb->s_fs_info = tree_root;
- disk_super = &tree_root->fs_info->super_copy;
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.type = BTRFS_INODE_ITEM_KEY;
@@ -548,30 +563,45 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
static int btrfs_test_super(struct super_block *s, void *data)
{
- struct btrfs_fs_devices *test_fs_devices = data;
+ struct btrfs_root *test_root = data;
struct btrfs_root *root = btrfs_sb(s);
- return root->fs_info->fs_devices == test_fs_devices;
+ /*
+ * If this super block is going away, return false as it
+ * can't match as an existing super block.
+ */
+ if (!atomic_read(&s->s_active))
+ return 0;
+ return root->fs_info->fs_devices == test_root->fs_info->fs_devices;
+}
+
+static int btrfs_set_super(struct super_block *s, void *data)
+{
+ s->s_fs_info = data;
+
+ return set_anon_super(s, data);
}
+
/*
* Find a superblock for the given device / mount point.
*
* Note: This is based on get_sb_bdev from fs/super.c with a few additions
* for multiple device setup. Make sure to keep it in sync.
*/
-static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
{
struct block_device *bdev = NULL;
struct super_block *s;
struct dentry *root;
struct btrfs_fs_devices *fs_devices = NULL;
+ struct btrfs_root *tree_root = NULL;
+ struct btrfs_fs_info *fs_info = NULL;
fmode_t mode = FMODE_READ;
char *subvol_name = NULL;
u64 subvol_objectid = 0;
int error = 0;
- int found = 0;
if (!(flags & MS_RDONLY))
mode |= FMODE_WRITE;
@@ -580,7 +610,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
&subvol_name, &subvol_objectid,
&fs_devices);
if (error)
- return error;
+ return ERR_PTR(error);
error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices);
if (error)
@@ -595,8 +625,24 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
goto error_close_devices;
}
+ /*
+ * Setup a dummy root and fs_info for test/set super. This is because
+ * we don't actually fill this stuff out until open_ctree, but we need
+ * it for searching for existing supers, so this lets us do that and
+ * then open_ctree will properly initialize everything later.
+ */
+ fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS);
+ tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+ if (!fs_info || !tree_root) {
+ error = -ENOMEM;
+ goto error_close_devices;
+ }
+ fs_info->tree_root = tree_root;
+ fs_info->fs_devices = fs_devices;
+ tree_root->fs_info = fs_info;
+
bdev = fs_devices->latest_bdev;
- s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices);
+ s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root);
if (IS_ERR(s))
goto error_s;
@@ -607,7 +653,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
goto error_close_devices;
}
- found = 1;
btrfs_close_devices(fs_devices);
} else {
char b[BDEVNAME_SIZE];
@@ -629,7 +674,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
if (IS_ERR(root)) {
error = PTR_ERR(root);
deactivate_locked_super(s);
- goto error;
+ goto error_free_subvol_name;
}
/* if they gave us a subvolume name bind mount into that */
if (strcmp(subvol_name, ".")) {
@@ -640,36 +685,34 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
mutex_unlock(&root->d_inode->i_mutex);
if (IS_ERR(new_root)) {
+ dput(root);
deactivate_locked_super(s);
error = PTR_ERR(new_root);
- dput(root);
- goto error_close_devices;
+ goto error_free_subvol_name;
}
if (!new_root->d_inode) {
dput(root);
dput(new_root);
deactivate_locked_super(s);
error = -ENXIO;
- goto error_close_devices;
+ goto error_free_subvol_name;
}
dput(root);
root = new_root;
}
- mnt->mnt_sb = s;
- mnt->mnt_root = root;
-
kfree(subvol_name);
- return 0;
+ return root;
error_s:
error = PTR_ERR(s);
error_close_devices:
btrfs_close_devices(fs_devices);
+ kfree(fs_info);
+ kfree(tree_root);
error_free_subvol_name:
kfree(subvol_name);
-error:
- return error;
+ return ERR_PTR(error);
}
static int btrfs_remount(struct super_block *sb, int *flags, char *data)
@@ -716,18 +759,25 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
struct list_head *head = &root->fs_info->space_info;
struct btrfs_space_info *found;
u64 total_used = 0;
+ u64 total_used_data = 0;
int bits = dentry->d_sb->s_blocksize_bits;
__be32 *fsid = (__be32 *)root->fs_info->fsid;
rcu_read_lock();
- list_for_each_entry_rcu(found, head, list)
+ list_for_each_entry_rcu(found, head, list) {
+ if (found->flags & (BTRFS_BLOCK_GROUP_METADATA |
+ BTRFS_BLOCK_GROUP_SYSTEM))
+ total_used_data += found->disk_total;
+ else
+ total_used_data += found->disk_used;
total_used += found->disk_used;
+ }
rcu_read_unlock();
buf->f_namelen = BTRFS_NAME_LEN;
buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
buf->f_bfree = buf->f_blocks - (total_used >> bits);
- buf->f_bavail = buf->f_bfree;
+ buf->f_bavail = buf->f_blocks - (total_used_data >> bits);
buf->f_bsize = dentry->d_sb->s_blocksize;
buf->f_type = BTRFS_SUPER_MAGIC;
@@ -746,7 +796,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
static struct file_system_type btrfs_fs_type = {
.owner = THIS_MODULE,
.name = "btrfs",
- .get_sb = btrfs_get_sb,
+ .mount = btrfs_mount,
.kill_sb = kill_anon_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 66e4c66cc63..f50e931fc21 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -163,6 +163,7 @@ enum btrfs_trans_type {
TRANS_START,
TRANS_JOIN,
TRANS_USERSPACE,
+ TRANS_JOIN_NOLOCK,
};
static int may_wait_transaction(struct btrfs_root *root, int type)
@@ -179,14 +180,14 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
{
struct btrfs_trans_handle *h;
struct btrfs_transaction *cur_trans;
- int retries = 0;
int ret;
again:
h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
if (!h)
return ERR_PTR(-ENOMEM);
- mutex_lock(&root->fs_info->trans_mutex);
+ if (type != TRANS_JOIN_NOLOCK)
+ mutex_lock(&root->fs_info->trans_mutex);
if (may_wait_transaction(root, type))
wait_current_trans(root);
@@ -195,7 +196,8 @@ again:
cur_trans = root->fs_info->running_transaction;
cur_trans->use_count++;
- mutex_unlock(&root->fs_info->trans_mutex);
+ if (type != TRANS_JOIN_NOLOCK)
+ mutex_unlock(&root->fs_info->trans_mutex);
h->transid = cur_trans->transid;
h->transaction = cur_trans;
@@ -212,8 +214,7 @@ again:
}
if (num_items > 0) {
- ret = btrfs_trans_reserve_metadata(h, root, num_items,
- &retries);
+ ret = btrfs_trans_reserve_metadata(h, root, num_items);
if (ret == -EAGAIN) {
btrfs_commit_transaction(h, root);
goto again;
@@ -224,9 +225,11 @@ again:
}
}
- mutex_lock(&root->fs_info->trans_mutex);
+ if (type != TRANS_JOIN_NOLOCK)
+ mutex_lock(&root->fs_info->trans_mutex);
record_root_in_trans(h, root);
- mutex_unlock(&root->fs_info->trans_mutex);
+ if (type != TRANS_JOIN_NOLOCK)
+ mutex_unlock(&root->fs_info->trans_mutex);
if (!current->journal_info && type != TRANS_USERSPACE)
current->journal_info = h;
@@ -244,6 +247,12 @@ struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
return start_transaction(root, 0, TRANS_JOIN);
}
+struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
+ int num_blocks)
+{
+ return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
+}
+
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
int num_blocks)
{
@@ -270,6 +279,58 @@ static noinline int wait_for_commit(struct btrfs_root *root,
return 0;
}
+int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
+{
+ struct btrfs_transaction *cur_trans = NULL, *t;
+ int ret;
+
+ mutex_lock(&root->fs_info->trans_mutex);
+
+ ret = 0;
+ if (transid) {
+ if (transid <= root->fs_info->last_trans_committed)
+ goto out_unlock;
+
+ /* find specified transaction */
+ list_for_each_entry(t, &root->fs_info->trans_list, list) {
+ if (t->transid == transid) {
+ cur_trans = t;
+ break;
+ }
+ if (t->transid > transid)
+ break;
+ }
+ ret = -EINVAL;
+ if (!cur_trans)
+ goto out_unlock; /* bad transid */
+ } else {
+ /* find newest transaction that is committing | committed */
+ list_for_each_entry_reverse(t, &root->fs_info->trans_list,
+ list) {
+ if (t->in_commit) {
+ if (t->commit_done)
+ goto out_unlock;
+ cur_trans = t;
+ break;
+ }
+ }
+ if (!cur_trans)
+ goto out_unlock; /* nothing committing|committed */
+ }
+
+ cur_trans->use_count++;
+ mutex_unlock(&root->fs_info->trans_mutex);
+
+ wait_for_commit(root, cur_trans);
+
+ mutex_lock(&root->fs_info->trans_mutex);
+ put_transaction(cur_trans);
+ ret = 0;
+out_unlock:
+ mutex_unlock(&root->fs_info->trans_mutex);
+ return ret;
+}
+
#if 0
/*
* rate limit against the drop_snapshot code. This helps to slow down new
@@ -348,7 +409,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
}
static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, int throttle)
+ struct btrfs_root *root, int throttle, int lock)
{
struct btrfs_transaction *cur_trans = trans->transaction;
struct btrfs_fs_info *info = root->fs_info;
@@ -376,26 +437,29 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
btrfs_trans_release_metadata(trans, root);
- if (!root->fs_info->open_ioctl_trans &&
+ if (lock && !root->fs_info->open_ioctl_trans &&
should_end_transaction(trans, root))
trans->transaction->blocked = 1;
- if (cur_trans->blocked && !cur_trans->in_commit) {
+ if (lock && cur_trans->blocked && !cur_trans->in_commit) {
if (throttle)
return btrfs_commit_transaction(trans, root);
else
wake_up_process(info->transaction_kthread);
}
- mutex_lock(&info->trans_mutex);
+ if (lock)
+ mutex_lock(&info->trans_mutex);
WARN_ON(cur_trans != info->running_transaction);
WARN_ON(cur_trans->num_writers < 1);
cur_trans->num_writers--;
+ smp_mb();
if (waitqueue_active(&cur_trans->writer_wait))
wake_up(&cur_trans->writer_wait);
put_transaction(cur_trans);
- mutex_unlock(&info->trans_mutex);
+ if (lock)
+ mutex_unlock(&info->trans_mutex);
if (current->journal_info == trans)
current->journal_info = NULL;
@@ -411,13 +475,19 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
- return __btrfs_end_transaction(trans, root, 0);
+ return __btrfs_end_transaction(trans, root, 0, 1);
}
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
- return __btrfs_end_transaction(trans, root, 1);
+ return __btrfs_end_transaction(trans, root, 1, 1);
+}
+
+int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ return __btrfs_end_transaction(trans, root, 0, 0);
}
/*
@@ -832,11 +902,11 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_root *root = pending->root;
struct btrfs_root *parent_root;
struct inode *parent_inode;
+ struct dentry *parent;
struct dentry *dentry;
struct extent_buffer *tmp;
struct extent_buffer *old;
int ret;
- int retries = 0;
u64 to_reserve = 0;
u64 index = 0;
u64 objectid;
@@ -858,7 +928,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
if (to_reserve > 0) {
ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv,
- to_reserve, &retries);
+ to_reserve);
if (ret) {
pending->error = ret;
goto fail;
@@ -872,7 +942,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
trans->block_rsv = &pending->block_rsv;
dentry = pending->dentry;
- parent_inode = dentry->d_parent->d_inode;
+ parent = dget_parent(dentry);
+ parent_inode = parent->d_inode;
parent_root = BTRFS_I(parent_inode)->root;
record_root_in_trans(trans, parent_root);
@@ -920,6 +991,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
parent_inode->i_ino, index,
dentry->d_name.name, dentry->d_name.len);
BUG_ON(ret);
+ dput(parent);
key.offset = (u64)-1;
pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
@@ -966,6 +1038,8 @@ static void update_super_roots(struct btrfs_root *root)
super->root = root_item->bytenr;
super->generation = root_item->generation;
super->root_level = root_item->level;
+ if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE))
+ super->cache_generation = root_item->generation;
}
int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
@@ -988,11 +1062,127 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info)
return ret;
}
+/*
+ * wait for the current transaction commit to start and block subsequent
+ * transaction joins
+ */
+static void wait_current_trans_commit_start(struct btrfs_root *root,
+ struct btrfs_transaction *trans)
+{
+ DEFINE_WAIT(wait);
+
+ if (trans->in_commit)
+ return;
+
+ while (1) {
+ prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ if (trans->in_commit) {
+ finish_wait(&root->fs_info->transaction_blocked_wait,
+ &wait);
+ break;
+ }
+ mutex_unlock(&root->fs_info->trans_mutex);
+ schedule();
+ mutex_lock(&root->fs_info->trans_mutex);
+ finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
+ }
+}
+
+/*
+ * wait for the current transaction to start and then become unblocked.
+ * caller holds ref.
+ */
+static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
+ struct btrfs_transaction *trans)
+{
+ DEFINE_WAIT(wait);
+
+ if (trans->commit_done || (trans->in_commit && !trans->blocked))
+ return;
+
+ while (1) {
+ prepare_to_wait(&root->fs_info->transaction_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ if (trans->commit_done ||
+ (trans->in_commit && !trans->blocked)) {
+ finish_wait(&root->fs_info->transaction_wait,
+ &wait);
+ break;
+ }
+ mutex_unlock(&root->fs_info->trans_mutex);
+ schedule();
+ mutex_lock(&root->fs_info->trans_mutex);
+ finish_wait(&root->fs_info->transaction_wait,
+ &wait);
+ }
+}
+
+/*
+ * commit transactions asynchronously. once btrfs_commit_transaction_async
+ * returns, any subsequent transaction will not be allowed to join.
+ */
+struct btrfs_async_commit {
+ struct btrfs_trans_handle *newtrans;
+ struct btrfs_root *root;
+ struct delayed_work work;
+};
+
+static void do_async_commit(struct work_struct *work)
+{
+ struct btrfs_async_commit *ac =
+ container_of(work, struct btrfs_async_commit, work.work);
+
+ btrfs_commit_transaction(ac->newtrans, ac->root);
+ kfree(ac);
+}
+
+int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ int wait_for_unblock)
+{
+ struct btrfs_async_commit *ac;
+ struct btrfs_transaction *cur_trans;
+
+ ac = kmalloc(sizeof(*ac), GFP_NOFS);
+ BUG_ON(!ac);
+
+ INIT_DELAYED_WORK(&ac->work, do_async_commit);
+ ac->root = root;
+ ac->newtrans = btrfs_join_transaction(root, 0);
+
+ /* take transaction reference */
+ mutex_lock(&root->fs_info->trans_mutex);
+ cur_trans = trans->transaction;
+ cur_trans->use_count++;
+ mutex_unlock(&root->fs_info->trans_mutex);
+
+ btrfs_end_transaction(trans, root);
+ schedule_delayed_work(&ac->work, 0);
+
+ /* wait for transaction to start and unblock */
+ mutex_lock(&root->fs_info->trans_mutex);
+ if (wait_for_unblock)
+ wait_current_trans_commit_start_and_unblock(root, cur_trans);
+ else
+ wait_current_trans_commit_start(root, cur_trans);
+ put_transaction(cur_trans);
+ mutex_unlock(&root->fs_info->trans_mutex);
+
+ return 0;
+}
+
+/*
+ * btrfs_transaction state sequence:
+ * in_commit = 0, blocked = 0 (initial)
+ * in_commit = 1, blocked = 1
+ * blocked = 0
+ * commit_done = 1
+ */
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
unsigned long joined = 0;
- unsigned long timeout = 1;
struct btrfs_transaction *cur_trans;
struct btrfs_transaction *prev_trans = NULL;
DEFINE_WAIT(wait);
@@ -1039,6 +1229,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
trans->transaction->in_commit = 1;
trans->transaction->blocked = 1;
+ wake_up(&root->fs_info->transaction_blocked_wait);
+
if (cur_trans->list.prev != &root->fs_info->trans_list) {
prev_trans = list_entry(cur_trans->list.prev,
struct btrfs_transaction, list);
@@ -1063,11 +1255,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
snap_pending = 1;
WARN_ON(cur_trans != trans->transaction);
- if (cur_trans->num_writers > 1)
- timeout = MAX_SCHEDULE_TIMEOUT;
- else if (should_grow)
- timeout = 1;
-
mutex_unlock(&root->fs_info->trans_mutex);
if (flush_on_commit || snap_pending) {
@@ -1089,8 +1276,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
TASK_UNINTERRUPTIBLE);
smp_mb();
- if (cur_trans->num_writers > 1 || should_grow)
- schedule_timeout(timeout);
+ if (cur_trans->num_writers > 1)
+ schedule_timeout(MAX_SCHEDULE_TIMEOUT);
+ else if (should_grow)
+ schedule_timeout(1);
mutex_lock(&root->fs_info->trans_mutex);
finish_wait(&cur_trans->writer_wait, &wait);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index e104986d0bf..f104b57ad4e 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -87,12 +87,17 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
+int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
int num_items);
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
int num_blocks);
+struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
+ int num_blocks);
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
int num_blocks);
+int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
@@ -104,6 +109,9 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
int btrfs_clean_old_snapshots(struct btrfs_root *root);
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
+int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ int wait_for_unblock);
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index f7ac8e013ed..992ab425599 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -36,7 +36,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
int ret = 0;
int wret;
int level;
- int orig_level;
int is_extent = 0;
int next_key_ret = 0;
u64 last_ret = 0;
@@ -64,7 +63,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
return -ENOMEM;
level = btrfs_header_level(root->node);
- orig_level = level;
if (level == 0)
goto out;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index fb102a9aee9..054744ac571 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -786,7 +786,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
{
struct inode *dir;
int ret;
- struct btrfs_key location;
struct btrfs_inode_ref *ref;
struct btrfs_dir_item *di;
struct inode *inode;
@@ -795,10 +794,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
unsigned long ref_ptr;
unsigned long ref_end;
- location.objectid = key->objectid;
- location.type = BTRFS_INODE_ITEM_KEY;
- location.offset = 0;
-
/*
* it is possible that we didn't log all the parent directories
* for a given inode. If we don't find the dir, just don't
@@ -1583,7 +1578,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
struct btrfs_path *path;
struct btrfs_root *root = wc->replay_dest;
struct btrfs_key key;
- u32 item_size;
int level;
int i;
int ret;
@@ -1601,7 +1595,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
nritems = btrfs_header_nritems(eb);
for (i = 0; i < nritems; i++) {
btrfs_item_key_to_cpu(eb, &key, i);
- item_size = btrfs_item_size_nr(eb, i);
/* inode keys are done during the first stage */
if (key.type == BTRFS_INODE_ITEM_KEY &&
@@ -1668,7 +1661,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
struct walk_control *wc)
{
u64 root_owner;
- u64 root_gen;
u64 bytenr;
u64 ptr_gen;
struct extent_buffer *next;
@@ -1698,7 +1690,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
parent = path->nodes[*level];
root_owner = btrfs_header_owner(parent);
- root_gen = btrfs_header_generation(parent);
next = btrfs_find_create_tree_block(root, bytenr, blocksize);
@@ -1749,7 +1740,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
struct walk_control *wc)
{
u64 root_owner;
- u64 root_gen;
int i;
int slot;
int ret;
@@ -1757,8 +1747,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
slot = path->slots[i];
if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
- struct extent_buffer *node;
- node = path->nodes[i];
path->slots[i]++;
*level = i;
WARN_ON(*level == 0);
@@ -1771,7 +1759,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
parent = path->nodes[*level + 1];
root_owner = btrfs_header_owner(parent);
- root_gen = btrfs_header_generation(parent);
wc->process_func(root, path->nodes[*level], wc,
btrfs_header_generation(path->nodes[*level]));
if (wc->free) {
@@ -2273,7 +2260,7 @@ fail:
}
btrfs_end_log_trans(root);
- return 0;
+ return err;
}
/* see comments for btrfs_del_dir_entries_in_log */
@@ -2729,7 +2716,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_key max_key;
struct btrfs_root *log = root->log_root;
struct extent_buffer *src = NULL;
- u32 size;
int err = 0;
int ret;
int nritems;
@@ -2793,7 +2779,6 @@ again:
break;
src = path->nodes[0];
- size = btrfs_item_size_nr(src, path->slots[0]);
if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
ins_nr++;
goto next_slot;
@@ -2884,6 +2869,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
{
int ret = 0;
struct btrfs_root *root;
+ struct dentry *old_parent = NULL;
/*
* for regular files, if its inode is already on disk, we don't
@@ -2925,10 +2911,13 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
if (IS_ROOT(parent))
break;
- parent = parent->d_parent;
+ parent = dget_parent(parent);
+ dput(old_parent);
+ old_parent = parent;
inode = parent->d_inode;
}
+ dput(old_parent);
out:
return ret;
}
@@ -2960,6 +2949,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
{
int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
struct super_block *sb;
+ struct dentry *old_parent = NULL;
int ret = 0;
u64 last_committed = root->fs_info->last_trans_committed;
@@ -3031,10 +3021,13 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (IS_ROOT(parent))
break;
- parent = parent->d_parent;
+ parent = dget_parent(parent);
+ dput(old_parent);
+ old_parent = parent;
}
ret = 0;
end_trans:
+ dput(old_parent);
if (ret < 0) {
BUG_ON(ret != -ENOSPC);
root->fs_info->last_trans_log_full_commit = trans->transid;
@@ -3054,8 +3047,13 @@ end_no_trans:
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct dentry *dentry)
{
- return btrfs_log_inode_parent(trans, root, dentry->d_inode,
- dentry->d_parent, 0);
+ struct dentry *parent = dget_parent(dentry);
+ int ret;
+
+ ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0);
+ dput(parent);
+
+ return ret;
}
/*
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e25e46a8b4e..6b988450783 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -412,12 +412,16 @@ static noinline int device_list_add(const char *path,
device->fs_devices = fs_devices;
fs_devices->num_devices++;
- } else if (strcmp(device->name, path)) {
+ } else if (!device->name || strcmp(device->name, path)) {
name = kstrdup(path, GFP_NOFS);
if (!name)
return -ENOMEM;
kfree(device->name);
device->name = name;
+ if (device->missing) {
+ fs_devices->missing_devices--;
+ device->missing = 0;
+ }
}
if (found_transid > fs_devices->latest_trans) {
@@ -1236,6 +1240,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
device->fs_devices->num_devices--;
+ if (device->missing)
+ root->fs_info->fs_devices->missing_devices--;
+
next_device = list_entry(root->fs_info->fs_devices->devices.next,
struct btrfs_device, dev_list);
if (device->bdev == root->fs_info->sb->s_bdev)
@@ -1898,7 +1905,6 @@ int btrfs_balance(struct btrfs_root *dev_root)
u64 size_to_free;
struct btrfs_path *path;
struct btrfs_key key;
- struct btrfs_chunk *chunk;
struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
struct btrfs_trans_handle *trans;
struct btrfs_key found_key;
@@ -1962,9 +1968,6 @@ int btrfs_balance(struct btrfs_root *dev_root)
if (found_key.objectid != key.objectid)
break;
- chunk = btrfs_item_ptr(path->nodes[0],
- path->slots[0],
- struct btrfs_chunk);
/* chunk zero is special */
if (found_key.offset == 0)
break;
@@ -3031,8 +3034,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
}
bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
dev = multi->stripes[dev_nr].dev;
- BUG_ON(rw == WRITE && !dev->writeable);
- if (dev && dev->bdev) {
+ if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
bio->bi_bdev = dev->bdev;
if (async_submit)
schedule_bio(root, dev, rw, bio);
@@ -3085,7 +3087,9 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
device->devid = devid;
device->work.func = pending_bios_fn;
device->fs_devices = fs_devices;
+ device->missing = 1;
fs_devices->num_devices++;
+ fs_devices->missing_devices++;
spin_lock_init(&device->io_lock);
INIT_LIST_HEAD(&device->dev_alloc_list);
memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
@@ -3283,6 +3287,15 @@ static int read_one_dev(struct btrfs_root *root,
device = add_missing_dev(root, devid, dev_uuid);
if (!device)
return -ENOMEM;
+ } else if (!device->missing) {
+ /*
+ * this happens when a device that was properly setup
+ * in the device info lists suddenly goes bad.
+ * device->bdev is NULL, and so we have to set
+ * device->missing to one here
+ */
+ root->fs_info->fs_devices->missing_devices++;
+ device->missing = 1;
}
}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2b638b6e4ee..2740db49eb0 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -44,6 +44,7 @@ struct btrfs_device {
int writeable;
int in_fs_metadata;
+ int missing;
spinlock_t io_lock;
@@ -93,6 +94,7 @@ struct btrfs_fs_devices {
u64 num_devices;
u64 open_devices;
u64 rw_devices;
+ u64 missing_devices;
u64 total_rw_bytes;
struct block_device *latest_bdev;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 88ecbb21587..698fdd2c739 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -178,7 +178,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
struct inode *inode = dentry->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_path *path;
- struct btrfs_item *item;
struct extent_buffer *leaf;
struct btrfs_dir_item *di;
int ret = 0, slot, advance;
@@ -234,7 +233,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
}
advance = 1;
- item = btrfs_item_nr(leaf, slot);
btrfs_item_key_to_cpu(leaf, &found_key, slot);
/* check to make sure this item is what we want */
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 3e2b90eaa23..b9cd5445f71 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -199,8 +199,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
int nr_pages = 0;
struct page *in_page = NULL;
struct page *out_page = NULL;
- int out_written = 0;
- int in_read = 0;
unsigned long bytes_left;
*out_pages = 0;
@@ -233,9 +231,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
- out_written = 0;
- in_read = 0;
-
while (workspace->def_strm.total_in < len) {
ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
if (ret != Z_OK) {