summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/bio.c11
-rw-r--r--fs/block_dev.c3
-rw-r--r--fs/btrfs/backref.c4
-rw-r--r--fs/btrfs/compression.c1
-rw-r--r--fs/btrfs/ctree.c9
-rw-r--r--fs/btrfs/ctree.h5
-rw-r--r--fs/btrfs/delayed-inode.c12
-rw-r--r--fs/btrfs/delayed-ref.c163
-rw-r--r--fs/btrfs/delayed-ref.h6
-rw-r--r--fs/btrfs/disk-io.c53
-rw-r--r--fs/btrfs/disk-io.h2
-rw-r--r--fs/btrfs/extent-tree.c123
-rw-r--r--fs/btrfs/extent_io.c17
-rw-r--r--fs/btrfs/file-item.c4
-rw-r--r--fs/btrfs/inode.c330
-rw-r--r--fs/btrfs/ioctl.c2
-rw-r--r--fs/btrfs/locking.c2
-rw-r--r--fs/btrfs/qgroup.c4
-rw-r--r--fs/btrfs/root-tree.c4
-rw-r--r--fs/btrfs/super.c15
-rw-r--r--fs/btrfs/transaction.c3
-rw-r--r--fs/btrfs/volumes.c33
-rw-r--r--fs/btrfs/volumes.h2
-rw-r--r--fs/buffer.c66
-rw-r--r--fs/ceph/debugfs.c1
-rw-r--r--fs/ceph/inode.c15
-rw-r--r--fs/ceph/ioctl.c3
-rw-r--r--fs/cifs/cifs_unicode.c2
-rw-r--r--fs/cifs/cifssmb.c11
-rw-r--r--fs/cifs/dir.c9
-rw-r--r--fs/cifs/file.c2
-rw-r--r--fs/cifs/inode.c24
-rw-r--r--fs/cifs/link.c2
-rw-r--r--fs/cifs/smb2misc.c16
-rw-r--r--fs/cifs/smb2pdu.h10
-rw-r--r--fs/cifs/transport.c9
-rw-r--r--fs/dcache.c12
-rw-r--r--fs/debugfs/file.c76
-rw-r--r--fs/direct-io.c5
-rw-r--r--fs/ecryptfs/file.c10
-rw-r--r--fs/ecryptfs/inode.c5
-rw-r--r--fs/ecryptfs/main.c1
-rw-r--r--fs/eventpoll.c2
-rw-r--r--fs/ext2/balloc.c2
-rw-r--r--fs/ext3/balloc.c2
-rw-r--r--fs/ext3/inode.c19
-rw-r--r--fs/ext4/inode.c14
-rw-r--r--fs/ext4/mballoc.c2
-rw-r--r--fs/fs-writeback.c4
-rw-r--r--fs/fuse/control.c4
-rw-r--r--fs/fuse/cuse.c4
-rw-r--r--fs/fuse/dev.c1
-rw-r--r--fs/fuse/inode.c12
-rw-r--r--fs/gfs2/aops.c11
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/gfs2/file.c35
-rw-r--r--fs/gfs2/glock.c60
-rw-r--r--fs/gfs2/glops.c1
-rw-r--r--fs/gfs2/incore.h30
-rw-r--r--fs/gfs2/inode.c28
-rw-r--r--fs/gfs2/ops_fstype.c8
-rw-r--r--fs/gfs2/quota.c11
-rw-r--r--fs/gfs2/rgrp.c1221
-rw-r--r--fs/gfs2/rgrp.h28
-rw-r--r--fs/gfs2/super.c9
-rw-r--r--fs/gfs2/trace_gfs2.h20
-rw-r--r--fs/gfs2/trans.h7
-rw-r--r--fs/gfs2/xattr.c96
-rw-r--r--fs/jbd/journal.c5
-rw-r--r--fs/libfs.c2
-rw-r--r--fs/lockd/svclock.c3
-rw-r--r--fs/logfs/dev_bdev.c15
-rw-r--r--fs/logfs/inode.c18
-rw-r--r--fs/logfs/journal.c2
-rw-r--r--fs/logfs/readwrite.c1
-rw-r--r--fs/logfs/segment.c2
-rw-r--r--fs/namei.c2
-rw-r--r--fs/namespace.c10
-rw-r--r--fs/nfs/Makefile18
-rw-r--r--fs/nfs/client.c2
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/idmap.c62
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/nfs3proc.c4
-rw-r--r--fs/nfs/nfs4_fs.h3
-rw-r--r--fs/nfs/nfs4client.c2
-rw-r--r--fs/nfs/nfs4file.c4
-rw-r--r--fs/nfs/nfs4proc.c127
-rw-r--r--fs/nfs/nfs4super.c15
-rw-r--r--fs/nfs/nfs4xdr.c41
-rw-r--r--fs/nfs/objlayout/objio_osd.c55
-rw-r--r--fs/nfs/pagelist.c2
-rw-r--r--fs/nfs/pnfs.c39
-rw-r--r--fs/nfs/pnfs.h2
-rw-r--r--fs/nfs/super.c43
-rw-r--r--fs/nfs/write.c15
-rw-r--r--fs/nfsd/nfs4callback.c4
-rw-r--r--fs/nfsd/state.h1
-rw-r--r--fs/proc/proc_sysctl.c5
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/reiserfs/bitmap.c2
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/stat.c2
-rw-r--r--fs/ubifs/debug.h2
-rw-r--r--fs/ubifs/lpt.c5
-rw-r--r--fs/ubifs/recovery.c2
-rw-r--r--fs/ubifs/replay.c3
-rw-r--r--fs/ubifs/super.c3
-rw-r--r--fs/udf/file.c35
-rw-r--r--fs/udf/inode.c5
-rw-r--r--fs/udf/super.c7
-rw-r--r--fs/xfs/xfs_buf.c5
-rw-r--r--fs/xfs/xfs_buf.h41
-rw-r--r--fs/xfs/xfs_discard.c6
-rw-r--r--fs/xfs/xfs_ialloc.c17
-rw-r--r--fs/xfs/xfs_rtalloc.c2
-rw-r--r--fs/xfs/xfs_super.c1
117 files changed, 1775 insertions, 1597 deletions
diff --git a/fs/bio.c b/fs/bio.c
index 5eaa70c9d96..71072ab9912 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -73,7 +73,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
{
unsigned int sz = sizeof(struct bio) + extra_size;
struct kmem_cache *slab = NULL;
- struct bio_slab *bslab;
+ struct bio_slab *bslab, *new_bio_slabs;
unsigned int i, entry = -1;
mutex_lock(&bio_slab_lock);
@@ -97,11 +97,12 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
if (bio_slab_nr == bio_slab_max && entry == -1) {
bio_slab_max <<= 1;
- bio_slabs = krealloc(bio_slabs,
- bio_slab_max * sizeof(struct bio_slab),
- GFP_KERNEL);
- if (!bio_slabs)
+ new_bio_slabs = krealloc(bio_slabs,
+ bio_slab_max * sizeof(struct bio_slab),
+ GFP_KERNEL);
+ if (!new_bio_slabs)
goto out_unlock;
+ bio_slabs = new_bio_slabs;
}
if (entry == -1)
entry = bio_slab_nr++;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1e519195d45..38e721b35d4 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1578,10 +1578,12 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
+ struct blk_plug plug;
ssize_t ret;
BUG_ON(iocb->ki_pos != pos);
+ blk_start_plug(&plug);
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
if (ret > 0 || ret == -EIOCBQUEUED) {
ssize_t err;
@@ -1590,6 +1592,7 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
if (err < 0 && ret > 0)
ret = err;
}
+ blk_finish_plug(&plug);
return ret;
}
EXPORT_SYMBOL_GPL(blkdev_aio_write);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index a256f3b2a84..ff6475f409d 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1438,10 +1438,10 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
ret = extent_from_logical(fs_info, logical, path,
&found_key);
btrfs_release_path(path);
- if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
- ret = -EINVAL;
if (ret < 0)
return ret;
+ if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
+ return -EINVAL;
extent_item_pos = logical - found_key.objectid;
ret = iterate_extent_inodes(fs_info, found_key.objectid,
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 86eff48dab7..43d1c5a3a03 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -818,6 +818,7 @@ static void free_workspace(int type, struct list_head *workspace)
btrfs_compress_op[idx]->free_workspace(workspace);
atomic_dec(alloc_workspace);
wake:
+ smp_mb();
if (waitqueue_active(workspace_wait))
wake_up(workspace_wait);
}
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 9d7621f271f..6d183f60d63 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -421,12 +421,6 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
spin_unlock(&fs_info->tree_mod_seq_lock);
/*
- * we removed the lowest blocker from the blocker list, so there may be
- * more processible delayed refs.
- */
- wake_up(&fs_info->tree_mod_seq_wait);
-
- /*
* anything that's lower than the lowest existing (read: blocked)
* sequence number can be removed from the tree.
*/
@@ -631,6 +625,9 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
u32 nritems;
int ret;
+ if (btrfs_header_level(eb) == 0)
+ return;
+
nritems = btrfs_header_nritems(eb);
for (i = nritems - 1; i >= 0; i--) {
ret = tree_mod_log_insert_key_locked(fs_info, eb, i,
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4bab807227a..9821b672f5a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -116,7 +116,7 @@ struct btrfs_ordered_sum;
#define BTRFS_FREE_SPACE_OBJECTID -11ULL
/*
- * The inode number assigned to the special inode for sotring
+ * The inode number assigned to the special inode for storing
* free ino cache
*/
#define BTRFS_FREE_INO_OBJECTID -12ULL
@@ -1252,7 +1252,6 @@ struct btrfs_fs_info {
atomic_t tree_mod_seq;
struct list_head tree_mod_seq_list;
struct seq_list tree_mod_seq_elem;
- wait_queue_head_t tree_mod_seq_wait;
/* this protects tree_mod_log */
rwlock_t tree_mod_log_lock;
@@ -3192,7 +3191,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u32 *dst);
int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
- struct bio *bio, u64 logical_offset, u32 *dst);
+ struct bio *bio, u64 logical_offset);
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 objectid, u64 pos,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 335605c8cea..07d5eeb1e6f 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -512,8 +512,8 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
rb_erase(&delayed_item->rb_node, root);
delayed_item->delayed_node->count--;
- atomic_dec(&delayed_root->items);
- if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND &&
+ if (atomic_dec_return(&delayed_root->items) <
+ BTRFS_DELAYED_BACKGROUND &&
waitqueue_active(&delayed_root->wait))
wake_up(&delayed_root->wait);
}
@@ -1028,9 +1028,10 @@ do_again:
btrfs_release_delayed_item(prev);
ret = 0;
btrfs_release_path(path);
- if (curr)
+ if (curr) {
+ mutex_unlock(&node->mutex);
goto do_again;
- else
+ } else
goto delete_fail;
}
@@ -1055,8 +1056,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
delayed_node->count--;
delayed_root = delayed_node->root->fs_info->delayed_root;
- atomic_dec(&delayed_root->items);
- if (atomic_read(&delayed_root->items) <
+ if (atomic_dec_return(&delayed_root->items) <
BTRFS_DELAYED_BACKGROUND &&
waitqueue_active(&delayed_root->wait))
wake_up(&delayed_root->wait);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index da7419ed01b..ae941177339 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -38,17 +38,14 @@
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
struct btrfs_delayed_tree_ref *ref1)
{
- if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
- if (ref1->root < ref2->root)
- return -1;
- if (ref1->root > ref2->root)
- return 1;
- } else {
- if (ref1->parent < ref2->parent)
- return -1;
- if (ref1->parent > ref2->parent)
- return 1;
- }
+ if (ref1->root < ref2->root)
+ return -1;
+ if (ref1->root > ref2->root)
+ return 1;
+ if (ref1->parent < ref2->parent)
+ return -1;
+ if (ref1->parent > ref2->parent)
+ return 1;
return 0;
}
@@ -85,7 +82,8 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
* type of the delayed backrefs and content of delayed backrefs.
*/
static int comp_entry(struct btrfs_delayed_ref_node *ref2,
- struct btrfs_delayed_ref_node *ref1)
+ struct btrfs_delayed_ref_node *ref1,
+ bool compare_seq)
{
if (ref1->bytenr < ref2->bytenr)
return -1;
@@ -102,10 +100,12 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2,
if (ref1->type > ref2->type)
return 1;
/* merging of sequenced refs is not allowed */
- if (ref1->seq < ref2->seq)
- return -1;
- if (ref1->seq > ref2->seq)
- return 1;
+ if (compare_seq) {
+ if (ref1->seq < ref2->seq)
+ return -1;
+ if (ref1->seq > ref2->seq)
+ return 1;
+ }
if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
@@ -139,7 +139,7 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
rb_node);
- cmp = comp_entry(entry, ins);
+ cmp = comp_entry(entry, ins, 1);
if (cmp < 0)
p = &(*p)->rb_left;
else if (cmp > 0)
@@ -233,6 +233,114 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
return 0;
}
+static void inline drop_delayed_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_node *ref)
+{
+ rb_erase(&ref->rb_node, &delayed_refs->root);
+ ref->in_tree = 0;
+ btrfs_put_delayed_ref(ref);
+ delayed_refs->num_entries--;
+ if (trans->delayed_ref_updates)
+ trans->delayed_ref_updates--;
+}
+
+static int merge_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_node *ref, u64 seq)
+{
+ struct rb_node *node;
+ int merged = 0;
+ int mod = 0;
+ int done = 0;
+
+ node = rb_prev(&ref->rb_node);
+ while (node) {
+ struct btrfs_delayed_ref_node *next;
+
+ next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+ node = rb_prev(node);
+ if (next->bytenr != ref->bytenr)
+ break;
+ if (seq && next->seq >= seq)
+ break;
+ if (comp_entry(ref, next, 0))
+ continue;
+
+ if (ref->action == next->action) {
+ mod = next->ref_mod;
+ } else {
+ if (ref->ref_mod < next->ref_mod) {
+ struct btrfs_delayed_ref_node *tmp;
+
+ tmp = ref;
+ ref = next;
+ next = tmp;
+ done = 1;
+ }
+ mod = -next->ref_mod;
+ }
+
+ merged++;
+ drop_delayed_ref(trans, delayed_refs, next);
+ ref->ref_mod += mod;
+ if (ref->ref_mod == 0) {
+ drop_delayed_ref(trans, delayed_refs, ref);
+ break;
+ } else {
+ /*
+ * You can't have multiples of the same ref on a tree
+ * block.
+ */
+ WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
+ ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
+ }
+
+ if (done)
+ break;
+ node = rb_prev(&ref->rb_node);
+ }
+
+ return merged;
+}
+
+void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_head *head)
+{
+ struct rb_node *node;
+ u64 seq = 0;
+
+ spin_lock(&fs_info->tree_mod_seq_lock);
+ if (!list_empty(&fs_info->tree_mod_seq_list)) {
+ struct seq_list *elem;
+
+ elem = list_first_entry(&fs_info->tree_mod_seq_list,
+ struct seq_list, list);
+ seq = elem->seq;
+ }
+ spin_unlock(&fs_info->tree_mod_seq_lock);
+
+ node = rb_prev(&head->node.rb_node);
+ while (node) {
+ struct btrfs_delayed_ref_node *ref;
+
+ ref = rb_entry(node, struct btrfs_delayed_ref_node,
+ rb_node);
+ if (ref->bytenr != head->node.bytenr)
+ break;
+
+ /* We can't merge refs that are outside of our seq count */
+ if (seq && ref->seq >= seq)
+ break;
+ if (merge_ref(trans, delayed_refs, ref, seq))
+ node = rb_prev(&head->node.rb_node);
+ else
+ node = rb_prev(node);
+ }
+}
+
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
u64 seq)
@@ -336,18 +444,11 @@ update_existing_ref(struct btrfs_trans_handle *trans,
* every changing the extent allocation tree.
*/
existing->ref_mod--;
- if (existing->ref_mod == 0) {
- rb_erase(&existing->rb_node,
- &delayed_refs->root);
- existing->in_tree = 0;
- btrfs_put_delayed_ref(existing);
- delayed_refs->num_entries--;
- if (trans->delayed_ref_updates)
- trans->delayed_ref_updates--;
- } else {
+ if (existing->ref_mod == 0)
+ drop_delayed_ref(trans, delayed_refs, existing);
+ else
WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
- }
} else {
WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
@@ -662,9 +763,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action,
for_cow);
- if (!need_ref_seq(for_cow, ref_root) &&
- waitqueue_active(&fs_info->tree_mod_seq_wait))
- wake_up(&fs_info->tree_mod_seq_wait);
spin_unlock(&delayed_refs->lock);
if (need_ref_seq(for_cow, ref_root))
btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
@@ -713,9 +811,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
action, for_cow);
- if (!need_ref_seq(for_cow, ref_root) &&
- waitqueue_active(&fs_info->tree_mod_seq_wait))
- wake_up(&fs_info->tree_mod_seq_wait);
spin_unlock(&delayed_refs->lock);
if (need_ref_seq(for_cow, ref_root))
btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
@@ -744,8 +839,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
extent_op->is_data);
- if (waitqueue_active(&fs_info->tree_mod_seq_wait))
- wake_up(&fs_info->tree_mod_seq_wait);
spin_unlock(&delayed_refs->lock);
return 0;
}
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 0d7c90c366b..c9d703693df 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -18,7 +18,7 @@
#ifndef __DELAYED_REF__
#define __DELAYED_REF__
-/* these are the possible values of struct btrfs_delayed_ref->action */
+/* these are the possible values of struct btrfs_delayed_ref_node->action */
#define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */
#define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */
#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
@@ -167,6 +167,10 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
struct btrfs_delayed_extent_op *extent_op);
+void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_head *head);
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 62e0cafd6e2..22e98e04c2e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -377,9 +377,13 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
ret = read_extent_buffer_pages(io_tree, eb, start,
WAIT_COMPLETE,
btree_get_extent, mirror_num);
- if (!ret && !verify_parent_transid(io_tree, eb,
+ if (!ret) {
+ if (!verify_parent_transid(io_tree, eb,
parent_transid, 0))
- break;
+ break;
+ else
+ ret = -EIO;
+ }
/*
* This buffer's crc is fine, but its contents are corrupted, so
@@ -754,9 +758,7 @@ static void run_one_async_done(struct btrfs_work *work)
limit = btrfs_async_submit_limit(fs_info);
limit = limit * 2 / 3;
- atomic_dec(&fs_info->nr_async_submits);
-
- if (atomic_read(&fs_info->nr_async_submits) < limit &&
+ if (atomic_dec_return(&fs_info->nr_async_submits) < limit &&
waitqueue_active(&fs_info->async_submit_wait))
wake_up(&fs_info->async_submit_wait);
@@ -2032,8 +2034,6 @@ int open_ctree(struct super_block *sb,
fs_info->free_chunk_space = 0;
fs_info->tree_mod_log = RB_ROOT;
- init_waitqueue_head(&fs_info->tree_mod_seq_wait);
-
/* readahead state */
INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
spin_lock_init(&fs_info->reada_lock);
@@ -2528,8 +2528,7 @@ retry_root_backup:
goto fail_trans_kthread;
/* do not make disk changes in broken FS */
- if (btrfs_super_log_root(disk_super) != 0 &&
- !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
+ if (btrfs_super_log_root(disk_super) != 0) {
u64 bytenr = btrfs_super_log_root(disk_super);
if (fs_devices->rw_devices == 0) {
@@ -3189,30 +3188,14 @@ int close_ctree(struct btrfs_root *root)
/* clear out the rbtree of defraggable inodes */
btrfs_run_defrag_inodes(fs_info);
- /*
- * Here come 2 situations when btrfs is broken to flip readonly:
- *
- * 1. when btrfs flips readonly somewhere else before
- * btrfs_commit_super, sb->s_flags has MS_RDONLY flag,
- * and btrfs will skip to write sb directly to keep
- * ERROR state on disk.
- *
- * 2. when btrfs flips readonly just in btrfs_commit_super,
- * and in such case, btrfs cannot write sb via btrfs_commit_super,
- * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
- * btrfs will cleanup all FS resources first and write sb then.
- */
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
ret = btrfs_commit_super(root);
if (ret)
printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
}
- if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
- ret = btrfs_error_commit_super(root);
- if (ret)
- printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
- }
+ if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
+ btrfs_error_commit_super(root);
btrfs_put_block_group_cache(fs_info);
@@ -3434,18 +3417,11 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
if (read_only)
return 0;
- if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
- printk(KERN_WARNING "warning: mount fs with errors, "
- "running btrfsck is recommended\n");
- }
-
return 0;
}
-int btrfs_error_commit_super(struct btrfs_root *root)
+void btrfs_error_commit_super(struct btrfs_root *root)
{
- int ret;
-
mutex_lock(&root->fs_info->cleaner_mutex);
btrfs_run_delayed_iputs(root);
mutex_unlock(&root->fs_info->cleaner_mutex);
@@ -3455,10 +3431,6 @@ int btrfs_error_commit_super(struct btrfs_root *root)
/* cleanup FS via transaction */
btrfs_cleanup_transaction(root);
-
- ret = write_ctree_super(NULL, root, 0);
-
- return ret;
}
static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
@@ -3782,14 +3754,17 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
/* FIXME: cleanup wait for commit */
t->in_commit = 1;
t->blocked = 1;
+ smp_mb();
if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
wake_up(&root->fs_info->transaction_blocked_wait);
t->blocked = 0;
+ smp_mb();
if (waitqueue_active(&root->fs_info->transaction_wait))
wake_up(&root->fs_info->transaction_wait);
t->commit_done = 1;
+ smp_mb();
if (waitqueue_active(&t->commit_wait))
wake_up(&t->commit_wait);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 95e147eea23..c5b00a735fe 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -54,7 +54,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int max_mirrors);
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
int btrfs_commit_super(struct btrfs_root *root);
-int btrfs_error_commit_super(struct btrfs_root *root);
+void btrfs_error_commit_super(struct btrfs_root *root);
struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
u64 bytenr, u32 blocksize);
struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4e1b153b7c4..ba58024d40d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2252,6 +2252,16 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
}
/*
+ * We need to try and merge add/drops of the same ref since we
+ * can run into issues with relocate dropping the implicit ref
+ * and then it being added back again before the drop can
+ * finish. If we merged anything we need to re-loop so we can
+ * get a good ref.
+ */
+ btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
+ locked_ref);
+
+ /*
* locked_ref is the head node, so we have to go one
* node back for any delayed ref updates
*/
@@ -2318,12 +2328,23 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
ref->in_tree = 0;
rb_erase(&ref->rb_node, &delayed_refs->root);
delayed_refs->num_entries--;
- /*
- * we modified num_entries, but as we're currently running
- * delayed refs, skip
- * wake_up(&delayed_refs->seq_wait);
- * here.
- */
+ if (locked_ref) {
+ /*
+ * when we play the delayed ref, also correct the
+ * ref_mod on head
+ */
+ switch (ref->action) {
+ case BTRFS_ADD_DELAYED_REF:
+ case BTRFS_ADD_DELAYED_EXTENT:
+ locked_ref->node.ref_mod -= ref->ref_mod;
+ break;
+ case BTRFS_DROP_DELAYED_REF:
+ locked_ref->node.ref_mod += ref->ref_mod;
+ break;
+ default:
+ WARN_ON(1);
+ }
+ }
spin_unlock(&delayed_refs->lock);
ret = run_one_delayed_ref(trans, root, ref, extent_op,
@@ -2350,22 +2371,6 @@ next:
return count;
}
-static void wait_for_more_refs(struct btrfs_fs_info *fs_info,
- struct btrfs_delayed_ref_root *delayed_refs,
- unsigned long num_refs,
- struct list_head *first_seq)
-{
- spin_unlock(&delayed_refs->lock);
- pr_debug("waiting for more refs (num %ld, first %p)\n",
- num_refs, first_seq);
- wait_event(fs_info->tree_mod_seq_wait,
- num_refs != delayed_refs->num_entries ||
- fs_info->tree_mod_seq_list.next != first_seq);
- pr_debug("done waiting for more refs (num %ld, first %p)\n",
- delayed_refs->num_entries, fs_info->tree_mod_seq_list.next);
- spin_lock(&delayed_refs->lock);
-}
-
#ifdef SCRAMBLE_DELAYED_REFS
/*
* Normally delayed refs get processed in ascending bytenr order. This
@@ -2460,13 +2465,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_delayed_ref_node *ref;
struct list_head cluster;
- struct list_head *first_seq = NULL;
int ret;
u64 delayed_start;
int run_all = count == (unsigned long)-1;
int run_most = 0;
- unsigned long num_refs = 0;
- int consider_waiting;
+ int loops;
/* We'll clean this up in btrfs_cleanup_transaction */
if (trans->aborted)
@@ -2484,7 +2487,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
delayed_refs = &trans->transaction->delayed_refs;
INIT_LIST_HEAD(&cluster);
again:
- consider_waiting = 0;
+ loops = 0;
spin_lock(&delayed_refs->lock);
#ifdef SCRAMBLE_DELAYED_REFS
@@ -2512,31 +2515,6 @@ again:
if (ret)
break;
- if (delayed_start >= delayed_refs->run_delayed_start) {
- if (consider_waiting == 0) {
- /*
- * btrfs_find_ref_cluster looped. let's do one
- * more cycle. if we don't run any delayed ref
- * during that cycle (because we can't because
- * all of them are blocked) and if the number of
- * refs doesn't change, we avoid busy waiting.
- */
- consider_waiting = 1;
- num_refs = delayed_refs->num_entries;
- first_seq = root->fs_info->tree_mod_seq_list.next;
- } else {
- wait_for_more_refs(root->fs_info, delayed_refs,
- num_refs, first_seq);
- /*
- * after waiting, things have changed. we
- * dropped the lock and someone else might have
- * run some refs, built new clusters and so on.
- * therefore, we restart staleness detection.
- */
- consider_waiting = 0;
- }
- }
-
ret = run_clustered_refs(trans, root, &cluster);
if (ret < 0) {
spin_unlock(&delayed_refs->lock);
@@ -2549,9 +2527,26 @@ again:
if (count == 0)
break;
- if (ret || delayed_refs->run_delayed_start == 0) {
+ if (delayed_start >= delayed_refs->run_delayed_start) {
+ if (loops == 0) {
+ /*
+ * btrfs_find_ref_cluster looped. let's do one
+ * more cycle. if we don't run any delayed ref
+ * during that cycle (because we can't because
+ * all of them are blocked), bail out.
+ */
+ loops = 1;
+ } else {
+ /*
+ * no runnable refs left, stop trying
+ */
+ BUG_ON(run_all);
+ break;
+ }
+ }
+ if (ret) {
/* refs were run, let's reset staleness detection */
- consider_waiting = 0;
+ loops = 0;
}
}
@@ -3007,17 +3002,16 @@ again:
}
spin_unlock(&block_group->lock);
- num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
+ /*
+ * Try to preallocate enough space based on how big the block group is.
+ * Keep in mind this has to include any pinned space which could end up
+ * taking up quite a bit since it's not folded into the other space
+ * cache.
+ */
+ num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024);
if (!num_pages)
num_pages = 1;
- /*
- * Just to make absolutely sure we have enough space, we're going to
- * preallocate 12 pages worth of space for each block group. In
- * practice we ought to use at most 8, but we need extra space so we can
- * add our header and have a terminator between the extents and the
- * bitmaps.
- */
num_pages *= 16;
num_pages *= PAGE_CACHE_SIZE;
@@ -4571,8 +4565,10 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
if (root->fs_info->quota_enabled) {
ret = btrfs_qgroup_reserve(root, num_bytes +
nr_extents * root->leafsize);
- if (ret)
+ if (ret) {
+ mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
return ret;
+ }
}
ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
@@ -5294,9 +5290,6 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
rb_erase(&head->node.rb_node, &delayed_refs->root);
delayed_refs->num_entries--;
- smp_mb();
- if (waitqueue_active(&root->fs_info->tree_mod_seq_wait))
- wake_up(&root->fs_info->tree_mod_seq_wait);
/*
* we don't take a ref on the node because we're removing it from the
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 45c81bb4ac8..4c878476bb9 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2330,23 +2330,10 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
ret = tree->ops->readpage_end_io_hook(page, start, end,
state, mirror);
- if (ret) {
- /* no IO indicated but software detected errors
- * in the block, either checksum errors or
- * issues with the contents */
- struct btrfs_root *root =
- BTRFS_I(page->mapping->host)->root;
- struct btrfs_device *device;
-
+ if (ret)
uptodate = 0;
- device = btrfs_find_device_for_logical(
- root, start, mirror);
- if (device)
- btrfs_dev_stat_inc_and_print(device,
- BTRFS_DEV_STAT_CORRUPTION_ERRS);
- } else {
+ else
clean_io_failure(start, page);
- }
}
if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index b45b9de0c21..857d93cd01d 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -272,9 +272,9 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
}
int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
- struct bio *bio, u64 offset, u32 *dst)
+ struct bio *bio, u64 offset)
{
- return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1);
+ return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1);
}
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6e8f416773d..316b07a866d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1008,9 +1008,7 @@ static noinline void async_cow_submit(struct btrfs_work *work)
nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
PAGE_CACHE_SHIFT;
- atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages);
-
- if (atomic_read(&root->fs_info->async_delalloc_pages) <
+ if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
5 * 1024 * 1024 &&
waitqueue_active(&root->fs_info->async_submit_wait))
wake_up(&root->fs_info->async_submit_wait);
@@ -1885,8 +1883,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
trans = btrfs_join_transaction_nolock(root);
else
trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ trans = NULL;
+ goto out;
+ }
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
ret = btrfs_update_inode_fallback(trans, root, inode);
if (ret) /* -ENOMEM or corruption */
@@ -1970,8 +1971,8 @@ out:
ordered_extent->len - 1, NULL, GFP_NOFS);
/*
- * This needs to be dont to make sure anybody waiting knows we are done
- * upating everything for this ordered extent.
+ * This needs to be done to make sure anybody waiting knows we are done
+ * updating everything for this ordered extent.
*/
btrfs_remove_ordered_extent(inode, ordered_extent);
@@ -3174,7 +3175,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
btrfs_i_size_write(dir, dir->i_size - name_len * 2);
inode_inc_iversion(dir);
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
- ret = btrfs_update_inode(trans, root, dir);
+ ret = btrfs_update_inode_fallback(trans, root, dir);
if (ret)
btrfs_abort_transaction(trans, root, ret);
out:
@@ -5774,18 +5775,112 @@ out:
return ret;
}
+static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
+ struct extent_state **cached_state, int writing)
+{
+ struct btrfs_ordered_extent *ordered;
+ int ret = 0;
+
+ while (1) {
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ 0, cached_state);
+ /*
+ * We're concerned with the entire range that we're going to be
+ * doing DIO to, so we need to make sure theres no ordered
+ * extents in this range.
+ */
+ ordered = btrfs_lookup_ordered_range(inode, lockstart,
+ lockend - lockstart + 1);
+
+ /*
+ * We need to make sure there are no buffered pages in this
+ * range either, we could have raced between the invalidate in
+ * generic_file_direct_write and locking the extent. The
+ * invalidate needs to happen so that reads after a write do not
+ * get stale data.
+ */
+ if (!ordered && (!writing ||
+ !test_range_bit(&BTRFS_I(inode)->io_tree,
+ lockstart, lockend, EXTENT_UPTODATE, 0,
+ *cached_state)))
+ break;
+
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ cached_state, GFP_NOFS);
+
+ if (ordered) {
+ btrfs_start_ordered_extent(inode, ordered, 1);
+ btrfs_put_ordered_extent(ordered);
+ } else {
+ /* Screw you mmap */
+ ret = filemap_write_and_wait_range(inode->i_mapping,
+ lockstart,
+ lockend);
+ if (ret)
+ break;
+
+ /*
+ * If we found a page that couldn't be invalidated just
+ * fall back to buffered.
+ */
+ ret = invalidate_inode_pages2_range(inode->i_mapping,
+ lockstart >> PAGE_CACHE_SHIFT,
+ lockend >> PAGE_CACHE_SHIFT);
+ if (ret)
+ break;
+ }
+
+ cond_resched();
+ }
+
+ return ret;
+}
+
static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
struct extent_map *em;
struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct extent_state *cached_state = NULL;
u64 start = iblock << inode->i_blkbits;
+ u64 lockstart, lockend;
u64 len = bh_result->b_size;
struct btrfs_trans_handle *trans;
+ int unlock_bits = EXTENT_LOCKED;
+ int ret;
+
+ if (create) {
+ ret = btrfs_delalloc_reserve_space(inode, len);
+ if (ret)
+ return ret;
+ unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
+ } else {
+ len = min_t(u64, len, root->sectorsize);
+ }
+
+ lockstart = start;
+ lockend = start + len - 1;
+
+ /*
+ * If this errors out it's because we couldn't invalidate pagecache for
+ * this range and we need to fallback to buffered.
+ */
+ if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
+ return -ENOTBLK;
+
+ if (create) {
+ ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+ lockend, EXTENT_DELALLOC, NULL,
+ &cached_state, GFP_NOFS);
+ if (ret)
+ goto unlock_err;
+ }
em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
- if (IS_ERR(em))
- return PTR_ERR(em);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto unlock_err;
+ }
/*
* Ok for INLINE and COMPRESSED extents we need to fallback on buffered
@@ -5804,17 +5899,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
em->block_start == EXTENT_MAP_INLINE) {
free_extent_map(em);
- return -ENOTBLK;
+ ret = -ENOTBLK;
+ goto unlock_err;
}
/* Just a good old fashioned hole, return */
if (!create && (em->block_start == EXTENT_MAP_HOLE ||
test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
free_extent_map(em);
- /* DIO will do one hole at a time, so just unlock a sector */
- unlock_extent(&BTRFS_I(inode)->io_tree, start,
- start + root->sectorsize - 1);
- return 0;
+ ret = 0;
+ goto unlock_err;
}
/*
@@ -5827,8 +5921,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
*
*/
if (!create) {
- len = em->len - (start - em->start);
- goto map;
+ len = min(len, em->len - (start - em->start));
+ lockstart = start + len;
+ goto unlock;
}
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
@@ -5860,7 +5955,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
btrfs_end_transaction(trans, root);
if (ret) {
free_extent_map(em);
- return ret;
+ goto unlock_err;
}
goto unlock;
}
@@ -5873,14 +5968,12 @@ must_cow:
*/
len = bh_result->b_size;
em = btrfs_new_extent_direct(inode, em, start, len);
- if (IS_ERR(em))
- return PTR_ERR(em);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto unlock_err;
+ }
len = min(len, em->len - (start - em->start));
unlock:
- clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
- EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
- 0, NULL, GFP_NOFS);
-map:
bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
inode->i_blkbits;
bh_result->b_size = len;
@@ -5898,9 +5991,44 @@ map:
i_size_write(inode, start + len);
}
+ /*
+ * In the case of write we need to clear and unlock the entire range,
+ * in the case of read we need to unlock only the end area that we
+ * aren't using if there is any left over space.
+ */
+ if (lockstart < lockend) {
+ if (create && len < lockend - lockstart) {
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+ lockstart + len - 1, unlock_bits, 1, 0,
+ &cached_state, GFP_NOFS);
+ /*
+ * Beside unlock, we also need to cleanup reserved space
+ * for the left range by attaching EXTENT_DO_ACCOUNTING.
+ */
+ clear_extent_bit(&BTRFS_I(inode)->io_tree,
+ lockstart + len, lockend,
+ unlock_bits | EXTENT_DO_ACCOUNTING,
+ 1, 0, NULL, GFP_NOFS);
+ } else {
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+ lockend, unlock_bits, 1, 0,
+ &cached_state, GFP_NOFS);
+ }
+ } else {
+ free_extent_state(cached_state);
+ }
+
free_extent_map(em);
return 0;
+
+unlock_err:
+ if (create)
+ unlock_bits |= EXTENT_DO_ACCOUNTING;
+
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ unlock_bits, 1, 0, &cached_state, GFP_NOFS);
+ return ret;
}
struct btrfs_dio_private {
@@ -5908,7 +6036,6 @@ struct btrfs_dio_private {
u64 logical_offset;
u64 disk_bytenr;
u64 bytes;
- u32 *csums;
void *private;
/* number of bios pending for this dio */
@@ -5928,7 +6055,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
struct inode *inode = dip->inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 start;
- u32 *private = dip->csums;
start = dip->logical_offset;
do {
@@ -5936,8 +6062,12 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
struct page *page = bvec->bv_page;
char *kaddr;
u32 csum = ~(u32)0;
+ u64 private = ~(u32)0;
unsigned long flags;
+ if (get_state_private(&BTRFS_I(inode)->io_tree,
+ start, &private))
+ goto failed;
local_irq_save(flags);
kaddr = kmap_atomic(page);
csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
@@ -5947,18 +6077,18 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
local_irq_restore(flags);
flush_dcache_page(bvec->bv_page);
- if (csum != *private) {
+ if (csum != private) {
+failed:
printk(KERN_ERR "btrfs csum failed ino %llu off"
" %llu csum %u private %u\n",
(unsigned long long)btrfs_ino(inode),
(unsigned long long)start,
- csum, *private);
+ csum, (unsigned)private);
err = -EIO;
}
}
start += bvec->bv_len;
- private++;
bvec++;
} while (bvec <= bvec_end);
@@ -5966,7 +6096,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
dip->logical_offset + dip->bytes - 1);
bio->bi_private = dip->private;
- kfree(dip->csums);
kfree(dip);
/* If we had a csum failure make sure to clear the uptodate flag */
@@ -6072,7 +6201,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
int rw, u64 file_offset, int skip_sum,
- u32 *csums, int async_submit)
+ int async_submit)
{
int write = rw & REQ_WRITE;
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -6105,8 +6234,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
if (ret)
goto err;
} else if (!skip_sum) {
- ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
- file_offset, csums);
+ ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset);
if (ret)
goto err;
}
@@ -6132,10 +6260,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
u64 submit_len = 0;
u64 map_length;
int nr_pages = 0;
- u32 *csums = dip->csums;
int ret = 0;
int async_submit = 0;
- int write = rw & REQ_WRITE;
map_length = orig_bio->bi_size;
ret = btrfs_map_block(map_tree, READ, start_sector << 9,
@@ -6171,16 +6297,13 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
atomic_inc(&dip->pending_bios);
ret = __btrfs_submit_dio_bio(bio, inode, rw,
file_offset, skip_sum,
- csums, async_submit);
+ async_submit);
if (ret) {
bio_put(bio);
atomic_dec(&dip->pending_bios);
goto out_err;
}
- /* Write's use the ordered csums */
- if (!write && !skip_sum)
- csums = csums + nr_pages;
start_sector += submit_len >> 9;
file_offset += submit_len;
@@ -6210,7 +6333,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
submit:
ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
- csums, async_submit);
+ async_submit);
if (!ret)
return 0;
@@ -6246,17 +6369,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
ret = -ENOMEM;
goto free_ordered;
}
- dip->csums = NULL;
-
- /* Write's use the ordered csum stuff, so we don't need dip->csums */
- if (!write && !skip_sum) {
- dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
- if (!dip->csums) {
- kfree(dip);
- ret = -ENOMEM;
- goto free_ordered;
- }
- }
dip->private = bio->bi_private;
dip->inode = inode;
@@ -6341,132 +6453,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
out:
return retval;
}
+
static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
const struct iovec *iov, loff_t offset,
unsigned long nr_segs)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
- struct btrfs_ordered_extent *ordered;
- struct extent_state *cached_state = NULL;
- u64 lockstart, lockend;
- ssize_t ret;
- int writing = rw & WRITE;
- int write_bits = 0;
- size_t count = iov_length(iov, nr_segs);
if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
- offset, nr_segs)) {
+ offset, nr_segs))
return 0;
- }
-
- lockstart = offset;
- lockend = offset + count - 1;
-
- if (writing) {
- ret = btrfs_delalloc_reserve_space(inode, count);
- if (ret)
- goto out;
- }
-
- while (1) {
- lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- 0, &cached_state);
- /*
- * We're concerned with the entire range that we're going to be
- * doing DIO to, so we need to make sure theres no ordered
- * extents in this range.
- */
- ordered = btrfs_lookup_ordered_range(inode, lockstart,
- lockend - lockstart + 1);
-
- /*
- * We need to make sure there are no buffered pages in this
- * range either, we could have raced between the invalidate in
- * generic_file_direct_write and locking the extent. The
- * invalidate needs to happen so that reads after a write do not
- * get stale data.
- */
- if (!ordered && (!writing ||
- !test_range_bit(&BTRFS_I(inode)->io_tree,
- lockstart, lockend, EXTENT_UPTODATE, 0,
- cached_state)))
- break;
-
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- &cached_state, GFP_NOFS);
-
- if (ordered) {
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- } else {
- /* Screw you mmap */
- ret = filemap_write_and_wait_range(file->f_mapping,
- lockstart,
- lockend);
- if (ret)
- goto out;
-
- /*
- * If we found a page that couldn't be invalidated just
- * fall back to buffered.
- */
- ret = invalidate_inode_pages2_range(file->f_mapping,
- lockstart >> PAGE_CACHE_SHIFT,
- lockend >> PAGE_CACHE_SHIFT);
- if (ret) {
- if (ret == -EBUSY)
- ret = 0;
- goto out;
- }
- }
-
- cond_resched();
- }
- /*
- * we don't use btrfs_set_extent_delalloc because we don't want
- * the dirty or uptodate bits
- */
- if (writing) {
- write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
- ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- EXTENT_DELALLOC, NULL, &cached_state,
- GFP_NOFS);
- if (ret) {
- clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
- lockend, EXTENT_LOCKED | write_bits,
- 1, 0, &cached_state, GFP_NOFS);
- goto out;
- }
- }
-
- free_extent_state(cached_state);
- cached_state = NULL;
-
- ret = __blockdev_direct_IO(rw, iocb, inode,
+ return __blockdev_direct_IO(rw, iocb, inode,
BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
btrfs_submit_direct, 0);
-
- if (ret < 0 && ret != -EIOCBQUEUED) {
- clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
- offset + iov_length(iov, nr_segs) - 1,
- EXTENT_LOCKED | write_bits, 1, 0,
- &cached_state, GFP_NOFS);
- } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
- /*
- * We're falling back to buffered, unlock the section we didn't
- * do IO on.
- */
- clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
- offset + iov_length(iov, nr_segs) - 1,
- EXTENT_LOCKED | write_bits, 1, 0,
- &cached_state, GFP_NOFS);
- }
-out:
- free_extent_state(cached_state);
- return ret;
}
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7bb755677a2..9df50fa8a07 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -424,7 +424,7 @@ static noinline int create_subvol(struct btrfs_root *root,
uuid_le_gen(&new_uuid);
memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
root_item.otime.sec = cpu_to_le64(cur_time.tv_sec);
- root_item.otime.nsec = cpu_to_le64(cur_time.tv_nsec);
+ root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec);
root_item.ctime = root_item.otime;
btrfs_set_root_ctransid(&root_item, trans->transid);
btrfs_set_root_otransid(&root_item, trans->transid);
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index a44eff07480..2a1762c6604 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -67,7 +67,7 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
{
if (eb->lock_nested) {
read_lock(&eb->lock);
- if (&eb->lock_nested && current->pid == eb->lock_owner) {
+ if (eb->lock_nested && current->pid == eb->lock_owner) {
read_unlock(&eb->lock);
return;
}
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index bc424ae5a81..b6501558174 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1364,8 +1364,10 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
spin_lock(&fs_info->qgroup_lock);
dstgroup = add_qgroup_rb(fs_info, objectid);
- if (!dstgroup)
+ if (IS_ERR(dstgroup)) {
+ ret = PTR_ERR(dstgroup);
goto unlock;
+ }
if (srcid) {
srcgroup = find_qgroup_rb(fs_info, srcid);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 6bb465cca20..10d8e4d8807 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -544,8 +544,8 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
struct timespec ct = CURRENT_TIME;
spin_lock(&root->root_times_lock);
- item->ctransid = trans->transid;
+ item->ctransid = cpu_to_le64(trans->transid);
item->ctime.sec = cpu_to_le64(ct.tv_sec);
- item->ctime.nsec = cpu_to_le64(ct.tv_nsec);
+ item->ctime.nsec = cpu_to_le32(ct.tv_nsec);
spin_unlock(&root->root_times_lock);
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f2eb24c477a..83d6f9f9c22 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -838,7 +838,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
struct btrfs_trans_handle *trans;
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_root *root = fs_info->tree_root;
- int ret;
trace_btrfs_sync_fs(wait);
@@ -849,11 +848,17 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
btrfs_wait_ordered_extents(root, 0, 0);
- trans = btrfs_start_transaction(root, 0);
+ spin_lock(&fs_info->trans_lock);
+ if (!fs_info->running_transaction) {
+ spin_unlock(&fs_info->trans_lock);
+ return 0;
+ }
+ spin_unlock(&fs_info->trans_lock);
+
+ trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
- ret = btrfs_commit_transaction(trans, root);
- return ret;
+ return btrfs_commit_transaction(trans, root);
}
static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
@@ -1530,6 +1535,8 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
while (cur_devices) {
head = &cur_devices->devices;
list_for_each_entry(dev, head, dev_list) {
+ if (dev->missing)
+ continue;
if (!first_dev || dev->devid < first_dev->devid)
first_dev = dev;
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 17be3dedacb..27c26004e05 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1031,6 +1031,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
btrfs_i_size_write(parent_inode, parent_inode->i_size +
dentry->d_name.len * 2);
+ parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
ret = btrfs_update_inode(trans, parent_root, parent_inode);
if (ret)
goto abort_trans_dput;
@@ -1066,7 +1067,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
memcpy(new_root_item->parent_uuid, root->root_item.uuid,
BTRFS_UUID_SIZE);
new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
- new_root_item->otime.nsec = cpu_to_le64(cur_time.tv_nsec);
+ new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec);
btrfs_set_root_otransid(new_root_item, trans->transid);
memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e86ae04abe6..88b969aeeb7 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -227,9 +227,8 @@ loop_lock:
cur = pending;
pending = pending->bi_next;
cur->bi_next = NULL;
- atomic_dec(&fs_info->nr_async_bios);
- if (atomic_read(&fs_info->nr_async_bios) < limit &&
+ if (atomic_dec_return(&fs_info->nr_async_bios) < limit &&
waitqueue_active(&fs_info->async_submit_wait))
wake_up(&fs_info->async_submit_wait);
@@ -569,9 +568,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
memcpy(new_device, device, sizeof(*new_device));
/* Safe because we are under uuid_mutex */
- name = rcu_string_strdup(device->name->str, GFP_NOFS);
- BUG_ON(device->name && !name); /* -ENOMEM */
- rcu_assign_pointer(new_device->name, name);
+ if (device->name) {
+ name = rcu_string_strdup(device->name->str, GFP_NOFS);
+ BUG_ON(device->name && !name); /* -ENOMEM */
+ rcu_assign_pointer(new_device->name, name);
+ }
new_device->bdev = NULL;
new_device->writeable = 0;
new_device->in_fs_metadata = 0;
@@ -4605,28 +4606,6 @@ int btrfs_read_sys_array(struct btrfs_root *root)
return ret;
}
-struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
- u64 logical, int mirror_num)
-{
- struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
- int ret;
- u64 map_length = 0;
- struct btrfs_bio *bbio = NULL;
- struct btrfs_device *device;
-
- BUG_ON(mirror_num == 0);
- ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio,
- mirror_num);
- if (ret) {
- BUG_ON(bbio != NULL);
- return NULL;
- }
- BUG_ON(mirror_num != bbio->mirror_num);
- device = bbio->stripes[mirror_num - 1].dev;
- kfree(bbio);
- return device;
-}
-
int btrfs_read_chunk_tree(struct btrfs_root *root)
{
struct btrfs_path *path;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5479325987b..53c06af92e8 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -289,8 +289,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
u64 *start, u64 *max_avail);
-struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
- u64 logical, int mirror_num);
void btrfs_dev_stat_print_on_error(struct btrfs_device *device);
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
int btrfs_get_dev_stats(struct btrfs_root *root,
diff --git a/fs/buffer.c b/fs/buffer.c
index 9f6d2e41281..58e2e7b7737 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -914,7 +914,7 @@ link_dev_buffers(struct page *page, struct buffer_head *head)
/*
* Initialise the state of a blockdev page's buffers.
*/
-static void
+static sector_t
init_page_buffers(struct page *page, struct block_device *bdev,
sector_t block, int size)
{
@@ -936,33 +936,41 @@ init_page_buffers(struct page *page, struct block_device *bdev,
block++;
bh = bh->b_this_page;
} while (bh != head);
+
+ /*
+ * Caller needs to validate requested block against end of device.
+ */
+ return end_block;
}
/*
* Create the page-cache page that contains the requested block.
*
- * This is user purely for blockdev mappings.
+ * This is used purely for blockdev mappings.
*/
-static struct page *
+static int
grow_dev_page(struct block_device *bdev, sector_t block,
- pgoff_t index, int size)
+ pgoff_t index, int size, int sizebits)
{
struct inode *inode = bdev->bd_inode;
struct page *page;
struct buffer_head *bh;
+ sector_t end_block;
+ int ret = 0; /* Will call free_more_memory() */
page = find_or_create_page(inode->i_mapping, index,
(mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
if (!page)
- return NULL;
+ return ret;
BUG_ON(!PageLocked(page));
if (page_has_buffers(page)) {
bh = page_buffers(page);
if (bh->b_size == size) {
- init_page_buffers(page, bdev, block, size);
- return page;
+ end_block = init_page_buffers(page, bdev,
+ index << sizebits, size);
+ goto done;
}
if (!try_to_free_buffers(page))
goto failed;
@@ -982,14 +990,14 @@ grow_dev_page(struct block_device *bdev, sector_t block,
*/
spin_lock(&inode->i_mapping->private_lock);
link_dev_buffers(page, bh);
- init_page_buffers(page, bdev, block, size);
+ end_block = init_page_buffers(page, bdev, index << sizebits, size);
spin_unlock(&inode->i_mapping->private_lock);
- return page;
-
+done:
+ ret = (block < end_block) ? 1 : -ENXIO;
failed:
unlock_page(page);
page_cache_release(page);
- return NULL;
+ return ret;
}
/*
@@ -999,7 +1007,6 @@ failed:
static int
grow_buffers(struct block_device *bdev, sector_t block, int size)
{
- struct page *page;
pgoff_t index;
int sizebits;
@@ -1023,22 +1030,14 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
bdevname(bdev, b));
return -EIO;
}
- block = index << sizebits;
+
/* Create a page with the proper size buffers.. */
- page = grow_dev_page(bdev, block, index, size);
- if (!page)
- return 0;
- unlock_page(page);
- page_cache_release(page);
- return 1;
+ return grow_dev_page(bdev, block, index, size, sizebits);
}
static struct buffer_head *
__getblk_slow(struct block_device *bdev, sector_t block, int size)
{
- int ret;
- struct buffer_head *bh;
-
/* Size must be multiple of hard sectorsize */
if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
(size < 512 || size > PAGE_SIZE))) {
@@ -1051,21 +1050,20 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
return NULL;
}
-retry:
- bh = __find_get_block(bdev, block, size);
- if (bh)
- return bh;
+ for (;;) {
+ struct buffer_head *bh;
+ int ret;
- ret = grow_buffers(bdev, block, size);
- if (ret == 0) {
- free_more_memory();
- goto retry;
- } else if (ret > 0) {
bh = __find_get_block(bdev, block, size);
if (bh)
return bh;
+
+ ret = grow_buffers(bdev, block, size);
+ if (ret < 0)
+ return NULL;
+ if (ret == 0)
+ free_more_memory();
}
- return NULL;
}
/*
@@ -1321,10 +1319,6 @@ EXPORT_SYMBOL(__find_get_block);
* which corresponds to the passed block_device, block and size. The
* returned buffer has its reference count incremented.
*
- * __getblk() cannot fail - it just keeps trying. If you pass it an
- * illegal block number, __getblk() will happily return a buffer_head
- * which represents the non-existent block. Very weird.
- *
* __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers()
* attempt is failing. FIXME, perhaps?
*/
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index fb962efdace..6d59006bfa2 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -201,6 +201,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
int err = -ENOMEM;
dout("ceph_fs_debugfs_init\n");
+ BUG_ON(!fsc->client->debugfs_dir);
fsc->debugfs_congestion_kb =
debugfs_create_file("writeback_congestion_kb",
0600,
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 9fff9f3b17e..4b5762ef7c2 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -992,11 +992,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
if (rinfo->head->is_dentry) {
struct inode *dir = req->r_locked_dir;
- err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag,
- session, req->r_request_started, -1,
- &req->r_caps_reservation);
- if (err < 0)
- return err;
+ if (dir) {
+ err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag,
+ session, req->r_request_started, -1,
+ &req->r_caps_reservation);
+ if (err < 0)
+ return err;
+ } else {
+ WARN_ON_ONCE(1);
+ }
}
/*
@@ -1004,6 +1008,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
* will have trouble splicing in the virtual snapdir later
*/
if (rinfo->head->is_dentry && !req->r_aborted &&
+ req->r_locked_dir &&
(rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
fsc->mount_options->snapdir_name,
req->r_dentry->d_name.len))) {
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 8e3fb69fbe6..1396ceb4679 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -42,7 +42,8 @@ static long __validate_layout(struct ceph_mds_client *mdsc,
/* validate striping parameters */
if ((l->object_size & ~PAGE_MASK) ||
(l->stripe_unit & ~PAGE_MASK) ||
- ((unsigned)l->object_size % (unsigned)l->stripe_unit))
+ (l->stripe_unit != 0 &&
+ ((unsigned)l->object_size % (unsigned)l->stripe_unit)))
return -EINVAL;
/* make sure it's a valid data pool */
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 7dab9c04ad5..53cf2aabce8 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -328,7 +328,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
}
ctoUTF16_out:
- return i;
+ return j;
}
#ifdef CONFIG_CIFS_SMB2
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 074923ce593..f0cf934ba87 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1576,9 +1576,14 @@ cifs_readv_callback(struct mid_q_entry *mid)
/* result already set, check signature */
if (server->sec_mode &
(SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
- if (cifs_verify_signature(rdata->iov, rdata->nr_iov,
- server, mid->sequence_number + 1))
- cERROR(1, "Unexpected SMB signature");
+ int rc = 0;
+
+ rc = cifs_verify_signature(rdata->iov, rdata->nr_iov,
+ server,
+ mid->sequence_number + 1);
+ if (rc)
+ cERROR(1, "SMB signature verification returned "
+ "error = %d", rc);
}
/* FIXME: should this be counted toward the initiating task? */
task_io_account_read(rdata->bytes);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index cbe709ad666..781025be48b 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -356,19 +356,12 @@ cifs_create_get_file_info:
cifs_create_set_dentry:
if (rc != 0) {
cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
+ CIFSSMBClose(xid, tcon, *fileHandle);
goto out;
}
d_drop(direntry);
d_add(direntry, newinode);
- /* ENOENT for create? How weird... */
- rc = -ENOENT;
- if (!newinode) {
- CIFSSMBClose(xid, tcon, *fileHandle);
- goto out;
- }
- rc = 0;
-
out:
kfree(buf);
kfree(full_path);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 9154192b068..71e9ad9f596 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -917,7 +917,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
if (!buf) {
mutex_unlock(&cinode->lock_mutex);
free_xid(xid);
- return rc;
+ return -ENOMEM;
}
for (i = 0; i < 2; i++) {
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 7354877fa3b..cb79c7edecb 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -124,10 +124,10 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
{
struct cifsInodeInfo *cifs_i = CIFS_I(inode);
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
- unsigned long oldtime = cifs_i->time;
cifs_revalidate_cache(inode, fattr);
+ spin_lock(&inode->i_lock);
inode->i_atime = fattr->cf_atime;
inode->i_mtime = fattr->cf_mtime;
inode->i_ctime = fattr->cf_ctime;
@@ -148,9 +148,6 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
else
cifs_i->time = jiffies;
- cFYI(1, "inode 0x%p old_time=%ld new_time=%ld", inode,
- oldtime, cifs_i->time);
-
cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING;
cifs_i->server_eof = fattr->cf_eof;
@@ -158,7 +155,6 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
* Can't safely change the file size here if the client is writing to
* it due to potential races.
*/
- spin_lock(&inode->i_lock);
if (is_size_safe_to_change(cifs_i, fattr->cf_eof)) {
i_size_write(inode, fattr->cf_eof);
@@ -859,12 +855,14 @@ struct inode *cifs_root_iget(struct super_block *sb)
if (rc && tcon->ipc) {
cFYI(1, "ipc connection - fake read inode");
+ spin_lock(&inode->i_lock);
inode->i_mode |= S_IFDIR;
set_nlink(inode, 2);
inode->i_op = &cifs_ipc_inode_ops;
inode->i_fop = &simple_dir_operations;
inode->i_uid = cifs_sb->mnt_uid;
inode->i_gid = cifs_sb->mnt_gid;
+ spin_unlock(&inode->i_lock);
} else if (rc) {
iget_failed(inode);
inode = ERR_PTR(rc);
@@ -1110,6 +1108,15 @@ undo_setattr:
goto out_close;
}
+/* copied from fs/nfs/dir.c with small changes */
+static void
+cifs_drop_nlink(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ if (inode->i_nlink > 0)
+ drop_nlink(inode);
+ spin_unlock(&inode->i_lock);
+}
/*
* If dentry->d_inode is null (usually meaning the cached dentry
@@ -1166,13 +1173,13 @@ retry_std_delete:
psx_del_no_retry:
if (!rc) {
if (inode)
- drop_nlink(inode);
+ cifs_drop_nlink(inode);
} else if (rc == -ENOENT) {
d_drop(dentry);
} else if (rc == -ETXTBSY) {
rc = cifs_rename_pending_delete(full_path, dentry, xid);
if (rc == 0)
- drop_nlink(inode);
+ cifs_drop_nlink(inode);
} else if ((rc == -EACCES) && (dosattr == 0) && inode) {
attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
if (attrs == NULL) {
@@ -1241,9 +1248,10 @@ cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode,
* setting nlink not necessary except in cases where we failed to get it
* from the server or was set bogus
*/
+ spin_lock(&dentry->d_inode->i_lock);
if ((dentry->d_inode) && (dentry->d_inode->i_nlink < 2))
set_nlink(dentry->d_inode, 2);
-
+ spin_unlock(&dentry->d_inode->i_lock);
mode &= ~current_umask();
/* must turn on setgid bit if parent dir has it */
if (inode->i_mode & S_ISGID)
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 09e4b3ae456..e6ce3b11287 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -433,7 +433,9 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode,
if (old_file->d_inode) {
cifsInode = CIFS_I(old_file->d_inode);
if (rc == 0) {
+ spin_lock(&old_file->d_inode->i_lock);
inc_nlink(old_file->d_inode);
+ spin_unlock(&old_file->d_inode->i_lock);
/* BB should we make this contingent on superblock flag NOATIME? */
/* old_file->d_inode->i_ctime = CURRENT_TIME;*/
/* parent dir timestamps will update from srv
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index a4ff5d54755..e4d3b996416 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -52,7 +52,8 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
cERROR(1, "Bad protocol string signature header %x",
*(unsigned int *) hdr->ProtocolId);
if (mid != hdr->MessageId)
- cERROR(1, "Mids do not match");
+ cERROR(1, "Mids do not match: %llu and %llu", mid,
+ hdr->MessageId);
}
cERROR(1, "Bad SMB detected. The Mid=%llu", hdr->MessageId);
return 1;
@@ -107,7 +108,7 @@ smb2_check_message(char *buf, unsigned int length)
* ie Validate the wct via smb2_struct_sizes table above
*/
- if (length < 2 + sizeof(struct smb2_hdr)) {
+ if (length < sizeof(struct smb2_pdu)) {
if ((length >= sizeof(struct smb2_hdr)) && (hdr->Status != 0)) {
pdu->StructureSize2 = 0;
/*
@@ -121,15 +122,15 @@ smb2_check_message(char *buf, unsigned int length)
return 1;
}
if (len > CIFSMaxBufSize + MAX_SMB2_HDR_SIZE - 4) {
- cERROR(1, "SMB length greater than maximum, mid=%lld", mid);
+ cERROR(1, "SMB length greater than maximum, mid=%llu", mid);
return 1;
}
if (check_smb2_hdr(hdr, mid))
return 1;
- if (hdr->StructureSize != SMB2_HEADER_SIZE) {
- cERROR(1, "Illegal structure size %d",
+ if (hdr->StructureSize != SMB2_HEADER_STRUCTURE_SIZE) {
+ cERROR(1, "Illegal structure size %u",
le16_to_cpu(hdr->StructureSize));
return 1;
}
@@ -161,8 +162,9 @@ smb2_check_message(char *buf, unsigned int length)
if (4 + len != clc_len) {
cFYI(1, "Calculated size %u length %u mismatch mid %llu",
clc_len, 4 + len, mid);
- if (clc_len == 4 + len + 1) /* BB FIXME (fix samba) */
- return 0; /* BB workaround Samba 3 bug SessSetup rsp */
+ /* server can return one byte more */
+ if (clc_len == 4 + len + 1)
+ return 0;
return 1;
}
return 0;
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index f37a1b41b40..15dc8eea827 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -87,10 +87,6 @@
#define SMB2_PROTO_NUMBER __constant_cpu_to_le32(0x424d53fe)
-#define SMB2_HEADER_SIZE __constant_le16_to_cpu(64)
-
-#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_le16_to_cpu(9)
-
/*
* SMB2 Header Definition
*
@@ -99,6 +95,9 @@
* "PDU" : "Protocol Data Unit" (ie a network "frame")
*
*/
+
+#define SMB2_HEADER_STRUCTURE_SIZE __constant_cpu_to_le16(64)
+
struct smb2_hdr {
__be32 smb2_buf_length; /* big endian on wire */
/* length is only two or three bytes - with
@@ -140,6 +139,9 @@ struct smb2_pdu {
* command code name for the struct. Note that structures must be packed.
*
*/
+
+#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_cpu_to_le16(9)
+
struct smb2_err_rsp {
struct smb2_hdr hdr;
__le16 StructureSize;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 83867ef348d..d9b639b95fa 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -503,13 +503,16 @@ cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
/* convert the length into a more usable form */
if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
struct kvec iov;
+ int rc = 0;
iov.iov_base = mid->resp_buf;
iov.iov_len = len;
/* FIXME: add code to kill session */
- if (cifs_verify_signature(&iov, 1, server,
- mid->sequence_number + 1) != 0)
- cERROR(1, "Unexpected SMB signature");
+ rc = cifs_verify_signature(&iov, 1, server,
+ mid->sequence_number + 1);
+ if (rc)
+ cERROR(1, "SMB signature verification returned error = "
+ "%d", rc);
}
/* BB special case reconnect tid and uid here? */
diff --git a/fs/dcache.c b/fs/dcache.c
index 8086636bf79..693f95bf1ca 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -389,7 +389,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
* Inform try_to_ascend() that we are no longer attached to the
* dentry tree
*/
- dentry->d_flags |= DCACHE_DISCONNECTED;
+ dentry->d_flags |= DCACHE_DENTRY_KILLED;
if (parent)
spin_unlock(&parent->d_lock);
dentry_iput(dentry);
@@ -1048,7 +1048,7 @@ static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq
* or deletion
*/
if (new != old->d_parent ||
- (old->d_flags & DCACHE_DISCONNECTED) ||
+ (old->d_flags & DCACHE_DENTRY_KILLED) ||
(!locked && read_seqretry(&rename_lock, seq))) {
spin_unlock(&new->d_lock);
new = NULL;
@@ -1134,6 +1134,8 @@ positive:
return 1;
rename_retry:
+ if (locked)
+ goto again;
locked = 1;
write_seqlock(&rename_lock);
goto again;
@@ -1141,7 +1143,7 @@ rename_retry:
EXPORT_SYMBOL(have_submounts);
/*
- * Search the dentry child list for the specified parent,
+ * Search the dentry child list of the specified parent,
* and move any unused dentries to the end of the unused
* list for prune_dcache(). We descend to the next level
* whenever the d_subdirs list is non-empty and continue
@@ -1236,6 +1238,8 @@ out:
rename_retry:
if (found)
return found;
+ if (locked)
+ goto again;
locked = 1;
write_seqlock(&rename_lock);
goto again;
@@ -3035,6 +3039,8 @@ resume:
return;
rename_retry:
+ if (locked)
+ goto again;
locked = 1;
write_seqlock(&rename_lock);
goto again;
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 2340f6978d6..c5ca6ae5a30 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -526,73 +526,51 @@ struct array_data {
u32 elements;
};
-static int u32_array_open(struct inode *inode, struct file *file)
-{
- file->private_data = NULL;
- return nonseekable_open(inode, file);
-}
-
-static size_t format_array(char *buf, size_t bufsize, const char *fmt,
- u32 *array, u32 array_size)
+static size_t u32_format_array(char *buf, size_t bufsize,
+ u32 *array, int array_size)
{
size_t ret = 0;
- u32 i;
- for (i = 0; i < array_size; i++) {
+ while (--array_size >= 0) {
size_t len;
+ char term = array_size ? ' ' : '\n';
- len = snprintf(buf, bufsize, fmt, array[i]);
- len++; /* ' ' or '\n' */
+ len = snprintf(buf, bufsize, "%u%c", *array++, term);
ret += len;
- if (buf) {
- buf += len;
- bufsize -= len;
- buf[-1] = (i == array_size-1) ? '\n' : ' ';
- }
+ buf += len;
+ bufsize -= len;
}
-
- ret++; /* \0 */
- if (buf)
- *buf = '\0';
-
return ret;
}
-static char *format_array_alloc(const char *fmt, u32 *array,
- u32 array_size)
+static int u32_array_open(struct inode *inode, struct file *file)
{
- size_t len = format_array(NULL, 0, fmt, array, array_size);
- char *ret;
-
- ret = kmalloc(len, GFP_KERNEL);
- if (ret == NULL)
- return NULL;
+ struct array_data *data = inode->i_private;
+ int size, elements = data->elements;
+ char *buf;
+
+ /*
+ * Max size:
+ * - 10 digits + ' '/'\n' = 11 bytes per number
+ * - terminating NUL character
+ */
+ size = elements*11;
+ buf = kmalloc(size+1, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ buf[size] = 0;
+
+ file->private_data = buf;
+ u32_format_array(buf, size, data->array, data->elements);
- format_array(ret, len, fmt, array, array_size);
- return ret;
+ return nonseekable_open(inode, file);
}
static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len,
loff_t *ppos)
{
- struct inode *inode = file->f_path.dentry->d_inode;
- struct array_data *data = inode->i_private;
- size_t size;
-
- if (*ppos == 0) {
- if (file->private_data) {
- kfree(file->private_data);
- file->private_data = NULL;
- }
-
- file->private_data = format_array_alloc("%u", data->array,
- data->elements);
- }
-
- size = 0;
- if (file->private_data)
- size = strlen(file->private_data);
+ size_t size = strlen(file->private_data);
return simple_read_from_buffer(buf, len, ppos,
file->private_data, size);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1faf4cb56f3..f86c720dba0 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1062,6 +1062,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
unsigned long user_addr;
size_t bytes;
struct buffer_head map_bh = { 0, };
+ struct blk_plug plug;
if (rw & WRITE)
rw = WRITE_ODIRECT;
@@ -1177,6 +1178,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
PAGE_SIZE - user_addr / PAGE_SIZE);
}
+ blk_start_plug(&plug);
+
for (seg = 0; seg < nr_segs; seg++) {
user_addr = (unsigned long)iov[seg].iov_base;
sdio.size += bytes = iov[seg].iov_len;
@@ -1235,6 +1238,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
if (sdio.bio)
dio_bio_submit(dio, &sdio);
+ blk_finish_plug(&plug);
+
/*
* It is possible that, we return short IO due to end of file.
* In that case, we need to release all the pages we got hold on.
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 44ce5c6a541..d45ba456812 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -275,8 +275,14 @@ out:
static int ecryptfs_flush(struct file *file, fl_owner_t td)
{
- return file->f_mode & FMODE_WRITE
- ? filemap_write_and_wait(file->f_mapping) : 0;
+ struct file *lower_file = ecryptfs_file_to_lower(file);
+
+ if (lower_file->f_op && lower_file->f_op->flush) {
+ filemap_write_and_wait(file->f_mapping);
+ return lower_file->f_op->flush(lower_file, td);
+ }
+
+ return 0;
}
static int ecryptfs_release(struct inode *inode, struct file *file)
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 534b129ea67..cc7709e7c50 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -619,6 +619,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct dentry *lower_old_dir_dentry;
struct dentry *lower_new_dir_dentry;
struct dentry *trap = NULL;
+ struct inode *target_inode;
lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
@@ -626,6 +627,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
dget(lower_new_dentry);
lower_old_dir_dentry = dget_parent(lower_old_dentry);
lower_new_dir_dentry = dget_parent(lower_new_dentry);
+ target_inode = new_dentry->d_inode;
trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
/* source should not be ancestor of target */
if (trap == lower_old_dentry) {
@@ -641,6 +643,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
lower_new_dir_dentry->d_inode, lower_new_dentry);
if (rc)
goto out_lock;
+ if (target_inode)
+ fsstack_copy_attr_all(target_inode,
+ ecryptfs_inode_to_lower(target_inode));
fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
if (new_dir != old_dir)
fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 2768138eefe..9b627c15010 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -162,6 +162,7 @@ void ecryptfs_put_lower_file(struct inode *inode)
inode_info = ecryptfs_inode_to_private(inode);
if (atomic_dec_and_mutex_lock(&inode_info->lower_file_count,
&inode_info->lower_file_mutex)) {
+ filemap_write_and_wait(inode->i_mapping);
fput(inode_info->lower_file);
inode_info->lower_file = NULL;
mutex_unlock(&inode_info->lower_file_mutex);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 1c8b5567080..eedec84c180 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1654,8 +1654,8 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
error = PTR_ERR(file);
goto out_free_fd;
}
- fd_install(fd, file);
ep->file = file;
+ fd_install(fd, file);
return fd;
out_free_fd:
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 376aa77f3ca..2616d0ea5c5 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -479,7 +479,7 @@ void ext2_discard_reservation(struct inode *inode)
/**
* ext2_free_blocks() -- Free given blocks and update quota and i_blocks
* @inode: inode
- * @block: start physcial block to free
+ * @block: start physical block to free
* @count: number of blocks to free
*/
void ext2_free_blocks (struct inode * inode, unsigned long block,
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 90d901f0486..7320a66e958 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -483,7 +483,7 @@ void ext3_discard_reservation(struct inode *inode)
* ext3_free_blocks_sb() -- Free given blocks and update quota
* @handle: handle to this transaction
* @sb: super block
- * @block: start physcial block to free
+ * @block: start physical block to free
* @count: number of blocks to free
* @pdquot_freed_blocks: pointer to quota
*/
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index a07597307fd..7e87e37a372 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -3072,6 +3072,8 @@ static int ext3_do_update_inode(handle_t *handle,
struct ext3_inode_info *ei = EXT3_I(inode);
struct buffer_head *bh = iloc->bh;
int err = 0, rc, block;
+ int need_datasync = 0;
+ __le32 disksize;
uid_t i_uid;
gid_t i_gid;
@@ -3113,7 +3115,11 @@ again:
raw_inode->i_gid_high = 0;
}
raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
- raw_inode->i_size = cpu_to_le32(ei->i_disksize);
+ disksize = cpu_to_le32(ei->i_disksize);
+ if (disksize != raw_inode->i_size) {
+ need_datasync = 1;
+ raw_inode->i_size = disksize;
+ }
raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
@@ -3129,8 +3135,11 @@ again:
if (!S_ISREG(inode->i_mode)) {
raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl);
} else {
- raw_inode->i_size_high =
- cpu_to_le32(ei->i_disksize >> 32);
+ disksize = cpu_to_le32(ei->i_disksize >> 32);
+ if (disksize != raw_inode->i_size_high) {
+ raw_inode->i_size_high = disksize;
+ need_datasync = 1;
+ }
if (ei->i_disksize > 0x7fffffffULL) {
struct super_block *sb = inode->i_sb;
if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
@@ -3183,6 +3192,8 @@ again:
ext3_clear_inode_state(inode, EXT3_STATE_NEW);
atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid);
+ if (need_datasync)
+ atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
out_brelse:
brelse (bh);
ext3_std_error(inode->i_sb, err);
@@ -3196,7 +3207,7 @@ out_brelse:
*
* - Within generic_file_write() for O_SYNC files.
* Here, there will be no transaction running. We wait for any running
- * trasnaction to commit.
+ * transaction to commit.
*
* - Within sys_sync(), kupdate and such.
* We wait on commit, if tol to.
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index dff171c3a12..c862ee5fe79 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3313,7 +3313,7 @@ int ext4_discard_partial_page_buffers(handle_t *handle,
* handle: The journal handle
* inode: The files inode
* page: A locked page that contains the offset "from"
- * from: The starting byte offset (from the begining of the file)
+ * from: The starting byte offset (from the beginning of the file)
* to begin discarding
* len: The length of bytes to discard
* flags: Optional flags that may be used:
@@ -3321,11 +3321,11 @@ int ext4_discard_partial_page_buffers(handle_t *handle,
* EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED
* Only zero the regions of the page whose buffer heads
* have already been unmapped. This flag is appropriate
- * for updateing the contents of a page whose blocks may
+ * for updating the contents of a page whose blocks may
* have already been released, and we only want to zero
* out the regions that correspond to those released blocks.
*
- * Returns zero on sucess or negative on failure.
+ * Returns zero on success or negative on failure.
*/
static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
struct inode *inode, struct page *page, loff_t from,
@@ -3486,7 +3486,7 @@ int ext4_can_truncate(struct inode *inode)
* @offset: The offset where the hole will begin
* @len: The length of the hole
*
- * Returns: 0 on sucess or negative on failure
+ * Returns: 0 on success or negative on failure
*/
int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
@@ -4008,7 +4008,7 @@ static int ext4_inode_blocks_set(handle_t *handle,
if (i_blocks <= ~0U) {
/*
- * i_blocks can be represnted in a 32 bit variable
+ * i_blocks can be represented in a 32 bit variable
* as multiple of 512 bytes
*/
raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
@@ -4169,7 +4169,7 @@ out_brelse:
*
* - Within generic_file_write() for O_SYNC files.
* Here, there will be no transaction running. We wait for any running
- * trasnaction to commit.
+ * transaction to commit.
*
* - Within sys_sync(), kupdate and such.
* We wait on commit, if tol to.
@@ -4413,7 +4413,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
* worse case, the indexs blocks spread over different block groups
*
* If datablocks are discontiguous, they are possible to spread over
- * different block groups too. If they are contiuguous, with flexbg,
+ * different block groups too. If they are contiguous, with flexbg,
* they could still across block group boundary.
*
* Also account for superblock, inode, quota and xattr blocks
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 8eae94771c4..08778f6cdfe 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4709,7 +4709,7 @@ error_return:
* ext4_group_add_blocks() -- Add given blocks to an existing group
* @handle: handle to this transaction
* @sb: super block
- * @block: start physcial block to add to the block group
+ * @block: start physical block to add to the block group
* @count: number of blocks to free
*
* This marks the blocks as free in the bitmap and buddy.
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index be3efc4f64f..6d46c0d7833 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -577,10 +577,6 @@ static long writeback_chunk_size(struct backing_dev_info *bdi,
/*
* Write a portion of b_io inodes which belong to @sb.
*
- * If @only_this_sb is true, then find and write all such
- * inodes. Otherwise write only ones which go sequentially
- * in reverse order.
- *
* Return the number of pages and/or inodes written.
*/
static long writeback_sb_inodes(struct super_block *sb,
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 03ff5b1eba9..75a20c092dd 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -117,7 +117,7 @@ static ssize_t fuse_conn_max_background_write(struct file *file,
const char __user *buf,
size_t count, loff_t *ppos)
{
- unsigned val;
+ unsigned uninitialized_var(val);
ssize_t ret;
ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
@@ -154,7 +154,7 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
const char __user *buf,
size_t count, loff_t *ppos)
{
- unsigned val;
+ unsigned uninitialized_var(val);
ssize_t ret;
ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 3426521f320..ee8d5504229 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -396,7 +396,7 @@ err_device:
err_region:
unregister_chrdev_region(devt, 1);
err:
- fc->conn_error = 1;
+ fuse_conn_kill(fc);
goto out;
}
@@ -532,8 +532,6 @@ static int cuse_channel_release(struct inode *inode, struct file *file)
cdev_del(cc->cdev);
}
- /* kill connection and shutdown channel */
- fuse_conn_kill(&cc->fc);
rc = fuse_dev_release(inode, file); /* puts the base reference */
return rc;
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 7df2b5e8fbe..f4246cfc8d8 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1576,6 +1576,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
req->pages[req->num_pages] = page;
req->num_pages++;
+ offset = 0;
num -= this_num;
total_len += this_num;
index++;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ce0a2838ccd..fca222dabe3 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -367,11 +367,6 @@ void fuse_conn_kill(struct fuse_conn *fc)
wake_up_all(&fc->waitq);
wake_up_all(&fc->blocked_waitq);
wake_up_all(&fc->reserved_req_waitq);
- mutex_lock(&fuse_mutex);
- list_del(&fc->entry);
- fuse_ctl_remove_conn(fc);
- mutex_unlock(&fuse_mutex);
- fuse_bdi_destroy(fc);
}
EXPORT_SYMBOL_GPL(fuse_conn_kill);
@@ -380,7 +375,14 @@ static void fuse_put_super(struct super_block *sb)
struct fuse_conn *fc = get_fuse_conn_super(sb);
fuse_send_destroy(fc);
+
fuse_conn_kill(fc);
+ mutex_lock(&fuse_mutex);
+ list_del(&fc->entry);
+ fuse_ctl_remove_conn(fc);
+ mutex_unlock(&fuse_mutex);
+ fuse_bdi_destroy(fc);
+
fuse_conn_put(fc);
}
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index d6526347d38..01c4975da4b 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -612,6 +612,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
+ unsigned requested = 0;
int alloc_required;
int error = 0;
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
@@ -641,7 +642,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
if (error)
goto out_unlock;
- error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
+ requested = data_blocks + ind_blocks;
+ error = gfs2_inplace_reserve(ip, requested);
if (error)
goto out_qunlock;
}
@@ -654,7 +656,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
if (&ip->i_inode == sdp->sd_rindex)
rblocks += 2 * RES_STATFS;
if (alloc_required)
- rblocks += gfs2_rg_blocks(ip);
+ rblocks += gfs2_rg_blocks(ip, requested);
error = gfs2_trans_begin(sdp, rblocks,
PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
@@ -868,8 +870,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
brelse(dibh);
failed:
gfs2_trans_end(sdp);
- if (gfs2_mb_reserved(ip))
- gfs2_inplace_release(ip);
+ gfs2_inplace_release(ip);
if (ip->i_res->rs_qa_qd_num)
gfs2_quota_unlock(ip);
if (inode == sdp->sd_rindex) {
@@ -1023,7 +1024,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
offset, nr_segs, gfs2_get_block_direct,
NULL, NULL, 0);
out:
- gfs2_glock_dq_m(1, &gh);
+ gfs2_glock_dq(&gh);
gfs2_holder_uninit(&gh);
return rv;
}
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 49cd7dd4a9f..1fd3ae237bd 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -786,7 +786,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
goto out_rlist;
if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */
- gfs2_rs_deltree(ip->i_res);
+ gfs2_rs_deltree(ip, ip->i_res);
error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
RES_INDIRECT + RES_STATFS + RES_QUOTA,
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index d1d791ef38d..30e21997a1a 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -323,6 +323,29 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
}
/**
+ * gfs2_size_hint - Give a hint to the size of a write request
+ * @file: The struct file
+ * @offset: The file offset of the write
+ * @size: The length of the write
+ *
+ * When we are about to do a write, this function records the total
+ * write size in order to provide a suitable hint to the lower layers
+ * about how many blocks will be required.
+ *
+ */
+
+static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size)
+{
+ struct inode *inode = filep->f_dentry->d_inode;
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
+ struct gfs2_inode *ip = GFS2_I(inode);
+ size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift;
+ int hint = min_t(size_t, INT_MAX, blks);
+
+ atomic_set(&ip->i_res->rs_sizehint, hint);
+}
+
+/**
* gfs2_allocate_page_backing - Use bmap to allocate blocks
* @page: The (locked) page to allocate backing for
*
@@ -382,8 +405,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
if (ret)
return ret;
- atomic_set(&ip->i_res->rs_sizehint,
- PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift);
+ gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE);
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
ret = gfs2_glock_nq(&gh);
@@ -419,7 +441,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
rblocks += data_blocks ? data_blocks : 1;
if (ind_blocks || data_blocks) {
rblocks += RES_STATFS + RES_QUOTA;
- rblocks += gfs2_rg_blocks(ip);
+ rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
}
ret = gfs2_trans_begin(sdp, rblocks, 0);
if (ret)
@@ -663,7 +685,8 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
if (ret)
return ret;
- atomic_set(&ip->i_res->rs_sizehint, writesize >> sdp->sd_sb.sb_bsize_shift);
+ gfs2_size_hint(file, pos, writesize);
+
if (file->f_flags & O_APPEND) {
struct gfs2_holder gh;
@@ -789,7 +812,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
if (unlikely(error))
goto out_uninit;
- atomic_set(&ip->i_res->rs_sizehint, len >> sdp->sd_sb.sb_bsize_shift);
+ gfs2_size_hint(file, offset, len);
while (len > 0) {
if (len < bytes)
@@ -822,7 +845,7 @@ retry:
&max_bytes, &data_blocks, &ind_blocks);
rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
- RES_RG_HDR + gfs2_rg_blocks(ip);
+ RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks);
if (gfs2_is_jdata(ip))
rblocks += data_blocks ? data_blocks : 1;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1ed81f40da0..e6c2fd53cab 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -186,20 +186,6 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
}
/**
- * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
- * @gl: the glock
- *
- * If the glock is demotable, then we add it (or move it) to the end
- * of the glock LRU list.
- */
-
-static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
-{
- if (demote_ok(gl))
- gfs2_glock_add_to_lru(gl);
-}
-
-/**
* gfs2_glock_put_nolock() - Decrement reference count on glock
* @gl: The glock to put
*
@@ -883,7 +869,14 @@ static int gfs2_glock_demote_wait(void *word)
return 0;
}
-static void wait_on_holder(struct gfs2_holder *gh)
+/**
+ * gfs2_glock_wait - wait on a glock acquisition
+ * @gh: the glock holder
+ *
+ * Returns: 0 on success
+ */
+
+int gfs2_glock_wait(struct gfs2_holder *gh)
{
unsigned long time1 = jiffies;
@@ -894,12 +887,7 @@ static void wait_on_holder(struct gfs2_holder *gh)
gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time +
GL_GLOCK_HOLD_INCR,
GL_GLOCK_MAX_HOLD);
-}
-
-static void wait_on_demote(struct gfs2_glock *gl)
-{
- might_sleep();
- wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE);
+ return gh->gh_error;
}
/**
@@ -929,19 +917,6 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state,
trace_gfs2_demote_rq(gl);
}
-/**
- * gfs2_glock_wait - wait on a glock acquisition
- * @gh: the glock holder
- *
- * Returns: 0 on success
- */
-
-int gfs2_glock_wait(struct gfs2_holder *gh)
-{
- wait_on_holder(gh);
- return gh->gh_error;
-}
-
void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
{
struct va_format vaf;
@@ -979,7 +954,7 @@ __acquires(&gl->gl_spin)
struct gfs2_sbd *sdp = gl->gl_sbd;
struct list_head *insert_pt = NULL;
struct gfs2_holder *gh2;
- int try_lock = 0;
+ int try_futile = 0;
BUG_ON(gh->gh_owner_pid == NULL);
if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
@@ -987,7 +962,7 @@ __acquires(&gl->gl_spin)
if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
if (test_bit(GLF_LOCK, &gl->gl_flags))
- try_lock = 1;
+ try_futile = !may_grant(gl, gh);
if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
goto fail;
}
@@ -996,9 +971,8 @@ __acquires(&gl->gl_spin)
if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid &&
(gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK)))
goto trap_recursive;
- if (try_lock &&
- !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) &&
- !may_grant(gl, gh)) {
+ if (try_futile &&
+ !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
fail:
gh->gh_error = GLR_TRYFAILED;
gfs2_holder_wake(gh);
@@ -1121,8 +1095,9 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
!test_bit(GLF_DEMOTE, &gl->gl_flags))
fast_path = 1;
}
- if (!test_bit(GLF_LFLUSH, &gl->gl_flags))
- __gfs2_glock_schedule_for_reclaim(gl);
+ if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
+ gfs2_glock_add_to_lru(gl);
+
trace_gfs2_glock_queue(gh, 0);
spin_unlock(&gl->gl_spin);
if (likely(fast_path))
@@ -1141,7 +1116,8 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
gfs2_glock_dq(gh);
- wait_on_demote(gl);
+ might_sleep();
+ wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE);
}
/**
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 4bdcf378418..32cc4fde975 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -94,6 +94,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
/* A shortened, inline version of gfs2_trans_begin() */
tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
tr.tr_ip = (unsigned long)__builtin_return_address(0);
+ sb_start_intwrite(sdp->sd_vfs);
gfs2_log_reserve(sdp, tr.tr_reserved);
BUG_ON(current->journal_info);
current->journal_info = &tr;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index aaecc8085fc..3d469d37345 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -99,9 +99,26 @@ struct gfs2_rgrpd {
#define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */
spinlock_t rd_rsspin; /* protects reservation related vars */
struct rb_root rd_rstree; /* multi-block reservation tree */
- u32 rd_rs_cnt; /* count of current reservations */
};
+struct gfs2_rbm {
+ struct gfs2_rgrpd *rgd;
+ struct gfs2_bitmap *bi; /* Bitmap must belong to the rgd */
+ u32 offset; /* The offset is bitmap relative */
+};
+
+static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm)
+{
+ return rbm->rgd->rd_data0 + (rbm->bi->bi_start * GFS2_NBBY) + rbm->offset;
+}
+
+static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1,
+ const struct gfs2_rbm *rbm2)
+{
+ return (rbm1->rgd == rbm2->rgd) && (rbm1->bi == rbm2->bi) &&
+ (rbm1->offset == rbm2->offset);
+}
+
enum gfs2_state_bits {
BH_Pinned = BH_PrivateStart,
BH_Escaped = BH_PrivateStart + 1,
@@ -250,18 +267,11 @@ struct gfs2_blkreserv {
/* components used during write (step 1): */
atomic_t rs_sizehint; /* hint of the write size */
- /* components used during inplace_reserve (step 2): */
- u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
-
- /* components used during get_local_rgrp (step 3): */
- struct gfs2_rgrpd *rs_rgd; /* pointer to the gfs2_rgrpd */
struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */
struct rb_node rs_node; /* link to other block reservations */
-
- /* components used during block searches and assignments (step 4): */
- struct gfs2_bitmap *rs_bi; /* bitmap for the current allocation */
- u32 rs_biblk; /* start block relative to the bi */
+ struct gfs2_rbm rs_rbm; /* Start of reservation */
u32 rs_free; /* how many blocks are still free */
+ u64 rs_inum; /* Inode number for reservation */
/* ancillary quota stuff */
struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS];
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 4ce22e54730..381893ceefa 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -712,14 +712,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (error)
goto fail_gunlock2;
- /* The newly created inode needs a reservation so it can allocate
- xattrs. At the same time, we want new blocks allocated to the new
- dinode to be as contiguous as possible. Since we allocated the
- dinode block under the directory's reservation, we transfer
- ownership of that reservation to the new inode. The directory
- doesn't need a reservation unless it needs a new allocation. */
- ip->i_res = dip->i_res;
- dip->i_res = NULL;
+ error = gfs2_rs_alloc(ip);
+ if (error)
+ goto fail_gunlock2;
error = gfs2_acl_create(dip, inode);
if (error)
@@ -737,10 +732,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
brelse(bh);
gfs2_trans_end(sdp);
- /* Check if we reserved space in the rgrp. Function link_dinode may
- not, depending on whether alloc is required. */
- if (gfs2_mb_reserved(dip))
- gfs2_inplace_release(dip);
+ gfs2_inplace_release(dip);
gfs2_quota_unlock(dip);
mark_inode_dirty(inode);
gfs2_glock_dq_uninit_m(2, ghs);
@@ -897,7 +889,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
goto out_gunlock_q;
error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
- gfs2_rg_blocks(dip) +
+ gfs2_rg_blocks(dip, sdp->sd_max_dirres) +
2 * RES_DINODE + RES_STATFS +
RES_QUOTA, 0);
if (error)
@@ -1378,7 +1370,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
goto out_gunlock_q;
error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
- gfs2_rg_blocks(ndip) +
+ gfs2_rg_blocks(ndip, sdp->sd_max_dirres) +
4 * RES_DINODE + 4 * RES_LEAF +
RES_STATFS + RES_QUOTA + 4, 0);
if (error)
@@ -1722,7 +1714,9 @@ static int gfs2_setxattr(struct dentry *dentry, const char *name,
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
ret = gfs2_glock_nq(&gh);
if (ret == 0) {
- ret = generic_setxattr(dentry, name, data, size, flags);
+ ret = gfs2_rs_alloc(ip);
+ if (ret == 0)
+ ret = generic_setxattr(dentry, name, data, size, flags);
gfs2_glock_dq(&gh);
}
gfs2_holder_uninit(&gh);
@@ -1757,7 +1751,9 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
ret = gfs2_glock_nq(&gh);
if (ret == 0) {
- ret = generic_removexattr(dentry, name);
+ ret = gfs2_rs_alloc(ip);
+ if (ret == 0)
+ ret = generic_removexattr(dentry, name);
gfs2_glock_dq(&gh);
}
gfs2_holder_uninit(&gh);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index e5af9dc420e..e443966c810 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -19,6 +19,7 @@
#include <linux/mount.h>
#include <linux/gfs2_ondisk.h>
#include <linux/quotaops.h>
+#include <linux/lockdep.h>
#include "gfs2.h"
#include "incore.h"
@@ -766,6 +767,7 @@ fail:
return error;
}
+static struct lock_class_key gfs2_quota_imutex_key;
static int init_inodes(struct gfs2_sbd *sdp, int undo)
{
@@ -803,6 +805,12 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
fs_err(sdp, "can't get quota file inode: %d\n", error);
goto fail_rindex;
}
+ /*
+ * i_mutex on quota files is special. Since this inode is hidden system
+ * file, we are safe to define locking ourselves.
+ */
+ lockdep_set_class(&sdp->sd_quota_inode->i_mutex,
+ &gfs2_quota_imutex_key);
error = gfs2_rindex_update(sdp);
if (error)
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index a3bde91645c..4021deca61e 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -765,6 +765,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
struct gfs2_holder *ghs, i_gh;
unsigned int qx, x;
struct gfs2_quota_data *qd;
+ unsigned reserved;
loff_t offset;
unsigned int nalloc = 0, blocks;
int error;
@@ -781,7 +782,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
return -ENOMEM;
sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
- mutex_lock_nested(&ip->i_inode.i_mutex, I_MUTEX_QUOTA);
+ mutex_lock(&ip->i_inode.i_mutex);
for (qx = 0; qx < num_qd; qx++) {
error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE,
GL_NOCACHE, &ghs[qx]);
@@ -811,13 +812,13 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
* two blocks need to be updated instead of 1 */
blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3;
- error = gfs2_inplace_reserve(ip, 1 +
- (nalloc * (data_blocks + ind_blocks)));
+ reserved = 1 + (nalloc * (data_blocks + ind_blocks));
+ error = gfs2_inplace_reserve(ip, reserved);
if (error)
goto out_alloc;
if (nalloc)
- blocks += gfs2_rg_blocks(ip) + nalloc * ind_blocks + RES_STATFS;
+ blocks += gfs2_rg_blocks(ip, reserved) + nalloc * ind_blocks + RES_STATFS;
error = gfs2_trans_begin(sdp, blocks, 0);
if (error)
@@ -1598,7 +1599,7 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
error = gfs2_inplace_reserve(ip, blocks);
if (error)
goto out_i;
- blocks += gfs2_rg_blocks(ip);
+ blocks += gfs2_rg_blocks(ip, blocks);
}
/* Some quotas span block boundaries and can update two blocks,
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 4d34887a601..3cc402ce6fe 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -35,9 +35,6 @@
#define BFITNOENT ((u32)~0)
#define NO_BLOCK ((u64)~0)
-#define RSRV_CONTENTION_FACTOR 4
-#define RGRP_RSRV_MAX_CONTENDERS 2
-
#if BITS_PER_LONG == 32
#define LBITMASK (0x55555555UL)
#define LBITSKIP55 (0x55555555UL)
@@ -67,53 +64,48 @@ static const char valid_change[16] = {
1, 0, 0, 0
};
-static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
- unsigned char old_state,
- struct gfs2_bitmap **rbi);
+static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext,
+ const struct gfs2_inode *ip, bool nowrap);
+
/**
* gfs2_setbit - Set a bit in the bitmaps
- * @rgd: the resource group descriptor
- * @buf2: the clone buffer that holds the bitmaps
- * @bi: the bitmap structure
- * @block: the block to set
+ * @rbm: The position of the bit to set
+ * @do_clone: Also set the clone bitmap, if it exists
* @new_state: the new state of the block
*
*/
-static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2,
- struct gfs2_bitmap *bi, u32 block,
+static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
unsigned char new_state)
{
unsigned char *byte1, *byte2, *end, cur_state;
- unsigned int buflen = bi->bi_len;
- const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
+ unsigned int buflen = rbm->bi->bi_len;
+ const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
- byte1 = bi->bi_bh->b_data + bi->bi_offset + (block / GFS2_NBBY);
- end = bi->bi_bh->b_data + bi->bi_offset + buflen;
+ byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY);
+ end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen;
BUG_ON(byte1 >= end);
cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
if (unlikely(!valid_change[new_state * 4 + cur_state])) {
- printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, "
- "new_state=%d\n",
- (unsigned long long)block, cur_state, new_state);
- printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n",
- (unsigned long long)rgd->rd_addr,
- (unsigned long)bi->bi_start);
- printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n",
- (unsigned long)bi->bi_offset,
- (unsigned long)bi->bi_len);
+ printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, "
+ "new_state=%d\n", rbm->offset, cur_state, new_state);
+ printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n",
+ (unsigned long long)rbm->rgd->rd_addr,
+ rbm->bi->bi_start);
+ printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n",
+ rbm->bi->bi_offset, rbm->bi->bi_len);
dump_stack();
- gfs2_consist_rgrpd(rgd);
+ gfs2_consist_rgrpd(rbm->rgd);
return;
}
*byte1 ^= (cur_state ^ new_state) << bit;
- if (buf2) {
- byte2 = buf2 + bi->bi_offset + (block / GFS2_NBBY);
+ if (do_clone && rbm->bi->bi_clone) {
+ byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY);
cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
*byte2 ^= (cur_state ^ new_state) << bit;
}
@@ -121,30 +113,21 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2,
/**
* gfs2_testbit - test a bit in the bitmaps
- * @rgd: the resource group descriptor
- * @buffer: the buffer that holds the bitmaps
- * @buflen: the length (in bytes) of the buffer
- * @block: the block to read
+ * @rbm: The bit to test
*
+ * Returns: The two bit block state of the requested bit
*/
-static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
- const unsigned char *buffer,
- unsigned int buflen, u32 block)
+static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
{
- const unsigned char *byte, *end;
- unsigned char cur_state;
+ const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset;
+ const u8 *byte;
unsigned int bit;
- byte = buffer + (block / GFS2_NBBY);
- bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
- end = buffer + buflen;
-
- gfs2_assert(rgd->rd_sbd, byte < end);
+ byte = buffer + (rbm->offset / GFS2_NBBY);
+ bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
- cur_state = (*byte >> bit) & GFS2_BIT_MASK;
-
- return cur_state;
+ return (*byte >> bit) & GFS2_BIT_MASK;
}
/**
@@ -192,7 +175,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
*/
static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
{
- u64 startblk = gfs2_rs_startblk(rs);
+ u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);
if (blk >= startblk + rs->rs_free)
return 1;
@@ -202,36 +185,6 @@ static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
}
/**
- * rs_find - Find a rgrp multi-block reservation that contains a given block
- * @rgd: The rgrp
- * @rgblk: The block we're looking for, relative to the rgrp
- */
-static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk)
-{
- struct rb_node **newn;
- int rc;
- u64 fsblk = rgblk + rgd->rd_data0;
-
- spin_lock(&rgd->rd_rsspin);
- newn = &rgd->rd_rstree.rb_node;
- while (*newn) {
- struct gfs2_blkreserv *cur =
- rb_entry(*newn, struct gfs2_blkreserv, rs_node);
- rc = rs_cmp(fsblk, 1, cur);
- if (rc < 0)
- newn = &((*newn)->rb_left);
- else if (rc > 0)
- newn = &((*newn)->rb_right);
- else {
- spin_unlock(&rgd->rd_rsspin);
- return cur;
- }
- }
- spin_unlock(&rgd->rd_rsspin);
- return NULL;
-}
-
-/**
* gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
* a block in a given allocation state.
* @buf: the buffer that holds the bitmaps
@@ -262,8 +215,6 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
u64 mask = 0x5555555555555555ULL;
u32 bit;
- BUG_ON(state > 3);
-
/* Mask off bits we don't care about at the start of the search */
mask <<= spoint;
tmp = gfs2_bit_search(ptr, mask, state);
@@ -285,6 +236,131 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
}
/**
+ * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
+ * @rbm: The rbm with rgd already set correctly
+ * @block: The block number (filesystem relative)
+ *
+ * This sets the bi and offset members of an rbm based on a
+ * resource group and a filesystem relative block number. The
+ * resource group must be set in the rbm on entry, the bi and
+ * offset members will be set by this function.
+ *
+ * Returns: 0 on success, or an error code
+ */
+
+static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
+{
+ u64 rblock = block - rbm->rgd->rd_data0;
+ u32 goal = (u32)rblock;
+ int x;
+
+ if (WARN_ON_ONCE(rblock > UINT_MAX))
+ return -EINVAL;
+ if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
+ return -E2BIG;
+
+ for (x = 0; x < rbm->rgd->rd_length; x++) {
+ rbm->bi = rbm->rgd->rd_bits + x;
+ if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) {
+ rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
+ * @rbm: Position to search (value/result)
+ * @n_unaligned: Number of unaligned blocks to check
+ * @len: Decremented for each block found (terminate on zero)
+ *
+ * Returns: true if a non-free block is encountered
+ */
+
+static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
+{
+ u64 block;
+ u32 n;
+ u8 res;
+
+ for (n = 0; n < n_unaligned; n++) {
+ res = gfs2_testbit(rbm);
+ if (res != GFS2_BLKST_FREE)
+ return true;
+ (*len)--;
+ if (*len == 0)
+ return true;
+ block = gfs2_rbm_to_block(rbm);
+ if (gfs2_rbm_from_block(rbm, block + 1))
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * gfs2_free_extlen - Return extent length of free blocks
+ * @rbm: Starting position
+ * @len: Max length to check
+ *
+ * Starting at the block specified by the rbm, see how many free blocks
+ * there are, not reading more than len blocks ahead. This can be done
+ * using memchr_inv when the blocks are byte aligned, but has to be done
+ * on a block by block basis in case of unaligned blocks. Also this
+ * function can cope with bitmap boundaries (although it must stop on
+ * a resource group boundary)
+ *
+ * Returns: Number of free blocks in the extent
+ */
+
+static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
+{
+ struct gfs2_rbm rbm = *rrbm;
+ u32 n_unaligned = rbm.offset & 3;
+ u32 size = len;
+ u32 bytes;
+ u32 chunk_size;
+ u8 *ptr, *start, *end;
+ u64 block;
+
+ if (n_unaligned &&
+ gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
+ goto out;
+
+ n_unaligned = len & 3;
+ /* Start is now byte aligned */
+ while (len > 3) {
+ start = rbm.bi->bi_bh->b_data;
+ if (rbm.bi->bi_clone)
+ start = rbm.bi->bi_clone;
+ end = start + rbm.bi->bi_bh->b_size;
+ start += rbm.bi->bi_offset;
+ BUG_ON(rbm.offset & 3);
+ start += (rbm.offset / GFS2_NBBY);
+ bytes = min_t(u32, len / GFS2_NBBY, (end - start));
+ ptr = memchr_inv(start, 0, bytes);
+ chunk_size = ((ptr == NULL) ? bytes : (ptr - start));
+ chunk_size *= GFS2_NBBY;
+ BUG_ON(len < chunk_size);
+ len -= chunk_size;
+ block = gfs2_rbm_to_block(&rbm);
+ gfs2_rbm_from_block(&rbm, block + chunk_size);
+ n_unaligned = 3;
+ if (ptr)
+ break;
+ n_unaligned = len & 3;
+ }
+
+ /* Deal with any bits left over at the end */
+ if (n_unaligned)
+ gfs2_unaligned_extlen(&rbm, n_unaligned, &len);
+out:
+ return size - len;
+}
+
+/**
* gfs2_bitcount - count the number of bits in a certain state
* @rgd: the resource group descriptor
* @buffer: the buffer that holds the bitmaps
@@ -487,6 +563,8 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
if (!res)
error = -ENOMEM;
+ RB_CLEAR_NODE(&res->rs_node);
+
down_write(&ip->i_rw_mutex);
if (ip->i_res)
kmem_cache_free(gfs2_rsrv_cachep, res);
@@ -496,11 +574,12 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
return error;
}
-static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs)
+static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
{
- gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n",
- rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk,
- rs->rs_free);
+ gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n",
+ (unsigned long long)rs->rs_inum,
+ (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
+ rs->rs_rbm.offset, rs->rs_free);
}
/**
@@ -508,41 +587,26 @@ static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs)
* @rs: The reservation to remove
*
*/
-static void __rs_deltree(struct gfs2_blkreserv *rs)
+static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs)
{
struct gfs2_rgrpd *rgd;
if (!gfs2_rs_active(rs))
return;
- rgd = rs->rs_rgd;
- /* We can't do this: The reason is that when the rgrp is invalidated,
- it's in the "middle" of acquiring the glock, but the HOLDER bit
- isn't set yet:
- BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/
- trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL);
-
- if (!RB_EMPTY_ROOT(&rgd->rd_rstree))
- rb_erase(&rs->rs_node, &rgd->rd_rstree);
- BUG_ON(!rgd->rd_rs_cnt);
- rgd->rd_rs_cnt--;
+ rgd = rs->rs_rbm.rgd;
+ trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
+ rb_erase(&rs->rs_node, &rgd->rd_rstree);
+ RB_CLEAR_NODE(&rs->rs_node);
if (rs->rs_free) {
/* return reserved blocks to the rgrp and the ip */
- BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free);
- rs->rs_rgd->rd_reserved -= rs->rs_free;
+ BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
+ rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
rs->rs_free = 0;
- clear_bit(GBF_FULL, &rs->rs_bi->bi_flags);
+ clear_bit(GBF_FULL, &rs->rs_rbm.bi->bi_flags);
smp_mb__after_clear_bit();
}
- /* We can't change any of the step 1 or step 2 components of the rs.
- E.g. We can't set rs_rgd to NULL because the rgd glock is held and
- dequeued through this pointer.
- Can't: atomic_set(&rs->rs_sizehint, 0);
- Can't: rs->rs_requested = 0;
- Can't: rs->rs_rgd = NULL;*/
- rs->rs_bi = NULL;
- rs->rs_biblk = 0;
}
/**
@@ -550,17 +614,16 @@ static void __rs_deltree(struct gfs2_blkreserv *rs)
* @rs: The reservation to remove
*
*/
-void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
+void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs)
{
struct gfs2_rgrpd *rgd;
- if (!gfs2_rs_active(rs))
- return;
-
- rgd = rs->rs_rgd;
- spin_lock(&rgd->rd_rsspin);
- __rs_deltree(rs);
- spin_unlock(&rgd->rd_rsspin);
+ rgd = rs->rs_rbm.rgd;
+ if (rgd) {
+ spin_lock(&rgd->rd_rsspin);
+ __rs_deltree(ip, rs);
+ spin_unlock(&rgd->rd_rsspin);
+ }
}
/**
@@ -572,8 +635,7 @@ void gfs2_rs_delete(struct gfs2_inode *ip)
{
down_write(&ip->i_rw_mutex);
if (ip->i_res) {
- gfs2_rs_deltree(ip->i_res);
- trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE);
+ gfs2_rs_deltree(ip, ip->i_res);
BUG_ON(ip->i_res->rs_free);
kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
ip->i_res = NULL;
@@ -597,7 +659,7 @@ static void return_all_reservations(struct gfs2_rgrpd *rgd)
spin_lock(&rgd->rd_rsspin);
while ((n = rb_first(&rgd->rd_rstree))) {
rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
- __rs_deltree(rs);
+ __rs_deltree(NULL, rs);
}
spin_unlock(&rgd->rd_rsspin);
}
@@ -1270,211 +1332,276 @@ out:
/**
* rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
- * @bi: the bitmap with the blocks
* @ip: the inode structure
- * @biblk: the 32-bit block number relative to the start of the bitmap
- * @amount: the number of blocks to reserve
*
- * Returns: NULL - reservation was already taken, so not inserted
- * pointer to the inserted reservation
*/
-static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi,
- struct gfs2_inode *ip, u32 biblk,
- int amount)
+static void rs_insert(struct gfs2_inode *ip)
{
struct rb_node **newn, *parent = NULL;
int rc;
struct gfs2_blkreserv *rs = ip->i_res;
- struct gfs2_rgrpd *rgd = rs->rs_rgd;
- u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0;
+ struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
+ u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);
+
+ BUG_ON(gfs2_rs_active(rs));
spin_lock(&rgd->rd_rsspin);
newn = &rgd->rd_rstree.rb_node;
- BUG_ON(!ip->i_res);
- BUG_ON(gfs2_rs_active(rs));
- /* Figure out where to put new node */
- /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
while (*newn) {
struct gfs2_blkreserv *cur =
rb_entry(*newn, struct gfs2_blkreserv, rs_node);
parent = *newn;
- rc = rs_cmp(fsblock, amount, cur);
+ rc = rs_cmp(fsblock, rs->rs_free, cur);
if (rc > 0)
newn = &((*newn)->rb_right);
else if (rc < 0)
newn = &((*newn)->rb_left);
else {
spin_unlock(&rgd->rd_rsspin);
- return NULL; /* reservation already in use */
+ WARN_ON(1);
+ return;
}
}
- /* Do our reservation work */
- rs = ip->i_res;
- rs->rs_free = amount;
- rs->rs_biblk = biblk;
- rs->rs_bi = bi;
rb_link_node(&rs->rs_node, parent, newn);
rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
- /* Do our inode accounting for the reservation */
- /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
-
/* Do our rgrp accounting for the reservation */
- rgd->rd_reserved += amount; /* blocks reserved */
- rgd->rd_rs_cnt++; /* number of in-tree reservations */
+ rgd->rd_reserved += rs->rs_free; /* blocks reserved */
spin_unlock(&rgd->rd_rsspin);
- trace_gfs2_rs(ip, rs, TRACE_RS_INSERT);
- return rs;
+ trace_gfs2_rs(rs, TRACE_RS_INSERT);
}
/**
- * unclaimed_blocks - return number of blocks that aren't spoken for
- */
-static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd)
-{
- return rgd->rd_free_clone - rgd->rd_reserved;
-}
-
-/**
- * rg_mblk_search - find a group of multiple free blocks
+ * rg_mblk_search - find a group of multiple free blocks to form a reservation
* @rgd: the resource group descriptor
- * @rs: the block reservation
* @ip: pointer to the inode for which we're reserving blocks
+ * @requested: number of blocks required for this allocation
*
- * This is very similar to rgblk_search, except we're looking for whole
- * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing
- * on aligned dwords for speed's sake.
- *
- * Returns: 0 if successful or BFITNOENT if there isn't enough free space
*/
-static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
+static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
+ unsigned requested)
{
- struct gfs2_bitmap *bi = rgd->rd_bits;
- const u32 length = rgd->rd_length;
- u32 blk;
- unsigned int buf, x, search_bytes;
- u8 *buffer = NULL;
- u8 *ptr, *end, *nonzero;
- u32 goal, rsv_bytes;
- struct gfs2_blkreserv *rs;
- u32 best_rs_bytes, unclaimed;
- int best_rs_blocks;
+ struct gfs2_rbm rbm = { .rgd = rgd, };
+ u64 goal;
+ struct gfs2_blkreserv *rs = ip->i_res;
+ u32 extlen;
+ u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
+ int ret;
+
+ extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested);
+ extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
+ if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
+ return;
/* Find bitmap block that contains bits for goal block */
if (rgrp_contains_block(rgd, ip->i_goal))
- goal = ip->i_goal - rgd->rd_data0;
+ goal = ip->i_goal;
else
- goal = rgd->rd_last_alloc;
- for (buf = 0; buf < length; buf++) {
- bi = rgd->rd_bits + buf;
- /* Convert scope of "goal" from rgrp-wide to within
- found bit block */
- if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) {
- goal -= bi->bi_start * GFS2_NBBY;
- goto do_search;
- }
+ goal = rgd->rd_last_alloc + rgd->rd_data0;
+
+ if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
+ return;
+
+ ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, extlen, ip, true);
+ if (ret == 0) {
+ rs->rs_rbm = rbm;
+ rs->rs_free = extlen;
+ rs->rs_inum = ip->i_no_addr;
+ rs_insert(ip);
}
- buf = 0;
- goal = 0;
-
-do_search:
- best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint),
- (RGRP_RSRV_MINBLKS * rgd->rd_length));
- best_rs_bytes = (best_rs_blocks *
- (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) /
- GFS2_NBBY; /* 1 + is for our not-yet-created reservation */
- best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64));
- unclaimed = unclaimed_blocks(rgd);
- if (best_rs_bytes * GFS2_NBBY > unclaimed)
- best_rs_bytes = unclaimed >> GFS2_BIT_SIZE;
-
- for (x = 0; x <= length; x++) {
- bi = rgd->rd_bits + buf;
+}
- if (test_bit(GBF_FULL, &bi->bi_flags))
- goto skip;
+/**
+ * gfs2_next_unreserved_block - Return next block that is not reserved
+ * @rgd: The resource group
+ * @block: The starting block
+ * @length: The required length
+ * @ip: Ignore any reservations for this inode
+ *
+ * If the block does not appear in any reservation, then return the
+ * block number unchanged. If it does appear in the reservation, then
+ * keep looking through the tree of reservations in order to find the
+ * first block number which is not reserved.
+ */
- WARN_ON(!buffer_uptodate(bi->bi_bh));
- if (bi->bi_clone)
- buffer = bi->bi_clone + bi->bi_offset;
+static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
+ u32 length,
+ const struct gfs2_inode *ip)
+{
+ struct gfs2_blkreserv *rs;
+ struct rb_node *n;
+ int rc;
+
+ spin_lock(&rgd->rd_rsspin);
+ n = rgd->rd_rstree.rb_node;
+ while (n) {
+ rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
+ rc = rs_cmp(block, length, rs);
+ if (rc < 0)
+ n = n->rb_left;
+ else if (rc > 0)
+ n = n->rb_right;
else
- buffer = bi->bi_bh->b_data + bi->bi_offset;
-
- /* We have to keep the reservations aligned on u64 boundaries
- otherwise we could get situations where a byte can't be
- used because it's after a reservation, but a free bit still
- is within the reservation's area. */
- ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64));
- end = (buffer + bi->bi_len);
- while (ptr < end) {
- rsv_bytes = 0;
- if ((ptr + best_rs_bytes) <= end)
- search_bytes = best_rs_bytes;
- else
- search_bytes = end - ptr;
- BUG_ON(!search_bytes);
- nonzero = memchr_inv(ptr, 0, search_bytes);
- /* If the lot is all zeroes, reserve the whole size. If
- there's enough zeroes to satisfy the request, use
- what we can. If there's not enough, keep looking. */
- if (nonzero == NULL)
- rsv_bytes = search_bytes;
- else if ((nonzero - ptr) * GFS2_NBBY >=
- ip->i_res->rs_requested)
- rsv_bytes = (nonzero - ptr);
-
- if (rsv_bytes) {
- blk = ((ptr - buffer) * GFS2_NBBY);
- BUG_ON(blk >= bi->bi_len * GFS2_NBBY);
- rs = rs_insert(bi, ip, blk,
- rsv_bytes * GFS2_NBBY);
- if (IS_ERR(rs))
- return PTR_ERR(rs);
- if (rs)
- return 0;
- }
- ptr += ALIGN(search_bytes, sizeof(u64));
+ break;
+ }
+
+ if (n) {
+ while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) {
+ block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
+ n = n->rb_right;
+ if (n == NULL)
+ break;
+ rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
}
-skip:
- /* Try next bitmap block (wrap back to rgrp header
- if at end) */
- buf++;
- buf %= length;
- goal = 0;
}
- return BFITNOENT;
+ spin_unlock(&rgd->rd_rsspin);
+ return block;
}
/**
- * try_rgrp_fit - See if a given reservation will fit in a given RG
- * @rgd: the RG data
- * @ip: the inode
+ * gfs2_reservation_check_and_update - Check for reservations during block alloc
+ * @rbm: The current position in the resource group
+ * @ip: The inode for which we are searching for blocks
+ * @minext: The minimum extent length
*
- * If there's room for the requested blocks to be allocated from the RG:
- * This will try to get a multi-block reservation first, and if that doesn't
- * fit, it will take what it can.
+ * This checks the current position in the rgrp to see whether there is
+ * a reservation covering this block. If not then this function is a
+ * no-op. If there is, then the position is moved to the end of the
+ * contiguous reservation(s) so that we are pointing at the first
+ * non-reserved block.
*
- * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
+ * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error
*/
-static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
+static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
+ const struct gfs2_inode *ip,
+ u32 minext)
{
- struct gfs2_blkreserv *rs = ip->i_res;
+ u64 block = gfs2_rbm_to_block(rbm);
+ u32 extlen = 1;
+ u64 nblock;
+ int ret;
- if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
+ /*
+ * If we have a minimum extent length, then skip over any extent
+ * which is less than the min extent length in size.
+ */
+ if (minext) {
+ extlen = gfs2_free_extlen(rbm, minext);
+ nblock = block + extlen;
+ if (extlen < minext)
+ goto fail;
+ }
+
+ /*
+ * Check the extent which has been found against the reservations
+ * and skip if parts of it are already reserved
+ */
+ nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
+ if (nblock == block)
return 0;
- /* Look for a multi-block reservation. */
- if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS &&
- rg_mblk_search(rgd, ip) != BFITNOENT)
- return 1;
- if (unclaimed_blocks(rgd) >= rs->rs_requested)
- return 1;
+fail:
+ ret = gfs2_rbm_from_block(rbm, nblock);
+ if (ret < 0)
+ return ret;
+ return 1;
+}
- return 0;
+/**
+ * gfs2_rbm_find - Look for blocks of a particular state
+ * @rbm: Value/result starting position and final position
+ * @state: The state which we want to find
+ * @minext: The requested extent length (0 for a single block)
+ * @ip: If set, check for reservations
+ * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
+ * around until we've reached the starting point.
+ *
+ * Side effects:
+ * - If looking for free blocks, we set GBF_FULL on each bitmap which
+ * has no free blocks in it.
+ *
+ * Returns: 0 on success, -ENOSPC if there is no block of the requested state
+ */
+
+static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext,
+ const struct gfs2_inode *ip, bool nowrap)
+{
+ struct buffer_head *bh;
+ struct gfs2_bitmap *initial_bi;
+ u32 initial_offset;
+ u32 offset;
+ u8 *buffer;
+ int index;
+ int n = 0;
+ int iters = rbm->rgd->rd_length;
+ int ret;
+
+ /* If we are not starting at the beginning of a bitmap, then we
+ * need to add one to the bitmap count to ensure that we search
+ * the starting bitmap twice.
+ */
+ if (rbm->offset != 0)
+ iters++;
+
+ while(1) {
+ if (test_bit(GBF_FULL, &rbm->bi->bi_flags) &&
+ (state == GFS2_BLKST_FREE))
+ goto next_bitmap;
+
+ bh = rbm->bi->bi_bh;
+ buffer = bh->b_data + rbm->bi->bi_offset;
+ WARN_ON(!buffer_uptodate(bh));
+ if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone)
+ buffer = rbm->bi->bi_clone + rbm->bi->bi_offset;
+ initial_offset = rbm->offset;
+ offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state);
+ if (offset == BFITNOENT)
+ goto bitmap_full;
+ rbm->offset = offset;
+ if (ip == NULL)
+ return 0;
+
+ initial_bi = rbm->bi;
+ ret = gfs2_reservation_check_and_update(rbm, ip, minext);
+ if (ret == 0)
+ return 0;
+ if (ret > 0) {
+ n += (rbm->bi - initial_bi);
+ goto next_iter;
+ }
+ if (ret == -E2BIG) {
+ index = 0;
+ rbm->offset = 0;
+ n += (rbm->bi - initial_bi);
+ goto res_covered_end_of_rgrp;
+ }
+ return ret;
+
+bitmap_full: /* Mark bitmap as full and fall through */
+ if ((state == GFS2_BLKST_FREE) && initial_offset == 0)
+ set_bit(GBF_FULL, &rbm->bi->bi_flags);
+
+next_bitmap: /* Find next bitmap in the rgrp */
+ rbm->offset = 0;
+ index = rbm->bi - rbm->rgd->rd_bits;
+ index++;
+ if (index == rbm->rgd->rd_length)
+ index = 0;
+res_covered_end_of_rgrp:
+ rbm->bi = &rbm->rgd->rd_bits[index];
+ if ((index == 0) && nowrap)
+ break;
+ n++;
+next_iter:
+ if (n >= iters)
+ break;
+ }
+
+ return -ENOSPC;
}
/**
@@ -1489,34 +1616,33 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
{
- u32 goal = 0, block;
- u64 no_addr;
+ u64 block;
struct gfs2_sbd *sdp = rgd->rd_sbd;
struct gfs2_glock *gl;
struct gfs2_inode *ip;
int error;
int found = 0;
- struct gfs2_bitmap *bi;
+ struct gfs2_rbm rbm = { .rgd = rgd, .bi = rgd->rd_bits, .offset = 0 };
- while (goal < rgd->rd_data) {
+ while (1) {
down_write(&sdp->sd_log_flush_lock);
- block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, &bi);
+ error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, 0, NULL, true);
up_write(&sdp->sd_log_flush_lock);
- if (block == BFITNOENT)
+ if (error == -ENOSPC)
+ break;
+ if (WARN_ON_ONCE(error))
break;
- block = gfs2_bi2rgd_blk(bi, block);
- /* rgblk_search can return a block < goal, so we need to
- keep it marching forward. */
- no_addr = block + rgd->rd_data0;
- goal = max(block + 1, goal + 1);
- if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked)
+ block = gfs2_rbm_to_block(&rbm);
+ if (gfs2_rbm_from_block(&rbm, block + 1))
+ break;
+ if (*last_unlinked != NO_BLOCK && block <= *last_unlinked)
continue;
- if (no_addr == skip)
+ if (block == skip)
continue;
- *last_unlinked = no_addr;
+ *last_unlinked = block;
- error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl);
+ error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl);
if (error)
continue;
@@ -1543,6 +1669,19 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
return;
}
+static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
+{
+ struct gfs2_rgrpd *rgd = *pos;
+
+ rgd = gfs2_rgrpd_get_next(rgd);
+ if (rgd == NULL)
+ rgd = gfs2_rgrpd_get_next(NULL);
+ *pos = rgd;
+ if (rgd != begin) /* If we didn't wrap */
+ return true;
+ return false;
+}
+
/**
* gfs2_inplace_reserve - Reserve space in the filesystem
* @ip: the inode to reserve space for
@@ -1562,103 +1701,96 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
if (sdp->sd_args.ar_rgrplvb)
flags |= GL_SKIP;
- rs->rs_requested = requested;
- if (gfs2_assert_warn(sdp, requested)) {
- error = -EINVAL;
- goto out;
- }
+ if (gfs2_assert_warn(sdp, requested))
+ return -EINVAL;
if (gfs2_rs_active(rs)) {
- begin = rs->rs_rgd;
+ begin = rs->rs_rbm.rgd;
flags = 0; /* Yoda: Do or do not. There is no try */
} else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
- rs->rs_rgd = begin = ip->i_rgd;
+ rs->rs_rbm.rgd = begin = ip->i_rgd;
} else {
- rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
+ rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
}
- if (rs->rs_rgd == NULL)
+ if (rs->rs_rbm.rgd == NULL)
return -EBADSLT;
while (loops < 3) {
- rg_locked = 0;
-
- if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) {
- rg_locked = 1;
- error = 0;
- } else if (!loops && !gfs2_rs_active(rs) &&
- rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) {
- /* If the rgrp already is maxed out for contenders,
- we can eliminate it as a "first pass" without even
- requesting the rgrp glock. */
- error = GLR_TRYFAILED;
- } else {
- error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl,
+ rg_locked = 1;
+
+ if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
+ rg_locked = 0;
+ error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
LM_ST_EXCLUSIVE, flags,
&rs->rs_rgd_gh);
- if (!error && sdp->sd_args.ar_rgrplvb) {
- error = update_rgrp_lvb(rs->rs_rgd);
- if (error) {
+ if (error == GLR_TRYFAILED)
+ goto next_rgrp;
+ if (unlikely(error))
+ return error;
+ if (sdp->sd_args.ar_rgrplvb) {
+ error = update_rgrp_lvb(rs->rs_rbm.rgd);
+ if (unlikely(error)) {
gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
return error;
}
}
}
- switch (error) {
- case 0:
- if (gfs2_rs_active(rs)) {
- if (unclaimed_blocks(rs->rs_rgd) +
- rs->rs_free >= rs->rs_requested) {
- ip->i_rgd = rs->rs_rgd;
- return 0;
- }
- /* We have a multi-block reservation, but the
- rgrp doesn't have enough free blocks to
- satisfy the request. Free the reservation
- and look for a suitable rgrp. */
- gfs2_rs_deltree(rs);
- }
- if (try_rgrp_fit(rs->rs_rgd, ip)) {
- if (sdp->sd_args.ar_rgrplvb)
- gfs2_rgrp_bh_get(rs->rs_rgd);
- ip->i_rgd = rs->rs_rgd;
- return 0;
- }
- if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) {
- if (sdp->sd_args.ar_rgrplvb)
- gfs2_rgrp_bh_get(rs->rs_rgd);
- try_rgrp_unlink(rs->rs_rgd, &last_unlinked,
- ip->i_no_addr);
- }
- if (!rg_locked)
- gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
- /* fall through */
- case GLR_TRYFAILED:
- rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd);
- rs->rs_rgd = rs->rs_rgd ? : begin; /* if NULL, wrap */
- if (rs->rs_rgd != begin) /* If we didn't wrap */
- break;
- flags &= ~LM_FLAG_TRY;
- loops++;
- /* Check that fs hasn't grown if writing to rindex */
- if (ip == GFS2_I(sdp->sd_rindex) &&
- !sdp->sd_rindex_uptodate) {
- error = gfs2_ri_update(ip);
- if (error)
- goto out;
- } else if (loops == 2)
- /* Flushing the log may release space */
- gfs2_log_flush(sdp, NULL);
- break;
- default:
- goto out;
+ /* Skip unuseable resource groups */
+ if (rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
+ goto skip_rgrp;
+
+ if (sdp->sd_args.ar_rgrplvb)
+ gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
+
+ /* Get a reservation if we don't already have one */
+ if (!gfs2_rs_active(rs))
+ rg_mblk_search(rs->rs_rbm.rgd, ip, requested);
+
+ /* Skip rgrps when we can't get a reservation on first pass */
+ if (!gfs2_rs_active(rs) && (loops < 1))
+ goto check_rgrp;
+
+ /* If rgrp has enough free space, use it */
+ if (rs->rs_rbm.rgd->rd_free_clone >= requested) {
+ ip->i_rgd = rs->rs_rbm.rgd;
+ return 0;
+ }
+
+ /* Drop reservation, if we couldn't use reserved rgrp */
+ if (gfs2_rs_active(rs))
+ gfs2_rs_deltree(ip, rs);
+check_rgrp:
+ /* Check for unlinked inodes which can be reclaimed */
+ if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
+ try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
+ ip->i_no_addr);
+skip_rgrp:
+ /* Unlock rgrp if required */
+ if (!rg_locked)
+ gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
+next_rgrp:
+ /* Find the next rgrp, and continue looking */
+ if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
+ continue;
+
+ /* If we've scanned all the rgrps, but found no free blocks
+ * then this checks for some less likely conditions before
+ * trying again.
+ */
+ flags &= ~LM_FLAG_TRY;
+ loops++;
+ /* Check that fs hasn't grown if writing to rindex */
+ if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
+ error = gfs2_ri_update(ip);
+ if (error)
+ return error;
}
+ /* Flushing the log may release space */
+ if (loops == 2)
+ gfs2_log_flush(sdp, NULL);
}
- error = -ENOSPC;
-out:
- if (error)
- rs->rs_requested = 0;
- return error;
+ return -ENOSPC;
}
/**
@@ -1672,15 +1804,8 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
{
struct gfs2_blkreserv *rs = ip->i_res;
- if (!rs)
- return;
-
- if (!rs->rs_free)
- gfs2_rs_deltree(rs);
-
if (rs->rs_rgd_gh.gh_gl)
gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
- rs->rs_requested = 0;
}
/**
@@ -1693,173 +1818,47 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
{
- struct gfs2_bitmap *bi = NULL;
- u32 length, rgrp_block, buf_block;
- unsigned int buf;
- unsigned char type;
-
- length = rgd->rd_length;
- rgrp_block = block - rgd->rd_data0;
-
- for (buf = 0; buf < length; buf++) {
- bi = rgd->rd_bits + buf;
- if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
- break;
- }
+ struct gfs2_rbm rbm = { .rgd = rgd, };
+ int ret;
- gfs2_assert(rgd->rd_sbd, buf < length);
- buf_block = rgrp_block - bi->bi_start * GFS2_NBBY;
+ ret = gfs2_rbm_from_block(&rbm, block);
+ WARN_ON_ONCE(ret != 0);
- type = gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
- bi->bi_len, buf_block);
-
- return type;
+ return gfs2_testbit(&rbm);
}
-/**
- * rgblk_search - find a block in @state
- * @rgd: the resource group descriptor
- * @goal: the goal block within the RG (start here to search for avail block)
- * @state: GFS2_BLKST_XXX the before-allocation state to find
- * @rbi: address of the pointer to the bitmap containing the block found
- *
- * Walk rgrp's bitmap to find bits that represent a block in @state.
- *
- * This function never fails, because we wouldn't call it unless we
- * know (from reservation results, etc.) that a block is available.
- *
- * Scope of @goal is just within rgrp, not the whole filesystem.
- * Scope of @returned block is just within bitmap, not the whole filesystem.
- *
- * Returns: the block number found relative to the bitmap rbi
- */
-
-static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, unsigned char state,
- struct gfs2_bitmap **rbi)
-{
- struct gfs2_bitmap *bi = NULL;
- const u32 length = rgd->rd_length;
- u32 biblk = BFITNOENT;
- unsigned int buf, x;
- const u8 *buffer = NULL;
-
- *rbi = NULL;
- /* Find bitmap block that contains bits for goal block */
- for (buf = 0; buf < length; buf++) {
- bi = rgd->rd_bits + buf;
- /* Convert scope of "goal" from rgrp-wide to within found bit block */
- if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) {
- goal -= bi->bi_start * GFS2_NBBY;
- goto do_search;
- }
- }
- buf = 0;
- goal = 0;
-
-do_search:
- /* Search (up to entire) bitmap in this rgrp for allocatable block.
- "x <= length", instead of "x < length", because we typically start
- the search in the middle of a bit block, but if we can't find an
- allocatable block anywhere else, we want to be able wrap around and
- search in the first part of our first-searched bit block. */
- for (x = 0; x <= length; x++) {
- bi = rgd->rd_bits + buf;
-
- if (test_bit(GBF_FULL, &bi->bi_flags) &&
- (state == GFS2_BLKST_FREE))
- goto skip;
-
- /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone
- bitmaps, so we must search the originals for that. */
- buffer = bi->bi_bh->b_data + bi->bi_offset;
- WARN_ON(!buffer_uptodate(bi->bi_bh));
- if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
- buffer = bi->bi_clone + bi->bi_offset;
-
- while (1) {
- struct gfs2_blkreserv *rs;
- u32 rgblk;
-
- biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
- if (biblk == BFITNOENT)
- break;
- /* Check if this block is reserved() */
- rgblk = gfs2_bi2rgd_blk(bi, biblk);
- rs = rs_find(rgd, rgblk);
- if (rs == NULL)
- break;
-
- BUG_ON(rs->rs_bi != bi);
- biblk = BFITNOENT;
- /* This should jump to the first block after the
- reservation. */
- goal = rs->rs_biblk + rs->rs_free;
- if (goal >= bi->bi_len * GFS2_NBBY)
- break;
- }
- if (biblk != BFITNOENT)
- break;
-
- if ((goal == 0) && (state == GFS2_BLKST_FREE))
- set_bit(GBF_FULL, &bi->bi_flags);
-
- /* Try next bitmap block (wrap back to rgrp header if at end) */
-skip:
- buf++;
- buf %= length;
- goal = 0;
- }
-
- if (biblk != BFITNOENT)
- *rbi = bi;
-
- return biblk;
-}
/**
* gfs2_alloc_extent - allocate an extent from a given bitmap
- * @rgd: the resource group descriptor
- * @bi: the bitmap within the rgrp
- * @blk: the block within the bitmap
+ * @rbm: the resource group information
* @dinode: TRUE if the first block we allocate is for a dinode
- * @n: The extent length
+ * @n: The extent length (value/result)
*
- * Add the found bitmap buffer to the transaction.
+ * Add the bitmap buffer to the transaction.
* Set the found bits to @new_state to change block's allocation state.
- * Returns: starting block number of the extent (fs scope)
*/
-static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
- u32 blk, bool dinode, unsigned int *n)
+static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
+ unsigned int *n)
{
+ struct gfs2_rbm pos = { .rgd = rbm->rgd, };
const unsigned int elen = *n;
- u32 goal, rgblk;
- const u8 *buffer = NULL;
- struct gfs2_blkreserv *rs;
-
- *n = 0;
- buffer = bi->bi_bh->b_data + bi->bi_offset;
- gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
- gfs2_setbit(rgd, bi->bi_clone, bi, blk,
- dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
- (*n)++;
- goal = blk;
+ u64 block;
+ int ret;
+
+ *n = 1;
+ block = gfs2_rbm_to_block(rbm);
+ gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1);
+ gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
+ block++;
while (*n < elen) {
- goal++;
- if (goal >= (bi->bi_len * GFS2_NBBY))
- break;
- rgblk = gfs2_bi2rgd_blk(bi, goal);
- rs = rs_find(rgd, rgblk);
- if (rs) /* Oops, we bumped into someone's reservation */
- break;
- if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
- GFS2_BLKST_FREE)
+ ret = gfs2_rbm_from_block(&pos, block);
+ if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
break;
- gfs2_setbit(rgd, bi->bi_clone, bi, goal, GFS2_BLKST_USED);
+ gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1);
+ gfs2_setbit(&pos, true, GFS2_BLKST_USED);
(*n)++;
+ block++;
}
- blk = gfs2_bi2rgd_blk(bi, blk);
- rgd->rd_last_alloc = blk + *n - 1;
- return rgd->rd_data0 + blk;
}
/**
@@ -1875,46 +1874,30 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
u32 blen, unsigned char new_state)
{
- struct gfs2_rgrpd *rgd;
- struct gfs2_bitmap *bi = NULL;
- u32 length, rgrp_blk, buf_blk;
- unsigned int buf;
+ struct gfs2_rbm rbm;
- rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
- if (!rgd) {
+ rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
+ if (!rbm.rgd) {
if (gfs2_consist(sdp))
fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
return NULL;
}
- length = rgd->rd_length;
-
- rgrp_blk = bstart - rgd->rd_data0;
-
while (blen--) {
- for (buf = 0; buf < length; buf++) {
- bi = rgd->rd_bits + buf;
- if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
- break;
- }
-
- gfs2_assert(rgd->rd_sbd, buf < length);
-
- buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY;
- rgrp_blk++;
-
- if (!bi->bi_clone) {
- bi->bi_clone = kmalloc(bi->bi_bh->b_size,
- GFP_NOFS | __GFP_NOFAIL);
- memcpy(bi->bi_clone + bi->bi_offset,
- bi->bi_bh->b_data + bi->bi_offset,
- bi->bi_len);
+ gfs2_rbm_from_block(&rbm, bstart);
+ bstart++;
+ if (!rbm.bi->bi_clone) {
+ rbm.bi->bi_clone = kmalloc(rbm.bi->bi_bh->b_size,
+ GFP_NOFS | __GFP_NOFAIL);
+ memcpy(rbm.bi->bi_clone + rbm.bi->bi_offset,
+ rbm.bi->bi_bh->b_data + rbm.bi->bi_offset,
+ rbm.bi->bi_len);
}
- gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
- gfs2_setbit(rgd, NULL, bi, buf_blk, new_state);
+ gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1);
+ gfs2_setbit(&rbm, false, new_state);
}
- return rgd;
+ return rbm.rgd;
}
/**
@@ -1956,56 +1939,41 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
}
/**
- * claim_reserved_blks - Claim previously reserved blocks
- * @ip: the inode that's claiming the reservation
- * @dinode: 1 if this block is a dinode block, otherwise data block
- * @nblocks: desired extent length
+ * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation
+ * @ip: The inode we have just allocated blocks for
+ * @rbm: The start of the allocated blocks
+ * @len: The extent length
*
- * Lay claim to previously allocated block reservation blocks.
- * Returns: Starting block number of the blocks claimed.
- * Sets *nblocks to the actual extent length allocated.
+ * Adjusts a reservation after an allocation has taken place. If the
+ * reservation does not match the allocation, or if it is now empty
+ * then it is removed.
*/
-static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode,
- unsigned int *nblocks)
+
+static void gfs2_adjust_reservation(struct gfs2_inode *ip,
+ const struct gfs2_rbm *rbm, unsigned len)
{
struct gfs2_blkreserv *rs = ip->i_res;
- struct gfs2_rgrpd *rgd = rs->rs_rgd;
- struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- struct gfs2_bitmap *bi;
- u64 start_block = gfs2_rs_startblk(rs);
- const unsigned int elen = *nblocks;
-
- /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
- gfs2_assert_withdraw(sdp, rgd);
- /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
- bi = rs->rs_bi;
- gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
-
- for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) {
- /* Make sure the bitmap hasn't changed */
- gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk,
- dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
- rs->rs_biblk++;
- rs->rs_free--;
-
- BUG_ON(!rgd->rd_reserved);
- rgd->rd_reserved--;
- dinode = false;
- trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM);
- }
-
- if (!rs->rs_free) {
- struct gfs2_rgrpd *rgd = ip->i_res->rs_rgd;
+ struct gfs2_rgrpd *rgd = rbm->rgd;
+ unsigned rlen;
+ u64 block;
+ int ret;
- gfs2_rs_deltree(rs);
- /* -nblocks because we haven't returned to do the math yet.
- I'm doing the math backwards to prevent negative numbers,
- but think of it as:
- if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS */
- if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks)
- rg_mblk_search(rgd, ip);
+ spin_lock(&rgd->rd_rsspin);
+ if (gfs2_rs_active(rs)) {
+ if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
+ block = gfs2_rbm_to_block(rbm);
+ ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
+ rlen = min(rs->rs_free, len);
+ rs->rs_free -= rlen;
+ rgd->rd_reserved -= rlen;
+ trace_gfs2_rs(rs, TRACE_RS_CLAIM);
+ if (rs->rs_free && !ret)
+ goto out;
+ }
+ __rs_deltree(ip, rs);
}
- return start_block;
+out:
+ spin_unlock(&rgd->rd_rsspin);
}
/**
@@ -2024,47 +1992,40 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct buffer_head *dibh;
- struct gfs2_rgrpd *rgd;
+ struct gfs2_rbm rbm = { .rgd = ip->i_rgd, };
unsigned int ndata;
- u32 goal, blk; /* block, within the rgrp scope */
+ u64 goal;
u64 block; /* block, within the file system scope */
int error;
- struct gfs2_bitmap *bi;
- /* Only happens if there is a bug in gfs2, return something distinctive
- * to ensure that it is noticed.
- */
- if (ip->i_res->rs_requested == 0)
- return -ECANCELED;
-
- /* Check if we have a multi-block reservation, and if so, claim the
- next free block from it. */
- if (gfs2_rs_active(ip->i_res)) {
- BUG_ON(!ip->i_res->rs_free);
- rgd = ip->i_res->rs_rgd;
- block = claim_reserved_blks(ip, dinode, nblocks);
- } else {
- rgd = ip->i_rgd;
+ if (gfs2_rs_active(ip->i_res))
+ goal = gfs2_rbm_to_block(&ip->i_res->rs_rbm);
+ else if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal))
+ goal = ip->i_goal;
+ else
+ goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0;
- if (!dinode && rgrp_contains_block(rgd, ip->i_goal))
- goal = ip->i_goal - rgd->rd_data0;
- else
- goal = rgd->rd_last_alloc;
-
- blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi);
-
- /* Since all blocks are reserved in advance, this shouldn't
- happen */
- if (blk == BFITNOENT) {
- printk(KERN_WARNING "BFITNOENT, nblocks=%u\n",
- *nblocks);
- printk(KERN_WARNING "FULL=%d\n",
- test_bit(GBF_FULL, &rgd->rd_bits->bi_flags));
- goto rgrp_error;
- }
+ gfs2_rbm_from_block(&rbm, goal);
+ error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false);
- block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks);
+ if (error == -ENOSPC) {
+ gfs2_rbm_from_block(&rbm, goal);
+ error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false);
+ }
+
+ /* Since all blocks are reserved in advance, this shouldn't happen */
+ if (error) {
+ fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d\n",
+ (unsigned long long)ip->i_no_addr, error, *nblocks,
+ test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags));
+ goto rgrp_error;
}
+
+ gfs2_alloc_extent(&rbm, dinode, nblocks);
+ block = gfs2_rbm_to_block(&rbm);
+ rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
+ if (gfs2_rs_active(ip->i_res))
+ gfs2_adjust_reservation(ip, &rbm, *nblocks);
ndata = *nblocks;
if (dinode)
ndata--;
@@ -2081,22 +2042,22 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
brelse(dibh);
}
}
- if (rgd->rd_free < *nblocks) {
+ if (rbm.rgd->rd_free < *nblocks) {
printk(KERN_WARNING "nblocks=%u\n", *nblocks);
goto rgrp_error;
}
- rgd->rd_free -= *nblocks;
+ rbm.rgd->rd_free -= *nblocks;
if (dinode) {
- rgd->rd_dinodes++;
- *generation = rgd->rd_igeneration++;
+ rbm.rgd->rd_dinodes++;
+ *generation = rbm.rgd->rd_igeneration++;
if (*generation == 0)
- *generation = rgd->rd_igeneration++;
+ *generation = rbm.rgd->rd_igeneration++;
}
- gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
- gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
- gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
+ gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh, 1);
+ gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
+ gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data);
gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
if (dinode)
@@ -2110,14 +2071,14 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
gfs2_quota_change(ip, ndata, ip->i_inode.i_uid,
ip->i_inode.i_gid);
- rgd->rd_free_clone -= *nblocks;
- trace_gfs2_block_alloc(ip, rgd, block, *nblocks,
+ rbm.rgd->rd_free_clone -= *nblocks;
+ trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
*bn = block;
return 0;
rgrp_error:
- gfs2_rgrp_error(rgd);
+ gfs2_rgrp_error(rbm.rgd);
return -EIO;
}
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index ca6e26729b8..24077958dcf 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -46,7 +46,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
bool dinode, u64 *generation);
extern int gfs2_rs_alloc(struct gfs2_inode *ip);
-extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
+extern void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs);
extern void gfs2_rs_delete(struct gfs2_inode *ip);
extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
@@ -73,30 +73,10 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
extern int gfs2_fitrim(struct file *filp, void __user *argp);
-/* This is how to tell if a multi-block reservation is "inplace" reserved: */
-static inline int gfs2_mb_reserved(struct gfs2_inode *ip)
+/* This is how to tell if a reservation is in the rgrp tree: */
+static inline bool gfs2_rs_active(struct gfs2_blkreserv *rs)
{
- if (ip->i_res && ip->i_res->rs_requested)
- return 1;
- return 0;
-}
-
-/* This is how to tell if a multi-block reservation is in the rgrp tree: */
-static inline int gfs2_rs_active(struct gfs2_blkreserv *rs)
-{
- if (rs && rs->rs_bi)
- return 1;
- return 0;
-}
-
-static inline u32 gfs2_bi2rgd_blk(const struct gfs2_bitmap *bi, u32 blk)
-{
- return (bi->bi_start * GFS2_NBBY) + blk;
-}
-
-static inline u64 gfs2_rs_startblk(const struct gfs2_blkreserv *rs)
-{
- return gfs2_bi2rgd_blk(rs->rs_bi, rs->rs_biblk) + rs->rs_rgd->rd_data0;
+ return rs && !RB_EMPTY_NODE(&rs->rs_node);
}
#endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index fc3168f47a1..a8d90f2f576 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1366,6 +1366,8 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
val = sdp->sd_tune.gt_statfs_quantum;
if (val != 30)
seq_printf(s, ",statfs_quantum=%d", val);
+ else if (sdp->sd_tune.gt_statfs_slow)
+ seq_puts(s, ",statfs_quantum=0");
val = sdp->sd_tune.gt_quota_quantum;
if (val != 60)
seq_printf(s, ",quota_quantum=%d", val);
@@ -1543,6 +1545,11 @@ static void gfs2_evict_inode(struct inode *inode)
out_truncate:
gfs2_log_flush(sdp, ip->i_gl);
+ if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) {
+ struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
+ filemap_fdatawrite(metamapping);
+ filemap_fdatawait(metamapping);
+ }
write_inode_now(inode, 1);
gfs2_ail_flush(ip->i_gl, 0);
@@ -1557,7 +1564,7 @@ out_truncate:
out_unlock:
/* Error path for case 1 */
if (gfs2_rs_active(ip->i_res))
- gfs2_rs_deltree(ip->i_res);
+ gfs2_rs_deltree(ip, ip->i_res);
if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
gfs2_glock_dq(&ip->i_iopen_gh);
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index a25c252fe41..bbdc78af60c 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -509,10 +509,9 @@ TRACE_EVENT(gfs2_block_alloc,
/* Keep track of multi-block reservations as they are allocated/freed */
TRACE_EVENT(gfs2_rs,
- TP_PROTO(const struct gfs2_inode *ip, const struct gfs2_blkreserv *rs,
- u8 func),
+ TP_PROTO(const struct gfs2_blkreserv *rs, u8 func),
- TP_ARGS(ip, rs, func),
+ TP_ARGS(rs, func),
TP_STRUCT__entry(
__field( dev_t, dev )
@@ -526,18 +525,17 @@ TRACE_EVENT(gfs2_rs,
),
TP_fast_assign(
- __entry->dev = rs->rs_rgd ? rs->rs_rgd->rd_sbd->sd_vfs->s_dev : 0;
- __entry->rd_addr = rs->rs_rgd ? rs->rs_rgd->rd_addr : 0;
- __entry->rd_free_clone = rs->rs_rgd ? rs->rs_rgd->rd_free_clone : 0;
- __entry->rd_reserved = rs->rs_rgd ? rs->rs_rgd->rd_reserved : 0;
- __entry->inum = ip ? ip->i_no_addr : 0;
- __entry->start = gfs2_rs_startblk(rs);
+ __entry->dev = rs->rs_rbm.rgd->rd_sbd->sd_vfs->s_dev;
+ __entry->rd_addr = rs->rs_rbm.rgd->rd_addr;
+ __entry->rd_free_clone = rs->rs_rbm.rgd->rd_free_clone;
+ __entry->rd_reserved = rs->rs_rbm.rgd->rd_reserved;
+ __entry->inum = rs->rs_inum;
+ __entry->start = gfs2_rbm_to_block(&rs->rs_rbm);
__entry->free = rs->rs_free;
__entry->func = func;
),
- TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s "
- "f:%lu",
+ TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->inum,
(unsigned long long)__entry->start,
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index 41f42cdccbb..bf2ae9aeee7 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -28,11 +28,10 @@ struct gfs2_glock;
/* reserve either the number of blocks to be allocated plus the rg header
* block, or all of the blocks in the rg, whichever is smaller */
-static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip)
+static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned requested)
{
- const struct gfs2_blkreserv *rs = ip->i_res;
- if (rs && rs->rs_requested < ip->i_rgd->rd_length)
- return rs->rs_requested + 1;
+ if (requested < ip->i_rgd->rd_length)
+ return requested + 1;
return ip->i_rgd->rd_length;
}
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 27a0b4a901f..db330e5518c 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -448,17 +448,18 @@ ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
}
/**
- * ea_get_unstuffed - actually copies the unstuffed data into the
- * request buffer
+ * ea_iter_unstuffed - copies the unstuffed xattr data to/from the
+ * request buffer
* @ip: The GFS2 inode
* @ea: The extended attribute header structure
- * @data: The data to be copied
+ * @din: The data to be copied in
+ * @dout: The data to be copied out (one of din,dout will be NULL)
*
* Returns: errno
*/
-static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
- char *data)
+static int gfs2_iter_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
+ const char *din, char *dout)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct buffer_head **bh;
@@ -467,6 +468,8 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
__be64 *dataptrs = GFS2_EA2DATAPTRS(ea);
unsigned int x;
int error = 0;
+ unsigned char *pos;
+ unsigned cp_size;
bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS);
if (!bh)
@@ -497,12 +500,21 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
goto out;
}
- memcpy(data, bh[x]->b_data + sizeof(struct gfs2_meta_header),
- (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);
+ pos = bh[x]->b_data + sizeof(struct gfs2_meta_header);
+ cp_size = (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize;
- amount -= sdp->sd_jbsize;
- data += sdp->sd_jbsize;
+ if (dout) {
+ memcpy(dout, pos, cp_size);
+ dout += sdp->sd_jbsize;
+ }
+
+ if (din) {
+ gfs2_trans_add_bh(ip->i_gl, bh[x], 1);
+ memcpy(pos, din, cp_size);
+ din += sdp->sd_jbsize;
+ }
+ amount -= sdp->sd_jbsize;
brelse(bh[x]);
}
@@ -523,7 +535,7 @@ static int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
memcpy(data, GFS2_EA2DATA(el->el_ea), len);
return len;
}
- ret = ea_get_unstuffed(ip, el->el_ea, data);
+ ret = gfs2_iter_unstuffed(ip, el->el_ea, NULL, data);
if (ret < 0)
return ret;
return len;
@@ -727,7 +739,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
goto out_gunlock_q;
error = gfs2_trans_begin(GFS2_SB(&ip->i_inode),
- blks + gfs2_rg_blocks(ip) +
+ blks + gfs2_rg_blocks(ip, blks) +
RES_DINODE + RES_STATFS + RES_QUOTA, 0);
if (error)
goto out_ipres;
@@ -1220,69 +1232,23 @@ static int gfs2_xattr_set(struct dentry *dentry, const char *name,
size, flags, type);
}
+
static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
struct gfs2_ea_header *ea, char *data)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- struct buffer_head **bh;
unsigned int amount = GFS2_EA_DATA_LEN(ea);
unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
- __be64 *dataptrs = GFS2_EA2DATAPTRS(ea);
- unsigned int x;
- int error;
-
- bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS);
- if (!bh)
- return -ENOMEM;
-
- error = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0);
- if (error)
- goto out;
-
- for (x = 0; x < nptrs; x++) {
- error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0,
- bh + x);
- if (error) {
- while (x--)
- brelse(bh[x]);
- goto fail;
- }
- dataptrs++;
- }
-
- for (x = 0; x < nptrs; x++) {
- error = gfs2_meta_wait(sdp, bh[x]);
- if (error) {
- for (; x < nptrs; x++)
- brelse(bh[x]);
- goto fail;
- }
- if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) {
- for (; x < nptrs; x++)
- brelse(bh[x]);
- error = -EIO;
- goto fail;
- }
-
- gfs2_trans_add_bh(ip->i_gl, bh[x], 1);
-
- memcpy(bh[x]->b_data + sizeof(struct gfs2_meta_header), data,
- (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);
-
- amount -= sdp->sd_jbsize;
- data += sdp->sd_jbsize;
-
- brelse(bh[x]);
- }
+ int ret;
-out:
- kfree(bh);
- return error;
+ ret = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0);
+ if (ret)
+ return ret;
-fail:
+ ret = gfs2_iter_unstuffed(ip, ea, data, NULL);
gfs2_trans_end(sdp);
- kfree(bh);
- return error;
+
+ return ret;
}
int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 09357508ec9..a2862339323 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1113,6 +1113,11 @@ static void mark_journal_empty(journal_t *journal)
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
spin_lock(&journal->j_state_lock);
+ /* Is it already empty? */
+ if (sb->s_start == 0) {
+ spin_unlock(&journal->j_state_lock);
+ return;
+ }
jbd_debug(1, "JBD: Marking journal as empty (seq %d)\n",
journal->j_tail_sequence);
diff --git a/fs/libfs.c b/fs/libfs.c
index a74cb1725ac..7cc37ca19cd 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -874,7 +874,7 @@ struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid,
EXPORT_SYMBOL_GPL(generic_fh_to_dentry);
/**
- * generic_fh_to_dentry - generic helper for the fh_to_parent export operation
+ * generic_fh_to_parent - generic helper for the fh_to_parent export operation
* @sb: filesystem to do the file handle conversion on
* @fid: file handle to convert
* @fh_len: length of the file handle in bytes
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index fb1a2bedbe9..8d80c990dff 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -289,7 +289,6 @@ static void nlmsvc_free_block(struct kref *kref)
dprintk("lockd: freeing block %p...\n", block);
/* Remove block from file's list of blocks */
- mutex_lock(&file->f_mutex);
list_del_init(&block->b_flist);
mutex_unlock(&file->f_mutex);
@@ -303,7 +302,7 @@ static void nlmsvc_free_block(struct kref *kref)
static void nlmsvc_release_block(struct nlm_block *block)
{
if (block != NULL)
- kref_put(&block->b_count, nlmsvc_free_block);
+ kref_put_mutex(&block->b_count, nlmsvc_free_block, &block->b_file->f_mutex);
}
/*
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index df0de27c273..e784a217b50 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -26,6 +26,7 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw)
struct completion complete;
bio_init(&bio);
+ bio.bi_max_vecs = 1;
bio.bi_io_vec = &bio_vec;
bio_vec.bv_page = page;
bio_vec.bv_len = PAGE_SIZE;
@@ -95,12 +96,11 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
struct address_space *mapping = super->s_mapping_inode->i_mapping;
struct bio *bio;
struct page *page;
- struct request_queue *q = bdev_get_queue(sb->s_bdev);
- unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
+ unsigned int max_pages;
int i;
- if (max_pages > BIO_MAX_PAGES)
- max_pages = BIO_MAX_PAGES;
+ max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev));
+
bio = bio_alloc(GFP_NOFS, max_pages);
BUG_ON(!bio);
@@ -190,12 +190,11 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
{
struct logfs_super *super = logfs_super(sb);
struct bio *bio;
- struct request_queue *q = bdev_get_queue(sb->s_bdev);
- unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
+ unsigned int max_pages;
int i;
- if (max_pages > BIO_MAX_PAGES)
- max_pages = BIO_MAX_PAGES;
+ max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev));
+
bio = bio_alloc(GFP_NOFS, max_pages);
BUG_ON(!bio);
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index a422f42238b..6984562738d 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -156,10 +156,26 @@ static void __logfs_destroy_inode(struct inode *inode)
call_rcu(&inode->i_rcu, logfs_i_callback);
}
+static void __logfs_destroy_meta_inode(struct inode *inode)
+{
+ struct logfs_inode *li = logfs_inode(inode);
+ BUG_ON(li->li_block);
+ call_rcu(&inode->i_rcu, logfs_i_callback);
+}
+
static void logfs_destroy_inode(struct inode *inode)
{
struct logfs_inode *li = logfs_inode(inode);
+ if (inode->i_ino < LOGFS_RESERVED_INOS) {
+ /*
+ * The reserved inodes are never destroyed unless we are in
+ * unmont path.
+ */
+ __logfs_destroy_meta_inode(inode);
+ return;
+ }
+
BUG_ON(list_empty(&li->li_freeing_list));
spin_lock(&logfs_inode_lock);
li->li_refcount--;
@@ -373,8 +389,8 @@ static void logfs_put_super(struct super_block *sb)
{
struct logfs_super *super = logfs_super(sb);
/* kill the meta-inodes */
- iput(super->s_master_inode);
iput(super->s_segfile_inode);
+ iput(super->s_master_inode);
iput(super->s_mapping_inode);
}
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 1e1c369df22..2a09b8d7398 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -565,7 +565,7 @@ static void write_wbuf(struct super_block *sb, struct logfs_area *area,
index = ofs >> PAGE_SHIFT;
page_ofs = ofs & (PAGE_SIZE - 1);
- page = find_lock_page(mapping, index);
+ page = find_or_create_page(mapping, index, GFP_NOFS);
BUG_ON(!page);
memcpy(wbuf, page_address(page) + page_ofs, super->s_writesize);
unlock_page(page);
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index f1cb512c501..5be0abef603 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -2189,7 +2189,6 @@ void logfs_evict_inode(struct inode *inode)
return;
}
- BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS);
page = inode_to_page(inode);
BUG_ON(!page); /* FIXME: Use emergency page */
logfs_put_write_page(page);
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index e28d090c98d..038da099179 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -886,7 +886,7 @@ static struct logfs_area *alloc_area(struct super_block *sb)
static void map_invalidatepage(struct page *page, unsigned long l)
{
- BUG();
+ return;
}
static int map_releasepage(struct page *page, gfp_t g)
diff --git a/fs/namei.c b/fs/namei.c
index db76b866a09..dd1ed1b8e98 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -352,6 +352,7 @@ int __inode_permission(struct inode *inode, int mask)
/**
* sb_permission - Check superblock-level permissions
* @sb: Superblock of inode to check permission on
+ * @inode: Inode to check permission on
* @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
*
* Separate out file-system wide checks from inode-specific permission checks.
@@ -656,6 +657,7 @@ int sysctl_protected_hardlinks __read_mostly = 1;
/**
* may_follow_link - Check symlink following for unsafe situations
* @link: The path of the symlink
+ * @nd: nameidata pathwalk data
*
* In the case of the sysctl_protected_symlinks sysctl being enabled,
* CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is
diff --git a/fs/namespace.c b/fs/namespace.c
index 4d31f73e256..7bdf7907413 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1886,8 +1886,14 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
return err;
err = -EINVAL;
- if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(real_mount(path->mnt)))
- goto unlock;
+ if (unlikely(!check_mnt(real_mount(path->mnt)))) {
+ /* that's acceptable only for automounts done in private ns */
+ if (!(mnt_flags & MNT_SHRINKABLE))
+ goto unlock;
+ /* ... and for those we'd better have mountpoint still alive */
+ if (!real_mount(path->mnt)->mnt_ns)
+ goto unlock;
+ }
/* Refuse the same filesystem on the same mount point */
err = -EBUSY;
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 8bf3a3f6925..b7db60897f9 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -12,19 +12,19 @@ nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
nfs-$(CONFIG_SYSCTL) += sysctl.o
nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
-obj-$(CONFIG_NFS_V2) += nfs2.o
-nfs2-y := nfs2super.o proc.o nfs2xdr.o
+obj-$(CONFIG_NFS_V2) += nfsv2.o
+nfsv2-y := nfs2super.o proc.o nfs2xdr.o
-obj-$(CONFIG_NFS_V3) += nfs3.o
-nfs3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o
-nfs3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
+obj-$(CONFIG_NFS_V3) += nfsv3.o
+nfsv3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o
+nfsv3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
-obj-$(CONFIG_NFS_V4) += nfs4.o
-nfs4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \
+obj-$(CONFIG_NFS_V4) += nfsv4.o
+nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \
delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \
nfs4namespace.o nfs4getroot.o nfs4client.o
-nfs4-$(CONFIG_SYSCTL) += nfs4sysctl.o
-nfs4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o
+nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o
+nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o
obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 9fc0d9dfc91..99694442b93 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -105,7 +105,7 @@ struct nfs_subversion *get_nfs_version(unsigned int version)
if (IS_ERR(nfs)) {
mutex_lock(&nfs_version_mutex);
- request_module("nfs%d", version);
+ request_module("nfsv%d", version);
nfs = find_nfs_version(version);
mutex_unlock(&nfs_version_mutex);
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 75d6d0a3d32..6a7fcab7ecb 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -287,10 +287,12 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
struct inode *inode = file->f_path.dentry->d_inode;
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret != 0)
+ goto out;
mutex_lock(&inode->i_mutex);
ret = nfs_file_fsync_commit(file, start, end, datasync);
mutex_unlock(&inode->i_mutex);
-
+out:
return ret;
}
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index b701358c39c..a850079467d 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -61,6 +61,12 @@ struct idmap {
struct mutex idmap_mutex;
};
+struct idmap_legacy_upcalldata {
+ struct rpc_pipe_msg pipe_msg;
+ struct idmap_msg idmap_msg;
+ struct idmap *idmap;
+};
+
/**
* nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
* @fattr: fully initialised struct nfs_fattr
@@ -324,6 +330,7 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
ret = nfs_idmap_request_key(&key_type_id_resolver_legacy,
name, namelen, type, data,
data_size, idmap);
+ idmap->idmap_key_cons = NULL;
mutex_unlock(&idmap->idmap_mutex);
}
return ret;
@@ -380,11 +387,13 @@ static const match_table_t nfs_idmap_tokens = {
static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *);
static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
size_t);
+static void idmap_release_pipe(struct inode *);
static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
static const struct rpc_pipe_ops idmap_upcall_ops = {
.upcall = rpc_pipe_generic_upcall,
.downcall = idmap_pipe_downcall,
+ .release_pipe = idmap_release_pipe,
.destroy_msg = idmap_pipe_destroy_msg,
};
@@ -616,7 +625,8 @@ void nfs_idmap_quit(void)
nfs_idmap_quit_keyring();
}
-static int nfs_idmap_prepare_message(char *desc, struct idmap_msg *im,
+static int nfs_idmap_prepare_message(char *desc, struct idmap *idmap,
+ struct idmap_msg *im,
struct rpc_pipe_msg *msg)
{
substring_t substr;
@@ -659,6 +669,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
const char *op,
void *aux)
{
+ struct idmap_legacy_upcalldata *data;
struct rpc_pipe_msg *msg;
struct idmap_msg *im;
struct idmap *idmap = (struct idmap *)aux;
@@ -666,15 +677,15 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
int ret = -ENOMEM;
/* msg and im are freed in idmap_pipe_destroy_msg */
- msg = kmalloc(sizeof(*msg), GFP_KERNEL);
- if (!msg)
- goto out0;
-
- im = kmalloc(sizeof(*im), GFP_KERNEL);
- if (!im)
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
goto out1;
- ret = nfs_idmap_prepare_message(key->description, im, msg);
+ msg = &data->pipe_msg;
+ im = &data->idmap_msg;
+ data->idmap = idmap;
+
+ ret = nfs_idmap_prepare_message(key->description, idmap, im, msg);
if (ret < 0)
goto out2;
@@ -683,15 +694,15 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
ret = rpc_queue_upcall(idmap->idmap_pipe, msg);
if (ret < 0)
- goto out2;
+ goto out3;
return ret;
+out3:
+ idmap->idmap_key_cons = NULL;
out2:
- kfree(im);
+ kfree(data);
out1:
- kfree(msg);
-out0:
complete_request_key(cons, ret);
return ret;
}
@@ -749,9 +760,8 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
}
if (!(im.im_status & IDMAP_STATUS_SUCCESS)) {
- ret = mlen;
- complete_request_key(cons, -ENOKEY);
- goto out_incomplete;
+ ret = -ENOKEY;
+ goto out;
}
namelen_in = strnlen(im.im_name, IDMAP_NAMESZ);
@@ -768,16 +778,32 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
out:
complete_request_key(cons, ret);
-out_incomplete:
return ret;
}
static void
idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
{
+ struct idmap_legacy_upcalldata *data = container_of(msg,
+ struct idmap_legacy_upcalldata,
+ pipe_msg);
+ struct idmap *idmap = data->idmap;
+ struct key_construction *cons;
+ if (msg->errno) {
+ cons = ACCESS_ONCE(idmap->idmap_key_cons);
+ idmap->idmap_key_cons = NULL;
+ complete_request_key(cons, msg->errno);
+ }
/* Free memory allocated in nfs_idmap_legacy_upcall() */
- kfree(msg->data);
- kfree(msg);
+ kfree(data);
+}
+
+static void
+idmap_release_pipe(struct inode *inode)
+{
+ struct rpc_inode *rpci = RPC_I(inode);
+ struct idmap *idmap = (struct idmap *)rpci->private;
+ idmap->idmap_key_cons = NULL;
}
int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c6e895f0fbf..9b47610338f 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -154,7 +154,7 @@ static void nfs_zap_caches_locked(struct inode *inode)
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = jiffies;
- memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
+ memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf));
if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
else
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 0952c791df3..69322096c32 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -69,7 +69,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
nfs_fattr_init(info->fattr);
status = rpc_call_sync(client, &msg, 0);
dprintk("%s: reply fsinfo: %d\n", __func__, status);
- if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
+ if (status == 0 && !(info->fattr->valid & NFS_ATTR_FATTR)) {
msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
msg.rpc_resp = info->fattr;
status = rpc_call_sync(client, &msg, 0);
@@ -643,7 +643,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
u64 cookie, struct page **pages, unsigned int count, int plus)
{
struct inode *dir = dentry->d_inode;
- __be32 *verf = NFS_COOKIEVERF(dir);
+ __be32 *verf = NFS_I(dir)->cookieverf;
struct nfs3_readdirargs arg = {
.fh = NFS_FH(dir),
.cookie = cookie,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 3b950dd81e8..da0618aeead 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -205,6 +205,9 @@ extern const struct dentry_operations nfs4_dentry_operations;
int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
unsigned, umode_t, int *);
+/* super.c */
+extern struct file_system_type nfs4_fs_type;
+
/* nfs4namespace.c */
rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index cbcdfaf3250..24eb663f8ed 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -74,7 +74,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
return clp;
error:
- kfree(clp);
+ nfs_free_client(clp);
return ERR_PTR(err);
}
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index acb65e7887f..eb5eb8eef4d 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -96,13 +96,15 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
struct inode *inode = file->f_path.dentry->d_inode;
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret != 0)
+ goto out;
mutex_lock(&inode->i_mutex);
ret = nfs_file_fsync_commit(file, start, end, datasync);
if (!ret && !datasync)
/* application has asked for meta-data sync */
ret = pnfs_layoutcommit_inode(inode, true);
mutex_unlock(&inode->i_mutex);
-
+out:
return ret;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a99a8d94872..1e50326d00d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3215,11 +3215,11 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
dentry->d_parent->d_name.name,
dentry->d_name.name,
(unsigned long long)cookie);
- nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
+ nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args);
res.pgbase = args.pgbase;
status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
if (status >= 0) {
- memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
+ memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE);
status += args.pgbase;
}
@@ -3653,11 +3653,11 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server)
&& (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL);
}
-/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE, and that
- * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE) bytes on
+/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_SIZE, and that
+ * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_SIZE) bytes on
* the stack.
*/
-#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT)
+#define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE)
static int buf_to_pages_noslab(const void *buf, size_t buflen,
struct page **pages, unsigned int *pgbase)
@@ -3668,7 +3668,7 @@ static int buf_to_pages_noslab(const void *buf, size_t buflen,
spages = pages;
do {
- len = min_t(size_t, PAGE_CACHE_SIZE, buflen);
+ len = min_t(size_t, PAGE_SIZE, buflen);
newpage = alloc_page(GFP_KERNEL);
if (newpage == NULL)
@@ -3737,9 +3737,10 @@ out:
static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len)
{
struct nfs4_cached_acl *acl;
+ size_t buflen = sizeof(*acl) + acl_len;
- if (pages && acl_len <= PAGE_SIZE) {
- acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL);
+ if (buflen <= PAGE_SIZE) {
+ acl = kmalloc(buflen, GFP_KERNEL);
if (acl == NULL)
goto out;
acl->cached = 1;
@@ -3781,17 +3782,15 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
.rpc_argp = &args,
.rpc_resp = &res,
};
- int ret = -ENOMEM, npages, i;
- size_t acl_len = 0;
+ unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
+ int ret = -ENOMEM, i;
- npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
/* As long as we're doing a round trip to the server anyway,
* let's be prepared for a page of acl data. */
if (npages == 0)
npages = 1;
-
- /* Add an extra page to handle the bitmap returned */
- npages++;
+ if (npages > ARRAY_SIZE(pages))
+ return -ERANGE;
for (i = 0; i < npages; i++) {
pages[i] = alloc_page(GFP_KERNEL);
@@ -3807,11 +3806,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
args.acl_len = npages * PAGE_SIZE;
args.acl_pgbase = 0;
- /* Let decode_getfacl know not to fail if the ACL data is larger than
- * the page we send as a guess */
- if (buf == NULL)
- res.acl_flags |= NFS4_ACL_LEN_REQUEST;
-
dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n",
__func__, buf, buflen, npages, args.acl_len);
ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode),
@@ -3819,20 +3813,19 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
if (ret)
goto out_free;
- acl_len = res.acl_len - res.acl_data_offset;
- if (acl_len > args.acl_len)
- nfs4_write_cached_acl(inode, NULL, 0, acl_len);
- else
- nfs4_write_cached_acl(inode, pages, res.acl_data_offset,
- acl_len);
- if (buf) {
+ /* Handle the case where the passed-in buffer is too short */
+ if (res.acl_flags & NFS4_ACL_TRUNC) {
+ /* Did the user only issue a request for the acl length? */
+ if (buf == NULL)
+ goto out_ok;
ret = -ERANGE;
- if (acl_len > buflen)
- goto out_free;
- _copy_from_pages(buf, pages, res.acl_data_offset,
- acl_len);
+ goto out_free;
}
- ret = acl_len;
+ nfs4_write_cached_acl(inode, pages, res.acl_data_offset, res.acl_len);
+ if (buf)
+ _copy_from_pages(buf, pages, res.acl_data_offset, res.acl_len);
+out_ok:
+ ret = res.acl_len;
out_free:
for (i = 0; i < npages; i++)
if (pages[i])
@@ -3890,10 +3883,13 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
.rpc_argp = &arg,
.rpc_resp = &res,
};
+ unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
int ret, i;
if (!nfs4_server_supports_acls(server))
return -EOPNOTSUPP;
+ if (npages > ARRAY_SIZE(pages))
+ return -ERANGE;
i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
if (i < 0)
return i;
@@ -6223,11 +6219,58 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
dprintk("<-- %s\n", __func__);
}
+static size_t max_response_pages(struct nfs_server *server)
+{
+ u32 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
+ return nfs_page_array_len(0, max_resp_sz);
+}
+
+static void nfs4_free_pages(struct page **pages, size_t size)
+{
+ int i;
+
+ if (!pages)
+ return;
+
+ for (i = 0; i < size; i++) {
+ if (!pages[i])
+ break;
+ __free_page(pages[i]);
+ }
+ kfree(pages);
+}
+
+static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags)
+{
+ struct page **pages;
+ int i;
+
+ pages = kcalloc(size, sizeof(struct page *), gfp_flags);
+ if (!pages) {
+ dprintk("%s: can't alloc array of %zu pages\n", __func__, size);
+ return NULL;
+ }
+
+ for (i = 0; i < size; i++) {
+ pages[i] = alloc_page(gfp_flags);
+ if (!pages[i]) {
+ dprintk("%s: failed to allocate page\n", __func__);
+ nfs4_free_pages(pages, size);
+ return NULL;
+ }
+ }
+
+ return pages;
+}
+
static void nfs4_layoutget_release(void *calldata)
{
struct nfs4_layoutget *lgp = calldata;
+ struct nfs_server *server = NFS_SERVER(lgp->args.inode);
+ size_t max_pages = max_response_pages(server);
dprintk("--> %s\n", __func__);
+ nfs4_free_pages(lgp->args.layout.pages, max_pages);
put_nfs_open_context(lgp->args.ctx);
kfree(calldata);
dprintk("<-- %s\n", __func__);
@@ -6239,9 +6282,10 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = {
.rpc_release = nfs4_layoutget_release,
};
-int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
+void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
{
struct nfs_server *server = NFS_SERVER(lgp->args.inode);
+ size_t max_pages = max_response_pages(server);
struct rpc_task *task;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET],
@@ -6259,12 +6303,19 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
dprintk("--> %s\n", __func__);
+ lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags);
+ if (!lgp->args.layout.pages) {
+ nfs4_layoutget_release(lgp);
+ return;
+ }
+ lgp->args.layout.pglen = max_pages * PAGE_SIZE;
+
lgp->res.layoutp = &lgp->args.layout;
lgp->res.seq_res.sr_slot = NULL;
nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
- return PTR_ERR(task);
+ return;
status = nfs4_wait_for_completion_rpc_task(task);
if (status == 0)
status = task->tk_status;
@@ -6272,7 +6323,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
status = pnfs_layout_process(lgp);
rpc_put_task(task);
dprintk("<-- %s status=%d\n", __func__, status);
- return status;
+ return;
}
static void
@@ -6304,12 +6355,8 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
return;
}
spin_lock(&lo->plh_inode->i_lock);
- if (task->tk_status == 0) {
- if (lrp->res.lrs_present) {
- pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
- } else
- BUG_ON(!list_empty(&lo->plh_segs));
- }
+ if (task->tk_status == 0 && lrp->res.lrs_present)
+ pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
lo->plh_block_lgets--;
spin_unlock(&lo->plh_inode->i_lock);
dprintk("<-- %s\n", __func__);
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 12a31a9dbcd..bd61221ad2c 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -23,14 +23,6 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data);
-static struct file_system_type nfs4_fs_type = {
- .owner = THIS_MODULE,
- .name = "nfs4",
- .mount = nfs_fs_mount,
- .kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
-};
-
static struct file_system_type nfs4_remote_fs_type = {
.owner = THIS_MODULE,
.name = "nfs4",
@@ -344,14 +336,8 @@ static int __init init_nfs_v4(void)
if (err)
goto out1;
- err = register_filesystem(&nfs4_fs_type);
- if (err < 0)
- goto out2;
-
register_nfs_version(&nfs_v4);
return 0;
-out2:
- nfs4_unregister_sysctl();
out1:
nfs_idmap_quit();
out:
@@ -361,7 +347,6 @@ out:
static void __exit exit_nfs_v4(void)
{
unregister_nfs_version(&nfs_v4);
- unregister_filesystem(&nfs4_fs_type);
nfs4_unregister_sysctl();
nfs_idmap_quit();
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index ca13483edd6..8dba6bd4855 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5045,22 +5045,19 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
struct nfs_getaclres *res)
{
unsigned int savep;
- __be32 *bm_p;
uint32_t attrlen,
bitmap[3] = {0};
int status;
- size_t page_len = xdr->buf->page_len;
+ unsigned int pg_offset;
res->acl_len = 0;
if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
goto out;
- bm_p = xdr->p;
- res->acl_data_offset = be32_to_cpup(bm_p) + 2;
- res->acl_data_offset <<= 2;
- /* Check if the acl data starts beyond the allocated buffer */
- if (res->acl_data_offset > page_len)
- return -ERANGE;
+ xdr_enter_page(xdr, xdr->buf->page_len);
+
+ /* Calculate the offset of the page data */
+ pg_offset = xdr->buf->head[0].iov_len;
if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
goto out;
@@ -5074,23 +5071,16 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
/* The bitmap (xdr len + bitmaps) and the attr xdr len words
* are stored with the acl data to handle the problem of
* variable length bitmaps.*/
- xdr->p = bm_p;
-
- /* We ignore &savep and don't do consistency checks on
- * the attr length. Let userspace figure it out.... */
- attrlen += res->acl_data_offset;
- if (attrlen > page_len) {
- if (res->acl_flags & NFS4_ACL_LEN_REQUEST) {
- /* getxattr interface called with a NULL buf */
- res->acl_len = attrlen;
- goto out;
- }
- dprintk("NFS: acl reply: attrlen %u > page_len %zu\n",
- attrlen, page_len);
- return -EINVAL;
- }
- xdr_read_pages(xdr, attrlen);
+ res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset;
res->acl_len = attrlen;
+
+ /* Check for receive buffer overflow */
+ if (res->acl_len > (xdr->nwords << 2) ||
+ res->acl_len + res->acl_data_offset > xdr->buf->page_len) {
+ res->acl_flags |= NFS4_ACL_TRUNC;
+ dprintk("NFS: acl reply: attrlen %u > page_len %u\n",
+ attrlen, xdr->nwords << 2);
+ }
} else
status = -EOPNOTSUPP;
@@ -6235,7 +6225,8 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
status = decode_open(xdr, res);
if (status)
goto out;
- if (decode_getfh(xdr, &res->fh) != 0)
+ status = decode_getfh(xdr, &res->fh);
+ if (status)
goto out;
decode_getfattr(xdr, res->f_attr, res->server);
out:
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index f50d3e8d6f2..ea6d111b03e 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -570,17 +570,66 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
return false;
return pgio->pg_count + req->wb_bytes <=
- OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
+ (unsigned long)pgio->pg_layout_private;
+}
+
+void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+ pnfs_generic_pg_init_read(pgio, req);
+ if (unlikely(pgio->pg_lseg == NULL))
+ return; /* Not pNFS */
+
+ pgio->pg_layout_private = (void *)
+ OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
+}
+
+static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout,
+ unsigned long *stripe_end)
+{
+ u32 stripe_off;
+ unsigned stripe_size;
+
+ if (layout->raid_algorithm == PNFS_OSD_RAID_0)
+ return true;
+
+ stripe_size = layout->stripe_unit *
+ (layout->group_width - layout->parity);
+
+ div_u64_rem(offset, stripe_size, &stripe_off);
+ if (!stripe_off)
+ return true;
+
+ *stripe_end = stripe_size - stripe_off;
+ return false;
+}
+
+void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+ unsigned long stripe_end = 0;
+
+ pnfs_generic_pg_init_write(pgio, req);
+ if (unlikely(pgio->pg_lseg == NULL))
+ return; /* Not pNFS */
+
+ if (req->wb_offset ||
+ !aligned_on_raid_stripe(req->wb_index * PAGE_SIZE,
+ &OBJIO_LSEG(pgio->pg_lseg)->layout,
+ &stripe_end)) {
+ pgio->pg_layout_private = (void *)stripe_end;
+ } else {
+ pgio->pg_layout_private = (void *)
+ OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
+ }
}
static const struct nfs_pageio_ops objio_pg_read_ops = {
- .pg_init = pnfs_generic_pg_init_read,
+ .pg_init = objio_init_read,
.pg_test = objio_pg_test,
.pg_doio = pnfs_generic_pg_readpages,
};
static const struct nfs_pageio_ops objio_pg_write_ops = {
- .pg_init = pnfs_generic_pg_init_write,
+ .pg_init = objio_init_write,
.pg_test = objio_pg_test,
.pg_doio = pnfs_generic_pg_writepages,
};
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 1a6732ed04a..311a79681e2 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -49,6 +49,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
hdr->io_start = req_offset(hdr->req);
hdr->good_bytes = desc->pg_count;
hdr->dreq = desc->pg_dreq;
+ hdr->layout_private = desc->pg_layout_private;
hdr->release = release;
hdr->completion_ops = desc->pg_completion_ops;
if (hdr->completion_ops->init_hdr)
@@ -268,6 +269,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_error = 0;
desc->pg_lseg = NULL;
desc->pg_dreq = NULL;
+ desc->pg_layout_private = NULL;
}
EXPORT_SYMBOL_GPL(nfs_pageio_init);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 76875bfcf19..2e00feacd4b 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -583,9 +583,6 @@ send_layoutget(struct pnfs_layout_hdr *lo,
struct nfs_server *server = NFS_SERVER(ino);
struct nfs4_layoutget *lgp;
struct pnfs_layout_segment *lseg = NULL;
- struct page **pages = NULL;
- int i;
- u32 max_resp_sz, max_pages;
dprintk("--> %s\n", __func__);
@@ -594,20 +591,6 @@ send_layoutget(struct pnfs_layout_hdr *lo,
if (lgp == NULL)
return NULL;
- /* allocate pages for xdr post processing */
- max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
- max_pages = nfs_page_array_len(0, max_resp_sz);
-
- pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags);
- if (!pages)
- goto out_err_free;
-
- for (i = 0; i < max_pages; i++) {
- pages[i] = alloc_page(gfp_flags);
- if (!pages[i])
- goto out_err_free;
- }
-
lgp->args.minlength = PAGE_CACHE_SIZE;
if (lgp->args.minlength > range->length)
lgp->args.minlength = range->length;
@@ -616,39 +599,19 @@ send_layoutget(struct pnfs_layout_hdr *lo,
lgp->args.type = server->pnfs_curr_ld->id;
lgp->args.inode = ino;
lgp->args.ctx = get_nfs_open_context(ctx);
- lgp->args.layout.pages = pages;
- lgp->args.layout.pglen = max_pages * PAGE_SIZE;
lgp->lsegpp = &lseg;
lgp->gfp_flags = gfp_flags;
/* Synchronously retrieve layout information from server and
* store in lseg.
*/
- nfs4_proc_layoutget(lgp);
+ nfs4_proc_layoutget(lgp, gfp_flags);
if (!lseg) {
/* remember that LAYOUTGET failed and suspend trying */
set_bit(lo_fail_bit(range->iomode), &lo->plh_flags);
}
- /* free xdr pages */
- for (i = 0; i < max_pages; i++)
- __free_page(pages[i]);
- kfree(pages);
-
return lseg;
-
-out_err_free:
- /* free any allocated xdr pages, lgp as it's not used */
- if (pages) {
- for (i = 0; i < max_pages; i++) {
- if (!pages[i])
- break;
- __free_page(pages[i]);
- }
- kfree(pages);
- }
- kfree(lgp);
- return NULL;
}
/*
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 2c6c80503ba..745aa1b39e7 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -172,7 +172,7 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server,
struct pnfs_devicelist *devlist);
extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
struct pnfs_device *dev);
-extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
+extern void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags);
extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
/* pnfs.c */
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ac6a3c55dce..d2c7f5db084 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -319,6 +319,34 @@ EXPORT_SYMBOL_GPL(nfs_sops);
static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *);
static int nfs4_validate_mount_data(void *options,
struct nfs_parsed_mount_data *args, const char *dev_name);
+
+struct file_system_type nfs4_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "nfs4",
+ .mount = nfs_fs_mount,
+ .kill_sb = nfs_kill_super,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+};
+EXPORT_SYMBOL_GPL(nfs4_fs_type);
+
+static int __init register_nfs4_fs(void)
+{
+ return register_filesystem(&nfs4_fs_type);
+}
+
+static void unregister_nfs4_fs(void)
+{
+ unregister_filesystem(&nfs4_fs_type);
+}
+#else
+static int __init register_nfs4_fs(void)
+{
+ return 0;
+}
+
+static void unregister_nfs4_fs(void)
+{
+}
#endif
static struct shrinker acl_shrinker = {
@@ -337,12 +365,18 @@ int __init register_nfs_fs(void)
if (ret < 0)
goto error_0;
- ret = nfs_register_sysctl();
+ ret = register_nfs4_fs();
if (ret < 0)
goto error_1;
+
+ ret = nfs_register_sysctl();
+ if (ret < 0)
+ goto error_2;
register_shrinker(&acl_shrinker);
return 0;
+error_2:
+ unregister_nfs4_fs();
error_1:
unregister_filesystem(&nfs_fs_type);
error_0:
@@ -356,6 +390,7 @@ void __exit unregister_nfs_fs(void)
{
unregister_shrinker(&acl_shrinker);
nfs_unregister_sysctl();
+ unregister_nfs4_fs();
unregister_filesystem(&nfs_fs_type);
}
@@ -1502,7 +1537,7 @@ static int nfs_parse_mount_options(char *raw,
/*
* verify that any proto=/mountproto= options match the address
- * familiies in the addr=/mountaddr= options.
+ * families in the addr=/mountaddr= options.
*/
if (protofamily != AF_UNSPEC &&
protofamily != mnt->nfs_server.address.ss_family)
@@ -1832,6 +1867,7 @@ static int nfs23_validate_mount_data(void *options,
memcpy(sap, &data->addr, sizeof(data->addr));
args->nfs_server.addrlen = sizeof(data->addr);
+ args->nfs_server.port = ntohs(data->addr.sin_port);
if (!nfs_verify_server_address(sap))
goto out_no_address;
@@ -2529,6 +2565,7 @@ static int nfs4_validate_mount_data(void *options,
return -EFAULT;
if (!nfs_verify_server_address(sap))
goto out_no_address;
+ args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port);
if (data->auth_flavourlen) {
if (data->auth_flavourlen > 1)
@@ -2645,4 +2682,6 @@ MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 "
module_param(send_implementation_id, ushort, 0644);
MODULE_PARM_DESC(send_implementation_id,
"Send implementation ID with NFSv4.1 exchange_id");
+MODULE_ALIAS("nfs4");
+
#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5829d0ce7cf..e3b55372726 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1814,19 +1814,19 @@ int __init nfs_init_writepagecache(void)
nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
nfs_wdata_cachep);
if (nfs_wdata_mempool == NULL)
- return -ENOMEM;
+ goto out_destroy_write_cache;
nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
sizeof(struct nfs_commit_data),
0, SLAB_HWCACHE_ALIGN,
NULL);
if (nfs_cdata_cachep == NULL)
- return -ENOMEM;
+ goto out_destroy_write_mempool;
nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
nfs_wdata_cachep);
if (nfs_commit_mempool == NULL)
- return -ENOMEM;
+ goto out_destroy_commit_cache;
/*
* NFS congestion size, scale with available memory.
@@ -1849,11 +1849,20 @@ int __init nfs_init_writepagecache(void)
nfs_congestion_kb = 256*1024;
return 0;
+
+out_destroy_commit_cache:
+ kmem_cache_destroy(nfs_cdata_cachep);
+out_destroy_write_mempool:
+ mempool_destroy(nfs_wdata_mempool);
+out_destroy_write_cache:
+ kmem_cache_destroy(nfs_wdata_cachep);
+ return -ENOMEM;
}
void nfs_destroy_writepagecache(void)
{
mempool_destroy(nfs_commit_mempool);
+ kmem_cache_destroy(nfs_cdata_cachep);
mempool_destroy(nfs_wdata_mempool);
kmem_cache_destroy(nfs_wdata_cachep);
}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index cbaf4f8bb7b..4c7bd35b187 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -651,12 +651,12 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
if (clp->cl_minorversion == 0) {
if (!clp->cl_cred.cr_principal &&
- (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
+ (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5))
return -EINVAL;
args.client_name = clp->cl_cred.cr_principal;
args.prognumber = conn->cb_prog,
args.protocol = XPRT_TRANSPORT_TCP;
- args.authflavor = clp->cl_flavor;
+ args.authflavor = clp->cl_cred.cr_flavor;
clp->cl_cb_ident = conn->cb_ident;
} else {
if (!conn->cb_xprt)
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e6173147f98..22bd0a66c35 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -231,7 +231,6 @@ struct nfs4_client {
nfs4_verifier cl_verifier; /* generated by client */
time_t cl_time; /* time of last lease renewal */
struct sockaddr_storage cl_addr; /* client ipaddress */
- u32 cl_flavor; /* setclientid pseudoflavor */
struct svc_cred cl_cred; /* setclientid principal */
clientid_t cl_clientid; /* generated by server */
nfs4_verifier cl_confirm; /* generated by server */
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index dfafeb2b05a..eb7cc91b725 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -462,9 +462,6 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
err = ERR_PTR(-ENOMEM);
inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
- if (h)
- sysctl_head_finish(h);
-
if (!inode)
goto out;
@@ -473,6 +470,8 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
d_add(dentry, inode);
out:
+ if (h)
+ sysctl_head_finish(h);
sysctl_head_finish(head);
return err;
}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 36a29b753c7..c495a3055e2 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1589,10 +1589,10 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
goto out;
}
- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
warn[cnt].w_type = QUOTA_NL_NOWARN;
+ down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!dquots[cnt])
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 4c0c7d163d1..a98b7740a0f 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -1334,9 +1334,7 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
else if (bitmap == 0)
block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1;
- reiserfs_write_unlock(sb);
bh = sb_bread(sb, block);
- reiserfs_write_lock(sb);
if (bh == NULL)
reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) "
"reading failed", __func__, block);
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index a6d4268fb6c..855da58db14 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -76,10 +76,10 @@ void reiserfs_evict_inode(struct inode *inode)
;
}
out:
+ reiserfs_write_unlock_once(inode->i_sb, depth);
clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */
dquot_drop(inode);
inode->i_blocks = 0;
- reiserfs_write_unlock_once(inode->i_sb, depth);
return;
no_delete:
diff --git a/fs/stat.c b/fs/stat.c
index b6ff11825fc..40780229a03 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -58,7 +58,7 @@ EXPORT_SYMBOL(vfs_getattr);
int vfs_fstat(unsigned int fd, struct kstat *stat)
{
int fput_needed;
- struct file *f = fget_light(fd, &fput_needed);
+ struct file *f = fget_raw_light(fd, &fput_needed);
int error = -EBADF;
if (f) {
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 8b8cc4e945f..760de723dad 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -167,7 +167,7 @@ struct ubifs_global_debug_info {
#define ubifs_dbg_msg(type, fmt, ...) \
pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__)
-#define DBG_KEY_BUF_LEN 32
+#define DBG_KEY_BUF_LEN 48
#define ubifs_dbg_msg_key(type, key, fmt, ...) do { \
char __tmp_key_buf[DBG_KEY_BUF_LEN]; \
pr_debug("UBIFS DBG " type ": " fmt "%s\n", ##__VA_ARGS__, \
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index ce33b2beb15..8640920766e 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -1749,7 +1749,10 @@ int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr)
return 0;
out_err:
- ubifs_lpt_free(c, 0);
+ if (wr)
+ ubifs_lpt_free(c, 1);
+ if (rd)
+ ubifs_lpt_free(c, 0);
return err;
}
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index c30d976b4be..edeec499c04 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -788,7 +788,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
corrupted_rescan:
/* Re-scan the corrupted data with verbose messages */
- ubifs_err("corruptio %d", ret);
+ ubifs_err("corruption %d", ret);
ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
corrupted:
ubifs_scanned_corruption(c, lnum, offs, buf);
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index eba46d4a761..94d78fc5d4e 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -1026,7 +1026,6 @@ int ubifs_replay_journal(struct ubifs_info *c)
c->replaying = 1;
lnum = c->ltail_lnum = c->lhead_lnum;
- lnum = UBIFS_LOG_LNUM;
do {
err = replay_log_leb(c, lnum, 0, c->sbuf);
if (err == 1)
@@ -1035,7 +1034,7 @@ int ubifs_replay_journal(struct ubifs_info *c)
if (err)
goto out;
lnum = ubifs_next_log_lnum(c, lnum);
- } while (lnum != UBIFS_LOG_LNUM);
+ } while (lnum != c->ltail_lnum);
err = replay_buds(c);
if (err)
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index c3fa6c5327a..71a197f0f93 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1157,9 +1157,6 @@ static int check_free_space(struct ubifs_info *c)
*
* This function mounts UBIFS file system. Returns zero in case of success and
* a negative error code in case of failure.
- *
- * Note, the function does not de-allocate resources it it fails half way
- * through, and the caller has to do this instead.
*/
static int mount_ubifs(struct ubifs_info *c)
{
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 7f3f7ba3df6..d1c6093fd3d 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -39,20 +39,24 @@
#include "udf_i.h"
#include "udf_sb.h"
-static int udf_adinicb_readpage(struct file *file, struct page *page)
+static void __udf_adinicb_readpage(struct page *page)
{
struct inode *inode = page->mapping->host;
char *kaddr;
struct udf_inode_info *iinfo = UDF_I(inode);
- BUG_ON(!PageLocked(page));
-
kaddr = kmap(page);
- memset(kaddr, 0, PAGE_CACHE_SIZE);
memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size);
+ memset(kaddr + inode->i_size, 0, PAGE_CACHE_SIZE - inode->i_size);
flush_dcache_page(page);
SetPageUptodate(page);
kunmap(page);
+}
+
+static int udf_adinicb_readpage(struct file *file, struct page *page)
+{
+ BUG_ON(!PageLocked(page));
+ __udf_adinicb_readpage(page);
unlock_page(page);
return 0;
@@ -77,6 +81,25 @@ static int udf_adinicb_writepage(struct page *page,
return 0;
}
+static int udf_adinicb_write_begin(struct file *file,
+ struct address_space *mapping, loff_t pos,
+ unsigned len, unsigned flags, struct page **pagep,
+ void **fsdata)
+{
+ struct page *page;
+
+ if (WARN_ON_ONCE(pos >= PAGE_CACHE_SIZE))
+ return -EIO;
+ page = grab_cache_page_write_begin(mapping, 0, flags);
+ if (!page)
+ return -ENOMEM;
+ *pagep = page;
+
+ if (!PageUptodate(page) && len != PAGE_CACHE_SIZE)
+ __udf_adinicb_readpage(page);
+ return 0;
+}
+
static int udf_adinicb_write_end(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
@@ -98,8 +121,8 @@ static int udf_adinicb_write_end(struct file *file,
const struct address_space_operations udf_adinicb_aops = {
.readpage = udf_adinicb_readpage,
.writepage = udf_adinicb_writepage,
- .write_begin = simple_write_begin,
- .write_end = udf_adinicb_write_end,
+ .write_begin = udf_adinicb_write_begin,
+ .write_end = udf_adinicb_write_end,
};
static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index fafaad795cd..aa233469b3c 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1124,14 +1124,17 @@ int udf_setsize(struct inode *inode, loff_t newsize)
if (err)
return err;
down_write(&iinfo->i_data_sem);
- } else
+ } else {
iinfo->i_lenAlloc = newsize;
+ goto set_size;
+ }
}
err = udf_extend_file(inode, newsize);
if (err) {
up_write(&iinfo->i_data_sem);
return err;
}
+set_size:
truncate_setsize(inode, newsize);
up_write(&iinfo->i_data_sem);
} else {
diff --git a/fs/udf/super.c b/fs/udf/super.c
index dcbf98722af..18fc038a438 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1344,6 +1344,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
udf_err(sb, "error loading logical volume descriptor: "
"Partition table too long (%u > %lu)\n", table_len,
sb->s_blocksize - sizeof(*lvd));
+ ret = 1;
goto out_bh;
}
@@ -1388,8 +1389,10 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
UDF_ID_SPARABLE,
strlen(UDF_ID_SPARABLE))) {
if (udf_load_sparable_map(sb, map,
- (struct sparablePartitionMap *)gpm) < 0)
+ (struct sparablePartitionMap *)gpm) < 0) {
+ ret = 1;
goto out_bh;
+ }
} else if (!strncmp(upm2->partIdent.ident,
UDF_ID_METADATA,
strlen(UDF_ID_METADATA))) {
@@ -2000,6 +2003,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
if (!silent)
pr_notice("Rescanning with blocksize %d\n",
UDF_DEFAULT_BLOCKSIZE);
+ brelse(sbi->s_lvid_bh);
+ sbi->s_lvid_bh = NULL;
uopt.blocksize = UDF_DEFAULT_BLOCKSIZE;
ret = udf_load_vrs(sb, &uopt, silent, &fileset);
}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index d7a9dd735e1..933b7930b86 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -96,6 +96,7 @@ xfs_buf_lru_add(
atomic_inc(&bp->b_hold);
list_add_tail(&bp->b_lru, &btp->bt_lru);
btp->bt_lru_nr++;
+ bp->b_lru_flags &= ~_XBF_LRU_DISPOSE;
}
spin_unlock(&btp->bt_lru_lock);
}
@@ -154,7 +155,8 @@ xfs_buf_stale(
struct xfs_buftarg *btp = bp->b_target;
spin_lock(&btp->bt_lru_lock);
- if (!list_empty(&bp->b_lru)) {
+ if (!list_empty(&bp->b_lru) &&
+ !(bp->b_lru_flags & _XBF_LRU_DISPOSE)) {
list_del_init(&bp->b_lru);
btp->bt_lru_nr--;
atomic_dec(&bp->b_hold);
@@ -1501,6 +1503,7 @@ xfs_buftarg_shrink(
*/
list_move(&bp->b_lru, &dispose);
btp->bt_lru_nr--;
+ bp->b_lru_flags |= _XBF_LRU_DISPOSE;
}
spin_unlock(&btp->bt_lru_lock);
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index d03b73b9604..7c0b6a0a155 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -38,27 +38,28 @@ typedef enum {
XBRW_ZERO = 3, /* Zero target memory */
} xfs_buf_rw_t;
-#define XBF_READ (1 << 0) /* buffer intended for reading from device */
-#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
-#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */
-#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
-#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
-#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */
+#define XBF_READ (1 << 0) /* buffer intended for reading from device */
+#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
+#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */
+#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
+#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
+#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */
/* I/O hints for the BIO layer */
-#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */
-#define XBF_FUA (1 << 11)/* force cache write through mode */
-#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */
+#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */
+#define XBF_FUA (1 << 11)/* force cache write through mode */
+#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */
/* flags used only as arguments to access routines */
-#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */
-#define XBF_UNMAPPED (1 << 17)/* do not map the buffer */
+#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */
+#define XBF_UNMAPPED (1 << 17)/* do not map the buffer */
/* flags used only internally */
-#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */
-#define _XBF_KMEM (1 << 21)/* backed by heap memory */
-#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */
-#define _XBF_COMPOUND (1 << 23)/* compound buffer */
+#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */
+#define _XBF_KMEM (1 << 21)/* backed by heap memory */
+#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */
+#define _XBF_COMPOUND (1 << 23)/* compound buffer */
+#define _XBF_LRU_DISPOSE (1 << 24)/* buffer being discarded */
typedef unsigned int xfs_buf_flags_t;
@@ -72,12 +73,13 @@ typedef unsigned int xfs_buf_flags_t;
{ XBF_SYNCIO, "SYNCIO" }, \
{ XBF_FUA, "FUA" }, \
{ XBF_FLUSH, "FLUSH" }, \
- { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\
+ { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\
{ XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\
{ _XBF_PAGES, "PAGES" }, \
{ _XBF_KMEM, "KMEM" }, \
{ _XBF_DELWRI_Q, "DELWRI_Q" }, \
- { _XBF_COMPOUND, "COMPOUND" }
+ { _XBF_COMPOUND, "COMPOUND" }, \
+ { _XBF_LRU_DISPOSE, "LRU_DISPOSE" }
typedef struct xfs_buftarg {
dev_t bt_dev;
@@ -124,7 +126,12 @@ typedef struct xfs_buf {
xfs_buf_flags_t b_flags; /* status flags */
struct semaphore b_sema; /* semaphore for lockables */
+ /*
+ * concurrent access to b_lru and b_lru_flags are protected by
+ * bt_lru_lock and not by b_sema
+ */
struct list_head b_lru; /* lru list */
+ xfs_buf_flags_t b_lru_flags; /* internal lru status flags */
wait_queue_head_t b_waiters; /* unpin waiters */
struct list_head b_list;
struct xfs_perag *b_pag; /* contains rbtree root */
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index f9c3fe304a1..69cf4fcde03 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -179,12 +179,14 @@ xfs_ioc_trim(
* used by the fstrim application. In the end it really doesn't
* matter as trimming blocks is an advisory interface.
*/
+ if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) ||
+ range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)))
+ return -XFS_ERROR(EINVAL);
+
start = BTOBB(range.start);
end = start + BTOBBT(range.len) - 1;
minlen = BTOBB(max_t(u64, granularity, range.minlen));
- if (XFS_BB_TO_FSB(mp, start) >= mp->m_sb.sb_dblocks)
- return -XFS_ERROR(EINVAL);
if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1)
end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1;
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 21e37b55f7e..5aceb3f8ecd 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -962,23 +962,22 @@ xfs_dialloc(
if (!pag->pagi_freecount && !okalloc)
goto nextag;
+ /*
+ * Then read in the AGI buffer and recheck with the AGI buffer
+ * lock held.
+ */
error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
if (error)
goto out_error;
- /*
- * Once the AGI has been read in we have to recheck
- * pagi_freecount with the AGI buffer lock held.
- */
if (pag->pagi_freecount) {
xfs_perag_put(pag);
goto out_alloc;
}
- if (!okalloc) {
- xfs_trans_brelse(tp, agbp);
- goto nextag;
- }
+ if (!okalloc)
+ goto nextag_relse_buffer;
+
error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced);
if (error) {
@@ -1007,6 +1006,8 @@ xfs_dialloc(
return 0;
}
+nextag_relse_buffer:
+ xfs_trans_brelse(tp, agbp);
nextag:
xfs_perag_put(pag);
if (++agno == mp->m_sb.sb_agcount)
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 92d4331cd4f..ca28a4ba4b5 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -857,7 +857,7 @@ xfs_rtbuf_get(
xfs_buf_t *bp; /* block buffer, result */
xfs_inode_t *ip; /* bitmap or summary inode */
xfs_bmbt_irec_t map;
- int nmap;
+ int nmap = 1;
int error; /* error value */
ip = issum ? mp->m_rsumip : mp->m_rbmip;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index bdaf4cb9f4a..19e2380fb86 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -919,6 +919,7 @@ xfs_fs_put_super(
struct xfs_mount *mp = XFS_M(sb);
xfs_filestream_unmount(mp);
+ cancel_delayed_work_sync(&mp->m_sync_work);
xfs_unmountfs(mp);
xfs_syncd_stop(mp);
xfs_freesb(mp);