summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Makefile2
-rw-r--r--fs/aio.c174
-rw-r--r--fs/bad_inode.c7
-rw-r--r--fs/btrfs/async-thread.c44
-rw-r--r--fs/btrfs/async-thread.h28
-rw-r--r--fs/btrfs/backref.c14
-rw-r--r--fs/btrfs/btrfs_inode.h6
-rw-r--r--fs/btrfs/ctree.c20
-rw-r--r--fs/btrfs/ctree.h4
-rw-r--r--fs/btrfs/delayed-inode.c4
-rw-r--r--fs/btrfs/disk-io.c88
-rw-r--r--fs/btrfs/extent-tree.c308
-rw-r--r--fs/btrfs/extent_io.c5
-rw-r--r--fs/btrfs/file-item.c2
-rw-r--r--fs/btrfs/file.c31
-rw-r--r--fs/btrfs/inode.c371
-rw-r--r--fs/btrfs/ioctl.c68
-rw-r--r--fs/btrfs/ordered-data.c124
-rw-r--r--fs/btrfs/ordered-data.h5
-rw-r--r--fs/btrfs/qgroup.c170
-rw-r--r--fs/btrfs/qgroup.h1
-rw-r--r--fs/btrfs/raid56.c9
-rw-r--r--fs/btrfs/reada.c3
-rw-r--r--fs/btrfs/scrub.c25
-rw-r--r--fs/btrfs/super.c60
-rw-r--r--fs/btrfs/sysfs.c2
-rw-r--r--fs/btrfs/transaction.c33
-rw-r--r--fs/btrfs/transaction.h1
-rw-r--r--fs/btrfs/tree-log.c94
-rw-r--r--fs/btrfs/tree-log.h2
-rw-r--r--fs/btrfs/ulist.h15
-rw-r--r--fs/btrfs/volumes.c65
-rw-r--r--fs/ceph/acl.c14
-rw-r--r--fs/ceph/caps.c2
-rw-r--r--fs/ceph/file.c24
-rw-r--r--fs/ceph/mds_client.c16
-rw-r--r--fs/ceph/super.c2
-rw-r--r--fs/ceph/xattr.c4
-rw-r--r--fs/cifs/Kconfig35
-rw-r--r--fs/cifs/cifsfs.c26
-rw-r--r--fs/cifs/cifsfs.h4
-rw-r--r--fs/cifs/cifsglob.h10
-rw-r--r--fs/cifs/cifspdu.h23
-rw-r--r--fs/cifs/connect.c21
-rw-r--r--fs/cifs/dir.c8
-rw-r--r--fs/cifs/file.c10
-rw-r--r--fs/cifs/inode.c25
-rw-r--r--fs/cifs/misc.c7
-rw-r--r--fs/cifs/readdir.c6
-rw-r--r--fs/cifs/sess.c7
-rw-r--r--fs/cifs/smb1ops.c7
-rw-r--r--fs/cifs/smb2file.c2
-rw-r--r--fs/cifs/smb2inode.c2
-rw-r--r--fs/cifs/smb2maperror.c4
-rw-r--r--fs/cifs/smb2misc.c17
-rw-r--r--fs/cifs/smb2ops.c172
-rw-r--r--fs/cifs/smb2pdu.c23
-rw-r--r--fs/cifs/smb2pdu.h6
-rw-r--r--fs/cifs/smbfsctl.h2
-rw-r--r--fs/dcache.c196
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/eventpoll.c3
-rw-r--r--fs/ext2/super.c2
-rw-r--r--fs/ext3/super.c5
-rw-r--r--fs/ext4/ext4.h18
-rw-r--r--fs/ext4/extents.c88
-rw-r--r--fs/ext4/inode.c44
-rw-r--r--fs/ext4/mballoc.c5
-rw-r--r--fs/ext4/namei.c59
-rw-r--r--fs/ext4/resize.c2
-rw-r--r--fs/ext4/super.c5
-rw-r--r--fs/f2fs/Kconfig4
-rw-r--r--fs/f2fs/checkpoint.c80
-rw-r--r--fs/f2fs/data.c19
-rw-r--r--fs/f2fs/debug.c4
-rw-r--r--fs/f2fs/dir.c6
-rw-r--r--fs/f2fs/f2fs.h26
-rw-r--r--fs/f2fs/file.c60
-rw-r--r--fs/f2fs/gc.c8
-rw-r--r--fs/f2fs/gc.h2
-rw-r--r--fs/f2fs/hash.c7
-rw-r--r--fs/f2fs/inline.c38
-rw-r--r--fs/f2fs/namei.c23
-rw-r--r--fs/f2fs/node.c80
-rw-r--r--fs/f2fs/recovery.c30
-rw-r--r--fs/f2fs/segment.c53
-rw-r--r--fs/f2fs/segment.h2
-rw-r--r--fs/f2fs/super.c32
-rw-r--r--fs/f2fs/xattr.c2
-rw-r--r--fs/fs_pin.c78
-rw-r--r--fs/fuse/dir.c7
-rw-r--r--fs/fuse/file.c4
-rw-r--r--fs/hostfs/hostfs.h1
-rw-r--r--fs/hostfs/hostfs_kern.c30
-rw-r--r--fs/hostfs/hostfs_user.c28
-rw-r--r--fs/internal.h7
-rw-r--r--fs/isofs/inode.c15
-rw-r--r--fs/isofs/isofs.h23
-rw-r--r--fs/isofs/rock.c39
-rw-r--r--fs/jbd2/commit.c21
-rw-r--r--fs/jbd2/journal.c56
-rw-r--r--fs/jbd2/recovery.c33
-rw-r--r--fs/jbd2/revoke.c6
-rw-r--r--fs/lockd/svc.c4
-rw-r--r--fs/locks.c88
-rw-r--r--fs/mount.h2
-rw-r--r--fs/namei.c34
-rw-r--r--fs/namespace.c77
-rw-r--r--fs/nfs/blocklayout/blocklayout.c101
-rw-r--r--fs/nfs/callback.c12
-rw-r--r--fs/nfs/client.c30
-rw-r--r--fs/nfs/delegation.c34
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c208
-rw-r--r--fs/nfs/direct.c33
-rw-r--r--fs/nfs/filelayout/filelayout.c299
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c2
-rw-r--r--fs/nfs/getroot.c2
-rw-r--r--fs/nfs/inode.c9
-rw-r--r--fs/nfs/internal.h11
-rw-r--r--fs/nfs/nfs3acl.c7
-rw-r--r--fs/nfs/nfs3proc.c21
-rw-r--r--fs/nfs/nfs4_fs.h21
-rw-r--r--fs/nfs/nfs4client.c5
-rw-r--r--fs/nfs/nfs4proc.c274
-rw-r--r--fs/nfs/nfs4state.c45
-rw-r--r--fs/nfs/nfs4trace.h28
-rw-r--r--fs/nfs/nfs4xdr.c2
-rw-r--r--fs/nfs/objlayout/objio_osd.c24
-rw-r--r--fs/nfs/objlayout/objlayout.c81
-rw-r--r--fs/nfs/objlayout/objlayout.h8
-rw-r--r--fs/nfs/pagelist.c316
-rw-r--r--fs/nfs/pnfs.c178
-rw-r--r--fs/nfs/pnfs.h45
-rw-r--r--fs/nfs/proc.c27
-rw-r--r--fs/nfs/read.c54
-rw-r--r--fs/nfs/super.c12
-rw-r--r--fs/nfs/write.c163
-rw-r--r--fs/nfs_common/nfsacl.c5
-rw-r--r--fs/nfsd/nfs4xdr.c14
-rw-r--r--fs/nilfs2/super.c2
-rw-r--r--fs/notify/fdinfo.c4
-rw-r--r--fs/ocfs2/cluster/quorum.c13
-rw-r--r--fs/ocfs2/cluster/tcp.c45
-rw-r--r--fs/ocfs2/cluster/tcp.h1
-rw-r--r--fs/ocfs2/ioctl.c129
-rw-r--r--fs/pnode.c1
-rw-r--r--fs/quota/dquot.c180
-rw-r--r--fs/quota/kqid.c2
-rw-r--r--fs/quota/netlink.c3
-rw-r--r--fs/quota/quota.c6
-rw-r--r--fs/reiserfs/do_balan.c111
-rw-r--r--fs/reiserfs/journal.c22
-rw-r--r--fs/reiserfs/lbalance.c5
-rw-r--r--fs/reiserfs/reiserfs.h9
-rw-r--r--fs/reiserfs/super.c6
-rw-r--r--fs/super.c20
-rw-r--r--fs/sync.c2
-rw-r--r--fs/ubifs/commit.c2
-rw-r--r--fs/ubifs/io.c2
-rw-r--r--fs/ubifs/log.c12
-rw-r--r--fs/ubifs/lpt.c5
-rw-r--r--fs/ubifs/lpt_commit.c7
-rw-r--r--fs/ubifs/master.c7
-rw-r--r--fs/ubifs/orphan.c1
-rw-r--r--fs/ubifs/recovery.c5
-rw-r--r--fs/ubifs/sb.c4
-rw-r--r--fs/ubifs/scan.c14
-rw-r--r--fs/ubifs/super.c19
-rw-r--r--fs/ubifs/tnc.c1
-rw-r--r--fs/ubifs/tnc_commit.c1
-rw-r--r--fs/ubifs/ubifs.h4
-rw-r--r--fs/udf/file.c22
-rw-r--r--fs/udf/ialloc.c28
-rw-r--r--fs/udf/inode.c161
-rw-r--r--fs/udf/lowlevel.c2
-rw-r--r--fs/udf/namei.c156
-rw-r--r--fs/udf/super.c71
-rw-r--r--fs/udf/symlink.c2
-rw-r--r--fs/udf/udfdecl.h3
-rw-r--r--fs/udf/unicode.c9
-rw-r--r--fs/ufs/inode.c7
-rw-r--r--fs/ufs/namei.c14
-rw-r--r--fs/xfs/Kconfig1
-rw-r--r--fs/xfs/Makefile71
-rw-r--r--fs/xfs/libxfs/xfs_ag.h (renamed from fs/xfs/xfs_ag.h)0
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c (renamed from fs/xfs/xfs_alloc.c)20
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h (renamed from fs/xfs/xfs_alloc.h)0
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c (renamed from fs/xfs/xfs_alloc_btree.c)6
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.h (renamed from fs/xfs/xfs_alloc_btree.h)0
-rw-r--r--fs/xfs/libxfs/xfs_attr.c (renamed from fs/xfs/xfs_attr.c)92
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c (renamed from fs/xfs/xfs_attr_leaf.c)78
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.h (renamed from fs/xfs/xfs_attr_leaf.h)0
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c (renamed from fs/xfs/xfs_attr_remote.c)22
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.h (renamed from fs/xfs/xfs_attr_remote.h)0
-rw-r--r--fs/xfs/libxfs/xfs_attr_sf.h (renamed from fs/xfs/xfs_attr_sf.h)0
-rw-r--r--fs/xfs/libxfs/xfs_bit.h (renamed from fs/xfs/xfs_bit.h)0
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c (renamed from fs/xfs/xfs_bmap.c)78
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h (renamed from fs/xfs/xfs_bmap.h)0
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c (renamed from fs/xfs/xfs_bmap_btree.c)99
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.h (renamed from fs/xfs/xfs_bmap_btree.h)0
-rw-r--r--fs/xfs/libxfs/xfs_btree.c (renamed from fs/xfs/xfs_btree.c)46
-rw-r--r--fs/xfs/libxfs/xfs_btree.h (renamed from fs/xfs/xfs_btree.h)2
-rw-r--r--fs/xfs/libxfs/xfs_cksum.h (renamed from fs/xfs/xfs_cksum.h)0
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c (renamed from fs/xfs/xfs_da_btree.c)112
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.h (renamed from fs/xfs/xfs_da_btree.h)0
-rw-r--r--fs/xfs/libxfs/xfs_da_format.c (renamed from fs/xfs/xfs_da_format.c)0
-rw-r--r--fs/xfs/libxfs/xfs_da_format.h (renamed from fs/xfs/xfs_da_format.h)0
-rw-r--r--fs/xfs/libxfs/xfs_dinode.h (renamed from fs/xfs/xfs_dinode.h)0
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c (renamed from fs/xfs/xfs_dir2.c)24
-rw-r--r--fs/xfs/libxfs/xfs_dir2.h (renamed from fs/xfs/xfs_dir2.h)0
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c (renamed from fs/xfs/xfs_dir2_block.c)18
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c (renamed from fs/xfs/xfs_dir2_data.c)10
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c (renamed from fs/xfs/xfs_dir2_leaf.c)24
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c (renamed from fs/xfs/xfs_dir2_node.c)40
-rw-r--r--fs/xfs/libxfs/xfs_dir2_priv.h (renamed from fs/xfs/xfs_dir2_priv.h)0
-rw-r--r--fs/xfs/libxfs/xfs_dir2_sf.c (renamed from fs/xfs/xfs_dir2_sf.c)75
-rw-r--r--fs/xfs/libxfs/xfs_dquot_buf.c (renamed from fs/xfs/xfs_dquot_buf.c)6
-rw-r--r--fs/xfs/libxfs/xfs_format.h (renamed from fs/xfs/xfs_format.h)14
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c (renamed from fs/xfs/xfs_ialloc.c)34
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.h (renamed from fs/xfs/xfs_ialloc.h)0
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c (renamed from fs/xfs/xfs_ialloc_btree.c)6
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.h (renamed from fs/xfs/xfs_ialloc_btree.h)0
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c (renamed from fs/xfs/xfs_inode_buf.c)10
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.h (renamed from fs/xfs/xfs_inode_buf.h)0
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c (renamed from fs/xfs/xfs_inode_fork.c)36
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.h (renamed from fs/xfs/xfs_inode_fork.h)0
-rw-r--r--fs/xfs/libxfs/xfs_inum.h (renamed from fs/xfs/xfs_inum.h)4
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h (renamed from fs/xfs/xfs_log_format.h)4
-rw-r--r--fs/xfs/libxfs/xfs_log_recover.h (renamed from fs/xfs/xfs_log_recover.h)0
-rw-r--r--fs/xfs/libxfs/xfs_log_rlimit.c (renamed from fs/xfs/xfs_log_rlimit.c)0
-rw-r--r--fs/xfs/libxfs/xfs_quota_defs.h (renamed from fs/xfs/xfs_quota_defs.h)2
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c (renamed from fs/xfs/xfs_rtbitmap.c)0
-rw-r--r--fs/xfs/libxfs/xfs_sb.c (renamed from fs/xfs/xfs_sb.c)56
-rw-r--r--fs/xfs/libxfs/xfs_sb.h (renamed from fs/xfs/xfs_sb.h)8
-rw-r--r--fs/xfs/libxfs/xfs_shared.h (renamed from fs/xfs/xfs_shared.h)0
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c (renamed from fs/xfs/xfs_symlink_remote.c)6
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c (renamed from fs/xfs/xfs_trans_resv.c)0
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.h (renamed from fs/xfs/xfs_trans_resv.h)0
-rw-r--r--fs/xfs/libxfs/xfs_trans_space.h (renamed from fs/xfs/xfs_trans_space.h)0
-rw-r--r--fs/xfs/xfs_acl.c8
-rw-r--r--fs/xfs/xfs_aops.c79
-rw-r--r--fs/xfs/xfs_attr_inactive.c22
-rw-r--r--fs/xfs/xfs_attr_list.c38
-rw-r--r--fs/xfs/xfs_bmap_util.c194
-rw-r--r--fs/xfs/xfs_buf.c40
-rw-r--r--fs/xfs/xfs_buf.h2
-rw-r--r--fs/xfs/xfs_buf_item.c4
-rw-r--r--fs/xfs/xfs_dir2_readdir.c4
-rw-r--r--fs/xfs/xfs_discard.c18
-rw-r--r--fs/xfs/xfs_dquot.c41
-rw-r--r--fs/xfs/xfs_dquot.h15
-rw-r--r--fs/xfs/xfs_error.c25
-rw-r--r--fs/xfs/xfs_error.h13
-rw-r--r--fs/xfs/xfs_export.c10
-rw-r--r--fs/xfs/xfs_extfree_item.c2
-rw-r--r--fs/xfs/xfs_file.c102
-rw-r--r--fs/xfs/xfs_filestream.c4
-rw-r--r--fs/xfs/xfs_fs.h7
-rw-r--r--fs/xfs/xfs_fsops.c42
-rw-r--r--fs/xfs/xfs_icache.c148
-rw-r--r--fs/xfs/xfs_icache.h13
-rw-r--r--fs/xfs/xfs_inode.c68
-rw-r--r--fs/xfs/xfs_inode.h10
-rw-r--r--fs/xfs/xfs_inode_item.c2
-rw-r--r--fs/xfs/xfs_ioctl.c266
-rw-r--r--fs/xfs/xfs_ioctl32.c111
-rw-r--r--fs/xfs/xfs_iomap.c54
-rw-r--r--fs/xfs/xfs_iops.c72
-rw-r--r--fs/xfs/xfs_itable.c579
-rw-r--r--fs/xfs/xfs_itable.h23
-rw-r--r--fs/xfs/xfs_linux.h27
-rw-r--r--fs/xfs/xfs_log.c69
-rw-r--r--fs/xfs/xfs_log_cil.c8
-rw-r--r--fs/xfs/xfs_log_priv.h2
-rw-r--r--fs/xfs/xfs_log_recover.c284
-rw-r--r--fs/xfs/xfs_mount.c97
-rw-r--r--fs/xfs/xfs_mount.h1
-rw-r--r--fs/xfs/xfs_mru_cache.c14
-rw-r--r--fs/xfs/xfs_qm.c229
-rw-r--r--fs/xfs/xfs_qm.h1
-rw-r--r--fs/xfs/xfs_qm_bhv.c2
-rw-r--r--fs/xfs/xfs_qm_syscalls.c46
-rw-r--r--fs/xfs/xfs_quotaops.c20
-rw-r--r--fs/xfs/xfs_rtalloc.c24
-rw-r--r--fs/xfs/xfs_rtalloc.h2
-rw-r--r--fs/xfs/xfs_super.c132
-rw-r--r--fs/xfs/xfs_super.h15
-rw-r--r--fs/xfs/xfs_symlink.c30
-rw-r--r--fs/xfs/xfs_sysfs.c165
-rw-r--r--fs/xfs/xfs_sysfs.h59
-rw-r--r--fs/xfs/xfs_trans.c10
-rw-r--r--fs/xfs/xfs_trans_ail.c4
-rw-r--r--fs/xfs/xfs_trans_buf.c37
-rw-r--r--fs/xfs/xfs_trans_dquot.c4
-rw-r--r--fs/xfs/xfs_types.h29
-rw-r--r--fs/xfs/xfs_vnode.h46
-rw-r--r--fs/xfs/xfs_xattr.c6
298 files changed, 6511 insertions, 4786 deletions
diff --git a/fs/Makefile b/fs/Makefile
index 4030cbfbc9a..90c88529892 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o splice.o sync.o utimes.o \
- stack.o fs_struct.o statfs.o
+ stack.o fs_struct.o statfs.o fs_pin.o
ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o block_dev.o direct-io.o mpage.o
diff --git a/fs/aio.c b/fs/aio.c
index bd7ec2cc267..733750096b7 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -141,6 +141,7 @@ struct kioctx {
struct {
unsigned tail;
+ unsigned completed_events;
spinlock_t completion_lock;
} ____cacheline_aligned_in_smp;
@@ -192,7 +193,6 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
}
file->f_flags = O_RDWR;
- file->private_data = ctx;
return file;
}
@@ -202,7 +202,7 @@ static struct dentry *aio_mount(struct file_system_type *fs_type,
static const struct dentry_operations ops = {
.d_dname = simple_dname,
};
- return mount_pseudo(fs_type, "aio:", NULL, &ops, 0xa10a10a1);
+ return mount_pseudo(fs_type, "aio:", NULL, &ops, AIO_RING_MAGIC);
}
/* aio_setup
@@ -556,8 +556,7 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
struct aio_ring *ring;
spin_lock(&mm->ioctx_lock);
- rcu_read_lock();
- table = rcu_dereference(mm->ioctx_table);
+ table = rcu_dereference_raw(mm->ioctx_table);
while (1) {
if (table)
@@ -565,7 +564,6 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
if (!table->table[i]) {
ctx->id = i;
table->table[i] = ctx;
- rcu_read_unlock();
spin_unlock(&mm->ioctx_lock);
/* While kioctx setup is in progress,
@@ -579,8 +577,6 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
}
new_nr = (table ? table->nr : 1) * 4;
-
- rcu_read_unlock();
spin_unlock(&mm->ioctx_lock);
table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) *
@@ -591,8 +587,7 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
table->nr = new_nr;
spin_lock(&mm->ioctx_lock);
- rcu_read_lock();
- old = rcu_dereference(mm->ioctx_table);
+ old = rcu_dereference_raw(mm->ioctx_table);
if (!old) {
rcu_assign_pointer(mm->ioctx_table, table);
@@ -739,12 +734,9 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
spin_lock(&mm->ioctx_lock);
- rcu_read_lock();
- table = rcu_dereference(mm->ioctx_table);
-
+ table = rcu_dereference_raw(mm->ioctx_table);
WARN_ON(ctx != table->table[ctx->id]);
table->table[ctx->id] = NULL;
- rcu_read_unlock();
spin_unlock(&mm->ioctx_lock);
/* percpu_ref_kill() will do the necessary call_rcu() */
@@ -793,40 +785,35 @@ EXPORT_SYMBOL(wait_on_sync_kiocb);
*/
void exit_aio(struct mm_struct *mm)
{
- struct kioctx_table *table;
- struct kioctx *ctx;
- unsigned i = 0;
-
- while (1) {
- rcu_read_lock();
- table = rcu_dereference(mm->ioctx_table);
-
- do {
- if (!table || i >= table->nr) {
- rcu_read_unlock();
- rcu_assign_pointer(mm->ioctx_table, NULL);
- if (table)
- kfree(table);
- return;
- }
+ struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table);
+ int i;
- ctx = table->table[i++];
- } while (!ctx);
+ if (!table)
+ return;
- rcu_read_unlock();
+ for (i = 0; i < table->nr; ++i) {
+ struct kioctx *ctx = table->table[i];
+ struct completion requests_done =
+ COMPLETION_INITIALIZER_ONSTACK(requests_done);
+ if (!ctx)
+ continue;
/*
- * We don't need to bother with munmap() here -
- * exit_mmap(mm) is coming and it'll unmap everything.
- * Since aio_free_ring() uses non-zero ->mmap_size
- * as indicator that it needs to unmap the area,
- * just set it to 0; aio_free_ring() is the only
- * place that uses ->mmap_size, so it's safe.
+ * We don't need to bother with munmap() here - exit_mmap(mm)
+ * is coming and it'll unmap everything. And we simply can't,
+ * this is not necessarily our ->mm.
+ * Since kill_ioctx() uses non-zero ->mmap_size as indicator
+ * that it needs to unmap the area, just set it to 0.
*/
ctx->mmap_size = 0;
+ kill_ioctx(mm, ctx, &requests_done);
- kill_ioctx(mm, ctx, NULL);
+ /* Wait until all IO for the context is done. */
+ wait_for_completion(&requests_done);
}
+
+ RCU_INIT_POINTER(mm->ioctx_table, NULL);
+ kfree(table);
}
static void put_reqs_available(struct kioctx *ctx, unsigned nr)
@@ -834,10 +821,8 @@ static void put_reqs_available(struct kioctx *ctx, unsigned nr)
struct kioctx_cpu *kcpu;
unsigned long flags;
- preempt_disable();
- kcpu = this_cpu_ptr(ctx->cpu);
-
local_irq_save(flags);
+ kcpu = this_cpu_ptr(ctx->cpu);
kcpu->reqs_available += nr;
while (kcpu->reqs_available >= ctx->req_batch * 2) {
@@ -846,7 +831,6 @@ static void put_reqs_available(struct kioctx *ctx, unsigned nr)
}
local_irq_restore(flags);
- preempt_enable();
}
static bool get_reqs_available(struct kioctx *ctx)
@@ -855,10 +839,8 @@ static bool get_reqs_available(struct kioctx *ctx)
bool ret = false;
unsigned long flags;
- preempt_disable();
- kcpu = this_cpu_ptr(ctx->cpu);
-
local_irq_save(flags);
+ kcpu = this_cpu_ptr(ctx->cpu);
if (!kcpu->reqs_available) {
int old, avail = atomic_read(&ctx->reqs_available);
@@ -878,10 +860,71 @@ static bool get_reqs_available(struct kioctx *ctx)
kcpu->reqs_available--;
out:
local_irq_restore(flags);
- preempt_enable();
return ret;
}
+/* refill_reqs_available
+ * Updates the reqs_available reference counts used for tracking the
+ * number of free slots in the completion ring. This can be called
+ * from aio_complete() (to optimistically update reqs_available) or
+ * from aio_get_req() (the case where we're out of events). It must be
+ * called holding ctx->completion_lock.
+ */
+static void refill_reqs_available(struct kioctx *ctx, unsigned head,
+ unsigned tail)
+{
+ unsigned events_in_ring, completed;
+
+ /* Clamp head since userland can write to it. */
+ head %= ctx->nr_events;
+ if (head <= tail)
+ events_in_ring = tail - head;
+ else
+ events_in_ring = ctx->nr_events - (head - tail);
+
+ completed = ctx->completed_events;
+ if (events_in_ring < completed)
+ completed -= events_in_ring;
+ else
+ completed = 0;
+
+ if (!completed)
+ return;
+
+ ctx->completed_events -= completed;
+ put_reqs_available(ctx, completed);
+}
+
+/* user_refill_reqs_available
+ * Called to refill reqs_available when aio_get_req() finds the
+ * completion ring out of space.
+ */
+static void user_refill_reqs_available(struct kioctx *ctx)
+{
+ spin_lock_irq(&ctx->completion_lock);
+ if (ctx->completed_events) {
+ struct aio_ring *ring;
+ unsigned head;
+
+ /* Access of ring->head may race with aio_read_events_ring()
+ * here, but that's okay: whether we read the old version or
+ * the new version, either will be valid. The important
+ * part is that head cannot pass tail since we prevent
+ * aio_complete() from updating tail by holding
+ * ctx->completion_lock. Even if head is invalid, the check
+ * against ctx->completed_events below will make sure we do the
+ * safe/right thing.
+ */
+ ring = kmap_atomic(ctx->ring_pages[0]);
+ head = ring->head;
+ kunmap_atomic(ring);
+
+ refill_reqs_available(ctx, head, ctx->tail);
+ }
+
+ spin_unlock_irq(&ctx->completion_lock);
+}
+
/* aio_get_req
* Allocate a slot for an aio request.
* Returns NULL if no requests are free.
@@ -890,8 +933,11 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
{
struct kiocb *req;
- if (!get_reqs_available(ctx))
- return NULL;
+ if (!get_reqs_available(ctx)) {
+ user_refill_reqs_available(ctx);
+ if (!get_reqs_available(ctx))
+ return NULL;
+ }
req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
if (unlikely(!req))
@@ -950,8 +996,8 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
struct kioctx *ctx = iocb->ki_ctx;
struct aio_ring *ring;
struct io_event *ev_page, *event;
+ unsigned tail, pos, head;
unsigned long flags;
- unsigned tail, pos;
/*
* Special case handling for sync iocbs:
@@ -1012,10 +1058,14 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
ctx->tail = tail;
ring = kmap_atomic(ctx->ring_pages[0]);
+ head = ring->head;
ring->tail = tail;
kunmap_atomic(ring);
flush_dcache_page(ctx->ring_pages[0]);
+ ctx->completed_events++;
+ if (ctx->completed_events > 1)
+ refill_reqs_available(ctx, head, tail);
spin_unlock_irqrestore(&ctx->completion_lock, flags);
pr_debug("added to ring %p at [%u]\n", iocb, tail);
@@ -1030,7 +1080,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
/* everything turned out well, dispose of the aiocb. */
kiocb_free(iocb);
- put_reqs_available(ctx, 1);
/*
* We have to order our ring_info tail store above and test
@@ -1047,7 +1096,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
}
EXPORT_SYMBOL(aio_complete);
-/* aio_read_events
+/* aio_read_events_ring
* Pull an event off of the ioctx's event ring. Returns the number of
* events fetched
*/
@@ -1067,6 +1116,12 @@ static long aio_read_events_ring(struct kioctx *ctx,
tail = ring->tail;
kunmap_atomic(ring);
+ /*
+ * Ensure that once we've read the current tail pointer, we
+ * also see the events that were stored up to the tail.
+ */
+ smp_rmb();
+
pr_debug("h%u t%u m%u\n", head, tail, ctx->nr_events);
if (head == tail)
@@ -1270,12 +1325,12 @@ static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
if (compat)
ret = compat_rw_copy_check_uvector(rw,
(struct compat_iovec __user *)buf,
- *nr_segs, 1, *iovec, iovec);
+ *nr_segs, UIO_FASTIOV, *iovec, iovec);
else
#endif
ret = rw_copy_check_uvector(rw,
(struct iovec __user *)buf,
- *nr_segs, 1, *iovec, iovec);
+ *nr_segs, UIO_FASTIOV, *iovec, iovec);
if (ret < 0)
return ret;
@@ -1299,9 +1354,8 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
}
/*
- * aio_setup_iocb:
- * Performs the initial checks and aio retry method
- * setup for the kiocb at the time of io submission.
+ * aio_run_iocb:
+ * Performs the initial checks and io submission.
*/
static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
char __user *buf, bool compat)
@@ -1313,7 +1367,7 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
fmode_t mode;
aio_rw_op *rw_op;
rw_iter_op *iter_op;
- struct iovec inline_vec, *iovec = &inline_vec;
+ struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
switch (opcode) {
@@ -1348,7 +1402,7 @@ rw_common:
if (!ret)
ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
if (ret < 0) {
- if (iovec != &inline_vec)
+ if (iovec != inline_vecs)
kfree(iovec);
return ret;
}
@@ -1395,7 +1449,7 @@ rw_common:
return -EINVAL;
}
- if (iovec != &inline_vec)
+ if (iovec != inline_vecs)
kfree(iovec);
if (ret != -EIOCBQUEUED) {
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 7c93953030f..afd2b4408ad 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -218,8 +218,9 @@ static int bad_inode_mknod (struct inode *dir, struct dentry *dentry,
return -EIO;
}
-static int bad_inode_rename (struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
+static int bad_inode_rename2(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
{
return -EIO;
}
@@ -279,7 +280,7 @@ static const struct inode_operations bad_inode_ops =
.mkdir = bad_inode_mkdir,
.rmdir = bad_inode_rmdir,
.mknod = bad_inode_mknod,
- .rename = bad_inode_rename,
+ .rename2 = bad_inode_rename2,
.readlink = bad_inode_readlink,
/* follow_link must be no-op, otherwise unmounting this inode
won't work */
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 5a201d81049..fbd76ded9a3 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -22,7 +22,6 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/freezer.h>
-#include <linux/workqueue.h>
#include "async-thread.h"
#include "ctree.h"
@@ -55,8 +54,39 @@ struct btrfs_workqueue {
struct __btrfs_workqueue *high;
};
-static inline struct __btrfs_workqueue
-*__btrfs_alloc_workqueue(const char *name, int flags, int max_active,
+static void normal_work_helper(struct btrfs_work *work);
+
+#define BTRFS_WORK_HELPER(name) \
+void btrfs_##name(struct work_struct *arg) \
+{ \
+ struct btrfs_work *work = container_of(arg, struct btrfs_work, \
+ normal_work); \
+ normal_work_helper(work); \
+}
+
+BTRFS_WORK_HELPER(worker_helper);
+BTRFS_WORK_HELPER(delalloc_helper);
+BTRFS_WORK_HELPER(flush_delalloc_helper);
+BTRFS_WORK_HELPER(cache_helper);
+BTRFS_WORK_HELPER(submit_helper);
+BTRFS_WORK_HELPER(fixup_helper);
+BTRFS_WORK_HELPER(endio_helper);
+BTRFS_WORK_HELPER(endio_meta_helper);
+BTRFS_WORK_HELPER(endio_meta_write_helper);
+BTRFS_WORK_HELPER(endio_raid56_helper);
+BTRFS_WORK_HELPER(rmw_helper);
+BTRFS_WORK_HELPER(endio_write_helper);
+BTRFS_WORK_HELPER(freespace_write_helper);
+BTRFS_WORK_HELPER(delayed_meta_helper);
+BTRFS_WORK_HELPER(readahead_helper);
+BTRFS_WORK_HELPER(qgroup_rescan_helper);
+BTRFS_WORK_HELPER(extent_refs_helper);
+BTRFS_WORK_HELPER(scrub_helper);
+BTRFS_WORK_HELPER(scrubwrc_helper);
+BTRFS_WORK_HELPER(scrubnc_helper);
+
+static struct __btrfs_workqueue *
+__btrfs_alloc_workqueue(const char *name, int flags, int max_active,
int thresh)
{
struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
@@ -232,13 +262,11 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
spin_unlock_irqrestore(lock, flags);
}
-static void normal_work_helper(struct work_struct *arg)
+static void normal_work_helper(struct btrfs_work *work)
{
- struct btrfs_work *work;
struct __btrfs_workqueue *wq;
int need_order = 0;
- work = container_of(arg, struct btrfs_work, normal_work);
/*
* We should not touch things inside work in the following cases:
* 1) after work->func() if it has no ordered_free
@@ -262,7 +290,7 @@ static void normal_work_helper(struct work_struct *arg)
trace_btrfs_all_work_done(work);
}
-void btrfs_init_work(struct btrfs_work *work,
+void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
btrfs_func_t func,
btrfs_func_t ordered_func,
btrfs_func_t ordered_free)
@@ -270,7 +298,7 @@ void btrfs_init_work(struct btrfs_work *work,
work->func = func;
work->ordered_func = ordered_func;
work->ordered_free = ordered_free;
- INIT_WORK(&work->normal_work, normal_work_helper);
+ INIT_WORK(&work->normal_work, uniq_func);
INIT_LIST_HEAD(&work->ordered_list);
work->flags = 0;
}
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 9c6b66d15fb..e9e31c94758 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -19,12 +19,14 @@
#ifndef __BTRFS_ASYNC_THREAD_
#define __BTRFS_ASYNC_THREAD_
+#include <linux/workqueue.h>
struct btrfs_workqueue;
/* Internal use only */
struct __btrfs_workqueue;
struct btrfs_work;
typedef void (*btrfs_func_t)(struct btrfs_work *arg);
+typedef void (*btrfs_work_func_t)(struct work_struct *arg);
struct btrfs_work {
btrfs_func_t func;
@@ -38,11 +40,35 @@ struct btrfs_work {
unsigned long flags;
};
+#define BTRFS_WORK_HELPER_PROTO(name) \
+void btrfs_##name(struct work_struct *arg)
+
+BTRFS_WORK_HELPER_PROTO(worker_helper);
+BTRFS_WORK_HELPER_PROTO(delalloc_helper);
+BTRFS_WORK_HELPER_PROTO(flush_delalloc_helper);
+BTRFS_WORK_HELPER_PROTO(cache_helper);
+BTRFS_WORK_HELPER_PROTO(submit_helper);
+BTRFS_WORK_HELPER_PROTO(fixup_helper);
+BTRFS_WORK_HELPER_PROTO(endio_helper);
+BTRFS_WORK_HELPER_PROTO(endio_meta_helper);
+BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper);
+BTRFS_WORK_HELPER_PROTO(endio_raid56_helper);
+BTRFS_WORK_HELPER_PROTO(rmw_helper);
+BTRFS_WORK_HELPER_PROTO(endio_write_helper);
+BTRFS_WORK_HELPER_PROTO(freespace_write_helper);
+BTRFS_WORK_HELPER_PROTO(delayed_meta_helper);
+BTRFS_WORK_HELPER_PROTO(readahead_helper);
+BTRFS_WORK_HELPER_PROTO(qgroup_rescan_helper);
+BTRFS_WORK_HELPER_PROTO(extent_refs_helper);
+BTRFS_WORK_HELPER_PROTO(scrub_helper);
+BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
+BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
+
struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
int flags,
int max_active,
int thresh);
-void btrfs_init_work(struct btrfs_work *work,
+void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper,
btrfs_func_t func,
btrfs_func_t ordered_func,
btrfs_func_t ordered_free);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index e25564bfcb4..54a201dac7f 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -276,9 +276,8 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
}
if (ret > 0)
goto next;
- ret = ulist_add_merge(parents, eb->start,
- (uintptr_t)eie,
- (u64 *)&old, GFP_NOFS);
+ ret = ulist_add_merge_ptr(parents, eb->start,
+ eie, (void **)&old, GFP_NOFS);
if (ret < 0)
break;
if (!ret && extent_item_pos) {
@@ -1001,16 +1000,19 @@ again:
ret = -EIO;
goto out;
}
+ btrfs_tree_read_lock(eb);
+ btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
ret = find_extent_in_eb(eb, bytenr,
*extent_item_pos, &eie);
+ btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
if (ret < 0)
goto out;
ref->inode_list = eie;
}
- ret = ulist_add_merge(refs, ref->parent,
- (uintptr_t)ref->inode_list,
- (u64 *)&eie, GFP_NOFS);
+ ret = ulist_add_merge_ptr(refs, ref->parent,
+ ref->inode_list,
+ (void **)&eie, GFP_NOFS);
if (ret < 0)
goto out;
if (!ret && extent_item_pos) {
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 4794923c410..43527fd7882 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -84,12 +84,6 @@ struct btrfs_inode {
*/
struct list_head delalloc_inodes;
- /*
- * list for tracking inodes that must be sent to disk before a
- * rename or truncate commit
- */
- struct list_head ordered_operations;
-
/* node for the red-black tree that links inodes in subvolume root */
struct rb_node rb_node;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index aeab453b8e2..44ee5d2e52a 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -280,9 +280,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
WARN_ON(btrfs_header_generation(buf) > trans->transid);
if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
- ret = btrfs_inc_ref(trans, root, cow, 1, 1);
+ ret = btrfs_inc_ref(trans, root, cow, 1);
else
- ret = btrfs_inc_ref(trans, root, cow, 0, 1);
+ ret = btrfs_inc_ref(trans, root, cow, 0);
if (ret)
return ret;
@@ -1035,14 +1035,14 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if ((owner == root->root_key.objectid ||
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
- ret = btrfs_inc_ref(trans, root, buf, 1, 1);
+ ret = btrfs_inc_ref(trans, root, buf, 1);
BUG_ON(ret); /* -ENOMEM */
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID) {
- ret = btrfs_dec_ref(trans, root, buf, 0, 1);
+ ret = btrfs_dec_ref(trans, root, buf, 0);
BUG_ON(ret); /* -ENOMEM */
- ret = btrfs_inc_ref(trans, root, cow, 1, 1);
+ ret = btrfs_inc_ref(trans, root, cow, 1);
BUG_ON(ret); /* -ENOMEM */
}
new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
@@ -1050,9 +1050,9 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID)
- ret = btrfs_inc_ref(trans, root, cow, 1, 1);
+ ret = btrfs_inc_ref(trans, root, cow, 1);
else
- ret = btrfs_inc_ref(trans, root, cow, 0, 1);
+ ret = btrfs_inc_ref(trans, root, cow, 0);
BUG_ON(ret); /* -ENOMEM */
}
if (new_flags != 0) {
@@ -1069,11 +1069,11 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID)
- ret = btrfs_inc_ref(trans, root, cow, 1, 1);
+ ret = btrfs_inc_ref(trans, root, cow, 1);
else
- ret = btrfs_inc_ref(trans, root, cow, 0, 1);
+ ret = btrfs_inc_ref(trans, root, cow, 0);
BUG_ON(ret); /* -ENOMEM */
- ret = btrfs_dec_ref(trans, root, buf, 1, 1);
+ ret = btrfs_dec_ref(trans, root, buf, 1);
BUG_ON(ret); /* -ENOMEM */
}
clean_tree_block(trans, root, buf);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index be91397f4e9..8e29b614fe9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3326,9 +3326,9 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
u64 min_alloc_size, u64 empty_size, u64 hint_byte,
struct btrfs_key *ins, int is_data, int delalloc);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref, int no_quota);
+ struct extent_buffer *buf, int full_backref);
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref, int no_quota);
+ struct extent_buffer *buf, int full_backref);
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 flags,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index da775bfdebc..a2e90f855d7 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1395,8 +1395,8 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
return -ENOMEM;
async_work->delayed_root = delayed_root;
- btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root,
- NULL, NULL);
+ btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
+ btrfs_async_run_delayed_root, NULL, NULL);
async_work->nr = nr;
btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 08e65e9cf2a..a1d36e62179 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -39,7 +39,6 @@
#include "btrfs_inode.h"
#include "volumes.h"
#include "print-tree.h"
-#include "async-thread.h"
#include "locking.h"
#include "tree-log.h"
#include "free-space-cache.h"
@@ -60,8 +59,6 @@ static void end_workqueue_fn(struct btrfs_work *work);
static void free_fs_root(struct btrfs_root *root);
static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
int read_only);
-static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
- struct btrfs_root *root);
static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_root *root);
@@ -695,35 +692,41 @@ static void end_workqueue_bio(struct bio *bio, int err)
{
struct end_io_wq *end_io_wq = bio->bi_private;
struct btrfs_fs_info *fs_info;
+ struct btrfs_workqueue *wq;
+ btrfs_work_func_t func;
fs_info = end_io_wq->info;
end_io_wq->error = err;
- btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
if (bio->bi_rw & REQ_WRITE) {
- if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
- btrfs_queue_work(fs_info->endio_meta_write_workers,
- &end_io_wq->work);
- else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
- btrfs_queue_work(fs_info->endio_freespace_worker,
- &end_io_wq->work);
- else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
- btrfs_queue_work(fs_info->endio_raid56_workers,
- &end_io_wq->work);
- else
- btrfs_queue_work(fs_info->endio_write_workers,
- &end_io_wq->work);
+ if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
+ wq = fs_info->endio_meta_write_workers;
+ func = btrfs_endio_meta_write_helper;
+ } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) {
+ wq = fs_info->endio_freespace_worker;
+ func = btrfs_freespace_write_helper;
+ } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
+ wq = fs_info->endio_raid56_workers;
+ func = btrfs_endio_raid56_helper;
+ } else {
+ wq = fs_info->endio_write_workers;
+ func = btrfs_endio_write_helper;
+ }
} else {
- if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
- btrfs_queue_work(fs_info->endio_raid56_workers,
- &end_io_wq->work);
- else if (end_io_wq->metadata)
- btrfs_queue_work(fs_info->endio_meta_workers,
- &end_io_wq->work);
- else
- btrfs_queue_work(fs_info->endio_workers,
- &end_io_wq->work);
+ if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
+ wq = fs_info->endio_raid56_workers;
+ func = btrfs_endio_raid56_helper;
+ } else if (end_io_wq->metadata) {
+ wq = fs_info->endio_meta_workers;
+ func = btrfs_endio_meta_helper;
+ } else {
+ wq = fs_info->endio_workers;
+ func = btrfs_endio_helper;
+ }
}
+
+ btrfs_init_work(&end_io_wq->work, func, end_workqueue_fn, NULL, NULL);
+ btrfs_queue_work(wq, &end_io_wq->work);
}
/*
@@ -830,7 +833,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
async->submit_bio_start = submit_bio_start;
async->submit_bio_done = submit_bio_done;
- btrfs_init_work(&async->work, run_one_async_start,
+ btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start,
run_one_async_done, run_one_async_free);
async->bio_flags = bio_flags;
@@ -3452,7 +3455,8 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
btrfs_set_stack_device_generation(dev_item, 0);
btrfs_set_stack_device_type(dev_item, dev->type);
btrfs_set_stack_device_id(dev_item, dev->devid);
- btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes);
+ btrfs_set_stack_device_total_bytes(dev_item,
+ dev->disk_total_bytes);
btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used);
btrfs_set_stack_device_io_align(dev_item, dev->io_align);
btrfs_set_stack_device_io_width(dev_item, dev->io_width);
@@ -3829,34 +3833,6 @@ static void btrfs_error_commit_super(struct btrfs_root *root)
btrfs_cleanup_transaction(root);
}
-static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
- struct btrfs_root *root)
-{
- struct btrfs_inode *btrfs_inode;
- struct list_head splice;
-
- INIT_LIST_HEAD(&splice);
-
- mutex_lock(&root->fs_info->ordered_operations_mutex);
- spin_lock(&root->fs_info->ordered_root_lock);
-
- list_splice_init(&t->ordered_operations, &splice);
- while (!list_empty(&splice)) {
- btrfs_inode = list_entry(splice.next, struct btrfs_inode,
- ordered_operations);
-
- list_del_init(&btrfs_inode->ordered_operations);
- spin_unlock(&root->fs_info->ordered_root_lock);
-
- btrfs_invalidate_inodes(btrfs_inode->root);
-
- spin_lock(&root->fs_info->ordered_root_lock);
- }
-
- spin_unlock(&root->fs_info->ordered_root_lock);
- mutex_unlock(&root->fs_info->ordered_operations_mutex);
-}
-
static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
{
struct btrfs_ordered_extent *ordered;
@@ -4093,8 +4069,6 @@ again:
void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
struct btrfs_root *root)
{
- btrfs_destroy_ordered_operations(cur_trans, root);
-
btrfs_destroy_delayed_refs(cur_trans, root);
cur_trans->state = TRANS_STATE_COMMIT_START;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 813537f362f..3efe1c3877b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -552,7 +552,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
caching_ctl->block_group = cache;
caching_ctl->progress = cache->key.objectid;
atomic_set(&caching_ctl->count, 1);
- btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
+ btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
+ caching_thread, NULL, NULL);
spin_lock(&cache->lock);
/*
@@ -2749,8 +2750,8 @@ int btrfs_async_run_delayed_refs(struct btrfs_root *root,
async->sync = 0;
init_completion(&async->wait);
- btrfs_init_work(&async->work, delayed_ref_async_start,
- NULL, NULL);
+ btrfs_init_work(&async->work, btrfs_extent_refs_helper,
+ delayed_ref_async_start, NULL, NULL);
btrfs_queue_work(root->fs_info->extent_workers, &async->work);
@@ -3057,7 +3058,7 @@ out:
static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
- int full_backref, int inc, int no_quota)
+ int full_backref, int inc)
{
u64 bytenr;
u64 num_bytes;
@@ -3111,7 +3112,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
key.offset -= btrfs_file_extent_offset(buf, fi);
ret = process_func(trans, root, bytenr, num_bytes,
parent, ref_root, key.objectid,
- key.offset, no_quota);
+ key.offset, 1);
if (ret)
goto fail;
} else {
@@ -3119,7 +3120,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
num_bytes = btrfs_level_size(root, level - 1);
ret = process_func(trans, root, bytenr, num_bytes,
parent, ref_root, level - 1, 0,
- no_quota);
+ 1);
if (ret)
goto fail;
}
@@ -3130,15 +3131,15 @@ fail:
}
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref, int no_quota)
+ struct extent_buffer *buf, int full_backref)
{
- return __btrfs_mod_ref(trans, root, buf, full_backref, 1, no_quota);
+ return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
}
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref, int no_quota)
+ struct extent_buffer *buf, int full_backref)
{
- return __btrfs_mod_ref(trans, root, buf, full_backref, 0, no_quota);
+ return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
}
static int write_one_cache_group(struct btrfs_trans_handle *trans,
@@ -3586,13 +3587,7 @@ static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
*/
static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
{
- /*
- * we add in the count of missing devices because we want
- * to make sure that any RAID levels on a degraded FS
- * continue to be honored.
- */
- u64 num_devices = root->fs_info->fs_devices->rw_devices +
- root->fs_info->fs_devices->missing_devices;
+ u64 num_devices = root->fs_info->fs_devices->rw_devices;
u64 target;
u64 tmp;
@@ -7478,6 +7473,220 @@ reada:
wc->reada_slot = slot;
}
+/*
+ * Record a BTRFS_QGROUP_OPER_SUB_SUBTREE qgroup operation for each file
+ * extent referenced from the leaf @eb.
+ *
+ * Only BTRFS_EXTENT_DATA_KEY items are looked at.  Inline extents and
+ * extents whose disk bytenr is zero are skipped (presumably holes -
+ * TODO confirm).
+ *
+ * Returns 0 when every item was handled, otherwise the first non-zero
+ * value returned by btrfs_qgroup_record_ref().
+ */
+static int account_leaf_items(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root,
+			      struct extent_buffer *eb)
+{
+	int total = btrfs_header_nritems(eb);
+	struct btrfs_file_extent_item *item;
+	struct btrfs_key found;
+	u64 disk_start, disk_len;
+	int slot, err;
+
+	for (slot = 0; slot < total; slot++) {
+		btrfs_item_key_to_cpu(eb, &found, slot);
+		if (found.type != BTRFS_EXTENT_DATA_KEY)
+			continue;
+
+		item = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
+
+		/* filter out non qgroup-accountable extents */
+		if (btrfs_file_extent_type(eb, item) ==
+		    BTRFS_FILE_EXTENT_INLINE)
+			continue;
+
+		disk_start = btrfs_file_extent_disk_bytenr(eb, item);
+		if (disk_start == 0)
+			continue;
+
+		disk_len = btrfs_file_extent_disk_num_bytes(eb, item);
+
+		err = btrfs_qgroup_record_ref(trans, root->fs_info,
+					      root->objectid,
+					      disk_start, disk_len,
+					      BTRFS_QGROUP_OPER_SUB_SUBTREE, 0);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Advance the search position after a leaf has been processed.
+ *
+ * Starting at level 0 and moving towards root_level, the slot of each node
+ * on the path is incremented.  The leaf, and any interior node whose slot
+ * has moved past its last item, is unlocked, released and cleared from the
+ * path; the walk then continues with the node above it, whose slot is
+ * incremented in turn.  The walk stops at the first interior node that
+ * still has a valid slot.  The node at root_level is never released.
+ *
+ * On return, a level whose node is NULL has to be read again by the
+ * caller, using the slot selected in the level above it.
+ *
+ * Returns 1 once the slot of the node at root_level has moved past its
+ * number of items (the search is complete), and also right away when
+ * root_level is 0.  Returns 0 otherwise.
+ */
+static int adjust_slots_upwards(struct btrfs_root *root,
+				struct btrfs_path *path, int root_level)
+{
+	struct extent_buffer *node;
+	int nritems;
+	int cur;
+
+	if (root_level == 0)
+		return 1;
+
+	for (cur = 0; cur <= root_level; cur++) {
+		node = path->nodes[cur];
+		nritems = btrfs_header_nritems(node);
+		path->slots[cur]++;
+
+		/*
+		 * An interior node that still has a valid slot: stop here
+		 * so the caller can walk back down from it.
+		 */
+		if (cur != 0 && path->slots[cur] < nritems)
+			break;
+
+		/*
+		 * Don't free the root - its exhaustion is detected after
+		 * the loop and reported with a positive return value.
+		 */
+		if (cur == root_level)
+			continue;
+
+		btrfs_tree_unlock_rw(node, path->locks[cur]);
+		path->locks[cur] = 0;
+
+		free_extent_buffer(node);
+		path->nodes[cur] = NULL;
+		path->slots[cur] = 0;
+	}
+
+	node = path->nodes[root_level];
+	if (path->slots[root_level] < btrfs_header_nritems(node))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Record a BTRFS_QGROUP_OPER_SUB_SUBTREE qgroup operation for every tree
+ * block below root_eb and for every file extent that account_leaf_items()
+ * accepts in the leaves of that subtree.
+ *
+ * @trans:      transaction handle passed on to btrfs_qgroup_record_ref()
+ * @root:       root whose objectid the operations are recorded against
+ * @root_eb:    the subtree root; it is locked before this function is
+ *              called and is not unlocked here
+ * @root_gen:   generation handed to btrfs_read_buffer() when root_eb is
+ *              not uptodate yet
+ * @root_level: level of root_eb; 0 means root_eb is itself a leaf
+ *
+ * Returns 0 on success or when quota is not enabled, -ENOMEM when no path
+ * can be allocated, -EIO when a child block cannot be read, or the error
+ * returned by btrfs_read_buffer(), btrfs_qgroup_record_ref() or
+ * account_leaf_items().
+ */
+static int account_shared_subtree(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root,
+				  struct extent_buffer *root_eb,
+				  u64 root_gen,
+				  int root_level)
+{
+	int ret = 0;
+	int level;
+	struct extent_buffer *eb = root_eb;
+	struct btrfs_path *path = NULL;
+
+	/*
+	 * root_level is used below as an index into path->nodes[],
+	 * path->slots[] and path->locks[], so it must be strictly smaller
+	 * than BTRFS_MAX_LEVEL.
+	 * NOTE(review): assumes those arrays hold BTRFS_MAX_LEVEL entries,
+	 * as declared in ctree.h - confirm.
+	 */
+	BUG_ON(root_level < 0 || root_level >= BTRFS_MAX_LEVEL);
+	BUG_ON(root_eb == NULL);
+
+	if (!root->fs_info->quota_enabled)
+		return 0;
+
+	if (!extent_buffer_uptodate(root_eb)) {
+		ret = btrfs_read_buffer(root_eb, root_gen);
+		if (ret)
+			goto out;
+	}
+
+	/* The subtree is a single leaf: no path is needed. */
+	if (root_level == 0) {
+		ret = account_leaf_items(trans, root, root_eb);
+		goto out;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	/*
+	 * Walk down the tree. Missing extent blocks are filled in as
+	 * we go. Metadata is accounted every time we read a new
+	 * extent block.
+	 *
+	 * When we reach a leaf, we account for file extent items in it,
+	 * walk back up the tree (adjusting slot pointers as we go)
+	 * and restart the search process.
+	 */
+	extent_buffer_get(root_eb); /* For path */
+	path->nodes[root_level] = root_eb;
+	path->slots[root_level] = 0;
+	path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
+walk_down:
+	level = root_level;
+	while (level >= 0) {
+		if (path->nodes[level] == NULL) {
+			int child_bsize = root->nodesize;
+			int parent_slot;
+			u64 child_gen;
+			u64 child_bytenr;
+
+			/* We need to get child blockptr/gen from
+			 * parent before we can read it. */
+			eb = path->nodes[level + 1];
+			parent_slot = path->slots[level + 1];
+			child_bytenr = btrfs_node_blockptr(eb, parent_slot);
+			child_gen = btrfs_node_ptr_generation(eb, parent_slot);
+
+			eb = read_tree_block(root, child_bytenr, child_bsize,
+					     child_gen);
+			if (!eb) {
+				ret = -EIO;
+				goto out;
+			}
+			if (!extent_buffer_uptodate(eb)) {
+				/*
+				 * The buffer is not stored in the path yet,
+				 * so btrfs_free_path() will not release it;
+				 * drop our reference here.
+				 */
+				free_extent_buffer(eb);
+				ret = -EIO;
+				goto out;
+			}
+
+			path->nodes[level] = eb;
+			path->slots[level] = 0;
+
+			btrfs_tree_read_lock(eb);
+			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+			path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
+
+			ret = btrfs_qgroup_record_ref(trans, root->fs_info,
+						root->objectid,
+						child_bytenr,
+						child_bsize,
+						BTRFS_QGROUP_OPER_SUB_SUBTREE,
+						0);
+			if (ret)
+				goto out;
+
+		}
+
+		if (level == 0) {
+			ret = account_leaf_items(trans, root, path->nodes[level]);
+			if (ret)
+				goto out;
+
+			/* Nonzero return here means we completed our search */
+			ret = adjust_slots_upwards(root, path, root_level);
+			if (ret)
+				break;
+
+			/* Restart search with new slots */
+			goto walk_down;
+		}
+
+		level--;
+	}
+
+	/* The positive "search complete" value is not an error. */
+	ret = 0;
+out:
+	btrfs_free_path(path);
+
+	return ret;
+}
+
/*
* helper to process tree block while walking down the tree.
*
@@ -7532,9 +7741,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
/* wc->stage == UPDATE_BACKREF */
if (!(wc->flags[level] & flag)) {
BUG_ON(!path->locks[level]);
- ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc);
+ ret = btrfs_inc_ref(trans, root, eb, 1);
BUG_ON(ret); /* -ENOMEM */
- ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
+ ret = btrfs_dec_ref(trans, root, eb, 0);
BUG_ON(ret); /* -ENOMEM */
ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
eb->len, flag,
@@ -7581,6 +7790,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
int level = wc->level;
int reada = 0;
int ret = 0;
+ bool need_account = false;
generation = btrfs_node_ptr_generation(path->nodes[level],
path->slots[level]);
@@ -7626,6 +7836,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
if (wc->stage == DROP_REFERENCE) {
if (wc->refs[level - 1] > 1) {
+ need_account = true;
if (level == 1 &&
(wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
goto skip;
@@ -7689,6 +7900,16 @@ skip:
parent = 0;
}
+ if (need_account) {
+ ret = account_shared_subtree(trans, root, next,
+ generation, level - 1);
+ if (ret) {
+ printk_ratelimited(KERN_ERR "BTRFS: %s Error "
+ "%d accounting shared subtree. Quota "
+ "is out of sync, rescan required.\n",
+ root->fs_info->sb->s_id, ret);
+ }
+ }
ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
root->root_key.objectid, level - 1, 0, 0);
BUG_ON(ret); /* -ENOMEM */
@@ -7769,12 +7990,17 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
if (wc->refs[level] == 1) {
if (level == 0) {
if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
- ret = btrfs_dec_ref(trans, root, eb, 1,
- wc->for_reloc);
+ ret = btrfs_dec_ref(trans, root, eb, 1);
else
- ret = btrfs_dec_ref(trans, root, eb, 0,
- wc->for_reloc);
+ ret = btrfs_dec_ref(trans, root, eb, 0);
BUG_ON(ret); /* -ENOMEM */
+ ret = account_leaf_items(trans, root, eb);
+ if (ret) {
+ printk_ratelimited(KERN_ERR "BTRFS: %s Error "
+ "%d accounting leaf items. Quota "
+ "is out of sync, rescan required.\n",
+ root->fs_info->sb->s_id, ret);
+ }
}
/* make block locked assertion in clean_tree_block happy */
if (!path->locks[level] &&
@@ -7900,6 +8126,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
int level;
bool root_dropped = false;
+ btrfs_debug(root->fs_info, "Drop subvolume %llu", root->objectid);
+
path = btrfs_alloc_path();
if (!path) {
err = -ENOMEM;
@@ -8025,6 +8253,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
goto out_end_trans;
}
+ /*
+ * Qgroup update accounting is run from
+ * delayed ref handling. This usually works
+ * out because delayed refs are normally the
+ * only way qgroup updates are added. However,
+ * we may have added updates during our tree
+ * walk so run qgroups here to make sure we
+ * don't lose any updates.
+ */
+ ret = btrfs_delayed_qgroup_accounting(trans,
+ root->fs_info);
+ if (ret)
+ printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
+ "running qgroup updates "
+ "during snapshot delete. "
+ "Quota is out of sync, "
+ "rescan required.\n", ret);
+
btrfs_end_transaction_throttle(trans, tree_root);
if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
pr_debug("BTRFS: drop snapshot early exit\n");
@@ -8078,6 +8324,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
}
root_dropped = true;
out_end_trans:
+ ret = btrfs_delayed_qgroup_accounting(trans, tree_root->fs_info);
+ if (ret)
+ printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
+ "running qgroup updates "
+ "during snapshot delete. "
+ "Quota is out of sync, "
+ "rescan required.\n", ret);
+
btrfs_end_transaction_throttle(trans, tree_root);
out_free:
kfree(wc);
@@ -8181,13 +8435,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
if (stripped)
return extended_to_chunk(stripped);
- /*
- * we add in the count of missing devices because we want
- * to make sure that any RAID levels on a degraded FS
- * continue to be honored.
- */
- num_devices = root->fs_info->fs_devices->rw_devices +
- root->fs_info->fs_devices->missing_devices;
+ num_devices = root->fs_info->fs_devices->rw_devices;
stripped = BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3e11aab9f39..af0359dcf33 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2532,6 +2532,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
test_bit(BIO_UPTODATE, &bio->bi_flags);
if (err)
uptodate = 0;
+ offset += len;
continue;
}
}
@@ -4207,8 +4208,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
return -ENOMEM;
path->leave_spinning = 1;
- start = ALIGN(start, BTRFS_I(inode)->root->sectorsize);
- len = ALIGN(len, BTRFS_I(inode)->root->sectorsize);
+ start = round_down(start, BTRFS_I(inode)->root->sectorsize);
+ len = round_up(max, BTRFS_I(inode)->root->sectorsize) - start;
/*
* lookup the last file extent. We're not using i_size here
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index f46cfe45d68..54c84daec9b 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -756,7 +756,7 @@ again:
found_next = 1;
if (ret != 0)
goto insert;
- slot = 0;
+ slot = path->slots[0];
}
btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 1f2b99cb55e..ff1cc0399b9 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1838,6 +1838,8 @@ out:
int btrfs_release_file(struct inode *inode, struct file *filp)
{
+ if (filp->private_data)
+ btrfs_ioctl_trans_end(filp);
/*
* ordered_data_close is set by settattr when we are about to truncate
* a file from a non-zero size to a zero size. This tries to
@@ -1845,26 +1847,8 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
* application were using truncate to replace a file in place.
*/
if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
- &BTRFS_I(inode)->runtime_flags)) {
- struct btrfs_trans_handle *trans;
- struct btrfs_root *root = BTRFS_I(inode)->root;
-
- /*
- * We need to block on a committing transaction to keep us from
- * throwing a ordered operation on to the list and causing
- * something like sync to deadlock trying to flush out this
- * inode.
- */
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- btrfs_add_ordered_operation(trans, BTRFS_I(inode)->root, inode);
- btrfs_end_transaction(trans, root);
- if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
+ &BTRFS_I(inode)->runtime_flags))
filemap_flush(inode->i_mapping);
- }
- if (filp->private_data)
- btrfs_ioctl_trans_end(filp);
return 0;
}
@@ -1982,7 +1966,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
btrfs_init_log_ctx(&ctx);
- ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx);
+ ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
if (ret < 0) {
/* Fallthrough and commit/free transaction. */
ret = 1;
@@ -2112,10 +2096,9 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
goto out;
}
- if (hole_mergeable(inode, leaf, path->slots[0]+1, offset, end)) {
+ if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) {
u64 num_bytes;
- path->slots[0]++;
key.offset = offset;
btrfs_set_item_key_safe(root, path, &key);
fi = btrfs_item_ptr(leaf, path->slots[0],
@@ -2240,7 +2223,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
goto out_only_mutex;
}
- lockstart = round_up(offset , BTRFS_I(inode)->root->sectorsize);
+ lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
lockend = round_down(offset + len,
BTRFS_I(inode)->root->sectorsize) - 1;
same_page = ((offset >> PAGE_CACHE_SHIFT) ==
@@ -2301,7 +2284,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
tail_start + tail_len, 0, 1);
if (ret)
goto out_only_mutex;
- }
+ }
}
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3668048e16f..016c403bfe7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -709,6 +709,18 @@ retry:
unlock_extent(io_tree, async_extent->start,
async_extent->start +
async_extent->ram_size - 1);
+
+ /*
+ * we need to redirty the pages if we decide to
+ * fallback to uncompressed IO, otherwise we
+ * will not submit these pages down to lower
+ * layers.
+ */
+ extent_range_redirty_for_io(inode,
+ async_extent->start,
+ async_extent->start +
+ async_extent->ram_size - 1);
+
goto retry;
}
goto out_free;
@@ -766,8 +778,12 @@ retry:
ins.offset,
BTRFS_ORDERED_COMPRESSED,
async_extent->compress_type);
- if (ret)
+ if (ret) {
+ btrfs_drop_extent_cache(inode, async_extent->start,
+ async_extent->start +
+ async_extent->ram_size - 1, 0);
goto out_free_reserve;
+ }
/*
* clear dirty, set writeback and unlock the pages.
@@ -959,14 +975,14 @@ static noinline int cow_file_range(struct inode *inode,
ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
ram_size, cur_alloc_size, 0);
if (ret)
- goto out_reserve;
+ goto out_drop_extent_cache;
if (root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID) {
ret = btrfs_reloc_clone_csums(inode, start,
cur_alloc_size);
if (ret)
- goto out_reserve;
+ goto out_drop_extent_cache;
}
if (disk_num_bytes < cur_alloc_size)
@@ -994,6 +1010,8 @@ static noinline int cow_file_range(struct inode *inode,
out:
return ret;
+out_drop_extent_cache:
+ btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
out_reserve:
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
out_unlock:
@@ -1084,8 +1102,10 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
async_cow->end = cur_end;
INIT_LIST_HEAD(&async_cow->extents);
- btrfs_init_work(&async_cow->work, async_cow_start,
- async_cow_submit, async_cow_free);
+ btrfs_init_work(&async_cow->work,
+ btrfs_delalloc_helper,
+ async_cow_start, async_cow_submit,
+ async_cow_free);
nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >>
PAGE_CACHE_SHIFT;
@@ -1869,7 +1889,8 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
SetPageChecked(page);
page_cache_get(page);
- btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL);
+ btrfs_init_work(&fixup->work, btrfs_fixup_helper,
+ btrfs_writepage_fixup_worker, NULL, NULL);
fixup->page = page;
btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work);
return -EBUSY;
@@ -2810,7 +2831,8 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
struct inode *inode = page->mapping->host;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ordered_extent *ordered_extent = NULL;
- struct btrfs_workqueue *workers;
+ struct btrfs_workqueue *wq;
+ btrfs_work_func_t func;
trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
@@ -2819,13 +2841,17 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
end - start + 1, uptodate))
return 0;
- btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL);
+ if (btrfs_is_free_space_inode(inode)) {
+ wq = root->fs_info->endio_freespace_worker;
+ func = btrfs_freespace_write_helper;
+ } else {
+ wq = root->fs_info->endio_write_workers;
+ func = btrfs_endio_write_helper;
+ }
- if (btrfs_is_free_space_inode(inode))
- workers = root->fs_info->endio_freespace_worker;
- else
- workers = root->fs_info->endio_write_workers;
- btrfs_queue_work(workers, &ordered_extent->work);
+ btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL,
+ NULL);
+ btrfs_queue_work(wq, &ordered_extent->work);
return 0;
}
@@ -4222,7 +4248,8 @@ out:
btrfs_abort_transaction(trans, root, ret);
}
error:
- if (last_size != (u64)-1)
+ if (last_size != (u64)-1 &&
+ root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
btrfs_ordered_update_i_size(inode, last_size, NULL);
btrfs_free_path(path);
return err;
@@ -4662,6 +4689,11 @@ static void evict_inode_truncate_pages(struct inode *inode)
clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
remove_extent_mapping(map_tree, em);
free_extent_map(em);
+ if (need_resched()) {
+ write_unlock(&map_tree->lock);
+ cond_resched();
+ write_lock(&map_tree->lock);
+ }
}
write_unlock(&map_tree->lock);
@@ -4684,6 +4716,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
&cached_state, GFP_NOFS);
free_extent_state(state);
+ cond_resched();
spin_lock(&io_tree->lock);
}
spin_unlock(&io_tree->lock);
@@ -5169,6 +5202,42 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
iput(inode);
inode = ERR_PTR(ret);
}
+ /*
+ * If orphan cleanup did remove any orphans, it means the tree
+ * was modified and therefore the commit root is not the same as
+ * the current root anymore. This is a problem, because send
+ * uses the commit root and therefore can see inode items that
+ * don't exist in the current root anymore, and for example make
+ * calls to btrfs_iget, which will do tree lookups based on the
+ * current root and not on the commit root. Those lookups will
+ * fail, returning a -ESTALE error, and making send fail with
+ * that error. So make sure a send does not see any orphans we
+ * have just removed, and that it will see the same inodes
+ * regardless of whether a transaction commit happened before
+ * it started (meaning that the commit root will be the same as
+ * the current root) or not.
+ */
+ if (sub_root->node != sub_root->commit_root) {
+ u64 sub_flags = btrfs_root_flags(&sub_root->root_item);
+
+ if (sub_flags & BTRFS_ROOT_SUBVOL_RDONLY) {
+ struct extent_buffer *eb;
+
+ /*
+ * Assert we can't have races between dentry
+ * lookup called through the snapshot creation
+ * ioctl and the VFS.
+ */
+ ASSERT(mutex_is_locked(&dir->i_mutex));
+
+ down_write(&root->fs_info->commit_root_sem);
+ eb = sub_root->commit_root;
+ sub_root->commit_root =
+ btrfs_root_node(sub_root);
+ up_write(&root->fs_info->commit_root_sem);
+ free_extent_buffer(eb);
+ }
+ }
}
return inode;
@@ -5565,6 +5634,17 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index)
return ret;
}
+/*
+ * Hand @inode to insert_inode_locked4(), hashed with btrfs_inode_hash()
+ * over its inode number and root, using btrfs_find_actor with the inode's
+ * own location key and root as the lookup arguments.
+ *
+ * Returns whatever insert_inode_locked4() returns.
+ */
+static int btrfs_insert_inode_locked(struct inode *inode)
+{
+	struct btrfs_inode *binode = BTRFS_I(inode);
+	struct btrfs_iget_args lookup;
+	unsigned long hashval;
+
+	lookup.location = &binode->location;
+	lookup.root = binode->root;
+
+	hashval = btrfs_inode_hash(inode->i_ino, binode->root);
+
+	return insert_inode_locked4(inode, hashval, btrfs_find_actor, &lookup);
+}
+
static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *dir,
@@ -5594,6 +5674,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
}
/*
+ * O_TMPFILE, set link count to 0, so that after this point,
+ * we fill in an inode item with the correct link count.
+ */
+ if (!name)
+ set_nlink(inode, 0);
+
+ /*
* we have to initialize this early, so we can reclaim the inode
* number if we fail afterwards in this function.
*/
@@ -5650,10 +5737,19 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
sizes[1] = name_len + sizeof(*ref);
}
+ location = &BTRFS_I(inode)->location;
+ location->objectid = objectid;
+ location->offset = 0;
+ btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
+
+ ret = btrfs_insert_inode_locked(inode);
+ if (ret < 0)
+ goto fail;
+
path->leave_spinning = 1;
ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems);
if (ret != 0)
- goto fail;
+ goto fail_unlock;
inode_init_owner(inode, dir, mode);
inode_set_bytes(inode, 0);
@@ -5676,11 +5772,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_free_path(path);
- location = &BTRFS_I(inode)->location;
- location->objectid = objectid;
- location->offset = 0;
- btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
-
btrfs_inherit_iflags(inode, dir);
if (S_ISREG(mode)) {
@@ -5691,7 +5782,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
BTRFS_INODE_NODATASUM;
}
- btrfs_insert_inode_hash(inode);
inode_tree_add(inode);
trace_btrfs_inode_new(inode);
@@ -5706,6 +5796,9 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
btrfs_ino(inode), root->root_key.objectid, ret);
return inode;
+
+fail_unlock:
+ unlock_new_inode(inode);
fail:
if (dir && name)
BTRFS_I(dir)->index_cnt--;
@@ -5840,28 +5933,28 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
goto out_unlock;
}
- err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
- if (err) {
- drop_inode = 1;
- goto out_unlock;
- }
-
/*
* If the active LSM wants to access the inode during
* d_instantiate it needs these. Smack checks to see
* if the filesystem supports xattrs by looking at the
* ops vector.
*/
-
inode->i_op = &btrfs_special_inode_operations;
- err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
+ init_special_inode(inode, inode->i_mode, rdev);
+
+ err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err)
- drop_inode = 1;
- else {
- init_special_inode(inode, inode->i_mode, rdev);
+ goto out_unlock_inode;
+
+ err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
+ if (err) {
+ goto out_unlock_inode;
+ } else {
btrfs_update_inode(trans, root, inode);
+ unlock_new_inode(inode);
d_instantiate(dentry, inode);
}
+
out_unlock:
btrfs_end_transaction(trans, root);
btrfs_balance_delayed_items(root);
@@ -5871,6 +5964,12 @@ out_unlock:
iput(inode);
}
return err;
+
+out_unlock_inode:
+ drop_inode = 1;
+ unlock_new_inode(inode);
+ goto out_unlock;
+
}
static int btrfs_create(struct inode *dir, struct dentry *dentry,
@@ -5905,15 +6004,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
goto out_unlock;
}
drop_inode_on_err = 1;
-
- err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
- if (err)
- goto out_unlock;
-
- err = btrfs_update_inode(trans, root, inode);
- if (err)
- goto out_unlock;
-
/*
* If the active LSM wants to access the inode during
* d_instantiate it needs these. Smack checks to see
@@ -5922,14 +6012,23 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
*/
inode->i_fop = &btrfs_file_operations;
inode->i_op = &btrfs_file_inode_operations;
+ inode->i_mapping->a_ops = &btrfs_aops;
+ inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+
+ err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
+ if (err)
+ goto out_unlock_inode;
+
+ err = btrfs_update_inode(trans, root, inode);
+ if (err)
+ goto out_unlock_inode;
err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
if (err)
- goto out_unlock;
+ goto out_unlock_inode;
- inode->i_mapping->a_ops = &btrfs_aops;
- inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+ unlock_new_inode(inode);
d_instantiate(dentry, inode);
out_unlock:
@@ -5941,6 +6040,11 @@ out_unlock:
btrfs_balance_delayed_items(root);
btrfs_btree_balance_dirty(root);
return err;
+
+out_unlock_inode:
+ unlock_new_inode(inode);
+ goto out_unlock;
+
}
static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
@@ -6048,25 +6152,30 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
}
drop_on_err = 1;
+ /* these must be set before we unlock the inode */
+ inode->i_op = &btrfs_dir_inode_operations;
+ inode->i_fop = &btrfs_dir_file_operations;
err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err)
- goto out_fail;
-
- inode->i_op = &btrfs_dir_inode_operations;
- inode->i_fop = &btrfs_dir_file_operations;
+ goto out_fail_inode;
btrfs_i_size_write(inode, 0);
err = btrfs_update_inode(trans, root, inode);
if (err)
- goto out_fail;
+ goto out_fail_inode;
err = btrfs_add_link(trans, dir, inode, dentry->d_name.name,
dentry->d_name.len, 0, index);
if (err)
- goto out_fail;
+ goto out_fail_inode;
d_instantiate(dentry, inode);
+ /*
+ * mkdir is special. We're unlocking after we call d_instantiate
+ * to avoid a race with nfsd calling d_instantiate.
+ */
+ unlock_new_inode(inode);
drop_on_err = 0;
out_fail:
@@ -6076,6 +6185,10 @@ out_fail:
btrfs_balance_delayed_items(root);
btrfs_btree_balance_dirty(root);
return err;
+
+out_fail_inode:
+ unlock_new_inode(inode);
+ goto out_fail;
}
/* helper for btfs_get_extent. Given an existing extent in the tree,
@@ -6085,14 +6198,14 @@ out_fail:
static int merge_extent_mapping(struct extent_map_tree *em_tree,
struct extent_map *existing,
struct extent_map *em,
- u64 map_start, u64 map_len)
+ u64 map_start)
{
u64 start_diff;
BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
start_diff = map_start - em->start;
em->start = map_start;
- em->len = map_len;
+ em->len = existing->start - em->start;
if (em->block_start < EXTENT_MAP_LAST_BYTE &&
!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
em->block_start += start_diff;
@@ -6263,6 +6376,8 @@ next:
goto not_found;
if (start + len <= found_key.offset)
goto not_found;
+ if (start > found_key.offset)
+ goto next;
em->start = start;
em->orig_start = start;
em->len = found_key.offset - start;
@@ -6378,8 +6493,7 @@ insert:
em->len);
if (existing) {
err = merge_extent_mapping(em_tree, existing,
- em, start,
- root->sectorsize);
+ em, start);
free_extent_map(existing);
if (err) {
free_extent_map(em);
@@ -7146,7 +7260,8 @@ again:
if (!ret)
goto out_test;
- btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL);
+ btrfs_init_work(&ordered->work, btrfs_endio_write_helper,
+ finish_ordered_fn, NULL, NULL);
btrfs_queue_work(root->fs_info->endio_write_workers,
&ordered->work);
out_test:
@@ -7294,10 +7409,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
map_length = orig_bio->bi_iter.bi_size;
ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
&map_length, NULL, 0);
- if (ret) {
- bio_put(orig_bio);
+ if (ret)
return -EIO;
- }
if (map_length >= orig_bio->bi_iter.bi_size) {
bio = orig_bio;
@@ -7314,6 +7427,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
if (!bio)
return -ENOMEM;
+
bio->bi_private = dip;
bio->bi_end_io = btrfs_end_dio_bio;
atomic_inc(&dip->pending_bios);
@@ -7522,7 +7636,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
count = iov_iter_count(iter);
if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
&BTRFS_I(inode)->runtime_flags))
- filemap_fdatawrite_range(inode->i_mapping, offset, count);
+ filemap_fdatawrite_range(inode->i_mapping, offset,
+ offset + count - 1);
if (rw & WRITE) {
/*
@@ -7939,27 +8054,6 @@ static int btrfs_truncate(struct inode *inode)
BUG_ON(ret);
/*
- * setattr is responsible for setting the ordered_data_close flag,
- * but that is only tested during the last file release. That
- * could happen well after the next commit, leaving a great big
- * window where new writes may get lost if someone chooses to write
- * to this file after truncating to zero
- *
- * The inode doesn't have any dirty data here, and so if we commit
- * this is a noop. If someone immediately starts writing to the inode
- * it is very likely we'll catch some of their writes in this
- * transaction, and the commit will find this file on the ordered
- * data list with good things to send down.
- *
- * This is a best effort solution, there is still a window where
- * using truncate to replace the contents of the file will
- * end up with a zero length file after a crash.
- */
- if (inode->i_size == 0 && test_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
- &BTRFS_I(inode)->runtime_flags))
- btrfs_add_ordered_operation(trans, root, inode);
-
- /*
* So if we truncate and then write and fsync we normally would just
* write the extents that changed, which is a problem if we need to
* first truncate that entire inode. So set this flag so we write out
@@ -8050,6 +8144,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
set_nlink(inode, 1);
btrfs_i_size_write(inode, 0);
+ unlock_new_inode(inode);
err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
if (err)
@@ -8106,7 +8201,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
mutex_init(&ei->delalloc_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree);
INIT_LIST_HEAD(&ei->delalloc_inodes);
- INIT_LIST_HEAD(&ei->ordered_operations);
RB_CLEAR_NODE(&ei->rb_node);
return inode;
@@ -8146,17 +8240,6 @@ void btrfs_destroy_inode(struct inode *inode)
if (!root)
goto free;
- /*
- * Make sure we're properly removed from the ordered operation
- * lists.
- */
- smp_mb();
- if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
- spin_lock(&root->fs_info->ordered_root_lock);
- list_del_init(&BTRFS_I(inode)->ordered_operations);
- spin_unlock(&root->fs_info->ordered_root_lock);
- }
-
if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&BTRFS_I(inode)->runtime_flags)) {
btrfs_info(root->fs_info, "inode %llu still on the orphan list",
@@ -8338,12 +8421,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
ret = 0;
/*
- * we're using rename to replace one file with another.
- * and the replacement file is large. Start IO on it now so
- * we don't add too much work to the end of the transaction
+ * we're using rename to replace one file with another. Start IO on it
+ * now so we don't add too much work to the end of the transaction
*/
- if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size &&
- old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
+ if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
filemap_flush(old_inode->i_mapping);
/* close the racy window with snapshot create/destroy ioctl */
@@ -8391,12 +8472,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
*/
btrfs_pin_log_trans(root);
}
- /*
- * make sure the inode gets flushed if it is replacing
- * something.
- */
- if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode))
- btrfs_add_ordered_operation(trans, root, old_inode);
inode_inc_iversion(old_dir);
inode_inc_iversion(new_dir);
@@ -8476,6 +8551,16 @@ out_notrans:
return ret;
}
+static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ if (flags & ~RENAME_NOREPLACE)
+ return -EINVAL;
+
+ return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry);
+}
+
static void btrfs_run_delalloc_work(struct btrfs_work *work)
{
struct btrfs_delalloc_work *delalloc_work;
@@ -8514,7 +8599,9 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
work->inode = inode;
work->wait = wait;
work->delay_iput = delay_iput;
- btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL);
+ WARN_ON_ONCE(!inode);
+ btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
+ btrfs_run_delalloc_work, NULL, NULL);
return work;
}
@@ -8718,12 +8805,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
goto out_unlock;
}
- err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
- if (err) {
- drop_inode = 1;
- goto out_unlock;
- }
-
/*
* If the active LSM wants to access the inode during
* d_instantiate it needs these. Smack checks to see
@@ -8732,23 +8813,22 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
*/
inode->i_fop = &btrfs_file_operations;
inode->i_op = &btrfs_file_inode_operations;
+ inode->i_mapping->a_ops = &btrfs_aops;
+ inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+ BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+
+ err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
+ if (err)
+ goto out_unlock_inode;
err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
if (err)
- drop_inode = 1;
- else {
- inode->i_mapping->a_ops = &btrfs_aops;
- inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
- BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
- }
- if (drop_inode)
- goto out_unlock;
+ goto out_unlock_inode;
path = btrfs_alloc_path();
if (!path) {
err = -ENOMEM;
- drop_inode = 1;
- goto out_unlock;
+ goto out_unlock_inode;
}
key.objectid = btrfs_ino(inode);
key.offset = 0;
@@ -8757,9 +8837,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
err = btrfs_insert_empty_item(trans, root, path, &key,
datasize);
if (err) {
- drop_inode = 1;
btrfs_free_path(path);
- goto out_unlock;
+ goto out_unlock_inode;
}
leaf = path->nodes[0];
ei = btrfs_item_ptr(leaf, path->slots[0],
@@ -8783,12 +8862,15 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
inode_set_bytes(inode, name_len);
btrfs_i_size_write(inode, name_len);
err = btrfs_update_inode(trans, root, inode);
- if (err)
+ if (err) {
drop_inode = 1;
+ goto out_unlock_inode;
+ }
+
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
out_unlock:
- if (!err)
- d_instantiate(dentry, inode);
btrfs_end_transaction(trans, root);
if (drop_inode) {
inode_dec_link_count(inode);
@@ -8796,6 +8878,11 @@ out_unlock:
}
btrfs_btree_balance_dirty(root);
return err;
+
+out_unlock_inode:
+ drop_inode = 1;
+ unlock_new_inode(inode);
+ goto out_unlock;
}
static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
@@ -8979,14 +9066,6 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
goto out;
}
- ret = btrfs_init_inode_security(trans, inode, dir, NULL);
- if (ret)
- goto out;
-
- ret = btrfs_update_inode(trans, root, inode);
- if (ret)
- goto out;
-
inode->i_fop = &btrfs_file_operations;
inode->i_op = &btrfs_file_inode_operations;
@@ -8994,10 +9073,26 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+ ret = btrfs_init_inode_security(trans, inode, dir, NULL);
+ if (ret)
+ goto out_inode;
+
+ ret = btrfs_update_inode(trans, root, inode);
+ if (ret)
+ goto out_inode;
ret = btrfs_orphan_add(trans, inode);
if (ret)
- goto out;
+ goto out_inode;
+ /*
+ * We set number of links to 0 in btrfs_new_inode(), and here we set
+ * it to 1 because d_tmpfile() will issue a warning if the count is 0,
+ * through:
+ *
+ * d_tmpfile() -> inode_dec_link_count() -> drop_nlink()
+ */
+ set_nlink(inode, 1);
+ unlock_new_inode(inode);
d_tmpfile(dentry, inode);
mark_inode_dirty(inode);
@@ -9007,8 +9102,12 @@ out:
iput(inode);
btrfs_balance_delayed_items(root);
btrfs_btree_balance_dirty(root);
-
return ret;
+
+out_inode:
+ unlock_new_inode(inode);
+ goto out;
+
}
static const struct inode_operations btrfs_dir_inode_operations = {
@@ -9019,7 +9118,7 @@ static const struct inode_operations btrfs_dir_inode_operations = {
.link = btrfs_link,
.mkdir = btrfs_mkdir,
.rmdir = btrfs_rmdir,
- .rename = btrfs_rename,
+ .rename2 = btrfs_rename2,
.symlink = btrfs_symlink,
.setattr = btrfs_setattr,
.mknod = btrfs_mknod,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 47aceb494d1..8a8e29878c3 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -711,39 +711,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
if (ret)
goto fail;
- ret = btrfs_orphan_cleanup(pending_snapshot->snap);
- if (ret)
- goto fail;
-
- /*
- * If orphan cleanup did remove any orphans, it means the tree was
- * modified and therefore the commit root is not the same as the
- * current root anymore. This is a problem, because send uses the
- * commit root and therefore can see inode items that don't exist
- * in the current root anymore, and for example make calls to
- * btrfs_iget, which will do tree lookups based on the current root
- * and not on the commit root. Those lookups will fail, returning a
- * -ESTALE error, and making send fail with that error. So make sure
- * a send does not see any orphans we have just removed, and that it
- * will see the same inodes regardless of whether a transaction
- * commit happened before it started (meaning that the commit root
- * will be the same as the current root) or not.
- */
- if (readonly && pending_snapshot->snap->node !=
- pending_snapshot->snap->commit_root) {
- trans = btrfs_join_transaction(pending_snapshot->snap);
- if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) {
- ret = PTR_ERR(trans);
- goto fail;
- }
- if (!IS_ERR(trans)) {
- ret = btrfs_commit_transaction(trans,
- pending_snapshot->snap);
- if (ret)
- goto fail;
- }
- }
-
inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
@@ -1052,8 +1019,10 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
return false;
next = defrag_lookup_extent(inode, em->start + em->len);
- if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE ||
- (em->block_start + em->block_len == next->block_start))
+ if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
+ ret = false;
+ else if ((em->block_start + em->block_len == next->block_start) &&
+ (em->block_len > 128 * 1024 && next->block_len > 128 * 1024))
ret = false;
free_extent_map(next);
@@ -1088,7 +1057,6 @@ static int should_defrag_range(struct inode *inode, u64 start, int thresh,
}
next_mergeable = defrag_check_next_extent(inode, em);
-
/*
* we hit a real extent, if it is big or the next extent is not a
* real extent, don't bother defragging it
@@ -1735,7 +1703,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY |
BTRFS_SUBVOL_QGROUP_INHERIT)) {
ret = -EOPNOTSUPP;
- goto out;
+ goto free_args;
}
if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC)
@@ -1745,27 +1713,31 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
if (vol_args->size > PAGE_CACHE_SIZE) {
ret = -EINVAL;
- goto out;
+ goto free_args;
}
inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size);
if (IS_ERR(inherit)) {
ret = PTR_ERR(inherit);
- goto out;
+ goto free_args;
}
}
ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
vol_args->fd, subvol, ptr,
readonly, inherit);
+ if (ret)
+ goto free_inherit;
- if (ret == 0 && ptr &&
- copy_to_user(arg +
- offsetof(struct btrfs_ioctl_vol_args_v2,
- transid), ptr, sizeof(*ptr)))
+ if (ptr && copy_to_user(arg +
+ offsetof(struct btrfs_ioctl_vol_args_v2,
+ transid),
+ ptr, sizeof(*ptr)))
ret = -EFAULT;
-out:
- kfree(vol_args);
+
+free_inherit:
kfree(inherit);
+free_args:
+ kfree(vol_args);
return ret;
}
@@ -2685,7 +2657,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args)) {
ret = PTR_ERR(vol_args);
- goto out;
+ goto err_drop;
}
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
@@ -2703,6 +2675,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
out:
kfree(vol_args);
+err_drop:
mnt_drop_write_file(file);
return ret;
}
@@ -3527,7 +3500,8 @@ process_slot:
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
- last_dest_end = new_key.offset + datal;
+ last_dest_end = ALIGN(new_key.offset + datal,
+ root->sectorsize);
ret = clone_finish_inode_update(trans, inode,
last_dest_end,
destoff, olen);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 7187b14faa6..ac734ec4cc2 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -571,18 +571,6 @@ void btrfs_remove_ordered_extent(struct inode *inode,
trace_btrfs_ordered_extent_remove(inode, entry);
- /*
- * we have no more ordered extents for this inode and
- * no dirty pages. We can safely remove it from the
- * list of ordered extents
- */
- if (RB_EMPTY_ROOT(&tree->tree) &&
- !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
- spin_lock(&root->fs_info->ordered_root_lock);
- list_del_init(&BTRFS_I(inode)->ordered_operations);
- spin_unlock(&root->fs_info->ordered_root_lock);
- }
-
if (!root->nr_ordered_extents) {
spin_lock(&root->fs_info->ordered_root_lock);
BUG_ON(list_empty(&root->ordered_root));
@@ -627,6 +615,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
spin_unlock(&root->ordered_extent_lock);
btrfs_init_work(&ordered->flush_work,
+ btrfs_flush_delalloc_helper,
btrfs_run_ordered_extent_work, NULL, NULL);
list_add_tail(&ordered->work_list, &works);
btrfs_queue_work(root->fs_info->flush_workers,
@@ -687,81 +676,6 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
}
/*
- * this is used during transaction commit to write all the inodes
- * added to the ordered operation list. These files must be fully on
- * disk before the transaction commits.
- *
- * we have two modes here, one is to just start the IO via filemap_flush
- * and the other is to wait for all the io. When we wait, we have an
- * extra check to make sure the ordered operation list really is empty
- * before we return
- */
-int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, int wait)
-{
- struct btrfs_inode *btrfs_inode;
- struct inode *inode;
- struct btrfs_transaction *cur_trans = trans->transaction;
- struct list_head splice;
- struct list_head works;
- struct btrfs_delalloc_work *work, *next;
- int ret = 0;
-
- INIT_LIST_HEAD(&splice);
- INIT_LIST_HEAD(&works);
-
- mutex_lock(&root->fs_info->ordered_extent_flush_mutex);
- spin_lock(&root->fs_info->ordered_root_lock);
- list_splice_init(&cur_trans->ordered_operations, &splice);
- while (!list_empty(&splice)) {
- btrfs_inode = list_entry(splice.next, struct btrfs_inode,
- ordered_operations);
- inode = &btrfs_inode->vfs_inode;
-
- list_del_init(&btrfs_inode->ordered_operations);
-
- /*
- * the inode may be getting freed (in sys_unlink path).
- */
- inode = igrab(inode);
- if (!inode)
- continue;
-
- if (!wait)
- list_add_tail(&BTRFS_I(inode)->ordered_operations,
- &cur_trans->ordered_operations);
- spin_unlock(&root->fs_info->ordered_root_lock);
-
- work = btrfs_alloc_delalloc_work(inode, wait, 1);
- if (!work) {
- spin_lock(&root->fs_info->ordered_root_lock);
- if (list_empty(&BTRFS_I(inode)->ordered_operations))
- list_add_tail(&btrfs_inode->ordered_operations,
- &splice);
- list_splice_tail(&splice,
- &cur_trans->ordered_operations);
- spin_unlock(&root->fs_info->ordered_root_lock);
- ret = -ENOMEM;
- goto out;
- }
- list_add_tail(&work->list, &works);
- btrfs_queue_work(root->fs_info->flush_workers,
- &work->work);
-
- cond_resched();
- spin_lock(&root->fs_info->ordered_root_lock);
- }
- spin_unlock(&root->fs_info->ordered_root_lock);
-out:
- list_for_each_entry_safe(work, next, &works, list) {
- list_del_init(&work->list);
- btrfs_wait_and_free_delalloc_work(work);
- }
- mutex_unlock(&root->fs_info->ordered_extent_flush_mutex);
- return ret;
-}
-
-/*
* Used to start IO or wait for a given ordered extent to finish.
*
* If wait is one, this effectively waits on page writeback for all the pages
@@ -1120,42 +1034,6 @@ out:
return index;
}
-
-/*
- * add a given inode to the list of inodes that must be fully on
- * disk before a transaction commit finishes.
- *
- * This basically gives us the ext3 style data=ordered mode, and it is mostly
- * used to make sure renamed files are fully on disk.
- *
- * It is a noop if the inode is already fully on disk.
- *
- * If trans is not null, we'll do a friendly check for a transaction that
- * is already flushing things and force the IO down ourselves.
- */
-void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode)
-{
- struct btrfs_transaction *cur_trans = trans->transaction;
- u64 last_mod;
-
- last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans);
-
- /*
- * if this file hasn't been changed since the last transaction
- * commit, we can safely return without doing anything
- */
- if (last_mod <= root->fs_info->last_trans_committed)
- return;
-
- spin_lock(&root->fs_info->ordered_root_lock);
- if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
- list_add_tail(&BTRFS_I(inode)->ordered_operations,
- &cur_trans->ordered_operations);
- }
- spin_unlock(&root->fs_info->ordered_root_lock);
-}
-
int __init ordered_data_init(void)
{
btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 246897058ef..d81a274d621 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -190,11 +190,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
u32 *sum, int len);
-int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, int wait);
-void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *inode);
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr);
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr);
void btrfs_get_logged_extents(struct inode *inode,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 98cb6b2630f..ded5c601d91 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1201,6 +1201,50 @@ out:
mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
+
+static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
+ struct btrfs_qgroup_operation *oper2)
+{
+ /*
+ * Ignore seq and type here, we're looking for any operation
+ * at all related to this extent on that root.
+ */
+ if (oper1->bytenr < oper2->bytenr)
+ return -1;
+ if (oper1->bytenr > oper2->bytenr)
+ return 1;
+ if (oper1->ref_root < oper2->ref_root)
+ return -1;
+ if (oper1->ref_root > oper2->ref_root)
+ return 1;
+ return 0;
+}
+
+static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup_operation *oper)
+{
+ struct rb_node *n;
+ struct btrfs_qgroup_operation *cur;
+ int cmp;
+
+ spin_lock(&fs_info->qgroup_op_lock);
+ n = fs_info->qgroup_op_tree.rb_node;
+ while (n) {
+ cur = rb_entry(n, struct btrfs_qgroup_operation, n);
+ cmp = comp_oper_exist(cur, oper);
+ if (cmp < 0) {
+ n = n->rb_right;
+ } else if (cmp) {
+ n = n->rb_left;
+ } else {
+ spin_unlock(&fs_info->qgroup_op_lock);
+ return -EEXIST;
+ }
+ }
+ spin_unlock(&fs_info->qgroup_op_lock);
+ return 0;
+}
+
static int comp_oper(struct btrfs_qgroup_operation *oper1,
struct btrfs_qgroup_operation *oper2)
{
@@ -1290,6 +1334,23 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
INIT_LIST_HEAD(&oper->elem.list);
oper->elem.seq = 0;
+
+ if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
+ /*
+ * If any operation for this bytenr/ref_root combo
+ * exists, then we know it's not exclusively owned and
+ * shouldn't be queued up.
+ *
+ * This also catches the case where we have a cloned
+ * extent that gets queued up multiple times during
+ * drop snapshot.
+ */
+ if (qgroup_oper_exists(fs_info, oper)) {
+ kfree(oper);
+ return 0;
+ }
+ }
+
ret = insert_qgroup_oper(fs_info, oper);
if (ret) {
/* Shouldn't happen so have an assert for developers */
@@ -1884,6 +1945,111 @@ out:
}
/*
+ * Process a reference to a shared subtree. This type of operation is
+ * queued during snapshot removal when we encounter extents which are
+ * shared between more than one root.
+ */
+static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup_operation *oper)
+{
+ struct ulist *roots = NULL;
+ struct ulist_node *unode;
+ struct ulist_iterator uiter;
+ struct btrfs_qgroup_list *glist;
+ struct ulist *parents;
+ int ret = 0;
+ int err;
+ struct btrfs_qgroup *qg;
+ u64 root_obj = 0;
+ struct seq_list elem = {};
+
+ parents = ulist_alloc(GFP_NOFS);
+ if (!parents)
+ return -ENOMEM;
+
+ btrfs_get_tree_mod_seq(fs_info, &elem);
+ ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
+ elem.seq, &roots);
+ btrfs_put_tree_mod_seq(fs_info, &elem);
+ if (ret < 0)
+ goto out;
+
+ if (roots->nnodes != 1)
+ goto out;
+
+ ULIST_ITER_INIT(&uiter);
+ unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
+ /*
+ * If we find our ref root then that means all refs
+ * this extent has to the root have not yet been
+ * deleted. In that case, we do nothing and let the
+ * last ref for this bytenr drive our update.
+ *
+ * This can happen for example if an extent is
+ * referenced multiple times in a snapshot (clone,
+ * etc). If we are in the middle of snapshot removal,
+ * queued updates for such an extent will find the
+ * root if we have not yet finished removing the
+ * snapshot.
+ */
+ if (unode->val == oper->ref_root)
+ goto out;
+
+ root_obj = unode->val;
+ BUG_ON(!root_obj);
+
+ spin_lock(&fs_info->qgroup_lock);
+ qg = find_qgroup_rb(fs_info, root_obj);
+ if (!qg)
+ goto out_unlock;
+
+ qg->excl += oper->num_bytes;
+ qg->excl_cmpr += oper->num_bytes;
+ qgroup_dirty(fs_info, qg);
+
+ /*
+ * Adjust counts for parent groups. First we find all
+ * parents, then in the 2nd loop we do the adjustment
+ * while adding parents of the parents to our ulist.
+ */
+ list_for_each_entry(glist, &qg->groups, next_group) {
+ err = ulist_add(parents, glist->group->qgroupid,
+ ptr_to_u64(glist->group), GFP_ATOMIC);
+ if (err < 0) {
+ ret = err;
+ goto out_unlock;
+ }
+ }
+
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(parents, &uiter))) {
+ qg = u64_to_ptr(unode->aux);
+ qg->excl += oper->num_bytes;
+ qg->excl_cmpr += oper->num_bytes;
+ qgroup_dirty(fs_info, qg);
+
+ /* Add any parents of the parents */
+ list_for_each_entry(glist, &qg->groups, next_group) {
+ err = ulist_add(parents, glist->group->qgroupid,
+ ptr_to_u64(glist->group), GFP_ATOMIC);
+ if (err < 0) {
+ ret = err;
+ goto out_unlock;
+ }
+ }
+ }
+
+out_unlock:
+ spin_unlock(&fs_info->qgroup_lock);
+
+out:
+ ulist_free(roots);
+ ulist_free(parents);
+ return ret;
+}
+
+/*
* btrfs_qgroup_account_ref is called for every ref that is added to or deleted
* from the fs. First, all roots referencing the extent are searched, and
* then the space is accounted accordingly to the different roots. The
@@ -1920,6 +2086,9 @@ static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
case BTRFS_QGROUP_OPER_SUB_SHARED:
ret = qgroup_shared_accounting(trans, fs_info, oper);
break;
+ case BTRFS_QGROUP_OPER_SUB_SUBTREE:
+ ret = qgroup_subtree_accounting(trans, fs_info, oper);
+ break;
default:
ASSERT(0);
}
@@ -2551,6 +2720,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
memset(&fs_info->qgroup_rescan_work, 0,
sizeof(fs_info->qgroup_rescan_work));
btrfs_init_work(&fs_info->qgroup_rescan_work,
+ btrfs_qgroup_rescan_helper,
btrfs_qgroup_rescan_worker, NULL, NULL);
if (ret) {
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 5952ff1fbd7..18cc68ca309 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -44,6 +44,7 @@ enum btrfs_qgroup_operation_type {
BTRFS_QGROUP_OPER_ADD_SHARED,
BTRFS_QGROUP_OPER_SUB_EXCL,
BTRFS_QGROUP_OPER_SUB_SHARED,
+ BTRFS_QGROUP_OPER_SUB_SUBTREE,
};
struct btrfs_qgroup_operation {
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 4a88f073fdd..0a6b6e4bcbb 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1416,7 +1416,8 @@ cleanup:
static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
{
- btrfs_init_work(&rbio->work, rmw_work, NULL, NULL);
+ btrfs_init_work(&rbio->work, btrfs_rmw_helper,
+ rmw_work, NULL, NULL);
btrfs_queue_work(rbio->fs_info->rmw_workers,
&rbio->work);
@@ -1424,7 +1425,8 @@ static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
static void async_read_rebuild(struct btrfs_raid_bio *rbio)
{
- btrfs_init_work(&rbio->work, read_rebuild_work, NULL, NULL);
+ btrfs_init_work(&rbio->work, btrfs_rmw_helper,
+ read_rebuild_work, NULL, NULL);
btrfs_queue_work(rbio->fs_info->rmw_workers,
&rbio->work);
@@ -1665,7 +1667,8 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
plug = container_of(cb, struct btrfs_plug_cb, cb);
if (from_schedule) {
- btrfs_init_work(&plug->work, unplug_work, NULL, NULL);
+ btrfs_init_work(&plug->work, btrfs_rmw_helper,
+ unplug_work, NULL, NULL);
btrfs_queue_work(plug->info->rmw_workers,
&plug->work);
return;
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 09230cf3a24..20408c6b665 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -798,7 +798,8 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info)
/* FIXME we cannot handle this properly right now */
BUG();
}
- btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL);
+ btrfs_init_work(&rmw->work, btrfs_readahead_helper,
+ reada_start_machine_worker, NULL, NULL);
rmw->fs_info = fs_info;
btrfs_queue_work(fs_info->readahead_workers, &rmw->work);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index b6d198f5181..f4a41f37be2 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -428,8 +428,8 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
sbio->index = i;
sbio->sctx = sctx;
sbio->page_count = 0;
- btrfs_init_work(&sbio->work, scrub_bio_end_io_worker,
- NULL, NULL);
+ btrfs_init_work(&sbio->work, btrfs_scrub_helper,
+ scrub_bio_end_io_worker, NULL, NULL);
if (i != SCRUB_BIOS_PER_SCTX - 1)
sctx->bios[i]->next_free = i + 1;
@@ -999,8 +999,8 @@ nodatasum_case:
fixup_nodatasum->root = fs_info->extent_root;
fixup_nodatasum->mirror_num = failed_mirror_index + 1;
scrub_pending_trans_workers_inc(sctx);
- btrfs_init_work(&fixup_nodatasum->work, scrub_fixup_nodatasum,
- NULL, NULL);
+ btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper,
+ scrub_fixup_nodatasum, NULL, NULL);
btrfs_queue_work(fs_info->scrub_workers,
&fixup_nodatasum->work);
goto out;
@@ -1616,7 +1616,8 @@ static void scrub_wr_bio_end_io(struct bio *bio, int err)
sbio->err = err;
sbio->bio = bio;
- btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL);
+ btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
+ scrub_wr_bio_end_io_worker, NULL, NULL);
btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
}
@@ -2904,6 +2905,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
struct scrub_ctx *sctx;
int ret;
struct btrfs_device *dev;
+ struct rcu_string *name;
if (btrfs_fs_closing(fs_info))
return -EINVAL;
@@ -2965,6 +2967,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
return -ENODEV;
}
+ if (!is_dev_replace && !readonly && !dev->writeable) {
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ rcu_read_lock();
+ name = rcu_dereference(dev->name);
+ btrfs_err(fs_info, "scrub: device %s is not writable",
+ name->str);
+ rcu_read_unlock();
+ return -EROFS;
+ }
+
mutex_lock(&fs_info->scrub_lock);
if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) {
mutex_unlock(&fs_info->scrub_lock);
@@ -3203,7 +3215,8 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
nocow_ctx->len = len;
nocow_ctx->mirror_num = mirror_num;
nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
- btrfs_init_work(&nocow_ctx->work, copy_nocow_pages_worker, NULL, NULL);
+ btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper,
+ copy_nocow_pages_worker, NULL, NULL);
INIT_LIST_HEAD(&nocow_ctx->inodes);
btrfs_queue_work(fs_info->scrub_nocow_workers,
&nocow_ctx->work);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 8e16bca69c5..c4124de4435 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -851,7 +851,6 @@ static struct dentry *get_default_root(struct super_block *sb,
struct btrfs_path *path;
struct btrfs_key location;
struct inode *inode;
- struct dentry *dentry;
u64 dir_id;
int new = 0;
@@ -922,13 +921,7 @@ setup_root:
return dget(sb->s_root);
}
- dentry = d_obtain_alias(inode);
- if (!IS_ERR(dentry)) {
- spin_lock(&dentry->d_lock);
- dentry->d_flags &= ~DCACHE_DISCONNECTED;
- spin_unlock(&dentry->d_lock);
- }
- return dentry;
+ return d_obtain_root(inode);
}
static int btrfs_fill_super(struct super_block *sb,
@@ -1672,6 +1665,21 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
return 0;
}
+/*
+ * Calculate numbers for 'df', pessimistic in case of mixed raid profiles.
+ *
+ * If there's a redundant raid level at DATA block groups, use the respective
+ * multiplier to scale the sizes.
+ *
+ * Unused device space usage is based on simulating the chunk allocator
+ * algorithm that respects the device sizes, order of allocations and the
+ * 'alloc_start' value, this is a close approximation of the actual use but
+ * there are other factors that may change the result (like a new metadata
+ * chunk).
+ *
+ * FIXME: not accurate for mixed block groups, total and free/used are ok,
+ * available appears slightly larger.
+ */
static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
@@ -1682,6 +1690,8 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
u64 total_free_data = 0;
int bits = dentry->d_sb->s_blocksize_bits;
__be32 *fsid = (__be32 *)fs_info->fsid;
+ unsigned factor = 1;
+ struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
int ret;
/* holding chunk_muext to avoid allocating new chunks */
@@ -1689,30 +1699,52 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
rcu_read_lock();
list_for_each_entry_rcu(found, head, list) {
if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
+ int i;
+
total_free_data += found->disk_total - found->disk_used;
total_free_data -=
btrfs_account_ro_block_groups_free_space(found);
+
+ for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
+ if (!list_empty(&found->block_groups[i])) {
+ switch (i) {
+ case BTRFS_RAID_DUP:
+ case BTRFS_RAID_RAID1:
+ case BTRFS_RAID_RAID10:
+ factor = 2;
+ }
+ }
+ }
}
total_used += found->disk_used;
}
+
rcu_read_unlock();
- buf->f_namelen = BTRFS_NAME_LEN;
- buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
- buf->f_bfree = buf->f_blocks - (total_used >> bits);
- buf->f_bsize = dentry->d_sb->s_blocksize;
- buf->f_type = BTRFS_SUPER_MAGIC;
+ buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor);
+ buf->f_blocks >>= bits;
+ buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits);
+
+ /* Account global block reserve as used, it's in logical size already */
+ spin_lock(&block_rsv->lock);
+ buf->f_bfree -= block_rsv->size >> bits;
+ spin_unlock(&block_rsv->lock);
+
buf->f_bavail = total_free_data;
ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data);
if (ret) {
mutex_unlock(&fs_info->chunk_mutex);
return ret;
}
- buf->f_bavail += total_free_data;
+ buf->f_bavail += div_u64(total_free_data, factor);
buf->f_bavail = buf->f_bavail >> bits;
mutex_unlock(&fs_info->chunk_mutex);
+ buf->f_type = BTRFS_SUPER_MAGIC;
+ buf->f_bsize = dentry->d_sb->s_blocksize;
+ buf->f_namelen = BTRFS_NAME_LEN;
+
/* We treat it as constant endianness (it doesn't matter _which_)
because we want the fsid to come out the same whether mounted
on a big-endian or little-endian host */
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 78699364f53..12e53556e21 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -614,7 +614,7 @@ int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info,
if (!fs_info->device_dir_kobj)
return -EINVAL;
- if (one_device) {
+ if (one_device && one_device->bdev) {
disk = one_device->bdev->bd_part;
disk_kobj = &part_to_dev(disk)->kobj;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 5f379affdf2..d89c6d3542c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -218,7 +218,6 @@ loop:
spin_lock_init(&cur_trans->delayed_refs.lock);
INIT_LIST_HEAD(&cur_trans->pending_snapshots);
- INIT_LIST_HEAD(&cur_trans->ordered_operations);
INIT_LIST_HEAD(&cur_trans->pending_chunks);
INIT_LIST_HEAD(&cur_trans->switch_commits);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
@@ -1612,27 +1611,6 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
kmem_cache_free(btrfs_trans_handle_cachep, trans);
}
-static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- int ret;
-
- ret = btrfs_run_delayed_items(trans, root);
- if (ret)
- return ret;
-
- /*
- * rename don't use btrfs_join_transaction, so, once we
- * set the transaction to blocked above, we aren't going
- * to get any new ordered operations. We can safely run
- * it here and no for sure that nothing new will be added
- * to the list
- */
- ret = btrfs_run_ordered_operations(trans, root, 1);
-
- return ret;
-}
-
static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
{
if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
@@ -1653,13 +1631,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_transaction *prev_trans = NULL;
int ret;
- ret = btrfs_run_ordered_operations(trans, root, 0);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- btrfs_end_transaction(trans, root);
- return ret;
- }
-
/* Stop the commit early if ->aborted is set */
if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
ret = cur_trans->aborted;
@@ -1740,7 +1711,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
if (ret)
goto cleanup_transaction;
- ret = btrfs_flush_all_pending_stuffs(trans, root);
+ ret = btrfs_run_delayed_items(trans, root);
if (ret)
goto cleanup_transaction;
@@ -1748,7 +1719,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
extwriter_counter_read(cur_trans) == 0);
/* some pending stuffs might be added after the previous flush. */
- ret = btrfs_flush_all_pending_stuffs(trans, root);
+ ret = btrfs_run_delayed_items(trans, root);
if (ret)
goto cleanup_transaction;
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 7dd558ed071..579be51b27e 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -55,7 +55,6 @@ struct btrfs_transaction {
wait_queue_head_t writer_wait;
wait_queue_head_t commit_wait;
struct list_head pending_snapshots;
- struct list_head ordered_operations;
struct list_head pending_chunks;
struct list_head switch_commits;
struct btrfs_delayed_ref_root delayed_refs;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9e1f2cd5e67..d296efe2d3e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -94,8 +94,10 @@
#define LOG_WALK_REPLAY_ALL 3
static int btrfs_log_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
- int inode_only);
+ struct btrfs_root *root, struct inode *inode,
+ int inode_only,
+ const loff_t start,
+ const loff_t end);
static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 objectid);
@@ -3298,7 +3300,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
struct list_head ordered_sums;
int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
bool has_extents = false;
- bool need_find_last_extent = (*last_extent == 0);
+ bool need_find_last_extent = true;
bool done = false;
INIT_LIST_HEAD(&ordered_sums);
@@ -3352,8 +3354,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
*/
if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) {
has_extents = true;
- if (need_find_last_extent &&
- first_key.objectid == (u64)-1)
+ if (first_key.objectid == (u64)-1)
first_key = ins_keys[i];
} else {
need_find_last_extent = false;
@@ -3427,6 +3428,16 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
if (!has_extents)
return ret;
+ if (need_find_last_extent && *last_extent == first_key.offset) {
+ /*
+ * We don't have any leaves between our current one and the one
+ * we processed before that can have file extent items for our
+ * inode (and have a generation number smaller than our current
+ * transaction id).
+ */
+ need_find_last_extent = false;
+ }
+
/*
* Because we use btrfs_search_forward we could skip leaves that were
* not modified and then assume *last_extent is valid when it really
@@ -3537,7 +3548,7 @@ fill_holes:
0, 0);
if (ret)
break;
- *last_extent = offset + len;
+ *last_extent = extent_end;
}
/*
* Need to let the callers know we dropped the path so they should
@@ -3849,8 +3860,10 @@ process:
* This handles both files and directories.
*/
static int btrfs_log_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
- int inode_only)
+ struct btrfs_root *root, struct inode *inode,
+ int inode_only,
+ const loff_t start,
+ const loff_t end)
{
struct btrfs_path *path;
struct btrfs_path *dst_path;
@@ -3867,6 +3880,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
int ins_nr;
bool fast_search = false;
u64 ino = btrfs_ino(inode);
+ struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
path = btrfs_alloc_path();
if (!path)
@@ -4040,13 +4054,35 @@ log_extents:
goto out_unlock;
}
} else if (inode_only == LOG_INODE_ALL) {
- struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
struct extent_map *em, *n;
- write_lock(&tree->lock);
- list_for_each_entry_safe(em, n, &tree->modified_extents, list)
- list_del_init(&em->list);
- write_unlock(&tree->lock);
+ write_lock(&em_tree->lock);
+ /*
+ * We can't just remove every em if we're called for a ranged
+ * fsync - that is, one that doesn't cover the whole possible
+ * file range (0 to LLONG_MAX). This is because we can have
+ * em's that fall outside the range we're logging and therefore
+ * their ordered operations haven't completed yet
+ * (btrfs_finish_ordered_io() not invoked yet). This means we
+ * didn't get their respective file extent item in the fs/subvol
+ * tree yet, and need to let the next fast fsync (one which
+ * consults the list of modified extent maps) find the em so
+ * that it logs a matching file extent item and waits for the
+ * respective ordered operation to complete (if it's still
+ * running).
+ *
+ * Removing every em outside the range we're logging would make
+ * the next fast fsync not log their matching file extent items,
+ * therefore making us lose data after a log replay.
+ */
+ list_for_each_entry_safe(em, n, &em_tree->modified_extents,
+ list) {
+ const u64 mod_end = em->mod_start + em->mod_len - 1;
+
+ if (em->mod_start >= start && mod_end <= end)
+ list_del_init(&em->list);
+ }
+ write_unlock(&em_tree->lock);
}
if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
@@ -4056,8 +4092,19 @@ log_extents:
goto out_unlock;
}
}
- BTRFS_I(inode)->logged_trans = trans->transid;
- BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
+
+ write_lock(&em_tree->lock);
+ /*
+ * If we're doing a ranged fsync and there are still modified extents
+ * in the list, we must run on the next fsync call as it might cover
+ * those extents (a full fsync or an fsync for other range).
+ */
+ if (list_empty(&em_tree->modified_extents)) {
+ BTRFS_I(inode)->logged_trans = trans->transid;
+ BTRFS_I(inode)->last_log_commit =
+ BTRFS_I(inode)->last_sub_trans;
+ }
+ write_unlock(&em_tree->lock);
out_unlock:
if (unlikely(err))
btrfs_put_logged_extents(&logged_list);
@@ -4152,7 +4199,10 @@ out:
*/
static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
- struct dentry *parent, int exists_only,
+ struct dentry *parent,
+ const loff_t start,
+ const loff_t end,
+ int exists_only,
struct btrfs_log_ctx *ctx)
{
int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
@@ -4198,7 +4248,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (ret)
goto end_no_trans;
- ret = btrfs_log_inode(trans, root, inode, inode_only);
+ ret = btrfs_log_inode(trans, root, inode, inode_only, start, end);
if (ret)
goto end_trans;
@@ -4226,7 +4276,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (BTRFS_I(inode)->generation >
root->fs_info->last_trans_committed) {
- ret = btrfs_log_inode(trans, root, inode, inode_only);
+ ret = btrfs_log_inode(trans, root, inode, inode_only,
+ 0, LLONG_MAX);
if (ret)
goto end_trans;
}
@@ -4260,13 +4311,15 @@ end_no_trans:
*/
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct dentry *dentry,
+ const loff_t start,
+ const loff_t end,
struct btrfs_log_ctx *ctx)
{
struct dentry *parent = dget_parent(dentry);
int ret;
ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent,
- 0, ctx);
+ start, end, 0, ctx);
dput(parent);
return ret;
@@ -4503,6 +4556,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
root->fs_info->last_trans_committed))
return 0;
- return btrfs_log_inode_parent(trans, root, inode, parent, 1, NULL);
+ return btrfs_log_inode_parent(trans, root, inode, parent, 0,
+ LLONG_MAX, 1, NULL);
}
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 7f5b41bd537..e2e798ae7cd 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -59,6 +59,8 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
int btrfs_recover_log_trees(struct btrfs_root *tree_root);
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct dentry *dentry,
+ const loff_t start,
+ const loff_t end,
struct btrfs_log_ctx *ctx);
int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h
index 7f78cbf5cf4..4c29db604bb 100644
--- a/fs/btrfs/ulist.h
+++ b/fs/btrfs/ulist.h
@@ -57,6 +57,21 @@ void ulist_free(struct ulist *ulist);
int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask);
int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
u64 *old_aux, gfp_t gfp_mask);
+
+/* just like ulist_add_merge() but take a pointer for the aux data */
+static inline int ulist_add_merge_ptr(struct ulist *ulist, u64 val, void *aux,
+ void **old_aux, gfp_t gfp_mask)
+{
+#if BITS_PER_LONG == 32
+ u64 old64 = (uintptr_t)*old_aux;
+ int ret = ulist_add_merge(ulist, val, (uintptr_t)aux, &old64, gfp_mask);
+ *old_aux = (void *)((uintptr_t)old64);
+ return ret;
+#else
+ return ulist_add_merge(ulist, val, (u64)aux, (u64 *)old_aux, gfp_mask);
+#endif
+}
+
struct ulist_node *ulist_next(struct ulist *ulist,
struct ulist_iterator *uiter);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6cb82f62cb7..340a92d08e8 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -508,6 +508,44 @@ static noinline int device_list_add(const char *path,
ret = 1;
device->fs_devices = fs_devices;
} else if (!device->name || strcmp(device->name->str, path)) {
+ /*
+ * When FS is already mounted.
+ * 1. If you are here and if the device->name is NULL that
+ * means this device was missing at time of FS mount.
+ * 2. If you are here and if the device->name is different
+ * from 'path' that means either
+ * a. The same device disappeared and reappeared with
+ * different name. or
+ * b. The missing-disk-which-was-replaced, has
+ * reappeared now.
+ *
+ * We must allow 1 and 2a above. But 2b would be spurious
+ * and unintentional.
+ *
+ * Further, in case of 1 and 2a above, the disk at 'path'
+ * would have missed some transactions while it was away, and
+ * in case of 2a the stale bdev has to be updated as well.
+ * 2b must not be allowed at any time.
+ */
+
+ /*
+ * As of now don't allow update to btrfs_fs_device through
+ * the btrfs dev scan cli, after FS has been mounted.
+ */
+ if (fs_devices->opened) {
+ return -EBUSY;
+ } else {
+ /*
+ * That is, if the FS is _not_ mounted and you are
+ * here, that means there is more than one disk
+ * with the same uuid and devid. We keep the one
+ * with the larger generation number, or the last-in
+ * if the generations are equal.
+ */
+ if (found_transid < device->generation)
+ return -EEXIST;
+ }
+
name = rcu_string_strdup(path, GFP_NOFS);
if (!name)
return -ENOMEM;
@@ -519,6 +557,15 @@ static noinline int device_list_add(const char *path,
}
}
+ /*
+ * Unmount does not free the btrfs_device struct but would zero
+ * generation along with most of the other members. So just update
+ * it back. We need it to pick the disk with largest generation
+ * (as above).
+ */
+ if (!fs_devices->opened)
+ device->generation = found_transid;
+
if (found_transid > fs_devices->latest_trans) {
fs_devices->latest_devid = devid;
fs_devices->latest_trans = found_transid;
@@ -1436,7 +1483,7 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans,
btrfs_set_device_io_align(leaf, dev_item, device->io_align);
btrfs_set_device_io_width(leaf, dev_item, device->io_width);
btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
- btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
+ btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes);
btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
btrfs_set_device_group(leaf, dev_item, 0);
btrfs_set_device_seek_speed(leaf, dev_item, 0);
@@ -1671,7 +1718,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
device->fs_devices->total_devices--;
if (device->missing)
- root->fs_info->fs_devices->missing_devices--;
+ device->fs_devices->missing_devices--;
next_device = list_entry(root->fs_info->fs_devices->devices.next,
struct btrfs_device, dev_list);
@@ -1801,8 +1848,12 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info,
if (srcdev->bdev) {
fs_info->fs_devices->open_devices--;
- /* zero out the old super */
- btrfs_scratch_superblock(srcdev);
+ /*
+ * zero out the old super only if the device is writable
+ * (a seed device, for example, is not)
+ */
+ if (srcdev->writeable)
+ btrfs_scratch_superblock(srcdev);
}
call_rcu(&srcdev->rcu, free_device);
@@ -1941,6 +1992,9 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
fs_devices->seeding = 0;
fs_devices->num_devices = 0;
fs_devices->open_devices = 0;
+ fs_devices->missing_devices = 0;
+ fs_devices->num_can_discard = 0;
+ fs_devices->rotating = 0;
fs_devices->seed = seed_devices;
generate_random_uuid(fs_devices->fsid);
@@ -5800,7 +5854,8 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
else
generate_random_uuid(dev->uuid);
- btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL);
+ btrfs_init_work(&dev->work, btrfs_submit_helper,
+ pending_bios_fn, NULL, NULL);
return dev;
}
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 469f2e8657e..cebf2ebefb5 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -172,14 +172,24 @@ out:
int ceph_init_acl(struct dentry *dentry, struct inode *inode, struct inode *dir)
{
struct posix_acl *default_acl, *acl;
+ umode_t new_mode = inode->i_mode;
int error;
- error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
+ error = posix_acl_create(dir, &new_mode, &default_acl, &acl);
if (error)
return error;
- if (!default_acl && !acl)
+ if (!default_acl && !acl) {
cache_no_acl(inode);
+ if (new_mode != inode->i_mode) {
+ struct iattr newattrs = {
+ .ia_mode = new_mode,
+ .ia_valid = ATTR_MODE,
+ };
+ error = ceph_setattr(dentry, &newattrs);
+ }
+ return error;
+ }
if (default_acl) {
error = ceph_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 1fde164b74b..6d1cd45dca8 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3277,7 +3277,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
rel->ino = cpu_to_le64(ceph_ino(inode));
rel->cap_id = cpu_to_le64(cap->cap_id);
rel->seq = cpu_to_le32(cap->seq);
- rel->issue_seq = cpu_to_le32(cap->issue_seq),
+ rel->issue_seq = cpu_to_le32(cap->issue_seq);
rel->mseq = cpu_to_le32(cap->mseq);
rel->caps = cpu_to_le32(cap->implemented);
rel->wanted = cpu_to_le32(cap->mds_wanted);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 302085100c2..2eb02f80a0a 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -423,6 +423,9 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
dout("sync_read on file %p %llu~%u %s\n", file, off,
(unsigned)len,
(file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
+
+ if (!len)
+ return 0;
/*
* flush any page cache pages in this range. this
* will make concurrent normal and sync io slow,
@@ -470,8 +473,11 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
size_t left = ret;
while (left) {
- int copy = min_t(size_t, PAGE_SIZE, left);
- l = copy_page_to_iter(pages[k++], 0, copy, i);
+ size_t page_off = off & ~PAGE_MASK;
+ size_t copy = min_t(size_t,
+ PAGE_SIZE - page_off, left);
+ l = copy_page_to_iter(pages[k++], page_off,
+ copy, i);
off += l;
left -= l;
if (l < copy)
@@ -531,7 +537,7 @@ static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe)
* objects, rollback on failure, etc.)
*/
static ssize_t
-ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from)
+ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
@@ -547,7 +553,6 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from)
int check_caps = 0;
int ret;
struct timespec mtime = CURRENT_TIME;
- loff_t pos = iocb->ki_pos;
size_t count = iov_iter_count(from);
if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
@@ -646,7 +651,8 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from)
* correct atomic write, we should e.g. take write locks on all
* objects, rollback on failure, etc.)
*/
-static ssize_t ceph_sync_write(struct kiocb *iocb, struct iov_iter *from)
+static ssize_t
+ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
@@ -663,7 +669,6 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, struct iov_iter *from)
int check_caps = 0;
int ret;
struct timespec mtime = CURRENT_TIME;
- loff_t pos = iocb->ki_pos;
size_t count = iov_iter_count(from);
if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
@@ -918,9 +923,9 @@ retry_snap:
/* we might need to revert back to that point */
data = *from;
if (file->f_flags & O_DIRECT)
- written = ceph_sync_direct_write(iocb, &data);
+ written = ceph_sync_direct_write(iocb, &data, pos);
else
- written = ceph_sync_write(iocb, &data);
+ written = ceph_sync_write(iocb, &data, pos);
if (written == -EOLDSNAPC) {
dout("aio_write %p %llx.%llx %llu~%u"
"got EOLDSNAPC, retrying\n",
@@ -1177,6 +1182,9 @@ static long ceph_fallocate(struct file *file, int mode,
loff_t endoff = 0;
loff_t size;
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ return -EOPNOTSUPP;
+
if (!S_ISREG(inode->i_mode))
return -EOPNOTSUPP;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 92a2548278f..bad07c09f91 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1904,6 +1904,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts);
if (req->r_got_unsafe) {
+ void *p;
/*
* Replay. Do not regenerate message (and rebuild
* paths, etc.); just use the original message.
@@ -1924,8 +1925,13 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
/* remove cap/dentry releases from message */
rhead->num_releases = 0;
- msg->hdr.front_len = cpu_to_le32(req->r_request_release_offset);
- msg->front.iov_len = req->r_request_release_offset;
+
+ /* time stamp */
+ p = msg->front.iov_base + req->r_request_release_offset;
+ ceph_encode_copy(&p, &req->r_stamp, sizeof(req->r_stamp));
+
+ msg->front.iov_len = p - msg->front.iov_base;
+ msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
return 0;
}
@@ -2061,11 +2067,12 @@ static void __wake_requests(struct ceph_mds_client *mdsc,
static void kick_requests(struct ceph_mds_client *mdsc, int mds)
{
struct ceph_mds_request *req;
- struct rb_node *p;
+ struct rb_node *p = rb_first(&mdsc->request_tree);
dout("kick_requests mds%d\n", mds);
- for (p = rb_first(&mdsc->request_tree); p; p = rb_next(p)) {
+ while (p) {
req = rb_entry(p, struct ceph_mds_request, r_node);
+ p = rb_next(p);
if (req->r_got_unsafe)
continue;
if (req->r_session &&
@@ -2248,6 +2255,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
*/
if (result == -ESTALE) {
dout("got ESTALE on request %llu", req->r_tid);
+ req->r_resend_mds = -1;
if (req->r_direct_mode != USE_AUTH_MDS) {
dout("not using auth, setting for that now");
req->r_direct_mode = USE_AUTH_MDS;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 06150fd745a..f6e12377335 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -755,7 +755,7 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
goto out;
}
} else {
- root = d_obtain_alias(inode);
+ root = d_obtain_root(inode);
}
ceph_init_dentry(root);
dout("open_root_inode success, root dentry is %p\n", root);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index c9c2b887381..12f58d22e01 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -592,12 +592,12 @@ start:
xattr_version = ci->i_xattrs.version;
spin_unlock(&ci->i_ceph_lock);
- xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *),
+ xattrs = kcalloc(numattr, sizeof(struct ceph_inode_xattr *),
GFP_NOFS);
err = -ENOMEM;
if (!xattrs)
goto bad_lock;
- memset(xattrs, 0, numattr*sizeof(struct ceph_xattr *));
+
for (i = 0; i < numattr; i++) {
xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
GFP_NOFS);
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 603f18a65c1..a2172f3f69e 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -22,6 +22,11 @@ config CIFS
support for OS/2 and Windows ME and similar servers is provided as
well.
+ The module also provides optional support for the follow-on
+ protocols for CIFS including SMB3, which enables
+ useful performance and security features (see the description
+ of CONFIG_CIFS_SMB2).
+
The cifs module provides an advanced network file system
client for mounting to CIFS compliant servers. It includes
support for DFS (hierarchical name space), secure per-user
@@ -121,7 +126,8 @@ config CIFS_ACL
depends on CIFS_XATTR && KEYS
help
Allows fetching CIFS/NTFS ACL from the server. The DACL blob
- is handed over to the application/caller.
+ is handed over to the application/caller. See the man
+ page for getcifsacl for more information.
config CIFS_DEBUG
bool "Enable CIFS debugging routines"
@@ -162,7 +168,7 @@ config CIFS_NFSD_EXPORT
Allows NFS server to export a CIFS mounted share (nfsd over cifs)
config CIFS_SMB2
- bool "SMB2 network file system support"
+ bool "SMB2 and SMB3 network file system support"
depends on CIFS && INET
select NLS
select KEYS
@@ -170,16 +176,21 @@ config CIFS_SMB2
select DNS_RESOLVER
help
- This enables experimental support for the SMB2 (Server Message Block
- version 2) protocol. The SMB2 protocol is the successor to the
- popular CIFS and SMB network file sharing protocols. SMB2 is the
- native file sharing mechanism for recent versions of Windows
- operating systems (since Vista). SMB2 enablement will eventually
- allow users better performance, security and features, than would be
- possible with cifs. Note that smb2 mount options also are simpler
- (compared to cifs) due to protocol improvements.
-
- Unless you are a developer or tester, say N.
+ This enables support for the Server Message Block version 2
+ family of protocols, including SMB3. SMB3 support is
+ enabled on mount by specifying "vers=3.0" in the mount
+ options. These protocols are the successors to the popular
+ CIFS and SMB network file sharing protocols. SMB3 is the
+ native file sharing mechanism for the more recent
+ versions of Windows (Windows 8 and Windows 2012 and
+ later) and Samba server and many others support SMB3 well.
+ In general SMB3 enables better performance, security
+ and features, than would be possible with CIFS (Note that
+ when mounting to Samba, due to the CIFS POSIX extensions,
+ CIFS mounts can provide slightly better POSIX compatibility
+ than SMB3 mounts do though). Note that SMB2/SMB3 mount
+ options are also slightly simpler (compared to CIFS) due
+ to protocol improvements.
config CIFS_FSCACHE
bool "Provide CIFS client caching support"
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 88839806742..889b9845575 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -207,6 +207,19 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
+static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len)
+{
+ struct super_block *sb = file->f_path.dentry->d_sb;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
+ struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
+ struct TCP_Server_Info *server = tcon->ses->server;
+
+ if (server->ops->fallocate)
+ return server->ops->fallocate(file, tcon, mode, off, len);
+
+ return -EOPNOTSUPP;
+}
+
static int cifs_permission(struct inode *inode, int mask)
{
struct cifs_sb_info *cifs_sb;
@@ -812,8 +825,9 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
if (!(S_ISREG(inode->i_mode)))
return -EINVAL;
- /* check if file is oplocked */
- if (((arg == F_RDLCK) && CIFS_CACHE_READ(CIFS_I(inode))) ||
+ /* Check if file is oplocked if this is a request for a new lease */
+ if (arg == F_UNLCK ||
+ ((arg == F_RDLCK) && CIFS_CACHE_READ(CIFS_I(inode))) ||
((arg == F_WRLCK) && CIFS_CACHE_WRITE(CIFS_I(inode))))
return generic_setlease(file, arg, lease);
else if (tlink_tcon(cfile->tlink)->local_lease &&
@@ -848,7 +862,7 @@ const struct inode_operations cifs_dir_inode_ops = {
.link = cifs_hardlink,
.mkdir = cifs_mkdir,
.rmdir = cifs_rmdir,
- .rename = cifs_rename,
+ .rename2 = cifs_rename2,
.permission = cifs_permission,
/* revalidate:cifs_revalidate, */
.setattr = cifs_setattr,
@@ -908,6 +922,7 @@ const struct file_operations cifs_file_ops = {
.unlocked_ioctl = cifs_ioctl,
#endif /* CONFIG_CIFS_POSIX */
.setlease = cifs_setlease,
+ .fallocate = cifs_fallocate,
};
const struct file_operations cifs_file_strict_ops = {
@@ -927,6 +942,7 @@ const struct file_operations cifs_file_strict_ops = {
.unlocked_ioctl = cifs_ioctl,
#endif /* CONFIG_CIFS_POSIX */
.setlease = cifs_setlease,
+ .fallocate = cifs_fallocate,
};
const struct file_operations cifs_file_direct_ops = {
@@ -947,6 +963,7 @@ const struct file_operations cifs_file_direct_ops = {
#endif /* CONFIG_CIFS_POSIX */
.llseek = cifs_llseek,
.setlease = cifs_setlease,
+ .fallocate = cifs_fallocate,
};
const struct file_operations cifs_file_nobrl_ops = {
@@ -965,6 +982,7 @@ const struct file_operations cifs_file_nobrl_ops = {
.unlocked_ioctl = cifs_ioctl,
#endif /* CONFIG_CIFS_POSIX */
.setlease = cifs_setlease,
+ .fallocate = cifs_fallocate,
};
const struct file_operations cifs_file_strict_nobrl_ops = {
@@ -983,6 +1001,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
.unlocked_ioctl = cifs_ioctl,
#endif /* CONFIG_CIFS_POSIX */
.setlease = cifs_setlease,
+ .fallocate = cifs_fallocate,
};
const struct file_operations cifs_file_direct_nobrl_ops = {
@@ -1002,6 +1021,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
#endif /* CONFIG_CIFS_POSIX */
.llseek = cifs_llseek,
.setlease = cifs_setlease,
+ .fallocate = cifs_fallocate,
};
const struct file_operations cifs_dir_ops = {
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 56048026333..b0fafa49950 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -68,8 +68,8 @@ extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *);
extern int cifs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
extern int cifs_mkdir(struct inode *, struct dentry *, umode_t);
extern int cifs_rmdir(struct inode *, struct dentry *);
-extern int cifs_rename(struct inode *, struct dentry *, struct inode *,
- struct dentry *);
+extern int cifs_rename2(struct inode *, struct dentry *, struct inode *,
+ struct dentry *, unsigned int);
extern int cifs_revalidate_file_attr(struct file *filp);
extern int cifs_revalidate_dentry_attr(struct dentry *);
extern int cifs_revalidate_file(struct file *filp);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 0012e1e291d..25b8392bfdd 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -70,11 +70,6 @@
#define SERVER_NAME_LENGTH 40
#define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1)
-/* used to define string lengths for reversing unicode strings */
-/* (256+1)*2 = 514 */
-/* (max path length + 1 for null) * 2 for unicode */
-#define MAX_NAME 514
-
/* SMB echo "timeout" -- FIXME: tunable? */
#define SMB_ECHO_INTERVAL (60 * HZ)
@@ -409,6 +404,10 @@ struct smb_version_operations {
/* get mtu credits */
int (*wait_mtu_credits)(struct TCP_Server_Info *, unsigned int,
unsigned int *, unsigned int *);
+ /* check if we need to issue closedir */
+ bool (*dir_needs_close)(struct cifsFileInfo *);
+ long (*fallocate)(struct file *, struct cifs_tcon *, int, loff_t,
+ loff_t);
};
struct smb_version_values {
@@ -883,6 +882,7 @@ struct cifs_tcon {
for this mount even if server would support */
bool local_lease:1; /* check leases (only) on local system not remote */
bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */
+ bool broken_sparse_sup; /* if server or share does not support sparse */
bool need_reconnect:1; /* connection reset, tid now invalid */
#ifdef CONFIG_CIFS_SMB2
bool print:1; /* set if connection to printer share */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 33df36ef9d5..5f9822ac024 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -2253,6 +2253,29 @@ typedef struct {
/* minimum includes first three fields, and empty FS Name */
#define MIN_FS_ATTR_INFO_SIZE 12
+
+/* List of FileSystemAttributes - see 2.5.1 of MS-FSCC */
+#define FILE_SUPPORT_INTEGRITY_STREAMS 0x04000000
+#define FILE_SUPPORTS_USN_JOURNAL 0x02000000
+#define FILE_SUPPORTS_OPEN_BY_FILE_ID 0x01000000
+#define FILE_SUPPORTS_EXTENDED_ATTRIBUTES 0x00800000
+#define FILE_SUPPORTS_HARD_LINKS 0x00400000
+#define FILE_SUPPORTS_TRANSACTIONS 0x00200000
+#define FILE_SEQUENTIAL_WRITE_ONCE 0x00100000
+#define FILE_READ_ONLY_VOLUME 0x00080000
+#define FILE_NAMED_STREAMS 0x00040000
+#define FILE_SUPPORTS_ENCRYPTION 0x00020000
+#define FILE_SUPPORTS_OBJECT_IDS 0x00010000
+#define FILE_VOLUME_IS_COMPRESSED 0x00008000
+#define FILE_SUPPORTS_REMOTE_STORAGE 0x00000100
+#define FILE_SUPPORTS_REPARSE_POINTS 0x00000080
+#define FILE_SUPPORTS_SPARSE_FILES 0x00000040
+#define FILE_VOLUME_QUOTAS 0x00000020
+#define FILE_FILE_COMPRESSION 0x00000010
+#define FILE_PERSISTENT_ACLS 0x00000008
+#define FILE_UNICODE_ON_DISK 0x00000004
+#define FILE_CASE_PRESERVED_NAMES 0x00000002
+#define FILE_CASE_SENSITIVE_SEARCH 0x00000001
typedef struct {
__le32 Attributes;
__le32 MaxPathNameComponentLength;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 03ed8a09581..8a9fded7c13 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -837,7 +837,6 @@ cifs_demultiplex_thread(void *p)
struct TCP_Server_Info *server = p;
unsigned int pdu_length;
char *buf = NULL;
- struct task_struct *task_to_wake = NULL;
struct mid_q_entry *mid_entry;
current->flags |= PF_MEMALLOC;
@@ -928,19 +927,7 @@ cifs_demultiplex_thread(void *p)
if (server->smallbuf) /* no sense logging a debug message if NULL */
cifs_small_buf_release(server->smallbuf);
- task_to_wake = xchg(&server->tsk, NULL);
clean_demultiplex_info(server);
-
- /* if server->tsk was NULL then wait for a signal before exiting */
- if (!task_to_wake) {
- set_current_state(TASK_INTERRUPTIBLE);
- while (!signal_pending(current)) {
- schedule();
- set_current_state(TASK_INTERRUPTIBLE);
- }
- set_current_state(TASK_RUNNING);
- }
-
module_put_and_exit(0);
}
@@ -1600,6 +1587,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
tmp_end++;
if (!(tmp_end < end && tmp_end[1] == delim)) {
/* No it is not. Set the password to NULL */
+ kfree(vol->password);
vol->password = NULL;
break;
}
@@ -1637,6 +1625,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
options = end;
}
+ kfree(vol->password);
/* Now build new password string */
temp_len = strlen(value);
vol->password = kzalloc(temp_len+1, GFP_KERNEL);
@@ -2061,8 +2050,6 @@ cifs_find_tcp_session(struct smb_vol *vol)
static void
cifs_put_tcp_session(struct TCP_Server_Info *server)
{
- struct task_struct *task;
-
spin_lock(&cifs_tcp_ses_lock);
if (--server->srv_count > 0) {
spin_unlock(&cifs_tcp_ses_lock);
@@ -2086,10 +2073,6 @@ cifs_put_tcp_session(struct TCP_Server_Info *server)
kfree(server->session_key.response);
server->session_key.response = NULL;
server->session_key.len = 0;
-
- task = xchg(&server->tsk, NULL);
- if (task)
- force_sig(SIGKILL, task);
}
static struct TCP_Server_Info *
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 3db0c5fd9a1..6cbd9c688cf 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -497,6 +497,14 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
goto out;
}
+ if (file->f_flags & O_DIRECT &&
+ CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
+ if (CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
+ file->f_op = &cifs_file_direct_nobrl_ops;
+ else
+ file->f_op = &cifs_file_direct_ops;
+ }
+
file_info = cifs_new_fileinfo(&fid, file, tlink, oplock);
if (file_info == NULL) {
if (server->ops->close)
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 4ab2f79ffa7..7c018a1c52f 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -467,6 +467,14 @@ int cifs_open(struct inode *inode, struct file *file)
cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
inode, file->f_flags, full_path);
+ if (file->f_flags & O_DIRECT &&
+ cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
+ file->f_op = &cifs_file_direct_nobrl_ops;
+ else
+ file->f_op = &cifs_file_direct_ops;
+ }
+
if (server->oplocks)
oplock = REQ_OPLOCK;
else
@@ -762,7 +770,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
cifs_dbg(FYI, "Freeing private data in close dir\n");
spin_lock(&cifs_file_list_lock);
- if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
+ if (server->ops->dir_needs_close(cfile)) {
cfile->invalidHandle = true;
spin_unlock(&cifs_file_list_lock);
if (server->ops->close_dir)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 41de3935caa..7899a40465b 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1627,8 +1627,9 @@ do_rename_exit:
}
int
-cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
- struct inode *target_dir, struct dentry *target_dentry)
+cifs_rename2(struct inode *source_dir, struct dentry *source_dentry,
+ struct inode *target_dir, struct dentry *target_dentry,
+ unsigned int flags)
{
char *from_name = NULL;
char *to_name = NULL;
@@ -1640,6 +1641,9 @@ cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
unsigned int xid;
int rc, tmprc;
+ if (flags & ~RENAME_NOREPLACE)
+ return -EINVAL;
+
cifs_sb = CIFS_SB(source_dir->i_sb);
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
@@ -1667,6 +1671,12 @@ cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry,
to_name);
+ /*
+ * No-replace is the natural behavior for CIFS, so skip unlink hacks.
+ */
+ if (flags & RENAME_NOREPLACE)
+ goto cifs_rename_exit;
+
if (rc == -EEXIST && tcon->unix_ext) {
/*
* Are src and dst hardlinks of same inode? We can only tell
@@ -1710,13 +1720,22 @@ cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
unlink_target:
/* Try unlinking the target dentry if it's not negative */
if (target_dentry->d_inode && (rc == -EACCES || rc == -EEXIST)) {
- tmprc = cifs_unlink(target_dir, target_dentry);
+ if (d_is_dir(target_dentry))
+ tmprc = cifs_rmdir(target_dir, target_dentry);
+ else
+ tmprc = cifs_unlink(target_dir, target_dentry);
if (tmprc)
goto cifs_rename_exit;
rc = cifs_do_rename(xid, source_dentry, from_name,
target_dentry, to_name);
}
+ /* force revalidate to go get info when needed */
+ CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0;
+
+ source_dir->i_ctime = source_dir->i_mtime = target_dir->i_ctime =
+ target_dir->i_mtime = current_fs_time(source_dir->i_sb);
+
cifs_rename_exit:
kfree(info_buf_source);
kfree(from_name);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 81340c6253e..b7415d596db 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -574,13 +574,6 @@ void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock)
cinode->oplock = 0;
}
-static int
-cifs_oplock_break_wait(void *unused)
-{
- schedule();
- return signal_pending(current) ? -ERESTARTSYS : 0;
-}
-
/*
* We wait for oplock breaks to be processed before we attempt to perform
* writes.
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index b15862e0f68..b334a89d6a6 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -593,11 +593,11 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
/* close and restart search */
cifs_dbg(FYI, "search backing up - close and restart search\n");
spin_lock(&cifs_file_list_lock);
- if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
+ if (server->ops->dir_needs_close(cfile)) {
cfile->invalidHandle = true;
spin_unlock(&cifs_file_list_lock);
- if (server->ops->close)
- server->ops->close(xid, tcon, &cfile->fid);
+ if (server->ops->close_dir)
+ server->ops->close_dir(xid, tcon, &cfile->fid);
} else
spin_unlock(&cifs_file_list_lock);
if (cfile->srch_inf.ntwrk_buf_start) {
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 39ee32688ea..3a5e8331768 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -243,10 +243,11 @@ static void decode_ascii_ssetup(char **pbcc_area, __u16 bleft,
kfree(ses->serverOS);
ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
- if (ses->serverOS)
+ if (ses->serverOS) {
strncpy(ses->serverOS, bcc_ptr, len);
- if (strncmp(ses->serverOS, "OS/2", 4) == 0)
- cifs_dbg(FYI, "OS/2 server\n");
+ if (strncmp(ses->serverOS, "OS/2", 4) == 0)
+ cifs_dbg(FYI, "OS/2 server\n");
+ }
bcc_ptr += len + 1;
bleft -= len + 1;
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 5e8c22d6c7b..1a6df4b03f6 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -1015,6 +1015,12 @@ cifs_wp_retry_size(struct inode *inode)
return CIFS_SB(inode->i_sb)->wsize;
}
+/*
+ * SMB1: report whether a directory search handle still has to be closed.
+ * A close is needed only while the search has not reached its end and the
+ * handle has not already been marked invalid.
+ */
+static bool
+cifs_dir_needs_close(struct cifsFileInfo *cfile)
+{
+	return !(cfile->srch_inf.endOfSearch || cfile->invalidHandle);
+}
+
struct smb_version_operations smb1_operations = {
.send_cancel = send_nt_cancel,
.compare_fids = cifs_compare_fids,
@@ -1086,6 +1092,7 @@ struct smb_version_operations smb1_operations = {
.create_mf_symlink = cifs_create_mf_symlink,
.is_read_op = cifs_is_read_op,
.wp_retry_size = cifs_wp_retry_size,
+ .dir_needs_close = cifs_dir_needs_close,
#ifdef CONFIG_CIFS_XATTR
.query_all_EAs = CIFSSMBQAllEAs,
.set_EA = CIFSSMBSetEA,
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 3f17b455083..45992944e23 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -50,7 +50,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
goto out;
}
- smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
+ smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
GFP_KERNEL);
if (smb2_data == NULL) {
rc = -ENOMEM;
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 0150182a449..899bbc86f73 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -131,7 +131,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
*adjust_tz = false;
*symlink = false;
- smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
+ smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
GFP_KERNEL);
if (smb2_data == NULL)
return -ENOMEM;
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index e31a9dfdcd3..af59d03db49 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -214,7 +214,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
{STATUS_BREAKPOINT, -EIO, "STATUS_BREAKPOINT"},
{STATUS_SINGLE_STEP, -EIO, "STATUS_SINGLE_STEP"},
{STATUS_BUFFER_OVERFLOW, -EIO, "STATUS_BUFFER_OVERFLOW"},
- {STATUS_NO_MORE_FILES, -EIO, "STATUS_NO_MORE_FILES"},
+ {STATUS_NO_MORE_FILES, -ENODATA, "STATUS_NO_MORE_FILES"},
{STATUS_WAKE_SYSTEM_DEBUGGER, -EIO, "STATUS_WAKE_SYSTEM_DEBUGGER"},
{STATUS_HANDLES_CLOSED, -EIO, "STATUS_HANDLES_CLOSED"},
{STATUS_NO_INHERITANCE, -EIO, "STATUS_NO_INHERITANCE"},
@@ -298,7 +298,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
{STATUS_INVALID_PARAMETER, -EINVAL, "STATUS_INVALID_PARAMETER"},
{STATUS_NO_SUCH_DEVICE, -ENODEV, "STATUS_NO_SUCH_DEVICE"},
{STATUS_NO_SUCH_FILE, -ENOENT, "STATUS_NO_SUCH_FILE"},
- {STATUS_INVALID_DEVICE_REQUEST, -EIO, "STATUS_INVALID_DEVICE_REQUEST"},
+ {STATUS_INVALID_DEVICE_REQUEST, -EOPNOTSUPP, "STATUS_INVALID_DEVICE_REQUEST"},
{STATUS_END_OF_FILE, -ENODATA, "STATUS_END_OF_FILE"},
{STATUS_WRONG_VOLUME, -EIO, "STATUS_WRONG_VOLUME"},
{STATUS_NO_MEDIA_IN_DEVICE, -EIO, "STATUS_NO_MEDIA_IN_DEVICE"},
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index f2e6ac29a8d..4aa7a0f07d6 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -178,9 +178,24 @@ smb2_check_message(char *buf, unsigned int length)
/* Windows 7 server returns 24 bytes more */
if (clc_len + 20 == len && command == SMB2_OPLOCK_BREAK_HE)
return 0;
- /* server can return one byte more */
+ /* server can return one byte more due to implied bcc[0] */
if (clc_len == 4 + len + 1)
return 0;
+
+ /*
+ * MacOS server pads after SMB2.1 write response with 3 bytes
+ * of junk. Other servers match RFC1001 len to actual
+ * SMB2/SMB3 frame length (header + smb2 response specific data)
+ * Log the server error (once), but allow it and continue
+ * since the frame is parseable.
+ */
+ if (clc_len < 4 /* RFC1001 header size */ + len) {
+ printk_once(KERN_WARNING
+ "SMB2 server sent bad RFC1001 len %d not %d\n",
+ len, clc_len - 4);
+ return 0;
+ }
+
return 1;
}
return 0;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 77f8aeb9c2f..f522193b718 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -389,7 +389,7 @@ smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon,
int rc;
struct smb2_file_all_info *smb2_data;
- smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
+ smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
GFP_KERNEL);
if (smb2_data == NULL)
return -ENOMEM;
@@ -731,11 +731,72 @@ smb2_sync_write(const unsigned int xid, struct cifsFileInfo *cfile,
return SMB2_write(xid, parms, written, iov, nr_segs);
}
+/*
+ * Set (setsparse != 0) or clear (setsparse == 0) the SPARSE_FILE attribute
+ * on an open file.
+ *
+ * Returns true when the cached attributes already match the request or the
+ * server accepted FSCTL_SET_SPARSE; returns false when the share has been
+ * marked as not supporting sparse files or the fsctl failed.
+ */
+static bool smb2_set_sparse(const unsigned int xid, struct cifs_tcon *tcon,
+		struct cifsFileInfo *cfile, struct inode *inode, __u8 setsparse)
+{
+	struct cifsInodeInfo *cinode = CIFS_I(inode);
+	bool is_sparse = (cinode->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) != 0;
+	bool want_sparse = setsparse != 0;
+	int err;
+
+	/* cached attribute already in the requested state: nothing to send */
+	if (is_sparse == want_sparse)
+		return true;
+
+	/*
+	 * The share's FS attribute info (FILE_SUPPORTS_SPARSE_FILES) cannot
+	 * be used to detect sparse support: Samba does not set that flag on
+	 * the share, yet it supports the set sparse FSCTL and reports sparse
+	 * correctly in the file attributes. Instead, remember the first
+	 * failure per share so the unsupported fsctl is not resent every
+	 * time a file is extended.
+	 */
+	if (tcon->broken_sparse_sup)
+		return false;
+
+	err = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
+			 cfile->fid.volatile_fid, FSCTL_SET_SPARSE,
+			 true /* is_fctl */, &setsparse, 1, NULL, NULL);
+	if (err) {
+		tcon->broken_sparse_sup = true;
+		cifs_dbg(FYI, "set sparse rc = %d\n", err);
+		return false;
+	}
+
+	/* keep the cached attributes in step with what the server now has */
+	if (want_sparse)
+		cinode->cifsAttrs |= FILE_ATTRIBUTE_SPARSE_FILE;
+	else
+		cinode->cifsAttrs &= (~FILE_ATTRIBUTE_SPARSE_FILE);
+
+	return true;
+}
+
static int
smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon,
struct cifsFileInfo *cfile, __u64 size, bool set_alloc)
{
__le64 eof = cpu_to_le64(size);
+ struct inode *inode;
+
+ /*
+ * If extending the file by more than 8192 bytes, make it sparse. Many
+ * Linux fs make files sparse by default when extending via ftruncate
+ */
+ inode = cfile->dentry->d_inode;
+
+ if (!set_alloc && (size > inode->i_size + 8192)) {
+ __u8 set_sparse = 1;
+
+ /* whether set sparse succeeds or not, extend the file */
+ smb2_set_sparse(xid, tcon, cfile, inode, set_sparse);
+ }
+
return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, cfile->pid, &eof, false);
}
@@ -954,6 +1015,105 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
return rc;
}
+/*
+ * Zero the byte range [offset, offset + len) of an open file by sending
+ * FSCTL_SET_ZERO_DATA to the server.
+ *
+ * @file:      open file; file->private_data supplies the cifsFileInfo/fid
+ * @tcon:      tree connection the fsctl is issued on
+ * @offset:    first byte to zero
+ * @len:       number of bytes to zero
+ * @keep_size: true if the caller asked that the file size must not change
+ *
+ * Returns the SMB2_ioctl() result, or -EOPNOTSUPP when one of the checks
+ * below rejects the request. The xid taken at entry is released on every
+ * exit path (the early -EOPNOTSUPP returns used to leak it).
+ */
+static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
+			    loff_t offset, loff_t len, bool keep_size)
+{
+	struct inode *inode;
+	struct cifsInodeInfo *cifsi;
+	struct cifsFileInfo *cfile = file->private_data;
+	struct file_zero_data_information fsctl_buf;
+	long rc;
+	unsigned int xid;
+
+	xid = get_xid();
+
+	inode = cfile->dentry->d_inode;
+	cifsi = CIFS_I(inode);
+
+	/* if file not oplocked can't be sure whether asking to extend size */
+	if (!CIFS_CACHE_READ(cifsi) && !keep_size) {
+		rc = -EOPNOTSUPP;
+		goto out;
+	}
+
+	/*
+	 * Must check if file sparse since fallocate -z (zero range) assumes
+	 * non-sparse allocation
+	 */
+	if (!(cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE)) {
+		rc = -EOPNOTSUPP;
+		goto out;
+	}
+
+	/*
+	 * need to make sure we are not asked to extend the file since the SMB3
+	 * fsctl does not change the file size. In the future we could change
+	 * this to zero the first part of the range then set the file size
+	 * which for a non sparse file would zero the newly extended range
+	 */
+	if (!keep_size && i_size_read(inode) < offset + len) {
+		rc = -EOPNOTSUPP;
+		goto out;
+	}
+
+	cifs_dbg(FYI, "offset %lld len %lld", offset, len);
+
+	fsctl_buf.FileOffset = cpu_to_le64(offset);
+	fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len);
+
+	rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
+			cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA,
+			true /* is_fctl */, (char *)&fsctl_buf,
+			sizeof(struct file_zero_data_information), NULL, NULL);
+out:
+	free_xid(xid);
+	return rc;
+}
+
+/*
+ * Punch a hole over the byte range [offset, offset + len) of an open file:
+ * mark the file sparse if it is not already, then send FSCTL_SET_ZERO_DATA
+ * for the range.
+ *
+ * @file:   open file; file->private_data supplies the cifsFileInfo/fid
+ * @tcon:   tree connection the fsctls are issued on
+ * @offset: first byte of the range
+ * @len:    length of the range in bytes
+ *
+ * Returns the SMB2_ioctl() result, or -EOPNOTSUPP if the file could not be
+ * made sparse. The xid taken at entry is released on every exit path (the
+ * early -EOPNOTSUPP return used to leak it).
+ */
+static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
+			    loff_t offset, loff_t len)
+{
+	struct inode *inode;
+	struct cifsFileInfo *cfile = file->private_data;
+	struct file_zero_data_information fsctl_buf;
+	long rc;
+	unsigned int xid;
+	__u8 set_sparse = 1;
+
+	xid = get_xid();
+
+	inode = cfile->dentry->d_inode;
+
+	/* Need to make file sparse, if not already, before freeing range. */
+	/* Consider adding equivalent for compressed since it could also work */
+	if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) {
+		rc = -EOPNOTSUPP;
+		goto out;
+	}
+
+	cifs_dbg(FYI, "offset %lld len %lld", offset, len);
+
+	fsctl_buf.FileOffset = cpu_to_le64(offset);
+	fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len);
+
+	rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
+			cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA,
+			true /* is_fctl */, (char *)&fsctl_buf,
+			sizeof(struct file_zero_data_information), NULL, NULL);
+out:
+	free_xid(xid);
+	return rc;
+}
+
+/*
+ * Route an fallocate request to the SMB3 helper matching the requested mode.
+ * Hole punching takes precedence, then zero-range (with or without
+ * FALLOC_FL_KEEP_SIZE); any other mode yields -EOPNOTSUPP.
+ */
+static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode,
+			   loff_t off, loff_t len)
+{
+	bool keep_size;
+
+	/* KEEP_SIZE already checked for by do_fallocate */
+	if (mode & FALLOC_FL_PUNCH_HOLE)
+		return smb3_punch_hole(file, tcon, off, len);
+
+	if (!(mode & FALLOC_FL_ZERO_RANGE))
+		return -EOPNOTSUPP;
+
+	keep_size = (mode & FALLOC_FL_KEEP_SIZE) != 0;
+	return smb3_zero_range(file, tcon, off, len, keep_size);
+}
+
static void
smb2_downgrade_oplock(struct TCP_Server_Info *server,
struct cifsInodeInfo *cinode, bool set_level2)
@@ -1161,6 +1321,12 @@ smb2_wp_retry_size(struct inode *inode)
SMB2_MAX_BUFFER_SIZE);
}
+/*
+ * SMB2+: report whether a directory handle still has to be closed.
+ * Only a handle already marked invalid is exempt.
+ */
+static bool
+smb2_dir_needs_close(struct cifsFileInfo *cfile)
+{
+	if (cfile->invalidHandle)
+		return false;
+
+	return true;
+}
+
struct smb_version_operations smb20_operations = {
.compare_fids = smb2_compare_fids,
.setup_request = smb2_setup_request,
@@ -1236,6 +1402,7 @@ struct smb_version_operations smb20_operations = {
.parse_lease_buf = smb2_parse_lease_buf,
.clone_range = smb2_clone_range,
.wp_retry_size = smb2_wp_retry_size,
+ .dir_needs_close = smb2_dir_needs_close,
};
struct smb_version_operations smb21_operations = {
@@ -1313,6 +1480,7 @@ struct smb_version_operations smb21_operations = {
.parse_lease_buf = smb2_parse_lease_buf,
.clone_range = smb2_clone_range,
.wp_retry_size = smb2_wp_retry_size,
+ .dir_needs_close = smb2_dir_needs_close,
};
struct smb_version_operations smb30_operations = {
@@ -1393,6 +1561,8 @@ struct smb_version_operations smb30_operations = {
.clone_range = smb2_clone_range,
.validate_negotiate = smb3_validate_negotiate,
.wp_retry_size = smb2_wp_retry_size,
+ .dir_needs_close = smb2_dir_needs_close,
+ .fallocate = smb3_fallocate,
};
struct smb_version_values smb20_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 42ebc1a8be6..74b3a668438 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -530,7 +530,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
struct smb2_sess_setup_rsp *rsp = NULL;
struct kvec iov[2];
int rc = 0;
- int resp_buftype;
+ int resp_buftype = CIFS_NO_BUFFER;
__le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */
struct TCP_Server_Info *server = ses->server;
u16 blob_length = 0;
@@ -907,7 +907,8 @@ tcon_exit:
tcon_error_exit:
if (rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) {
cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree);
- tcon->bad_network_name = true;
+ if (tcon)
+ tcon->bad_network_name = true;
}
goto tcon_exit;
}
@@ -1224,7 +1225,9 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
cifs_dbg(FYI, "SMB2 IOCTL\n");
- *out_data = NULL;
+ if (out_data != NULL)
+ *out_data = NULL;
+
/* zero out returned data len, in case of error */
if (plen)
*plen = 0;
@@ -1400,8 +1403,7 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
rsp = (struct smb2_close_rsp *)iov[0].iov_base;
if (rc != 0) {
- if (tcon)
- cifs_stats_fail_inc(tcon, SMB2_CLOSE_HE);
+ cifs_stats_fail_inc(tcon, SMB2_CLOSE_HE);
goto close_exit;
}
@@ -1530,7 +1532,7 @@ SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
{
return query_info(xid, tcon, persistent_fid, volatile_fid,
FILE_ALL_INFORMATION,
- sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
+ sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
sizeof(struct smb2_file_all_info), data);
}
@@ -2177,6 +2179,10 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base;
if (rc) {
+ if (rc == -ENODATA && rsp->hdr.Status == STATUS_NO_MORE_FILES) {
+ srch_inf->endOfSearch = true;
+ rc = 0;
+ }
cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE);
goto qdir_exit;
}
@@ -2214,11 +2220,6 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
else
cifs_dbg(VFS, "illegal search buffer type\n");
- if (rsp->hdr.Status == STATUS_NO_MORE_FILES)
- srch_inf->endOfSearch = 1;
- else
- srch_inf->endOfSearch = 0;
-
return rc;
qdir_exit:
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 69f3595d395..fbe486c285a 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -573,6 +573,12 @@ struct copychunk_ioctl {
__u32 Reserved2;
} __packed;
+/* Input buffer for the FSCTL_SET_ZERO_DATA ioctl; fields are little-endian */
+struct file_zero_data_information {
+ __le64 FileOffset; /* first byte of the range (callers pass offset) */
+ __le64 BeyondFinalZero; /* end of the range (callers pass offset + len) */
+} __packed;
+
struct copychunk_ioctl_rsp {
__le32 ChunksWritten;
__le32 ChunkBytesWritten;
diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h
index 0e538b5c962..83efa59535b 100644
--- a/fs/cifs/smbfsctl.h
+++ b/fs/cifs/smbfsctl.h
@@ -63,7 +63,7 @@
#define FSCTL_SET_OBJECT_ID_EXTENDED 0x000900BC /* BB add struct */
#define FSCTL_CREATE_OR_GET_OBJECT_ID 0x000900C0 /* BB add struct */
#define FSCTL_SET_SPARSE 0x000900C4 /* BB add struct */
-#define FSCTL_SET_ZERO_DATA 0x000900C8 /* BB add struct */
+#define FSCTL_SET_ZERO_DATA 0x000980C8
#define FSCTL_SET_ENCRYPTION 0x000900D7 /* BB add struct */
#define FSCTL_ENCRYPTION_FSCTL_IO 0x000900DB /* BB add struct */
#define FSCTL_WRITE_RAW_ENCRYPTED 0x000900DF /* BB add struct */
diff --git a/fs/dcache.c b/fs/dcache.c
index 06f65857a85..d30ce699ae4 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -731,8 +731,6 @@ EXPORT_SYMBOL(dget_parent);
/**
* d_find_alias - grab a hashed alias of inode
* @inode: inode in question
- * @want_discon: flag, used by d_splice_alias, to request
- * that only a DISCONNECTED alias be returned.
*
* If inode has a hashed alias, or is a directory and has any alias,
* acquire the reference to alias and return it. Otherwise return NULL.
@@ -741,10 +739,9 @@ EXPORT_SYMBOL(dget_parent);
* of a filesystem.
*
* If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
- * any other hashed alias over that one unless @want_discon is set,
- * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
+ * any other hashed alias over that one.
*/
-static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
+static struct dentry *__d_find_alias(struct inode *inode)
{
struct dentry *alias, *discon_alias;
@@ -756,7 +753,7 @@ again:
if (IS_ROOT(alias) &&
(alias->d_flags & DCACHE_DISCONNECTED)) {
discon_alias = alias;
- } else if (!want_discon) {
+ } else {
__dget_dlock(alias);
spin_unlock(&alias->d_lock);
return alias;
@@ -768,12 +765,9 @@ again:
alias = discon_alias;
spin_lock(&alias->d_lock);
if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
- if (IS_ROOT(alias) &&
- (alias->d_flags & DCACHE_DISCONNECTED)) {
- __dget_dlock(alias);
- spin_unlock(&alias->d_lock);
- return alias;
- }
+ __dget_dlock(alias);
+ spin_unlock(&alias->d_lock);
+ return alias;
}
spin_unlock(&alias->d_lock);
goto again;
@@ -787,7 +781,7 @@ struct dentry *d_find_alias(struct inode *inode)
if (!hlist_empty(&inode->i_dentry)) {
spin_lock(&inode->i_lock);
- de = __d_find_alias(inode, 0);
+ de = __d_find_alias(inode);
spin_unlock(&inode->i_lock);
}
return de;
@@ -1781,25 +1775,7 @@ struct dentry *d_find_any_alias(struct inode *inode)
}
EXPORT_SYMBOL(d_find_any_alias);
-/**
- * d_obtain_alias - find or allocate a dentry for a given inode
- * @inode: inode to allocate the dentry for
- *
- * Obtain a dentry for an inode resulting from NFS filehandle conversion or
- * similar open by handle operations. The returned dentry may be anonymous,
- * or may have a full name (if the inode was already in the cache).
- *
- * When called on a directory inode, we must ensure that the inode only ever
- * has one dentry. If a dentry is found, that is returned instead of
- * allocating a new one.
- *
- * On successful return, the reference to the inode has been transferred
- * to the dentry. In case of an error the reference on the inode is released.
- * To make it easier to use in export operations a %NULL or IS_ERR inode may
- * be passed in and will be the error will be propagate to the return value,
- * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
- */
-struct dentry *d_obtain_alias(struct inode *inode)
+static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected)
{
static const struct qstr anonstring = QSTR_INIT("/", 1);
struct dentry *tmp;
@@ -1830,7 +1806,10 @@ struct dentry *d_obtain_alias(struct inode *inode)
}
/* attach a disconnected dentry */
- add_flags = d_flags_for_inode(inode) | DCACHE_DISCONNECTED;
+ add_flags = d_flags_for_inode(inode);
+
+ if (disconnected)
+ add_flags |= DCACHE_DISCONNECTED;
spin_lock(&tmp->d_lock);
tmp->d_inode = inode;
@@ -1851,59 +1830,51 @@ struct dentry *d_obtain_alias(struct inode *inode)
iput(inode);
return res;
}
-EXPORT_SYMBOL(d_obtain_alias);
/**
- * d_splice_alias - splice a disconnected dentry into the tree if one exists
- * @inode: the inode which may have a disconnected dentry
- * @dentry: a negative dentry which we want to point to the inode.
- *
- * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
- * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
- * and return it, else simply d_add the inode to the dentry and return NULL.
+ * d_obtain_alias - find or allocate a DISCONNECTED dentry for a given inode
+ * @inode: inode to allocate the dentry for
*
- * This is needed in the lookup routine of any filesystem that is exportable
- * (via knfsd) so that we can build dcache paths to directories effectively.
+ * Obtain a dentry for an inode resulting from NFS filehandle conversion or
+ * similar open by handle operations. The returned dentry may be anonymous,
+ * or may have a full name (if the inode was already in the cache).
*
- * If a dentry was found and moved, then it is returned. Otherwise NULL
- * is returned. This matches the expected return value of ->lookup.
+ * When called on a directory inode, we must ensure that the inode only ever
+ * has one dentry. If a dentry is found, that is returned instead of
+ * allocating a new one.
*
- * Cluster filesystems may call this function with a negative, hashed dentry.
- * In that case, we know that the inode will be a regular file, and also this
- * will only occur during atomic_open. So we need to check for the dentry
- * being already hashed only in the final case.
+ * On successful return, the reference to the inode has been transferred
+ * to the dentry. In case of an error the reference on the inode is released.
+ * To make it easier to use in export operations a %NULL or IS_ERR inode may
+ * be passed in and the error will be propagated to the return value,
+ * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
*/
-struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
+struct dentry *d_obtain_alias(struct inode *inode)
{
- struct dentry *new = NULL;
-
- if (IS_ERR(inode))
- return ERR_CAST(inode);
+ return __d_obtain_alias(inode, 1);
+}
+EXPORT_SYMBOL(d_obtain_alias);
- if (inode && S_ISDIR(inode->i_mode)) {
- spin_lock(&inode->i_lock);
- new = __d_find_alias(inode, 1);
- if (new) {
- BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
- spin_unlock(&inode->i_lock);
- security_d_instantiate(new, inode);
- d_move(new, dentry);
- iput(inode);
- } else {
- /* already taking inode->i_lock, so d_add() by hand */
- __d_instantiate(dentry, inode);
- spin_unlock(&inode->i_lock);
- security_d_instantiate(dentry, inode);
- d_rehash(dentry);
- }
- } else {
- d_instantiate(dentry, inode);
- if (d_unhashed(dentry))
- d_rehash(dentry);
- }
- return new;
+/**
+ * d_obtain_root - find or allocate a dentry for a given inode
+ * @inode: inode to allocate the dentry for
+ *
+ * Obtain an IS_ROOT dentry for the root of a filesystem.
+ *
+ * We must ensure that directory inodes only ever have one dentry. If a
+ * dentry is found, that is returned instead of allocating a new one.
+ *
+ * On successful return, the reference to the inode has been transferred
+ * to the dentry. In case of an error the reference on the inode is
+ * released. A %NULL or IS_ERR inode may be passed in and the error
+ * will be propagated to the return value, with a %NULL @inode
+ * replaced by ERR_PTR(-ESTALE).
+ */
+struct dentry *d_obtain_root(struct inode *inode)
+{
+ return __d_obtain_alias(inode, 0);
+}
-EXPORT_SYMBOL(d_splice_alias);
+EXPORT_SYMBOL(d_obtain_root);
/**
* d_add_ci - lookup or allocate new dentry with case-exact name
@@ -2697,6 +2668,75 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
}
/**
+ * d_splice_alias - splice a disconnected dentry into the tree if one exists
+ * @inode: the inode which may have a disconnected dentry
+ * @dentry: a negative dentry which we want to point to the inode.
+ *
+ * If inode is a directory and has an IS_ROOT alias, then d_move that in
+ * place of the given dentry and return it, else simply d_add the inode
+ * to the dentry and return NULL.
+ *
+ * If a non-IS_ROOT directory is found, the filesystem is corrupt, and
+ * we should error out: directories can't have multiple aliases.
+ *
+ * This is needed in the lookup routine of any filesystem that is exportable
+ * (via knfsd) so that we can build dcache paths to directories effectively.
+ *
+ * If a dentry was found and moved, then it is returned. Otherwise NULL
+ * is returned. This matches the expected return value of ->lookup.
+ *
+ * Cluster filesystems may call this function with a negative, hashed dentry.
+ * In that case, we know that the inode will be a regular file, and also this
+ * will only occur during atomic_open. So we need to check for the dentry
+ * being already hashed only in the final case.
+ */
+struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
+{
+ struct dentry *new = NULL;
+
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+
+ if (inode && S_ISDIR(inode->i_mode)) {
+ spin_lock(&inode->i_lock);
+ new = __d_find_any_alias(inode);
+ if (new) {
+ if (!IS_ROOT(new)) {
+ spin_unlock(&inode->i_lock);
+ dput(new);
+ return ERR_PTR(-EIO);
+ }
+ if (d_ancestor(new, dentry)) {
+ spin_unlock(&inode->i_lock);
+ dput(new);
+ return ERR_PTR(-EIO);
+ }
+ write_seqlock(&rename_lock);
+ __d_materialise_dentry(dentry, new);
+ write_sequnlock(&rename_lock);
+ __d_drop(new);
+ _d_rehash(new);
+ spin_unlock(&new->d_lock);
+ spin_unlock(&inode->i_lock);
+ security_d_instantiate(new, inode);
+ iput(inode);
+ } else {
+ /* already taking inode->i_lock, so d_add() by hand */
+ __d_instantiate(dentry, inode);
+ spin_unlock(&inode->i_lock);
+ security_d_instantiate(dentry, inode);
+ d_rehash(dentry);
+ }
+ } else {
+ d_instantiate(dentry, inode);
+ if (d_unhashed(dentry))
+ d_rehash(dentry);
+ }
+ return new;
+}
+EXPORT_SYMBOL(d_splice_alias);
+
+/**
* d_materialise_unique - introduce an inode into the tree
* @dentry: candidate dentry
* @inode: inode to bind to the dentry, to which aliases may be attached
@@ -2724,7 +2764,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
struct dentry *alias;
/* Does an aliased dentry already exist? */
- alias = __d_find_alias(inode, 0);
+ alias = __d_find_alias(inode);
if (alias) {
actual = alias;
write_seqlock(&rename_lock);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 17e39b047de..c3116404ab4 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -158,7 +158,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
{
ssize_t ret;
- ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES * PAGE_SIZE,
+ ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES,
&sdio->from);
if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) {
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index b10b48c2a7a..7bcfff900f0 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1852,7 +1852,8 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
goto error_tgt_fput;
/* Check if EPOLLWAKEUP is allowed */
- ep_take_care_of_epollwakeup(&epds);
+ if (ep_op_has_event(op))
+ ep_take_care_of_epollwakeup(&epds);
/*
* We have to check that the file structure underneath the file descriptor
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 3750031cfa2..b88edc05c23 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -161,7 +161,7 @@ static struct kmem_cache * ext2_inode_cachep;
static struct inode *ext2_alloc_inode(struct super_block *sb)
{
struct ext2_inode_info *ei;
- ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL);
+ ei = kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
ei->i_block_alloc_info = NULL;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 08cdfe5461e..622e8824902 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2828,8 +2828,9 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
*/
overhead += ngroups * (2 + sbi->s_itb_per_group);
- /* Add the journal blocks as well */
- overhead += sbi->s_journal->j_maxlen;
+ /* Add the internal journal blocks as well */
+ if (sbi->s_journal && !sbi->journal_bdev)
+ overhead += sbi->s_journal->j_maxlen;
sbi->s_overhead_last = overhead;
smp_wmb();
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 5b19760b1de..b0c225cdb52 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1825,7 +1825,7 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
/*
* Special error return code only used by dx_probe() and its callers.
*/
-#define ERR_BAD_DX_DIR -75000
+#define ERR_BAD_DX_DIR (-(MAX_ERRNO - 1))
/*
* Timeout and state flag for lazy initialization inode thread.
@@ -2454,6 +2454,22 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
up_write(&EXT4_I(inode)->i_data_sem);
}
+/* Update i_size, i_disksize. Requires i_mutex to avoid races with truncate */
+static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
+{
+ int changed = 0;
+
+ if (newsize > inode->i_size) {
+ i_size_write(inode, newsize);
+ changed = 1;
+ }
+ if (newsize > EXT4_I(inode)->i_disksize) {
+ ext4_update_i_disksize(inode, newsize);
+ changed |= 2;
+ }
+ return changed;
+}
+
struct ext4_group_info {
unsigned long bb_state;
struct rb_root bb_free_root;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 76c2df382b7..74292a71b38 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4665,7 +4665,8 @@ retry:
}
static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
- ext4_lblk_t len, int flags, int mode)
+ ext4_lblk_t len, loff_t new_size,
+ int flags, int mode)
{
struct inode *inode = file_inode(file);
handle_t *handle;
@@ -4674,8 +4675,10 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
int retries = 0;
struct ext4_map_blocks map;
unsigned int credits;
+ loff_t epos;
map.m_lblk = offset;
+ map.m_len = len;
/*
* Don't normalize the request if it can fit in one extent so
* that it doesn't get unnecessarily split into multiple
@@ -4690,9 +4693,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
credits = ext4_chunk_trans_blocks(inode, len);
retry:
- while (ret >= 0 && ret < len) {
- map.m_lblk = map.m_lblk + ret;
- map.m_len = len = len - ret;
+ while (ret >= 0 && len) {
handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
credits);
if (IS_ERR(handle)) {
@@ -4709,6 +4710,21 @@ retry:
ret2 = ext4_journal_stop(handle);
break;
}
+ map.m_lblk += ret;
+ map.m_len = len = len - ret;
+ epos = (loff_t)map.m_lblk << inode->i_blkbits;
+ inode->i_ctime = ext4_current_time(inode);
+ if (new_size) {
+ if (epos > new_size)
+ epos = new_size;
+ if (ext4_update_inode_size(inode, epos) & 0x1)
+ inode->i_mtime = inode->i_ctime;
+ } else {
+ if (epos > inode->i_size)
+ ext4_set_inode_flag(inode,
+ EXT4_INODE_EOFBLOCKS);
+ }
+ ext4_mark_inode_dirty(handle, inode);
ret2 = ext4_journal_stop(handle);
if (ret2)
break;
@@ -4731,7 +4747,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
loff_t new_size = 0;
int ret = 0;
int flags;
- int partial;
+ int credits;
+ int partial_begin, partial_end;
loff_t start, end;
ext4_lblk_t lblk;
struct address_space *mapping = inode->i_mapping;
@@ -4771,7 +4788,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
if (start < offset || end > offset + len)
return -EINVAL;
- partial = (offset + len) & ((1 << blkbits) - 1);
+ partial_begin = offset & ((1 << blkbits) - 1);
+ partial_end = (offset + len) & ((1 << blkbits) - 1);
lblk = start >> blkbits;
max_blocks = (end >> blkbits);
@@ -4805,7 +4823,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
* If we have a partial block after EOF we have to allocate
* the entire block.
*/
- if (partial)
+ if (partial_end)
max_blocks += 1;
}
@@ -4813,6 +4831,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
/* Now release the pages and zero block aligned part of pages*/
truncate_pagecache_range(inode, start, end - 1);
+ inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
/* Wait all existing dio workers, newcomers will block on i_mutex */
ext4_inode_block_unlocked_dio(inode);
@@ -4825,13 +4844,22 @@ static long ext4_zero_range(struct file *file, loff_t offset,
if (ret)
goto out_dio;
- ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags,
- mode);
+ ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
+ flags, mode);
if (ret)
goto out_dio;
}
+ if (!partial_begin && !partial_end)
+ goto out_dio;
- handle = ext4_journal_start(inode, EXT4_HT_MISC, 4);
+ /*
+ * In worst case we have to writeout two nonadjacent unwritten
+ * blocks and update the inode
+ */
+ credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1;
+ if (ext4_should_journal_data(inode))
+ credits += 2;
+ handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
ext4_std_error(inode->i_sb, ret);
@@ -4839,12 +4867,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
}
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
-
if (new_size) {
- if (new_size > i_size_read(inode))
- i_size_write(inode, new_size);
- if (new_size > EXT4_I(inode)->i_disksize)
- ext4_update_i_disksize(inode, new_size);
+ ext4_update_inode_size(inode, new_size);
} else {
/*
* Mark that we allocate beyond EOF so the subsequent truncate
@@ -4853,7 +4877,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
if ((offset + len) > i_size_read(inode))
ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
}
-
ext4_mark_inode_dirty(handle, inode);
/* Zero out partial block at the edges of the range */
@@ -4880,13 +4903,11 @@ out_mutex:
long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
struct inode *inode = file_inode(file);
- handle_t *handle;
loff_t new_size = 0;
unsigned int max_blocks;
int ret = 0;
int flags;
ext4_lblk_t lblk;
- struct timespec tv;
unsigned int blkbits = inode->i_blkbits;
/* Return error if mode is not supported */
@@ -4937,36 +4958,15 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
goto out;
}
- ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode);
+ ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
+ flags, mode);
if (ret)
goto out;
- handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
- if (IS_ERR(handle))
- goto out;
-
- tv = inode->i_ctime = ext4_current_time(inode);
-
- if (new_size) {
- if (new_size > i_size_read(inode)) {
- i_size_write(inode, new_size);
- inode->i_mtime = tv;
- }
- if (new_size > EXT4_I(inode)->i_disksize)
- ext4_update_i_disksize(inode, new_size);
- } else {
- /*
- * Mark that we allocate beyond EOF so the subsequent truncate
- * can proceed even if the new size is the same as i_size.
- */
- if ((offset + len) > i_size_read(inode))
- ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
+ if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
+ ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
+ EXT4_I(inode)->i_sync_tid);
}
- ext4_mark_inode_dirty(handle, inode);
- if (file->f_flags & O_SYNC)
- ext4_handle_sync(handle);
-
- ext4_journal_stop(handle);
out:
mutex_unlock(&inode->i_mutex);
trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 367a60c07cf..3aa26e9117c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1055,27 +1055,11 @@ static int ext4_write_end(struct file *file,
} else
copied = block_write_end(file, mapping, pos,
len, copied, page, fsdata);
-
/*
- * No need to use i_size_read() here, the i_size
- * cannot change under us because we hole i_mutex.
- *
- * But it's important to update i_size while still holding page lock:
+ * it's important to update i_size while still holding page lock:
* page writeout could otherwise come in and zero beyond i_size.
*/
- if (pos + copied > inode->i_size) {
- i_size_write(inode, pos + copied);
- i_size_changed = 1;
- }
-
- if (pos + copied > EXT4_I(inode)->i_disksize) {
- /* We need to mark inode dirty even if
- * new_i_size is less that inode->i_size
- * but greater than i_disksize. (hint delalloc)
- */
- ext4_update_i_disksize(inode, (pos + copied));
- i_size_changed = 1;
- }
+ i_size_changed = ext4_update_inode_size(inode, pos + copied);
unlock_page(page);
page_cache_release(page);
@@ -1123,7 +1107,7 @@ static int ext4_journalled_write_end(struct file *file,
int ret = 0, ret2;
int partial = 0;
unsigned from, to;
- loff_t new_i_size;
+ int size_changed = 0;
trace_ext4_journalled_write_end(inode, pos, len, copied);
from = pos & (PAGE_CACHE_SIZE - 1);
@@ -1146,20 +1130,18 @@ static int ext4_journalled_write_end(struct file *file,
if (!partial)
SetPageUptodate(page);
}
- new_i_size = pos + copied;
- if (new_i_size > inode->i_size)
- i_size_write(inode, pos+copied);
+ size_changed = ext4_update_inode_size(inode, pos + copied);
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
- if (new_i_size > EXT4_I(inode)->i_disksize) {
- ext4_update_i_disksize(inode, new_i_size);
+ unlock_page(page);
+ page_cache_release(page);
+
+ if (size_changed) {
ret2 = ext4_mark_inode_dirty(handle, inode);
if (!ret)
ret = ret2;
}
- unlock_page(page);
- page_cache_release(page);
if (pos + len > inode->i_size && ext4_can_truncate(inode))
/* if we have allocated more blocks and copied
* less. We will have blocks allocated outside
@@ -2095,6 +2077,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
struct ext4_map_blocks *map = &mpd->map;
int err;
loff_t disksize;
+ int progress = 0;
mpd->io_submit.io_end->offset =
((loff_t)map->m_lblk) << inode->i_blkbits;
@@ -2111,8 +2094,11 @@ static int mpage_map_and_submit_extent(handle_t *handle,
* is non-zero, a commit should free up blocks.
*/
if ((err == -ENOMEM) ||
- (err == -ENOSPC && ext4_count_free_clusters(sb)))
+ (err == -ENOSPC && ext4_count_free_clusters(sb))) {
+ if (progress)
+ goto update_disksize;
return err;
+ }
ext4_msg(sb, KERN_CRIT,
"Delayed block allocation failed for "
"inode %lu at logical offset %llu with"
@@ -2129,15 +2115,17 @@ static int mpage_map_and_submit_extent(handle_t *handle,
*give_up_on_write = true;
return err;
}
+ progress = 1;
/*
* Update buffer state, submit mapped pages, and get us new
* extent to map
*/
err = mpage_map_and_submit_buffers(mpd);
if (err < 0)
- return err;
+ goto update_disksize;
} while (map->m_len);
+update_disksize:
/*
* Update on-disk size after IO is submitted. Races with
* truncate are avoided by checking i_size under i_data_sem.
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 956027711fa..8b0f9ef517d 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1412,6 +1412,8 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
int last = first + count - 1;
struct super_block *sb = e4b->bd_sb;
+ if (WARN_ON(count == 0))
+ return;
BUG_ON(last >= (sb->s_blocksize << 3));
assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
/* Don't bother if the block group is corrupt. */
@@ -3221,6 +3223,8 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
int err;
if (pa == NULL) {
+ if (ac->ac_f_ex.fe_len == 0)
+ return;
err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
if (err) {
/*
@@ -3235,6 +3239,7 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
ac->ac_f_ex.fe_len);
ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
+ ext4_mb_unload_buddy(&e4b);
return;
}
if (pa->pa_type == MB_INODE_PA)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 3520ab8a663..603e4ebbd0a 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1227,7 +1227,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
buffer */
int num = 0;
ext4_lblk_t nblocks;
- int i, err;
+ int i, err = 0;
int namelen;
*res_dir = NULL;
@@ -1264,7 +1264,11 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
* return. Otherwise, fall back to doing a search the
* old fashioned way.
*/
- if (bh || (err != ERR_BAD_DX_DIR))
+ if (err == -ENOENT)
+ return NULL;
+ if (err && err != ERR_BAD_DX_DIR)
+ return ERR_PTR(err);
+ if (bh)
return bh;
dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
"falling back\n"));
@@ -1295,6 +1299,11 @@ restart:
}
num++;
bh = ext4_getblk(NULL, dir, b++, 0, &err);
+ if (unlikely(err)) {
+ if (ra_max == 0)
+ return ERR_PTR(err);
+ break;
+ }
bh_use[ra_max] = bh;
if (bh)
ll_rw_block(READ | REQ_META | REQ_PRIO,
@@ -1417,6 +1426,8 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
return ERR_PTR(-ENAMETOOLONG);
bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
+ if (IS_ERR(bh))
+ return (struct dentry *) bh;
inode = NULL;
if (bh) {
__u32 ino = le32_to_cpu(de->inode);
@@ -1450,6 +1461,8 @@ struct dentry *ext4_get_parent(struct dentry *child)
struct buffer_head *bh;
bh = ext4_find_entry(child->d_inode, &dotdot, &de, NULL);
+ if (IS_ERR(bh))
+ return (struct dentry *) bh;
if (!bh)
return ERR_PTR(-ENOENT);
ino = le32_to_cpu(de->inode);
@@ -2727,6 +2740,8 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
retval = -ENOENT;
bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
if (!bh)
goto end_rmdir;
@@ -2794,6 +2809,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
retval = -ENOENT;
bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
if (!bh)
goto end_unlink;
@@ -3121,6 +3138,8 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
struct ext4_dir_entry_2 *de;
bh = ext4_find_entry(dir, d_name, &de, NULL);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
if (bh) {
retval = ext4_delete_entry(handle, dir, de, bh);
brelse(bh);
@@ -3128,7 +3147,8 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
return retval;
}
-static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent)
+static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent,
+ int force_reread)
{
int retval;
/*
@@ -3140,7 +3160,8 @@ static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent)
if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino ||
ent->de->name_len != ent->dentry->d_name.len ||
strncmp(ent->de->name, ent->dentry->d_name.name,
- ent->de->name_len)) {
+ ent->de->name_len) ||
+ force_reread) {
retval = ext4_find_delete_entry(handle, ent->dir,
&ent->dentry->d_name);
} else {
@@ -3191,6 +3212,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
.dentry = new_dentry,
.inode = new_dentry->d_inode,
};
+ int force_reread;
int retval;
dquot_initialize(old.dir);
@@ -3202,6 +3224,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
dquot_initialize(new.inode);
old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
+ if (IS_ERR(old.bh))
+ return PTR_ERR(old.bh);
/*
* Check for inode number is _not_ due to possible IO errors.
* We might rmdir the source, keep it as pwd of some process
@@ -3214,6 +3238,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
&new.de, &new.inlined);
+ if (IS_ERR(new.bh)) {
+ retval = PTR_ERR(new.bh);
+ new.bh = NULL;
+ goto end_rename;
+ }
if (new.bh) {
if (!new.inode) {
brelse(new.bh);
@@ -3246,6 +3275,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
if (retval)
goto end_rename;
}
+ /*
+ * If we're renaming a file within an inline_data dir and adding or
+ * setting the new dirent causes a conversion from inline_data to
+ * extents/blockmap, we need to force the dirent delete code to
+ * re-read the directory, or else we end up trying to delete a dirent
+ * from what is now the extent tree root (or a block map).
+ */
+ force_reread = (new.dir->i_ino == old.dir->i_ino &&
+ ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA));
if (!new.bh) {
retval = ext4_add_entry(handle, new.dentry, old.inode);
if (retval)
@@ -3256,6 +3294,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
if (retval)
goto end_rename;
}
+ if (force_reread)
+ force_reread = !ext4_test_inode_flag(new.dir,
+ EXT4_INODE_INLINE_DATA);
/*
* Like most other Unix systems, set the ctime for inodes on a
@@ -3267,7 +3308,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
/*
* ok, that's it
*/
- ext4_rename_delete(handle, &old);
+ ext4_rename_delete(handle, &old, force_reread);
if (new.inode) {
ext4_dec_count(handle, new.inode);
@@ -3330,6 +3371,8 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
old.bh = ext4_find_entry(old.dir, &old.dentry->d_name,
&old.de, &old.inlined);
+ if (IS_ERR(old.bh))
+ return PTR_ERR(old.bh);
/*
* Check for inode number is _not_ due to possible IO errors.
* We might rmdir the source, keep it as pwd of some process
@@ -3342,6 +3385,11 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
&new.de, &new.inlined);
+ if (IS_ERR(new.bh)) {
+ retval = PTR_ERR(new.bh);
+ new.bh = NULL;
+ goto end_rename;
+ }
/* RENAME_EXCHANGE case: old *and* new must both exist */
if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino)
@@ -3455,7 +3503,6 @@ const struct inode_operations ext4_dir_inode_operations = {
.rmdir = ext4_rmdir,
.mknod = ext4_mknod,
.tmpfile = ext4_tmpfile,
- .rename = ext4_rename,
.rename2 = ext4_rename2,
.setattr = ext4_setattr,
.setxattr = generic_setxattr,
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index bb0e80f03e2..1e43b905ff9 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -575,6 +575,7 @@ handle_bb:
bh = bclean(handle, sb, block);
if (IS_ERR(bh)) {
err = PTR_ERR(bh);
+ bh = NULL;
goto out;
}
overhead = ext4_group_overhead_blocks(sb, group);
@@ -603,6 +604,7 @@ handle_ib:
bh = bclean(handle, sb, block);
if (IS_ERR(bh)) {
err = PTR_ERR(bh);
+ bh = NULL;
goto out;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 32b43ad154b..0b28b36e791 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3181,9 +3181,9 @@ static int set_journal_csum_feature_set(struct super_block *sb)
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
- /* journal checksum v2 */
+ /* journal checksum v3 */
compat = 0;
- incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2;
+ incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
} else {
/* journal checksum v1 */
compat = JBD2_FEATURE_COMPAT_CHECKSUM;
@@ -3205,6 +3205,7 @@ static int set_journal_csum_feature_set(struct super_block *sb)
jbd2_journal_clear_features(sbi->s_journal,
JBD2_FEATURE_COMPAT_CHECKSUM, 0,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
+ JBD2_FEATURE_INCOMPAT_CSUM_V3 |
JBD2_FEATURE_INCOMPAT_CSUM_V2);
}
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 214fe1054fc..736a348509f 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -23,7 +23,7 @@ config F2FS_STAT_FS
mounted as f2fs. Each file shows the whole f2fs information.
/sys/kernel/debug/f2fs/status includes:
- - major file system information managed by f2fs currently
+ - major filesystem information managed by f2fs currently
- average SIT information about whole segments
- current memory footprint consumed by f2fs.
@@ -68,6 +68,6 @@ config F2FS_CHECK_FS
bool "F2FS consistency checking feature"
depends on F2FS_FS
help
- Enables BUG_ONs which check the file system consistency in runtime.
+ Enables BUG_ONs which check the filesystem consistency in runtime.
If you want to improve the performance, say N.
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 6aeed5bada5..ec3b7a5381f 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -160,14 +160,11 @@ static int f2fs_write_meta_page(struct page *page,
goto redirty_out;
if (wbc->for_reclaim)
goto redirty_out;
-
- /* Should not write any meta pages, if any IO error was occurred */
- if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
- goto no_write;
+ if (unlikely(f2fs_cp_error(sbi)))
+ goto redirty_out;
f2fs_wait_on_page_writeback(page, META);
write_meta_page(sbi, page);
-no_write:
dec_page_count(sbi, F2FS_DIRTY_META);
unlock_page(page);
return 0;
@@ -348,7 +345,7 @@ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
return e ? true : false;
}
-static void release_dirty_inode(struct f2fs_sb_info *sbi)
+void release_dirty_inode(struct f2fs_sb_info *sbi)
{
struct ino_entry *e, *tmp;
int i;
@@ -446,8 +443,8 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
struct f2fs_orphan_block *orphan_blk = NULL;
unsigned int nentries = 0;
unsigned short index;
- unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
- (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
+ unsigned short orphan_blocks =
+ (unsigned short)GET_ORPHAN_BLOCKS(sbi->n_orphans);
struct page *page = NULL;
struct ino_entry *orphan = NULL;
@@ -737,7 +734,7 @@ retry:
/*
* Freeze all the FS-operations for checkpoint.
*/
-static void block_operations(struct f2fs_sb_info *sbi)
+static int block_operations(struct f2fs_sb_info *sbi)
{
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
@@ -745,6 +742,7 @@ static void block_operations(struct f2fs_sb_info *sbi)
.for_reclaim = 0,
};
struct blk_plug plug;
+ int err = 0;
blk_start_plug(&plug);
@@ -754,11 +752,15 @@ retry_flush_dents:
if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
f2fs_unlock_all(sbi);
sync_dirty_dir_inodes(sbi);
+ if (unlikely(f2fs_cp_error(sbi))) {
+ err = -EIO;
+ goto out;
+ }
goto retry_flush_dents;
}
/*
- * POR: we should ensure that there is no dirty node pages
+ * POR: we should ensure that there are no dirty node pages
* until finishing nat/sit flush.
*/
retry_flush_nodes:
@@ -767,9 +769,16 @@ retry_flush_nodes:
if (get_pages(sbi, F2FS_DIRTY_NODES)) {
up_write(&sbi->node_write);
sync_node_pages(sbi, 0, &wbc);
+ if (unlikely(f2fs_cp_error(sbi))) {
+ f2fs_unlock_all(sbi);
+ err = -EIO;
+ goto out;
+ }
goto retry_flush_nodes;
}
+out:
blk_finish_plug(&plug);
+ return err;
}
static void unblock_operations(struct f2fs_sb_info *sbi)
@@ -813,8 +822,11 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));
/* Flush all the NAT/SIT pages */
- while (get_pages(sbi, F2FS_DIRTY_META))
+ while (get_pages(sbi, F2FS_DIRTY_META)) {
sync_meta_pages(sbi, META, LONG_MAX);
+ if (unlikely(f2fs_cp_error(sbi)))
+ return;
+ }
next_free_nid(sbi, &last_nid);
@@ -825,7 +837,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
- for (i = 0; i < 3; i++) {
+ for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
ckpt->cur_node_segno[i] =
cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
ckpt->cur_node_blkoff[i] =
@@ -833,7 +845,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
ckpt->alloc_type[i + CURSEG_HOT_NODE] =
curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
}
- for (i = 0; i < 3; i++) {
+ for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
ckpt->cur_data_segno[i] =
cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
ckpt->cur_data_blkoff[i] =
@@ -848,24 +860,23 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
/* 2 cp + n data seg summary + orphan inode blocks */
data_sum_blocks = npages_for_summary_flush(sbi);
- if (data_sum_blocks < 3)
+ if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
else
clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
- orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1)
- / F2FS_ORPHANS_PER_BLOCK;
+ orphan_blocks = GET_ORPHAN_BLOCKS(sbi->n_orphans);
ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
orphan_blocks);
if (is_umount) {
set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
- ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
+ ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
cp_payload_blks + data_sum_blocks +
orphan_blocks + NR_CURSEG_NODE_TYPE);
} else {
clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
- ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
+ ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
cp_payload_blks + data_sum_blocks +
orphan_blocks);
}
@@ -924,6 +935,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
/* wait for previous submitted node/meta pages writeback */
wait_on_all_pages_writeback(sbi);
+ if (unlikely(f2fs_cp_error(sbi)))
+ return;
+
filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
@@ -934,15 +948,17 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
/* Here, we only have one bio having CP pack */
sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
- if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
- clear_prefree_segments(sbi);
- release_dirty_inode(sbi);
- F2FS_RESET_SB_DIRT(sbi);
- }
+ release_dirty_inode(sbi);
+
+ if (unlikely(f2fs_cp_error(sbi)))
+ return;
+
+ clear_prefree_segments(sbi);
+ F2FS_RESET_SB_DIRT(sbi);
}
/*
- * We guarantee that this checkpoint procedure should not fail.
+ * We guarantee that this checkpoint procedure will not fail.
*/
void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
{
@@ -952,7 +968,13 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops");
mutex_lock(&sbi->cp_mutex);
- block_operations(sbi);
+
+ if (!sbi->s_dirty)
+ goto out;
+ if (unlikely(f2fs_cp_error(sbi)))
+ goto out;
+ if (block_operations(sbi))
+ goto out;
trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops");
@@ -976,9 +998,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
do_checkpoint(sbi, is_umount);
unblock_operations(sbi);
- mutex_unlock(&sbi->cp_mutex);
-
stat_inc_cp_count(sbi->stat_info);
+out:
+ mutex_unlock(&sbi->cp_mutex);
trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
}
@@ -999,8 +1021,8 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
* for cp pack we can have max 1020*504 orphan entries
*/
sbi->n_orphans = 0;
- sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
- * F2FS_ORPHANS_PER_BLOCK;
+ sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
+ NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK;
}
int __init create_checkpoint_caches(void)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 03313099c51..76de83e25a8 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -53,7 +53,7 @@ static void f2fs_write_end_io(struct bio *bio, int err)
struct page *page = bvec->bv_page;
if (unlikely(err)) {
- SetPageError(page);
+ set_page_dirty(page);
set_bit(AS_EIO, &page->mapping->flags);
f2fs_stop_checkpoint(sbi);
}
@@ -691,7 +691,7 @@ get_next:
allocated = true;
blkaddr = dn.data_blkaddr;
}
- /* Give more consecutive addresses for the read ahead */
+ /* Give more consecutive addresses for the readahead */
if (blkaddr == (bh_result->b_blocknr + ofs)) {
ofs++;
dn.ofs_in_node++;
@@ -739,7 +739,7 @@ static int f2fs_read_data_page(struct file *file, struct page *page)
trace_f2fs_readpage(page, DATA);
- /* If the file has inline data, try to read it directlly */
+ /* If the file has inline data, try to read it directly */
if (f2fs_has_inline_data(inode))
ret = f2fs_read_inline_data(inode, page);
else
@@ -836,10 +836,19 @@ write:
/* Dentry blocks are controlled by checkpoint */
if (S_ISDIR(inode->i_mode)) {
+ if (unlikely(f2fs_cp_error(sbi)))
+ goto redirty_out;
err = do_write_data_page(page, &fio);
goto done;
}
+ /* we should bypass data pages to let the kworker jobs proceed */
+ if (unlikely(f2fs_cp_error(sbi))) {
+ SetPageError(page);
+ unlock_page(page);
+ return 0;
+ }
+
if (!wbc->for_reclaim)
need_balance_fs = true;
else if (has_not_enough_free_secs(sbi, 0))
@@ -927,7 +936,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
if (to > inode->i_size) {
truncate_pagecache(inode, inode->i_size);
- truncate_blocks(inode, inode->i_size);
+ truncate_blocks(inode, inode->i_size, true);
}
}
@@ -946,7 +955,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
f2fs_balance_fs(sbi);
repeat:
- err = f2fs_convert_inline_data(inode, pos + len);
+ err = f2fs_convert_inline_data(inode, pos + len, NULL);
if (err)
goto fail;
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index a441ba33be1..fecebdbfd78 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -32,7 +32,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
struct f2fs_stat_info *si = F2FS_STAT(sbi);
int i;
- /* valid check of the segment numbers */
+ /* validation check of the segment numbers */
si->hit_ext = sbi->read_hit_ext;
si->total_ext = sbi->total_hit_ext;
si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
@@ -152,7 +152,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi));
si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi));
- /* buld nm */
+ /* build nm */
si->base_mem += sizeof(struct f2fs_nm_info);
si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index bcf893c3d90..155fb056b7f 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -124,7 +124,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
/*
* For the most part, it should be a bug when name_len is zero.
- * We stop here for figuring out where the bugs are occurred.
+ * We stop here to figure out where the bug has occurred.
*/
f2fs_bug_on(!de->name_len);
@@ -391,7 +391,7 @@ put_error:
error:
/* once the failed inode becomes a bad inode, i_mode is S_IFREG */
truncate_inode_pages(&inode->i_data, 0);
- truncate_blocks(inode, 0);
+ truncate_blocks(inode, 0, false);
remove_dirty_dir_inode(inode);
remove_inode_page(inode);
return ERR_PTR(err);
@@ -563,7 +563,7 @@ fail:
}
/*
- * It only removes the dentry from the dentry page,corresponding name
+ * It only removes the dentry from the dentry page, corresponding name
* entry in name page does not need to be touched during deletion.
*/
void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4dab5338a97..e921242186f 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -24,7 +24,7 @@
#define f2fs_bug_on(condition) BUG_ON(condition)
#define f2fs_down_write(x, y) down_write_nest_lock(x, y)
#else
-#define f2fs_bug_on(condition)
+#define f2fs_bug_on(condition) WARN_ON(condition)
#define f2fs_down_write(x, y) down_write(x)
#endif
@@ -395,7 +395,7 @@ enum count_type {
};
/*
- * The below are the page types of bios used in submti_bio().
+ * The below are the page types of bios used in submit_bio().
* The available types are:
* DATA User data pages. It operates as async mode.
* NODE Node pages. It operates as async mode.
@@ -470,7 +470,7 @@ struct f2fs_sb_info {
struct list_head dir_inode_list; /* dir inode list */
spinlock_t dir_inode_lock; /* for dir inode list lock */
- /* basic file system units */
+ /* basic filesystem units */
unsigned int log_sectors_per_block; /* log2 sectors per block */
unsigned int log_blocksize; /* log2 block size */
unsigned int blocksize; /* block size */
@@ -799,7 +799,7 @@ static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
/*
* odd numbered checkpoint should at cp segment 0
- * and even segent must be at cp segment 1
+ * and even segment must be at cp segment 1
*/
if (!(ckpt_version & 1))
start_addr += sbi->blocks_per_seg;
@@ -1096,6 +1096,11 @@ static inline int f2fs_readonly(struct super_block *sb)
return sb->s_flags & MS_RDONLY;
}
+static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi)
+{
+ return is_set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
+}
+
static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi)
{
set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
@@ -1117,7 +1122,7 @@ static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi)
*/
int f2fs_sync_file(struct file *, loff_t, loff_t, int);
void truncate_data_blocks(struct dnode_of_data *);
-int truncate_blocks(struct inode *, u64);
+int truncate_blocks(struct inode *, u64, bool);
void f2fs_truncate(struct inode *);
int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
int f2fs_setattr(struct dentry *, struct iattr *);
@@ -1202,10 +1207,8 @@ int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *);
bool alloc_nid(struct f2fs_sb_info *, nid_t *);
void alloc_nid_done(struct f2fs_sb_info *, nid_t);
void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
-void recover_node_page(struct f2fs_sb_info *, struct page *,
- struct f2fs_summary *, struct node_info *, block_t);
void recover_inline_xattr(struct inode *, struct page *);
-bool recover_xattr_data(struct inode *, struct page *, block_t);
+void recover_xattr_data(struct inode *, struct page *, block_t);
int recover_inode_page(struct f2fs_sb_info *, struct page *);
int restore_node_summary(struct f2fs_sb_info *, unsigned int,
struct f2fs_summary_block *);
@@ -1238,8 +1241,6 @@ void write_data_page(struct page *, struct dnode_of_data *, block_t *,
void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *);
void recover_data_page(struct f2fs_sb_info *, struct page *,
struct f2fs_summary *, block_t, block_t);
-void rewrite_node_page(struct f2fs_sb_info *, struct page *,
- struct f2fs_summary *, block_t, block_t);
void allocate_data_block(struct f2fs_sb_info *, struct page *,
block_t, block_t *, struct f2fs_summary *, int);
void f2fs_wait_on_page_writeback(struct page *, enum page_type);
@@ -1262,6 +1263,7 @@ int ra_meta_pages(struct f2fs_sb_info *, int, int, int);
long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
+void release_dirty_inode(struct f2fs_sb_info *);
bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
int acquire_orphan_inode(struct f2fs_sb_info *);
void release_orphan_inode(struct f2fs_sb_info *);
@@ -1439,8 +1441,8 @@ extern const struct inode_operations f2fs_special_inode_operations;
*/
bool f2fs_may_inline(struct inode *);
int f2fs_read_inline_data(struct inode *, struct page *);
-int f2fs_convert_inline_data(struct inode *, pgoff_t);
+int f2fs_convert_inline_data(struct inode *, pgoff_t, struct page *);
int f2fs_write_inline_data(struct inode *, struct page *, unsigned int);
void truncate_inline_data(struct inode *, u64);
-int recover_inline_data(struct inode *, struct page *);
+bool recover_inline_data(struct inode *, struct page *);
#endif
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 208f1a9bd56..060aee65aee 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -41,6 +41,11 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
sb_start_pagefault(inode->i_sb);
+ /* force to convert with normal data indices */
+ err = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, page);
+ if (err)
+ goto out;
+
/* block allocation */
f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -110,6 +115,25 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
return 1;
}
+static inline bool need_do_checkpoint(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ bool need_cp = false;
+
+ if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
+ need_cp = true;
+ else if (file_wrong_pino(inode))
+ need_cp = true;
+ else if (!space_for_roll_forward(sbi))
+ need_cp = true;
+ else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
+ need_cp = true;
+ else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
+ need_cp = true;
+
+ return need_cp;
+}
+
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file->f_mapping->host;
@@ -154,23 +178,12 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
/* guarantee free sections for fsync */
f2fs_balance_fs(sbi);
- down_read(&fi->i_sem);
-
/*
* Both of fdatasync() and fsync() are able to be recovered from
* sudden-power-off.
*/
- if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
- need_cp = true;
- else if (file_wrong_pino(inode))
- need_cp = true;
- else if (!space_for_roll_forward(sbi))
- need_cp = true;
- else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
- need_cp = true;
- else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
- need_cp = true;
-
+ down_read(&fi->i_sem);
+ need_cp = need_do_checkpoint(inode);
up_read(&fi->i_sem);
if (need_cp) {
@@ -288,7 +301,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
if (err && err != -ENOENT) {
goto fail;
} else if (err == -ENOENT) {
- /* direct node is not exist */
+ /* direct node does not exist */
if (whence == SEEK_DATA) {
pgofs = PGOFS_OF_NEXT_DNODE(pgofs,
F2FS_I(inode));
@@ -417,7 +430,7 @@ out:
f2fs_put_page(page, 1);
}
-int truncate_blocks(struct inode *inode, u64 from)
+int truncate_blocks(struct inode *inode, u64 from, bool lock)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
unsigned int blocksize = inode->i_sb->s_blocksize;
@@ -433,14 +446,16 @@ int truncate_blocks(struct inode *inode, u64 from)
free_from = (pgoff_t)
((from + blocksize - 1) >> (sbi->log_blocksize));
- f2fs_lock_op(sbi);
+ if (lock)
+ f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE);
if (err) {
if (err == -ENOENT)
goto free_next;
- f2fs_unlock_op(sbi);
+ if (lock)
+ f2fs_unlock_op(sbi);
trace_f2fs_truncate_blocks_exit(inode, err);
return err;
}
@@ -458,7 +473,8 @@ int truncate_blocks(struct inode *inode, u64 from)
f2fs_put_dnode(&dn);
free_next:
err = truncate_inode_blocks(inode, free_from);
- f2fs_unlock_op(sbi);
+ if (lock)
+ f2fs_unlock_op(sbi);
done:
/* lastly zero out the first data page */
truncate_partial_data_page(inode, from);
@@ -475,7 +491,7 @@ void f2fs_truncate(struct inode *inode)
trace_f2fs_truncate(inode);
- if (!truncate_blocks(inode, i_size_read(inode))) {
+ if (!truncate_blocks(inode, i_size_read(inode), true)) {
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
}
@@ -533,7 +549,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
if ((attr->ia_valid & ATTR_SIZE) &&
attr->ia_size != i_size_read(inode)) {
- err = f2fs_convert_inline_data(inode, attr->ia_size);
+ err = f2fs_convert_inline_data(inode, attr->ia_size, NULL);
if (err)
return err;
@@ -622,7 +638,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
loff_t off_start, off_end;
int ret = 0;
- ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1);
+ ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL);
if (ret)
return ret;
@@ -678,7 +694,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
if (ret)
return ret;
- ret = f2fs_convert_inline_data(inode, offset + len);
+ ret = f2fs_convert_inline_data(inode, offset + len, NULL);
if (ret)
return ret;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index d7947d90ccc..943a31db7cc 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -58,7 +58,7 @@ static int gc_thread_func(void *data)
* 3. IO subsystem is idle by checking the # of requests in
* bdev's request list.
*
- * Note) We have to avoid triggering GCs too much frequently.
+ * Note) We have to avoid triggering GCs frequently.
* Because it is possible that some segments can be
* invalidated soon after by user update or deletion.
* So, I'd like to wait some time to collect dirty segments.
@@ -222,7 +222,7 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
u = (vblocks * 100) >> sbi->log_blocks_per_seg;
- /* Handle if the system time is changed by user */
+ /* Handle if the system time has been changed by the user */
if (mtime < sit_i->min_mtime)
sit_i->min_mtime = mtime;
if (mtime > sit_i->max_mtime)
@@ -593,7 +593,7 @@ next_step:
if (phase == 2) {
inode = f2fs_iget(sb, dni.ino);
- if (IS_ERR(inode))
+ if (IS_ERR(inode) || is_bad_inode(inode))
continue;
start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
@@ -693,7 +693,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
gc_more:
if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
goto stop;
- if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
+ if (unlikely(f2fs_cp_error(sbi)))
goto stop;
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 5d5eb6047bf..16f0b2b2299 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -91,7 +91,7 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
block_t invalid_user_blocks = sbi->user_block_count -
written_block_count(sbi);
/*
- * Background GC is triggered with the following condition.
+ * Background GC is triggered with the following conditions.
* 1. There are a number of invalid blocks.
* 2. There is not enough free space.
*/
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index 948d17bf728..a844fcfb9a8 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -42,7 +42,8 @@ static void TEA_transform(unsigned int buf[4], unsigned int const in[])
buf[1] += b1;
}
-static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num)
+static void str2hashbuf(const unsigned char *msg, size_t len,
+ unsigned int *buf, int num)
{
unsigned pad, val;
int i;
@@ -73,9 +74,9 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info)
{
__u32 hash;
f2fs_hash_t f2fs_hash;
- const char *p;
+ const unsigned char *p;
__u32 in[8], buf[4];
- const char *name = name_info->name;
+ const unsigned char *name = name_info->name;
size_t len = name_info->len;
if ((len <= 2) && (name[0] == '.') &&
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 5beeccef9ae..3e8ecdf3742 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -68,7 +68,7 @@ out:
static int __f2fs_convert_inline_data(struct inode *inode, struct page *page)
{
- int err;
+ int err = 0;
struct page *ipage;
struct dnode_of_data dn;
void *src_addr, *dst_addr;
@@ -86,6 +86,10 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page)
goto out;
}
+ /* someone else converted inline_data already */
+ if (!f2fs_has_inline_data(inode))
+ goto out;
+
/*
* i_addr[0] is not used for inline data,
* so reserving new block will not destroy inline data
@@ -124,9 +128,10 @@ out:
return err;
}
-int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size)
+int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size,
+ struct page *page)
{
- struct page *page;
+ struct page *new_page = page;
int err;
if (!f2fs_has_inline_data(inode))
@@ -134,17 +139,20 @@ int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size)
else if (to_size <= MAX_INLINE_DATA)
return 0;
- page = grab_cache_page(inode->i_mapping, 0);
- if (!page)
- return -ENOMEM;
+ if (!page || page->index != 0) {
+ new_page = grab_cache_page(inode->i_mapping, 0);
+ if (!new_page)
+ return -ENOMEM;
+ }
- err = __f2fs_convert_inline_data(inode, page);
- f2fs_put_page(page, 1);
+ err = __f2fs_convert_inline_data(inode, new_page);
+ if (!page || page->index != 0)
+ f2fs_put_page(new_page, 1);
return err;
}
int f2fs_write_inline_data(struct inode *inode,
- struct page *page, unsigned size)
+ struct page *page, unsigned size)
{
void *src_addr, *dst_addr;
struct page *ipage;
@@ -199,7 +207,7 @@ void truncate_inline_data(struct inode *inode, u64 from)
f2fs_put_page(ipage, 1);
}
-int recover_inline_data(struct inode *inode, struct page *npage)
+bool recover_inline_data(struct inode *inode, struct page *npage)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct f2fs_inode *ri = NULL;
@@ -218,7 +226,7 @@ int recover_inline_data(struct inode *inode, struct page *npage)
ri = F2FS_INODE(npage);
if (f2fs_has_inline_data(inode) &&
- ri && ri->i_inline & F2FS_INLINE_DATA) {
+ ri && (ri->i_inline & F2FS_INLINE_DATA)) {
process_inline:
ipage = get_node_page(sbi, inode->i_ino);
f2fs_bug_on(IS_ERR(ipage));
@@ -230,7 +238,7 @@ process_inline:
memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
update_inode(inode, ipage);
f2fs_put_page(ipage, 1);
- return -1;
+ return true;
}
if (f2fs_has_inline_data(inode)) {
@@ -242,10 +250,10 @@ process_inline:
clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
update_inode(inode, ipage);
f2fs_put_page(ipage, 1);
- } else if (ri && ri->i_inline & F2FS_INLINE_DATA) {
- truncate_blocks(inode, 0);
+ } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
+ truncate_blocks(inode, 0, false);
set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
goto process_inline;
}
- return 0;
+ return false;
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 27b03776ffd..ee103fd7283 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -134,9 +134,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
return 0;
out:
clear_nlink(inode);
- unlock_new_inode(inode);
- make_bad_inode(inode);
- iput(inode);
+ iget_failed(inode);
alloc_nid_failed(sbi, ino);
return err;
}
@@ -229,7 +227,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
f2fs_delete_entry(de, page, inode);
f2fs_unlock_op(sbi);
- /* In order to evict this inode, we set it dirty */
+ /* In order to evict this inode, we set it dirty */
mark_inode_dirty(inode);
fail:
trace_f2fs_unlink_exit(inode, err);
@@ -267,9 +265,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
return err;
out:
clear_nlink(inode);
- unlock_new_inode(inode);
- make_bad_inode(inode);
- iput(inode);
+ iget_failed(inode);
alloc_nid_failed(sbi, inode->i_ino);
return err;
}
@@ -308,9 +304,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
out_fail:
clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
clear_nlink(inode);
- unlock_new_inode(inode);
- make_bad_inode(inode);
- iput(inode);
+ iget_failed(inode);
alloc_nid_failed(sbi, inode->i_ino);
return err;
}
@@ -354,9 +348,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
return 0;
out:
clear_nlink(inode);
- unlock_new_inode(inode);
- make_bad_inode(inode);
- iput(inode);
+ iget_failed(inode);
alloc_nid_failed(sbi, inode->i_ino);
return err;
}
@@ -688,9 +680,7 @@ release_out:
out:
f2fs_unlock_op(sbi);
clear_nlink(inode);
- unlock_new_inode(inode);
- make_bad_inode(inode);
- iput(inode);
+ iget_failed(inode);
alloc_nid_failed(sbi, inode->i_ino);
return err;
}
@@ -704,7 +694,6 @@ const struct inode_operations f2fs_dir_inode_operations = {
.mkdir = f2fs_mkdir,
.rmdir = f2fs_rmdir,
.mknod = f2fs_mknod,
- .rename = f2fs_rename,
.rename2 = f2fs_rename2,
.tmpfile = f2fs_tmpfile,
.getattr = f2fs_getattr,
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index d3d90d28463..45378196e19 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -237,7 +237,7 @@ retry:
nat_get_blkaddr(e) != NULL_ADDR &&
new_blkaddr == NEW_ADDR);
- /* increament version no as node is removed */
+ /* increment version no as node is removed */
if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
unsigned char version = nat_get_version(e);
nat_set_version(e, inc_node_version(version));
@@ -274,7 +274,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
}
/*
- * This function returns always success
+ * This function always returns success
*/
void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
{
@@ -650,7 +650,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
/* get indirect nodes in the path */
for (i = 0; i < idx + 1; i++) {
- /* refernece count'll be increased */
+ /* reference count'll be increased */
pages[i] = get_node_page(sbi, nid[i]);
if (IS_ERR(pages[i])) {
err = PTR_ERR(pages[i]);
@@ -823,22 +823,26 @@ int truncate_xattr_node(struct inode *inode, struct page *page)
*/
void remove_inode_page(struct inode *inode)
{
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- struct page *page;
- nid_t ino = inode->i_ino;
struct dnode_of_data dn;
- page = get_node_page(sbi, ino);
- if (IS_ERR(page))
+ set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
+ if (get_dnode_of_data(&dn, 0, LOOKUP_NODE))
return;
- if (truncate_xattr_node(inode, page)) {
- f2fs_put_page(page, 1);
+ if (truncate_xattr_node(inode, dn.inode_page)) {
+ f2fs_put_dnode(&dn);
return;
}
- /* 0 is possible, after f2fs_new_inode() is failed */
+
+ /* remove potential inline_data blocks */
+ if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode))
+ truncate_data_blocks_range(&dn, 1);
+
+ /* 0 is possible, after f2fs_new_inode() has failed */
f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1);
- set_new_dnode(&dn, inode, page, page, ino);
+
+ /* will put inode & node pages */
truncate_node(&dn);
}
@@ -1129,8 +1133,11 @@ continue_unlock:
set_fsync_mark(page, 0);
set_dentry_mark(page, 0);
}
- NODE_MAPPING(sbi)->a_ops->writepage(page, wbc);
- wrote++;
+
+ if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc))
+ unlock_page(page);
+ else
+ wrote++;
if (--wbc->nr_to_write == 0)
break;
@@ -1212,6 +1219,8 @@ static int f2fs_write_node_page(struct page *page,
if (unlikely(sbi->por_doing))
goto redirty_out;
+ if (unlikely(f2fs_cp_error(sbi)))
+ goto redirty_out;
f2fs_wait_on_page_writeback(page, NODE);
@@ -1540,15 +1549,6 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
kmem_cache_free(free_nid_slab, i);
}
-void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
- struct f2fs_summary *sum, struct node_info *ni,
- block_t new_blkaddr)
-{
- rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr);
- set_node_addr(sbi, ni, new_blkaddr, false);
- clear_node_page_dirty(page);
-}
-
void recover_inline_xattr(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
@@ -1557,40 +1557,33 @@ void recover_inline_xattr(struct inode *inode, struct page *page)
struct page *ipage;
struct f2fs_inode *ri;
- if (!f2fs_has_inline_xattr(inode))
- return;
-
- if (!IS_INODE(page))
- return;
-
- ri = F2FS_INODE(page);
- if (!(ri->i_inline & F2FS_INLINE_XATTR))
- return;
-
ipage = get_node_page(sbi, inode->i_ino);
f2fs_bug_on(IS_ERR(ipage));
+ ri = F2FS_INODE(page);
+ if (!(ri->i_inline & F2FS_INLINE_XATTR)) {
+ clear_inode_flag(F2FS_I(inode), FI_INLINE_XATTR);
+ goto update_inode;
+ }
+
dst_addr = inline_xattr_addr(ipage);
src_addr = inline_xattr_addr(page);
inline_size = inline_xattr_size(inode);
f2fs_wait_on_page_writeback(ipage, NODE);
memcpy(dst_addr, src_addr, inline_size);
-
+update_inode:
update_inode(inode, ipage);
f2fs_put_page(ipage, 1);
}
-bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
+void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
nid_t new_xnid = nid_of_node(page);
struct node_info ni;
- if (!f2fs_has_xattr_block(ofs_of_node(page)))
- return false;
-
/* 1: invalidate the previous xattr nid */
if (!prev_xnid)
goto recover_xnid;
@@ -1618,7 +1611,6 @@ recover_xnid:
set_node_addr(sbi, &ni, blkaddr, false);
update_inode_page(inode);
- return true;
}
int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
@@ -1637,7 +1629,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
if (!ipage)
return -ENOMEM;
- /* Should not use this inode from free nid list */
+ /* Should not use this inode from free nid list */
remove_free_nid(NM_I(sbi), ino);
SetPageUptodate(ipage);
@@ -1651,6 +1643,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
dst->i_blocks = cpu_to_le64(1);
dst->i_links = cpu_to_le32(1);
dst->i_xattr_nid = 0;
+ dst->i_inline = src->i_inline & F2FS_INLINE_XATTR;
new_ni = old_ni;
new_ni.ino = ino;
@@ -1659,13 +1652,14 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
WARN_ON(1);
set_node_addr(sbi, &new_ni, NEW_ADDR, false);
inc_valid_inode_count(sbi);
+ set_page_dirty(ipage);
f2fs_put_page(ipage, 1);
return 0;
}
/*
* ra_sum_pages() merge contiguous pages into one bio and submit.
- * these pre-readed pages are alloced in bd_inode's mapping tree.
+ * these pre-read pages are allocated in bd_inode's mapping tree.
*/
static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages,
int start, int nrpages)
@@ -1709,7 +1703,7 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) {
nrpages = min(last_offset - i, bio_blocks);
- /* read ahead node pages */
+ /* readahead node pages */
nrpages = ra_sum_pages(sbi, pages, addr, nrpages);
if (!nrpages)
return -ENOMEM;
@@ -1967,7 +1961,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
/* not used nids: 0, node, meta, (and root counted as valid node) */
- nm_i->available_nids = nm_i->max_nid - 3;
+ nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM;
nm_i->fcnt = 0;
nm_i->nat_cnt = 0;
nm_i->ram_thresh = DEF_RAM_THRESHOLD;
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index fe1c6d921ba..756c41cd258 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -62,8 +62,10 @@ static int recover_dentry(struct page *ipage, struct inode *inode)
}
retry:
de = f2fs_find_entry(dir, &name, &page);
- if (de && inode->i_ino == le32_to_cpu(de->ino))
+ if (de && inode->i_ino == le32_to_cpu(de->ino)) {
+ clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
goto out_unmap_put;
+ }
if (de) {
einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
if (IS_ERR(einode)) {
@@ -300,14 +302,19 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
struct node_info ni;
int err = 0, recovered = 0;
- recover_inline_xattr(inode, page);
-
- if (recover_inline_data(inode, page))
+ /* step 1: recover xattr */
+ if (IS_INODE(page)) {
+ recover_inline_xattr(inode, page);
+ } else if (f2fs_has_xattr_block(ofs_of_node(page))) {
+ recover_xattr_data(inode, page, blkaddr);
goto out;
+ }
- if (recover_xattr_data(inode, page, blkaddr))
+ /* step 2: recover inline data */
+ if (recover_inline_data(inode, page))
goto out;
+ /* step 3: recover data indices */
start = start_bidx_of_node(ofs_of_node(page), fi);
end = start + ADDRS_PER_PAGE(page, fi);
@@ -364,8 +371,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
fill_node_footer(dn.node_page, dn.nid, ni.ino,
ofs_of_node(page), false);
set_page_dirty(dn.node_page);
-
- recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr);
err:
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
@@ -452,6 +457,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
/* step #1: find fsynced inode numbers */
sbi->por_doing = true;
+ /* prevent checkpoint */
+ mutex_lock(&sbi->cp_mutex);
+
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
err = find_fsync_dnodes(sbi, &inode_list);
@@ -465,7 +473,8 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
/* step #2: recover data */
err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
- f2fs_bug_on(!list_empty(&inode_list));
+ if (!err)
+ f2fs_bug_on(!list_empty(&inode_list));
out:
destroy_fsync_dnodes(&inode_list);
kmem_cache_destroy(fsync_entry_slab);
@@ -482,8 +491,13 @@ out:
/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META))
sync_meta_pages(sbi, META, LONG_MAX);
+ set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
+ mutex_unlock(&sbi->cp_mutex);
} else if (need_writecp) {
+ mutex_unlock(&sbi->cp_mutex);
write_checkpoint(sbi, false);
+ } else {
+ mutex_unlock(&sbi->cp_mutex);
}
return err;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 0dfeebae2a5..0aa337cd5bb 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -62,7 +62,7 @@ static inline unsigned long __reverse_ffs(unsigned long word)
}
/*
- * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c becasue
+ * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
* f2fs_set_bit makes MSB and LSB reversed in a byte.
* Example:
* LSB <--> MSB
@@ -808,7 +808,7 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
}
/*
- * This function always allocates a used segment (from dirty seglist) by SSR
+ * This function always allocates a used segment(from dirty seglist) by SSR
* manner, so it should recover the existing segment information of valid blocks
*/
static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
@@ -1103,55 +1103,6 @@ void recover_data_page(struct f2fs_sb_info *sbi,
mutex_unlock(&curseg->curseg_mutex);
}
-void rewrite_node_page(struct f2fs_sb_info *sbi,
- struct page *page, struct f2fs_summary *sum,
- block_t old_blkaddr, block_t new_blkaddr)
-{
- struct sit_info *sit_i = SIT_I(sbi);
- int type = CURSEG_WARM_NODE;
- struct curseg_info *curseg;
- unsigned int segno, old_cursegno;
- block_t next_blkaddr = next_blkaddr_of_node(page);
- unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr);
- struct f2fs_io_info fio = {
- .type = NODE,
- .rw = WRITE_SYNC,
- };
-
- curseg = CURSEG_I(sbi, type);
-
- mutex_lock(&curseg->curseg_mutex);
- mutex_lock(&sit_i->sentry_lock);
-
- segno = GET_SEGNO(sbi, new_blkaddr);
- old_cursegno = curseg->segno;
-
- /* change the current segment */
- if (segno != curseg->segno) {
- curseg->next_segno = segno;
- change_curseg(sbi, type, true);
- }
- curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
- __add_sum_entry(sbi, type, sum);
-
- /* change the current log to the next block addr in advance */
- if (next_segno != segno) {
- curseg->next_segno = next_segno;
- change_curseg(sbi, type, true);
- }
- curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, next_blkaddr);
-
- /* rewrite node page */
- set_page_writeback(page);
- f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio);
- f2fs_submit_merged_bio(sbi, NODE, WRITE);
- refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
- locate_dirty_segment(sbi, old_cursegno);
-
- mutex_unlock(&sit_i->sentry_lock);
- mutex_unlock(&curseg->curseg_mutex);
-}
-
static inline bool is_merged_page(struct f2fs_sb_info *sbi,
struct page *page, enum page_type type)
{
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 55973f7b033..ff483257283 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -549,7 +549,7 @@ static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
}
/*
- * Summary block is always treated as invalid block
+ * Summary block is always treated as an invalid block
*/
static inline void check_block_count(struct f2fs_sb_info *sbi,
int segno, struct f2fs_sit_entry *raw_sit)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 657582fc760..41bdf511003 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -432,9 +432,15 @@ static void f2fs_put_super(struct super_block *sb)
stop_gc_thread(sbi);
/* We don't need to do checkpoint when it's clean */
- if (sbi->s_dirty && get_pages(sbi, F2FS_DIRTY_NODES))
+ if (sbi->s_dirty)
write_checkpoint(sbi, true);
+ /*
+ * normally superblock is clean, so we need to release this.
+ * In addition, EIO will skip the checkpoint, so we need this as well.
+ */
+ release_dirty_inode(sbi);
+
iput(sbi->node_inode);
iput(sbi->meta_inode);
@@ -457,9 +463,6 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
trace_f2fs_sync_fs(sb, sync);
- if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES))
- return 0;
-
if (sync) {
mutex_lock(&sbi->gc_mutex);
write_checkpoint(sbi, false);
@@ -505,8 +508,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count;
buf->f_bavail = user_block_count - valid_user_blocks(sbi);
- buf->f_files = sbi->total_node_count;
- buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi);
+ buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
+ buf->f_ffree = buf->f_files - valid_inode_count(sbi);
buf->f_namelen = F2FS_NAME_LEN;
buf->f_fsid.val[0] = (u32)id;
@@ -663,7 +666,7 @@ restore_gc:
if (need_restart_gc) {
if (start_gc_thread(sbi))
f2fs_msg(sbi->sb, KERN_WARNING,
- "background gc thread is stop");
+ "background gc thread has stopped");
} else if (need_stop_gc) {
stop_gc_thread(sbi);
}
@@ -812,7 +815,7 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi)
if (unlikely(fsmeta >= total))
return 1;
- if (unlikely(is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
+ if (unlikely(f2fs_cp_error(sbi))) {
f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
return 1;
}
@@ -899,8 +902,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
struct buffer_head *raw_super_buf;
struct inode *root;
long err = -EINVAL;
+ bool retry = true;
int i;
+try_onemore:
/* allocate memory for f2fs-specific super block info */
sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL);
if (!sbi)
@@ -1080,9 +1085,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
/* recover fsynced data */
if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
err = recover_fsync_data(sbi);
- if (err)
+ if (err) {
f2fs_msg(sb, KERN_ERR,
"Cannot recover all fsync data errno=%ld", err);
+ goto free_kobj;
+ }
}
/*
@@ -1123,6 +1130,13 @@ free_sb_buf:
brelse(raw_super_buf);
free_sbi:
kfree(sbi);
+
+ /* give it only one more chance */
+ if (retry) {
+ retry = 0;
+ shrink_dcache_sb(sb);
+ goto try_onemore;
+ }
return err;
}
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 8bea941ee30..728a5dc3dc1 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -528,7 +528,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
int free;
/*
* If value is NULL, it is remove operation.
- * In case of update operation, we caculate free.
+ * In case of update operation, we calculate free.
*/
free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr);
if (found)
diff --git a/fs/fs_pin.c b/fs/fs_pin.c
new file mode 100644
index 00000000000..9368236ca10
--- /dev/null
+++ b/fs/fs_pin.c
@@ -0,0 +1,78 @@
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/fs_pin.h>
+#include "internal.h"
+#include "mount.h"
+
+static void pin_free_rcu(struct rcu_head *head)
+{
+ kfree(container_of(head, struct fs_pin, rcu));
+}
+
+static DEFINE_SPINLOCK(pin_lock);
+
+void pin_put(struct fs_pin *p)
+{
+ if (atomic_long_dec_and_test(&p->count))
+ call_rcu(&p->rcu, pin_free_rcu);
+}
+
+void pin_remove(struct fs_pin *pin)
+{
+ spin_lock(&pin_lock);
+ hlist_del(&pin->m_list);
+ hlist_del(&pin->s_list);
+ spin_unlock(&pin_lock);
+}
+
+void pin_insert(struct fs_pin *pin, struct vfsmount *m)
+{
+ spin_lock(&pin_lock);
+ hlist_add_head(&pin->s_list, &m->mnt_sb->s_pins);
+ hlist_add_head(&pin->m_list, &real_mount(m)->mnt_pins);
+ spin_unlock(&pin_lock);
+}
+
+void mnt_pin_kill(struct mount *m)
+{
+ while (1) {
+ struct hlist_node *p;
+ struct fs_pin *pin;
+ rcu_read_lock();
+ p = ACCESS_ONCE(m->mnt_pins.first);
+ if (!p) {
+ rcu_read_unlock();
+ break;
+ }
+ pin = hlist_entry(p, struct fs_pin, m_list);
+ if (!atomic_long_inc_not_zero(&pin->count)) {
+ rcu_read_unlock();
+ cpu_relax();
+ continue;
+ }
+ rcu_read_unlock();
+ pin->kill(pin);
+ }
+}
+
+void sb_pin_kill(struct super_block *sb)
+{
+ while (1) {
+ struct hlist_node *p;
+ struct fs_pin *pin;
+ rcu_read_lock();
+ p = ACCESS_ONCE(sb->s_pins.first);
+ if (!p) {
+ rcu_read_unlock();
+ break;
+ }
+ pin = hlist_entry(p, struct fs_pin, s_list);
+ if (!atomic_long_inc_not_zero(&pin->count)) {
+ rcu_read_unlock();
+ cpu_relax();
+ continue;
+ }
+ rcu_read_unlock();
+ pin->kill(pin);
+ }
+}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 0c6048247a3..de1d84af9f7 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -845,12 +845,6 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
return err;
}
-static int fuse_rename(struct inode *olddir, struct dentry *oldent,
- struct inode *newdir, struct dentry *newent)
-{
- return fuse_rename2(olddir, oldent, newdir, newent, 0);
-}
-
static int fuse_link(struct dentry *entry, struct inode *newdir,
struct dentry *newent)
{
@@ -2024,7 +2018,6 @@ static const struct inode_operations fuse_dir_inode_operations = {
.symlink = fuse_symlink,
.unlink = fuse_unlink,
.rmdir = fuse_rmdir,
- .rename = fuse_rename,
.rename2 = fuse_rename2,
.link = fuse_link,
.setattr = fuse_setattr,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 40ac2628ddc..912061ac4ba 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1303,10 +1303,10 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
unsigned npages;
size_t start;
- unsigned n = req->max_pages - req->num_pages;
ssize_t ret = iov_iter_get_pages(ii,
&req->pages[req->num_pages],
- n * PAGE_SIZE, &start);
+ req->max_pages - req->num_pages,
+ &start);
if (ret < 0)
return ret;
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 9c88da0e855..4fcd40d6f30 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -89,6 +89,7 @@ extern int do_mknod(const char *file, int mode, unsigned int major,
extern int link_file(const char *from, const char *to);
extern int hostfs_do_readlink(char *file, char *buf, int size);
extern int rename_file(char *from, char *to);
+extern int rename2_file(char *from, char *to, unsigned int flags);
extern int do_statfs(char *root, long *bsize_out, long long *blocks_out,
long long *bfree_out, long long *bavail_out,
long long *files_out, long long *ffree_out,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index bb529f3b7f2..fd62cae0fdc 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -741,21 +741,31 @@ static int hostfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
return err;
}
-static int hostfs_rename(struct inode *from_ino, struct dentry *from,
- struct inode *to_ino, struct dentry *to)
+static int hostfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
{
- char *from_name, *to_name;
+ char *old_name, *new_name;
int err;
- if ((from_name = dentry_name(from)) == NULL)
+ if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+ return -EINVAL;
+
+ old_name = dentry_name(old_dentry);
+ if (old_name == NULL)
return -ENOMEM;
- if ((to_name = dentry_name(to)) == NULL) {
- __putname(from_name);
+ new_name = dentry_name(new_dentry);
+ if (new_name == NULL) {
+ __putname(old_name);
return -ENOMEM;
}
- err = rename_file(from_name, to_name);
- __putname(from_name);
- __putname(to_name);
+ if (!flags)
+ err = rename_file(old_name, new_name);
+ else
+ err = rename2_file(old_name, new_name, flags);
+
+ __putname(old_name);
+ __putname(new_name);
return err;
}
@@ -867,7 +877,7 @@ static const struct inode_operations hostfs_dir_iops = {
.mkdir = hostfs_mkdir,
.rmdir = hostfs_rmdir,
.mknod = hostfs_mknod,
- .rename = hostfs_rename,
+ .rename2 = hostfs_rename2,
.permission = hostfs_permission,
.setattr = hostfs_setattr,
};
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 67838f3aa20..9765dab95cb 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -14,6 +14,7 @@
#include <sys/time.h>
#include <sys/types.h>
#include <sys/vfs.h>
+#include <sys/syscall.h>
#include "hostfs.h"
#include <utime.h>
@@ -360,6 +361,33 @@ int rename_file(char *from, char *to)
return 0;
}
+int rename2_file(char *from, char *to, unsigned int flags)
+{
+ int err;
+
+#ifndef SYS_renameat2
+# ifdef __x86_64__
+# define SYS_renameat2 316
+# endif
+# ifdef __i386__
+# define SYS_renameat2 353
+# endif
+#endif
+
+#ifdef SYS_renameat2
+ err = syscall(SYS_renameat2, AT_FDCWD, from, AT_FDCWD, to, flags);
+ if (err < 0) {
+ if (errno != ENOSYS)
+ return -errno;
+ else
+ return -EINVAL;
+ }
+ return 0;
+#else
+ return -EINVAL;
+#endif
+}
+
int do_statfs(char *root, long *bsize_out, long long *blocks_out,
long long *bfree_out, long long *bavail_out,
long long *files_out, long long *ffree_out,
diff --git a/fs/internal.h b/fs/internal.h
index 46574240746..e325b4f9c79 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -131,7 +131,6 @@ extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan,
/*
* read_write.c
*/
-extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *);
extern int rw_verify_area(int, struct file *, const loff_t *, size_t);
/*
@@ -144,3 +143,9 @@ extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
* pipe.c
*/
extern const struct file_operations pipefifo_fops;
+
+/*
+ * fs_pin.c
+ */
+extern void sb_pin_kill(struct super_block *sb);
+extern void mnt_pin_kill(struct mount *m);
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 4556ce1af5b..5ddaf8625d3 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -61,7 +61,7 @@ static void isofs_put_super(struct super_block *sb)
return;
}
-static int isofs_read_inode(struct inode *);
+static int isofs_read_inode(struct inode *, int relocated);
static int isofs_statfs (struct dentry *, struct kstatfs *);
static struct kmem_cache *isofs_inode_cachep;
@@ -1259,7 +1259,7 @@ out_toomany:
goto out;
}
-static int isofs_read_inode(struct inode *inode)
+static int isofs_read_inode(struct inode *inode, int relocated)
{
struct super_block *sb = inode->i_sb;
struct isofs_sb_info *sbi = ISOFS_SB(sb);
@@ -1404,7 +1404,7 @@ static int isofs_read_inode(struct inode *inode)
*/
if (!high_sierra) {
- parse_rock_ridge_inode(de, inode);
+ parse_rock_ridge_inode(de, inode, relocated);
/* if we want uid/gid set, override the rock ridge setting */
if (sbi->s_uid_set)
inode->i_uid = sbi->s_uid;
@@ -1483,9 +1483,10 @@ static int isofs_iget5_set(struct inode *ino, void *data)
* offset that point to the underlying meta-data for the inode. The
* code below is otherwise similar to the iget() code in
* include/linux/fs.h */
-struct inode *isofs_iget(struct super_block *sb,
- unsigned long block,
- unsigned long offset)
+struct inode *__isofs_iget(struct super_block *sb,
+ unsigned long block,
+ unsigned long offset,
+ int relocated)
{
unsigned long hashval;
struct inode *inode;
@@ -1507,7 +1508,7 @@ struct inode *isofs_iget(struct super_block *sb,
return ERR_PTR(-ENOMEM);
if (inode->i_state & I_NEW) {
- ret = isofs_read_inode(inode);
+ ret = isofs_read_inode(inode, relocated);
if (ret < 0) {
iget_failed(inode);
inode = ERR_PTR(ret);
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 99167238518..0ac4c1f73fb 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -107,7 +107,7 @@ extern int iso_date(char *, int);
struct inode; /* To make gcc happy */
-extern int parse_rock_ridge_inode(struct iso_directory_record *, struct inode *);
+extern int parse_rock_ridge_inode(struct iso_directory_record *, struct inode *, int relocated);
extern int get_rock_ridge_filename(struct iso_directory_record *, char *, struct inode *);
extern int isofs_name_translate(struct iso_directory_record *, char *, struct inode *);
@@ -118,9 +118,24 @@ extern struct dentry *isofs_lookup(struct inode *, struct dentry *, unsigned int
extern struct buffer_head *isofs_bread(struct inode *, sector_t);
extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long);
-extern struct inode *isofs_iget(struct super_block *sb,
- unsigned long block,
- unsigned long offset);
+struct inode *__isofs_iget(struct super_block *sb,
+ unsigned long block,
+ unsigned long offset,
+ int relocated);
+
+static inline struct inode *isofs_iget(struct super_block *sb,
+ unsigned long block,
+ unsigned long offset)
+{
+ return __isofs_iget(sb, block, offset, 0);
+}
+
+static inline struct inode *isofs_iget_reloc(struct super_block *sb,
+ unsigned long block,
+ unsigned long offset)
+{
+ return __isofs_iget(sb, block, offset, 1);
+}
/* Because the inode number is no longer relevant to finding the
* underlying meta-data for an inode, we are free to choose a more
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index c0bf42472e4..f488bbae541 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -288,12 +288,16 @@ eio:
goto out;
}
+#define RR_REGARD_XA 1
+#define RR_RELOC_DE 2
+
static int
parse_rock_ridge_inode_internal(struct iso_directory_record *de,
- struct inode *inode, int regard_xa)
+ struct inode *inode, int flags)
{
int symlink_len = 0;
int cnt, sig;
+ unsigned int reloc_block;
struct inode *reloc;
struct rock_ridge *rr;
int rootflag;
@@ -305,7 +309,7 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de,
init_rock_state(&rs, inode);
setup_rock_ridge(de, inode, &rs);
- if (regard_xa) {
+ if (flags & RR_REGARD_XA) {
rs.chr += 14;
rs.len -= 14;
if (rs.len < 0)
@@ -485,12 +489,22 @@ repeat:
"relocated directory\n");
goto out;
case SIG('C', 'L'):
- ISOFS_I(inode)->i_first_extent =
- isonum_733(rr->u.CL.location);
- reloc =
- isofs_iget(inode->i_sb,
- ISOFS_I(inode)->i_first_extent,
- 0);
+ if (flags & RR_RELOC_DE) {
+ printk(KERN_ERR
+ "ISOFS: Recursive directory relocation "
+ "is not supported\n");
+ goto eio;
+ }
+ reloc_block = isonum_733(rr->u.CL.location);
+ if (reloc_block == ISOFS_I(inode)->i_iget5_block &&
+ ISOFS_I(inode)->i_iget5_offset == 0) {
+ printk(KERN_ERR
+ "ISOFS: Directory relocation points to "
+ "itself\n");
+ goto eio;
+ }
+ ISOFS_I(inode)->i_first_extent = reloc_block;
+ reloc = isofs_iget_reloc(inode->i_sb, reloc_block, 0);
if (IS_ERR(reloc)) {
ret = PTR_ERR(reloc);
goto out;
@@ -637,9 +651,11 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit)
return rpnt;
}
-int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode)
+int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode,
+ int relocated)
{
- int result = parse_rock_ridge_inode_internal(de, inode, 0);
+ int flags = relocated ? RR_RELOC_DE : 0;
+ int result = parse_rock_ridge_inode_internal(de, inode, flags);
/*
* if rockridge flag was reset and we didn't look for attributes
@@ -647,7 +663,8 @@ int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode)
*/
if ((ISOFS_SB(inode->i_sb)->s_rock_offset == -1)
&& (ISOFS_SB(inode->i_sb)->s_rock == 2)) {
- result = parse_rock_ridge_inode_internal(de, inode, 14);
+ result = parse_rock_ridge_inode_internal(de, inode,
+ flags | RR_REGARD_XA);
}
return result;
}
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6fac7434985..b73e0215baa 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -97,7 +97,7 @@ static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh)
struct commit_header *h;
__u32 csum;
- if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ if (!jbd2_journal_has_csum_v2or3(j))
return;
h = (struct commit_header *)(bh->b_data);
@@ -313,11 +313,11 @@ static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
return checksum;
}
-static void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
+static void write_tag_block(journal_t *j, journal_block_tag_t *tag,
unsigned long long block)
{
tag->t_blocknr = cpu_to_be32(block & (u32)~0);
- if (tag_bytes > JBD2_TAG_SIZE32)
+ if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_64BIT))
tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
}
@@ -327,7 +327,7 @@ static void jbd2_descr_block_csum_set(journal_t *j,
struct jbd2_journal_block_tail *tail;
__u32 csum;
- if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ if (!jbd2_journal_has_csum_v2or3(j))
return;
tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
@@ -340,12 +340,13 @@ static void jbd2_descr_block_csum_set(journal_t *j,
static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
struct buffer_head *bh, __u32 sequence)
{
+ journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
struct page *page = bh->b_page;
__u8 *addr;
__u32 csum32;
__be32 seq;
- if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ if (!jbd2_journal_has_csum_v2or3(j))
return;
seq = cpu_to_be32(sequence);
@@ -355,8 +356,10 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
bh->b_size);
kunmap_atomic(addr);
- /* We only have space to store the lower 16 bits of the crc32c. */
- tag->t_checksum = cpu_to_be16(csum32);
+ if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3))
+ tag3->t_checksum = cpu_to_be32(csum32);
+ else
+ tag->t_checksum = cpu_to_be16(csum32);
}
/*
* jbd2_journal_commit_transaction
@@ -396,7 +399,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
LIST_HEAD(io_bufs);
LIST_HEAD(log_bufs);
- if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ if (jbd2_journal_has_csum_v2or3(journal))
csum_size = sizeof(struct jbd2_journal_block_tail);
/*
@@ -690,7 +693,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
tag_flag |= JBD2_FLAG_SAME_UUID;
tag = (journal_block_tag_t *) tagp;
- write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr);
+ write_tag_block(journal, tag, jh2bh(jh)->b_blocknr);
tag->t_flags = cpu_to_be16(tag_flag);
jbd2_block_tag_csum_set(journal, tag, wbuf[bufs],
commit_transaction->t_tid);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 67b8e303946..19d74d86d99 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -124,7 +124,7 @@ EXPORT_SYMBOL(__jbd2_debug);
/* Checksumming functions */
static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
{
- if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ if (!jbd2_journal_has_csum_v2or3(j))
return 1;
return sb->s_checksum_type == JBD2_CRC32C_CHKSUM;
@@ -145,7 +145,7 @@ static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
{
- if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ if (!jbd2_journal_has_csum_v2or3(j))
return 1;
return sb->s_checksum == jbd2_superblock_csum(j, sb);
@@ -153,7 +153,7 @@ static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb)
{
- if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ if (!jbd2_journal_has_csum_v2or3(j))
return;
sb->s_checksum = jbd2_superblock_csum(j, sb);
@@ -1522,21 +1522,29 @@ static int journal_get_superblock(journal_t *journal)
goto out;
}
- if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) &&
- JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
+ if (jbd2_journal_has_csum_v2or3(journal) &&
+ JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) {
/* Can't have checksum v1 and v2 on at the same time! */
printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 "
"at the same time!\n");
goto out;
}
+ if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) &&
+ JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
+ /* Can't have checksum v2 and v3 at the same time! */
+ printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 "
+ "at the same time!\n");
+ goto out;
+ }
+
if (!jbd2_verify_csum_type(journal, sb)) {
printk(KERN_ERR "JBD2: Unknown checksum type\n");
goto out;
}
/* Load the checksum driver */
- if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
+ if (jbd2_journal_has_csum_v2or3(journal)) {
journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
if (IS_ERR(journal->j_chksum_driver)) {
printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
@@ -1553,7 +1561,7 @@ static int journal_get_superblock(journal_t *journal)
}
/* Precompute checksum seed for all metadata */
- if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ if (jbd2_journal_has_csum_v2or3(journal))
journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
sizeof(sb->s_uuid));
@@ -1813,8 +1821,14 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
if (!jbd2_journal_check_available_features(journal, compat, ro, incompat))
return 0;
- /* Asking for checksumming v2 and v1? Only give them v2. */
- if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2 &&
+ /* If enabling v2 checksums, turn on v3 instead */
+ if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2) {
+ incompat &= ~JBD2_FEATURE_INCOMPAT_CSUM_V2;
+ incompat |= JBD2_FEATURE_INCOMPAT_CSUM_V3;
+ }
+
+ /* Asking for checksumming v3 and v1? Only give them v3. */
+ if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V3 &&
compat & JBD2_FEATURE_COMPAT_CHECKSUM)
compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM;
@@ -1823,8 +1837,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
sb = journal->j_superblock;
- /* If enabling v2 checksums, update superblock */
- if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
+ /* If enabling v3 checksums, update superblock */
+ if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
sb->s_checksum_type = JBD2_CRC32C_CHKSUM;
sb->s_feature_compat &=
~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM);
@@ -1842,8 +1856,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
}
/* Precompute checksum seed for all metadata */
- if (JBD2_HAS_INCOMPAT_FEATURE(journal,
- JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ if (jbd2_journal_has_csum_v2or3(journal))
journal->j_csum_seed = jbd2_chksum(journal, ~0,
sb->s_uuid,
sizeof(sb->s_uuid));
@@ -1852,7 +1865,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
/* If enabling v1 checksums, downgrade superblock */
if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM))
sb->s_feature_incompat &=
- ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2);
+ ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2 |
+ JBD2_FEATURE_INCOMPAT_CSUM_V3);
sb->s_feature_compat |= cpu_to_be32(compat);
sb->s_feature_ro_compat |= cpu_to_be32(ro);
@@ -2165,16 +2179,20 @@ int jbd2_journal_blocks_per_page(struct inode *inode)
*/
size_t journal_tag_bytes(journal_t *journal)
{
- journal_block_tag_t tag;
- size_t x = 0;
+ size_t sz;
+
+ if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3))
+ return sizeof(journal_block_tag3_t);
+
+ sz = sizeof(journal_block_tag_t);
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
- x += sizeof(tag.t_checksum);
+ sz += sizeof(__u16);
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
- return x + JBD2_TAG_SIZE64;
+ return sz;
else
- return x + JBD2_TAG_SIZE32;
+ return sz - sizeof(__u32);
}
/*
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 3b6bb19d60b..9b329b55ffe 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -181,7 +181,7 @@ static int jbd2_descr_block_csum_verify(journal_t *j,
__be32 provided;
__u32 calculated;
- if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ if (!jbd2_journal_has_csum_v2or3(j))
return 1;
tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize -
@@ -205,7 +205,7 @@ static int count_tags(journal_t *journal, struct buffer_head *bh)
int nr = 0, size = journal->j_blocksize;
int tag_bytes = journal_tag_bytes(journal);
- if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ if (jbd2_journal_has_csum_v2or3(journal))
size -= sizeof(struct jbd2_journal_block_tail);
tagp = &bh->b_data[sizeof(journal_header_t)];
@@ -338,10 +338,11 @@ int jbd2_journal_skip_recovery(journal_t *journal)
return err;
}
-static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag)
+static inline unsigned long long read_tag_block(journal_t *journal,
+ journal_block_tag_t *tag)
{
unsigned long long block = be32_to_cpu(tag->t_blocknr);
- if (tag_bytes > JBD2_TAG_SIZE32)
+ if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
return block;
}
@@ -384,7 +385,7 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
__be32 provided;
__u32 calculated;
- if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ if (!jbd2_journal_has_csum_v2or3(j))
return 1;
h = buf;
@@ -399,17 +400,21 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
void *buf, __u32 sequence)
{
+ journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
__u32 csum32;
__be32 seq;
- if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ if (!jbd2_journal_has_csum_v2or3(j))
return 1;
seq = cpu_to_be32(sequence);
csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize);
- return tag->t_checksum == cpu_to_be16(csum32);
+ if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3))
+ return tag3->t_checksum == cpu_to_be32(csum32);
+ else
+ return tag->t_checksum == cpu_to_be16(csum32);
}
static int do_one_pass(journal_t *journal,
@@ -426,6 +431,7 @@ static int do_one_pass(journal_t *journal,
int tag_bytes = journal_tag_bytes(journal);
__u32 crc32_sum = ~0; /* Transactional Checksums */
int descr_csum_size = 0;
+ int block_error = 0;
/*
* First thing is to establish what we expect to find in the log
@@ -512,8 +518,7 @@ static int do_one_pass(journal_t *journal,
switch(blocktype) {
case JBD2_DESCRIPTOR_BLOCK:
/* Verify checksum first */
- if (JBD2_HAS_INCOMPAT_FEATURE(journal,
- JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ if (jbd2_journal_has_csum_v2or3(journal))
descr_csum_size =
sizeof(struct jbd2_journal_block_tail);
if (descr_csum_size > 0 &&
@@ -574,7 +579,7 @@ static int do_one_pass(journal_t *journal,
unsigned long long blocknr;
J_ASSERT(obh != NULL);
- blocknr = read_tag_block(tag_bytes,
+ blocknr = read_tag_block(journal,
tag);
/* If the block has been
@@ -598,7 +603,8 @@ static int do_one_pass(journal_t *journal,
"checksum recovering "
"block %llu in log\n",
blocknr);
- continue;
+ block_error = 1;
+ goto skip_write;
}
/* Find a buffer for the new
@@ -797,7 +803,8 @@ static int do_one_pass(journal_t *journal,
success = -EIO;
}
}
-
+ if (block_error && success == 0)
+ success = -EIO;
return success;
failed:
@@ -811,7 +818,7 @@ static int jbd2_revoke_block_csum_verify(journal_t *j,
__be32 provided;
__u32 calculated;
- if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ if (!jbd2_journal_has_csum_v2or3(j))
return 1;
tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize -
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 198c9c10276..d5e95a175c9 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -91,8 +91,8 @@
#include <linux/list.h>
#include <linux/init.h>
#include <linux/bio.h>
-#endif
#include <linux/log2.h>
+#endif
static struct kmem_cache *jbd2_revoke_record_cache;
static struct kmem_cache *jbd2_revoke_table_cache;
@@ -597,7 +597,7 @@ static void write_one_revoke_record(journal_t *journal,
offset = *offsetp;
/* Do we need to leave space at the end for a checksum? */
- if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ if (jbd2_journal_has_csum_v2or3(journal))
csum_size = sizeof(struct jbd2_journal_revoke_tail);
/* Make sure we have a descriptor with space left for the record */
@@ -644,7 +644,7 @@ static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh)
struct jbd2_journal_revoke_tail *tail;
__u32 csum;
- if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ if (!jbd2_journal_has_csum_v2or3(j))
return;
tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize -
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 8f27c93f8d2..ec9e082f9ec 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -253,13 +253,11 @@ static int lockd_up_net(struct svc_serv *serv, struct net *net)
error = make_socks(serv, net);
if (error < 0)
- goto err_socks;
+ goto err_bind;
set_grace_period(net);
dprintk("lockd_up_net: per-net data created; net=%p\n", net);
return 0;
-err_socks:
- svc_rpcb_cleanup(serv, net);
err_bind:
ln->nlmsvc_users--;
return error;
diff --git a/fs/locks.c b/fs/locks.c
index a6f54802d27..bb08857f90b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -247,6 +247,18 @@ void locks_free_lock(struct file_lock *fl)
}
EXPORT_SYMBOL(locks_free_lock);
+static void
+locks_dispose_list(struct list_head *dispose)
+{
+ struct file_lock *fl;
+
+ while (!list_empty(dispose)) {
+ fl = list_first_entry(dispose, struct file_lock, fl_block);
+ list_del_init(&fl->fl_block);
+ locks_free_lock(fl);
+ }
+}
+
void locks_init_lock(struct file_lock *fl)
{
memset(fl, 0, sizeof(struct file_lock));
@@ -285,7 +297,8 @@ EXPORT_SYMBOL(__locks_copy_lock);
void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
{
- locks_release_private(new);
+ /* "new" must be a freshly-initialized lock */
+ WARN_ON_ONCE(new->fl_ops);
__locks_copy_lock(new, fl);
new->fl_file = fl->fl_file;
@@ -650,12 +663,16 @@ static void locks_unlink_lock(struct file_lock **thisfl_p)
*
* Must be called with i_lock held!
*/
-static void locks_delete_lock(struct file_lock **thisfl_p)
+static void locks_delete_lock(struct file_lock **thisfl_p,
+ struct list_head *dispose)
{
struct file_lock *fl = *thisfl_p;
locks_unlink_lock(thisfl_p);
- locks_free_lock(fl);
+ if (dispose)
+ list_add(&fl->fl_block, dispose);
+ else
+ locks_free_lock(fl);
}
/* Determine if lock sys_fl blocks lock caller_fl. Common functionality
@@ -811,6 +828,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
struct inode * inode = file_inode(filp);
int error = 0;
int found = 0;
+ LIST_HEAD(dispose);
if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) {
new_fl = locks_alloc_lock();
@@ -833,7 +851,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
if (request->fl_type == fl->fl_type)
goto out;
found = 1;
- locks_delete_lock(before);
+ locks_delete_lock(before, &dispose);
break;
}
@@ -880,6 +898,7 @@ out:
spin_unlock(&inode->i_lock);
if (new_fl)
locks_free_lock(new_fl);
+ locks_dispose_list(&dispose);
return error;
}
@@ -893,6 +912,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
struct file_lock **before;
int error;
bool added = false;
+ LIST_HEAD(dispose);
/*
* We may need two file_lock structures for this operation,
@@ -988,7 +1008,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
else
request->fl_end = fl->fl_end;
if (added) {
- locks_delete_lock(before);
+ locks_delete_lock(before, &dispose);
continue;
}
request = fl;
@@ -1018,21 +1038,24 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
* one (This may happen several times).
*/
if (added) {
- locks_delete_lock(before);
+ locks_delete_lock(before, &dispose);
continue;
}
- /* Replace the old lock with the new one.
- * Wake up anybody waiting for the old one,
- * as the change in lock type might satisfy
- * their needs.
+ /*
+ * Replace the old lock with new_fl, and
+ * remove the old one. It's safe to do the
+ * insert here since we know that we won't be
+ * using new_fl later, and that the lock is
+ * just replacing an existing lock.
*/
- locks_wake_up_blocks(fl);
- fl->fl_start = request->fl_start;
- fl->fl_end = request->fl_end;
- fl->fl_type = request->fl_type;
- locks_release_private(fl);
- locks_copy_private(fl, request);
- request = fl;
+ error = -ENOLCK;
+ if (!new_fl)
+ goto out;
+ locks_copy_lock(new_fl, request);
+ request = new_fl;
+ new_fl = NULL;
+ locks_delete_lock(before, &dispose);
+ locks_insert_lock(before, request);
added = true;
}
}
@@ -1093,6 +1116,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
locks_free_lock(new_fl);
if (new_fl2)
locks_free_lock(new_fl2);
+ locks_dispose_list(&dispose);
return error;
}
@@ -1268,7 +1292,7 @@ int lease_modify(struct file_lock **before, int arg)
printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
fl->fl_fasync = NULL;
}
- locks_delete_lock(before);
+ locks_delete_lock(before, NULL);
}
return 0;
}
@@ -1595,7 +1619,7 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp
smp_mb();
error = check_conflicting_open(dentry, arg);
if (error)
- locks_unlink_lock(flp);
+ locks_unlink_lock(before);
out:
if (is_deleg)
mutex_unlock(&inode->i_mutex);
@@ -1737,13 +1761,10 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
ret = fl;
spin_lock(&inode->i_lock);
error = __vfs_setlease(filp, arg, &ret);
- if (error) {
- spin_unlock(&inode->i_lock);
- locks_free_lock(fl);
- goto out_free_fasync;
- }
- if (ret != fl)
- locks_free_lock(fl);
+ if (error)
+ goto out_unlock;
+ if (ret == fl)
+ fl = NULL;
/*
* fasync_insert_entry() returns the old entry if any.
@@ -1755,9 +1776,10 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
new = NULL;
error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
+out_unlock:
spin_unlock(&inode->i_lock);
-
-out_free_fasync:
+ if (fl)
+ locks_free_lock(fl);
if (new)
fasync_free(new);
return error;
@@ -2320,6 +2342,7 @@ void locks_remove_file(struct file *filp)
struct inode * inode = file_inode(filp);
struct file_lock *fl;
struct file_lock **before;
+ LIST_HEAD(dispose);
if (!inode->i_flock)
return;
@@ -2365,12 +2388,13 @@ void locks_remove_file(struct file *filp)
fl->fl_type, fl->fl_flags,
fl->fl_start, fl->fl_end);
- locks_delete_lock(before);
+ locks_delete_lock(before, &dispose);
continue;
}
before = &fl->fl_next;
}
spin_unlock(&inode->i_lock);
+ locks_dispose_list(&dispose);
}
/**
@@ -2452,7 +2476,11 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
seq_puts(f, "FLOCK ADVISORY ");
}
} else if (IS_LEASE(fl)) {
- seq_puts(f, "LEASE ");
+ if (fl->fl_flags & FL_DELEG)
+ seq_puts(f, "DELEG ");
+ else
+ seq_puts(f, "LEASE ");
+
if (lease_breaking(fl))
seq_puts(f, "BREAKING ");
else if (fl->fl_file)
diff --git a/fs/mount.h b/fs/mount.h
index d55297f2fa0..6740a621552 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -55,7 +55,7 @@ struct mount {
int mnt_id; /* mount identifier */
int mnt_group_id; /* peer group identifier */
int mnt_expiry_mark; /* true if marked for expiry */
- int mnt_pinned;
+ struct hlist_head mnt_pins;
struct path mnt_ex_mountpoint;
};
diff --git a/fs/namei.c b/fs/namei.c
index 9eb787e5c16..a996bb48dfa 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1091,10 +1091,10 @@ int follow_down_one(struct path *path)
}
EXPORT_SYMBOL(follow_down_one);
-static inline bool managed_dentry_might_block(struct dentry *dentry)
+static inline int managed_dentry_rcu(struct dentry *dentry)
{
- return (dentry->d_flags & DCACHE_MANAGE_TRANSIT &&
- dentry->d_op->d_manage(dentry, true) < 0);
+ return (dentry->d_flags & DCACHE_MANAGE_TRANSIT) ?
+ dentry->d_op->d_manage(dentry, true) : 0;
}
/*
@@ -1110,11 +1110,18 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
* Don't forget we might have a non-mountpoint managed dentry
* that wants to block transit.
*/
- if (unlikely(managed_dentry_might_block(path->dentry)))
+ switch (managed_dentry_rcu(path->dentry)) {
+ case -ECHILD:
+ default:
return false;
+ case -EISDIR:
+ return true;
+ case 0:
+ break;
+ }
if (!d_mountpoint(path->dentry))
- return true;
+ return !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT);
mounted = __lookup_mnt(path->mnt, path->dentry);
if (!mounted)
@@ -1130,7 +1137,8 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
*/
*inode = path->dentry->d_inode;
}
- return read_seqretry(&mount_lock, nd->m_seq);
+ return read_seqretry(&mount_lock, nd->m_seq) &&
+ !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT);
}
static int follow_dotdot_rcu(struct nameidata *nd)
@@ -1402,11 +1410,8 @@ static int lookup_fast(struct nameidata *nd,
}
path->mnt = mnt;
path->dentry = dentry;
- if (unlikely(!__follow_mount_rcu(nd, path, inode)))
- goto unlazy;
- if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
- goto unlazy;
- return 0;
+ if (likely(__follow_mount_rcu(nd, path, inode)))
+ return 0;
unlazy:
if (unlazy_walk(nd, dentry))
return -ECHILD;
@@ -4019,7 +4024,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
* The worst of all namespace operations - renaming directory. "Perverted"
* doesn't even start to describe it. Somebody in UCB had a heck of a trip...
* Problems:
- * a) we can get into loop creation. Check is done in is_subdir().
+ * a) we can get into loop creation.
* b) race potential - two innocent renames can create a loop together.
* That's where 4.4 screws up. Current fix: serialization on
* sb->s_vfs_rename_mutex. We might be more accurate, but that's another
@@ -4075,7 +4080,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (error)
return error;
- if (!old_dir->i_op->rename)
+ if (!old_dir->i_op->rename && !old_dir->i_op->rename2)
return -EPERM;
if (flags && !old_dir->i_op->rename2)
@@ -4134,10 +4139,11 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (error)
goto out;
}
- if (!flags) {
+ if (!old_dir->i_op->rename2) {
error = old_dir->i_op->rename(old_dir, old_dentry,
new_dir, new_dentry);
} else {
+ WARN_ON(old_dir->i_op->rename != NULL);
error = old_dir->i_op->rename2(old_dir, old_dentry,
new_dir, new_dentry, flags);
}
diff --git a/fs/namespace.c b/fs/namespace.c
index 0acabea5831..ef42d9bee21 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -16,7 +16,6 @@
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/idr.h>
-#include <linux/acct.h> /* acct_auto_close_mnt */
#include <linux/init.h> /* init_rootfs */
#include <linux/fs_struct.h> /* get_fs_root et.al. */
#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
@@ -779,6 +778,20 @@ static void attach_mnt(struct mount *mnt,
list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
}
+static void attach_shadowed(struct mount *mnt,
+ struct mount *parent,
+ struct mount *shadows)
+{
+ if (shadows) {
+ hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
+ list_add(&mnt->mnt_child, &shadows->mnt_child);
+ } else {
+ hlist_add_head_rcu(&mnt->mnt_hash,
+ m_hash(&parent->mnt, mnt->mnt_mountpoint));
+ list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+ }
+}
+
/*
* vfsmount lock must be held for write
*/
@@ -797,12 +810,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
list_splice(&head, n->list.prev);
- if (shadows)
- hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
- else
- hlist_add_head_rcu(&mnt->mnt_hash,
- m_hash(&parent->mnt, mnt->mnt_mountpoint));
- list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+ attach_shadowed(mnt, parent, shadows);
touch_mnt_namespace(n);
}
@@ -951,7 +959,6 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
static void mntput_no_expire(struct mount *mnt)
{
-put_again:
rcu_read_lock();
mnt_add_count(mnt, -1);
if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
@@ -964,14 +971,6 @@ put_again:
unlock_mount_hash();
return;
}
- if (unlikely(mnt->mnt_pinned)) {
- mnt_add_count(mnt, mnt->mnt_pinned + 1);
- mnt->mnt_pinned = 0;
- rcu_read_unlock();
- unlock_mount_hash();
- acct_auto_close_mnt(&mnt->mnt);
- goto put_again;
- }
if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
rcu_read_unlock();
unlock_mount_hash();
@@ -994,6 +993,8 @@ put_again:
* so mnt_get_writers() below is safe.
*/
WARN_ON(mnt_get_writers(mnt));
+ if (unlikely(mnt->mnt_pins.first))
+ mnt_pin_kill(mnt);
fsnotify_vfsmount_delete(&mnt->mnt);
dput(mnt->mnt.mnt_root);
deactivate_super(mnt->mnt.mnt_sb);
@@ -1021,25 +1022,15 @@ struct vfsmount *mntget(struct vfsmount *mnt)
}
EXPORT_SYMBOL(mntget);
-void mnt_pin(struct vfsmount *mnt)
+struct vfsmount *mnt_clone_internal(struct path *path)
{
- lock_mount_hash();
- real_mount(mnt)->mnt_pinned++;
- unlock_mount_hash();
-}
-EXPORT_SYMBOL(mnt_pin);
-
-void mnt_unpin(struct vfsmount *m)
-{
- struct mount *mnt = real_mount(m);
- lock_mount_hash();
- if (mnt->mnt_pinned) {
- mnt_add_count(mnt, 1);
- mnt->mnt_pinned--;
- }
- unlock_mount_hash();
+ struct mount *p;
+ p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
+ if (IS_ERR(p))
+ return ERR_CAST(p);
+ p->mnt.mnt_flags |= MNT_INTERNAL;
+ return &p->mnt;
}
-EXPORT_SYMBOL(mnt_unpin);
static inline void mangle(struct seq_file *m, const char *s)
{
@@ -1226,6 +1217,11 @@ static void namespace_unlock(void)
head.first->pprev = &head.first;
INIT_HLIST_HEAD(&unmounted);
+ /* undo decrements we'd done in umount_tree() */
+ hlist_for_each_entry(mnt, &head, mnt_hash)
+ if (mnt->mnt_ex_mountpoint.mnt)
+ mntget(mnt->mnt_ex_mountpoint.mnt);
+
up_write(&namespace_sem);
synchronize_rcu();
@@ -1262,6 +1258,9 @@ void umount_tree(struct mount *mnt, int how)
hlist_add_head(&p->mnt_hash, &tmp_list);
}
+ hlist_for_each_entry(p, &tmp_list, mnt_hash)
+ list_del_init(&p->mnt_child);
+
if (how)
propagate_umount(&tmp_list);
@@ -1272,9 +1271,9 @@ void umount_tree(struct mount *mnt, int how)
p->mnt_ns = NULL;
if (how < 2)
p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
- list_del_init(&p->mnt_child);
if (mnt_has_parent(p)) {
put_mountpoint(p->mnt_mp);
+ mnt_add_count(p->mnt_parent, -1);
/* move the reference to mountpoint into ->mnt_ex_mountpoint */
p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint;
p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt;
@@ -1505,6 +1504,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
continue;
for (s = r; s; s = next_mnt(s, r)) {
+ struct mount *t = NULL;
if (!(flag & CL_COPY_UNBINDABLE) &&
IS_MNT_UNBINDABLE(s)) {
s = skip_mnt_tree(s);
@@ -1526,7 +1526,14 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
goto out;
lock_mount_hash();
list_add_tail(&q->mnt_list, &res->mnt_list);
- attach_mnt(q, parent, p->mnt_mp);
+ mnt_set_mountpoint(parent, p->mnt_mp, q);
+ if (!list_empty(&parent->mnt_mounts)) {
+ t = list_last_entry(&parent->mnt_mounts,
+ struct mount, mnt_child);
+ if (t->mnt_mp != p->mnt_mp)
+ t = NULL;
+ }
+ attach_shadowed(q, parent, t);
unlock_mount_hash();
}
}
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 9b431f44fad..cbb1797149d 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -210,8 +210,7 @@ static void bl_end_io_read(struct bio *bio, int err)
SetPageUptodate(bvec->bv_page);
if (err) {
- struct nfs_pgio_data *rdata = par->data;
- struct nfs_pgio_header *header = rdata->header;
+ struct nfs_pgio_header *header = par->data;
if (!header->pnfs_error)
header->pnfs_error = -EIO;
@@ -224,43 +223,44 @@ static void bl_end_io_read(struct bio *bio, int err)
static void bl_read_cleanup(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_pgio_data *rdata;
+ struct nfs_pgio_header *hdr;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- rdata = container_of(task, struct nfs_pgio_data, task);
- pnfs_ld_read_done(rdata);
+ hdr = container_of(task, struct nfs_pgio_header, task);
+ pnfs_ld_read_done(hdr);
}
static void
bl_end_par_io_read(void *data, int unused)
{
- struct nfs_pgio_data *rdata = data;
+ struct nfs_pgio_header *hdr = data;
- rdata->task.tk_status = rdata->header->pnfs_error;
- INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
- schedule_work(&rdata->task.u.tk_work);
+ hdr->task.tk_status = hdr->pnfs_error;
+ INIT_WORK(&hdr->task.u.tk_work, bl_read_cleanup);
+ schedule_work(&hdr->task.u.tk_work);
}
static enum pnfs_try_status
-bl_read_pagelist(struct nfs_pgio_data *rdata)
+bl_read_pagelist(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *header = rdata->header;
+ struct nfs_pgio_header *header = hdr;
int i, hole;
struct bio *bio = NULL;
struct pnfs_block_extent *be = NULL, *cow_read = NULL;
sector_t isect, extent_length = 0;
struct parallel_io *par;
- loff_t f_offset = rdata->args.offset;
- size_t bytes_left = rdata->args.count;
+ loff_t f_offset = hdr->args.offset;
+ size_t bytes_left = hdr->args.count;
unsigned int pg_offset, pg_len;
- struct page **pages = rdata->args.pages;
- int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
+ struct page **pages = hdr->args.pages;
+ int pg_index = hdr->args.pgbase >> PAGE_CACHE_SHIFT;
const bool is_dio = (header->dreq != NULL);
dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
- rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
+ hdr->page_array.npages, f_offset,
+ (unsigned int)hdr->args.count);
- par = alloc_parallel(rdata);
+ par = alloc_parallel(hdr);
if (!par)
goto use_mds;
par->pnfs_callback = bl_end_par_io_read;
@@ -268,7 +268,7 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
isect = (sector_t) (f_offset >> SECTOR_SHIFT);
/* Code assumes extents are page-aligned */
- for (i = pg_index; i < rdata->pages.npages; i++) {
+ for (i = pg_index; i < hdr->page_array.npages; i++) {
if (!extent_length) {
/* We've used up the previous extent */
bl_put_extent(be);
@@ -317,7 +317,8 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
struct pnfs_block_extent *be_read;
be_read = (hole && cow_read) ? cow_read : be;
- bio = do_add_page_to_bio(bio, rdata->pages.npages - i,
+ bio = do_add_page_to_bio(bio,
+ hdr->page_array.npages - i,
READ,
isect, pages[i], be_read,
bl_end_io_read, par,
@@ -332,10 +333,10 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
extent_length -= PAGE_CACHE_SECTORS;
}
if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
- rdata->res.eof = 1;
- rdata->res.count = header->inode->i_size - rdata->args.offset;
+ hdr->res.eof = 1;
+ hdr->res.count = header->inode->i_size - hdr->args.offset;
} else {
- rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset;
+ hdr->res.count = (isect << SECTOR_SHIFT) - hdr->args.offset;
}
out:
bl_put_extent(be);
@@ -390,8 +391,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
}
if (unlikely(err)) {
- struct nfs_pgio_data *data = par->data;
- struct nfs_pgio_header *header = data->header;
+ struct nfs_pgio_header *header = par->data;
if (!header->pnfs_error)
header->pnfs_error = -EIO;
@@ -405,8 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err)
{
struct parallel_io *par = bio->bi_private;
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct nfs_pgio_data *data = par->data;
- struct nfs_pgio_header *header = data->header;
+ struct nfs_pgio_header *header = par->data;
if (!uptodate) {
if (!header->pnfs_error)
@@ -423,32 +422,32 @@ static void bl_end_io_write(struct bio *bio, int err)
static void bl_write_cleanup(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_pgio_data *wdata;
+ struct nfs_pgio_header *hdr;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- wdata = container_of(task, struct nfs_pgio_data, task);
- if (likely(!wdata->header->pnfs_error)) {
+ hdr = container_of(task, struct nfs_pgio_header, task);
+ if (likely(!hdr->pnfs_error)) {
/* Marks for LAYOUTCOMMIT */
- mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
- wdata->args.offset, wdata->args.count);
+ mark_extents_written(BLK_LSEG2EXT(hdr->lseg),
+ hdr->args.offset, hdr->args.count);
}
- pnfs_ld_write_done(wdata);
+ pnfs_ld_write_done(hdr);
}
/* Called when last of bios associated with a bl_write_pagelist call finishes */
static void bl_end_par_io_write(void *data, int num_se)
{
- struct nfs_pgio_data *wdata = data;
+ struct nfs_pgio_header *hdr = data;
- if (unlikely(wdata->header->pnfs_error)) {
- bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
+ if (unlikely(hdr->pnfs_error)) {
+ bl_free_short_extents(&BLK_LSEG2EXT(hdr->lseg)->bl_inval,
num_se);
}
- wdata->task.tk_status = wdata->header->pnfs_error;
- wdata->verf.committed = NFS_FILE_SYNC;
- INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
- schedule_work(&wdata->task.u.tk_work);
+ hdr->task.tk_status = hdr->pnfs_error;
+ hdr->verf.committed = NFS_FILE_SYNC;
+ INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup);
+ schedule_work(&hdr->task.u.tk_work);
}
/* FIXME STUB - mark intersection of layout and page as bad, so is not
@@ -673,18 +672,17 @@ check_page:
}
static enum pnfs_try_status
-bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
+bl_write_pagelist(struct nfs_pgio_header *header, int sync)
{
- struct nfs_pgio_header *header = wdata->header;
int i, ret, npg_zero, pg_index, last = 0;
struct bio *bio = NULL;
struct pnfs_block_extent *be = NULL, *cow_read = NULL;
sector_t isect, last_isect = 0, extent_length = 0;
struct parallel_io *par = NULL;
- loff_t offset = wdata->args.offset;
- size_t count = wdata->args.count;
+ loff_t offset = header->args.offset;
+ size_t count = header->args.count;
unsigned int pg_offset, pg_len, saved_len;
- struct page **pages = wdata->args.pages;
+ struct page **pages = header->args.pages;
struct page *page;
pgoff_t index;
u64 temp;
@@ -699,11 +697,11 @@ bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n");
goto out_mds;
}
- /* At this point, wdata->pages is a (sequential) list of nfs_pages.
+ /* At this point, header->page_array is a (sequential) list of nfs_pages.
* We want to write each, and if there is an error set pnfs_error
* to have it redone using nfs.
*/
- par = alloc_parallel(wdata);
+ par = alloc_parallel(header);
if (!par)
goto out_mds;
par->pnfs_callback = bl_end_par_io_write;
@@ -790,8 +788,8 @@ next_page:
bio = bl_submit_bio(WRITE, bio);
/* Middle pages */
- pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
- for (i = pg_index; i < wdata->pages.npages; i++) {
+ pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
+ for (i = pg_index; i < header->page_array.npages; i++) {
if (!extent_length) {
/* We've used up the previous extent */
bl_put_extent(be);
@@ -862,7 +860,8 @@ next_page:
}
- bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
+ bio = do_add_page_to_bio(bio, header->page_array.npages - i,
+ WRITE,
isect, pages[i], be,
bl_end_io_write, par,
pg_offset, pg_len);
@@ -890,7 +889,7 @@ next_page:
}
write_done:
- wdata->res.count = wdata->args.count;
+ header->res.count = header->args.count;
out:
bl_put_extent(be);
bl_put_extent(cow_read);
@@ -1063,7 +1062,7 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
return ERR_PTR(-ENOMEM);
}
- pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS);
+ pages = kcalloc(max_pages, sizeof(struct page *), GFP_NOFS);
if (pages == NULL) {
kfree(dev);
return ERR_PTR(-ENOMEM);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 073b4cf67ed..54de482143c 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -428,6 +428,18 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
if (p == NULL)
return 0;
+ /*
+ * Did we get the acceptor from userland during the SETCLIENTID
+ * negotiation?
+ */
+ if (clp->cl_acceptor)
+ return !strcmp(p, clp->cl_acceptor);
+
+ /*
+ * Otherwise try to verify it using the cl_hostname. Note that this
+ * doesn't work if a non-canonical hostname was used in the devname.
+ */
+
/* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */
if (memcmp(p, "nfs@", 4) != 0)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 180d1ec9c32..6a4f3666e27 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -110,8 +110,8 @@ struct nfs_subversion *get_nfs_version(unsigned int version)
mutex_unlock(&nfs_version_mutex);
}
- if (!IS_ERR(nfs))
- try_module_get(nfs->owner);
+ if (!IS_ERR(nfs) && !try_module_get(nfs->owner))
+ return ERR_PTR(-EAGAIN);
return nfs;
}
@@ -158,7 +158,8 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
goto error_0;
clp->cl_nfs_mod = cl_init->nfs_mod;
- try_module_get(clp->cl_nfs_mod->owner);
+ if (!try_module_get(clp->cl_nfs_mod->owner))
+ goto error_dealloc;
clp->rpc_ops = clp->cl_nfs_mod->rpc_ops;
@@ -190,6 +191,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
error_cleanup:
put_nfs_version(clp->cl_nfs_mod);
+error_dealloc:
kfree(clp);
error_0:
return ERR_PTR(err);
@@ -252,6 +254,7 @@ void nfs_free_client(struct nfs_client *clp)
put_net(clp->cl_net);
put_nfs_version(clp->cl_nfs_mod);
kfree(clp->cl_hostname);
+ kfree(clp->cl_acceptor);
kfree(clp);
dprintk("<-- nfs_free_client()\n");
@@ -482,8 +485,13 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops;
+ if (cl_init->hostname == NULL) {
+ WARN_ON(1);
+ return NULL;
+ }
+
dprintk("--> nfs_get_client(%s,v%u)\n",
- cl_init->hostname ?: "", rpc_ops->version);
+ cl_init->hostname, rpc_ops->version);
/* see if the client already exists */
do {
@@ -510,7 +518,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
} while (!IS_ERR(new));
dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n",
- cl_init->hostname ?: "", PTR_ERR(new));
+ cl_init->hostname, PTR_ERR(new));
return new;
}
EXPORT_SYMBOL_GPL(nfs_get_client);
@@ -1404,24 +1412,18 @@ int nfs_fs_proc_net_init(struct net *net)
p = proc_create("volumes", S_IFREG|S_IRUGO,
nn->proc_nfsfs, &nfs_volume_list_fops);
if (!p)
- goto error_2;
+ goto error_1;
return 0;
-error_2:
- remove_proc_entry("servers", nn->proc_nfsfs);
error_1:
- remove_proc_entry("fs/nfsfs", NULL);
+ remove_proc_subtree("nfsfs", net->proc_net);
error_0:
return -ENOMEM;
}
void nfs_fs_proc_net_exit(struct net *net)
{
- struct nfs_net *nn = net_generic(net, nfs_net_id);
-
- remove_proc_entry("volumes", nn->proc_nfsfs);
- remove_proc_entry("servers", nn->proc_nfsfs);
- remove_proc_entry("fs/nfsfs", NULL);
+ remove_proc_subtree("nfsfs", net->proc_net);
}
/*
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 5d8ccecf5f5..5853f53db73 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -41,14 +41,8 @@ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
}
-/**
- * nfs_have_delegation - check if inode has a delegation
- * @inode: inode to check
- * @flags: delegation types to check for
- *
- * Returns one if inode has the indicated delegation, otherwise zero.
- */
-int nfs4_have_delegation(struct inode *inode, fmode_t flags)
+static int
+nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
{
struct nfs_delegation *delegation;
int ret = 0;
@@ -58,12 +52,34 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags)
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (delegation != NULL && (delegation->type & flags) == flags &&
!test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
- nfs_mark_delegation_referenced(delegation);
+ if (mark)
+ nfs_mark_delegation_referenced(delegation);
ret = 1;
}
rcu_read_unlock();
return ret;
}
+/**
+ * nfs4_have_delegation - check if inode has a delegation, mark it
+ * NFS_DELEGATION_REFERENCED if there is one.
+ * @inode: inode to check
+ * @flags: delegation types to check for
+ *
+ * Returns one if inode has the indicated delegation, otherwise zero.
+ */
+int nfs4_have_delegation(struct inode *inode, fmode_t flags)
+{
+ return nfs4_do_check_delegation(inode, flags, true);
+}
+
+/*
+ * nfs4_check_delegation - check if inode has a delegation, do not mark
+ * NFS_DELEGATION_REFERENCED if it has one.
+ */
+int nfs4_check_delegation(struct inode *inode, fmode_t flags)
+{
+ return nfs4_do_check_delegation(inode, flags, false);
+}
static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
{
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 9a79c7a99d6..5c1cce39297 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -59,6 +59,7 @@ bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
int nfs4_have_delegation(struct inode *inode, fmode_t flags);
+int nfs4_check_delegation(struct inode *inode, fmode_t flags);
#endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 4a3d4ef7612..36d921f0c60 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -988,9 +988,13 @@ EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
* A check for whether or not the parent directory has changed.
* In the case it has, we assume that the dentries are untrustworthy
* and may need to be looked up again.
+ * If rcu_walk prevents us from performing a full check, return 0.
*/
-static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
+static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
+ int rcu_walk)
{
+ int ret;
+
if (IS_ROOT(dentry))
return 1;
if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
@@ -998,7 +1002,11 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
if (!nfs_verify_change_attribute(dir, dentry->d_time))
return 0;
/* Revalidate nfsi->cache_change_attribute before we declare a match */
- if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
+ if (rcu_walk)
+ ret = nfs_revalidate_inode_rcu(NFS_SERVER(dir), dir);
+ else
+ ret = nfs_revalidate_inode(NFS_SERVER(dir), dir);
+ if (ret < 0)
return 0;
if (!nfs_verify_change_attribute(dir, dentry->d_time))
return 0;
@@ -1042,6 +1050,8 @@ int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
out:
return (inode->i_nlink == 0) ? -ENOENT : 0;
out_force:
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
ret = __nfs_revalidate_inode(server, inode);
if (ret != 0)
return ret;
@@ -1054,6 +1064,9 @@ out_force:
*
* If parent mtime has changed, we revalidate, else we wait for a
* period corresponding to the parent's attribute cache timeout value.
+ *
+ * If LOOKUP_RCU prevents us from performing a full check, return 1
+ * suggesting a reval is needed.
*/
static inline
int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
@@ -1064,7 +1077,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
return 0;
if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
return 1;
- return !nfs_check_verifier(dir, dentry);
+ return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
}
/*
@@ -1088,21 +1101,30 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
struct nfs4_label *label = NULL;
int error;
- if (flags & LOOKUP_RCU)
- return -ECHILD;
-
- parent = dget_parent(dentry);
- dir = parent->d_inode;
+ if (flags & LOOKUP_RCU) {
+ parent = ACCESS_ONCE(dentry->d_parent);
+ dir = ACCESS_ONCE(parent->d_inode);
+ if (!dir)
+ return -ECHILD;
+ } else {
+ parent = dget_parent(dentry);
+ dir = parent->d_inode;
+ }
nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
inode = dentry->d_inode;
if (!inode) {
- if (nfs_neg_need_reval(dir, dentry, flags))
+ if (nfs_neg_need_reval(dir, dentry, flags)) {
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
goto out_bad;
+ }
goto out_valid_noent;
}
if (is_bad_inode(inode)) {
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
__func__, dentry);
goto out_bad;
@@ -1112,12 +1134,20 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
goto out_set_verifier;
/* Force a full look up iff the parent directory has changed */
- if (!nfs_is_exclusive_create(dir, flags) && nfs_check_verifier(dir, dentry)) {
- if (nfs_lookup_verify_inode(inode, flags))
+ if (!nfs_is_exclusive_create(dir, flags) &&
+ nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
+
+ if (nfs_lookup_verify_inode(inode, flags)) {
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
goto out_zap_parent;
+ }
goto out_valid;
}
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+
if (NFS_STALE(inode))
goto out_bad;
@@ -1153,13 +1183,18 @@ out_set_verifier:
/* Success: notify readdir to use READDIRPLUS */
nfs_advise_use_readdirplus(dir);
out_valid_noent:
- dput(parent);
+ if (flags & LOOKUP_RCU) {
+ if (parent != ACCESS_ONCE(dentry->d_parent))
+ return -ECHILD;
+ } else
+ dput(parent);
dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
__func__, dentry);
return 1;
out_zap_parent:
nfs_zap_caches(dir);
out_bad:
+ WARN_ON(flags & LOOKUP_RCU);
nfs_free_fattr(fattr);
nfs_free_fhandle(fhandle);
nfs4_label_free(label);
@@ -1185,6 +1220,7 @@ out_zap_parent:
__func__, dentry);
return 0;
out_error:
+ WARN_ON(flags & LOOKUP_RCU);
nfs_free_fattr(fattr);
nfs_free_fhandle(fhandle);
nfs4_label_free(label);
@@ -1529,14 +1565,9 @@ EXPORT_SYMBOL_GPL(nfs_atomic_open);
static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
{
- struct dentry *parent = NULL;
struct inode *inode;
- struct inode *dir;
int ret = 0;
- if (flags & LOOKUP_RCU)
- return -ECHILD;
-
if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
goto no_open;
if (d_mountpoint(dentry))
@@ -1545,34 +1576,47 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
goto no_open;
inode = dentry->d_inode;
- parent = dget_parent(dentry);
- dir = parent->d_inode;
/* We can't create new files in nfs_open_revalidate(), so we
* optimize away revalidation of negative dentries.
*/
if (inode == NULL) {
+ struct dentry *parent;
+ struct inode *dir;
+
+ if (flags & LOOKUP_RCU) {
+ parent = ACCESS_ONCE(dentry->d_parent);
+ dir = ACCESS_ONCE(parent->d_inode);
+ if (!dir)
+ return -ECHILD;
+ } else {
+ parent = dget_parent(dentry);
+ dir = parent->d_inode;
+ }
if (!nfs_neg_need_reval(dir, dentry, flags))
ret = 1;
+ else if (flags & LOOKUP_RCU)
+ ret = -ECHILD;
+ if (!(flags & LOOKUP_RCU))
+ dput(parent);
+ else if (parent != ACCESS_ONCE(dentry->d_parent))
+ return -ECHILD;
goto out;
}
/* NFS only supports OPEN on regular files */
if (!S_ISREG(inode->i_mode))
- goto no_open_dput;
+ goto no_open;
/* We cannot do exclusive creation on a positive dentry */
if (flags & LOOKUP_EXCL)
- goto no_open_dput;
+ goto no_open;
/* Let f_op->open() actually open (and revalidate) the file */
ret = 1;
out:
- dput(parent);
return ret;
-no_open_dput:
- dput(parent);
no_open:
return nfs_lookup_revalidate(dentry, flags);
}
@@ -2028,10 +2072,14 @@ static DEFINE_SPINLOCK(nfs_access_lru_lock);
static LIST_HEAD(nfs_access_lru_list);
static atomic_long_t nfs_access_nr_entries;
+static unsigned long nfs_access_max_cachesize = ULONG_MAX;
+module_param(nfs_access_max_cachesize, ulong, 0644);
+MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache length");
+
static void nfs_access_free_entry(struct nfs_access_entry *entry)
{
put_rpccred(entry->cred);
- kfree(entry);
+ kfree_rcu(entry, rcu_head);
smp_mb__before_atomic();
atomic_long_dec(&nfs_access_nr_entries);
smp_mb__after_atomic();
@@ -2048,19 +2096,14 @@ static void nfs_access_free_list(struct list_head *head)
}
}
-unsigned long
-nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
+static unsigned long
+nfs_do_access_cache_scan(unsigned int nr_to_scan)
{
LIST_HEAD(head);
struct nfs_inode *nfsi, *next;
struct nfs_access_entry *cache;
- int nr_to_scan = sc->nr_to_scan;
- gfp_t gfp_mask = sc->gfp_mask;
long freed = 0;
- if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
- return SHRINK_STOP;
-
spin_lock(&nfs_access_lru_lock);
list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
struct inode *inode;
@@ -2094,11 +2137,39 @@ remove_lru_entry:
}
unsigned long
+nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ int nr_to_scan = sc->nr_to_scan;
+ gfp_t gfp_mask = sc->gfp_mask;
+
+ if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
+ return SHRINK_STOP;
+ return nfs_do_access_cache_scan(nr_to_scan);
+}
+
+
+unsigned long
nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{
return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries));
}
+static void
+nfs_access_cache_enforce_limit(void)
+{
+ long nr_entries = atomic_long_read(&nfs_access_nr_entries);
+ unsigned long diff;
+ unsigned int nr_to_scan;
+
+ if (nr_entries < 0 || nr_entries <= nfs_access_max_cachesize)
+ return;
+ nr_to_scan = 100;
+ diff = nr_entries - nfs_access_max_cachesize;
+ if (diff < nr_to_scan)
+ nr_to_scan = diff;
+ nfs_do_access_cache_scan(nr_to_scan);
+}
+
static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
{
struct rb_root *root_node = &nfsi->access_cache;
@@ -2186,6 +2257,38 @@ out_zap:
return -ENOENT;
}
+static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
+{
+ /* Only check the most recently returned cache entry,
+ * but do it without locking.
+ */
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_access_entry *cache;
+ int err = -ECHILD;
+ struct list_head *lh;
+
+ rcu_read_lock();
+ if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
+ goto out;
+ lh = rcu_dereference(nfsi->access_cache_entry_lru.prev);
+ cache = list_entry(lh, struct nfs_access_entry, lru);
+ if (lh == &nfsi->access_cache_entry_lru ||
+ cred != cache->cred)
+ cache = NULL;
+ if (cache == NULL)
+ goto out;
+ if (!nfs_have_delegated_attributes(inode) &&
+ !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
+ goto out;
+ res->jiffies = cache->jiffies;
+ res->cred = cache->cred;
+ res->mask = cache->mask;
+ err = 0;
+out:
+ rcu_read_unlock();
+ return err;
+}
+
static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
{
struct nfs_inode *nfsi = NFS_I(inode);
@@ -2229,6 +2332,11 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
cache->cred = get_rpccred(set->cred);
cache->mask = set->mask;
+ /* The above field assignments must be visible
+ * before this item appears on the lru. We cannot easily
+ * use rcu_assign_pointer, so just force the memory barrier.
+ */
+ smp_wmb();
nfs_access_add_rbtree(inode, cache);
/* Update accounting */
@@ -2244,6 +2352,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
&nfs_access_lru_list);
spin_unlock(&nfs_access_lru_lock);
}
+ nfs_access_cache_enforce_limit();
}
EXPORT_SYMBOL_GPL(nfs_access_add_cache);
@@ -2267,10 +2376,16 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
trace_nfs_access_enter(inode);
- status = nfs_access_get_cached(inode, cred, &cache);
+ status = nfs_access_get_cached_rcu(inode, cred, &cache);
+ if (status != 0)
+ status = nfs_access_get_cached(inode, cred, &cache);
if (status == 0)
goto out_cached;
+ status = -ECHILD;
+ if (mask & MAY_NOT_BLOCK)
+ goto out;
+
/* Be clever: ask server to check for all possible rights */
cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
cache.cred = cred;
@@ -2321,9 +2436,6 @@ int nfs_permission(struct inode *inode, int mask)
struct rpc_cred *cred;
int res = 0;
- if (mask & MAY_NOT_BLOCK)
- return -ECHILD;
-
nfs_inc_stats(inode, NFSIOS_VFSACCESS);
if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
@@ -2350,12 +2462,23 @@ force_lookup:
if (!NFS_PROTO(inode)->access)
goto out_notsup;
- cred = rpc_lookup_cred();
- if (!IS_ERR(cred)) {
- res = nfs_do_access(inode, cred, mask);
- put_rpccred(cred);
- } else
+ /* Always try fast lookups first */
+ rcu_read_lock();
+ cred = rpc_lookup_cred_nonblock();
+ if (!IS_ERR(cred))
+ res = nfs_do_access(inode, cred, mask|MAY_NOT_BLOCK);
+ else
res = PTR_ERR(cred);
+ rcu_read_unlock();
+ if (res == -ECHILD && !(mask & MAY_NOT_BLOCK)) {
+ /* Fast lookup failed, try the slow way */
+ cred = rpc_lookup_cred();
+ if (!IS_ERR(cred)) {
+ res = nfs_do_access(inode, cred, mask);
+ put_rpccred(cred);
+ } else
+ res = PTR_ERR(cred);
+ }
out:
if (!res && (mask & MAY_EXEC) && !execute_ok(inode))
res = -EACCES;
@@ -2364,6 +2487,9 @@ out:
inode->i_sb->s_id, inode->i_ino, mask, res);
return res;
out_notsup:
+ if (mask & MAY_NOT_BLOCK)
+ return -ECHILD;
+
res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
if (res == 0)
res = generic_permission(inode, mask);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index f11b9eed0de..65ef6e00dee 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -148,8 +148,8 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
{
struct nfs_writeverf *verfp;
- verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
- hdr->data->ds_idx);
+ verfp = nfs_direct_select_verf(dreq, hdr->ds_clp,
+ hdr->ds_idx);
WARN_ON_ONCE(verfp->committed >= 0);
memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
WARN_ON_ONCE(verfp->committed < 0);
@@ -169,8 +169,8 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
{
struct nfs_writeverf *verfp;
- verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
- hdr->data->ds_idx);
+ verfp = nfs_direct_select_verf(dreq, hdr->ds_clp,
+ hdr->ds_idx);
if (verfp->committed < 0) {
nfs_direct_set_hdr_verf(dreq, hdr);
return 0;
@@ -715,7 +715,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
{
struct nfs_direct_req *dreq = hdr->dreq;
struct nfs_commit_info cinfo;
- int bit = -1;
+ bool request_commit = false;
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
@@ -729,27 +729,20 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
dreq->flags = 0;
dreq->error = hdr->error;
}
- if (dreq->error != 0)
- bit = NFS_IOHDR_ERROR;
- else {
+ if (dreq->error == 0) {
dreq->count += hdr->good_bytes;
- if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
- dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
- bit = NFS_IOHDR_NEED_RESCHED;
- } else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
+ if (nfs_write_need_commit(hdr)) {
if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
- bit = NFS_IOHDR_NEED_RESCHED;
+ request_commit = true;
else if (dreq->flags == 0) {
nfs_direct_set_hdr_verf(dreq, hdr);
- bit = NFS_IOHDR_NEED_COMMIT;
+ request_commit = true;
dreq->flags = NFS_ODIRECT_DO_COMMIT;
} else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
- if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) {
+ request_commit = true;
+ if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr))
dreq->flags =
NFS_ODIRECT_RESCHED_WRITES;
- bit = NFS_IOHDR_NEED_RESCHED;
- } else
- bit = NFS_IOHDR_NEED_COMMIT;
}
}
}
@@ -759,9 +752,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
req = nfs_list_entry(hdr->pages.next);
nfs_list_remove_request(req);
- switch (bit) {
- case NFS_IOHDR_NEED_RESCHED:
- case NFS_IOHDR_NEED_COMMIT:
+ if (request_commit) {
kref_get(&req->wb_kref);
nfs_mark_request_commit(req, hdr->lseg, &cinfo);
}
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index d2eba1c13b7..90978075f73 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -84,45 +84,37 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
BUG();
}
-static void filelayout_reset_write(struct nfs_pgio_data *data)
+static void filelayout_reset_write(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
- struct rpc_task *task = &data->task;
+ struct rpc_task *task = &hdr->task;
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
dprintk("%s Reset task %5u for i/o through MDS "
"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
- data->task.tk_pid,
+ hdr->task.tk_pid,
hdr->inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(hdr->inode),
- data->args.count,
- (unsigned long long)data->args.offset);
+ hdr->args.count,
+ (unsigned long long)hdr->args.offset);
- task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
- &hdr->pages,
- hdr->completion_ops,
- hdr->dreq);
+ task->tk_status = pnfs_write_done_resend_to_mds(hdr);
}
}
-static void filelayout_reset_read(struct nfs_pgio_data *data)
+static void filelayout_reset_read(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
- struct rpc_task *task = &data->task;
+ struct rpc_task *task = &hdr->task;
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
dprintk("%s Reset task %5u for i/o through MDS "
"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
- data->task.tk_pid,
+ hdr->task.tk_pid,
hdr->inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(hdr->inode),
- data->args.count,
- (unsigned long long)data->args.offset);
+ hdr->args.count,
+ (unsigned long long)hdr->args.offset);
- task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
- &hdr->pages,
- hdr->completion_ops,
- hdr->dreq);
+ task->tk_status = pnfs_read_done_resend_to_mds(hdr);
}
}
@@ -243,18 +235,17 @@ wait_on_recovery:
/* NFS_PROTO call done callback routines */
static int filelayout_read_done_cb(struct rpc_task *task,
- struct nfs_pgio_data *data)
+ struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
int err;
- trace_nfs4_pnfs_read(data, task->tk_status);
- err = filelayout_async_handle_error(task, data->args.context->state,
- data->ds_clp, hdr->lseg);
+ trace_nfs4_pnfs_read(hdr, task->tk_status);
+ err = filelayout_async_handle_error(task, hdr->args.context->state,
+ hdr->ds_clp, hdr->lseg);
switch (err) {
case -NFS4ERR_RESET_TO_MDS:
- filelayout_reset_read(data);
+ filelayout_reset_read(hdr);
return task->tk_status;
case -EAGAIN:
rpc_restart_call_prepare(task);
@@ -270,15 +261,14 @@ static int filelayout_read_done_cb(struct rpc_task *task,
* rfc5661 is not clear about which credential should be used.
*/
static void
-filelayout_set_layoutcommit(struct nfs_pgio_data *wdata)
+filelayout_set_layoutcommit(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = wdata->header;
if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds ||
- wdata->res.verf->committed == NFS_FILE_SYNC)
+ hdr->res.verf->committed == NFS_FILE_SYNC)
return;
- pnfs_set_layoutcommit(wdata);
+ pnfs_set_layoutcommit(hdr);
dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
}
@@ -305,83 +295,82 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
*/
static void filelayout_read_prepare(struct rpc_task *task, void *data)
{
- struct nfs_pgio_data *rdata = data;
+ struct nfs_pgio_header *hdr = data;
- if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) {
+ if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
rpc_exit(task, -EIO);
return;
}
- if (filelayout_reset_to_mds(rdata->header->lseg)) {
+ if (filelayout_reset_to_mds(hdr->lseg)) {
dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
- filelayout_reset_read(rdata);
+ filelayout_reset_read(hdr);
rpc_exit(task, 0);
return;
}
- rdata->pgio_done_cb = filelayout_read_done_cb;
+ hdr->pgio_done_cb = filelayout_read_done_cb;
- if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
- &rdata->args.seq_args,
- &rdata->res.seq_res,
+ if (nfs41_setup_sequence(hdr->ds_clp->cl_session,
+ &hdr->args.seq_args,
+ &hdr->res.seq_res,
task))
return;
- if (nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context,
- rdata->args.lock_context, FMODE_READ) == -EIO)
+ if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
+ hdr->args.lock_context, FMODE_READ) == -EIO)
rpc_exit(task, -EIO); /* lost lock, terminate I/O */
}
static void filelayout_read_call_done(struct rpc_task *task, void *data)
{
- struct nfs_pgio_data *rdata = data;
+ struct nfs_pgio_header *hdr = data;
dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
- if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) &&
+ if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
task->tk_status == 0) {
- nfs41_sequence_done(task, &rdata->res.seq_res);
+ nfs41_sequence_done(task, &hdr->res.seq_res);
return;
}
/* Note this may cause RPC to be resent */
- rdata->header->mds_ops->rpc_call_done(task, data);
+ hdr->mds_ops->rpc_call_done(task, data);
}
static void filelayout_read_count_stats(struct rpc_task *task, void *data)
{
- struct nfs_pgio_data *rdata = data;
+ struct nfs_pgio_header *hdr = data;
- rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
+ rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics);
}
static void filelayout_read_release(void *data)
{
- struct nfs_pgio_data *rdata = data;
- struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout;
+ struct nfs_pgio_header *hdr = data;
+ struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout;
filelayout_fenceme(lo->plh_inode, lo);
- nfs_put_client(rdata->ds_clp);
- rdata->header->mds_ops->rpc_release(data);
+ nfs_put_client(hdr->ds_clp);
+ hdr->mds_ops->rpc_release(data);
}
static int filelayout_write_done_cb(struct rpc_task *task,
- struct nfs_pgio_data *data)
+ struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
int err;
- trace_nfs4_pnfs_write(data, task->tk_status);
- err = filelayout_async_handle_error(task, data->args.context->state,
- data->ds_clp, hdr->lseg);
+ trace_nfs4_pnfs_write(hdr, task->tk_status);
+ err = filelayout_async_handle_error(task, hdr->args.context->state,
+ hdr->ds_clp, hdr->lseg);
switch (err) {
case -NFS4ERR_RESET_TO_MDS:
- filelayout_reset_write(data);
+ filelayout_reset_write(hdr);
return task->tk_status;
case -EAGAIN:
rpc_restart_call_prepare(task);
return -EAGAIN;
}
- filelayout_set_layoutcommit(data);
+ filelayout_set_layoutcommit(hdr);
return 0;
}
@@ -419,57 +408,57 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
static void filelayout_write_prepare(struct rpc_task *task, void *data)
{
- struct nfs_pgio_data *wdata = data;
+ struct nfs_pgio_header *hdr = data;
- if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) {
+ if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
rpc_exit(task, -EIO);
return;
}
- if (filelayout_reset_to_mds(wdata->header->lseg)) {
+ if (filelayout_reset_to_mds(hdr->lseg)) {
dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
- filelayout_reset_write(wdata);
+ filelayout_reset_write(hdr);
rpc_exit(task, 0);
return;
}
- if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
- &wdata->args.seq_args,
- &wdata->res.seq_res,
+ if (nfs41_setup_sequence(hdr->ds_clp->cl_session,
+ &hdr->args.seq_args,
+ &hdr->res.seq_res,
task))
return;
- if (nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context,
- wdata->args.lock_context, FMODE_WRITE) == -EIO)
+ if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
+ hdr->args.lock_context, FMODE_WRITE) == -EIO)
rpc_exit(task, -EIO); /* lost lock, terminate I/O */
}
static void filelayout_write_call_done(struct rpc_task *task, void *data)
{
- struct nfs_pgio_data *wdata = data;
+ struct nfs_pgio_header *hdr = data;
- if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) &&
+ if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
task->tk_status == 0) {
- nfs41_sequence_done(task, &wdata->res.seq_res);
+ nfs41_sequence_done(task, &hdr->res.seq_res);
return;
}
/* Note this may cause RPC to be resent */
- wdata->header->mds_ops->rpc_call_done(task, data);
+ hdr->mds_ops->rpc_call_done(task, data);
}
static void filelayout_write_count_stats(struct rpc_task *task, void *data)
{
- struct nfs_pgio_data *wdata = data;
+ struct nfs_pgio_header *hdr = data;
- rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
+ rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics);
}
static void filelayout_write_release(void *data)
{
- struct nfs_pgio_data *wdata = data;
- struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout;
+ struct nfs_pgio_header *hdr = data;
+ struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout;
filelayout_fenceme(lo->plh_inode, lo);
- nfs_put_client(wdata->ds_clp);
- wdata->header->mds_ops->rpc_release(data);
+ nfs_put_client(hdr->ds_clp);
+ hdr->mds_ops->rpc_release(data);
}
static void filelayout_commit_prepare(struct rpc_task *task, void *data)
@@ -529,19 +518,18 @@ static const struct rpc_call_ops filelayout_commit_call_ops = {
};
static enum pnfs_try_status
-filelayout_read_pagelist(struct nfs_pgio_data *data)
+filelayout_read_pagelist(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
- loff_t offset = data->args.offset;
+ loff_t offset = hdr->args.offset;
u32 j, idx;
struct nfs_fh *fh;
dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
__func__, hdr->inode->i_ino,
- data->args.pgbase, (size_t)data->args.count, offset);
+ hdr->args.pgbase, (size_t)hdr->args.count, offset);
/* Retrieve the correct rpc_client for the byte range */
j = nfs4_fl_calc_j_index(lseg, offset);
@@ -559,30 +547,29 @@ filelayout_read_pagelist(struct nfs_pgio_data *data)
/* No multipath support. Use first DS */
atomic_inc(&ds->ds_clp->cl_count);
- data->ds_clp = ds->ds_clp;
- data->ds_idx = idx;
+ hdr->ds_clp = ds->ds_clp;
+ hdr->ds_idx = idx;
fh = nfs4_fl_select_ds_fh(lseg, j);
if (fh)
- data->args.fh = fh;
+ hdr->args.fh = fh;
- data->args.offset = filelayout_get_dserver_offset(lseg, offset);
- data->mds_offset = offset;
+ hdr->args.offset = filelayout_get_dserver_offset(lseg, offset);
+ hdr->mds_offset = offset;
/* Perform an asynchronous read to ds */
- nfs_initiate_pgio(ds_clnt, data,
+ nfs_initiate_pgio(ds_clnt, hdr,
&filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN);
return PNFS_ATTEMPTED;
}
/* Perform async writes. */
static enum pnfs_try_status
-filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
+filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
{
- struct nfs_pgio_header *hdr = data->header;
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
- loff_t offset = data->args.offset;
+ loff_t offset = hdr->args.offset;
u32 j, idx;
struct nfs_fh *fh;
@@ -598,21 +585,20 @@ filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
return PNFS_NOT_ATTEMPTED;
dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
- __func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
+ __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
- data->pgio_done_cb = filelayout_write_done_cb;
+ hdr->pgio_done_cb = filelayout_write_done_cb;
atomic_inc(&ds->ds_clp->cl_count);
- data->ds_clp = ds->ds_clp;
- data->ds_idx = idx;
+ hdr->ds_clp = ds->ds_clp;
+ hdr->ds_idx = idx;
fh = nfs4_fl_select_ds_fh(lseg, j);
if (fh)
- data->args.fh = fh;
-
- data->args.offset = filelayout_get_dserver_offset(lseg, offset);
+ hdr->args.fh = fh;
+ hdr->args.offset = filelayout_get_dserver_offset(lseg, offset);
/* Perform an asynchronous write */
- nfs_initiate_pgio(ds_clnt, data,
+ nfs_initiate_pgio(ds_clnt, hdr,
&filelayout_write_call_ops, sync,
RPC_TASK_SOFTCONN);
return PNFS_ATTEMPTED;
@@ -1023,6 +1009,7 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
/* The generic layer is about to remove the req from the commit list.
* If this will make the bucket empty, it will need to put the lseg reference.
+ * Note this must be called holding the inode (/cinfo) lock
*/
static void
filelayout_clear_request_commit(struct nfs_page *req,
@@ -1030,7 +1017,6 @@ filelayout_clear_request_commit(struct nfs_page *req,
{
struct pnfs_layout_segment *freeme = NULL;
- spin_lock(cinfo->lock);
if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
goto out;
cinfo->ds->nwritten--;
@@ -1045,22 +1031,25 @@ filelayout_clear_request_commit(struct nfs_page *req,
}
out:
nfs_request_remove_commit_list(req, cinfo);
- spin_unlock(cinfo->lock);
- pnfs_put_lseg(freeme);
+ pnfs_put_lseg_async(freeme);
}
-static struct list_head *
-filelayout_choose_commit_list(struct nfs_page *req,
- struct pnfs_layout_segment *lseg,
- struct nfs_commit_info *cinfo)
+static void
+filelayout_mark_request_commit(struct nfs_page *req,
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo)
+
{
struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
u32 i, j;
struct list_head *list;
struct pnfs_commit_bucket *buckets;
- if (fl->commit_through_mds)
- return &cinfo->mds->list;
+ if (fl->commit_through_mds) {
+ list = &cinfo->mds->list;
+ spin_lock(cinfo->lock);
+ goto mds_commit;
+ }
/* Note that we are calling nfs4_fl_calc_j_index on each page
* that ends up being committed to a data server. An attractive
@@ -1084,19 +1073,22 @@ filelayout_choose_commit_list(struct nfs_page *req,
}
set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
cinfo->ds->nwritten++;
- spin_unlock(cinfo->lock);
- return list;
-}
-static void
-filelayout_mark_request_commit(struct nfs_page *req,
- struct pnfs_layout_segment *lseg,
- struct nfs_commit_info *cinfo)
-{
- struct list_head *list;
-
- list = filelayout_choose_commit_list(req, lseg, cinfo);
- nfs_request_add_commit_list(req, list, cinfo);
+mds_commit:
+ /* nfs_request_add_commit_list(). We need to add req to list without
+ * dropping cinfo lock.
+ */
+ set_bit(PG_CLEAN, &(req)->wb_flags);
+ nfs_list_add_request(req, list);
+ cinfo->mds->ncommit++;
+ spin_unlock(cinfo->lock);
+ if (!cinfo->dreq) {
+ inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+ inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info,
+ BDI_RECLAIMABLE);
+ __mark_inode_dirty(req->wb_context->dentry->d_inode,
+ I_DIRTY_DATASYNC);
+ }
}
static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
@@ -1244,15 +1236,64 @@ restart:
spin_unlock(cinfo->lock);
}
+/* filelayout_search_commit_reqs - Search lists in @cinfo for the head request
+ * for @page
+ * @cinfo - commit info for current inode
+ * @page - page to search for matching head request
+ *
+ * Returns the head request if one is found, otherwise returns NULL.
+ */
+static struct nfs_page *
+filelayout_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page)
+{
+ struct nfs_page *freq, *t;
+ struct pnfs_commit_bucket *b;
+ int i;
+
+ /* Linearly search the commit lists for each bucket until a matching
+ * request is found */
+ for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
+ list_for_each_entry_safe(freq, t, &b->written, wb_list) {
+ if (freq->wb_page == page)
+ return freq->wb_head;
+ }
+ list_for_each_entry_safe(freq, t, &b->committing, wb_list) {
+ if (freq->wb_page == page)
+ return freq->wb_head;
+ }
+ }
+
+ return NULL;
+}
+
+static void filelayout_retry_commit(struct nfs_commit_info *cinfo, int idx)
+{
+ struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
+ struct pnfs_commit_bucket *bucket;
+ struct pnfs_layout_segment *freeme;
+ int i;
+
+ for (i = idx; i < fl_cinfo->nbuckets; i++) {
+ bucket = &fl_cinfo->buckets[i];
+ if (list_empty(&bucket->committing))
+ continue;
+ nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
+ spin_lock(cinfo->lock);
+ freeme = bucket->clseg;
+ bucket->clseg = NULL;
+ spin_unlock(cinfo->lock);
+ pnfs_put_lseg(freeme);
+ }
+}
+
static unsigned int
alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
{
struct pnfs_ds_commit_info *fl_cinfo;
struct pnfs_commit_bucket *bucket;
struct nfs_commit_data *data;
- int i, j;
+ int i;
unsigned int nreq = 0;
- struct pnfs_layout_segment *freeme;
fl_cinfo = cinfo->ds;
bucket = fl_cinfo->buckets;
@@ -1272,16 +1313,7 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
}
/* Clean up on error */
- for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) {
- if (list_empty(&bucket->committing))
- continue;
- nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
- spin_lock(cinfo->lock);
- freeme = bucket->clseg;
- bucket->clseg = NULL;
- spin_unlock(cinfo->lock);
- pnfs_put_lseg(freeme);
- }
+ filelayout_retry_commit(cinfo, i);
/* Caller will clean up entries put on list */
return nreq;
}
@@ -1301,8 +1333,12 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
data->lseg = NULL;
list_add(&data->pages, &list);
nreq++;
- } else
+ } else {
nfs_retry_commit(mds_pages, NULL, cinfo);
+ filelayout_retry_commit(cinfo, 0);
+ cinfo->completion_ops->error_cleanup(NFS_I(inode));
+ return -ENOMEM;
+ }
}
nreq += alloc_ds_commits(cinfo, &list);
@@ -1380,6 +1416,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
.clear_request_commit = filelayout_clear_request_commit,
.scan_commit_lists = filelayout_scan_commit_lists,
.recover_commit_reqs = filelayout_recover_commit_reqs,
+ .search_commit_reqs = filelayout_search_commit_reqs,
.commit_pagelist = filelayout_commit_pagelist,
.read_pagelist = filelayout_read_pagelist,
.write_pagelist = filelayout_write_pagelist,
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index e2a0361e24c..8540516f4d7 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -695,7 +695,7 @@ filelayout_get_device_info(struct inode *inode,
if (pdev == NULL)
return NULL;
- pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags);
+ pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags);
if (pages == NULL) {
kfree(pdev);
return NULL;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index b94f80420a5..880618a8b04 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -112,7 +112,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
* if the dentry tree reaches them; however if the dentry already
* exists, we'll pick it up at this point and use it as the root
*/
- ret = d_obtain_alias(inode);
+ ret = d_obtain_root(inode);
if (IS_ERR(ret)) {
dprintk("nfs_get_root: get root dentry failed\n");
goto out;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 68921b01b79..577a36f0a51 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1002,6 +1002,15 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
}
EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
+int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode)
+{
+ if (!(NFS_I(inode)->cache_validity &
+ (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL))
+ && !nfs_attribute_cache_expired(inode))
+ return NFS_STALE(inode) ? -ESTALE : 0;
+ return -ECHILD;
+}
+
static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
{
struct nfs_inode *nfsi = NFS_I(inode);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e2a45ae5014..9056622d223 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -247,11 +247,11 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
int nfs_iocounter_wait(struct nfs_io_counter *c);
extern const struct nfs_pageio_ops nfs_pgio_rw_ops;
-struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *);
-void nfs_rw_header_free(struct nfs_pgio_header *);
-void nfs_pgio_data_release(struct nfs_pgio_data *);
+struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *);
+void nfs_pgio_header_free(struct nfs_pgio_header *);
+void nfs_pgio_data_destroy(struct nfs_pgio_header *);
int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
-int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *,
+int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_header *,
const struct rpc_call_ops *, int, int);
void nfs_free_request(struct nfs_page *req);
@@ -451,6 +451,7 @@ int nfs_scan_commit(struct inode *inode, struct list_head *dst,
void nfs_mark_request_commit(struct nfs_page *req,
struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo);
+int nfs_write_need_commit(struct nfs_pgio_header *);
int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
int how, struct nfs_commit_info *cinfo);
void nfs_retry_commit(struct list_head *page_list,
@@ -491,7 +492,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode)
extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
/* nfs4proc.c */
-extern void __nfs4_read_done_cb(struct nfs_pgio_data *);
+extern void __nfs4_read_done_cb(struct nfs_pgio_header *);
extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms,
const char *ip_addr);
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 8f854dde415..24c6898159c 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -129,7 +129,10 @@ static int __nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
.rpc_argp = &args,
.rpc_resp = &fattr,
};
- int status;
+ int status = 0;
+
+ if (acl == NULL && (!S_ISDIR(inode->i_mode) || dfacl == NULL))
+ goto out;
status = -EOPNOTSUPP;
if (!nfs_server_capable(inode, NFS_CAP_ACLS))
@@ -256,7 +259,7 @@ nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data,
char *p = data + *result;
acl = get_acl(inode, type);
- if (!acl)
+ if (IS_ERR_OR_NULL(acl))
return 0;
posix_acl_release(acl);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index f0afa291fd5..809670eba52 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -795,41 +795,44 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return status;
}
-static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
- struct inode *inode = data->header->inode;
+ struct inode *inode = hdr->inode;
if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN;
nfs_invalidate_atime(inode);
- nfs_refresh_inode(inode, &data->fattr);
+ nfs_refresh_inode(inode, &hdr->fattr);
return 0;
}
-static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
+static void nfs3_proc_read_setup(struct nfs_pgio_header *hdr,
+ struct rpc_message *msg)
{
msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
}
-static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
{
rpc_call_start(task);
return 0;
}
-static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
- struct inode *inode = data->header->inode;
+ struct inode *inode = hdr->inode;
if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN;
if (task->tk_status >= 0)
- nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
+ nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
return 0;
}
-static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
+static void nfs3_proc_write_setup(struct nfs_pgio_header *hdr,
+ struct rpc_message *msg)
{
msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
}
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ba2affa5194..a8b855ab4e2 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -54,7 +54,7 @@ struct nfs4_minor_version_ops {
const nfs4_stateid *);
int (*find_root_sec)(struct nfs_server *, struct nfs_fh *,
struct nfs_fsinfo *);
- int (*free_lock_state)(struct nfs_server *,
+ void (*free_lock_state)(struct nfs_server *,
struct nfs4_lock_state *);
const struct rpc_call_ops *call_sync_ops;
const struct nfs4_state_recovery_ops *reboot_recovery_ops;
@@ -129,17 +129,6 @@ enum {
* LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN)
*/
-struct nfs4_lock_owner {
- unsigned int lo_type;
-#define NFS4_ANY_LOCK_TYPE (0U)
-#define NFS4_FLOCK_LOCK_TYPE (1U << 0)
-#define NFS4_POSIX_LOCK_TYPE (1U << 1)
- union {
- fl_owner_t posix_owner;
- pid_t flock_owner;
- } lo_u;
-};
-
struct nfs4_lock_state {
struct list_head ls_locks; /* Other lock stateids */
struct nfs4_state * ls_state; /* Pointer to open state */
@@ -149,7 +138,7 @@ struct nfs4_lock_state {
struct nfs_seqid_counter ls_seqid;
nfs4_stateid ls_stateid;
atomic_t ls_count;
- struct nfs4_lock_owner ls_owner;
+ fl_owner_t ls_owner;
};
/* bits for nfs4_state->flags */
@@ -337,11 +326,11 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
*/
static inline void
nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
- struct rpc_message *msg, struct nfs_pgio_data *wdata)
+ struct rpc_message *msg, struct nfs_pgio_header *hdr)
{
if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) &&
!test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags))
- wdata->args.stable = NFS_FILE_SYNC;
+ hdr->args.stable = NFS_FILE_SYNC;
}
#else /* CONFIG_NFS_v4_1 */
static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
@@ -369,7 +358,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags,
static inline void
nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
- struct rpc_message *msg, struct nfs_pgio_data *wdata)
+ struct rpc_message *msg, struct nfs_pgio_header *hdr)
{
}
#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index aa9ef487604..53e435a9526 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -855,6 +855,11 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
};
struct rpc_timeout ds_timeout;
struct nfs_client *clp;
+ char buf[INET6_ADDRSTRLEN + 1];
+
+ if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0)
+ return ERR_PTR(-EINVAL);
+ cl_init.hostname = buf;
/*
* Set an authflavor equual to the MDS value. Use the MDS nfs_client
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4bf3d97cc5a..7dd8aca31c2 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1952,6 +1952,14 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
return status;
}
+/*
+ * Additional permission checks in order to distinguish between an
+ * open for read, and an open for execute. This works around the
+ * fact that NFSv4 OPEN treats read and execute permissions as being
+ * the same.
+ * Note that in the non-execute case, we want to turn off permission
+ * checking if we just created a new file (POSIX open() semantics).
+ */
static int nfs4_opendata_access(struct rpc_cred *cred,
struct nfs4_opendata *opendata,
struct nfs4_state *state, fmode_t fmode,
@@ -1966,14 +1974,14 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
return 0;
mask = 0;
- /* don't check MAY_WRITE - a newly created file may not have
- * write mode bits, but POSIX allows the creating process to write.
- * use openflags to check for exec, because fmode won't
- * always have FMODE_EXEC set when file open for exec. */
+ /*
+ * Use openflags to check for exec, because fmode won't
+ * always have FMODE_EXEC set when file open for exec.
+ */
if (openflags & __FMODE_EXEC) {
/* ONLY check for exec rights */
mask = MAY_EXEC;
- } else if (fmode & FMODE_READ)
+ } else if ((fmode & FMODE_READ) && !opendata->file_created)
mask = MAY_READ;
cache.cred = cred;
@@ -2216,8 +2224,15 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
ret = _nfs4_proc_open(opendata);
- if (ret != 0)
+ if (ret != 0) {
+ if (ret == -ENOENT) {
+ d_drop(opendata->dentry);
+ d_add(opendata->dentry, NULL);
+ nfs_set_verifier(opendata->dentry,
+ nfs_save_change_attribute(opendata->dir->d_inode));
+ }
goto out;
+ }
state = nfs4_opendata_to_nfs4_state(opendata);
ret = PTR_ERR(state);
@@ -2545,6 +2560,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
struct nfs4_closedata *calldata = data;
struct nfs4_state *state = calldata->state;
struct nfs_server *server = NFS_SERVER(calldata->inode);
+ nfs4_stateid *res_stateid = NULL;
dprintk("%s: begin!\n", __func__);
if (!nfs4_sequence_done(task, &calldata->res.seq_res))
@@ -2555,12 +2571,12 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
*/
switch (task->tk_status) {
case 0:
- if (calldata->roc)
+ res_stateid = &calldata->res.stateid;
+ if (calldata->arg.fmode == 0 && calldata->roc)
pnfs_roc_set_barrier(state->inode,
calldata->roc_barrier);
- nfs_clear_open_stateid(state, &calldata->res.stateid, 0);
renew_lease(server, calldata->timestamp);
- goto out_release;
+ break;
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_OLD_STATEID:
@@ -2574,7 +2590,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
goto out_release;
}
}
- nfs_clear_open_stateid(state, NULL, calldata->arg.fmode);
+ nfs_clear_open_stateid(state, res_stateid, calldata->arg.fmode);
out_release:
nfs_release_seqid(calldata->arg.seqid);
nfs_refresh_inode(calldata->inode, calldata->res.fattr);
@@ -2586,6 +2602,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
struct nfs4_closedata *calldata = data;
struct nfs4_state *state = calldata->state;
struct inode *inode = calldata->inode;
+ bool is_rdonly, is_wronly, is_rdwr;
int call_close = 0;
dprintk("%s: begin!\n", __func__);
@@ -2593,18 +2610,24 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
goto out_wait;
task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
- calldata->arg.fmode = FMODE_READ|FMODE_WRITE;
spin_lock(&state->owner->so_lock);
+ is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags);
+ is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags);
+ is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags);
+ /* Calculate the current open share mode */
+ calldata->arg.fmode = 0;
+ if (is_rdonly || is_rdwr)
+ calldata->arg.fmode |= FMODE_READ;
+ if (is_wronly || is_rdwr)
+ calldata->arg.fmode |= FMODE_WRITE;
/* Calculate the change in open mode */
if (state->n_rdwr == 0) {
if (state->n_rdonly == 0) {
- call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags);
- call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
+ call_close |= is_rdonly || is_rdwr;
calldata->arg.fmode &= ~FMODE_READ;
}
if (state->n_wronly == 0) {
- call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags);
- call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
+ call_close |= is_wronly || is_rdwr;
calldata->arg.fmode &= ~FMODE_WRITE;
}
}
@@ -2647,6 +2670,48 @@ static const struct rpc_call_ops nfs4_close_ops = {
.rpc_release = nfs4_free_closedata,
};
+static bool nfs4_state_has_opener(struct nfs4_state *state)
+{
+ /* first check existing openers */
+ if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0 &&
+ state->n_rdonly != 0)
+ return true;
+
+ if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0 &&
+ state->n_wronly != 0)
+ return true;
+
+ if (test_bit(NFS_O_RDWR_STATE, &state->flags) != 0 &&
+ state->n_rdwr != 0)
+ return true;
+
+ return false;
+}
+
+static bool nfs4_roc(struct inode *inode)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_open_context *ctx;
+ struct nfs4_state *state;
+
+ spin_lock(&inode->i_lock);
+ list_for_each_entry(ctx, &nfsi->open_files, list) {
+ state = ctx->state;
+ if (state == NULL)
+ continue;
+ if (nfs4_state_has_opener(state)) {
+ spin_unlock(&inode->i_lock);
+ return false;
+ }
+ }
+ spin_unlock(&inode->i_lock);
+
+ if (nfs4_check_delegation(inode, FMODE_READ))
+ return false;
+
+ return pnfs_roc(inode);
+}
+
/*
* It is possible for data to be read/written from a mem-mapped file
* after the sys_close call (which hits the vfs layer as a flush).
@@ -2697,7 +2762,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
calldata->res.fattr = &calldata->fattr;
calldata->res.seqid = calldata->arg.seqid;
calldata->res.server = server;
- calldata->roc = pnfs_roc(state->inode);
+ calldata->roc = nfs4_roc(state->inode);
nfs_sb_active(calldata->inode->i_sb);
msg.rpc_argp = &calldata->arg;
@@ -4033,24 +4098,25 @@ static bool nfs4_error_stateid_expired(int err)
return false;
}
-void __nfs4_read_done_cb(struct nfs_pgio_data *data)
+void __nfs4_read_done_cb(struct nfs_pgio_header *hdr)
{
- nfs_invalidate_atime(data->header->inode);
+ nfs_invalidate_atime(hdr->inode);
}
-static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
- struct nfs_server *server = NFS_SERVER(data->header->inode);
+ struct nfs_server *server = NFS_SERVER(hdr->inode);
- trace_nfs4_read(data, task->tk_status);
- if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
+ trace_nfs4_read(hdr, task->tk_status);
+ if (nfs4_async_handle_error(task, server,
+ hdr->args.context->state) == -EAGAIN) {
rpc_restart_call_prepare(task);
return -EAGAIN;
}
- __nfs4_read_done_cb(data);
+ __nfs4_read_done_cb(hdr);
if (task->tk_status > 0)
- renew_lease(server, data->timestamp);
+ renew_lease(server, hdr->timestamp);
return 0;
}
@@ -4068,54 +4134,59 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task,
return true;
}
-static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
dprintk("--> %s\n", __func__);
- if (!nfs4_sequence_done(task, &data->res.seq_res))
+ if (!nfs4_sequence_done(task, &hdr->res.seq_res))
return -EAGAIN;
- if (nfs4_read_stateid_changed(task, &data->args))
+ if (nfs4_read_stateid_changed(task, &hdr->args))
return -EAGAIN;
- return data->pgio_done_cb ? data->pgio_done_cb(task, data) :
- nfs4_read_done_cb(task, data);
+ return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) :
+ nfs4_read_done_cb(task, hdr);
}
-static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
+static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,
+ struct rpc_message *msg)
{
- data->timestamp = jiffies;
- data->pgio_done_cb = nfs4_read_done_cb;
+ hdr->timestamp = jiffies;
+ hdr->pgio_done_cb = nfs4_read_done_cb;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
- nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
+ nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0);
}
-static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
{
- if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
- &data->args.seq_args,
- &data->res.seq_res,
+ if (nfs4_setup_sequence(NFS_SERVER(hdr->inode),
+ &hdr->args.seq_args,
+ &hdr->res.seq_res,
task))
return 0;
- if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
- data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO)
+ if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
+ hdr->args.lock_context,
+ hdr->rw_ops->rw_mode) == -EIO)
return -EIO;
- if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
+ if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags)))
return -EIO;
return 0;
}
-static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs4_write_done_cb(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
{
- struct inode *inode = data->header->inode;
+ struct inode *inode = hdr->inode;
- trace_nfs4_write(data, task->tk_status);
- if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
+ trace_nfs4_write(hdr, task->tk_status);
+ if (nfs4_async_handle_error(task, NFS_SERVER(inode),
+ hdr->args.context->state) == -EAGAIN) {
rpc_restart_call_prepare(task);
return -EAGAIN;
}
if (task->tk_status >= 0) {
- renew_lease(NFS_SERVER(inode), data->timestamp);
- nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
+ renew_lease(NFS_SERVER(inode), hdr->timestamp);
+ nfs_post_op_update_inode_force_wcc(inode, &hdr->fattr);
}
return 0;
}
@@ -4134,23 +4205,21 @@ static bool nfs4_write_stateid_changed(struct rpc_task *task,
return true;
}
-static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
- if (!nfs4_sequence_done(task, &data->res.seq_res))
+ if (!nfs4_sequence_done(task, &hdr->res.seq_res))
return -EAGAIN;
- if (nfs4_write_stateid_changed(task, &data->args))
+ if (nfs4_write_stateid_changed(task, &hdr->args))
return -EAGAIN;
- return data->pgio_done_cb ? data->pgio_done_cb(task, data) :
- nfs4_write_done_cb(task, data);
+ return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) :
+ nfs4_write_done_cb(task, hdr);
}
static
-bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data)
+bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr)
{
- const struct nfs_pgio_header *hdr = data->header;
-
/* Don't request attributes for pNFS or O_DIRECT writes */
- if (data->ds_clp != NULL || hdr->dreq != NULL)
+ if (hdr->ds_clp != NULL || hdr->dreq != NULL)
return false;
/* Otherwise, request attributes if and only if we don't hold
* a delegation
@@ -4158,23 +4227,24 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data)
return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
}
-static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
+static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
+ struct rpc_message *msg)
{
- struct nfs_server *server = NFS_SERVER(data->header->inode);
+ struct nfs_server *server = NFS_SERVER(hdr->inode);
- if (!nfs4_write_need_cache_consistency_data(data)) {
- data->args.bitmask = NULL;
- data->res.fattr = NULL;
+ if (!nfs4_write_need_cache_consistency_data(hdr)) {
+ hdr->args.bitmask = NULL;
+ hdr->res.fattr = NULL;
} else
- data->args.bitmask = server->cache_consistency_bitmask;
+ hdr->args.bitmask = server->cache_consistency_bitmask;
- if (!data->pgio_done_cb)
- data->pgio_done_cb = nfs4_write_done_cb;
- data->res.server = server;
- data->timestamp = jiffies;
+ if (!hdr->pgio_done_cb)
+ hdr->pgio_done_cb = nfs4_write_done_cb;
+ hdr->res.server = server;
+ hdr->timestamp = jiffies;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
- nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
+ nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1);
}
static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
@@ -4881,6 +4951,18 @@ nfs4_init_callback_netid(const struct nfs_client *clp, char *buf, size_t len)
return scnprintf(buf, len, "tcp");
}
+static void nfs4_setclientid_done(struct rpc_task *task, void *calldata)
+{
+ struct nfs4_setclientid *sc = calldata;
+
+ if (task->tk_status == 0)
+ sc->sc_cred = get_rpccred(task->tk_rqstp->rq_cred);
+}
+
+static const struct rpc_call_ops nfs4_setclientid_ops = {
+ .rpc_call_done = nfs4_setclientid_done,
+};
+
/**
* nfs4_proc_setclientid - Negotiate client ID
* @clp: state data structure
@@ -4907,6 +4989,14 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
.rpc_resp = res,
.rpc_cred = cred,
};
+ struct rpc_task *task;
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = clp->cl_rpcclient,
+ .rpc_message = &msg,
+ .callback_ops = &nfs4_setclientid_ops,
+ .callback_data = &setclientid,
+ .flags = RPC_TASK_TIMEOUT,
+ };
int status;
/* nfs_client_id4 */
@@ -4933,7 +5023,18 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
dprintk("NFS call setclientid auth=%s, '%.*s'\n",
clp->cl_rpcclient->cl_auth->au_ops->au_name,
setclientid.sc_name_len, setclientid.sc_name);
- status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+ task = rpc_run_task(&task_setup_data);
+ if (IS_ERR(task)) {
+ status = PTR_ERR(task);
+ goto out;
+ }
+ status = task->tk_status;
+ if (setclientid.sc_cred) {
+ clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred);
+ put_rpccred(setclientid.sc_cred);
+ }
+ rpc_put_task(task);
+out:
trace_nfs4_setclientid(clp, status);
dprintk("NFS reply setclientid: %d\n", status);
return status;
@@ -4975,6 +5076,9 @@ struct nfs4_delegreturndata {
unsigned long timestamp;
struct nfs_fattr fattr;
int rpc_status;
+ struct inode *inode;
+ bool roc;
+ u32 roc_barrier;
};
static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
@@ -4988,7 +5092,6 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
switch (task->tk_status) {
case 0:
renew_lease(data->res.server, data->timestamp);
- break;
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_BAD_STATEID:
@@ -4996,6 +5099,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
task->tk_status = 0;
+ if (data->roc)
+ pnfs_roc_set_barrier(data->inode, data->roc_barrier);
break;
default:
if (nfs4_async_handle_error(task, data->res.server, NULL) ==
@@ -5009,6 +5114,10 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
static void nfs4_delegreturn_release(void *calldata)
{
+ struct nfs4_delegreturndata *data = calldata;
+
+ if (data->roc)
+ pnfs_roc_release(data->inode);
kfree(calldata);
}
@@ -5018,6 +5127,10 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
d_data = (struct nfs4_delegreturndata *)data;
+ if (d_data->roc &&
+ pnfs_roc_drain(d_data->inode, &d_data->roc_barrier, task))
+ return;
+
nfs4_setup_sequence(d_data->res.server,
&d_data->args.seq_args,
&d_data->res.seq_res,
@@ -5061,6 +5174,9 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
nfs_fattr_init(data->res.fattr);
data->timestamp = jiffies;
data->rpc_status = 0;
+ data->inode = inode;
+ data->roc = list_empty(&NFS_I(inode)->open_files) ?
+ pnfs_roc(inode) : false;
task_setup_data.callback_data = data;
msg.rpc_argp = &data->args;
@@ -5834,8 +5950,10 @@ struct nfs_release_lockowner_data {
static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata)
{
struct nfs_release_lockowner_data *data = calldata;
- nfs40_setup_sequence(data->server,
- &data->args.seq_args, &data->res.seq_res, task);
+ struct nfs_server *server = data->server;
+ nfs40_setup_sequence(server, &data->args.seq_args,
+ &data->res.seq_res, task);
+ data->args.lock_owner.clientid = server->nfs_client->cl_clientid;
data->timestamp = jiffies;
}
@@ -5852,6 +5970,8 @@ static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata)
break;
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_EXPIRED:
+ nfs4_schedule_lease_recovery(server->nfs_client);
+ break;
case -NFS4ERR_LEASE_MOVED:
case -NFS4ERR_DELAY:
if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN)
@@ -5872,7 +5992,8 @@ static const struct rpc_call_ops nfs4_release_lockowner_ops = {
.rpc_release = nfs4_release_lockowner_release,
};
-static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp)
+static void
+nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp)
{
struct nfs_release_lockowner_data *data;
struct rpc_message msg = {
@@ -5880,11 +6001,11 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st
};
if (server->nfs_client->cl_mvops->minor_version != 0)
- return -EINVAL;
+ return;
data = kmalloc(sizeof(*data), GFP_NOFS);
if (!data)
- return -ENOMEM;
+ return;
data->lsp = lsp;
data->server = server;
data->args.lock_owner.clientid = server->nfs_client->cl_clientid;
@@ -5895,7 +6016,6 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st
msg.rpc_resp = &data->res;
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data);
- return 0;
}
#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
@@ -8182,7 +8302,8 @@ static int nfs41_free_stateid(struct nfs_server *server,
return ret;
}
-static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
+static void
+nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
{
struct rpc_task *task;
struct rpc_cred *cred = lsp->ls_state->owner->so_cred;
@@ -8190,9 +8311,8 @@ static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_sta
task = _nfs41_free_stateid(server, &lsp->ls_stateid, cred, false);
nfs4_free_lock_state(server, lsp);
if (IS_ERR(task))
- return PTR_ERR(task);
+ return;
rpc_put_task(task);
- return 0;
}
static bool nfs41_match_stateid(const nfs4_stateid *s1,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 42f12118216..22fe35104c0 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -787,21 +787,12 @@ void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode)
* that is compatible with current->files
*/
static struct nfs4_lock_state *
-__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
+__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
{
struct nfs4_lock_state *pos;
list_for_each_entry(pos, &state->lock_states, ls_locks) {
- if (type != NFS4_ANY_LOCK_TYPE && pos->ls_owner.lo_type != type)
+ if (pos->ls_owner != fl_owner)
continue;
- switch (pos->ls_owner.lo_type) {
- case NFS4_POSIX_LOCK_TYPE:
- if (pos->ls_owner.lo_u.posix_owner != fl_owner)
- continue;
- break;
- case NFS4_FLOCK_LOCK_TYPE:
- if (pos->ls_owner.lo_u.flock_owner != fl_pid)
- continue;
- }
atomic_inc(&pos->ls_count);
return pos;
}
@@ -813,7 +804,7 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_p
* exists, return an uninitialized one.
*
*/
-static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
+static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
{
struct nfs4_lock_state *lsp;
struct nfs_server *server = state->owner->so_server;
@@ -824,17 +815,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
nfs4_init_seqid_counter(&lsp->ls_seqid);
atomic_set(&lsp->ls_count, 1);
lsp->ls_state = state;
- lsp->ls_owner.lo_type = type;
- switch (lsp->ls_owner.lo_type) {
- case NFS4_FLOCK_LOCK_TYPE:
- lsp->ls_owner.lo_u.flock_owner = fl_pid;
- break;
- case NFS4_POSIX_LOCK_TYPE:
- lsp->ls_owner.lo_u.posix_owner = fl_owner;
- break;
- default:
- goto out_free;
- }
+ lsp->ls_owner = fl_owner;
lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS);
if (lsp->ls_seqid.owner_id < 0)
goto out_free;
@@ -857,13 +838,13 @@ void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp
* exists, return an uninitialized one.
*
*/
-static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner, pid_t pid, unsigned int type)
+static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
{
struct nfs4_lock_state *lsp, *new = NULL;
for(;;) {
spin_lock(&state->state_lock);
- lsp = __nfs4_find_lock_state(state, owner, pid, type);
+ lsp = __nfs4_find_lock_state(state, owner);
if (lsp != NULL)
break;
if (new != NULL) {
@@ -874,7 +855,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
break;
}
spin_unlock(&state->state_lock);
- new = nfs4_alloc_lock_state(state, owner, pid, type);
+ new = nfs4_alloc_lock_state(state, owner);
if (new == NULL)
return NULL;
}
@@ -935,13 +916,7 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
if (fl->fl_ops != NULL)
return 0;
- if (fl->fl_flags & FL_POSIX)
- lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE);
- else if (fl->fl_flags & FL_FLOCK)
- lsp = nfs4_get_lock_state(state, NULL, fl->fl_pid,
- NFS4_FLOCK_LOCK_TYPE);
- else
- return -EINVAL;
+ lsp = nfs4_get_lock_state(state, fl->fl_owner);
if (lsp == NULL)
return -ENOMEM;
fl->fl_u.nfs4_fl.owner = lsp;
@@ -955,7 +930,6 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
{
struct nfs4_lock_state *lsp;
fl_owner_t fl_owner;
- pid_t fl_pid;
int ret = -ENOENT;
@@ -966,9 +940,8 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
goto out;
fl_owner = lockowner->l_owner;
- fl_pid = lockowner->l_pid;
spin_lock(&state->state_lock);
- lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE);
+ lsp = __nfs4_find_lock_state(state, fl_owner);
if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags))
ret = -EIO;
else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) {
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 0a744f3a86f..1c32adbe728 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -932,11 +932,11 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group);
DECLARE_EVENT_CLASS(nfs4_read_event,
TP_PROTO(
- const struct nfs_pgio_data *data,
+ const struct nfs_pgio_header *hdr,
int error
),
- TP_ARGS(data, error),
+ TP_ARGS(hdr, error),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -948,12 +948,12 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
),
TP_fast_assign(
- const struct inode *inode = data->header->inode;
+ const struct inode *inode = hdr->inode;
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = NFS_FILEID(inode);
__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
- __entry->offset = data->args.offset;
- __entry->count = data->args.count;
+ __entry->offset = hdr->args.offset;
+ __entry->count = hdr->args.count;
__entry->error = error;
),
@@ -972,10 +972,10 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
#define DEFINE_NFS4_READ_EVENT(name) \
DEFINE_EVENT(nfs4_read_event, name, \
TP_PROTO( \
- const struct nfs_pgio_data *data, \
+ const struct nfs_pgio_header *hdr, \
int error \
), \
- TP_ARGS(data, error))
+ TP_ARGS(hdr, error))
DEFINE_NFS4_READ_EVENT(nfs4_read);
#ifdef CONFIG_NFS_V4_1
DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read);
@@ -983,11 +983,11 @@ DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read);
DECLARE_EVENT_CLASS(nfs4_write_event,
TP_PROTO(
- const struct nfs_pgio_data *data,
+ const struct nfs_pgio_header *hdr,
int error
),
- TP_ARGS(data, error),
+ TP_ARGS(hdr, error),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -999,12 +999,12 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
),
TP_fast_assign(
- const struct inode *inode = data->header->inode;
+ const struct inode *inode = hdr->inode;
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = NFS_FILEID(inode);
__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
- __entry->offset = data->args.offset;
- __entry->count = data->args.count;
+ __entry->offset = hdr->args.offset;
+ __entry->count = hdr->args.count;
__entry->error = error;
),
@@ -1024,10 +1024,10 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
#define DEFINE_NFS4_WRITE_EVENT(name) \
DEFINE_EVENT(nfs4_write_event, name, \
TP_PROTO( \
- const struct nfs_pgio_data *data, \
+ const struct nfs_pgio_header *hdr, \
int error \
), \
- TP_ARGS(data, error))
+ TP_ARGS(hdr, error))
DEFINE_NFS4_WRITE_EVENT(nfs4_write);
#ifdef CONFIG_NFS_V4_1
DEFINE_NFS4_WRITE_EVENT(nfs4_pnfs_write);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 939ae606cfa..e13b59d8d9a 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7092,7 +7092,7 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp,
if (!status)
status = decode_sequence(xdr, &res->seq_res, rqstp);
if (!status)
- status = decode_reclaim_complete(xdr, (void *)NULL);
+ status = decode_reclaim_complete(xdr, NULL);
return status;
}
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 611320753db..ae05278b376 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -439,22 +439,21 @@ static void _read_done(struct ore_io_state *ios, void *private)
objlayout_read_done(&objios->oir, status, objios->sync);
}
-int objio_read_pagelist(struct nfs_pgio_data *rdata)
+int objio_read_pagelist(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = rdata->header;
struct objio_state *objios;
int ret;
ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
- hdr->lseg, rdata->args.pages, rdata->args.pgbase,
- rdata->args.offset, rdata->args.count, rdata,
+ hdr->lseg, hdr->args.pages, hdr->args.pgbase,
+ hdr->args.offset, hdr->args.count, hdr,
GFP_KERNEL, &objios);
if (unlikely(ret))
return ret;
objios->ios->done = _read_done;
dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
- rdata->args.offset, rdata->args.count);
+ hdr->args.offset, hdr->args.count);
ret = ore_read(objios->ios);
if (unlikely(ret))
objio_free_result(&objios->oir);
@@ -487,11 +486,11 @@ static void _write_done(struct ore_io_state *ios, void *private)
static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
{
struct objio_state *objios = priv;
- struct nfs_pgio_data *wdata = objios->oir.rpcdata;
- struct address_space *mapping = wdata->header->inode->i_mapping;
+ struct nfs_pgio_header *hdr = objios->oir.rpcdata;
+ struct address_space *mapping = hdr->inode->i_mapping;
pgoff_t index = offset / PAGE_SIZE;
struct page *page;
- loff_t i_size = i_size_read(wdata->header->inode);
+ loff_t i_size = i_size_read(hdr->inode);
if (offset >= i_size) {
*uptodate = true;
@@ -531,15 +530,14 @@ static const struct _ore_r4w_op _r4w_op = {
.put_page = &__r4w_put_page,
};
-int objio_write_pagelist(struct nfs_pgio_data *wdata, int how)
+int objio_write_pagelist(struct nfs_pgio_header *hdr, int how)
{
- struct nfs_pgio_header *hdr = wdata->header;
struct objio_state *objios;
int ret;
ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
- hdr->lseg, wdata->args.pages, wdata->args.pgbase,
- wdata->args.offset, wdata->args.count, wdata, GFP_NOFS,
+ hdr->lseg, hdr->args.pages, hdr->args.pgbase,
+ hdr->args.offset, hdr->args.count, hdr, GFP_NOFS,
&objios);
if (unlikely(ret))
return ret;
@@ -551,7 +549,7 @@ int objio_write_pagelist(struct nfs_pgio_data *wdata, int how)
objios->ios->done = _write_done;
dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
- wdata->args.offset, wdata->args.count);
+ hdr->args.offset, hdr->args.count);
ret = ore_write(objios->ios);
if (unlikely(ret)) {
objio_free_result(&objios->oir);
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 765d3f54e98..697a16d11fa 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -229,36 +229,36 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
static void _rpc_read_complete(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_pgio_data *rdata;
+ struct nfs_pgio_header *hdr;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- rdata = container_of(task, struct nfs_pgio_data, task);
+ hdr = container_of(task, struct nfs_pgio_header, task);
- pnfs_ld_read_done(rdata);
+ pnfs_ld_read_done(hdr);
}
void
objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
{
- struct nfs_pgio_data *rdata = oir->rpcdata;
+ struct nfs_pgio_header *hdr = oir->rpcdata;
- oir->status = rdata->task.tk_status = status;
+ oir->status = hdr->task.tk_status = status;
if (status >= 0)
- rdata->res.count = status;
+ hdr->res.count = status;
else
- rdata->header->pnfs_error = status;
+ hdr->pnfs_error = status;
objlayout_iodone(oir);
/* must not use oir after this point */
dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__,
- status, rdata->res.eof, sync);
+ status, hdr->res.eof, sync);
if (sync)
- pnfs_ld_read_done(rdata);
+ pnfs_ld_read_done(hdr);
else {
- INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete);
- schedule_work(&rdata->task.u.tk_work);
+ INIT_WORK(&hdr->task.u.tk_work, _rpc_read_complete);
+ schedule_work(&hdr->task.u.tk_work);
}
}
@@ -266,12 +266,11 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
* Perform sync or async reads.
*/
enum pnfs_try_status
-objlayout_read_pagelist(struct nfs_pgio_data *rdata)
+objlayout_read_pagelist(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = rdata->header;
struct inode *inode = hdr->inode;
- loff_t offset = rdata->args.offset;
- size_t count = rdata->args.count;
+ loff_t offset = hdr->args.offset;
+ size_t count = hdr->args.count;
int err;
loff_t eof;
@@ -279,23 +278,23 @@ objlayout_read_pagelist(struct nfs_pgio_data *rdata)
if (unlikely(offset + count > eof)) {
if (offset >= eof) {
err = 0;
- rdata->res.count = 0;
- rdata->res.eof = 1;
+ hdr->res.count = 0;
+ hdr->res.eof = 1;
/*FIXME: do we need to call pnfs_ld_read_done() */
goto out;
}
count = eof - offset;
}
- rdata->res.eof = (offset + count) >= eof;
- _fix_verify_io_params(hdr->lseg, &rdata->args.pages,
- &rdata->args.pgbase,
- rdata->args.offset, rdata->args.count);
+ hdr->res.eof = (offset + count) >= eof;
+ _fix_verify_io_params(hdr->lseg, &hdr->args.pages,
+ &hdr->args.pgbase,
+ hdr->args.offset, hdr->args.count);
dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
- __func__, inode->i_ino, offset, count, rdata->res.eof);
+ __func__, inode->i_ino, offset, count, hdr->res.eof);
- err = objio_read_pagelist(rdata);
+ err = objio_read_pagelist(hdr);
out:
if (unlikely(err)) {
hdr->pnfs_error = err;
@@ -312,38 +311,38 @@ objlayout_read_pagelist(struct nfs_pgio_data *rdata)
static void _rpc_write_complete(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_pgio_data *wdata;
+ struct nfs_pgio_header *hdr;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- wdata = container_of(task, struct nfs_pgio_data, task);
+ hdr = container_of(task, struct nfs_pgio_header, task);
- pnfs_ld_write_done(wdata);
+ pnfs_ld_write_done(hdr);
}
void
objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
{
- struct nfs_pgio_data *wdata = oir->rpcdata;
+ struct nfs_pgio_header *hdr = oir->rpcdata;
- oir->status = wdata->task.tk_status = status;
+ oir->status = hdr->task.tk_status = status;
if (status >= 0) {
- wdata->res.count = status;
- wdata->verf.committed = oir->committed;
+ hdr->res.count = status;
+ hdr->verf.committed = oir->committed;
} else {
- wdata->header->pnfs_error = status;
+ hdr->pnfs_error = status;
}
objlayout_iodone(oir);
/* must not use oir after this point */
dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
- status, wdata->verf.committed, sync);
+ status, hdr->verf.committed, sync);
if (sync)
- pnfs_ld_write_done(wdata);
+ pnfs_ld_write_done(hdr);
else {
- INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete);
- schedule_work(&wdata->task.u.tk_work);
+ INIT_WORK(&hdr->task.u.tk_work, _rpc_write_complete);
+ schedule_work(&hdr->task.u.tk_work);
}
}
@@ -351,17 +350,15 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
* Perform sync or async writes.
*/
enum pnfs_try_status
-objlayout_write_pagelist(struct nfs_pgio_data *wdata,
- int how)
+objlayout_write_pagelist(struct nfs_pgio_header *hdr, int how)
{
- struct nfs_pgio_header *hdr = wdata->header;
int err;
- _fix_verify_io_params(hdr->lseg, &wdata->args.pages,
- &wdata->args.pgbase,
- wdata->args.offset, wdata->args.count);
+ _fix_verify_io_params(hdr->lseg, &hdr->args.pages,
+ &hdr->args.pgbase,
+ hdr->args.offset, hdr->args.count);
- err = objio_write_pagelist(wdata, how);
+ err = objio_write_pagelist(hdr, how);
if (unlikely(err)) {
hdr->pnfs_error = err;
dprintk("%s: Returned Error %d\n", __func__, err);
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index 01e041029a6..fd13f1d2f13 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -119,8 +119,8 @@ extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
*/
extern void objio_free_result(struct objlayout_io_res *oir);
-extern int objio_read_pagelist(struct nfs_pgio_data *rdata);
-extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how);
+extern int objio_read_pagelist(struct nfs_pgio_header *rdata);
+extern int objio_write_pagelist(struct nfs_pgio_header *wdata, int how);
/*
* callback API
@@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg(
extern void objlayout_free_lseg(struct pnfs_layout_segment *);
extern enum pnfs_try_status objlayout_read_pagelist(
- struct nfs_pgio_data *);
+ struct nfs_pgio_header *);
extern enum pnfs_try_status objlayout_write_pagelist(
- struct nfs_pgio_data *,
+ struct nfs_pgio_header *,
int how);
extern void objlayout_encode_layoutcommit(
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 0be5050638f..be7cbce6e4c 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -116,7 +116,7 @@ __nfs_iocounter_wait(struct nfs_io_counter *c)
if (atomic_read(&c->io_count) == 0)
break;
ret = nfs_wait_bit_killable(&q.key);
- } while (atomic_read(&c->io_count) != 0);
+ } while (atomic_read(&c->io_count) != 0 && !ret);
finish_wait(wq, &q.wait);
return ret;
}
@@ -139,18 +139,49 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
/*
* nfs_page_group_lock - lock the head of the page group
* @req - request in group that is to be locked
+ * @nonblock - if true don't block waiting for lock
*
* this lock must be held if modifying the page group list
+ *
+ * return 0 on success, < 0 on error: -EAGAIN if @nonblock is set and the
+ * lock is already held, otherwise the result from wait_on_bit_lock
+ *
+ * NOTE: calling with nonblock=false should always have set the
+ * lock bit (see fs/buffer.c and other uses of wait_on_bit_lock
+ * with TASK_UNINTERRUPTIBLE), so there is no need to check the result.
+ */
+int
+nfs_page_group_lock(struct nfs_page *req, bool nonblock)
+{
+ struct nfs_page *head = req->wb_head;
+
+ WARN_ON_ONCE(head != head->wb_head);
+
+ if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags))
+ return 0;
+
+ if (!nonblock)
+ return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
+ TASK_UNINTERRUPTIBLE);
+
+ return -EAGAIN;
+}
+
+/*
+ * nfs_page_group_lock_wait - wait for the lock to clear, but don't grab it
+ * @req - a request in the group
+ *
+ * This is a blocking call to wait for the group lock to be cleared.
*/
void
-nfs_page_group_lock(struct nfs_page *req)
+nfs_page_group_lock_wait(struct nfs_page *req)
{
struct nfs_page *head = req->wb_head;
WARN_ON_ONCE(head != head->wb_head);
- wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
- TASK_UNINTERRUPTIBLE);
+ wait_on_bit(&head->wb_flags, PG_HEADLOCK,
+ TASK_UNINTERRUPTIBLE);
}
/*
@@ -211,7 +242,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
{
bool ret;
- nfs_page_group_lock(req);
+ nfs_page_group_lock(req, false);
ret = nfs_page_group_sync_on_bit_locked(req, bit);
nfs_page_group_unlock(req);
@@ -454,123 +485,72 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
}
EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
-static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr)
-{
- return container_of(hdr, struct nfs_rw_header, header);
-}
-
-/**
- * nfs_rw_header_alloc - Allocate a header for a read or write
- * @ops: Read or write function vector
- */
-struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops)
+struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops)
{
- struct nfs_rw_header *header = ops->rw_alloc_header();
-
- if (header) {
- struct nfs_pgio_header *hdr = &header->header;
+ struct nfs_pgio_header *hdr = ops->rw_alloc_header();
+ if (hdr) {
INIT_LIST_HEAD(&hdr->pages);
spin_lock_init(&hdr->lock);
- atomic_set(&hdr->refcnt, 0);
hdr->rw_ops = ops;
}
- return header;
+ return hdr;
}
-EXPORT_SYMBOL_GPL(nfs_rw_header_alloc);
+EXPORT_SYMBOL_GPL(nfs_pgio_header_alloc);
/*
- * nfs_rw_header_free - Free a read or write header
+ * nfs_pgio_header_free - Free a read or write header
* @hdr: The header to free
*/
-void nfs_rw_header_free(struct nfs_pgio_header *hdr)
+void nfs_pgio_header_free(struct nfs_pgio_header *hdr)
{
- hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr));
+ hdr->rw_ops->rw_free_header(hdr);
}
-EXPORT_SYMBOL_GPL(nfs_rw_header_free);
+EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
/**
- * nfs_pgio_data_alloc - Allocate pageio data
- * @hdr: The header making a request
- * @pagecount: Number of pages to create
- */
-static struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr,
- unsigned int pagecount)
-{
- struct nfs_pgio_data *data, *prealloc;
-
- prealloc = &NFS_RW_HEADER(hdr)->rpc_data;
- if (prealloc->header == NULL)
- data = prealloc;
- else
- data = kzalloc(sizeof(*data), GFP_KERNEL);
- if (!data)
- goto out;
-
- if (nfs_pgarray_set(&data->pages, pagecount)) {
- data->header = hdr;
- atomic_inc(&hdr->refcnt);
- } else {
- if (data != prealloc)
- kfree(data);
- data = NULL;
- }
-out:
- return data;
-}
-
-/**
- * nfs_pgio_data_release - Properly free pageio data
- * @data: The data to release
+ * nfs_pgio_data_destroy - make @hdr suitable for reuse
+ *
+ * Frees memory and releases refs from nfs_generic_pgio, so that it may
+ * be called again.
+ *
+ * @hdr: A header that has had nfs_generic_pgio called
*/
-void nfs_pgio_data_release(struct nfs_pgio_data *data)
+void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
- struct nfs_rw_header *pageio_header = NFS_RW_HEADER(hdr);
-
- put_nfs_open_context(data->args.context);
- if (data->pages.pagevec != data->pages.page_array)
- kfree(data->pages.pagevec);
- if (data == &pageio_header->rpc_data) {
- data->header = NULL;
- data = NULL;
- }
- if (atomic_dec_and_test(&hdr->refcnt))
- hdr->completion_ops->completion(hdr);
- /* Note: we only free the rpc_task after callbacks are done.
- * See the comment in rpc_free_task() for why
- */
- kfree(data);
+ put_nfs_open_context(hdr->args.context);
+ if (hdr->page_array.pagevec != hdr->page_array.page_array)
+ kfree(hdr->page_array.pagevec);
}
-EXPORT_SYMBOL_GPL(nfs_pgio_data_release);
+EXPORT_SYMBOL_GPL(nfs_pgio_data_destroy);
/**
* nfs_pgio_rpcsetup - Set up arguments for a pageio call
- * @data: The pageio data
+ * @hdr: The pageio hdr
* @count: Number of bytes to read
* @offset: Initial offset
* @how: How to commit data (writes only)
* @cinfo: Commit information for the call (writes only)
*/
-static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data,
+static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
unsigned int count, unsigned int offset,
int how, struct nfs_commit_info *cinfo)
{
- struct nfs_page *req = data->header->req;
+ struct nfs_page *req = hdr->req;
/* Set up the RPC argument and reply structs
- * NB: take care not to mess about with data->commit et al. */
+ * NB: take care not to mess about with hdr->commit et al. */
- data->args.fh = NFS_FH(data->header->inode);
- data->args.offset = req_offset(req) + offset;
+ hdr->args.fh = NFS_FH(hdr->inode);
+ hdr->args.offset = req_offset(req) + offset;
/* pnfs_set_layoutcommit needs this */
- data->mds_offset = data->args.offset;
- data->args.pgbase = req->wb_pgbase + offset;
- data->args.pages = data->pages.pagevec;
- data->args.count = count;
- data->args.context = get_nfs_open_context(req->wb_context);
- data->args.lock_context = req->wb_lock_context;
- data->args.stable = NFS_UNSTABLE;
+ hdr->mds_offset = hdr->args.offset;
+ hdr->args.pgbase = req->wb_pgbase + offset;
+ hdr->args.pages = hdr->page_array.pagevec;
+ hdr->args.count = count;
+ hdr->args.context = get_nfs_open_context(req->wb_context);
+ hdr->args.lock_context = req->wb_lock_context;
+ hdr->args.stable = NFS_UNSTABLE;
switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
case 0:
break;
@@ -578,59 +558,59 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data,
if (nfs_reqs_to_commit(cinfo))
break;
default:
- data->args.stable = NFS_FILE_SYNC;
+ hdr->args.stable = NFS_FILE_SYNC;
}
- data->res.fattr = &data->fattr;
- data->res.count = count;
- data->res.eof = 0;
- data->res.verf = &data->verf;
- nfs_fattr_init(&data->fattr);
+ hdr->res.fattr = &hdr->fattr;
+ hdr->res.count = count;
+ hdr->res.eof = 0;
+ hdr->res.verf = &hdr->verf;
+ nfs_fattr_init(&hdr->fattr);
}
/**
- * nfs_pgio_prepare - Prepare pageio data to go over the wire
+ * nfs_pgio_prepare - Prepare pageio hdr to go over the wire
* @task: The current task
- * @calldata: pageio data to prepare
+ * @calldata: pageio header to prepare
*/
static void nfs_pgio_prepare(struct rpc_task *task, void *calldata)
{
- struct nfs_pgio_data *data = calldata;
+ struct nfs_pgio_header *hdr = calldata;
int err;
- err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data);
+ err = NFS_PROTO(hdr->inode)->pgio_rpc_prepare(task, hdr);
if (err)
rpc_exit(task, err);
}
-int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data,
+int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
const struct rpc_call_ops *call_ops, int how, int flags)
{
struct rpc_task *task;
struct rpc_message msg = {
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = data->header->cred,
+ .rpc_argp = &hdr->args,
+ .rpc_resp = &hdr->res,
+ .rpc_cred = hdr->cred,
};
struct rpc_task_setup task_setup_data = {
.rpc_client = clnt,
- .task = &data->task,
+ .task = &hdr->task,
.rpc_message = &msg,
.callback_ops = call_ops,
- .callback_data = data,
+ .callback_data = hdr,
.workqueue = nfsiod_workqueue,
.flags = RPC_TASK_ASYNC | flags,
};
int ret = 0;
- data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how);
+ hdr->rw_ops->rw_initiate(hdr, &msg, &task_setup_data, how);
dprintk("NFS: %5u initiated pgio call "
"(req %s/%llu, %u bytes @ offset %llu)\n",
- data->task.tk_pid,
- data->header->inode->i_sb->s_id,
- (unsigned long long)NFS_FILEID(data->header->inode),
- data->args.count,
- (unsigned long long)data->args.offset);
+ hdr->task.tk_pid,
+ hdr->inode->i_sb->s_id,
+ (unsigned long long)NFS_FILEID(hdr->inode),
+ hdr->args.count,
+ (unsigned long long)hdr->args.offset);
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task)) {
@@ -657,22 +637,23 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr)
{
set_bit(NFS_IOHDR_REDO, &hdr->flags);
- nfs_pgio_data_release(hdr->data);
- hdr->data = NULL;
+ nfs_pgio_data_destroy(hdr);
+ hdr->completion_ops->completion(hdr);
desc->pg_completion_ops->error_cleanup(&desc->pg_list);
return -ENOMEM;
}
/**
* nfs_pgio_release - Release pageio data
- * @calldata: The pageio data to release
+ * @calldata: The pageio header to release
*/
static void nfs_pgio_release(void *calldata)
{
- struct nfs_pgio_data *data = calldata;
- if (data->header->rw_ops->rw_release)
- data->header->rw_ops->rw_release(data);
- nfs_pgio_data_release(data);
+ struct nfs_pgio_header *hdr = calldata;
+ if (hdr->rw_ops->rw_release)
+ hdr->rw_ops->rw_release(hdr);
+ nfs_pgio_data_destroy(hdr);
+ hdr->completion_ops->completion(hdr);
}
/**
@@ -713,22 +694,22 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init);
/**
* nfs_pgio_result - Basic pageio error handling
* @task: The task that ran
- * @calldata: Pageio data to check
+ * @calldata: Pageio header to check
*/
static void nfs_pgio_result(struct rpc_task *task, void *calldata)
{
- struct nfs_pgio_data *data = calldata;
- struct inode *inode = data->header->inode;
+ struct nfs_pgio_header *hdr = calldata;
+ struct inode *inode = hdr->inode;
dprintk("NFS: %s: %5u, (status %d)\n", __func__,
task->tk_pid, task->tk_status);
- if (data->header->rw_ops->rw_done(task, data, inode) != 0)
+ if (hdr->rw_ops->rw_done(task, hdr, inode) != 0)
return;
if (task->tk_status < 0)
- nfs_set_pgio_error(data->header, task->tk_status, data->args.offset);
+ nfs_set_pgio_error(hdr, task->tk_status, hdr->args.offset);
else
- data->header->rw_ops->rw_result(task, data);
+ hdr->rw_ops->rw_result(task, hdr);
}
/*
@@ -743,32 +724,42 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr)
{
struct nfs_page *req;
- struct page **pages;
- struct nfs_pgio_data *data;
+ struct page **pages,
+ *last_page;
struct list_head *head = &desc->pg_list;
struct nfs_commit_info cinfo;
+ unsigned int pagecount, pageused;
- data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base,
- desc->pg_count));
- if (!data)
+ pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count);
+ if (!nfs_pgarray_set(&hdr->page_array, pagecount))
return nfs_pgio_error(desc, hdr);
nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
- pages = data->pages.pagevec;
+ pages = hdr->page_array.pagevec;
+ last_page = NULL;
+ pageused = 0;
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
nfs_list_add_request(req, &hdr->pages);
- *pages++ = req->wb_page;
+
+ if (WARN_ON_ONCE(pageused >= pagecount))
+ return nfs_pgio_error(desc, hdr);
+
+ if (!last_page || last_page != req->wb_page) {
+ *pages++ = last_page = req->wb_page;
+ pageused++;
+ }
}
+ if (WARN_ON_ONCE(pageused != pagecount))
+ return nfs_pgio_error(desc, hdr);
if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
(desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
desc->pg_ioflags &= ~FLUSH_COND_STABLE;
/* Set up the argument struct */
- nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
- hdr->data = data;
+ nfs_pgio_rpcsetup(hdr, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
desc->pg_rpc_callops = &nfs_pgio_common_ops;
return 0;
}
@@ -776,25 +767,20 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio);
static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
{
- struct nfs_rw_header *rw_hdr;
struct nfs_pgio_header *hdr;
int ret;
- rw_hdr = nfs_rw_header_alloc(desc->pg_rw_ops);
- if (!rw_hdr) {
+ hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
+ if (!hdr) {
desc->pg_completion_ops->error_cleanup(&desc->pg_list);
return -ENOMEM;
}
- hdr = &rw_hdr->header;
- nfs_pgheader_init(desc, hdr, nfs_rw_header_free);
- atomic_inc(&hdr->refcnt);
+ nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
ret = nfs_generic_pgio(desc, hdr);
if (ret == 0)
ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
- hdr->data, desc->pg_rpc_callops,
+ hdr, desc->pg_rpc_callops,
desc->pg_ioflags, 0);
- if (atomic_dec_and_test(&hdr->refcnt))
- hdr->completion_ops->completion(hdr);
return ret;
}
@@ -837,6 +823,14 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
return false;
if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
return false;
+ if (req->wb_page == prev->wb_page) {
+ if (req->wb_pgbase != prev->wb_pgbase + prev->wb_bytes)
+ return false;
+ } else {
+ if (req->wb_pgbase != 0 ||
+ prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
+ return false;
+ }
}
size = pgio->pg_ops->pg_test(pgio, prev, req);
WARN_ON_ONCE(size > req->wb_bytes);
@@ -908,7 +902,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
unsigned int bytes_left = 0;
unsigned int offset, pgbase;
- nfs_page_group_lock(req);
+ nfs_page_group_lock(req, false);
subreq = req;
bytes_left = subreq->wb_bytes;
@@ -930,7 +924,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
if (desc->pg_recoalesce)
return 0;
/* retry add_request for this subreq */
- nfs_page_group_lock(req);
+ nfs_page_group_lock(req, false);
continue;
}
@@ -1005,7 +999,38 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
} while (ret);
return ret;
}
-EXPORT_SYMBOL_GPL(nfs_pageio_add_request);
+
+/*
+ * nfs_pageio_resend - Transfer requests to new descriptor and resend
+ * @desc - the pageio descriptor to add requests to
+ * @hdr - the pgio header to move requests from
+ *
+ * Try to move each request (nfs_page) from @hdr to @desc then attempt
+ * to send them.
+ *
+ * Returns 0 on success and < 0 on error.
+ */
+int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
+{
+ LIST_HEAD(failed);
+
+ desc->pg_dreq = hdr->dreq;
+ while (!list_empty(&hdr->pages)) {
+ struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+
+ nfs_list_remove_request(req);
+ if (!nfs_pageio_add_request(desc, req))
+ nfs_list_add_request(req, &failed);
+ }
+ nfs_pageio_complete(desc);
+ if (!list_empty(&failed)) {
+ list_move(&failed, &hdr->pages);
+ return -EIO;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nfs_pageio_resend);
/**
* nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
@@ -1021,7 +1046,6 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
break;
}
}
-EXPORT_SYMBOL_GPL(nfs_pageio_complete);
/**
* nfs_pageio_cond_complete - Conditional I/O completion
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index a8914b33561..a3851debf8a 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -361,6 +361,23 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
}
EXPORT_SYMBOL_GPL(pnfs_put_lseg);
+static void pnfs_put_lseg_async_work(struct work_struct *work)
+{
+ struct pnfs_layout_segment *lseg;
+
+ lseg = container_of(work, struct pnfs_layout_segment, pls_work);
+
+ pnfs_put_lseg(lseg);
+}
+
+void
+pnfs_put_lseg_async(struct pnfs_layout_segment *lseg)
+{
+ INIT_WORK(&lseg->pls_work, pnfs_put_lseg_async_work);
+ schedule_work(&lseg->pls_work);
+}
+EXPORT_SYMBOL_GPL(pnfs_put_lseg_async);
+
static u64
end_offset(u64 start, u64 len)
{
@@ -1470,41 +1487,19 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
-int pnfs_write_done_resend_to_mds(struct inode *inode,
- struct list_head *head,
- const struct nfs_pgio_completion_ops *compl_ops,
- struct nfs_direct_req *dreq)
+int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
{
struct nfs_pageio_descriptor pgio;
- LIST_HEAD(failed);
/* Resend all requests through the MDS */
- nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops);
- pgio.pg_dreq = dreq;
- while (!list_empty(head)) {
- struct nfs_page *req = nfs_list_entry(head->next);
-
- nfs_list_remove_request(req);
- if (!nfs_pageio_add_request(&pgio, req))
- nfs_list_add_request(req, &failed);
- }
- nfs_pageio_complete(&pgio);
-
- if (!list_empty(&failed)) {
- /* For some reason our attempt to resend pages. Mark the
- * overall send request as having failed, and let
- * nfs_writeback_release_full deal with the error.
- */
- list_move(&failed, head);
- return -EIO;
- }
- return 0;
+ nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
+ hdr->completion_ops);
+ return nfs_pageio_resend(&pgio, hdr);
}
EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
-static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data)
+static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
dprintk("pnfs write error = %d\n", hdr->pnfs_error);
if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
@@ -1512,50 +1507,42 @@ static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data)
pnfs_return_layout(hdr->inode);
}
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
- data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
- &hdr->pages,
- hdr->completion_ops,
- hdr->dreq);
+ hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr);
}
/*
* Called by non rpc-based layout drivers
*/
-void pnfs_ld_write_done(struct nfs_pgio_data *data)
+void pnfs_ld_write_done(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
-
- trace_nfs4_pnfs_write(data, hdr->pnfs_error);
+ trace_nfs4_pnfs_write(hdr, hdr->pnfs_error);
if (!hdr->pnfs_error) {
- pnfs_set_layoutcommit(data);
- hdr->mds_ops->rpc_call_done(&data->task, data);
+ pnfs_set_layoutcommit(hdr);
+ hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
} else
- pnfs_ld_handle_write_error(data);
- hdr->mds_ops->rpc_release(data);
+ pnfs_ld_handle_write_error(hdr);
+ hdr->mds_ops->rpc_release(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
static void
pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_data *data)
+ struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
-
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
list_splice_tail_init(&hdr->pages, &desc->pg_list);
nfs_pageio_reset_write_mds(desc);
desc->pg_recoalesce = 1;
}
- nfs_pgio_data_release(data);
+ nfs_pgio_data_destroy(hdr);
}
static enum pnfs_try_status
-pnfs_try_to_write_data(struct nfs_pgio_data *wdata,
+pnfs_try_to_write_data(struct nfs_pgio_header *hdr,
const struct rpc_call_ops *call_ops,
struct pnfs_layout_segment *lseg,
int how)
{
- struct nfs_pgio_header *hdr = wdata->header;
struct inode *inode = hdr->inode;
enum pnfs_try_status trypnfs;
struct nfs_server *nfss = NFS_SERVER(inode);
@@ -1563,8 +1550,8 @@ pnfs_try_to_write_data(struct nfs_pgio_data *wdata,
hdr->mds_ops = call_ops;
dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
- inode->i_ino, wdata->args.count, wdata->args.offset, how);
- trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
+ inode->i_ino, hdr->args.count, hdr->args.offset, how);
+ trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how);
if (trypnfs != PNFS_NOT_ATTEMPTED)
nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
@@ -1575,139 +1562,105 @@ static void
pnfs_do_write(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr, int how)
{
- struct nfs_pgio_data *data = hdr->data;
const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
struct pnfs_layout_segment *lseg = desc->pg_lseg;
enum pnfs_try_status trypnfs;
desc->pg_lseg = NULL;
- trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
+ trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how);
if (trypnfs == PNFS_NOT_ATTEMPTED)
- pnfs_write_through_mds(desc, data);
+ pnfs_write_through_mds(desc, hdr);
pnfs_put_lseg(lseg);
}
static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
{
pnfs_put_lseg(hdr->lseg);
- nfs_rw_header_free(hdr);
+ nfs_pgio_header_free(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
int
pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
- struct nfs_rw_header *whdr;
struct nfs_pgio_header *hdr;
int ret;
- whdr = nfs_rw_header_alloc(desc->pg_rw_ops);
- if (!whdr) {
+ hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
+ if (!hdr) {
desc->pg_completion_ops->error_cleanup(&desc->pg_list);
pnfs_put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
return -ENOMEM;
}
- hdr = &whdr->header;
nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
- atomic_inc(&hdr->refcnt);
ret = nfs_generic_pgio(desc, hdr);
if (ret != 0) {
pnfs_put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
} else
pnfs_do_write(desc, hdr, desc->pg_ioflags);
- if (atomic_dec_and_test(&hdr->refcnt))
- hdr->completion_ops->completion(hdr);
return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
-int pnfs_read_done_resend_to_mds(struct inode *inode,
- struct list_head *head,
- const struct nfs_pgio_completion_ops *compl_ops,
- struct nfs_direct_req *dreq)
+int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr)
{
struct nfs_pageio_descriptor pgio;
- LIST_HEAD(failed);
/* Resend all requests through the MDS */
- nfs_pageio_init_read(&pgio, inode, true, compl_ops);
- pgio.pg_dreq = dreq;
- while (!list_empty(head)) {
- struct nfs_page *req = nfs_list_entry(head->next);
-
- nfs_list_remove_request(req);
- if (!nfs_pageio_add_request(&pgio, req))
- nfs_list_add_request(req, &failed);
- }
- nfs_pageio_complete(&pgio);
-
- if (!list_empty(&failed)) {
- list_move(&failed, head);
- return -EIO;
- }
- return 0;
+ nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops);
+ return nfs_pageio_resend(&pgio, hdr);
}
EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
-static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data)
+static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
-
dprintk("pnfs read error = %d\n", hdr->pnfs_error);
if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
PNFS_LAYOUTRET_ON_ERROR) {
pnfs_return_layout(hdr->inode);
}
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
- data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
- &hdr->pages,
- hdr->completion_ops,
- hdr->dreq);
+ hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr);
}
/*
* Called by non rpc-based layout drivers
*/
-void pnfs_ld_read_done(struct nfs_pgio_data *data)
+void pnfs_ld_read_done(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
-
- trace_nfs4_pnfs_read(data, hdr->pnfs_error);
+ trace_nfs4_pnfs_read(hdr, hdr->pnfs_error);
if (likely(!hdr->pnfs_error)) {
- __nfs4_read_done_cb(data);
- hdr->mds_ops->rpc_call_done(&data->task, data);
+ __nfs4_read_done_cb(hdr);
+ hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
} else
- pnfs_ld_handle_read_error(data);
- hdr->mds_ops->rpc_release(data);
+ pnfs_ld_handle_read_error(hdr);
+ hdr->mds_ops->rpc_release(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
static void
pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_data *data)
+ struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
-
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
list_splice_tail_init(&hdr->pages, &desc->pg_list);
nfs_pageio_reset_read_mds(desc);
desc->pg_recoalesce = 1;
}
- nfs_pgio_data_release(data);
+ nfs_pgio_data_destroy(hdr);
}
/*
* Call the appropriate parallel I/O subsystem read function.
*/
static enum pnfs_try_status
-pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
+pnfs_try_to_read_data(struct nfs_pgio_header *hdr,
const struct rpc_call_ops *call_ops,
struct pnfs_layout_segment *lseg)
{
- struct nfs_pgio_header *hdr = rdata->header;
struct inode *inode = hdr->inode;
struct nfs_server *nfss = NFS_SERVER(inode);
enum pnfs_try_status trypnfs;
@@ -1715,9 +1668,9 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
hdr->mds_ops = call_ops;
dprintk("%s: Reading ino:%lu %u@%llu\n",
- __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
+ __func__, inode->i_ino, hdr->args.count, hdr->args.offset);
- trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
+ trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr);
if (trypnfs != PNFS_NOT_ATTEMPTED)
nfs_inc_stats(inode, NFSIOS_PNFS_READ);
dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
@@ -1727,52 +1680,46 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
static void
pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_data *data = hdr->data;
const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
struct pnfs_layout_segment *lseg = desc->pg_lseg;
enum pnfs_try_status trypnfs;
desc->pg_lseg = NULL;
- trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
+ trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg);
if (trypnfs == PNFS_NOT_ATTEMPTED)
- pnfs_read_through_mds(desc, data);
+ pnfs_read_through_mds(desc, hdr);
pnfs_put_lseg(lseg);
}
static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
{
pnfs_put_lseg(hdr->lseg);
- nfs_rw_header_free(hdr);
+ nfs_pgio_header_free(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
int
pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
- struct nfs_rw_header *rhdr;
struct nfs_pgio_header *hdr;
int ret;
- rhdr = nfs_rw_header_alloc(desc->pg_rw_ops);
- if (!rhdr) {
+ hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
+ if (!hdr) {
desc->pg_completion_ops->error_cleanup(&desc->pg_list);
ret = -ENOMEM;
pnfs_put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
return ret;
}
- hdr = &rhdr->header;
nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
- atomic_inc(&hdr->refcnt);
ret = nfs_generic_pgio(desc, hdr);
if (ret != 0) {
pnfs_put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
} else
pnfs_do_read(desc, hdr);
- if (atomic_dec_and_test(&hdr->refcnt))
- hdr->completion_ops->completion(hdr);
return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
@@ -1820,12 +1767,11 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
void
-pnfs_set_layoutcommit(struct nfs_pgio_data *wdata)
+pnfs_set_layoutcommit(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = wdata->header;
struct inode *inode = hdr->inode;
struct nfs_inode *nfsi = NFS_I(inode);
- loff_t end_pos = wdata->mds_offset + wdata->res.count;
+ loff_t end_pos = hdr->mds_offset + hdr->res.count;
bool mark_as_dirty = false;
spin_lock(&inode->i_lock);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 4fb309a2b4c..aca3dff5dae 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -32,6 +32,7 @@
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
+#include <linux/workqueue.h>
enum {
NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */
@@ -46,6 +47,7 @@ struct pnfs_layout_segment {
atomic_t pls_refcount;
unsigned long pls_flags;
struct pnfs_layout_hdr *pls_layout;
+ struct work_struct pls_work;
};
enum pnfs_try_status {
@@ -104,6 +106,8 @@ struct pnfs_layoutdriver_type {
int max);
void (*recover_commit_reqs) (struct list_head *list,
struct nfs_commit_info *cinfo);
+ struct nfs_page * (*search_commit_reqs)(struct nfs_commit_info *cinfo,
+ struct page *page);
int (*commit_pagelist)(struct inode *inode,
struct list_head *mds_pages,
int how,
@@ -113,8 +117,8 @@ struct pnfs_layoutdriver_type {
* Return PNFS_ATTEMPTED to indicate the layout code has attempted
* I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
*/
- enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data);
- enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how);
+ enum pnfs_try_status (*read_pagelist)(struct nfs_pgio_header *);
+ enum pnfs_try_status (*write_pagelist)(struct nfs_pgio_header *, int);
void (*free_deviceid_node) (struct nfs4_deviceid_node *);
@@ -179,6 +183,7 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
/* pnfs.c */
void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
+void pnfs_put_lseg_async(struct pnfs_layout_segment *lseg);
void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
void unset_pnfs_layoutdriver(struct nfs_server *);
@@ -213,13 +218,13 @@ bool pnfs_roc(struct inode *ino);
void pnfs_roc_release(struct inode *ino);
void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task);
-void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata);
+void pnfs_set_layoutcommit(struct nfs_pgio_header *);
void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
int _pnfs_return_layout(struct inode *);
int pnfs_commit_and_return_layout(struct inode *);
-void pnfs_ld_write_done(struct nfs_pgio_data *);
-void pnfs_ld_read_done(struct nfs_pgio_data *);
+void pnfs_ld_write_done(struct nfs_pgio_header *);
+void pnfs_ld_read_done(struct nfs_pgio_header *);
struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
struct nfs_open_context *ctx,
loff_t pos,
@@ -228,12 +233,8 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
gfp_t gfp_flags);
void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
-int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head,
- const struct nfs_pgio_completion_ops *compl_ops,
- struct nfs_direct_req *dreq);
-int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head,
- const struct nfs_pgio_completion_ops *compl_ops,
- struct nfs_direct_req *dreq);
+int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *);
+int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *);
struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
/* nfs4_deviceid_flags */
@@ -345,6 +346,17 @@ pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo);
}
+static inline struct nfs_page *
+pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
+ struct page *page)
+{
+ struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
+
+ if (ld == NULL || ld->search_commit_reqs == NULL)
+ return NULL;
+ return ld->search_commit_reqs(cinfo, page);
+}
+
/* Should the pNFS client commit and return the layout upon a setattr */
static inline bool
pnfs_ld_layoutret_on_setattr(struct inode *inode)
@@ -410,6 +422,10 @@ static inline void pnfs_put_lseg(struct pnfs_layout_segment *lseg)
{
}
+static inline void pnfs_put_lseg_async(struct pnfs_layout_segment *lseg)
+{
+}
+
static inline int pnfs_return_layout(struct inode *ino)
{
return 0;
@@ -496,6 +512,13 @@ pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
{
}
+static inline struct nfs_page *
+pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
+ struct page *page)
+{
+ return NULL;
+}
+
static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
{
return 0;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index c171ce1a8a3..b09cc23d6f4 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -578,46 +578,49 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return 0;
}
-static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
- struct inode *inode = data->header->inode;
+ struct inode *inode = hdr->inode;
nfs_invalidate_atime(inode);
if (task->tk_status >= 0) {
- nfs_refresh_inode(inode, data->res.fattr);
+ nfs_refresh_inode(inode, hdr->res.fattr);
/* Emulate the eof flag, which isn't normally needed in NFSv2
* as it is guaranteed to always return the file attributes
*/
- if (data->args.offset + data->res.count >= data->res.fattr->size)
- data->res.eof = 1;
+ if (hdr->args.offset + hdr->res.count >= hdr->res.fattr->size)
+ hdr->res.eof = 1;
}
return 0;
}
-static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
+static void nfs_proc_read_setup(struct nfs_pgio_header *hdr,
+ struct rpc_message *msg)
{
msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
}
-static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
{
rpc_call_start(task);
return 0;
}
-static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
- struct inode *inode = data->header->inode;
+ struct inode *inode = hdr->inode;
if (task->tk_status >= 0)
- nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
+ nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
return 0;
}
-static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
+static void nfs_proc_write_setup(struct nfs_pgio_header *hdr,
+ struct rpc_message *msg)
{
/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
- data->args.stable = NFS_FILE_SYNC;
+ hdr->args.stable = NFS_FILE_SYNC;
msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index e818a475ca6..beff2769c5c 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -33,12 +33,12 @@ static const struct nfs_rw_ops nfs_rw_read_ops;
static struct kmem_cache *nfs_rdata_cachep;
-static struct nfs_rw_header *nfs_readhdr_alloc(void)
+static struct nfs_pgio_header *nfs_readhdr_alloc(void)
{
return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
}
-static void nfs_readhdr_free(struct nfs_rw_header *rhdr)
+static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
{
kmem_cache_free(nfs_rdata_cachep, rhdr);
}
@@ -115,12 +115,6 @@ static void nfs_readpage_release(struct nfs_page *req)
unlock_page(req->wb_page);
}
-
- dprintk("NFS: read done (%s/%Lu %d@%Ld)\n",
- req->wb_context->dentry->d_inode->i_sb->s_id,
- (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
- req->wb_bytes,
- (long long)req_offset(req));
nfs_release_request(req);
}
@@ -172,14 +166,15 @@ out:
hdr->release(hdr);
}
-static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg,
+static void nfs_initiate_read(struct nfs_pgio_header *hdr,
+ struct rpc_message *msg,
struct rpc_task_setup *task_setup_data, int how)
{
- struct inode *inode = data->header->inode;
+ struct inode *inode = hdr->inode;
int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
task_setup_data->flags |= swap_flags;
- NFS_PROTO(inode)->read_setup(data, msg);
+ NFS_PROTO(inode)->read_setup(hdr, msg);
}
static void
@@ -203,14 +198,15 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
* This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
*/
-static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data,
+static int nfs_readpage_done(struct rpc_task *task,
+ struct nfs_pgio_header *hdr,
struct inode *inode)
{
- int status = NFS_PROTO(inode)->read_done(task, data);
+ int status = NFS_PROTO(inode)->read_done(task, hdr);
if (status != 0)
return status;
- nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count);
+ nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count);
if (task->tk_status == -ESTALE) {
set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
@@ -219,34 +215,34 @@ static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data,
return 0;
}
-static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data)
+static void nfs_readpage_retry(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_args *argp = &data->args;
- struct nfs_pgio_res *resp = &data->res;
+ struct nfs_pgio_args *argp = &hdr->args;
+ struct nfs_pgio_res *resp = &hdr->res;
/* This is a short read! */
- nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
+ nfs_inc_stats(hdr->inode, NFSIOS_SHORTREAD);
/* Has the server at least made some progress? */
if (resp->count == 0) {
- nfs_set_pgio_error(data->header, -EIO, argp->offset);
+ nfs_set_pgio_error(hdr, -EIO, argp->offset);
return;
}
- /* Yes, so retry the read at the end of the data */
- data->mds_offset += resp->count;
+ /* Yes, so retry the read at the end of the data already read */
+ hdr->mds_offset += resp->count;
argp->offset += resp->count;
argp->pgbase += resp->count;
argp->count -= resp->count;
rpc_restart_call_prepare(task);
}
-static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data)
+static void nfs_readpage_result(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
-
- if (data->res.eof) {
+ if (hdr->res.eof) {
loff_t bound;
- bound = data->args.offset + data->res.count;
+ bound = hdr->args.offset + hdr->res.count;
spin_lock(&hdr->lock);
if (bound < hdr->io_start + hdr->good_bytes) {
set_bit(NFS_IOHDR_EOF, &hdr->flags);
@@ -254,8 +250,8 @@ static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *dat
hdr->good_bytes = bound - hdr->io_start;
}
spin_unlock(&hdr->lock);
- } else if (data->res.count != data->args.count)
- nfs_readpage_retry(task, data);
+ } else if (hdr->res.count != hdr->args.count)
+ nfs_readpage_retry(task, hdr);
}
/*
@@ -404,7 +400,7 @@ out:
int __init nfs_init_readpagecache(void)
{
nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
- sizeof(struct nfs_rw_header),
+ sizeof(struct nfs_pgio_header),
0, SLAB_HWCACHE_ALIGN,
NULL);
if (nfs_rdata_cachep == NULL)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 084af1060d7..e4499d5b51e 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1027,8 +1027,7 @@ static bool nfs_auth_info_add(struct nfs_auth_info *auth_info,
rpc_authflavor_t flavor)
{
unsigned int i;
- unsigned int max_flavor_len = (sizeof(auth_info->flavors) /
- sizeof(auth_info->flavors[0]));
+ unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors);
/* make sure this flavor isn't already in the list */
for (i = 0; i < auth_info->flavor_len; i++) {
@@ -2180,7 +2179,7 @@ out_no_address:
return -EINVAL;
}
-#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
+#define NFS_REMOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
| NFS_MOUNT_SECURE \
| NFS_MOUNT_TCP \
| NFS_MOUNT_VER3 \
@@ -2188,15 +2187,16 @@ out_no_address:
| NFS_MOUNT_NONLM \
| NFS_MOUNT_BROKEN_SUID \
| NFS_MOUNT_STRICTLOCK \
- | NFS_MOUNT_UNSHARED \
- | NFS_MOUNT_NORESVPORT \
| NFS_MOUNT_LEGACY_INTERFACE)
+#define NFS_MOUNT_CMP_FLAGMASK (NFS_REMOUNT_CMP_FLAGMASK & \
+ ~(NFS_MOUNT_UNSHARED | NFS_MOUNT_NORESVPORT))
+
static int
nfs_compare_remount_data(struct nfs_server *nfss,
struct nfs_parsed_mount_data *data)
{
- if ((data->flags ^ nfss->flags) & NFS_MOUNT_CMP_FLAGMASK ||
+ if ((data->flags ^ nfss->flags) & NFS_REMOUNT_CMP_FLAGMASK ||
data->rsize != nfss->rsize ||
data->wsize != nfss->wsize ||
data->version != nfss->nfs_client->rpc_ops->version ||
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 962c9ee758b..175d5d073cc 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -47,6 +47,8 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
static const struct nfs_rw_ops nfs_rw_write_ops;
static void nfs_clear_request_commit(struct nfs_page *req);
+static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
+ struct inode *inode);
static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
@@ -71,18 +73,18 @@ void nfs_commit_free(struct nfs_commit_data *p)
}
EXPORT_SYMBOL_GPL(nfs_commit_free);
-static struct nfs_rw_header *nfs_writehdr_alloc(void)
+static struct nfs_pgio_header *nfs_writehdr_alloc(void)
{
- struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
+ struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
if (p)
memset(p, 0, sizeof(*p));
return p;
}
-static void nfs_writehdr_free(struct nfs_rw_header *whdr)
+static void nfs_writehdr_free(struct nfs_pgio_header *hdr)
{
- mempool_free(whdr, nfs_wdata_mempool);
+ mempool_free(hdr, nfs_wdata_mempool);
}
static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
@@ -93,6 +95,38 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
}
/*
+ * nfs_page_search_commits_for_head_request_locked
+ *
+ * Search through commit lists on @inode for the head request for @page.
+ * Must be called while holding the inode (which is cinfo) lock.
+ *
+ * Returns the head request if found, or NULL if not found.
+ */
+static struct nfs_page *
+nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
+ struct page *page)
+{
+ struct nfs_page *freq, *t;
+ struct nfs_commit_info cinfo;
+ struct inode *inode = &nfsi->vfs_inode;
+
+ nfs_init_cinfo_from_inode(&cinfo, inode);
+
+ /* search through pnfs commit lists */
+ freq = pnfs_search_commit_reqs(inode, &cinfo, page);
+ if (freq)
+ return freq->wb_head;
+
+ /* Linearly search the commit list for the correct request */
+ list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) {
+ if (freq->wb_page == page)
+ return freq->wb_head;
+ }
+
+ return NULL;
+}
+
+/*
* nfs_page_find_head_request_locked - find head request associated with @page
*
* must be called while holding the inode lock.
@@ -106,21 +140,12 @@ nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
if (PagePrivate(page))
req = (struct nfs_page *)page_private(page);
- else if (unlikely(PageSwapCache(page))) {
- struct nfs_page *freq, *t;
-
- /* Linearly search the commit list for the correct req */
- list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) {
- if (freq->wb_page == page) {
- req = freq->wb_head;
- break;
- }
- }
- }
+ else if (unlikely(PageSwapCache(page)))
+ req = nfs_page_search_commits_for_head_request_locked(nfsi,
+ page);
if (req) {
WARN_ON_ONCE(req->wb_head != req);
-
kref_get(&req->wb_kref);
}
@@ -216,7 +241,7 @@ static bool nfs_page_group_covers_page(struct nfs_page *req)
unsigned int pos = 0;
unsigned int len = nfs_page_length(req->wb_page);
- nfs_page_group_lock(req);
+ nfs_page_group_lock(req, false);
do {
tmp = nfs_page_group_search_locked(req->wb_head, pos);
@@ -379,8 +404,6 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
subreq->wb_head = subreq;
subreq->wb_this_page = subreq;
- nfs_clear_request_commit(subreq);
-
/* subreq is now totally disconnected from page group or any
* write / commit lists. last chance to wake any waiters */
nfs_unlock_request(subreq);
@@ -455,8 +478,23 @@ try_again:
return NULL;
}
+ /* holding inode lock, so always make a non-blocking call to try the
+ * page group lock */
+ ret = nfs_page_group_lock(head, true);
+ if (ret < 0) {
+ spin_unlock(&inode->i_lock);
+
+ if (!nonblock && ret == -EAGAIN) {
+ nfs_page_group_lock_wait(head);
+ nfs_release_request(head);
+ goto try_again;
+ }
+
+ nfs_release_request(head);
+ return ERR_PTR(ret);
+ }
+
/* lock each request in the page group */
- nfs_page_group_lock(head);
subreq = head;
do {
/*
@@ -488,7 +526,7 @@ try_again:
* Commit list removal accounting is done after locks are dropped */
subreq = head;
do {
- nfs_list_remove_request(subreq);
+ nfs_clear_request_commit(subreq);
subreq = subreq->wb_this_page;
} while (subreq != head);
@@ -518,15 +556,11 @@ try_again:
nfs_page_group_unlock(head);
- /* drop lock to clear_request_commit the head req and clean up
- * requests on destroy list */
+ /* drop lock to clean up requests on destroy list */
spin_unlock(&inode->i_lock);
nfs_destroy_unlinked_subrequests(destroy_list, head);
- /* clean up commit list state */
- nfs_clear_request_commit(head);
-
/* still holds ref on head from nfs_page_find_head_request_locked
* and still has lock on head from lock loop */
return head;
@@ -705,6 +739,8 @@ static void nfs_inode_remove_request(struct nfs_page *req)
if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
nfs_release_request(req);
+ else
+ WARN_ON_ONCE(1);
}
static void
@@ -808,6 +844,7 @@ nfs_clear_page_commit(struct page *page)
dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE);
}
+/* Called holding inode (/cinfo) lock */
static void
nfs_clear_request_commit(struct nfs_page *req)
{
@@ -817,20 +854,17 @@ nfs_clear_request_commit(struct nfs_page *req)
nfs_init_cinfo_from_inode(&cinfo, inode);
if (!pnfs_clear_request_commit(req, &cinfo)) {
- spin_lock(cinfo.lock);
nfs_request_remove_commit_list(req, &cinfo);
- spin_unlock(cinfo.lock);
}
nfs_clear_page_commit(req->wb_page);
}
}
-static inline
-int nfs_write_need_commit(struct nfs_pgio_data *data)
+int nfs_write_need_commit(struct nfs_pgio_header *hdr)
{
- if (data->verf.committed == NFS_DATA_SYNC)
- return data->header->lseg == NULL;
- return data->verf.committed != NFS_FILE_SYNC;
+ if (hdr->verf.committed == NFS_DATA_SYNC)
+ return hdr->lseg == NULL;
+ return hdr->verf.committed != NFS_FILE_SYNC;
}
#else
@@ -856,8 +890,7 @@ nfs_clear_request_commit(struct nfs_page *req)
{
}
-static inline
-int nfs_write_need_commit(struct nfs_pgio_data *data)
+int nfs_write_need_commit(struct nfs_pgio_header *hdr)
{
return 0;
}
@@ -883,11 +916,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
nfs_context_set_write_error(req->wb_context, hdr->error);
goto remove_req;
}
- if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
- nfs_mark_request_dirty(req);
- goto next;
- }
- if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
+ if (nfs_write_need_commit(hdr)) {
memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
nfs_mark_request_commit(req, hdr->lseg, &cinfo);
goto next;
@@ -1038,9 +1067,9 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
else
req->wb_bytes = rqend - req->wb_offset;
out_unlock:
- spin_unlock(&inode->i_lock);
if (req)
nfs_clear_request_commit(req);
+ spin_unlock(&inode->i_lock);
return req;
out_flushme:
spin_unlock(&inode->i_lock);
@@ -1241,17 +1270,18 @@ static int flush_task_priority(int how)
return RPC_PRIORITY_NORMAL;
}
-static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg,
+static void nfs_initiate_write(struct nfs_pgio_header *hdr,
+ struct rpc_message *msg,
struct rpc_task_setup *task_setup_data, int how)
{
- struct inode *inode = data->header->inode;
+ struct inode *inode = hdr->inode;
int priority = flush_task_priority(how);
task_setup_data->priority = priority;
- NFS_PROTO(inode)->write_setup(data, msg);
+ NFS_PROTO(inode)->write_setup(hdr, msg);
nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client,
- &task_setup_data->rpc_client, msg, data);
+ &task_setup_data->rpc_client, msg, hdr);
}
/* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -1313,21 +1343,9 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
}
-static void nfs_writeback_release_common(struct nfs_pgio_data *data)
+static void nfs_writeback_release_common(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
- int status = data->task.tk_status;
-
- if ((status >= 0) && nfs_write_need_commit(data)) {
- spin_lock(&hdr->lock);
- if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
- ; /* Do nothing */
- else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
- memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf));
- else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf)))
- set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
- spin_unlock(&hdr->lock);
- }
+ /* do nothing! */
}
/*
@@ -1358,7 +1376,8 @@ static int nfs_should_remove_suid(const struct inode *inode)
/*
* This function is called when the WRITE call is complete.
*/
-static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
+static int nfs_writeback_done(struct rpc_task *task,
+ struct nfs_pgio_header *hdr,
struct inode *inode)
{
int status;
@@ -1370,13 +1389,14 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
* another writer had changed the file, but some applications
* depend on tighter cache coherency when writing.
*/
- status = NFS_PROTO(inode)->write_done(task, data);
+ status = NFS_PROTO(inode)->write_done(task, hdr);
if (status != 0)
return status;
- nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count);
+ nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count);
#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
- if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) {
+ if (hdr->res.verf->committed < hdr->args.stable &&
+ task->tk_status >= 0) {
/* We tried a write call, but the server did not
* commit data to stable storage even though we
* requested it.
@@ -1392,7 +1412,7 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
dprintk("NFS: faulty NFS server %s:"
" (committed = %d) != (stable = %d)\n",
NFS_SERVER(inode)->nfs_client->cl_hostname,
- data->res.verf->committed, data->args.stable);
+ hdr->res.verf->committed, hdr->args.stable);
complain = jiffies + 300 * HZ;
}
}
@@ -1407,16 +1427,17 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
/*
* This function is called when the WRITE call is complete.
*/
-static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data)
+static void nfs_writeback_result(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_args *argp = &data->args;
- struct nfs_pgio_res *resp = &data->res;
+ struct nfs_pgio_args *argp = &hdr->args;
+ struct nfs_pgio_res *resp = &hdr->res;
if (resp->count < argp->count) {
static unsigned long complain;
/* This a short write! */
- nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE);
+ nfs_inc_stats(hdr->inode, NFSIOS_SHORTWRITE);
/* Has the server at least made some progress? */
if (resp->count == 0) {
@@ -1426,14 +1447,14 @@ static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *da
argp->count);
complain = jiffies + 300 * HZ;
}
- nfs_set_pgio_error(data->header, -EIO, argp->offset);
+ nfs_set_pgio_error(hdr, -EIO, argp->offset);
task->tk_status = -EIO;
return;
}
/* Was this an NFSv2 write or an NFSv3 stable write? */
if (resp->verf->committed != NFS_UNSTABLE) {
/* Resend from where the server left off */
- data->mds_offset += resp->count;
+ hdr->mds_offset += resp->count;
argp->offset += resp->count;
argp->pgbase += resp->count;
argp->count -= resp->count;
@@ -1884,7 +1905,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
int __init nfs_init_writepagecache(void)
{
nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
- sizeof(struct nfs_rw_header),
+ sizeof(struct nfs_pgio_header),
0, SLAB_HWCACHE_ALIGN,
NULL);
if (nfs_wdata_cachep == NULL)
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index ed628f71274..538f142935e 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -30,9 +30,6 @@
MODULE_LICENSE("GPL");
-EXPORT_SYMBOL_GPL(nfsacl_encode);
-EXPORT_SYMBOL_GPL(nfsacl_decode);
-
struct nfsacl_encode_desc {
struct xdr_array2_desc desc;
unsigned int count;
@@ -136,6 +133,7 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
nfsacl_desc.desc.array_len;
return err;
}
+EXPORT_SYMBOL_GPL(nfsacl_encode);
struct nfsacl_decode_desc {
struct xdr_array2_desc desc;
@@ -295,3 +293,4 @@ int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
return 8 + nfsacl_desc.desc.elem_size *
nfsacl_desc.desc.array_len;
}
+EXPORT_SYMBOL_GPL(nfsacl_decode);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index f9821ce6658..e94457c33ad 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2657,6 +2657,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
struct xdr_stream *xdr = cd->xdr;
int start_offset = xdr->buf->len;
int cookie_offset;
+ u32 name_and_cookie;
int entry_bytes;
__be32 nfserr = nfserr_toosmall;
__be64 wire_offset;
@@ -2718,7 +2719,14 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
cd->rd_maxcount -= entry_bytes;
if (!cd->rd_dircount)
goto fail;
- cd->rd_dircount--;
+ /*
+ * RFC 3530 14.2.24 describes rd_dircount as only a "hint", so
+ * let's always let through the first entry, at least:
+ */
+ name_and_cookie = 4 * XDR_QUADLEN(namlen) + 8;
+ if (name_and_cookie > cd->rd_dircount && cd->cookie_offset)
+ goto fail;
+ cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie);
cd->cookie_offset = cookie_offset;
skip_entry:
cd->common.err = nfs_ok;
@@ -3321,6 +3329,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
}
maxcount = min_t(int, maxcount-16, bytes_left);
+ /* RFC 3530 14.2.24 allows us to ignore dircount when it's 0: */
+ if (!readdir->rd_dircount)
+ readdir->rd_dircount = INT_MAX;
+
readdir->xdr = xdr;
readdir->rd_maxcount = maxcount;
readdir->common.err = 0;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index c519927b7b5..228f5bdf077 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -942,7 +942,7 @@ static int nilfs_get_root_dentry(struct super_block *sb,
iput(inode);
}
} else {
- dentry = d_obtain_alias(inode);
+ dentry = d_obtain_root(inode);
if (IS_ERR(dentry)) {
ret = PTR_ERR(dentry);
goto failed_dentry;
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 238a5930cb3..9d7e2b9659c 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -42,7 +42,7 @@ static int show_mark_fhandle(struct seq_file *m, struct inode *inode)
{
struct {
struct file_handle handle;
- u8 pad[64];
+ u8 pad[MAX_HANDLE_SZ];
} f;
int size, ret, i;
@@ -50,7 +50,7 @@ static int show_mark_fhandle(struct seq_file *m, struct inode *inode)
size = f.handle.handle_bytes >> 2;
ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, 0);
- if ((ret == 255) || (ret == -ENOSPC)) {
+ if ((ret == FILEID_INVALID) || (ret < 0)) {
WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret);
return 0;
}
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index 1ec141e758d..62e8ec619b4 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -160,9 +160,18 @@ static void o2quo_make_decision(struct work_struct *work)
}
out:
- spin_unlock(&qs->qs_lock);
- if (fence)
+ if (fence) {
+ spin_unlock(&qs->qs_lock);
o2quo_fence_self();
+ } else {
+ mlog(ML_NOTICE, "not fencing this node, heartbeating: %d, "
+ "connected: %d, lowest: %d (%sreachable)\n",
+ qs->qs_heartbeating, qs->qs_connected, lowest_hb,
+ lowest_reachable ? "" : "un");
+ spin_unlock(&qs->qs_lock);
+
+ }
+
}
static void o2quo_set_hold(struct o2quo_state *qs, u8 node)
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 681691bc233..ea34952f949 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1480,6 +1480,14 @@ static int o2net_set_nodelay(struct socket *sock)
return ret;
}
+static int o2net_set_usertimeout(struct socket *sock)
+{
+ int user_timeout = O2NET_TCP_USER_TIMEOUT;
+
+ return kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
+ (char *)&user_timeout, sizeof(user_timeout));
+}
+
static void o2net_initialize_handshake(void)
{
o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32(
@@ -1536,16 +1544,20 @@ static void o2net_idle_timer(unsigned long data)
#endif
printk(KERN_NOTICE "o2net: Connection to " SC_NODEF_FMT " has been "
- "idle for %lu.%lu secs, shutting it down.\n", SC_NODEF_ARGS(sc),
- msecs / 1000, msecs % 1000);
+ "idle for %lu.%lu secs.\n",
+ SC_NODEF_ARGS(sc), msecs / 1000, msecs % 1000);
- /*
- * Initialize the nn_timeout so that the next connection attempt
- * will continue in o2net_start_connect.
+ /* An idle timeout happened; don't shut down the connection, but
+ * make a fence decision. The connection may recover before
+ * the decision is made.
*/
atomic_set(&nn->nn_timeout, 1);
+ o2quo_conn_err(o2net_num_from_nn(nn));
+ queue_delayed_work(o2net_wq, &nn->nn_still_up,
+ msecs_to_jiffies(O2NET_QUORUM_DELAY_MS));
+
+ o2net_sc_reset_idle_timer(sc);
- o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
}
static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
@@ -1560,6 +1572,15 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
{
+ struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
+
+ /* clear the fence decision since the connection recovered from the timeout */
+ if (atomic_read(&nn->nn_timeout)) {
+ o2quo_conn_up(o2net_num_from_nn(nn));
+ cancel_delayed_work(&nn->nn_still_up);
+ atomic_set(&nn->nn_timeout, 0);
+ }
+
/* Only push out an existing timer */
if (timer_pending(&sc->sc_idle_timeout))
o2net_sc_reset_idle_timer(sc);
@@ -1650,6 +1671,12 @@ static void o2net_start_connect(struct work_struct *work)
goto out;
}
+ ret = o2net_set_usertimeout(sock);
+ if (ret) {
+ mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret);
+ goto out;
+ }
+
o2net_register_callbacks(sc->sc_sock->sk, sc);
spin_lock(&nn->nn_lock);
@@ -1831,6 +1858,12 @@ static int o2net_accept_one(struct socket *sock, int *more)
goto out;
}
+ ret = o2net_set_usertimeout(new_sock);
+ if (ret) {
+ mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret);
+ goto out;
+ }
+
slen = sizeof(sin);
ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin,
&slen, 1);
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h
index 5bada2a69b5..c571e849fda 100644
--- a/fs/ocfs2/cluster/tcp.h
+++ b/fs/ocfs2/cluster/tcp.h
@@ -63,6 +63,7 @@ typedef void (o2net_post_msg_handler_func)(int status, void *data,
#define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 2000
#define O2NET_IDLE_TIMEOUT_MS_DEFAULT 30000
+#define O2NET_TCP_USER_TIMEOUT 0x7fffffff
/* TODO: figure this out.... */
static inline int o2net_link_down(int err, struct socket *sock)
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 6f66b3751ac..53e6c40ed4c 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -35,9 +35,8 @@
copy_to_user((typeof(a) __user *)b, &(a), sizeof(a))
/*
- * This call is void because we are already reporting an error that may
- * be -EFAULT. The error will be returned from the ioctl(2) call. It's
- * just a best-effort to tell userspace that this request caused the error.
+ * This is just a best-effort to tell userspace that this request
+ * caused the error.
*/
static inline void o2info_set_request_error(struct ocfs2_info_request *kreq,
struct ocfs2_info_request __user *req)
@@ -146,136 +145,105 @@ bail:
static int ocfs2_info_handle_blocksize(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_blocksize oib;
if (o2info_from_user(oib, req))
- goto bail;
+ return -EFAULT;
oib.ib_blocksize = inode->i_sb->s_blocksize;
o2info_set_request_filled(&oib.ib_req);
if (o2info_to_user(oib, req))
- goto bail;
-
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oib.ib_req, req);
+ return -EFAULT;
- return status;
+ return 0;
}
static int ocfs2_info_handle_clustersize(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_clustersize oic;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oic, req))
- goto bail;
+ return -EFAULT;
oic.ic_clustersize = osb->s_clustersize;
o2info_set_request_filled(&oic.ic_req);
if (o2info_to_user(oic, req))
- goto bail;
-
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oic.ic_req, req);
+ return -EFAULT;
- return status;
+ return 0;
}
static int ocfs2_info_handle_maxslots(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_maxslots oim;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oim, req))
- goto bail;
+ return -EFAULT;
oim.im_max_slots = osb->max_slots;
o2info_set_request_filled(&oim.im_req);
if (o2info_to_user(oim, req))
- goto bail;
+ return -EFAULT;
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oim.im_req, req);
-
- return status;
+ return 0;
}
static int ocfs2_info_handle_label(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_label oil;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oil, req))
- goto bail;
+ return -EFAULT;
memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN);
o2info_set_request_filled(&oil.il_req);
if (o2info_to_user(oil, req))
- goto bail;
+ return -EFAULT;
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oil.il_req, req);
-
- return status;
+ return 0;
}
static int ocfs2_info_handle_uuid(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_uuid oiu;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oiu, req))
- goto bail;
+ return -EFAULT;
memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1);
o2info_set_request_filled(&oiu.iu_req);
if (o2info_to_user(oiu, req))
- goto bail;
-
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oiu.iu_req, req);
+ return -EFAULT;
- return status;
+ return 0;
}
static int ocfs2_info_handle_fs_features(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_fs_features oif;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oif, req))
- goto bail;
+ return -EFAULT;
oif.if_compat_features = osb->s_feature_compat;
oif.if_incompat_features = osb->s_feature_incompat;
@@ -284,39 +252,28 @@ static int ocfs2_info_handle_fs_features(struct inode *inode,
o2info_set_request_filled(&oif.if_req);
if (o2info_to_user(oif, req))
- goto bail;
+ return -EFAULT;
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oif.if_req, req);
-
- return status;
+ return 0;
}
static int ocfs2_info_handle_journal_size(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_journal_size oij;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oij, req))
- goto bail;
+ return -EFAULT;
oij.ij_journal_size = i_size_read(osb->journal->j_inode);
o2info_set_request_filled(&oij.ij_req);
if (o2info_to_user(oij, req))
- goto bail;
+ return -EFAULT;
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oij.ij_req, req);
-
- return status;
+ return 0;
}
static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb,
@@ -373,7 +330,7 @@ static int ocfs2_info_handle_freeinode(struct inode *inode,
u32 i;
u64 blkno = -1;
char namebuf[40];
- int status = -EFAULT, type = INODE_ALLOC_SYSTEM_INODE;
+ int status, type = INODE_ALLOC_SYSTEM_INODE;
struct ocfs2_info_freeinode *oifi = NULL;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct inode *inode_alloc = NULL;
@@ -385,8 +342,10 @@ static int ocfs2_info_handle_freeinode(struct inode *inode,
goto out_err;
}
- if (o2info_from_user(*oifi, req))
- goto bail;
+ if (o2info_from_user(*oifi, req)) {
+ status = -EFAULT;
+ goto out_free;
+ }
oifi->ifi_slotnum = osb->max_slots;
@@ -424,14 +383,16 @@ static int ocfs2_info_handle_freeinode(struct inode *inode,
o2info_set_request_filled(&oifi->ifi_req);
- if (o2info_to_user(*oifi, req))
- goto bail;
+ if (o2info_to_user(*oifi, req)) {
+ status = -EFAULT;
+ goto out_free;
+ }
status = 0;
bail:
if (status)
o2info_set_request_error(&oifi->ifi_req, req);
-
+out_free:
kfree(oifi);
out_err:
return status;
@@ -658,7 +619,7 @@ static int ocfs2_info_handle_freefrag(struct inode *inode,
{
u64 blkno = -1;
char namebuf[40];
- int status = -EFAULT, type = GLOBAL_BITMAP_SYSTEM_INODE;
+ int status, type = GLOBAL_BITMAP_SYSTEM_INODE;
struct ocfs2_info_freefrag *oiff;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -671,8 +632,10 @@ static int ocfs2_info_handle_freefrag(struct inode *inode,
goto out_err;
}
- if (o2info_from_user(*oiff, req))
- goto bail;
+ if (o2info_from_user(*oiff, req)) {
+ status = -EFAULT;
+ goto out_free;
+ }
/*
* chunksize from userspace should be power of 2.
*/
@@ -711,14 +674,14 @@ static int ocfs2_info_handle_freefrag(struct inode *inode,
if (o2info_to_user(*oiff, req)) {
status = -EFAULT;
- goto bail;
+ goto out_free;
}
status = 0;
bail:
if (status)
o2info_set_request_error(&oiff->iff_req, req);
-
+out_free:
kfree(oiff);
out_err:
return status;
@@ -727,23 +690,17 @@ out_err:
static int ocfs2_info_handle_unknown(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_request oir;
if (o2info_from_user(oir, req))
- goto bail;
+ return -EFAULT;
o2info_clear_request_filled(&oir);
if (o2info_to_user(oir, req))
- goto bail;
+ return -EFAULT;
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oir, req);
-
- return status;
+ return 0;
}
/*
diff --git a/fs/pnode.c b/fs/pnode.c
index 302bf22c4a3..aae331a5d03 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -381,6 +381,7 @@ static void __propagate_umount(struct mount *mnt)
* other children
*/
if (child && list_empty(&child->mnt_mounts)) {
+ list_del_init(&child->mnt_child);
hlist_del_init_rcu(&child->mnt_hash);
hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash);
}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 7f30bdc57d1..f2d0eee9d1f 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -96,13 +96,16 @@
* Note that some things (eg. sb pointer, type, id) doesn't change during
* the life of the dquot structure and so needn't to be protected by a lock
*
- * Any operation working on dquots via inode pointers must hold dqptr_sem. If
- * operation is just reading pointers from inode (or not using them at all) the
- * read lock is enough. If pointers are altered function must hold write lock.
+ * Operations accessing dquots via inode pointers are protected by dquot_srcu.
+ * Reading a pointer requires srcu_read_lock(&dquot_srcu), and
+ * synchronize_srcu(&dquot_srcu) is called after clearing pointers from the
+ * inode and before dropping dquot references to avoid use of dquots after
+ * they are freed. dq_data_lock is used to serialize the pointer setting and
+ * clearing operations.
* Special care needs to be taken about S_NOQUOTA inode flag (marking that
* inode is a quota file). Functions adding pointers from inode to dquots have
- * to check this flag under dqptr_sem and then (if S_NOQUOTA is not set) they
- * have to do all pointer modifications before dropping dqptr_sem. This makes
+ * to check this flag under dq_data_lock and then (if S_NOQUOTA is not set) they
+ * have to do all pointer modifications before dropping dq_data_lock. This makes
* sure they cannot race with quotaon which first sets S_NOQUOTA flag and
* then drops all pointers to dquots from an inode.
*
@@ -116,21 +119,15 @@
* spinlock to internal buffers before writing.
*
* Lock ordering (including related VFS locks) is the following:
- * dqonoff_mutex > i_mutex > journal_lock > dqptr_sem > dquot->dq_lock >
- * dqio_mutex
+ * dqonoff_mutex > i_mutex > journal_lock > dquot->dq_lock > dqio_mutex
* dqonoff_mutex > i_mutex comes from dquot_quota_sync, dquot_enable, etc.
- * The lock ordering of dqptr_sem imposed by quota code is only dqonoff_sem >
- * dqptr_sem. But filesystem has to count with the fact that functions such as
- * dquot_alloc_space() acquire dqptr_sem and they usually have to be called
- * from inside a transaction to keep filesystem consistency after a crash. Also
- * filesystems usually want to do some IO on dquot from ->mark_dirty which is
- * called with dqptr_sem held.
*/
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock);
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock);
__cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
EXPORT_SYMBOL(dq_data_lock);
+DEFINE_STATIC_SRCU(dquot_srcu);
void __quota_error(struct super_block *sb, const char *func,
const char *fmt, ...)
@@ -733,7 +730,6 @@ static struct shrinker dqcache_shrinker = {
/*
* Put reference to dquot
- * NOTE: If you change this function please check whether dqput_blocks() works right...
*/
void dqput(struct dquot *dquot)
{
@@ -963,46 +959,33 @@ static void add_dquot_ref(struct super_block *sb, int type)
}
/*
- * Return 0 if dqput() won't block.
- * (note that 1 doesn't necessarily mean blocking)
- */
-static inline int dqput_blocks(struct dquot *dquot)
-{
- if (atomic_read(&dquot->dq_count) <= 1)
- return 1;
- return 0;
-}
-
-/*
* Remove references to dquots from inode and add dquot to list for freeing
* if we have the last reference to dquot
- * We can't race with anybody because we hold dqptr_sem for writing...
*/
-static int remove_inode_dquot_ref(struct inode *inode, int type,
- struct list_head *tofree_head)
+static void remove_inode_dquot_ref(struct inode *inode, int type,
+ struct list_head *tofree_head)
{
struct dquot *dquot = inode->i_dquot[type];
inode->i_dquot[type] = NULL;
- if (dquot) {
- if (dqput_blocks(dquot)) {
-#ifdef CONFIG_QUOTA_DEBUG
- if (atomic_read(&dquot->dq_count) != 1)
- quota_error(inode->i_sb, "Adding dquot with "
- "dq_count %d to dispose list",
- atomic_read(&dquot->dq_count));
-#endif
- spin_lock(&dq_list_lock);
- /* As dquot must have currently users it can't be on
- * the free list... */
- list_add(&dquot->dq_free, tofree_head);
- spin_unlock(&dq_list_lock);
- return 1;
- }
- else
- dqput(dquot); /* We have guaranteed we won't block */
+ if (!dquot)
+ return;
+
+ if (list_empty(&dquot->dq_free)) {
+ /*
+ * The inode still holds a reference to the dquot, so it can't be on
+ * the free list.
+ */
+ spin_lock(&dq_list_lock);
+ list_add(&dquot->dq_free, tofree_head);
+ spin_unlock(&dq_list_lock);
+ } else {
+ /*
+ * The dquot is already on a list of dquots to put, so we won't drop
+ * the last reference here.
+ */
+ dqput(dquot);
}
- return 0;
}
/*
@@ -1037,13 +1020,15 @@ static void remove_dquot_ref(struct super_block *sb, int type,
* We have to scan also I_NEW inodes because they can already
* have quota pointer initialized. Luckily, we need to touch
* only quota pointers and these have separate locking
- * (dqptr_sem).
+ * (dq_data_lock).
*/
+ spin_lock(&dq_data_lock);
if (!IS_NOQUOTA(inode)) {
if (unlikely(inode_get_rsv_space(inode) > 0))
reserved = 1;
remove_inode_dquot_ref(inode, type, tofree_head);
}
+ spin_unlock(&dq_data_lock);
}
spin_unlock(&inode_sb_list_lock);
#ifdef CONFIG_QUOTA_DEBUG
@@ -1061,9 +1046,8 @@ static void drop_dquot_ref(struct super_block *sb, int type)
LIST_HEAD(tofree_head);
if (sb->dq_op) {
- down_write(&sb_dqopt(sb)->dqptr_sem);
remove_dquot_ref(sb, type, &tofree_head);
- up_write(&sb_dqopt(sb)->dqptr_sem);
+ synchronize_srcu(&dquot_srcu);
put_dquot_list(&tofree_head);
}
}
@@ -1394,21 +1378,16 @@ static int dquot_active(const struct inode *inode)
/*
* Initialize quota pointers in inode
*
- * We do things in a bit complicated way but by that we avoid calling
- * dqget() and thus filesystem callbacks under dqptr_sem.
- *
* It is better to call this function outside of any transaction as it
* might need a lot of space in journal for dquot structure allocation.
*/
static void __dquot_initialize(struct inode *inode, int type)
{
- int cnt;
+ int cnt, init_needed = 0;
struct dquot *got[MAXQUOTAS];
struct super_block *sb = inode->i_sb;
qsize_t rsv;
- /* First test before acquiring mutex - solves deadlocks when we
- * re-enter the quota code and are already holding the mutex */
if (!dquot_active(inode))
return;
@@ -1418,6 +1397,15 @@ static void __dquot_initialize(struct inode *inode, int type)
got[cnt] = NULL;
if (type != -1 && cnt != type)
continue;
+ /*
+ * In most cases i_dquot has already been initialized, so
+ * we check it without locking here to avoid unnecessary
+ * dqget()/dqput() calls.
+ */
+ if (inode->i_dquot[cnt])
+ continue;
+ init_needed = 1;
+
switch (cnt) {
case USRQUOTA:
qid = make_kqid_uid(inode->i_uid);
@@ -1429,7 +1417,11 @@ static void __dquot_initialize(struct inode *inode, int type)
got[cnt] = dqget(sb, qid);
}
- down_write(&sb_dqopt(sb)->dqptr_sem);
+ /* All required i_dquot pointers have already been initialized */
+ if (!init_needed)
+ return;
+
+ spin_lock(&dq_data_lock);
if (IS_NOQUOTA(inode))
goto out_err;
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1449,15 +1441,12 @@ static void __dquot_initialize(struct inode *inode, int type)
* did a write before quota was turned on
*/
rsv = inode_get_rsv_space(inode);
- if (unlikely(rsv)) {
- spin_lock(&dq_data_lock);
+ if (unlikely(rsv))
dquot_resv_space(inode->i_dquot[cnt], rsv);
- spin_unlock(&dq_data_lock);
- }
}
}
out_err:
- up_write(&sb_dqopt(sb)->dqptr_sem);
+ spin_unlock(&dq_data_lock);
/* Drop unused references */
dqput_all(got);
}
@@ -1469,19 +1458,24 @@ void dquot_initialize(struct inode *inode)
EXPORT_SYMBOL(dquot_initialize);
/*
- * Release all quotas referenced by inode
+ * Release all quotas referenced by inode.
+ *
+ * This function is only called when an inode is freed or when a
+ * file is converted to a quota file. There are no other users of
+ * i_dquot in either case, so we needn't call synchronize_srcu()
+ * after clearing i_dquot.
*/
static void __dquot_drop(struct inode *inode)
{
int cnt;
struct dquot *put[MAXQUOTAS];
- down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
put[cnt] = inode->i_dquot[cnt];
inode->i_dquot[cnt] = NULL;
}
- up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ spin_unlock(&dq_data_lock);
dqput_all(put);
}
@@ -1599,15 +1593,11 @@ static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
*/
int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
{
- int cnt, ret = 0;
+ int cnt, ret = 0, index;
struct dquot_warn warn[MAXQUOTAS];
struct dquot **dquots = inode->i_dquot;
int reserve = flags & DQUOT_SPACE_RESERVE;
- /*
- * First test before acquiring mutex - solves deadlocks when we
- * re-enter the quota code and are already holding the mutex
- */
if (!dquot_active(inode)) {
inode_incr_space(inode, number, reserve);
goto out;
@@ -1616,7 +1606,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
warn[cnt].w_type = QUOTA_NL_NOWARN;
- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!dquots[cnt])
@@ -1643,7 +1633,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
goto out_flush_warn;
mark_all_dquot_dirty(dquots);
out_flush_warn:
- up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ srcu_read_unlock(&dquot_srcu, index);
flush_warnings(warn);
out:
return ret;
@@ -1655,17 +1645,16 @@ EXPORT_SYMBOL(__dquot_alloc_space);
*/
int dquot_alloc_inode(const struct inode *inode)
{
- int cnt, ret = 0;
+ int cnt, ret = 0, index;
struct dquot_warn warn[MAXQUOTAS];
struct dquot * const *dquots = inode->i_dquot;
- /* First test before acquiring mutex - solves deadlocks when we
- * re-enter the quota code and are already holding the mutex */
if (!dquot_active(inode))
return 0;
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
warn[cnt].w_type = QUOTA_NL_NOWARN;
- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+
+ index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!dquots[cnt])
@@ -1685,7 +1674,7 @@ warn_put_all:
spin_unlock(&dq_data_lock);
if (ret == 0)
mark_all_dquot_dirty(dquots);
- up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ srcu_read_unlock(&dquot_srcu, index);
flush_warnings(warn);
return ret;
}
@@ -1696,14 +1685,14 @@ EXPORT_SYMBOL(dquot_alloc_inode);
*/
int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
{
- int cnt;
+ int cnt, index;
if (!dquot_active(inode)) {
inode_claim_rsv_space(inode, number);
return 0;
}
- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock);
/* Claim reserved quotas to allocated quotas */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1715,7 +1704,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
inode_claim_rsv_space(inode, number);
spin_unlock(&dq_data_lock);
mark_all_dquot_dirty(inode->i_dquot);
- up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ srcu_read_unlock(&dquot_srcu, index);
return 0;
}
EXPORT_SYMBOL(dquot_claim_space_nodirty);
@@ -1725,14 +1714,14 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty);
*/
void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
{
- int cnt;
+ int cnt, index;
if (!dquot_active(inode)) {
inode_reclaim_rsv_space(inode, number);
return;
}
- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock);
/* Claim reserved quotas to allocated quotas */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1744,7 +1733,7 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
inode_reclaim_rsv_space(inode, number);
spin_unlock(&dq_data_lock);
mark_all_dquot_dirty(inode->i_dquot);
- up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ srcu_read_unlock(&dquot_srcu, index);
return;
}
EXPORT_SYMBOL(dquot_reclaim_space_nodirty);
@@ -1757,16 +1746,14 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
unsigned int cnt;
struct dquot_warn warn[MAXQUOTAS];
struct dquot **dquots = inode->i_dquot;
- int reserve = flags & DQUOT_SPACE_RESERVE;
+ int reserve = flags & DQUOT_SPACE_RESERVE, index;
- /* First test before acquiring mutex - solves deadlocks when we
- * re-enter the quota code and are already holding the mutex */
if (!dquot_active(inode)) {
inode_decr_space(inode, number, reserve);
return;
}
- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
int wtype;
@@ -1789,7 +1776,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
goto out_unlock;
mark_all_dquot_dirty(dquots);
out_unlock:
- up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ srcu_read_unlock(&dquot_srcu, index);
flush_warnings(warn);
}
EXPORT_SYMBOL(__dquot_free_space);
@@ -1802,13 +1789,12 @@ void dquot_free_inode(const struct inode *inode)
unsigned int cnt;
struct dquot_warn warn[MAXQUOTAS];
struct dquot * const *dquots = inode->i_dquot;
+ int index;
- /* First test before acquiring mutex - solves deadlocks when we
- * re-enter the quota code and are already holding the mutex */
if (!dquot_active(inode))
return;
- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
int wtype;
@@ -1823,7 +1809,7 @@ void dquot_free_inode(const struct inode *inode)
}
spin_unlock(&dq_data_lock);
mark_all_dquot_dirty(dquots);
- up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ srcu_read_unlock(&dquot_srcu, index);
flush_warnings(warn);
}
EXPORT_SYMBOL(dquot_free_inode);
@@ -1837,6 +1823,8 @@ EXPORT_SYMBOL(dquot_free_inode);
* This operation can block, but only after everything is updated
* A transaction must be started when entering this function.
*
+ * We hold references on transfer_from and transfer_to, so there is no
+ * need to protect them with srcu_read_lock().
*/
int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
{
@@ -1849,8 +1837,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
struct dquot_warn warn_from_inodes[MAXQUOTAS];
struct dquot_warn warn_from_space[MAXQUOTAS];
- /* First test before acquiring mutex - solves deadlocks when we
- * re-enter the quota code and are already holding the mutex */
if (IS_NOQUOTA(inode))
return 0;
/* Initialize the arrays */
@@ -1859,12 +1845,12 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
warn_from_inodes[cnt].w_type = QUOTA_NL_NOWARN;
warn_from_space[cnt].w_type = QUOTA_NL_NOWARN;
}
- down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+
+ spin_lock(&dq_data_lock);
if (IS_NOQUOTA(inode)) { /* File without quota accounting? */
- up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ spin_unlock(&dq_data_lock);
return 0;
}
- spin_lock(&dq_data_lock);
cur_space = inode_get_bytes(inode);
rsv_space = inode_get_rsv_space(inode);
space = cur_space + rsv_space;
@@ -1918,7 +1904,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
inode->i_dquot[cnt] = transfer_to[cnt];
}
spin_unlock(&dq_data_lock);
- up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
mark_all_dquot_dirty(transfer_from);
mark_all_dquot_dirty(transfer_to);
@@ -1932,7 +1917,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
return 0;
over_quota:
spin_unlock(&dq_data_lock);
- up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
flush_warnings(warn_to);
return ret;
}
diff --git a/fs/quota/kqid.c b/fs/quota/kqid.c
index 2f97b0e2c50..ebc5e628580 100644
--- a/fs/quota/kqid.c
+++ b/fs/quota/kqid.c
@@ -55,7 +55,7 @@ EXPORT_SYMBOL(qid_lt);
/**
* from_kqid - Create a qid from a kqid user-namespace pair.
* @targ: The user namespace we want a qid in.
- * @kuid: The kernel internal quota identifier to start with.
+ * @kqid: The kernel internal quota identifier to start with.
*
* Map @kqid into the user-namespace specified by @targ and
* return the resulting qid.
diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c
index 72d29177998..bb2869f5dfd 100644
--- a/fs/quota/netlink.c
+++ b/fs/quota/netlink.c
@@ -32,8 +32,7 @@ static struct genl_family quota_genl_family = {
/**
* quota_send_warning - Send warning to userspace about exceeded quota
- * @type: The quota type: USRQQUOTA, GRPQUOTA,...
- * @id: The user or group id of the quota that was exceeded
+ * @qid: The kernel internal quota identifier.
* @dev: The device on which the fs is mounted (sb->s_dev)
* @warntype: The type of the warning: QUOTA_NL_...
*
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index ff3f0b3cfdb..75621649dbd 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -79,13 +79,13 @@ static int quota_getfmt(struct super_block *sb, int type, void __user *addr)
{
__u32 fmt;
- down_read(&sb_dqopt(sb)->dqptr_sem);
+ mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
if (!sb_has_quota_active(sb, type)) {
- up_read(&sb_dqopt(sb)->dqptr_sem);
+ mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
return -ESRCH;
}
fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id;
- up_read(&sb_dqopt(sb)->dqptr_sem);
+ mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
if (copy_to_user(addr, &fmt, sizeof(fmt)))
return -EFAULT;
return 0;
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 5739cb99de7..9c02d96d3a4 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -286,12 +286,14 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
return 0;
}
-static void balance_leaf_insert_left(struct tree_balance *tb,
- struct item_head *ih, const char *body)
+static unsigned int balance_leaf_insert_left(struct tree_balance *tb,
+ struct item_head *const ih,
+ const char * const body)
{
int ret;
struct buffer_info bi;
int n = B_NR_ITEMS(tb->L[0]);
+ unsigned body_shift_bytes = 0;
if (tb->item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
/* part of new item falls into L[0] */
@@ -329,7 +331,7 @@ static void balance_leaf_insert_left(struct tree_balance *tb,
put_ih_item_len(ih, new_item_len);
if (tb->lbytes > tb->zeroes_num) {
- body += (tb->lbytes - tb->zeroes_num);
+ body_shift_bytes = tb->lbytes - tb->zeroes_num;
tb->zeroes_num = 0;
} else
tb->zeroes_num -= tb->lbytes;
@@ -349,11 +351,12 @@ static void balance_leaf_insert_left(struct tree_balance *tb,
tb->insert_size[0] = 0;
tb->zeroes_num = 0;
}
+ return body_shift_bytes;
}
static void balance_leaf_paste_left_shift_dirent(struct tree_balance *tb,
- struct item_head *ih,
- const char *body)
+ struct item_head * const ih,
+ const char * const body)
{
int n = B_NR_ITEMS(tb->L[0]);
struct buffer_info bi;
@@ -413,17 +416,18 @@ static void balance_leaf_paste_left_shift_dirent(struct tree_balance *tb,
tb->pos_in_item -= tb->lbytes;
}
-static void balance_leaf_paste_left_shift(struct tree_balance *tb,
- struct item_head *ih,
- const char *body)
+static unsigned int balance_leaf_paste_left_shift(struct tree_balance *tb,
+ struct item_head * const ih,
+ const char * const body)
{
struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
int n = B_NR_ITEMS(tb->L[0]);
struct buffer_info bi;
+ int body_shift_bytes = 0;
if (is_direntry_le_ih(item_head(tbS0, tb->item_pos))) {
balance_leaf_paste_left_shift_dirent(tb, ih, body);
- return;
+ return 0;
}
RFALSE(tb->lbytes <= 0,
@@ -497,7 +501,7 @@ static void balance_leaf_paste_left_shift(struct tree_balance *tb,
* insert_size[0]
*/
if (l_n > tb->zeroes_num) {
- body += (l_n - tb->zeroes_num);
+ body_shift_bytes = l_n - tb->zeroes_num;
tb->zeroes_num = 0;
} else
tb->zeroes_num -= l_n;
@@ -526,13 +530,14 @@ static void balance_leaf_paste_left_shift(struct tree_balance *tb,
*/
leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
}
+ return body_shift_bytes;
}
/* appended item will be in L[0] in whole */
static void balance_leaf_paste_left_whole(struct tree_balance *tb,
- struct item_head *ih,
- const char *body)
+ struct item_head * const ih,
+ const char * const body)
{
struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
int n = B_NR_ITEMS(tb->L[0]);
@@ -584,39 +589,44 @@ static void balance_leaf_paste_left_whole(struct tree_balance *tb,
tb->zeroes_num = 0;
}
-static void balance_leaf_paste_left(struct tree_balance *tb,
- struct item_head *ih, const char *body)
+static unsigned int balance_leaf_paste_left(struct tree_balance *tb,
+ struct item_head * const ih,
+ const char * const body)
{
/* we must shift the part of the appended item */
if (tb->item_pos == tb->lnum[0] - 1 && tb->lbytes != -1)
- balance_leaf_paste_left_shift(tb, ih, body);
+ return balance_leaf_paste_left_shift(tb, ih, body);
else
balance_leaf_paste_left_whole(tb, ih, body);
+ return 0;
}
/* Shift lnum[0] items from S[0] to the left neighbor L[0] */
-static void balance_leaf_left(struct tree_balance *tb, struct item_head *ih,
- const char *body, int flag)
+static unsigned int balance_leaf_left(struct tree_balance *tb,
+ struct item_head * const ih,
+ const char * const body, int flag)
{
if (tb->lnum[0] <= 0)
- return;
+ return 0;
/* new item or it part falls to L[0], shift it too */
if (tb->item_pos < tb->lnum[0]) {
BUG_ON(flag != M_INSERT && flag != M_PASTE);
if (flag == M_INSERT)
- balance_leaf_insert_left(tb, ih, body);
+ return balance_leaf_insert_left(tb, ih, body);
else /* M_PASTE */
- balance_leaf_paste_left(tb, ih, body);
+ return balance_leaf_paste_left(tb, ih, body);
} else
/* new item doesn't fall into L[0] */
leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
+ return 0;
}
static void balance_leaf_insert_right(struct tree_balance *tb,
- struct item_head *ih, const char *body)
+ struct item_head * const ih,
+ const char * const body)
{
struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
@@ -704,7 +714,8 @@ static void balance_leaf_insert_right(struct tree_balance *tb,
static void balance_leaf_paste_right_shift_dirent(struct tree_balance *tb,
- struct item_head *ih, const char *body)
+ struct item_head * const ih,
+ const char * const body)
{
struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
struct buffer_info bi;
@@ -754,7 +765,8 @@ static void balance_leaf_paste_right_shift_dirent(struct tree_balance *tb,
}
static void balance_leaf_paste_right_shift(struct tree_balance *tb,
- struct item_head *ih, const char *body)
+ struct item_head * const ih,
+ const char * const body)
{
struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
int n_shift, n_rem, r_zeroes_number, version;
@@ -831,7 +843,8 @@ static void balance_leaf_paste_right_shift(struct tree_balance *tb,
}
static void balance_leaf_paste_right_whole(struct tree_balance *tb,
- struct item_head *ih, const char *body)
+ struct item_head * const ih,
+ const char * const body)
{
struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
int n = B_NR_ITEMS(tbS0);
@@ -874,7 +887,8 @@ static void balance_leaf_paste_right_whole(struct tree_balance *tb,
}
static void balance_leaf_paste_right(struct tree_balance *tb,
- struct item_head *ih, const char *body)
+ struct item_head * const ih,
+ const char * const body)
{
struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
int n = B_NR_ITEMS(tbS0);
@@ -896,8 +910,9 @@ static void balance_leaf_paste_right(struct tree_balance *tb,
}
/* shift rnum[0] items from S[0] to the right neighbor R[0] */
-static void balance_leaf_right(struct tree_balance *tb, struct item_head *ih,
- const char *body, int flag)
+static void balance_leaf_right(struct tree_balance *tb,
+ struct item_head * const ih,
+ const char * const body, int flag)
{
if (tb->rnum[0] <= 0)
return;
@@ -911,8 +926,8 @@ static void balance_leaf_right(struct tree_balance *tb, struct item_head *ih,
}
static void balance_leaf_new_nodes_insert(struct tree_balance *tb,
- struct item_head *ih,
- const char *body,
+ struct item_head * const ih,
+ const char * const body,
struct item_head *insert_key,
struct buffer_head **insert_ptr,
int i)
@@ -1003,8 +1018,8 @@ static void balance_leaf_new_nodes_insert(struct tree_balance *tb,
/* we append to directory item */
static void balance_leaf_new_nodes_paste_dirent(struct tree_balance *tb,
- struct item_head *ih,
- const char *body,
+ struct item_head * const ih,
+ const char * const body,
struct item_head *insert_key,
struct buffer_head **insert_ptr,
int i)
@@ -1058,8 +1073,8 @@ static void balance_leaf_new_nodes_paste_dirent(struct tree_balance *tb,
}
static void balance_leaf_new_nodes_paste_shift(struct tree_balance *tb,
- struct item_head *ih,
- const char *body,
+ struct item_head * const ih,
+ const char * const body,
struct item_head *insert_key,
struct buffer_head **insert_ptr,
int i)
@@ -1131,8 +1146,8 @@ static void balance_leaf_new_nodes_paste_shift(struct tree_balance *tb,
}
static void balance_leaf_new_nodes_paste_whole(struct tree_balance *tb,
- struct item_head *ih,
- const char *body,
+ struct item_head * const ih,
+ const char * const body,
struct item_head *insert_key,
struct buffer_head **insert_ptr,
int i)
@@ -1184,8 +1199,8 @@ static void balance_leaf_new_nodes_paste_whole(struct tree_balance *tb,
}
static void balance_leaf_new_nodes_paste(struct tree_balance *tb,
- struct item_head *ih,
- const char *body,
+ struct item_head * const ih,
+ const char * const body,
struct item_head *insert_key,
struct buffer_head **insert_ptr,
int i)
@@ -1214,8 +1229,8 @@ static void balance_leaf_new_nodes_paste(struct tree_balance *tb,
/* Fill new nodes that appear in place of S[0] */
static void balance_leaf_new_nodes(struct tree_balance *tb,
- struct item_head *ih,
- const char *body,
+ struct item_head * const ih,
+ const char * const body,
struct item_head *insert_key,
struct buffer_head **insert_ptr,
int flag)
@@ -1254,8 +1269,8 @@ static void balance_leaf_new_nodes(struct tree_balance *tb,
}
static void balance_leaf_finish_node_insert(struct tree_balance *tb,
- struct item_head *ih,
- const char *body)
+ struct item_head * const ih,
+ const char * const body)
{
struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
struct buffer_info bi;
@@ -1271,8 +1286,8 @@ static void balance_leaf_finish_node_insert(struct tree_balance *tb,
}
static void balance_leaf_finish_node_paste_dirent(struct tree_balance *tb,
- struct item_head *ih,
- const char *body)
+ struct item_head * const ih,
+ const char * const body)
{
struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
struct item_head *pasted = item_head(tbS0, tb->item_pos);
@@ -1305,8 +1320,8 @@ static void balance_leaf_finish_node_paste_dirent(struct tree_balance *tb,
}
static void balance_leaf_finish_node_paste(struct tree_balance *tb,
- struct item_head *ih,
- const char *body)
+ struct item_head * const ih,
+ const char * const body)
{
struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
struct buffer_info bi;
@@ -1349,8 +1364,8 @@ static void balance_leaf_finish_node_paste(struct tree_balance *tb,
* of the affected item which remains in S
*/
static void balance_leaf_finish_node(struct tree_balance *tb,
- struct item_head *ih,
- const char *body, int flag)
+ struct item_head * const ih,
+ const char * const body, int flag)
{
/* if we must insert or append into buffer S[0] */
if (0 <= tb->item_pos && tb->item_pos < tb->s0num) {
@@ -1402,7 +1417,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih,
&& is_indirect_le_ih(item_head(tbS0, tb->item_pos)))
tb->pos_in_item *= UNFM_P_SIZE;
- balance_leaf_left(tb, ih, body, flag);
+ body += balance_leaf_left(tb, ih, body, flag);
/* tb->lnum[0] > 0 */
/* Calculate new item position */
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index e8870de4627..a88b1b3e7db 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1947,8 +1947,6 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
}
}
- /* wait for all commits to finish */
- cancel_delayed_work(&SB_JOURNAL(sb)->j_work);
/*
* We must release the write lock here because
@@ -1956,8 +1954,14 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
*/
reiserfs_write_unlock(sb);
+ /*
+ * Cancel flushing of old commits. Note that neither of these works
+ * will be requeued because superblock is being shut down and doesn't
+ * have MS_ACTIVE set.
+ */
cancel_delayed_work_sync(&REISERFS_SB(sb)->old_work);
- flush_workqueue(REISERFS_SB(sb)->commit_wq);
+ /* wait for all commits to finish */
+ cancel_delayed_work_sync(&SB_JOURNAL(sb)->j_work);
free_journal_ram(sb);
@@ -4292,9 +4296,15 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, int flags)
if (flush) {
flush_commit_list(sb, jl, 1);
flush_journal_list(sb, jl, 1);
- } else if (!(jl->j_state & LIST_COMMIT_PENDING))
- queue_delayed_work(REISERFS_SB(sb)->commit_wq,
- &journal->j_work, HZ / 10);
+ } else if (!(jl->j_state & LIST_COMMIT_PENDING)) {
+ /*
+ * Avoid queueing work when sb is being shut down. Transaction
+ * will be flushed on journal shutdown.
+ */
+ if (sb->s_flags & MS_ACTIVE)
+ queue_delayed_work(REISERFS_SB(sb)->commit_wq,
+ &journal->j_work, HZ / 10);
+ }
/*
* if the next transaction has any chance of wrapping, flush
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index 814dda3ec99..249594a821e 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -899,8 +899,9 @@ void leaf_delete_items(struct buffer_info *cur_bi, int last_first,
/* insert item into the leaf node in position before */
void leaf_insert_into_buf(struct buffer_info *bi, int before,
- struct item_head *inserted_item_ih,
- const char *inserted_item_body, int zeros_number)
+ struct item_head * const inserted_item_ih,
+ const char * const inserted_item_body,
+ int zeros_number)
{
struct buffer_head *bh = bi->bi_bh;
int nr, free_space;
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index bf53888c7f5..735c2c2b453 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -3216,11 +3216,12 @@ int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes);
void leaf_delete_items(struct buffer_info *cur_bi, int last_first, int first,
int del_num, int del_bytes);
void leaf_insert_into_buf(struct buffer_info *bi, int before,
- struct item_head *inserted_item_ih,
- const char *inserted_item_body, int zeros_number);
-void leaf_paste_in_buffer(struct buffer_info *bi, int pasted_item_num,
- int pos_in_item, int paste_size, const char *body,
+ struct item_head * const inserted_item_ih,
+ const char * const inserted_item_body,
int zeros_number);
+void leaf_paste_in_buffer(struct buffer_info *bi, int pasted_item_num,
+ int pos_in_item, int paste_size,
+ const char * const body, int zeros_number);
void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num,
int pos_in_item, int cut_size);
void leaf_paste_entries(struct buffer_info *bi, int item_num, int before,
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 709ea92d716..d46e88a33b0 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -100,7 +100,11 @@ void reiserfs_schedule_old_flush(struct super_block *s)
struct reiserfs_sb_info *sbi = REISERFS_SB(s);
unsigned long delay;
- if (s->s_flags & MS_RDONLY)
+ /*
+ * Avoid scheduling flush when sb is being shut down. It can race
+ * with journal shutdown and free still queued delayed work.
+ */
+ if (s->s_flags & MS_RDONLY || !(s->s_flags & MS_ACTIVE))
return;
spin_lock(&sbi->old_work_lock);
diff --git a/fs/super.c b/fs/super.c
index d20d5b11ded..b9a214d2fe9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -22,7 +22,6 @@
#include <linux/export.h>
#include <linux/slab.h>
-#include <linux/acct.h>
#include <linux/blkdev.h>
#include <linux/mount.h>
#include <linux/security.h>
@@ -218,7 +217,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
mutex_init(&s->s_dquot.dqio_mutex);
mutex_init(&s->s_dquot.dqonoff_mutex);
- init_rwsem(&s->s_dquot.dqptr_sem);
s->s_maxbytes = MAX_NON_LFS;
s->s_op = &default_op;
s->s_time_gran = 1000000000;
@@ -702,12 +700,22 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
return -EACCES;
#endif
- if (flags & MS_RDONLY)
- acct_auto_close(sb);
- shrink_dcache_sb(sb);
-
remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY);
+ if (remount_ro) {
+ if (sb->s_pins.first) {
+ up_write(&sb->s_umount);
+ sb_pin_kill(sb);
+ down_write(&sb->s_umount);
+ if (!sb->s_root)
+ return 0;
+ if (sb->s_writers.frozen != SB_UNFROZEN)
+ return -EBUSY;
+ remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY);
+ }
+ }
+ shrink_dcache_sb(sb);
+
/* If we are remounting RDONLY and current sb is read/write,
make sure there are no rw files opened */
if (remount_ro) {
diff --git a/fs/sync.c b/fs/sync.c
index b28d1dd10e8..bdc729d80e5 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -65,7 +65,7 @@ int sync_filesystem(struct super_block *sb)
return ret;
return __sync_filesystem(sb, 1);
}
-EXPORT_SYMBOL_GPL(sync_filesystem);
+EXPORT_SYMBOL(sync_filesystem);
static void sync_inodes_one_sb(struct super_block *sb, void *arg)
{
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index ff8229340cd..aa13ad053b1 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -174,7 +174,6 @@ static int do_commit(struct ubifs_info *c)
if (err)
goto out;
- mutex_lock(&c->mst_mutex);
c->mst_node->cmt_no = cpu_to_le64(c->cmt_no);
c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum);
c->mst_node->root_lnum = cpu_to_le32(zroot.lnum);
@@ -204,7 +203,6 @@ static int do_commit(struct ubifs_info *c)
else
c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS);
err = ubifs_write_master(c);
- mutex_unlock(&c->mst_mutex);
if (err)
goto out;
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 2290d586672..fb08b0c514b 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -431,7 +431,7 @@ void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last)
/**
* wbuf_timer_callback - write-buffer timer callback function.
- * @data: timer data (write-buffer descriptor)
+ * @timer: timer data (write-buffer descriptor)
*
* This function is called when the write-buffer timer expires.
*/
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index a902c5919e4..a47ddfc9be6 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -240,6 +240,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
+ ubifs_assert(c->lhead_lnum != c->ltail_lnum);
c->lhead_offs = 0;
}
@@ -404,15 +405,14 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
/* Switch to the next log LEB */
if (c->lhead_offs) {
c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
+ ubifs_assert(c->lhead_lnum != c->ltail_lnum);
c->lhead_offs = 0;
}
- if (c->lhead_offs == 0) {
- /* Must ensure next LEB has been unmapped */
- err = ubifs_leb_unmap(c, c->lhead_lnum);
- if (err)
- goto out;
- }
+ /* Must ensure next LEB has been unmapped */
+ err = ubifs_leb_unmap(c, c->lhead_lnum);
+ if (err)
+ goto out;
len = ALIGN(len, c->min_io_size);
dbg_log("writing commit start at LEB %d:0, len %d", c->lhead_lnum, len);
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index d46b19ec181..421bd0a8042 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -1464,7 +1464,6 @@ struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum)
return ERR_CAST(nnode);
}
iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
- shft -= UBIFS_LPT_FANOUT_SHIFT;
pnode = ubifs_get_pnode(c, nnode, iip);
if (IS_ERR(pnode))
return ERR_CAST(pnode);
@@ -1604,7 +1603,6 @@ struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum)
return ERR_CAST(nnode);
}
iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
- shft -= UBIFS_LPT_FANOUT_SHIFT;
pnode = ubifs_get_pnode(c, nnode, iip);
if (IS_ERR(pnode))
return ERR_CAST(pnode);
@@ -1964,7 +1962,6 @@ again:
}
}
iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
- shft -= UBIFS_LPT_FANOUT_SHIFT;
pnode = scan_get_pnode(c, path + h, nnode, iip);
if (IS_ERR(pnode)) {
err = PTR_ERR(pnode);
@@ -2198,6 +2195,7 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
lprops->dirty);
return -EINVAL;
}
+ break;
case LPROPS_FREEABLE:
case LPROPS_FRDI_IDX:
if (lprops->free + lprops->dirty != c->leb_size) {
@@ -2206,6 +2204,7 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
lprops->dirty);
return -EINVAL;
}
+ break;
}
}
return 0;
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 45d4e96a6ba..d9c02928e99 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -304,7 +304,6 @@ static int layout_cnodes(struct ubifs_info *c)
ubifs_assert(lnum >= c->lpt_first &&
lnum <= c->lpt_last);
}
- done_ltab = 1;
c->ltab_lnum = lnum;
c->ltab_offs = offs;
offs += c->ltab_sz;
@@ -514,7 +513,6 @@ static int write_cnodes(struct ubifs_info *c)
if (err)
return err;
}
- done_ltab = 1;
ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
offs += c->ltab_sz;
dbg_chk_lpt_sz(c, 1, c->ltab_sz);
@@ -1941,6 +1939,11 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
pr_err("LEB %d:%d, nnode, ",
lnum, offs);
err = ubifs_unpack_nnode(c, p, &nnode);
+ if (err) {
+ pr_err("failed to unpack_node, error %d\n",
+ err);
+ break;
+ }
for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
pr_cont("%d:%d", nnode.nbranch[i].lnum,
nnode.nbranch[i].offs);
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index ab83ace9910..1a4bb9e8b3b 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -352,10 +352,9 @@ int ubifs_read_master(struct ubifs_info *c)
* ubifs_write_master - write master node.
* @c: UBIFS file-system description object
*
- * This function writes the master node. The caller has to take the
- * @c->mst_mutex lock before calling this function. Returns zero in case of
- * success and a negative error code in case of failure. The master node is
- * written twice to enable recovery.
+ * This function writes the master node. Returns zero in case of success and a
+ * negative error code in case of failure. The master node is written twice to
+ * enable recovery.
*/
int ubifs_write_master(struct ubifs_info *c)
{
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index f1c3e5a1b31..4409f486ece 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -346,7 +346,6 @@ static int write_orph_nodes(struct ubifs_info *c, int atomic)
int lnum;
/* Unmap any unused LEBs after consolidation */
- lnum = c->ohead_lnum + 1;
for (lnum = c->ohead_lnum + 1; lnum <= c->orph_last; lnum++) {
err = ubifs_leb_unmap(c, lnum);
if (err)
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index c14adb2f420..c640938f62f 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -596,7 +596,6 @@ static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs)
* drop_last_node - drop the last node.
* @sleb: scanned LEB information
* @offs: offset of dropped nodes is returned here
- * @grouped: non-zero if whole group of nodes have to be dropped
*
* This is a helper function for 'ubifs_recover_leb()' which drops the last
* node of the scanned LEB.
@@ -629,8 +628,8 @@ static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs)
*
* This function does a scan of a LEB, but caters for errors that might have
* been caused by the unclean unmount from which we are attempting to recover.
- * Returns %0 in case of success, %-EUCLEAN if an unrecoverable corruption is
- * found, and a negative error code in case of failure.
+ * Returns the scanned information on success and a negative error code on
+ * failure.
*/
struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
int offs, void *sbuf, int jhead)
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 4c37607a958..79c6dbbc0e0 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -332,6 +332,8 @@ static int create_default_filesystem(struct ubifs_info *c)
cs->ch.node_type = UBIFS_CS_NODE;
err = ubifs_write_node(c, cs, UBIFS_CS_NODE_SZ, UBIFS_LOG_LNUM, 0);
kfree(cs);
+ if (err)
+ return err;
ubifs_msg("default file-system created");
return 0;
@@ -447,7 +449,7 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
goto failed;
}
- if (c->default_compr < 0 || c->default_compr >= UBIFS_COMPR_TYPES_CNT) {
+ if (c->default_compr >= UBIFS_COMPR_TYPES_CNT) {
err = 13;
goto failed;
}
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index 58aa05df2bb..89adbc4d08a 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -131,7 +131,8 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
* @offs: offset to start at (usually zero)
* @sbuf: scan buffer (must be c->leb_size)
*
- * This function returns %0 on success and a negative error code on failure.
+ * This function returns the scanned information on success and a negative error
+ * code on failure.
*/
struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
int offs, void *sbuf)
@@ -157,9 +158,10 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
return ERR_PTR(err);
}
- if (err == -EBADMSG)
- sleb->ecc = 1;
-
+ /*
+ * Note, we ignore integrity errors (EBADMSG) because all the nodes are
+ * protected by CRC checksums.
+ */
return sleb;
}
@@ -169,8 +171,6 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
* @sleb: scanning information
* @lnum: logical eraseblock number
* @offs: offset to start at (usually zero)
- *
- * This function returns %0 on success and a negative error code on failure.
*/
void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
int lnum, int offs)
@@ -257,7 +257,7 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
* @quiet: print no messages
*
* This function scans LEB number @lnum and returns complete information about
- * its contents. Returns the scaned information in case of success and,
+ * its contents. Returns the scanned information in case of success and,
* %-EUCLEAN if the LEB neads recovery, and other negative error codes in case
* of failure.
*
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 3904c8574ef..106bf20629c 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -75,7 +75,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode)
return 1;
}
- if (ui->compr_type < 0 || ui->compr_type >= UBIFS_COMPR_TYPES_CNT) {
+ if (ui->compr_type >= UBIFS_COMPR_TYPES_CNT) {
ubifs_err("unknown compression type %d", ui->compr_type);
return 2;
}
@@ -424,19 +424,19 @@ static int ubifs_show_options(struct seq_file *s, struct dentry *root)
struct ubifs_info *c = root->d_sb->s_fs_info;
if (c->mount_opts.unmount_mode == 2)
- seq_printf(s, ",fast_unmount");
+ seq_puts(s, ",fast_unmount");
else if (c->mount_opts.unmount_mode == 1)
- seq_printf(s, ",norm_unmount");
+ seq_puts(s, ",norm_unmount");
if (c->mount_opts.bulk_read == 2)
- seq_printf(s, ",bulk_read");
+ seq_puts(s, ",bulk_read");
else if (c->mount_opts.bulk_read == 1)
- seq_printf(s, ",no_bulk_read");
+ seq_puts(s, ",no_bulk_read");
if (c->mount_opts.chk_data_crc == 2)
- seq_printf(s, ",chk_data_crc");
+ seq_puts(s, ",chk_data_crc");
else if (c->mount_opts.chk_data_crc == 1)
- seq_printf(s, ",no_chk_data_crc");
+ seq_puts(s, ",no_chk_data_crc");
if (c->mount_opts.override_compr) {
seq_printf(s, ",compr=%s",
@@ -796,8 +796,8 @@ static int alloc_wbufs(struct ubifs_info *c)
{
int i, err;
- c->jheads = kzalloc(c->jhead_cnt * sizeof(struct ubifs_jhead),
- GFP_KERNEL);
+ c->jheads = kcalloc(c->jhead_cnt, sizeof(struct ubifs_jhead),
+ GFP_KERNEL);
if (!c->jheads)
return -ENOMEM;
@@ -1963,7 +1963,6 @@ static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi)
mutex_init(&c->lp_mutex);
mutex_init(&c->tnc_mutex);
mutex_init(&c->log_mutex);
- mutex_init(&c->mst_mutex);
mutex_init(&c->umount_mutex);
mutex_init(&c->bu_mutex);
mutex_init(&c->write_reserve_mutex);
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 8a40cf9c02d..6793db0754f 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -3294,7 +3294,6 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
goto out_unlock;
if (err) {
- err = -EINVAL;
key = &from_key;
goto out_dump;
}
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 3600994f841..7a205e04677 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -389,7 +389,6 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
ubifs_dump_lprops(c);
}
/* Try to commit anyway */
- err = 0;
break;
}
p++;
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index c1f71fe17cc..c4fe900c67a 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -314,7 +314,6 @@ struct ubifs_scan_node {
* @nodes_cnt: number of nodes scanned
* @nodes: list of struct ubifs_scan_node
* @endpt: end point (and therefore the start of empty space)
- * @ecc: read returned -EBADMSG
* @buf: buffer containing entire LEB scanned
*/
struct ubifs_scan_leb {
@@ -322,7 +321,6 @@ struct ubifs_scan_leb {
int nodes_cnt;
struct list_head nodes;
int endpt;
- int ecc;
void *buf;
};
@@ -1051,7 +1049,6 @@ struct ubifs_debug_info;
*
* @mst_node: master node
* @mst_offs: offset of valid master node
- * @mst_mutex: protects the master node area, @mst_node, and @mst_offs
*
* @max_bu_buf_len: maximum bulk-read buffer length
* @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu
@@ -1292,7 +1289,6 @@ struct ubifs_info {
struct ubifs_mst_node *mst_node;
int mst_offs;
- struct mutex mst_mutex;
int max_bu_buf_len;
struct mutex bu_mutex;
diff --git a/fs/udf/file.c b/fs/udf/file.c
index d80738fdf42..86c6743ec1f 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -27,7 +27,7 @@
#include "udfdecl.h"
#include <linux/fs.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <linux/string.h> /* memset */
#include <linux/capability.h>
@@ -100,24 +100,6 @@ static int udf_adinicb_write_begin(struct file *file,
return 0;
}
-static int udf_adinicb_write_end(struct file *file,
- struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
-{
- struct inode *inode = mapping->host;
- unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
- char *kaddr;
- struct udf_inode_info *iinfo = UDF_I(inode);
-
- kaddr = kmap_atomic(page);
- memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr + offset,
- kaddr + offset, copied);
- kunmap_atomic(kaddr);
-
- return simple_write_end(file, mapping, pos, len, copied, page, fsdata);
-}
-
static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb,
struct iov_iter *iter,
loff_t offset)
@@ -130,7 +112,7 @@ const struct address_space_operations udf_adinicb_aops = {
.readpage = udf_adinicb_readpage,
.writepage = udf_adinicb_writepage,
.write_begin = udf_adinicb_write_begin,
- .write_end = udf_adinicb_write_end,
+ .write_end = simple_write_end,
.direct_IO = udf_adinicb_direct_IO,
};
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 6eaf5edf1ea..e77db621ec8 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -45,7 +45,7 @@ void udf_free_inode(struct inode *inode)
udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1);
}
-struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err)
+struct inode *udf_new_inode(struct inode *dir, umode_t mode)
{
struct super_block *sb = dir->i_sb;
struct udf_sb_info *sbi = UDF_SB(sb);
@@ -55,14 +55,12 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err)
struct udf_inode_info *iinfo;
struct udf_inode_info *dinfo = UDF_I(dir);
struct logicalVolIntegrityDescImpUse *lvidiu;
+ int err;
inode = new_inode(sb);
- if (!inode) {
- *err = -ENOMEM;
- return NULL;
- }
- *err = -ENOSPC;
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
iinfo = UDF_I(inode);
if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) {
@@ -80,21 +78,22 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err)
}
if (!iinfo->i_ext.i_data) {
iput(inode);
- *err = -ENOMEM;
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
+ err = -ENOSPC;
block = udf_new_block(dir->i_sb, NULL,
dinfo->i_location.partitionReferenceNum,
- start, err);
- if (*err) {
+ start, &err);
+ if (err) {
iput(inode);
- return NULL;
+ return ERR_PTR(err);
}
lvidiu = udf_sb_lvidiu(sb);
if (lvidiu) {
iinfo->i_unique = lvid_get_unique_id(sb);
+ inode->i_generation = iinfo->i_unique;
mutex_lock(&sbi->s_alloc_mutex);
if (S_ISDIR(mode))
le32_add_cpu(&lvidiu->numDirs, 1);
@@ -123,9 +122,12 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err)
iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
inode->i_mtime = inode->i_atime = inode->i_ctime =
iinfo->i_crtime = current_fs_time(inode->i_sb);
- insert_inode_hash(inode);
+ if (unlikely(insert_inode_locked(inode) < 0)) {
+ make_bad_inode(inode);
+ iput(inode);
+ return ERR_PTR(-EIO);
+ }
mark_inode_dirty(inode);
- *err = 0;
return inode;
}
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 236cd48184c..08598843288 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -51,7 +51,6 @@ MODULE_LICENSE("GPL");
static umode_t udf_convert_permissions(struct fileEntry *);
static int udf_update_inode(struct inode *, int);
-static void udf_fill_inode(struct inode *, struct buffer_head *);
static int udf_sync_inode(struct inode *inode);
static int udf_alloc_i_data(struct inode *inode, size_t size);
static sector_t inode_getblk(struct inode *, sector_t, int *, int *);
@@ -1271,12 +1270,33 @@ update_time:
return 0;
}
-static void __udf_read_inode(struct inode *inode)
+/*
+ * Maximum length of linked list formed by ICB hierarchy. The chosen number is
+ * arbitrary - just that we hopefully don't limit any real use of rewritten
+ * inode on write-once media but avoid looping for too long on corrupted media.
+ */
+#define UDF_MAX_ICB_NESTING 1024
+
+static int udf_read_inode(struct inode *inode)
{
struct buffer_head *bh = NULL;
struct fileEntry *fe;
+ struct extendedFileEntry *efe;
uint16_t ident;
struct udf_inode_info *iinfo = UDF_I(inode);
+ struct udf_sb_info *sbi = UDF_SB(inode->i_sb);
+ struct kernel_lb_addr *iloc = &iinfo->i_location;
+ unsigned int link_count;
+ unsigned int indirections = 0;
+ int ret = -EIO;
+
+reread:
+ if (iloc->logicalBlockNum >=
+ sbi->s_partmaps[iloc->partitionReferenceNum].s_partition_len) {
+ udf_debug("block=%d, partition=%d out of range\n",
+ iloc->logicalBlockNum, iloc->partitionReferenceNum);
+ return -EIO;
+ }
/*
* Set defaults, but the inode is still incomplete!
@@ -1290,78 +1310,54 @@ static void __udf_read_inode(struct inode *inode)
* i_nlink = 1
* i_op = NULL;
*/
- bh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 0, &ident);
+ bh = udf_read_ptagged(inode->i_sb, iloc, 0, &ident);
if (!bh) {
udf_err(inode->i_sb, "(ino %ld) failed !bh\n", inode->i_ino);
- make_bad_inode(inode);
- return;
+ return -EIO;
}
if (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE &&
ident != TAG_IDENT_USE) {
udf_err(inode->i_sb, "(ino %ld) failed ident=%d\n",
inode->i_ino, ident);
- brelse(bh);
- make_bad_inode(inode);
- return;
+ goto out;
}
fe = (struct fileEntry *)bh->b_data;
+ efe = (struct extendedFileEntry *)bh->b_data;
if (fe->icbTag.strategyType == cpu_to_le16(4096)) {
struct buffer_head *ibh;
- ibh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 1,
- &ident);
+ ibh = udf_read_ptagged(inode->i_sb, iloc, 1, &ident);
if (ident == TAG_IDENT_IE && ibh) {
- struct buffer_head *nbh = NULL;
struct kernel_lb_addr loc;
struct indirectEntry *ie;
ie = (struct indirectEntry *)ibh->b_data;
loc = lelb_to_cpu(ie->indirectICB.extLocation);
- if (ie->indirectICB.extLength &&
- (nbh = udf_read_ptagged(inode->i_sb, &loc, 0,
- &ident))) {
- if (ident == TAG_IDENT_FE ||
- ident == TAG_IDENT_EFE) {
- memcpy(&iinfo->i_location,
- &loc,
- sizeof(struct kernel_lb_addr));
- brelse(bh);
- brelse(ibh);
- brelse(nbh);
- __udf_read_inode(inode);
- return;
+ if (ie->indirectICB.extLength) {
+ brelse(ibh);
+ memcpy(&iinfo->i_location, &loc,
+ sizeof(struct kernel_lb_addr));
+ if (++indirections > UDF_MAX_ICB_NESTING) {
+ udf_err(inode->i_sb,
+ "too many ICBs in ICB hierarchy"
+ " (max %d supported)\n",
+ UDF_MAX_ICB_NESTING);
+ goto out;
}
- brelse(nbh);
+ brelse(bh);
+ goto reread;
}
}
brelse(ibh);
} else if (fe->icbTag.strategyType != cpu_to_le16(4)) {
udf_err(inode->i_sb, "unsupported strategy type: %d\n",
le16_to_cpu(fe->icbTag.strategyType));
- brelse(bh);
- make_bad_inode(inode);
- return;
+ goto out;
}
- udf_fill_inode(inode, bh);
-
- brelse(bh);
-}
-
-static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
-{
- struct fileEntry *fe;
- struct extendedFileEntry *efe;
- struct udf_sb_info *sbi = UDF_SB(inode->i_sb);
- struct udf_inode_info *iinfo = UDF_I(inode);
- unsigned int link_count;
-
- fe = (struct fileEntry *)bh->b_data;
- efe = (struct extendedFileEntry *)bh->b_data;
-
if (fe->icbTag.strategyType == cpu_to_le16(4))
iinfo->i_strat4096 = 0;
else /* if (fe->icbTag.strategyType == cpu_to_le16(4096)) */
@@ -1378,11 +1374,10 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_EFE)) {
iinfo->i_efe = 1;
iinfo->i_use = 0;
- if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize -
- sizeof(struct extendedFileEntry))) {
- make_bad_inode(inode);
- return;
- }
+ ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize -
+ sizeof(struct extendedFileEntry));
+ if (ret)
+ goto out;
memcpy(iinfo->i_ext.i_data,
bh->b_data + sizeof(struct extendedFileEntry),
inode->i_sb->s_blocksize -
@@ -1390,11 +1385,10 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
} else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_FE)) {
iinfo->i_efe = 0;
iinfo->i_use = 0;
- if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize -
- sizeof(struct fileEntry))) {
- make_bad_inode(inode);
- return;
- }
+ ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize -
+ sizeof(struct fileEntry));
+ if (ret)
+ goto out;
memcpy(iinfo->i_ext.i_data,
bh->b_data + sizeof(struct fileEntry),
inode->i_sb->s_blocksize - sizeof(struct fileEntry));
@@ -1404,18 +1398,18 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
iinfo->i_lenAlloc = le32_to_cpu(
((struct unallocSpaceEntry *)bh->b_data)->
lengthAllocDescs);
- if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize -
- sizeof(struct unallocSpaceEntry))) {
- make_bad_inode(inode);
- return;
- }
+ ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize -
+ sizeof(struct unallocSpaceEntry));
+ if (ret)
+ goto out;
memcpy(iinfo->i_ext.i_data,
bh->b_data + sizeof(struct unallocSpaceEntry),
inode->i_sb->s_blocksize -
sizeof(struct unallocSpaceEntry));
- return;
+ return 0;
}
+ ret = -EIO;
read_lock(&sbi->s_cred_lock);
i_uid_write(inode, le32_to_cpu(fe->uid));
if (!uid_valid(inode->i_uid) ||
@@ -1441,8 +1435,10 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
read_unlock(&sbi->s_cred_lock);
link_count = le16_to_cpu(fe->fileLinkCount);
- if (!link_count)
- link_count = 1;
+ if (!link_count) {
+ ret = -ESTALE;
+ goto out;
+ }
set_nlink(inode, link_count);
inode->i_size = le64_to_cpu(fe->informationLength);
@@ -1488,6 +1484,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
iinfo->i_lenAlloc = le32_to_cpu(efe->lengthAllocDescs);
iinfo->i_checkpoint = le32_to_cpu(efe->checkpoint);
}
+ inode->i_generation = iinfo->i_unique;
switch (fe->icbTag.fileType) {
case ICBTAG_FILE_TYPE_DIRECTORY:
@@ -1537,8 +1534,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
default:
udf_err(inode->i_sb, "(ino %ld) failed unknown file type=%d\n",
inode->i_ino, fe->icbTag.fileType);
- make_bad_inode(inode);
- return;
+ goto out;
}
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
struct deviceSpec *dsea =
@@ -1549,8 +1545,12 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
le32_to_cpu(dsea->minorDeviceIdent)));
/* Developer ID ??? */
} else
- make_bad_inode(inode);
+ goto out;
}
+ ret = 0;
+out:
+ brelse(bh);
+ return ret;
}
static int udf_alloc_i_data(struct inode *inode, size_t size)
@@ -1664,7 +1664,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
FE_PERM_U_DELETE | FE_PERM_U_CHATTR));
fe->permissions = cpu_to_le32(udfperms);
- if (S_ISDIR(inode->i_mode))
+ if (S_ISDIR(inode->i_mode) && inode->i_nlink > 0)
fe->fileLinkCount = cpu_to_le16(inode->i_nlink - 1);
else
fe->fileLinkCount = cpu_to_le16(inode->i_nlink);
@@ -1830,32 +1830,23 @@ struct inode *udf_iget(struct super_block *sb, struct kernel_lb_addr *ino)
{
unsigned long block = udf_get_lb_pblock(sb, ino, 0);
struct inode *inode = iget_locked(sb, block);
+ int err;
if (!inode)
- return NULL;
-
- if (inode->i_state & I_NEW) {
- memcpy(&UDF_I(inode)->i_location, ino, sizeof(struct kernel_lb_addr));
- __udf_read_inode(inode);
- unlock_new_inode(inode);
- }
+ return ERR_PTR(-ENOMEM);
- if (is_bad_inode(inode))
- goto out_iput;
+ if (!(inode->i_state & I_NEW))
+ return inode;
- if (ino->logicalBlockNum >= UDF_SB(sb)->
- s_partmaps[ino->partitionReferenceNum].s_partition_len) {
- udf_debug("block=%d, partition=%d out of range\n",
- ino->logicalBlockNum, ino->partitionReferenceNum);
- make_bad_inode(inode);
- goto out_iput;
+ memcpy(&UDF_I(inode)->i_location, ino, sizeof(struct kernel_lb_addr));
+ err = udf_read_inode(inode);
+ if (err < 0) {
+ iget_failed(inode);
+ return ERR_PTR(err);
}
+ unlock_new_inode(inode);
return inode;
-
- out_iput:
- iput(inode);
- return NULL;
}
int udf_add_aext(struct inode *inode, struct extent_position *epos,
diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c
index 6583fe9b064..6ad5a453af9 100644
--- a/fs/udf/lowlevel.c
+++ b/fs/udf/lowlevel.c
@@ -21,7 +21,7 @@
#include <linux/blkdev.h>
#include <linux/cdrom.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "udf_sb.h"
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 9737cba1357..c12e260fd6c 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -270,9 +270,8 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
NULL, 0),
};
inode = udf_iget(dir->i_sb, lb);
- if (!inode) {
- return ERR_PTR(-EACCES);
- }
+ if (IS_ERR(inode))
+ return inode;
} else
#endif /* UDF_RECOVERY */
@@ -285,9 +284,8 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
loc = lelb_to_cpu(cfi.icb.extLocation);
inode = udf_iget(dir->i_sb, &loc);
- if (!inode) {
- return ERR_PTR(-EACCES);
- }
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
}
return d_splice_alias(inode, dentry);
@@ -550,32 +548,18 @@ static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi,
return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL);
}
-static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode,
- bool excl)
+static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
{
+ struct udf_inode_info *iinfo = UDF_I(inode);
+ struct inode *dir = dentry->d_parent->d_inode;
struct udf_fileident_bh fibh;
- struct inode *inode;
struct fileIdentDesc cfi, *fi;
int err;
- struct udf_inode_info *iinfo;
-
- inode = udf_new_inode(dir, mode, &err);
- if (!inode) {
- return err;
- }
-
- iinfo = UDF_I(inode);
- if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
- inode->i_data.a_ops = &udf_adinicb_aops;
- else
- inode->i_data.a_ops = &udf_aops;
- inode->i_op = &udf_file_inode_operations;
- inode->i_fop = &udf_file_operations;
- mark_inode_dirty(inode);
fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
- if (!fi) {
+ if (unlikely(!fi)) {
inode_dec_link_count(inode);
+ unlock_new_inode(inode);
iput(inode);
return err;
}
@@ -589,23 +573,21 @@ static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode,
if (fibh.sbh != fibh.ebh)
brelse(fibh.ebh);
brelse(fibh.sbh);
+ unlock_new_inode(inode);
d_instantiate(dentry, inode);
return 0;
}
-static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+ bool excl)
{
- struct inode *inode;
- struct udf_inode_info *iinfo;
- int err;
+ struct inode *inode = udf_new_inode(dir, mode);
- inode = udf_new_inode(dir, mode, &err);
- if (!inode)
- return err;
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
- iinfo = UDF_I(inode);
- if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
+ if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
inode->i_data.a_ops = &udf_adinicb_aops;
else
inode->i_data.a_ops = &udf_aops;
@@ -613,7 +595,25 @@ static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
inode->i_fop = &udf_file_operations;
mark_inode_dirty(inode);
+ return udf_add_nondir(dentry, inode);
+}
+
+static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ struct inode *inode = udf_new_inode(dir, mode);
+
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
+ inode->i_data.a_ops = &udf_adinicb_aops;
+ else
+ inode->i_data.a_ops = &udf_aops;
+ inode->i_op = &udf_file_inode_operations;
+ inode->i_fop = &udf_file_operations;
+ mark_inode_dirty(inode);
d_tmpfile(dentry, inode);
+ unlock_new_inode(inode);
return 0;
}
@@ -621,44 +621,16 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
dev_t rdev)
{
struct inode *inode;
- struct udf_fileident_bh fibh;
- struct fileIdentDesc cfi, *fi;
- int err;
- struct udf_inode_info *iinfo;
if (!old_valid_dev(rdev))
return -EINVAL;
- err = -EIO;
- inode = udf_new_inode(dir, mode, &err);
- if (!inode)
- goto out;
+ inode = udf_new_inode(dir, mode);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
- iinfo = UDF_I(inode);
init_special_inode(inode, mode, rdev);
- fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
- if (!fi) {
- inode_dec_link_count(inode);
- iput(inode);
- return err;
- }
- cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
- cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location);
- *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
- cpu_to_le32(iinfo->i_unique & 0x00000000FFFFFFFFUL);
- udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
- if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
- mark_inode_dirty(dir);
- mark_inode_dirty(inode);
-
- if (fibh.sbh != fibh.ebh)
- brelse(fibh.ebh);
- brelse(fibh.sbh);
- d_instantiate(dentry, inode);
- err = 0;
-
-out:
- return err;
+ return udf_add_nondir(dentry, inode);
}
static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
@@ -670,10 +642,9 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct udf_inode_info *dinfo = UDF_I(dir);
struct udf_inode_info *iinfo;
- err = -EIO;
- inode = udf_new_inode(dir, S_IFDIR | mode, &err);
- if (!inode)
- goto out;
+ inode = udf_new_inode(dir, S_IFDIR | mode);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
iinfo = UDF_I(inode);
inode->i_op = &udf_dir_inode_operations;
@@ -681,6 +652,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err);
if (!fi) {
inode_dec_link_count(inode);
+ unlock_new_inode(inode);
iput(inode);
goto out;
}
@@ -699,6 +671,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (!fi) {
clear_nlink(inode);
mark_inode_dirty(inode);
+ unlock_new_inode(inode);
iput(inode);
goto out;
}
@@ -710,6 +683,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
inc_nlink(dir);
mark_inode_dirty(dir);
+ unlock_new_inode(inode);
d_instantiate(dentry, inode);
if (fibh.sbh != fibh.ebh)
brelse(fibh.ebh);
@@ -876,14 +850,11 @@ out:
static int udf_symlink(struct inode *dir, struct dentry *dentry,
const char *symname)
{
- struct inode *inode;
+ struct inode *inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO);
struct pathComponent *pc;
const char *compstart;
- struct udf_fileident_bh fibh;
struct extent_position epos = {};
int eoffset, elen = 0;
- struct fileIdentDesc *fi;
- struct fileIdentDesc cfi;
uint8_t *ea;
int err;
int block;
@@ -892,9 +863,8 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
struct udf_inode_info *iinfo;
struct super_block *sb = dir->i_sb;
- inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO, &err);
- if (!inode)
- goto out;
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
iinfo = UDF_I(inode);
down_write(&iinfo->i_data_sem);
@@ -1012,24 +982,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
mark_inode_dirty(inode);
up_write(&iinfo->i_data_sem);
- fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
- if (!fi)
- goto out_no_entry;
- cfi.icb.extLength = cpu_to_le32(sb->s_blocksize);
- cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location);
- if (UDF_SB(inode->i_sb)->s_lvid_bh) {
- *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
- cpu_to_le32(lvid_get_unique_id(sb));
- }
- udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
- if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
- mark_inode_dirty(dir);
- if (fibh.sbh != fibh.ebh)
- brelse(fibh.ebh);
- brelse(fibh.sbh);
- d_instantiate(dentry, inode);
- err = 0;
-
+ err = udf_add_nondir(dentry, inode);
out:
kfree(name);
return err;
@@ -1037,6 +990,7 @@ out:
out_no_entry:
up_write(&iinfo->i_data_sem);
inode_dec_link_count(inode);
+ unlock_new_inode(inode);
iput(inode);
goto out;
}
@@ -1221,7 +1175,7 @@ static struct dentry *udf_get_parent(struct dentry *child)
struct udf_fileident_bh fibh;
if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi))
- goto out_unlock;
+ return ERR_PTR(-EACCES);
if (fibh.sbh != fibh.ebh)
brelse(fibh.ebh);
@@ -1229,12 +1183,10 @@ static struct dentry *udf_get_parent(struct dentry *child)
tloc = lelb_to_cpu(cfi.icb.extLocation);
inode = udf_iget(child->d_inode->i_sb, &tloc);
- if (!inode)
- goto out_unlock;
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
return d_obtain_alias(inode);
-out_unlock:
- return ERR_PTR(-EACCES);
}
@@ -1251,8 +1203,8 @@ static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block,
loc.partitionReferenceNum = partref;
inode = udf_iget(sb, &loc);
- if (inode == NULL)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
if (generation && inode->i_generation != generation) {
iput(inode);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 3286db047a4..5401fc33f5c 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -63,7 +63,7 @@
#include "udf_i.h"
#include <linux/init.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#define VDS_POS_PRIMARY_VOL_DESC 0
#define VDS_POS_UNALLOC_SPACE_DESC 1
@@ -961,12 +961,14 @@ struct inode *udf_find_metadata_inode_efe(struct super_block *sb,
metadata_fe = udf_iget(sb, &addr);
- if (metadata_fe == NULL)
+ if (IS_ERR(metadata_fe)) {
udf_warn(sb, "metadata inode efe not found\n");
- else if (UDF_I(metadata_fe)->i_alloc_type != ICBTAG_FLAG_AD_SHORT) {
+ return metadata_fe;
+ }
+ if (UDF_I(metadata_fe)->i_alloc_type != ICBTAG_FLAG_AD_SHORT) {
udf_warn(sb, "metadata inode efe does not have short allocation descriptors!\n");
iput(metadata_fe);
- metadata_fe = NULL;
+ return ERR_PTR(-EIO);
}
return metadata_fe;
@@ -978,6 +980,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
struct udf_part_map *map;
struct udf_meta_data *mdata;
struct kernel_lb_addr addr;
+ struct inode *fe;
map = &sbi->s_partmaps[partition];
mdata = &map->s_type_specific.s_metadata;
@@ -986,22 +989,24 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
udf_debug("Metadata file location: block = %d part = %d\n",
mdata->s_meta_file_loc, map->s_partition_num);
- mdata->s_metadata_fe = udf_find_metadata_inode_efe(sb,
- mdata->s_meta_file_loc, map->s_partition_num);
-
- if (mdata->s_metadata_fe == NULL) {
+ fe = udf_find_metadata_inode_efe(sb, mdata->s_meta_file_loc,
+ map->s_partition_num);
+ if (IS_ERR(fe)) {
/* mirror file entry */
udf_debug("Mirror metadata file location: block = %d part = %d\n",
mdata->s_mirror_file_loc, map->s_partition_num);
- mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb,
- mdata->s_mirror_file_loc, map->s_partition_num);
+ fe = udf_find_metadata_inode_efe(sb, mdata->s_mirror_file_loc,
+ map->s_partition_num);
- if (mdata->s_mirror_fe == NULL) {
+ if (IS_ERR(fe)) {
udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n");
- return -EIO;
+ return PTR_ERR(fe);
}
- }
+ mdata->s_mirror_fe = fe;
+ } else
+ mdata->s_metadata_fe = fe;
+
/*
* bitmap file entry
@@ -1015,15 +1020,16 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
udf_debug("Bitmap file location: block = %d part = %d\n",
addr.logicalBlockNum, addr.partitionReferenceNum);
- mdata->s_bitmap_fe = udf_iget(sb, &addr);
- if (mdata->s_bitmap_fe == NULL) {
+ fe = udf_iget(sb, &addr);
+ if (IS_ERR(fe)) {
if (sb->s_flags & MS_RDONLY)
udf_warn(sb, "bitmap inode efe not found but it's ok since the disc is mounted read-only\n");
else {
udf_err(sb, "bitmap inode efe not found and attempted read-write mount\n");
- return -EIO;
+ return PTR_ERR(fe);
}
- }
+ } else
+ mdata->s_bitmap_fe = fe;
}
udf_debug("udf_load_metadata_files Ok\n");
@@ -1111,13 +1117,15 @@ static int udf_fill_partdesc_info(struct super_block *sb,
phd->unallocSpaceTable.extPosition),
.partitionReferenceNum = p_index,
};
+ struct inode *inode;
- map->s_uspace.s_table = udf_iget(sb, &loc);
- if (!map->s_uspace.s_table) {
+ inode = udf_iget(sb, &loc);
+ if (IS_ERR(inode)) {
udf_debug("cannot load unallocSpaceTable (part %d)\n",
p_index);
- return -EIO;
+ return PTR_ERR(inode);
}
+ map->s_uspace.s_table = inode;
map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE;
udf_debug("unallocSpaceTable (part %d) @ %ld\n",
p_index, map->s_uspace.s_table->i_ino);
@@ -1144,14 +1152,15 @@ static int udf_fill_partdesc_info(struct super_block *sb,
phd->freedSpaceTable.extPosition),
.partitionReferenceNum = p_index,
};
+ struct inode *inode;
- map->s_fspace.s_table = udf_iget(sb, &loc);
- if (!map->s_fspace.s_table) {
+ inode = udf_iget(sb, &loc);
+ if (IS_ERR(inode)) {
udf_debug("cannot load freedSpaceTable (part %d)\n",
p_index);
- return -EIO;
+ return PTR_ERR(inode);
}
-
+ map->s_fspace.s_table = inode;
map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE;
udf_debug("freedSpaceTable (part %d) @ %ld\n",
p_index, map->s_fspace.s_table->i_ino);
@@ -1178,6 +1187,7 @@ static void udf_find_vat_block(struct super_block *sb, int p_index,
struct udf_part_map *map = &sbi->s_partmaps[p_index];
sector_t vat_block;
struct kernel_lb_addr ino;
+ struct inode *inode;
/*
* VAT file entry is in the last recorded block. Some broken disks have
@@ -1186,10 +1196,13 @@ static void udf_find_vat_block(struct super_block *sb, int p_index,
ino.partitionReferenceNum = type1_index;
for (vat_block = start_block;
vat_block >= map->s_partition_root &&
- vat_block >= start_block - 3 &&
- !sbi->s_vat_inode; vat_block--) {
+ vat_block >= start_block - 3; vat_block--) {
ino.logicalBlockNum = vat_block - map->s_partition_root;
- sbi->s_vat_inode = udf_iget(sb, &ino);
+ inode = udf_iget(sb, &ino);
+ if (!IS_ERR(inode)) {
+ sbi->s_vat_inode = inode;
+ break;
+ }
}
}
@@ -2205,10 +2218,10 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
/* assign inodes by physical block number */
/* perhaps it's not extensible enough, but for now ... */
inode = udf_iget(sb, &rootdir);
- if (!inode) {
+ if (IS_ERR(inode)) {
udf_err(sb, "Error in udf_iget, block=%d, partition=%d\n",
rootdir.logicalBlockNum, rootdir.partitionReferenceNum);
- ret = -EIO;
+ ret = PTR_ERR(inode);
goto error_out;
}
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index d7c6dbe4194..6fb7945c1e6 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -20,7 +20,7 @@
*/
#include "udfdecl.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/time.h>
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index be7dabbbcb4..742557be993 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -143,7 +143,6 @@ extern int udf_expand_file_adinicb(struct inode *);
extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *);
extern struct buffer_head *udf_bread(struct inode *, int, int, int *);
extern int udf_setsize(struct inode *, loff_t);
-extern void udf_read_inode(struct inode *);
extern void udf_evict_inode(struct inode *);
extern int udf_write_inode(struct inode *, struct writeback_control *wbc);
extern long udf_block_map(struct inode *, sector_t);
@@ -209,7 +208,7 @@ extern int udf_CS0toUTF8(struct ustr *, const struct ustr *);
/* ialloc.c */
extern void udf_free_inode(struct inode *);
-extern struct inode *udf_new_inode(struct inode *, umode_t, int *);
+extern struct inode *udf_new_inode(struct inode *, umode_t);
/* truncate.c */
extern void udf_truncate_tail_extent(struct inode *);
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 44b815e57f9..afd470e588f 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -412,7 +412,6 @@ static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName,
int extIndex = 0, newExtIndex = 0, hasExt = 0;
unsigned short valueCRC;
uint8_t curr;
- const uint8_t hexChar[] = "0123456789ABCDEF";
if (udfName[0] == '.' &&
(udfLen == 1 || (udfLen == 2 && udfName[1] == '.'))) {
@@ -477,10 +476,10 @@ static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName,
newIndex = 250;
newName[newIndex++] = CRC_MARK;
valueCRC = crc_itu_t(0, fidName, fidNameLen);
- newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12];
- newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8];
- newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4];
- newName[newIndex++] = hexChar[(valueCRC & 0x000f)];
+ newName[newIndex++] = hex_asc_upper_hi(valueCRC >> 8);
+ newName[newIndex++] = hex_asc_upper_lo(valueCRC >> 8);
+ newName[newIndex++] = hex_asc_upper_hi(valueCRC);
+ newName[newIndex++] = hex_asc_upper_lo(valueCRC);
if (hasExt) {
newName[newIndex++] = EXT_MARK;
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 7c580c97990..be7d42c7d93 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -902,9 +902,6 @@ void ufs_evict_inode(struct inode * inode)
invalidate_inode_buffers(inode);
clear_inode(inode);
- if (want_delete) {
- lock_ufs(inode->i_sb);
- ufs_free_inode (inode);
- unlock_ufs(inode->i_sb);
- }
+ if (want_delete)
+ ufs_free_inode(inode);
}
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 90d74b8f8eb..2df62a73f20 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -126,12 +126,12 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
if (l > sb->s_blocksize)
goto out_notlocked;
- lock_ufs(dir->i_sb);
inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO);
err = PTR_ERR(inode);
if (IS_ERR(inode))
- goto out;
+ goto out_notlocked;
+ lock_ufs(dir->i_sb);
if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) {
/* slow symlink */
inode->i_op = &ufs_symlink_inode_operations;
@@ -181,13 +181,9 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
struct inode * inode;
int err;
- lock_ufs(dir->i_sb);
- inode_inc_link_count(dir);
-
inode = ufs_new_inode(dir, S_IFDIR|mode);
- err = PTR_ERR(inode);
if (IS_ERR(inode))
- goto out_dir;
+ return PTR_ERR(inode);
inode->i_op = &ufs_dir_inode_operations;
inode->i_fop = &ufs_dir_operations;
@@ -195,6 +191,9 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
inode_inc_link_count(inode);
+ lock_ufs(dir->i_sb);
+ inode_inc_link_count(dir);
+
err = ufs_make_empty(inode, dir);
if (err)
goto out_fail;
@@ -212,7 +211,6 @@ out_fail:
inode_dec_link_count(inode);
inode_dec_link_count(inode);
iput (inode);
-out_dir:
inode_dec_link_count(dir);
unlock_ufs(dir->i_sb);
goto out;
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 399e8cec6e6..5d47b4df61e 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -1,6 +1,7 @@
config XFS_FS
tristate "XFS filesystem support"
depends on BLOCK
+ depends on (64BIT || LBDAF)
select EXPORTFS
select LIBCRC32C
help
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index c21f4350666..d6179994958 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -17,6 +17,7 @@
#
ccflags-y += -I$(src) # needed for trace events
+ccflags-y += -I$(src)/libxfs
ccflags-$(CONFIG_XFS_DEBUG) += -g
@@ -25,6 +26,39 @@ obj-$(CONFIG_XFS_FS) += xfs.o
# this one should be compiled first, as the tracing macros can easily blow up
xfs-y += xfs_trace.o
+# build the libxfs code first
+xfs-y += $(addprefix libxfs/, \
+ xfs_alloc.o \
+ xfs_alloc_btree.o \
+ xfs_attr.o \
+ xfs_attr_leaf.o \
+ xfs_attr_remote.o \
+ xfs_bmap.o \
+ xfs_bmap_btree.o \
+ xfs_btree.o \
+ xfs_da_btree.o \
+ xfs_da_format.o \
+ xfs_dir2.o \
+ xfs_dir2_block.o \
+ xfs_dir2_data.o \
+ xfs_dir2_leaf.o \
+ xfs_dir2_node.o \
+ xfs_dir2_sf.o \
+ xfs_dquot_buf.o \
+ xfs_ialloc.o \
+ xfs_ialloc_btree.o \
+ xfs_inode_fork.o \
+ xfs_inode_buf.o \
+ xfs_log_rlimit.o \
+ xfs_sb.o \
+ xfs_symlink_remote.o \
+ xfs_trans_resv.o \
+ )
+# xfs_rtbitmap is shared with libxfs
+xfs-$(CONFIG_XFS_RT) += $(addprefix libxfs/, \
+ xfs_rtbitmap.o \
+ )
+
# highlevel code
xfs-y += xfs_aops.o \
xfs_attr_inactive.o \
@@ -45,53 +79,27 @@ xfs-y += xfs_aops.o \
xfs_ioctl.o \
xfs_iomap.o \
xfs_iops.o \
+ xfs_inode.o \
xfs_itable.o \
xfs_message.o \
xfs_mount.o \
xfs_mru_cache.o \
xfs_super.o \
xfs_symlink.o \
+ xfs_sysfs.o \
xfs_trans.o \
xfs_xattr.o \
kmem.o \
uuid.o
-# code shared with libxfs
-xfs-y += xfs_alloc.o \
- xfs_alloc_btree.o \
- xfs_attr.o \
- xfs_attr_leaf.o \
- xfs_attr_remote.o \
- xfs_bmap.o \
- xfs_bmap_btree.o \
- xfs_btree.o \
- xfs_da_btree.o \
- xfs_da_format.o \
- xfs_dir2.o \
- xfs_dir2_block.o \
- xfs_dir2_data.o \
- xfs_dir2_leaf.o \
- xfs_dir2_node.o \
- xfs_dir2_sf.o \
- xfs_dquot_buf.o \
- xfs_ialloc.o \
- xfs_ialloc_btree.o \
- xfs_icreate_item.o \
- xfs_inode.o \
- xfs_inode_fork.o \
- xfs_inode_buf.o \
- xfs_log_recover.o \
- xfs_log_rlimit.o \
- xfs_sb.o \
- xfs_symlink_remote.o \
- xfs_trans_resv.o
-
# low-level transaction/log code
xfs-y += xfs_log.o \
xfs_log_cil.o \
xfs_buf_item.o \
xfs_extfree_item.o \
+ xfs_icreate_item.o \
xfs_inode_item.o \
+ xfs_log_recover.o \
xfs_trans_ail.o \
xfs_trans_buf.o \
xfs_trans_extfree.o \
@@ -107,8 +115,7 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
xfs_quotaops.o
# xfs_rtbitmap is shared with libxfs
-xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o \
- xfs_rtbitmap.o
+xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
xfs-$(CONFIG_PROC_FS) += xfs_stats.o
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 6e247a99f5d..6e247a99f5d 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index d43813267a8..4bffffe038a 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -483,9 +483,9 @@ xfs_agfl_read_verify(
return;
if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
- xfs_buf_ioerror(bp, EFSBADCRC);
+ xfs_buf_ioerror(bp, -EFSBADCRC);
else if (!xfs_agfl_verify(bp))
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error)
xfs_verifier_error(bp);
@@ -503,7 +503,7 @@ xfs_agfl_write_verify(
return;
if (!xfs_agfl_verify(bp)) {
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
@@ -559,7 +559,7 @@ xfs_alloc_update_counters(
xfs_trans_agblocks_delta(tp, len);
if (unlikely(be32_to_cpu(agf->agf_freeblks) >
be32_to_cpu(agf->agf_length)))
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
return 0;
@@ -2234,11 +2234,11 @@ xfs_agf_read_verify(
if (xfs_sb_version_hascrc(&mp->m_sb) &&
!xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
- xfs_buf_ioerror(bp, EFSBADCRC);
+ xfs_buf_ioerror(bp, -EFSBADCRC);
else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
XFS_ERRTAG_ALLOC_READ_AGF,
XFS_RANDOM_ALLOC_READ_AGF))
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error)
xfs_verifier_error(bp);
@@ -2252,7 +2252,7 @@ xfs_agf_write_verify(
struct xfs_buf_log_item *bip = bp->b_fspriv;
if (!xfs_agf_verify(mp, bp)) {
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
@@ -2601,11 +2601,11 @@ xfs_free_extent(
*/
args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
if (args.agno >= args.mp->m_sb.sb_agcount)
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
if (args.agbno >= args.mp->m_sb.sb_agblocks)
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
args.pag = xfs_perag_get(args.mp, args.agno);
ASSERT(args.pag);
@@ -2617,7 +2617,7 @@ xfs_free_extent(
/* validate the extent size is legal now we have the agf locked */
if (args.agbno + len >
be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto error0;
}
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index feacb061bab..feacb061bab 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 8358f1ded94..e0e83e24d3e 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -355,9 +355,9 @@ xfs_allocbt_read_verify(
struct xfs_buf *bp)
{
if (!xfs_btree_sblock_verify_crc(bp))
- xfs_buf_ioerror(bp, EFSBADCRC);
+ xfs_buf_ioerror(bp, -EFSBADCRC);
else if (!xfs_allocbt_verify(bp))
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
@@ -371,7 +371,7 @@ xfs_allocbt_write_verify(
{
if (!xfs_allocbt_verify(bp)) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h
index 45e189e7e81..45e189e7e81 100644
--- a/fs/xfs/xfs_alloc_btree.h
+++ b/fs/xfs/libxfs/xfs_alloc_btree.h
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index bfe36fc2cdc..353fb425fae 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -85,7 +85,7 @@ xfs_attr_args_init(
{
if (!name)
- return EINVAL;
+ return -EINVAL;
memset(args, 0, sizeof(*args));
args->geo = dp->i_mount->m_attr_geo;
@@ -95,7 +95,7 @@ xfs_attr_args_init(
args->name = name;
args->namelen = strlen((const char *)name);
if (args->namelen >= MAXNAMELEN)
- return EFAULT; /* match IRIX behaviour */
+ return -EFAULT; /* match IRIX behaviour */
args->hashval = xfs_da_hashname(args->name, args->namelen);
return 0;
@@ -131,10 +131,10 @@ xfs_attr_get(
XFS_STATS_INC(xs_attr_get);
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- return EIO;
+ return -EIO;
if (!xfs_inode_hasattr(ip))
- return ENOATTR;
+ return -ENOATTR;
error = xfs_attr_args_init(&args, ip, name, flags);
if (error)
@@ -145,7 +145,7 @@ xfs_attr_get(
lock_mode = xfs_ilock_attr_map_shared(ip);
if (!xfs_inode_hasattr(ip))
- error = ENOATTR;
+ error = -ENOATTR;
else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
error = xfs_attr_shortform_getvalue(&args);
else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
@@ -155,7 +155,7 @@ xfs_attr_get(
xfs_iunlock(ip, lock_mode);
*valuelenp = args.valuelen;
- return error == EEXIST ? 0 : error;
+ return error == -EEXIST ? 0 : error;
}
/*
@@ -213,7 +213,7 @@ xfs_attr_set(
XFS_STATS_INC(xs_attr_set);
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
- return EIO;
+ return -EIO;
error = xfs_attr_args_init(&args, dp, name, flags);
if (error)
@@ -304,7 +304,7 @@ xfs_attr_set(
* the inode.
*/
error = xfs_attr_shortform_addname(&args);
- if (error != ENOSPC) {
+ if (error != -ENOSPC) {
/*
* Commit the shortform mods, and we're done.
* NOTE: this is also the error path (EEXIST, etc).
@@ -419,10 +419,10 @@ xfs_attr_remove(
XFS_STATS_INC(xs_attr_remove);
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
- return EIO;
+ return -EIO;
if (!xfs_inode_hasattr(dp))
- return ENOATTR;
+ return -ENOATTR;
error = xfs_attr_args_init(&args, dp, name, flags);
if (error)
@@ -477,7 +477,7 @@ xfs_attr_remove(
xfs_trans_ijoin(args.trans, dp, 0);
if (!xfs_inode_hasattr(dp)) {
- error = XFS_ERROR(ENOATTR);
+ error = -ENOATTR;
} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
error = xfs_attr_shortform_remove(&args);
@@ -534,28 +534,28 @@ xfs_attr_shortform_addname(xfs_da_args_t *args)
trace_xfs_attr_sf_addname(args);
retval = xfs_attr_shortform_lookup(args);
- if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
- return(retval);
- } else if (retval == EEXIST) {
+ if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
+ return retval;
+ } else if (retval == -EEXIST) {
if (args->flags & ATTR_CREATE)
- return(retval);
+ return retval;
retval = xfs_attr_shortform_remove(args);
ASSERT(retval == 0);
}
if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX)
- return(XFS_ERROR(ENOSPC));
+ return -ENOSPC;
newsize = XFS_ATTR_SF_TOTSIZE(args->dp);
newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize);
if (!forkoff)
- return(XFS_ERROR(ENOSPC));
+ return -ENOSPC;
xfs_attr_shortform_add(args, forkoff);
- return(0);
+ return 0;
}
@@ -592,10 +592,10 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
* the given flags produce an error or call for an atomic rename.
*/
retval = xfs_attr3_leaf_lookup_int(bp, args);
- if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
+ if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
xfs_trans_brelse(args->trans, bp);
return retval;
- } else if (retval == EEXIST) {
+ } else if (retval == -EEXIST) {
if (args->flags & ATTR_CREATE) { /* pure create op */
xfs_trans_brelse(args->trans, bp);
return retval;
@@ -626,7 +626,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
* if required.
*/
retval = xfs_attr3_leaf_add(bp, args);
- if (retval == ENOSPC) {
+ if (retval == -ENOSPC) {
/*
* Promote the attribute list to the Btree format, then
* Commit that transaction so that the node_addname() call
@@ -642,7 +642,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
- return(error);
+ return error;
}
/*
@@ -658,13 +658,13 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
*/
error = xfs_trans_roll(&args->trans, dp);
if (error)
- return (error);
+ return error;
/*
* Fob the whole rest of the problem off on the Btree code.
*/
error = xfs_attr_node_addname(args);
- return(error);
+ return error;
}
/*
@@ -673,7 +673,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
*/
error = xfs_trans_roll(&args->trans, dp);
if (error)
- return (error);
+ return error;
/*
* If there was an out-of-line value, allocate the blocks we
@@ -684,7 +684,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
if (args->rmtblkno > 0) {
error = xfs_attr_rmtval_set(args);
if (error)
- return(error);
+ return error;
}
/*
@@ -700,7 +700,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
*/
error = xfs_attr3_leaf_flipflags(args);
if (error)
- return(error);
+ return error;
/*
* Dismantle the "old" attribute/value pair by removing
@@ -714,7 +714,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
if (args->rmtblkno) {
error = xfs_attr_rmtval_remove(args);
if (error)
- return(error);
+ return error;
}
/*
@@ -744,7 +744,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
- return(error);
+ return error;
}
/*
@@ -795,7 +795,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
return error;
error = xfs_attr3_leaf_lookup_int(bp, args);
- if (error == ENOATTR) {
+ if (error == -ENOATTR) {
xfs_trans_brelse(args->trans, bp);
return error;
}
@@ -850,7 +850,7 @@ xfs_attr_leaf_get(xfs_da_args_t *args)
return error;
error = xfs_attr3_leaf_lookup_int(bp, args);
- if (error != EEXIST) {
+ if (error != -EEXIST) {
xfs_trans_brelse(args->trans, bp);
return error;
}
@@ -906,9 +906,9 @@ restart:
goto out;
blk = &state->path.blk[ state->path.active-1 ];
ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
- if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
+ if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
goto out;
- } else if (retval == EEXIST) {
+ } else if (retval == -EEXIST) {
if (args->flags & ATTR_CREATE)
goto out;
@@ -933,7 +933,7 @@ restart:
}
retval = xfs_attr3_leaf_add(blk->bp, state->args);
- if (retval == ENOSPC) {
+ if (retval == -ENOSPC) {
if (state->path.active == 1) {
/*
* Its really a single leaf node, but it had
@@ -1031,7 +1031,7 @@ restart:
if (args->rmtblkno > 0) {
error = xfs_attr_rmtval_set(args);
if (error)
- return(error);
+ return error;
}
/*
@@ -1061,7 +1061,7 @@ restart:
if (args->rmtblkno) {
error = xfs_attr_rmtval_remove(args);
if (error)
- return(error);
+ return error;
}
/*
@@ -1134,8 +1134,8 @@ out:
if (state)
xfs_da_state_free(state);
if (error)
- return(error);
- return(retval);
+ return error;
+ return retval;
}
/*
@@ -1168,7 +1168,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
* Search to see if name exists, and get back a pointer to it.
*/
error = xfs_da3_node_lookup_int(state, &retval);
- if (error || (retval != EEXIST)) {
+ if (error || (retval != -EEXIST)) {
if (error == 0)
error = retval;
goto out;
@@ -1297,7 +1297,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
out:
xfs_da_state_free(state);
- return(error);
+ return error;
}
/*
@@ -1345,7 +1345,7 @@ xfs_attr_fillstate(xfs_da_state_t *state)
}
}
- return(0);
+ return 0;
}
/*
@@ -1376,7 +1376,7 @@ xfs_attr_refillstate(xfs_da_state_t *state)
blk->blkno, blk->disk_blkno,
&blk->bp, XFS_ATTR_FORK);
if (error)
- return(error);
+ return error;
} else {
blk->bp = NULL;
}
@@ -1395,13 +1395,13 @@ xfs_attr_refillstate(xfs_da_state_t *state)
blk->blkno, blk->disk_blkno,
&blk->bp, XFS_ATTR_FORK);
if (error)
- return(error);
+ return error;
} else {
blk->bp = NULL;
}
}
- return(0);
+ return 0;
}
/*
@@ -1431,7 +1431,7 @@ xfs_attr_node_get(xfs_da_args_t *args)
error = xfs_da3_node_lookup_int(state, &retval);
if (error) {
retval = error;
- } else if (retval == EEXIST) {
+ } else if (retval == -EEXIST) {
blk = &state->path.blk[ state->path.active-1 ];
ASSERT(blk->bp != NULL);
ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
@@ -1455,5 +1455,5 @@ xfs_attr_node_get(xfs_da_args_t *args)
}
xfs_da_state_free(state);
- return(retval);
+ return retval;
}
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 28712d29e43..b1f73dbbf3d 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -214,7 +214,7 @@ xfs_attr3_leaf_write_verify(
struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr;
if (!xfs_attr3_leaf_verify(bp)) {
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
@@ -242,9 +242,9 @@ xfs_attr3_leaf_read_verify(
if (xfs_sb_version_hascrc(&mp->m_sb) &&
!xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF))
- xfs_buf_ioerror(bp, EFSBADCRC);
+ xfs_buf_ioerror(bp, -EFSBADCRC);
else if (!xfs_attr3_leaf_verify(bp))
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error)
xfs_verifier_error(bp);
@@ -547,7 +547,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
break;
}
if (i == end)
- return(XFS_ERROR(ENOATTR));
+ return -ENOATTR;
/*
* Fix up the attribute fork data, covering the hole
@@ -582,7 +582,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
xfs_sbversion_add_attr2(mp, args->trans);
- return(0);
+ return 0;
}
/*
@@ -611,9 +611,9 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args)
continue;
if (!xfs_attr_namesp_match(args->flags, sfe->flags))
continue;
- return(XFS_ERROR(EEXIST));
+ return -EEXIST;
}
- return(XFS_ERROR(ENOATTR));
+ return -ENOATTR;
}
/*
@@ -640,18 +640,18 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
continue;
if (args->flags & ATTR_KERNOVAL) {
args->valuelen = sfe->valuelen;
- return(XFS_ERROR(EEXIST));
+ return -EEXIST;
}
if (args->valuelen < sfe->valuelen) {
args->valuelen = sfe->valuelen;
- return(XFS_ERROR(ERANGE));
+ return -ERANGE;
}
args->valuelen = sfe->valuelen;
memcpy(args->value, &sfe->nameval[args->namelen],
args->valuelen);
- return(XFS_ERROR(EEXIST));
+ return -EEXIST;
}
- return(XFS_ERROR(ENOATTR));
+ return -ENOATTR;
}
/*
@@ -691,7 +691,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
* If we hit an IO error middle of the transaction inside
* grow_inode(), we may have inconsistent data. Bail out.
*/
- if (error == EIO)
+ if (error == -EIO)
goto out;
xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */
memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */
@@ -730,9 +730,9 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
sfe->namelen);
nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags);
error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */
- ASSERT(error == ENOATTR);
+ ASSERT(error == -ENOATTR);
error = xfs_attr3_leaf_add(bp, &nargs);
- ASSERT(error != ENOSPC);
+ ASSERT(error != -ENOSPC);
if (error)
goto out;
sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
@@ -741,7 +741,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
out:
kmem_free(tmpbuffer);
- return(error);
+ return error;
}
/*
@@ -769,12 +769,12 @@ xfs_attr_shortform_allfit(
if (entry->flags & XFS_ATTR_INCOMPLETE)
continue; /* don't copy partial entries */
if (!(entry->flags & XFS_ATTR_LOCAL))
- return(0);
+ return 0;
name_loc = xfs_attr3_leaf_name_local(leaf, i);
if (name_loc->namelen >= XFS_ATTR_SF_ENTSIZE_MAX)
- return(0);
+ return 0;
if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX)
- return(0);
+ return 0;
bytes += sizeof(struct xfs_attr_sf_entry) - 1
+ name_loc->namelen
+ be16_to_cpu(name_loc->valuelen);
@@ -809,7 +809,7 @@ xfs_attr3_leaf_to_shortform(
tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP);
if (!tmpbuffer)
- return ENOMEM;
+ return -ENOMEM;
memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
@@ -1017,10 +1017,10 @@ xfs_attr3_leaf_split(
ASSERT(oldblk->magic == XFS_ATTR_LEAF_MAGIC);
error = xfs_da_grow_inode(state->args, &blkno);
if (error)
- return(error);
+ return error;
error = xfs_attr3_leaf_create(state->args, blkno, &newblk->bp);
if (error)
- return(error);
+ return error;
newblk->blkno = blkno;
newblk->magic = XFS_ATTR_LEAF_MAGIC;
@@ -1031,7 +1031,7 @@ xfs_attr3_leaf_split(
xfs_attr3_leaf_rebalance(state, oldblk, newblk);
error = xfs_da3_blk_link(state, oldblk, newblk);
if (error)
- return(error);
+ return error;
/*
* Save info on "old" attribute for "atomic rename" ops, leaf_add()
@@ -1053,7 +1053,7 @@ xfs_attr3_leaf_split(
*/
oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL);
newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL);
- return(error);
+ return error;
}
/*
@@ -1108,7 +1108,7 @@ xfs_attr3_leaf_add(
* no good and we should just give up.
*/
if (!ichdr.holes && sum < entsize)
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
/*
* Compact the entries to coalesce free space.
@@ -1121,7 +1121,7 @@ xfs_attr3_leaf_add(
* free region, in freemap[0]. If it is not big enough, give up.
*/
if (ichdr.freemap[0].size < (entsize + sizeof(xfs_attr_leaf_entry_t))) {
- tmp = ENOSPC;
+ tmp = -ENOSPC;
goto out_log_hdr;
}
@@ -1692,7 +1692,7 @@ xfs_attr3_leaf_toosmall(
ichdr.usedbytes;
if (bytes > (state->args->geo->blksize >> 1)) {
*action = 0; /* blk over 50%, don't try to join */
- return(0);
+ return 0;
}
/*
@@ -1711,7 +1711,7 @@ xfs_attr3_leaf_toosmall(
error = xfs_da3_path_shift(state, &state->altpath, forward,
0, &retval);
if (error)
- return(error);
+ return error;
if (retval) {
*action = 0;
} else {
@@ -1740,7 +1740,7 @@ xfs_attr3_leaf_toosmall(
error = xfs_attr3_leaf_read(state->args->trans, state->args->dp,
blkno, -1, &bp);
if (error)
- return(error);
+ return error;
xfs_attr3_leaf_hdr_from_disk(&ichdr2, bp->b_addr);
@@ -1757,7 +1757,7 @@ xfs_attr3_leaf_toosmall(
}
if (i >= 2) {
*action = 0;
- return(0);
+ return 0;
}
/*
@@ -1773,13 +1773,13 @@ xfs_attr3_leaf_toosmall(
0, &retval);
}
if (error)
- return(error);
+ return error;
if (retval) {
*action = 0;
} else {
*action = 1;
}
- return(0);
+ return 0;
}
/*
@@ -2123,7 +2123,7 @@ xfs_attr3_leaf_lookup_int(
}
if (probe == ichdr.count || be32_to_cpu(entry->hashval) != hashval) {
args->index = probe;
- return XFS_ERROR(ENOATTR);
+ return -ENOATTR;
}
/*
@@ -2152,7 +2152,7 @@ xfs_attr3_leaf_lookup_int(
if (!xfs_attr_namesp_match(args->flags, entry->flags))
continue;
args->index = probe;
- return XFS_ERROR(EEXIST);
+ return -EEXIST;
} else {
name_rmt = xfs_attr3_leaf_name_remote(leaf, probe);
if (name_rmt->namelen != args->namelen)
@@ -2168,11 +2168,11 @@ xfs_attr3_leaf_lookup_int(
args->rmtblkcnt = xfs_attr3_rmt_blocks(
args->dp->i_mount,
args->rmtvaluelen);
- return XFS_ERROR(EEXIST);
+ return -EEXIST;
}
}
args->index = probe;
- return XFS_ERROR(ENOATTR);
+ return -ENOATTR;
}
/*
@@ -2208,7 +2208,7 @@ xfs_attr3_leaf_getvalue(
}
if (args->valuelen < valuelen) {
args->valuelen = valuelen;
- return XFS_ERROR(ERANGE);
+ return -ERANGE;
}
args->valuelen = valuelen;
memcpy(args->value, &name_loc->nameval[args->namelen], valuelen);
@@ -2226,7 +2226,7 @@ xfs_attr3_leaf_getvalue(
}
if (args->valuelen < args->rmtvaluelen) {
args->valuelen = args->rmtvaluelen;
- return XFS_ERROR(ERANGE);
+ return -ERANGE;
}
args->valuelen = args->rmtvaluelen;
}
@@ -2481,7 +2481,7 @@ xfs_attr3_leaf_clearflag(
*/
error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
if (error)
- return(error);
+ return error;
leaf = bp->b_addr;
entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
@@ -2548,7 +2548,7 @@ xfs_attr3_leaf_setflag(
*/
error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
if (error)
- return(error);
+ return error;
leaf = bp->b_addr;
#ifdef DEBUG
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index e2929da7c3b..e2929da7c3b 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index b5adfecbb8e..7510ab8058a 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -138,11 +138,11 @@ xfs_attr3_rmt_read_verify(
while (len > 0) {
if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) {
- xfs_buf_ioerror(bp, EFSBADCRC);
+ xfs_buf_ioerror(bp, -EFSBADCRC);
break;
}
if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
break;
}
len -= blksize;
@@ -178,7 +178,7 @@ xfs_attr3_rmt_write_verify(
while (len > 0) {
if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
@@ -257,7 +257,7 @@ xfs_attr_rmtval_copyout(
xfs_alert(mp,
"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
bno, *offset, byte_cnt, ino);
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
}
hdr_size = sizeof(struct xfs_attr3_rmt_hdr);
}
@@ -452,7 +452,7 @@ xfs_attr_rmtval_set(
ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
- return(error);
+ return error;
}
/*
@@ -473,7 +473,7 @@ xfs_attr_rmtval_set(
*/
error = xfs_trans_roll(&args->trans, dp);
if (error)
- return (error);
+ return error;
}
/*
@@ -498,7 +498,7 @@ xfs_attr_rmtval_set(
blkcnt, &map, &nmap,
XFS_BMAPI_ATTRFORK);
if (error)
- return(error);
+ return error;
ASSERT(nmap == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
(map.br_startblock != HOLESTARTBLOCK));
@@ -508,7 +508,7 @@ xfs_attr_rmtval_set(
bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0);
if (!bp)
- return ENOMEM;
+ return -ENOMEM;
bp->b_ops = &xfs_attr3_rmt_buf_ops;
xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset,
@@ -563,7 +563,7 @@ xfs_attr_rmtval_remove(
error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK);
if (error)
- return(error);
+ return error;
ASSERT(nmap == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
(map.br_startblock != HOLESTARTBLOCK));
@@ -622,7 +622,7 @@ xfs_attr_rmtval_remove(
*/
error = xfs_trans_roll(&args->trans, args->dp);
if (error)
- return (error);
+ return error;
}
- return(0);
+ return 0;
}
diff --git a/fs/xfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h
index 5a9acfa156d..5a9acfa156d 100644
--- a/fs/xfs/xfs_attr_remote.h
+++ b/fs/xfs/libxfs/xfs_attr_remote.h
diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h
index 919756e3ba5..919756e3ba5 100644
--- a/fs/xfs/xfs_attr_sf.h
+++ b/fs/xfs/libxfs/xfs_attr_sf.h
diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/libxfs/xfs_bit.h
index e1649c0d3e0..e1649c0d3e0 100644
--- a/fs/xfs/xfs_bit.h
+++ b/fs/xfs/libxfs/xfs_bit.h
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 75c3fe5f3d9..86df952d3e2 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -392,7 +392,7 @@ xfs_bmap_check_leaf_extents(
pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
bno = be64_to_cpu(*pp);
- ASSERT(bno != NULLDFSBNO);
+ ASSERT(bno != NULLFSBLOCK);
ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
@@ -1033,7 +1033,7 @@ xfs_bmap_add_attrfork_btree(
goto error0;
if (stat == 0) {
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
}
*firstblock = cur->bc_private.b.firstblock;
cur->bc_private.b.allocated = 0;
@@ -1115,7 +1115,7 @@ xfs_bmap_add_attrfork_local(
/* should only be called for types that support local format data */
ASSERT(0);
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
}
/*
@@ -1192,7 +1192,7 @@ xfs_bmap_add_attrfork(
break;
default:
ASSERT(0);
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto trans_cancel;
}
@@ -1299,7 +1299,7 @@ xfs_bmap_read_extents(
ASSERT(level > 0);
pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
bno = be64_to_cpu(*pp);
- ASSERT(bno != NULLDFSBNO);
+ ASSERT(bno != NULLFSBLOCK);
ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
/*
@@ -1399,7 +1399,7 @@ xfs_bmap_read_extents(
return 0;
error0:
xfs_trans_brelse(tp, bp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
@@ -1429,11 +1429,7 @@ xfs_bmap_search_multi_extents(
gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL;
gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL;
gotp->br_state = XFS_EXT_INVALID;
-#if XFS_BIG_BLKNOS
gotp->br_startblock = 0xffffa5a5a5a5a5a5LL;
-#else
- gotp->br_startblock = 0xffffa5a5;
-#endif
prevp->br_startoff = NULLFILEOFF;
ep = xfs_iext_bno_to_ext(ifp, bno, &lastx);
@@ -1576,7 +1572,7 @@ xfs_bmap_last_before(
if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
- return XFS_ERROR(EIO);
+ return -EIO;
if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
*last_block = 0;
return 0;
@@ -1690,7 +1686,7 @@ xfs_bmap_last_offset(
if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
- return XFS_ERROR(EIO);
+ return -EIO;
error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
if (error || is_empty)
@@ -3323,7 +3319,7 @@ xfs_bmap_extsize_align(
if (orig_off < align_off ||
orig_end > align_off + align_alen ||
align_alen - temp < orig_alen)
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
/*
* Try to fix it by moving the start up.
*/
@@ -3348,7 +3344,7 @@ xfs_bmap_extsize_align(
* Result doesn't cover the request, fail it.
*/
if (orig_off < align_off || orig_end > align_off + align_alen)
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
} else {
ASSERT(orig_off >= align_off);
ASSERT(orig_end <= align_off + align_alen);
@@ -4051,11 +4047,11 @@ xfs_bmapi_read(
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
XFS_STATS_INC(xs_blk_mapr);
@@ -4246,11 +4242,11 @@ xfs_bmapi_delay(
XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
XFS_STATS_INC(xs_blk_mapw);
@@ -4469,7 +4465,7 @@ xfs_bmapi_convert_unwritten(
* so generate another request.
*/
if (mval->br_blockcount < len)
- return EAGAIN;
+ return -EAGAIN;
return 0;
}
@@ -4540,11 +4536,11 @@ xfs_bmapi_write(
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -4620,7 +4616,7 @@ xfs_bmapi_write(
/* Execute unwritten extent conversion if necessary */
error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
- if (error == EAGAIN)
+ if (error == -EAGAIN)
continue;
if (error)
goto error0;
@@ -4922,7 +4918,7 @@ xfs_bmap_del_extent(
goto done;
cur->bc_rec.b = new;
error = xfs_btree_insert(cur, &i);
- if (error && error != ENOSPC)
+ if (error && error != -ENOSPC)
goto done;
/*
* If get no-space back from btree insert,
@@ -4930,7 +4926,7 @@ xfs_bmap_del_extent(
* block reservation.
* Fix up our state and return the error.
*/
- if (error == ENOSPC) {
+ if (error == -ENOSPC) {
/*
* Reset the cursor, don't trust
* it after any insert operation.
@@ -4958,7 +4954,7 @@ xfs_bmap_del_extent(
xfs_bmbt_set_blockcount(ep,
got.br_blockcount);
flags = 0;
- error = XFS_ERROR(ENOSPC);
+ error = -ENOSPC;
goto done;
}
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
@@ -5076,11 +5072,11 @@ xfs_bunmapi(
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW,
ip->i_mount);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
mp = ip->i_mount;
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(len > 0);
@@ -5325,7 +5321,7 @@ xfs_bunmapi(
del.br_startoff > got.br_startoff &&
del.br_startoff + del.br_blockcount <
got.br_startoff + got.br_blockcount) {
- error = XFS_ERROR(ENOSPC);
+ error = -ENOSPC;
goto error0;
}
error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del,
@@ -5428,7 +5424,7 @@ xfs_bmap_shift_extents(
struct xfs_bmap_free *flist,
int num_exts)
{
- struct xfs_btree_cur *cur;
+ struct xfs_btree_cur *cur = NULL;
struct xfs_bmbt_rec_host *gotp;
struct xfs_bmbt_irec got;
struct xfs_bmbt_irec left;
@@ -5439,7 +5435,7 @@ xfs_bmap_shift_extents(
int error = 0;
int i;
int whichfork = XFS_DATA_FORK;
- int logflags;
+ int logflags = 0;
xfs_filblks_t blockcount = 0;
int total_extents;
@@ -5449,11 +5445,11 @@ xfs_bmap_shift_extents(
mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
XFS_ERROR_REPORT("xfs_bmap_shift_extents",
XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
ASSERT(current_ext != NULL);
@@ -5482,16 +5478,11 @@ xfs_bmap_shift_extents(
}
}
- /* We are going to change core inode */
- logflags = XFS_ILOG_CORE;
if (ifp->if_flags & XFS_IFBROOT) {
cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
cur->bc_private.b.firstblock = *firstblock;
cur->bc_private.b.flist = flist;
cur->bc_private.b.flags = 0;
- } else {
- cur = NULL;
- logflags |= XFS_ILOG_DEXT;
}
/*
@@ -5516,14 +5507,14 @@ xfs_bmap_shift_extents(
*current_ext - 1), &left);
if (startoff < left.br_startoff + left.br_blockcount)
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
} else if (offset_shift_fsb > got.br_startoff) {
/*
* When first extent is shifted, offset_shift_fsb
* should be less than the stating offset of
* the first extent.
*/
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
}
if (error)
@@ -5549,11 +5540,14 @@ xfs_bmap_shift_extents(
blockcount = left.br_blockcount +
got.br_blockcount;
xfs_iext_remove(ip, *current_ext, 1, 0);
+ logflags |= XFS_ILOG_CORE;
if (cur) {
error = xfs_btree_delete(cur, &i);
if (error)
goto del_cursor;
XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+ } else {
+ logflags |= XFS_ILOG_DEXT;
}
XFS_IFORK_NEXT_SET(ip, whichfork,
XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
@@ -5579,6 +5573,7 @@ xfs_bmap_shift_extents(
got.br_startoff = startoff;
}
+ logflags |= XFS_ILOG_CORE;
if (cur) {
error = xfs_bmbt_update(cur, got.br_startoff,
got.br_startblock,
@@ -5586,6 +5581,8 @@ xfs_bmap_shift_extents(
got.br_state);
if (error)
goto del_cursor;
+ } else {
+ logflags |= XFS_ILOG_DEXT;
}
(*current_ext)++;
@@ -5601,6 +5598,7 @@ del_cursor:
xfs_btree_del_cursor(cur,
error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
- xfs_trans_log_inode(tp, ip, logflags);
+ if (logflags)
+ xfs_trans_log_inode(tp, ip, logflags);
return error;
}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index b879ca56a64..b879ca56a64 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 948836c4fd9..fba753308f3 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -111,23 +111,8 @@ __xfs_bmbt_get_all(
ext_flag = (int)(l0 >> (64 - BMBT_EXNTFLAG_BITLEN));
s->br_startoff = ((xfs_fileoff_t)l0 &
xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
-#if XFS_BIG_BLKNOS
s->br_startblock = (((xfs_fsblock_t)l0 & xfs_mask64lo(9)) << 43) |
(((xfs_fsblock_t)l1) >> 21);
-#else
-#ifdef DEBUG
- {
- xfs_dfsbno_t b;
-
- b = (((xfs_dfsbno_t)l0 & xfs_mask64lo(9)) << 43) |
- (((xfs_dfsbno_t)l1) >> 21);
- ASSERT((b >> 32) == 0 || isnulldstartblock(b));
- s->br_startblock = (xfs_fsblock_t)b;
- }
-#else /* !DEBUG */
- s->br_startblock = (xfs_fsblock_t)(((xfs_dfsbno_t)l1) >> 21);
-#endif /* DEBUG */
-#endif /* XFS_BIG_BLKNOS */
s->br_blockcount = (xfs_filblks_t)(l1 & xfs_mask64lo(21));
/* This is xfs_extent_state() in-line */
if (ext_flag) {
@@ -163,21 +148,8 @@ xfs_fsblock_t
xfs_bmbt_get_startblock(
xfs_bmbt_rec_host_t *r)
{
-#if XFS_BIG_BLKNOS
return (((xfs_fsblock_t)r->l0 & xfs_mask64lo(9)) << 43) |
(((xfs_fsblock_t)r->l1) >> 21);
-#else
-#ifdef DEBUG
- xfs_dfsbno_t b;
-
- b = (((xfs_dfsbno_t)r->l0 & xfs_mask64lo(9)) << 43) |
- (((xfs_dfsbno_t)r->l1) >> 21);
- ASSERT((b >> 32) == 0 || isnulldstartblock(b));
- return (xfs_fsblock_t)b;
-#else /* !DEBUG */
- return (xfs_fsblock_t)(((xfs_dfsbno_t)r->l1) >> 21);
-#endif /* DEBUG */
-#endif /* XFS_BIG_BLKNOS */
}
/*
@@ -241,7 +213,6 @@ xfs_bmbt_set_allf(
ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0);
ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
-#if XFS_BIG_BLKNOS
ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0);
r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
@@ -250,23 +221,6 @@ xfs_bmbt_set_allf(
r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) |
((xfs_bmbt_rec_base_t)blockcount &
(xfs_bmbt_rec_base_t)xfs_mask64lo(21));
-#else /* !XFS_BIG_BLKNOS */
- if (isnullstartblock(startblock)) {
- r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
- ((xfs_bmbt_rec_base_t)startoff << 9) |
- (xfs_bmbt_rec_base_t)xfs_mask64lo(9);
- r->l1 = xfs_mask64hi(11) |
- ((xfs_bmbt_rec_base_t)startblock << 21) |
- ((xfs_bmbt_rec_base_t)blockcount &
- (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
- } else {
- r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
- ((xfs_bmbt_rec_base_t)startoff << 9);
- r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) |
- ((xfs_bmbt_rec_base_t)blockcount &
- (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
- }
-#endif /* XFS_BIG_BLKNOS */
}
/*
@@ -298,8 +252,6 @@ xfs_bmbt_disk_set_allf(
ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN);
ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0);
ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
-
-#if XFS_BIG_BLKNOS
ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0);
r->l0 = cpu_to_be64(
@@ -310,26 +262,6 @@ xfs_bmbt_disk_set_allf(
((xfs_bmbt_rec_base_t)startblock << 21) |
((xfs_bmbt_rec_base_t)blockcount &
(xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
-#else /* !XFS_BIG_BLKNOS */
- if (isnullstartblock(startblock)) {
- r->l0 = cpu_to_be64(
- ((xfs_bmbt_rec_base_t)extent_flag << 63) |
- ((xfs_bmbt_rec_base_t)startoff << 9) |
- (xfs_bmbt_rec_base_t)xfs_mask64lo(9));
- r->l1 = cpu_to_be64(xfs_mask64hi(11) |
- ((xfs_bmbt_rec_base_t)startblock << 21) |
- ((xfs_bmbt_rec_base_t)blockcount &
- (xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
- } else {
- r->l0 = cpu_to_be64(
- ((xfs_bmbt_rec_base_t)extent_flag << 63) |
- ((xfs_bmbt_rec_base_t)startoff << 9));
- r->l1 = cpu_to_be64(
- ((xfs_bmbt_rec_base_t)startblock << 21) |
- ((xfs_bmbt_rec_base_t)blockcount &
- (xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
- }
-#endif /* XFS_BIG_BLKNOS */
}
/*
@@ -365,24 +297,11 @@ xfs_bmbt_set_startblock(
xfs_bmbt_rec_host_t *r,
xfs_fsblock_t v)
{
-#if XFS_BIG_BLKNOS
ASSERT((v & xfs_mask64hi(12)) == 0);
r->l0 = (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64hi(55)) |
(xfs_bmbt_rec_base_t)(v >> 43);
r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)) |
(xfs_bmbt_rec_base_t)(v << 21);
-#else /* !XFS_BIG_BLKNOS */
- if (isnullstartblock(v)) {
- r->l0 |= (xfs_bmbt_rec_base_t)xfs_mask64lo(9);
- r->l1 = (xfs_bmbt_rec_base_t)xfs_mask64hi(11) |
- ((xfs_bmbt_rec_base_t)v << 21) |
- (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
- } else {
- r->l0 &= ~(xfs_bmbt_rec_base_t)xfs_mask64lo(9);
- r->l1 = ((xfs_bmbt_rec_base_t)v << 21) |
- (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
- }
-#endif /* XFS_BIG_BLKNOS */
}
/*
@@ -438,8 +357,8 @@ xfs_bmbt_to_bmdr(
cpu_to_be64(XFS_BUF_DADDR_NULL));
} else
ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
- ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO));
- ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO));
+ ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK));
+ ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK));
ASSERT(rblock->bb_level != 0);
dblock->bb_level = rblock->bb_level;
dblock->bb_numrecs = rblock->bb_numrecs;
@@ -554,7 +473,7 @@ xfs_bmbt_alloc_block(
args.minlen = args.maxlen = args.prod = 1;
args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) {
- error = XFS_ERROR(ENOSPC);
+ error = -ENOSPC;
goto error0;
}
error = xfs_alloc_vextent(&args);
@@ -763,11 +682,11 @@ xfs_bmbt_verify(
/* sibling pointer verification */
if (!block->bb_u.l.bb_leftsib ||
- (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLDFSBNO) &&
+ (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
!XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))))
return false;
if (!block->bb_u.l.bb_rightsib ||
- (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLDFSBNO) &&
+ (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
!XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))))
return false;
@@ -779,9 +698,9 @@ xfs_bmbt_read_verify(
struct xfs_buf *bp)
{
if (!xfs_btree_lblock_verify_crc(bp))
- xfs_buf_ioerror(bp, EFSBADCRC);
+ xfs_buf_ioerror(bp, -EFSBADCRC);
else if (!xfs_bmbt_verify(bp))
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
@@ -795,7 +714,7 @@ xfs_bmbt_write_verify(
{
if (!xfs_bmbt_verify(bp)) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
@@ -959,7 +878,7 @@ xfs_bmbt_change_owner(
cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork);
if (!cur)
- return ENOMEM;
+ return -ENOMEM;
error = xfs_btree_change_owner(cur, new_owner, buffer_list);
xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h
index 819a8a4dee9..819a8a4dee9 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/libxfs/xfs_bmap_btree.h
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index cf893bc1e37..8fe6a93ff47 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -78,11 +78,11 @@ xfs_btree_check_lblock(
be16_to_cpu(block->bb_numrecs) <=
cur->bc_ops->get_maxrecs(cur, level) &&
block->bb_u.l.bb_leftsib &&
- (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
+ (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK) ||
XFS_FSB_SANITY_CHECK(mp,
be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
block->bb_u.l.bb_rightsib &&
- (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
+ (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK) ||
XFS_FSB_SANITY_CHECK(mp,
be64_to_cpu(block->bb_u.l.bb_rightsib)));
@@ -92,7 +92,7 @@ xfs_btree_check_lblock(
if (bp)
trace_xfs_btree_corrupt(bp, _RET_IP_);
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
return 0;
}
@@ -140,7 +140,7 @@ xfs_btree_check_sblock(
if (bp)
trace_xfs_btree_corrupt(bp, _RET_IP_);
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
return 0;
}
@@ -167,12 +167,12 @@ xfs_btree_check_block(
int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_lptr(
struct xfs_btree_cur *cur, /* btree cursor */
- xfs_dfsbno_t bno, /* btree block disk address */
+ xfs_fsblock_t bno, /* btree block disk address */
int level) /* btree block level */
{
XFS_WANT_CORRUPTED_RETURN(
level > 0 &&
- bno != NULLDFSBNO &&
+ bno != NULLFSBLOCK &&
XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
return 0;
}
@@ -595,7 +595,7 @@ xfs_btree_islastblock(
block = xfs_btree_get_block(cur, level, &bp);
xfs_btree_check_block(cur, block, level, bp);
if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
- return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO);
+ return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK);
else
return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
}
@@ -771,16 +771,16 @@ xfs_btree_readahead_lblock(
struct xfs_btree_block *block)
{
int rval = 0;
- xfs_dfsbno_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
- xfs_dfsbno_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
+ xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
+ xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
- if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
+ if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
xfs_btree_reada_bufl(cur->bc_mp, left, 1,
cur->bc_ops->buf_ops);
rval++;
}
- if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
+ if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
xfs_btree_reada_bufl(cur->bc_mp, right, 1,
cur->bc_ops->buf_ops);
rval++;
@@ -852,7 +852,7 @@ xfs_btree_ptr_to_daddr(
union xfs_btree_ptr *ptr)
{
if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
- ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
+ ASSERT(ptr->l != cpu_to_be64(NULLFSBLOCK));
return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
} else {
@@ -900,9 +900,9 @@ xfs_btree_setbuf(
b = XFS_BUF_TO_BLOCK(bp);
if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
- if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO))
+ if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
- if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO))
+ if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
} else {
if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
@@ -918,7 +918,7 @@ xfs_btree_ptr_is_null(
union xfs_btree_ptr *ptr)
{
if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
- return ptr->l == cpu_to_be64(NULLDFSBNO);
+ return ptr->l == cpu_to_be64(NULLFSBLOCK);
else
return ptr->s == cpu_to_be32(NULLAGBLOCK);
}
@@ -929,7 +929,7 @@ xfs_btree_set_ptr_null(
union xfs_btree_ptr *ptr)
{
if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
- ptr->l = cpu_to_be64(NULLDFSBNO);
+ ptr->l = cpu_to_be64(NULLFSBLOCK);
else
ptr->s = cpu_to_be32(NULLAGBLOCK);
}
@@ -997,8 +997,8 @@ xfs_btree_init_block_int(
buf->bb_numrecs = cpu_to_be16(numrecs);
if (flags & XFS_BTREE_LONG_PTRS) {
- buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
- buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
+ buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
+ buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
if (flags & XFS_BTREE_CRC_BLOCKS) {
buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
buf->bb_u.l.bb_owner = cpu_to_be64(owner);
@@ -1140,7 +1140,7 @@ xfs_btree_get_buf_block(
mp->m_bsize, flags);
if (!*bpp)
- return ENOMEM;
+ return -ENOMEM;
(*bpp)->b_ops = cur->bc_ops->buf_ops;
*block = XFS_BUF_TO_BLOCK(*bpp);
@@ -1498,7 +1498,7 @@ xfs_btree_increment(
if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
goto out0;
ASSERT(0);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto error0;
}
ASSERT(lev < cur->bc_nlevels);
@@ -1597,7 +1597,7 @@ xfs_btree_decrement(
if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
goto out0;
ASSERT(0);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto error0;
}
ASSERT(lev < cur->bc_nlevels);
@@ -4018,7 +4018,7 @@ xfs_btree_block_change_owner(
/* now read rh sibling block for next iteration */
xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
if (xfs_btree_ptr_is_null(cur, &rptr))
- return ENOENT;
+ return -ENOENT;
return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
}
@@ -4061,7 +4061,7 @@ xfs_btree_change_owner(
buffer_list);
} while (!error);
- if (error != ENOENT)
+ if (error != -ENOENT)
return error;
}
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index a04b69422f6..8f18bab73ea 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -258,7 +258,7 @@ xfs_btree_check_block(
int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_lptr(
struct xfs_btree_cur *cur, /* btree cursor */
- xfs_dfsbno_t ptr, /* btree block disk address */
+ xfs_fsblock_t ptr, /* btree block disk address */
int level); /* btree block level */
/*
diff --git a/fs/xfs/xfs_cksum.h b/fs/xfs/libxfs/xfs_cksum.h
index fad1676ad8c..fad1676ad8c 100644
--- a/fs/xfs/xfs_cksum.h
+++ b/fs/xfs/libxfs/xfs_cksum.h
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index a514ab61665..2c42ae28d02 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -185,7 +185,7 @@ xfs_da3_node_write_verify(
struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
if (!xfs_da3_node_verify(bp)) {
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
@@ -214,13 +214,13 @@ xfs_da3_node_read_verify(
switch (be16_to_cpu(info->magic)) {
case XFS_DA3_NODE_MAGIC:
if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
- xfs_buf_ioerror(bp, EFSBADCRC);
+ xfs_buf_ioerror(bp, -EFSBADCRC);
break;
}
/* fall through */
case XFS_DA_NODE_MAGIC:
if (!xfs_da3_node_verify(bp)) {
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
break;
}
return;
@@ -315,7 +315,7 @@ xfs_da3_node_create(
error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, whichfork);
if (error)
- return(error);
+ return error;
bp->b_ops = &xfs_da3_node_buf_ops;
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
node = bp->b_addr;
@@ -337,7 +337,7 @@ xfs_da3_node_create(
XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));
*bpp = bp;
- return(0);
+ return 0;
}
/*
@@ -385,8 +385,8 @@ xfs_da3_split(
switch (oldblk->magic) {
case XFS_ATTR_LEAF_MAGIC:
error = xfs_attr3_leaf_split(state, oldblk, newblk);
- if ((error != 0) && (error != ENOSPC)) {
- return(error); /* GROT: attr is inconsistent */
+ if ((error != 0) && (error != -ENOSPC)) {
+ return error; /* GROT: attr is inconsistent */
}
if (!error) {
addblk = newblk;
@@ -408,7 +408,7 @@ xfs_da3_split(
&state->extrablk);
}
if (error)
- return(error); /* GROT: attr inconsistent */
+ return error; /* GROT: attr inconsistent */
addblk = newblk;
break;
case XFS_DIR2_LEAFN_MAGIC:
@@ -422,7 +422,7 @@ xfs_da3_split(
max - i, &action);
addblk->bp = NULL;
if (error)
- return(error); /* GROT: dir is inconsistent */
+ return error; /* GROT: dir is inconsistent */
/*
* Record the newly split block for the next time thru?
*/
@@ -439,7 +439,7 @@ xfs_da3_split(
xfs_da3_fixhashpath(state, &state->path);
}
if (!addblk)
- return(0);
+ return 0;
/*
* Split the root node.
@@ -449,7 +449,7 @@ xfs_da3_split(
error = xfs_da3_root_split(state, oldblk, addblk);
if (error) {
addblk->bp = NULL;
- return(error); /* GROT: dir is inconsistent */
+ return error; /* GROT: dir is inconsistent */
}
/*
@@ -492,7 +492,7 @@ xfs_da3_split(
sizeof(node->hdr.info)));
}
addblk->bp = NULL;
- return(0);
+ return 0;
}
/*
@@ -670,18 +670,18 @@ xfs_da3_node_split(
*/
error = xfs_da_grow_inode(state->args, &blkno);
if (error)
- return(error); /* GROT: dir is inconsistent */
+ return error; /* GROT: dir is inconsistent */
error = xfs_da3_node_create(state->args, blkno, treelevel,
&newblk->bp, state->args->whichfork);
if (error)
- return(error); /* GROT: dir is inconsistent */
+ return error; /* GROT: dir is inconsistent */
newblk->blkno = blkno;
newblk->magic = XFS_DA_NODE_MAGIC;
xfs_da3_node_rebalance(state, oldblk, newblk);
error = xfs_da3_blk_link(state, oldblk, newblk);
if (error)
- return(error);
+ return error;
*result = 1;
} else {
*result = 0;
@@ -721,7 +721,7 @@ xfs_da3_node_split(
}
}
- return(0);
+ return 0;
}
/*
@@ -963,9 +963,9 @@ xfs_da3_join(
case XFS_ATTR_LEAF_MAGIC:
error = xfs_attr3_leaf_toosmall(state, &action);
if (error)
- return(error);
+ return error;
if (action == 0)
- return(0);
+ return 0;
xfs_attr3_leaf_unbalance(state, drop_blk, save_blk);
break;
case XFS_DIR2_LEAFN_MAGIC:
@@ -985,7 +985,7 @@ xfs_da3_join(
xfs_da3_fixhashpath(state, &state->path);
error = xfs_da3_node_toosmall(state, &action);
if (error)
- return(error);
+ return error;
if (action == 0)
return 0;
xfs_da3_node_unbalance(state, drop_blk, save_blk);
@@ -995,12 +995,12 @@ xfs_da3_join(
error = xfs_da3_blk_unlink(state, drop_blk, save_blk);
xfs_da_state_kill_altpath(state);
if (error)
- return(error);
+ return error;
error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
drop_blk->bp);
drop_blk->bp = NULL;
if (error)
- return(error);
+ return error;
}
/*
* We joined all the way to the top. If it turns out that
@@ -1010,7 +1010,7 @@ xfs_da3_join(
xfs_da3_node_remove(state, drop_blk);
xfs_da3_fixhashpath(state, &state->path);
error = xfs_da3_root_join(state, &state->path.blk[0]);
- return(error);
+ return error;
}
#ifdef DEBUG
@@ -1099,7 +1099,7 @@ xfs_da3_root_join(
xfs_trans_log_buf(args->trans, root_blk->bp, 0,
args->geo->blksize - 1);
error = xfs_da_shrink_inode(args, child, bp);
- return(error);
+ return error;
}
/*
@@ -1142,7 +1142,7 @@ xfs_da3_node_toosmall(
dp->d_ops->node_hdr_from_disk(&nodehdr, node);
if (nodehdr.count > (state->args->geo->node_ents >> 1)) {
*action = 0; /* blk over 50%, don't try to join */
- return(0); /* blk over 50%, don't try to join */
+ return 0; /* blk over 50%, don't try to join */
}
/*
@@ -1161,13 +1161,13 @@ xfs_da3_node_toosmall(
error = xfs_da3_path_shift(state, &state->altpath, forward,
0, &retval);
if (error)
- return(error);
+ return error;
if (retval) {
*action = 0;
} else {
*action = 2;
}
- return(0);
+ return 0;
}
/*
@@ -1194,7 +1194,7 @@ xfs_da3_node_toosmall(
error = xfs_da3_node_read(state->args->trans, dp,
blkno, -1, &bp, state->args->whichfork);
if (error)
- return(error);
+ return error;
node = bp->b_addr;
dp->d_ops->node_hdr_from_disk(&thdr, node);
@@ -1486,7 +1486,7 @@ xfs_da3_node_lookup_int(
if (error) {
blk->blkno = 0;
state->path.active--;
- return(error);
+ return error;
}
curr = blk->bp->b_addr;
blk->magic = be16_to_cpu(curr->magic);
@@ -1579,25 +1579,25 @@ xfs_da3_node_lookup_int(
args->blkno = blk->blkno;
} else {
ASSERT(0);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
- if (((retval == ENOENT) || (retval == ENOATTR)) &&
+ if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
(blk->hashval == args->hashval)) {
error = xfs_da3_path_shift(state, &state->path, 1, 1,
&retval);
if (error)
- return(error);
+ return error;
if (retval == 0) {
continue;
} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
/* path_shift() gives ENOENT */
- retval = XFS_ERROR(ENOATTR);
+ retval = -ENOATTR;
}
}
break;
}
*result = retval;
- return(0);
+ return 0;
}
/*========================================================================
@@ -1692,7 +1692,7 @@ xfs_da3_blk_link(
be32_to_cpu(old_info->back),
-1, &bp, args->whichfork);
if (error)
- return(error);
+ return error;
ASSERT(bp != NULL);
tmp_info = bp->b_addr;
ASSERT(tmp_info->magic == old_info->magic);
@@ -1713,7 +1713,7 @@ xfs_da3_blk_link(
be32_to_cpu(old_info->forw),
-1, &bp, args->whichfork);
if (error)
- return(error);
+ return error;
ASSERT(bp != NULL);
tmp_info = bp->b_addr;
ASSERT(tmp_info->magic == old_info->magic);
@@ -1726,7 +1726,7 @@ xfs_da3_blk_link(
xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
- return(0);
+ return 0;
}
/*
@@ -1772,7 +1772,7 @@ xfs_da3_blk_unlink(
be32_to_cpu(drop_info->back),
-1, &bp, args->whichfork);
if (error)
- return(error);
+ return error;
ASSERT(bp != NULL);
tmp_info = bp->b_addr;
ASSERT(tmp_info->magic == save_info->magic);
@@ -1789,7 +1789,7 @@ xfs_da3_blk_unlink(
be32_to_cpu(drop_info->forw),
-1, &bp, args->whichfork);
if (error)
- return(error);
+ return error;
ASSERT(bp != NULL);
tmp_info = bp->b_addr;
ASSERT(tmp_info->magic == save_info->magic);
@@ -1801,7 +1801,7 @@ xfs_da3_blk_unlink(
}
xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
- return(0);
+ return 0;
}
/*
@@ -1859,9 +1859,9 @@ xfs_da3_path_shift(
}
}
if (level < 0) {
- *result = XFS_ERROR(ENOENT); /* we're out of our tree */
+ *result = -ENOENT; /* we're out of our tree */
ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
- return(0);
+ return 0;
}
/*
@@ -1883,7 +1883,7 @@ xfs_da3_path_shift(
error = xfs_da3_node_read(args->trans, dp, blkno, -1,
&blk->bp, args->whichfork);
if (error)
- return(error);
+ return error;
info = blk->bp->b_addr;
ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
@@ -2004,7 +2004,7 @@ xfs_da_grow_inode_int(
struct xfs_trans *tp = args->trans;
struct xfs_inode *dp = args->dp;
int w = args->whichfork;
- xfs_drfsbno_t nblks = dp->i_d.di_nblocks;
+ xfs_rfsblock_t nblks = dp->i_d.di_nblocks;
struct xfs_bmbt_irec map, *mapp;
int nmap, error, got, i, mapi;
@@ -2068,7 +2068,7 @@ xfs_da_grow_inode_int(
if (got != count || mapp[0].br_startoff != *bno ||
mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
*bno + count) {
- error = XFS_ERROR(ENOSPC);
+ error = -ENOSPC;
goto out_free_map;
}
@@ -2158,7 +2158,7 @@ xfs_da3_swap_lastblock(
if (unlikely(lastoff == 0)) {
XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW,
mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
/*
* Read the last block in the btree space.
@@ -2209,7 +2209,7 @@ xfs_da3_swap_lastblock(
sib_info->magic != dead_info->magic)) {
XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)",
XFS_ERRLEVEL_LOW, mp);
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
goto done;
}
sib_info->forw = cpu_to_be32(dead_blkno);
@@ -2231,7 +2231,7 @@ xfs_da3_swap_lastblock(
sib_info->magic != dead_info->magic)) {
XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)",
XFS_ERRLEVEL_LOW, mp);
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
goto done;
}
sib_info->back = cpu_to_be32(dead_blkno);
@@ -2254,7 +2254,7 @@ xfs_da3_swap_lastblock(
if (level >= 0 && level != par_hdr.level + 1) {
XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
XFS_ERRLEVEL_LOW, mp);
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
goto done;
}
level = par_hdr.level;
@@ -2267,7 +2267,7 @@ xfs_da3_swap_lastblock(
if (entno == par_hdr.count) {
XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
XFS_ERRLEVEL_LOW, mp);
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
goto done;
}
par_blkno = be32_to_cpu(btree[entno].before);
@@ -2294,7 +2294,7 @@ xfs_da3_swap_lastblock(
if (unlikely(par_blkno == 0)) {
XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
XFS_ERRLEVEL_LOW, mp);
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
goto done;
}
error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w);
@@ -2305,7 +2305,7 @@ xfs_da3_swap_lastblock(
if (par_hdr.level != level) {
XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
XFS_ERRLEVEL_LOW, mp);
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
goto done;
}
btree = dp->d_ops->node_tree_p(par_node);
@@ -2359,7 +2359,7 @@ xfs_da_shrink_inode(
error = xfs_bunmapi(tp, dp, dead_blkno, count,
xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
0, args->firstblock, args->flist, &done);
- if (error == ENOSPC) {
+ if (error == -ENOSPC) {
if (w != XFS_DATA_FORK)
break;
error = xfs_da3_swap_lastblock(args, &dead_blkno,
@@ -2427,7 +2427,7 @@ xfs_buf_map_from_irec(
map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map),
KM_SLEEP | KM_NOFS);
if (!map)
- return ENOMEM;
+ return -ENOMEM;
*mapp = map;
}
@@ -2500,8 +2500,8 @@ xfs_dabuf_map(
}
if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) {
- error = mappedbno == -2 ? -1 : XFS_ERROR(EFSCORRUPTED);
- if (unlikely(error == EFSCORRUPTED)) {
+ error = mappedbno == -2 ? -1 : -EFSCORRUPTED;
+ if (unlikely(error == -EFSCORRUPTED)) {
if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
int i;
xfs_alert(mp, "%s: bno %lld dir: inode %lld",
@@ -2561,7 +2561,7 @@ xfs_da_get_buf(
bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp,
mapp, nmap, 0);
- error = bp ? bp->b_error : XFS_ERROR(EIO);
+ error = bp ? bp->b_error : -EIO;
if (error) {
xfs_trans_brelse(trans, bp);
goto out_free;
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 6e153e399a7..6e153e399a7 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
diff --git a/fs/xfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c
index c9aee52a37e..c9aee52a37e 100644
--- a/fs/xfs/xfs_da_format.c
+++ b/fs/xfs/libxfs/xfs_da_format.c
diff --git a/fs/xfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 0a49b028637..0a49b028637 100644
--- a/fs/xfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/libxfs/xfs_dinode.h
index 623bbe8fd92..623bbe8fd92 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/libxfs/xfs_dinode.h
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 79670cda48a..6cef22152fd 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -108,7 +108,7 @@ xfs_da_mount(
if (!mp->m_dir_geo || !mp->m_attr_geo) {
kmem_free(mp->m_dir_geo);
kmem_free(mp->m_attr_geo);
- return ENOMEM;
+ return -ENOMEM;
}
/* set up directory geometry */
@@ -202,7 +202,7 @@ xfs_dir_ino_validate(
xfs_warn(mp, "Invalid inode number 0x%Lx",
(unsigned long long) ino);
XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
return 0;
}
@@ -226,7 +226,7 @@ xfs_dir_init(
args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
if (!args)
- return ENOMEM;
+ return -ENOMEM;
args->geo = dp->i_mount->m_dir_geo;
args->dp = dp;
@@ -261,7 +261,7 @@ xfs_dir_createname(
args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
if (!args)
- return ENOMEM;
+ return -ENOMEM;
args->geo = dp->i_mount->m_dir_geo;
args->name = name->name;
@@ -314,18 +314,18 @@ xfs_dir_cilookup_result(
int len)
{
if (args->cmpresult == XFS_CMP_DIFFERENT)
- return ENOENT;
+ return -ENOENT;
if (args->cmpresult != XFS_CMP_CASE ||
!(args->op_flags & XFS_DA_OP_CILOOKUP))
- return EEXIST;
+ return -EEXIST;
args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL);
if (!args->value)
- return ENOMEM;
+ return -ENOMEM;
memcpy(args->value, name, len);
args->valuelen = len;
- return EEXIST;
+ return -EEXIST;
}
/*
@@ -392,7 +392,7 @@ xfs_dir_lookup(
rval = xfs_dir2_node_lookup(args);
out_check_rval:
- if (rval == EEXIST)
+ if (rval == -EEXIST)
rval = 0;
if (!rval) {
*inum = args->inumber;
@@ -428,7 +428,7 @@ xfs_dir_removename(
args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
if (!args)
- return ENOMEM;
+ return -ENOMEM;
args->geo = dp->i_mount->m_dir_geo;
args->name = name->name;
@@ -493,7 +493,7 @@ xfs_dir_replace(
args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
if (!args)
- return ENOMEM;
+ return -ENOMEM;
args->geo = dp->i_mount->m_dir_geo;
args->name = name->name;
@@ -555,7 +555,7 @@ xfs_dir_canenter(
args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
if (!args)
- return ENOMEM;
+ return -ENOMEM;
args->geo = dp->i_mount->m_dir_geo;
args->name = name->name;
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index c8e86b0b5e9..c8e86b0b5e9 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index c7cd3154026..9628ceccfa0 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -91,9 +91,9 @@ xfs_dir3_block_read_verify(
if (xfs_sb_version_hascrc(&mp->m_sb) &&
!xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
- xfs_buf_ioerror(bp, EFSBADCRC);
+ xfs_buf_ioerror(bp, -EFSBADCRC);
else if (!xfs_dir3_block_verify(bp))
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error)
xfs_verifier_error(bp);
@@ -108,7 +108,7 @@ xfs_dir3_block_write_verify(
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
if (!xfs_dir3_block_verify(bp)) {
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
@@ -392,7 +392,7 @@ xfs_dir2_block_addname(
if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
xfs_trans_brelse(tp, bp);
if (!dup)
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
return 0;
}
@@ -402,7 +402,7 @@ xfs_dir2_block_addname(
if (!dup) {
/* Don't have a space reservation: return no-space. */
if (args->total == 0)
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
/*
* Convert to the next larger format.
* Then add the new entry in that format.
@@ -647,7 +647,7 @@ xfs_dir2_block_lookup(
args->filetype = dp->d_ops->data_get_ftype(dep);
error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
xfs_trans_brelse(args->trans, bp);
- return XFS_ERROR(error);
+ return error;
}
/*
@@ -703,7 +703,7 @@ xfs_dir2_block_lookup_int(
if (low > high) {
ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
xfs_trans_brelse(tp, bp);
- return XFS_ERROR(ENOENT);
+ return -ENOENT;
}
}
/*
@@ -751,7 +751,7 @@ xfs_dir2_block_lookup_int(
* No match, release the buffer and return ENOENT.
*/
xfs_trans_brelse(tp, bp);
- return XFS_ERROR(ENOENT);
+ return -ENOENT;
}
/*
@@ -1091,7 +1091,7 @@ xfs_dir2_sf_to_block(
*/
if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
- return XFS_ERROR(EIO);
+ return -EIO;
}
oldsfp = (xfs_dir2_sf_hdr_t *)ifp->if_u1.if_data;
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index 8c2f6422648..fdd803fecb8 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -100,7 +100,7 @@ __xfs_dir3_data_check(
break;
default:
XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp);
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
}
/*
@@ -256,7 +256,7 @@ xfs_dir3_data_reada_verify(
xfs_dir3_data_verify(bp);
return;
default:
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
break;
}
@@ -270,9 +270,9 @@ xfs_dir3_data_read_verify(
if (xfs_sb_version_hascrc(&mp->m_sb) &&
!xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
- xfs_buf_ioerror(bp, EFSBADCRC);
+ xfs_buf_ioerror(bp, -EFSBADCRC);
else if (!xfs_dir3_data_verify(bp))
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error)
xfs_verifier_error(bp);
@@ -287,7 +287,7 @@ xfs_dir3_data_write_verify(
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
if (!xfs_dir3_data_verify(bp)) {
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index fb0aad4440c..a19174eb3cb 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -183,9 +183,9 @@ __read_verify(
if (xfs_sb_version_hascrc(&mp->m_sb) &&
!xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF))
- xfs_buf_ioerror(bp, EFSBADCRC);
+ xfs_buf_ioerror(bp, -EFSBADCRC);
else if (!xfs_dir3_leaf_verify(bp, magic))
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error)
xfs_verifier_error(bp);
@@ -201,7 +201,7 @@ __write_verify(
struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
if (!xfs_dir3_leaf_verify(bp, magic)) {
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
@@ -731,7 +731,7 @@ xfs_dir2_leaf_addname(
if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
args->total == 0) {
xfs_trans_brelse(tp, lbp);
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
}
/*
* Convert to node form.
@@ -755,7 +755,7 @@ xfs_dir2_leaf_addname(
*/
if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
xfs_trans_brelse(tp, lbp);
- return use_block == -1 ? XFS_ERROR(ENOSPC) : 0;
+ return use_block == -1 ? -ENOSPC : 0;
}
/*
* If no allocations are allowed, return now before we've
@@ -763,7 +763,7 @@ xfs_dir2_leaf_addname(
*/
if (args->total == 0 && use_block == -1) {
xfs_trans_brelse(tp, lbp);
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
}
/*
* Need to compact the leaf entries, removing stale ones.
@@ -1198,7 +1198,7 @@ xfs_dir2_leaf_lookup(
error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
xfs_trans_brelse(tp, dbp);
xfs_trans_brelse(tp, lbp);
- return XFS_ERROR(error);
+ return error;
}
/*
@@ -1327,13 +1327,13 @@ xfs_dir2_leaf_lookup_int(
return 0;
}
/*
- * No match found, return ENOENT.
+ * No match found, return -ENOENT.
*/
ASSERT(cidb == -1);
if (dbp)
xfs_trans_brelse(tp, dbp);
xfs_trans_brelse(tp, lbp);
- return XFS_ERROR(ENOENT);
+ return -ENOENT;
}
/*
@@ -1440,7 +1440,7 @@ xfs_dir2_leaf_removename(
* Just go on, returning success, leaving the
* empty block in place.
*/
- if (error == ENOSPC && args->total == 0)
+ if (error == -ENOSPC && args->total == 0)
error = 0;
xfs_dir3_leaf_check(dp, lbp);
return error;
@@ -1641,7 +1641,7 @@ xfs_dir2_leaf_trim_data(
* Get rid of the data block.
*/
if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
- ASSERT(error != ENOSPC);
+ ASSERT(error != -ENOSPC);
xfs_trans_brelse(tp, dbp);
return error;
}
@@ -1815,7 +1815,7 @@ xfs_dir2_node_to_leaf(
* punching out the middle of an extent, and this is an
* isolated block.
*/
- ASSERT(error != ENOSPC);
+ ASSERT(error != -ENOSPC);
return error;
}
fbp = NULL;
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index da43d304fca..2ae6ac2c11a 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -117,9 +117,9 @@ xfs_dir3_free_read_verify(
if (xfs_sb_version_hascrc(&mp->m_sb) &&
!xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF))
- xfs_buf_ioerror(bp, EFSBADCRC);
+ xfs_buf_ioerror(bp, -EFSBADCRC);
else if (!xfs_dir3_free_verify(bp))
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error)
xfs_verifier_error(bp);
@@ -134,7 +134,7 @@ xfs_dir3_free_write_verify(
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
if (!xfs_dir3_free_verify(bp)) {
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
@@ -406,7 +406,7 @@ xfs_dir2_leafn_add(
* into other peoples memory
*/
if (index < 0)
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
/*
* If there are already the maximum number of leaf entries in
@@ -417,7 +417,7 @@ xfs_dir2_leafn_add(
if (leafhdr.count == dp->d_ops->leaf_max_ents(args->geo)) {
if (!leafhdr.stale)
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
compact = leafhdr.stale > 1;
} else
compact = 0;
@@ -629,7 +629,7 @@ xfs_dir2_leafn_lookup_for_addname(
XFS_ERRLEVEL_LOW, mp);
if (curfdb != newfdb)
xfs_trans_brelse(tp, curbp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
curfdb = newfdb;
if (be16_to_cpu(bests[fi]) >= length)
@@ -660,7 +660,7 @@ out:
* Return the index, that will be the insertion point.
*/
*indexp = index;
- return XFS_ERROR(ENOENT);
+ return -ENOENT;
}
/*
@@ -789,7 +789,7 @@ xfs_dir2_leafn_lookup_for_entry(
curbp->b_ops = &xfs_dir3_data_buf_ops;
xfs_trans_buf_set_type(tp, curbp, XFS_BLFT_DIR_DATA_BUF);
if (cmp == XFS_CMP_EXACT)
- return XFS_ERROR(EEXIST);
+ return -EEXIST;
}
}
ASSERT(index == leafhdr.count || (args->op_flags & XFS_DA_OP_OKNOENT));
@@ -812,7 +812,7 @@ xfs_dir2_leafn_lookup_for_entry(
state->extravalid = 0;
}
*indexp = index;
- return XFS_ERROR(ENOENT);
+ return -ENOENT;
}
/*
@@ -1133,7 +1133,7 @@ xfs_dir3_data_block_free(
if (error == 0) {
fbp = NULL;
logfree = 0;
- } else if (error != ENOSPC || args->total != 0)
+ } else if (error != -ENOSPC || args->total != 0)
return error;
/*
* It's possible to get ENOSPC if there is no
@@ -1287,7 +1287,7 @@ xfs_dir2_leafn_remove(
* In this case just drop the buffer and some one else
* will eventually get rid of the empty block.
*/
- else if (!(error == ENOSPC && args->total == 0))
+ else if (!(error == -ENOSPC && args->total == 0))
return error;
}
/*
@@ -1599,7 +1599,7 @@ xfs_dir2_node_addname(
error = xfs_da3_node_lookup_int(state, &rval);
if (error)
rval = error;
- if (rval != ENOENT) {
+ if (rval != -ENOENT) {
goto done;
}
/*
@@ -1628,7 +1628,7 @@ xfs_dir2_node_addname(
* It didn't work, we need to split the leaf block.
*/
if (args->total == 0) {
- ASSERT(rval == ENOSPC);
+ ASSERT(rval == -ENOSPC);
goto done;
}
/*
@@ -1815,7 +1815,7 @@ xfs_dir2_node_addname_int(
* Not allowed to allocate, return failure.
*/
if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
/*
* Allocate and initialize the new data block.
@@ -1876,7 +1876,7 @@ xfs_dir2_node_addname_int(
}
XFS_ERROR_REPORT("xfs_dir2_node_addname_int",
XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
/*
@@ -2042,8 +2042,8 @@ xfs_dir2_node_lookup(
error = xfs_da3_node_lookup_int(state, &rval);
if (error)
rval = error;
- else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE) {
- /* If a CI match, dup the actual name and return EEXIST */
+ else if (rval == -ENOENT && args->cmpresult == XFS_CMP_CASE) {
+ /* If a CI match, dup the actual name and return -EEXIST */
xfs_dir2_data_entry_t *dep;
dep = (xfs_dir2_data_entry_t *)
@@ -2096,7 +2096,7 @@ xfs_dir2_node_removename(
goto out_free;
/* Didn't find it, upper layer screwed up. */
- if (rval != EEXIST) {
+ if (rval != -EEXIST) {
error = rval;
goto out_free;
}
@@ -2169,7 +2169,7 @@ xfs_dir2_node_replace(
* It should be found, since the vnodeops layer has looked it up
* and locked it. But paranoia is good.
*/
- if (rval == EEXIST) {
+ if (rval == -EEXIST) {
struct xfs_dir2_leaf_entry *ents;
/*
* Find the leaf entry.
@@ -2272,7 +2272,7 @@ xfs_dir2_node_trim_free(
* space reservation, when breaking up an extent into two
* pieces. This is the last block of an extent.
*/
- ASSERT(error != ENOSPC);
+ ASSERT(error != -ENOSPC);
xfs_trans_brelse(tp, bp);
return error;
}
diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index 27ce0794d19..27ce0794d19 100644
--- a/fs/xfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index 53c3be619db..5079e051ef0 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -51,10 +51,9 @@ static void xfs_dir2_sf_check(xfs_da_args_t *args);
#else
#define xfs_dir2_sf_check(args)
#endif /* DEBUG */
-#if XFS_BIG_INUMS
+
static void xfs_dir2_sf_toino4(xfs_da_args_t *args);
static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
-#endif /* XFS_BIG_INUMS */
/*
* Given a block directory (dp/block), calculate its size as a shortform (sf)
@@ -117,10 +116,10 @@ xfs_dir2_block_sfsize(
isdotdot =
dep->namelen == 2 &&
dep->name[0] == '.' && dep->name[1] == '.';
-#if XFS_BIG_INUMS
+
if (!isdot)
i8count += be64_to_cpu(dep->inumber) > XFS_DIR2_MAX_SHORT_INUM;
-#endif
+
/* take into account the file type field */
if (!isdot && !isdotdot) {
count++;
@@ -251,7 +250,7 @@ xfs_dir2_block_to_sf(
logflags = XFS_ILOG_CORE;
error = xfs_dir2_shrink_inode(args, args->geo->datablk, bp);
if (error) {
- ASSERT(error != ENOSPC);
+ ASSERT(error != -ENOSPC);
goto out;
}
@@ -299,7 +298,7 @@ xfs_dir2_sf_addname(
trace_xfs_dir2_sf_addname(args);
- ASSERT(xfs_dir2_sf_lookup(args) == ENOENT);
+ ASSERT(xfs_dir2_sf_lookup(args) == -ENOENT);
dp = args->dp;
ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
/*
@@ -307,7 +306,7 @@ xfs_dir2_sf_addname(
*/
if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
- return XFS_ERROR(EIO);
+ return -EIO;
}
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
@@ -318,7 +317,7 @@ xfs_dir2_sf_addname(
*/
incr_isize = dp->d_ops->sf_entsize(sfp, args->namelen);
objchange = 0;
-#if XFS_BIG_INUMS
+
/*
* Do we have to change to 8 byte inodes?
*/
@@ -332,7 +331,7 @@ xfs_dir2_sf_addname(
(uint)sizeof(xfs_dir2_ino4_t));
objchange = 1;
}
-#endif
+
new_isize = (int)dp->i_d.di_size + incr_isize;
/*
* Won't fit as shortform any more (due to size),
@@ -345,7 +344,7 @@ xfs_dir2_sf_addname(
* Just checking or no space reservation, it doesn't fit.
*/
if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
/*
* Convert to block form then add the name.
*/
@@ -370,10 +369,8 @@ xfs_dir2_sf_addname(
*/
else {
ASSERT(pick == 2);
-#if XFS_BIG_INUMS
if (objchange)
xfs_dir2_sf_toino8(args);
-#endif
xfs_dir2_sf_addname_hard(args, objchange, new_isize);
}
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
@@ -425,10 +422,8 @@ xfs_dir2_sf_addname_easy(
* Update the header and inode.
*/
sfp->count++;
-#if XFS_BIG_INUMS
if (args->inumber > XFS_DIR2_MAX_SHORT_INUM)
sfp->i8count++;
-#endif
dp->i_d.di_size = new_isize;
xfs_dir2_sf_check(args);
}
@@ -516,10 +511,8 @@ xfs_dir2_sf_addname_hard(
dp->d_ops->sf_put_ino(sfp, sfep, args->inumber);
dp->d_ops->sf_put_ftype(sfep, args->filetype);
sfp->count++;
-#if XFS_BIG_INUMS
if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange)
sfp->i8count++;
-#endif
/*
* If there's more left to copy, do that.
*/
@@ -593,13 +586,8 @@ xfs_dir2_sf_addname_pick(
/*
* If changing the inode number size, do it the hard way.
*/
-#if XFS_BIG_INUMS
- if (objchange) {
+ if (objchange)
return 2;
- }
-#else
- ASSERT(objchange == 0);
-#endif
/*
* If it won't fit at the end then do it the hard way (use the hole).
*/
@@ -650,7 +638,6 @@ xfs_dir2_sf_check(
ASSERT(dp->d_ops->sf_get_ftype(sfep) < XFS_DIR3_FT_MAX);
}
ASSERT(i8count == sfp->i8count);
- ASSERT(XFS_BIG_INUMS || i8count == 0);
ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size);
ASSERT(offset +
(sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
@@ -738,7 +725,7 @@ xfs_dir2_sf_lookup(
*/
if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
- return XFS_ERROR(EIO);
+ return -EIO;
}
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
@@ -751,7 +738,7 @@ xfs_dir2_sf_lookup(
args->inumber = dp->i_ino;
args->cmpresult = XFS_CMP_EXACT;
args->filetype = XFS_DIR3_FT_DIR;
- return XFS_ERROR(EEXIST);
+ return -EEXIST;
}
/*
* Special case for ..
@@ -761,7 +748,7 @@ xfs_dir2_sf_lookup(
args->inumber = dp->d_ops->sf_get_parent_ino(sfp);
args->cmpresult = XFS_CMP_EXACT;
args->filetype = XFS_DIR3_FT_DIR;
- return XFS_ERROR(EEXIST);
+ return -EEXIST;
}
/*
* Loop over all the entries trying to match ours.
@@ -781,20 +768,20 @@ xfs_dir2_sf_lookup(
args->inumber = dp->d_ops->sf_get_ino(sfp, sfep);
args->filetype = dp->d_ops->sf_get_ftype(sfep);
if (cmp == XFS_CMP_EXACT)
- return XFS_ERROR(EEXIST);
+ return -EEXIST;
ci_sfep = sfep;
}
}
ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
/*
* Here, we can only be doing a lookup (not a rename or replace).
- * If a case-insensitive match was not found, return ENOENT.
+ * If a case-insensitive match was not found, return -ENOENT.
*/
if (!ci_sfep)
- return XFS_ERROR(ENOENT);
+ return -ENOENT;
/* otherwise process the CI match as required by the caller */
error = xfs_dir_cilookup_result(args, ci_sfep->name, ci_sfep->namelen);
- return XFS_ERROR(error);
+ return error;
}
/*
@@ -824,7 +811,7 @@ xfs_dir2_sf_removename(
*/
if (oldsize < offsetof(xfs_dir2_sf_hdr_t, parent)) {
ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
- return XFS_ERROR(EIO);
+ return -EIO;
}
ASSERT(dp->i_df.if_bytes == oldsize);
ASSERT(dp->i_df.if_u1.if_data != NULL);
@@ -847,7 +834,7 @@ xfs_dir2_sf_removename(
* Didn't find it.
*/
if (i == sfp->count)
- return XFS_ERROR(ENOENT);
+ return -ENOENT;
/*
* Calculate sizes.
*/
@@ -870,7 +857,6 @@ xfs_dir2_sf_removename(
*/
xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK);
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-#if XFS_BIG_INUMS
/*
* Are we changing inode number size?
*/
@@ -880,7 +866,6 @@ xfs_dir2_sf_removename(
else
sfp->i8count--;
}
-#endif
xfs_dir2_sf_check(args);
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
return 0;
@@ -895,12 +880,8 @@ xfs_dir2_sf_replace(
{
xfs_inode_t *dp; /* incore directory inode */
int i; /* entry index */
-#if XFS_BIG_INUMS || defined(DEBUG)
xfs_ino_t ino=0; /* entry old inode number */
-#endif
-#if XFS_BIG_INUMS
int i8elevated; /* sf_toino8 set i8count=1 */
-#endif
xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
@@ -914,13 +895,13 @@ xfs_dir2_sf_replace(
*/
if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
- return XFS_ERROR(EIO);
+ return -EIO;
}
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
-#if XFS_BIG_INUMS
+
/*
* New inode number is large, and need to convert to 8-byte inodes.
*/
@@ -951,17 +932,15 @@ xfs_dir2_sf_replace(
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
} else
i8elevated = 0;
-#endif
+
ASSERT(args->namelen != 1 || args->name[0] != '.');
/*
* Replace ..'s entry.
*/
if (args->namelen == 2 &&
args->name[0] == '.' && args->name[1] == '.') {
-#if XFS_BIG_INUMS || defined(DEBUG)
ino = dp->d_ops->sf_get_parent_ino(sfp);
ASSERT(args->inumber != ino);
-#endif
dp->d_ops->sf_put_parent_ino(sfp, args->inumber);
}
/*
@@ -972,10 +951,8 @@ xfs_dir2_sf_replace(
i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {
if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
XFS_CMP_EXACT) {
-#if XFS_BIG_INUMS || defined(DEBUG)
ino = dp->d_ops->sf_get_ino(sfp, sfep);
ASSERT(args->inumber != ino);
-#endif
dp->d_ops->sf_put_ino(sfp, sfep, args->inumber);
dp->d_ops->sf_put_ftype(sfep, args->filetype);
break;
@@ -986,14 +963,11 @@ xfs_dir2_sf_replace(
*/
if (i == sfp->count) {
ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
-#if XFS_BIG_INUMS
if (i8elevated)
xfs_dir2_sf_toino4(args);
-#endif
- return XFS_ERROR(ENOENT);
+ return -ENOENT;
}
}
-#if XFS_BIG_INUMS
/*
* See if the old number was large, the new number is small.
*/
@@ -1020,13 +994,11 @@ xfs_dir2_sf_replace(
if (!i8elevated)
sfp->i8count++;
}
-#endif
xfs_dir2_sf_check(args);
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
return 0;
}
-#if XFS_BIG_INUMS
/*
* Convert from 8-byte inode numbers to 4-byte inode numbers.
* The last 8-byte inode number is gone, but the count is still 1.
@@ -1181,4 +1153,3 @@ xfs_dir2_sf_toino8(
dp->i_d.di_size = newsize;
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
}
-#endif /* XFS_BIG_INUMS */
diff --git a/fs/xfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index c2ac0c611ad..bb969337efc 100644
--- a/fs/xfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -257,9 +257,9 @@ xfs_dquot_buf_read_verify(
struct xfs_mount *mp = bp->b_target->bt_mount;
if (!xfs_dquot_buf_verify_crc(mp, bp))
- xfs_buf_ioerror(bp, EFSBADCRC);
+ xfs_buf_ioerror(bp, -EFSBADCRC);
else if (!xfs_dquot_buf_verify(mp, bp))
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error)
xfs_verifier_error(bp);
@@ -277,7 +277,7 @@ xfs_dquot_buf_write_verify(
struct xfs_mount *mp = bp->b_target->bt_mount;
if (!xfs_dquot_buf_verify(mp, bp)) {
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
diff --git a/fs/xfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 34d85aca305..7e42bba9a42 100644
--- a/fs/xfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -68,11 +68,7 @@ struct xfs_ifork;
#define XFS_RTLOBIT(w) xfs_lowbit32(w)
#define XFS_RTHIBIT(w) xfs_highbit32(w)
-#if XFS_BIG_BLKNOS
#define XFS_RTBLOCKLOG(b) xfs_highbit64(b)
-#else
-#define XFS_RTBLOCKLOG(b) xfs_highbit32(b)
-#endif
/*
* Dquot and dquot block format definitions
@@ -304,23 +300,15 @@ typedef struct xfs_bmbt_rec_host {
* Values and macros for delayed-allocation startblock fields.
*/
#define STARTBLOCKVALBITS 17
-#define STARTBLOCKMASKBITS (15 + XFS_BIG_BLKNOS * 20)
-#define DSTARTBLOCKMASKBITS (15 + 20)
+#define STARTBLOCKMASKBITS (15 + 20)
#define STARTBLOCKMASK \
(((((xfs_fsblock_t)1) << STARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS)
-#define DSTARTBLOCKMASK \
- (((((xfs_dfsbno_t)1) << DSTARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS)
static inline int isnullstartblock(xfs_fsblock_t x)
{
return ((x) & STARTBLOCKMASK) == STARTBLOCKMASK;
}
-static inline int isnulldstartblock(xfs_dfsbno_t x)
-{
- return ((x) & DSTARTBLOCKMASK) == DSTARTBLOCKMASK;
-}
-
static inline xfs_fsblock_t nullstartblock(int k)
{
ASSERT(k < (1 << STARTBLOCKVALBITS));
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 5960e5593fe..b62771f1f4b 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -292,7 +292,7 @@ xfs_ialloc_inode_init(
mp->m_bsize * blks_per_cluster,
XBF_UNMAPPED);
if (!fbuf)
- return ENOMEM;
+ return -ENOMEM;
/* Initialize the inode buffers and log them appropriately. */
fbuf->b_ops = &xfs_inode_buf_ops;
@@ -380,7 +380,7 @@ xfs_ialloc_ag_alloc(
newlen = args.mp->m_ialloc_inos;
if (args.mp->m_maxicount &&
args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
args.minlen = args.maxlen = args.mp->m_ialloc_blks;
/*
* First try to allocate inodes contiguous with the last-allocated
@@ -1385,7 +1385,7 @@ xfs_dialloc(
if (error) {
xfs_trans_brelse(tp, agbp);
- if (error != ENOSPC)
+ if (error != -ENOSPC)
goto out_error;
xfs_perag_put(pag);
@@ -1416,7 +1416,7 @@ nextag:
agno = 0;
if (agno == start_agno) {
*inop = NULLFSINO;
- return noroom ? ENOSPC : 0;
+ return noroom ? -ENOSPC : 0;
}
}
@@ -1425,7 +1425,7 @@ out_alloc:
return xfs_dialloc_ag(tp, agbp, parent, inop);
out_error:
xfs_perag_put(pag);
- return XFS_ERROR(error);
+ return error;
}
STATIC int
@@ -1682,7 +1682,7 @@ xfs_difree(
xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).",
__func__, agno, mp->m_sb.sb_agcount);
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
agino = XFS_INO_TO_AGINO(mp, inode);
if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
@@ -1690,14 +1690,14 @@ xfs_difree(
__func__, (unsigned long long)inode,
(unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino));
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
agbno = XFS_AGINO_TO_AGBNO(mp, agino);
if (agbno >= mp->m_sb.sb_agblocks) {
xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
__func__, agbno, mp->m_sb.sb_agblocks);
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
/*
* Get the allocation group header.
@@ -1769,7 +1769,7 @@ xfs_imap_lookup(
if (i)
error = xfs_inobt_get_rec(cur, &rec, &i);
if (!error && i == 0)
- error = EINVAL;
+ error = -EINVAL;
}
xfs_trans_brelse(tp, agbp);
@@ -1780,12 +1780,12 @@ xfs_imap_lookup(
/* check that the returned record contains the required inode */
if (rec.ir_startino > agino ||
rec.ir_startino + mp->m_ialloc_inos <= agino)
- return EINVAL;
+ return -EINVAL;
/* for untrusted inodes check it is allocated first */
if ((flags & XFS_IGET_UNTRUSTED) &&
(rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
- return EINVAL;
+ return -EINVAL;
*chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino);
*offset_agbno = agbno - *chunk_agbno;
@@ -1829,7 +1829,7 @@ xfs_imap(
* as they can be invalid without implying corruption.
*/
if (flags & XFS_IGET_UNTRUSTED)
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
if (agno >= mp->m_sb.sb_agcount) {
xfs_alert(mp,
"%s: agno (%d) >= mp->m_sb.sb_agcount (%d)",
@@ -1849,7 +1849,7 @@ xfs_imap(
}
xfs_stack_trace();
#endif /* DEBUG */
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
blks_per_cluster = xfs_icluster_size_fsb(mp);
@@ -1922,7 +1922,7 @@ out_map:
__func__, (unsigned long long) imap->im_blkno,
(unsigned long long) imap->im_len,
XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
return 0;
}
@@ -2072,11 +2072,11 @@ xfs_agi_read_verify(
if (xfs_sb_version_hascrc(&mp->m_sb) &&
!xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
- xfs_buf_ioerror(bp, EFSBADCRC);
+ xfs_buf_ioerror(bp, -EFSBADCRC);
else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp,
XFS_ERRTAG_IALLOC_READ_AGI,
XFS_RANDOM_IALLOC_READ_AGI))
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error)
xfs_verifier_error(bp);
@@ -2090,7 +2090,7 @@ xfs_agi_write_verify(
struct xfs_buf_log_item *bip = bp->b_fspriv;
if (!xfs_agi_verify(bp)) {
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index 95ad1c002d6..95ad1c002d6 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 726f83a681a..c9b06f30fe8 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -272,9 +272,9 @@ xfs_inobt_read_verify(
struct xfs_buf *bp)
{
if (!xfs_btree_sblock_verify_crc(bp))
- xfs_buf_ioerror(bp, EFSBADCRC);
+ xfs_buf_ioerror(bp, -EFSBADCRC);
else if (!xfs_inobt_verify(bp))
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
@@ -288,7 +288,7 @@ xfs_inobt_write_verify(
{
if (!xfs_inobt_verify(bp)) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h
index d7ebea72c2d..d7ebea72c2d 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.h
diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index cb35ae41d4a..f18fd2da49f 100644
--- a/fs/xfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -101,7 +101,7 @@ xfs_inode_buf_verify(
return;
}
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
#ifdef DEBUG
xfs_alert(mp,
@@ -174,14 +174,14 @@ xfs_imap_to_bp(
(int)imap->im_len, buf_flags, &bp,
&xfs_inode_buf_ops);
if (error) {
- if (error == EAGAIN) {
+ if (error == -EAGAIN) {
ASSERT(buf_flags & XBF_TRYLOCK);
return error;
}
- if (error == EFSCORRUPTED &&
+ if (error == -EFSCORRUPTED &&
(iget_flags & XFS_IGET_UNTRUSTED))
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
__func__, error);
@@ -390,7 +390,7 @@ xfs_iread(
__func__, ip->i_ino);
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
goto out_brelse;
}
diff --git a/fs/xfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 9308c47f2a5..9308c47f2a5 100644
--- a/fs/xfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
diff --git a/fs/xfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index b031e8d0d92..6a00f7fed69 100644
--- a/fs/xfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -102,7 +102,7 @@ xfs_iformat_fork(
be64_to_cpu(dip->di_nblocks));
XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
ip->i_mount, dip);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
@@ -111,7 +111,7 @@ xfs_iformat_fork(
dip->di_forkoff);
XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
ip->i_mount, dip);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
@@ -121,7 +121,7 @@ xfs_iformat_fork(
ip->i_ino);
XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
XFS_ERRLEVEL_LOW, ip->i_mount, dip);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
switch (ip->i_d.di_mode & S_IFMT) {
@@ -132,7 +132,7 @@ xfs_iformat_fork(
if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
ip->i_mount, dip);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
ip->i_d.di_size = 0;
ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
@@ -153,7 +153,7 @@ xfs_iformat_fork(
XFS_CORRUPTION_ERROR("xfs_iformat(4)",
XFS_ERRLEVEL_LOW,
ip->i_mount, dip);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
di_size = be64_to_cpu(dip->di_size);
@@ -166,7 +166,7 @@ xfs_iformat_fork(
XFS_CORRUPTION_ERROR("xfs_iformat(5)",
XFS_ERRLEVEL_LOW,
ip->i_mount, dip);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
size = (int)di_size;
@@ -181,13 +181,13 @@ xfs_iformat_fork(
default:
XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
ip->i_mount);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
break;
default:
XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (error) {
return error;
@@ -211,7 +211,7 @@ xfs_iformat_fork(
XFS_CORRUPTION_ERROR("xfs_iformat(8)",
XFS_ERRLEVEL_LOW,
ip->i_mount, dip);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
@@ -223,7 +223,7 @@ xfs_iformat_fork(
error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
break;
default:
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
break;
}
if (error) {
@@ -266,7 +266,7 @@ xfs_iformat_local(
XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
ip->i_mount, dip);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
ifp = XFS_IFORK_PTR(ip, whichfork);
real_size = 0;
@@ -322,7 +322,7 @@ xfs_iformat_extents(
(unsigned long long) ip->i_ino, nex);
XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
ip->i_mount, dip);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
ifp->if_real_bytes = 0;
@@ -350,7 +350,7 @@ xfs_iformat_extents(
XFS_ERROR_REPORT("xfs_iformat_extents(2)",
XFS_ERRLEVEL_LOW,
ip->i_mount);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
}
ifp->if_flags |= XFS_IFEXTENTS;
@@ -399,7 +399,7 @@ xfs_iformat_btree(
(unsigned long long) ip->i_ino);
XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
mp, dip);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
ifp->if_broot_bytes = size;
@@ -436,7 +436,7 @@ xfs_iread_extents(
if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
ip->i_mount);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -528,7 +528,7 @@ xfs_iroot_realloc(
ifp->if_broot_bytes = (int)new_size;
ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
XFS_IFORK_SIZE(ip, whichfork));
- memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
+ memmove(np, op, cur_max * (uint)sizeof(xfs_fsblock_t));
return;
}
@@ -575,7 +575,7 @@ xfs_iroot_realloc(
ifp->if_broot_bytes);
np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
(int)new_size);
- memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
+ memcpy(np, op, new_max * (uint)sizeof(xfs_fsblock_t));
}
kmem_free(ifp->if_broot);
ifp->if_broot = new_broot;
@@ -1692,7 +1692,7 @@ xfs_iext_idx_to_irec(
}
*idxp = page_idx;
*erp_idxp = erp_idx;
- return(erp);
+ return erp;
}
/*
diff --git a/fs/xfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 7d3b1ed6dcb..7d3b1ed6dcb 100644
--- a/fs/xfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
diff --git a/fs/xfs/xfs_inum.h b/fs/xfs/libxfs/xfs_inum.h
index 90efdaf1706..4ff2278e147 100644
--- a/fs/xfs/xfs_inum.h
+++ b/fs/xfs/libxfs/xfs_inum.h
@@ -54,11 +54,7 @@ struct xfs_mount;
#define XFS_OFFBNO_TO_AGINO(mp,b,o) \
((xfs_agino_t)(((b) << XFS_INO_OFFSET_BITS(mp)) | (o)))
-#if XFS_BIG_INUMS
#define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 56) - 1ULL))
-#else
-#define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 32) - 1ULL))
-#endif
#define XFS_MAXINUMBER_32 ((xfs_ino_t)((1ULL << 32) - 1ULL))
#endif /* __XFS_INUM_H__ */
diff --git a/fs/xfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index f0969c77bdb..aff12f2d442 100644
--- a/fs/xfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -380,7 +380,7 @@ typedef struct xfs_icdinode {
xfs_ictimestamp_t di_mtime; /* time last modified */
xfs_ictimestamp_t di_ctime; /* time created/inode modified */
xfs_fsize_t di_size; /* number of bytes in file */
- xfs_drfsbno_t di_nblocks; /* # of direct & btree blocks used */
+ xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */
xfs_extlen_t di_extsize; /* basic/minimum extent size for file */
xfs_extnum_t di_nextents; /* number of extents in data fork */
xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/
@@ -516,7 +516,7 @@ xfs_blft_from_flags(struct xfs_buf_log_format *blf)
* EFI/EFD log format definitions
*/
typedef struct xfs_extent {
- xfs_dfsbno_t ext_start;
+ xfs_fsblock_t ext_start;
xfs_extlen_t ext_len;
} xfs_extent_t;
diff --git a/fs/xfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 1c55ccbb379..1c55ccbb379 100644
--- a/fs/xfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
diff --git a/fs/xfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c
index ee7e0e80246..ee7e0e80246 100644
--- a/fs/xfs/xfs_log_rlimit.c
+++ b/fs/xfs/libxfs/xfs_log_rlimit.c
diff --git a/fs/xfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index 137e2093707..1b0a0837975 100644
--- a/fs/xfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -98,8 +98,6 @@ typedef __uint16_t xfs_qwarncnt_t;
#define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \
XFS_GQUOTA_ACTIVE | \
XFS_PQUOTA_ACTIVE))
-#define XFS_IS_OQUOTA_ON(mp) ((mp)->m_qflags & (XFS_GQUOTA_ACTIVE | \
- XFS_PQUOTA_ACTIVE))
#define XFS_IS_UQUOTA_ON(mp) ((mp)->m_qflags & XFS_UQUOTA_ACTIVE)
#define XFS_IS_GQUOTA_ON(mp) ((mp)->m_qflags & XFS_GQUOTA_ACTIVE)
#define XFS_IS_PQUOTA_ON(mp) ((mp)->m_qflags & XFS_PQUOTA_ACTIVE)
diff --git a/fs/xfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index f4dd697cac0..f4dd697cac0 100644
--- a/fs/xfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 7703fa6770f..ad525a5623a 100644
--- a/fs/xfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -186,13 +186,13 @@ xfs_mount_validate_sb(
*/
if (sbp->sb_magicnum != XFS_SB_MAGIC) {
xfs_warn(mp, "bad magic number");
- return XFS_ERROR(EWRONGFS);
+ return -EWRONGFS;
}
if (!xfs_sb_good_version(sbp)) {
xfs_warn(mp, "bad version");
- return XFS_ERROR(EWRONGFS);
+ return -EWRONGFS;
}
/*
@@ -220,7 +220,7 @@ xfs_mount_validate_sb(
xfs_warn(mp,
"Attempted to mount read-only compatible filesystem read-write.\n"
"Filesystem can only be safely mounted read only.");
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
}
if (xfs_sb_has_incompat_feature(sbp,
@@ -230,7 +230,7 @@ xfs_mount_validate_sb(
"Filesystem can not be safely mounted by this kernel.",
(sbp->sb_features_incompat &
XFS_SB_FEAT_INCOMPAT_UNKNOWN));
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
}
@@ -238,13 +238,13 @@ xfs_mount_validate_sb(
if (sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) {
xfs_notice(mp,
"Version 5 of Super block has XFS_OQUOTA bits.");
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
} else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD |
XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) {
xfs_notice(mp,
"Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits.");
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (unlikely(
@@ -252,7 +252,7 @@ xfs_mount_validate_sb(
xfs_warn(mp,
"filesystem is marked as having an external log; "
"specify logdev on the mount command line.");
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
if (unlikely(
@@ -260,7 +260,7 @@ xfs_mount_validate_sb(
xfs_warn(mp,
"filesystem is marked as having an internal log; "
"do not specify logdev on the mount command line.");
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
/*
@@ -294,7 +294,7 @@ xfs_mount_validate_sb(
sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp) ||
sbp->sb_shared_vn != 0)) {
xfs_notice(mp, "SB sanity check failed");
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
/*
@@ -305,7 +305,7 @@ xfs_mount_validate_sb(
"File system with blocksize %d bytes. "
"Only pagesize (%ld) or less will currently work.",
sbp->sb_blocksize, PAGE_SIZE);
- return XFS_ERROR(ENOSYS);
+ return -ENOSYS;
}
/*
@@ -320,19 +320,19 @@ xfs_mount_validate_sb(
default:
xfs_warn(mp, "inode size of %d bytes not supported",
sbp->sb_inodesize);
- return XFS_ERROR(ENOSYS);
+ return -ENOSYS;
}
if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
xfs_warn(mp,
"file system too large to be mounted on this system.");
- return XFS_ERROR(EFBIG);
+ return -EFBIG;
}
if (check_inprogress && sbp->sb_inprogress) {
xfs_warn(mp, "Offline file system operation in progress!");
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
return 0;
}
@@ -386,10 +386,11 @@ xfs_sb_quota_from_disk(struct xfs_sb *sbp)
}
}
-void
-xfs_sb_from_disk(
+static void
+__xfs_sb_from_disk(
struct xfs_sb *to,
- xfs_dsb_t *from)
+ xfs_dsb_t *from,
+ bool convert_xquota)
{
to->sb_magicnum = be32_to_cpu(from->sb_magicnum);
to->sb_blocksize = be32_to_cpu(from->sb_blocksize);
@@ -445,6 +446,17 @@ xfs_sb_from_disk(
to->sb_pad = 0;
to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
to->sb_lsn = be64_to_cpu(from->sb_lsn);
+ /* Convert on-disk flags to in-memory flags? */
+ if (convert_xquota)
+ xfs_sb_quota_from_disk(to);
+}
+
+void
+xfs_sb_from_disk(
+struct xfs_sb *to,
+xfs_dsb_t *from)
+{
+__xfs_sb_from_disk(to, from, true); /* true: also convert quota flags to in-memory form */
}
static inline void
@@ -577,7 +589,11 @@ xfs_sb_verify(
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_sb sb;
- xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp));
+ /*
+ * Use call variant which doesn't convert quota flags from disk
+ * format, because xfs_mount_validate_sb checks the on-disk flags.
+ */
+ __xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp), false);
/*
* Only check the in progress field for the primary superblock as
@@ -620,7 +636,7 @@ xfs_sb_read_verify(
/* Only fail bad secondaries on a known V5 filesystem */
if (bp->b_bn == XFS_SB_DADDR ||
xfs_sb_version_hascrc(&mp->m_sb)) {
- error = EFSBADCRC;
+ error = -EFSBADCRC;
goto out_error;
}
}
@@ -630,7 +646,7 @@ xfs_sb_read_verify(
out_error:
if (error) {
xfs_buf_ioerror(bp, error);
- if (error == EFSCORRUPTED || error == EFSBADCRC)
+ if (error == -EFSCORRUPTED || error == -EFSBADCRC)
xfs_verifier_error(bp);
}
}
@@ -653,7 +669,7 @@ xfs_sb_quiet_read_verify(
return;
}
/* quietly fail */
- xfs_buf_ioerror(bp, EWRONGFS);
+ xfs_buf_ioerror(bp, -EWRONGFS);
}
static void
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
index c43c2d609a2..2e739708afd 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -87,11 +87,11 @@ struct xfs_trans;
typedef struct xfs_sb {
__uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */
__uint32_t sb_blocksize; /* logical block size, bytes */
- xfs_drfsbno_t sb_dblocks; /* number of data blocks */
- xfs_drfsbno_t sb_rblocks; /* number of realtime blocks */
- xfs_drtbno_t sb_rextents; /* number of realtime extents */
+ xfs_rfsblock_t sb_dblocks; /* number of data blocks */
+ xfs_rfsblock_t sb_rblocks; /* number of realtime blocks */
+ xfs_rtblock_t sb_rextents; /* number of realtime extents */
uuid_t sb_uuid; /* file system unique id */
- xfs_dfsbno_t sb_logstart; /* starting block of log if internal */
+ xfs_fsblock_t sb_logstart; /* starting block of log if internal */
xfs_ino_t sb_rootino; /* root inode number */
xfs_ino_t sb_rbmino; /* bitmap inode for realtime extents */
xfs_ino_t sb_rsumino; /* summary inode for rt bitmap */
diff --git a/fs/xfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 82404da2ca6..82404da2ca6 100644
--- a/fs/xfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
diff --git a/fs/xfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index 23c2f2577c8..5782f037eab 100644
--- a/fs/xfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -133,9 +133,9 @@ xfs_symlink_read_verify(
return;
if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF))
- xfs_buf_ioerror(bp, EFSBADCRC);
+ xfs_buf_ioerror(bp, -EFSBADCRC);
else if (!xfs_symlink_verify(bp))
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error)
xfs_verifier_error(bp);
@@ -153,7 +153,7 @@ xfs_symlink_write_verify(
return;
if (!xfs_symlink_verify(bp)) {
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index f2bda7c76b8..f2bda7c76b8 100644
--- a/fs/xfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
diff --git a/fs/xfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h
index 1097d14cd58..1097d14cd58 100644
--- a/fs/xfs/xfs_trans_resv.h
+++ b/fs/xfs/libxfs/xfs_trans_resv.h
diff --git a/fs/xfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h
index bf9c4579334..bf9c4579334 100644
--- a/fs/xfs/xfs_trans_space.h
+++ b/fs/xfs/libxfs/xfs_trans_space.h
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 6888ad886ff..a65fa5dde6e 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -152,7 +152,7 @@ xfs_get_acl(struct inode *inode, int type)
if (!xfs_acl)
return ERR_PTR(-ENOMEM);
- error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl,
+ error = xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl,
&len, ATTR_ROOT);
if (error) {
/*
@@ -210,7 +210,7 @@ __xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
len -= sizeof(struct xfs_acl_entry) *
(XFS_ACL_MAX_ENTRIES(ip->i_mount) - acl->a_count);
- error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
+ error = xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
len, ATTR_ROOT);
kmem_free(xfs_acl);
@@ -218,7 +218,7 @@ __xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
/*
* A NULL ACL argument means we want to remove the ACL.
*/
- error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT);
+ error = xfs_attr_remove(ip, ea_name, ATTR_ROOT);
/*
* If the attribute didn't exist to start with that's fine.
@@ -244,7 +244,7 @@ xfs_set_mode(struct inode *inode, umode_t mode)
iattr.ia_mode = mode;
iattr.ia_ctime = current_fs_time(inode->i_sb);
- error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
+ error = xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
}
return error;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index faaf716e208..b984647c24d 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -240,7 +240,7 @@ xfs_end_io(
done:
if (error)
- ioend->io_error = -error;
+ ioend->io_error = error;
xfs_destroy_ioend(ioend);
}
@@ -308,14 +308,14 @@ xfs_map_blocks(
int nimaps = 1;
if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
+ return -EIO;
if (type == XFS_IO_UNWRITTEN)
bmapi_flags |= XFS_BMAPI_IGSTATE;
if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
if (nonblocking)
- return -XFS_ERROR(EAGAIN);
+ return -EAGAIN;
xfs_ilock(ip, XFS_ILOCK_SHARED);
}
@@ -332,14 +332,14 @@ xfs_map_blocks(
xfs_iunlock(ip, XFS_ILOCK_SHARED);
if (error)
- return -XFS_ERROR(error);
+ return error;
if (type == XFS_IO_DELALLOC &&
(!nimaps || isnullstartblock(imap->br_startblock))) {
error = xfs_iomap_write_allocate(ip, offset, imap);
if (!error)
trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
- return -XFS_ERROR(error);
+ return error;
}
#ifdef DEBUG
@@ -502,7 +502,7 @@ xfs_submit_ioend(
* time.
*/
if (fail) {
- ioend->io_error = -fail;
+ ioend->io_error = fail;
xfs_finish_ioend(ioend);
continue;
}
@@ -1253,7 +1253,7 @@ __xfs_get_blocks(
int new = 0;
if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
+ return -EIO;
offset = (xfs_off_t)iblock << inode->i_blkbits;
ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
@@ -1302,7 +1302,7 @@ __xfs_get_blocks(
error = xfs_iomap_write_direct(ip, offset, size,
&imap, nimaps);
if (error)
- return -error;
+ return error;
new = 1;
} else {
/*
@@ -1415,7 +1415,7 @@ __xfs_get_blocks(
out_unlock:
xfs_iunlock(ip, lockmode);
- return -error;
+ return error;
}
int
@@ -1753,11 +1753,72 @@ xfs_vm_readpages(
return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
}
+/*
+ * This is basically a copy of __set_page_dirty_buffers() with one
+ * small tweak: buffers beyond EOF do not get marked dirty. If we mark them
+ * dirty, we'll never be able to clean them because we don't write buffers
+ * beyond EOF, and that means we can't invalidate pages that span EOF
+ * that have been marked dirty. Further, the dirty state can leak into
+ * the file interior if the file is extended, resulting in all sorts of
+ * bad things happening as the state does not match the underlying data.
+ *
+ * XXX: this really indicates that bufferheads in XFS need to die. Warts like
+ * this only exist because of bufferheads and how the generic code manages them.
+ */
+STATIC int
+xfs_vm_set_page_dirty(
+	struct page		*page)
+{
+	struct address_space	*mapping = page->mapping;
+	loff_t			end_offset;
+	loff_t			offset;
+	int			newly_dirty;
+
+	/* No mapping means no host inode; bail out before touching ->host. */
+	if (unlikely(!mapping))
+		return !TestSetPageDirty(page);
+
+	end_offset = i_size_read(mapping->host);
+	offset = page_offset(page);
+
+	spin_lock(&mapping->private_lock);
+	if (page_has_buffers(page)) {
+		struct buffer_head *head = page_buffers(page);
+		struct buffer_head *bh = head;
+
+		do {
+			if (offset < end_offset)	/* skip buffers past EOF */
+				set_buffer_dirty(bh);
+			bh = bh->b_this_page;
+			offset += 1 << mapping->host->i_blkbits;
+		} while (bh != head);
+	}
+	newly_dirty = !TestSetPageDirty(page);
+	spin_unlock(&mapping->private_lock);
+
+	if (newly_dirty) {
+		/* sigh - __set_page_dirty() is static, so copy it here, too */
+		unsigned long flags;
+
+		spin_lock_irqsave(&mapping->tree_lock, flags);
+		if (page->mapping) {	/* Race with truncate? */
+			WARN_ON_ONCE(!PageUptodate(page));
+			account_page_dirtied(page, mapping);
+			radix_tree_tag_set(&mapping->page_tree,
+					page_index(page), PAGECACHE_TAG_DIRTY);
+		}
+		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+	}
+	return newly_dirty;
+}
+
const struct address_space_operations xfs_address_space_operations = {
.readpage = xfs_vm_readpage,
.readpages = xfs_vm_readpages,
.writepage = xfs_vm_writepage,
.writepages = xfs_vm_writepages,
+ .set_page_dirty = xfs_vm_set_page_dirty,
.releasepage = xfs_vm_releasepage,
.invalidatepage = xfs_vm_invalidatepage,
.write_begin = xfs_vm_write_begin,
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index 09480c57f06..aa2a8b1838a 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -76,7 +76,7 @@ xfs_attr3_leaf_freextent(
error = xfs_bmapi_read(dp, (xfs_fileoff_t)tblkno, tblkcnt,
&map, &nmap, XFS_BMAPI_ATTRFORK);
if (error) {
- return(error);
+ return error;
}
ASSERT(nmap == 1);
ASSERT(map.br_startblock != DELAYSTARTBLOCK);
@@ -95,21 +95,21 @@ xfs_attr3_leaf_freextent(
dp->i_mount->m_ddev_targp,
dblkno, dblkcnt, 0);
if (!bp)
- return ENOMEM;
+ return -ENOMEM;
xfs_trans_binval(*trans, bp);
/*
* Roll to next transaction.
*/
error = xfs_trans_roll(trans, dp);
if (error)
- return (error);
+ return error;
}
tblkno += map.br_blockcount;
tblkcnt -= map.br_blockcount;
}
- return(0);
+ return 0;
}
/*
@@ -227,7 +227,7 @@ xfs_attr3_node_inactive(
*/
if (level > XFS_DA_NODE_MAXDEPTH) {
xfs_trans_brelse(*trans, bp); /* no locks for later trans */
- return XFS_ERROR(EIO);
+ return -EIO;
}
node = bp->b_addr;
@@ -256,7 +256,7 @@ xfs_attr3_node_inactive(
error = xfs_da3_node_read(*trans, dp, child_fsb, -2, &child_bp,
XFS_ATTR_FORK);
if (error)
- return(error);
+ return error;
if (child_bp) {
/* save for re-read later */
child_blkno = XFS_BUF_ADDR(child_bp);
@@ -277,7 +277,7 @@ xfs_attr3_node_inactive(
child_bp);
break;
default:
- error = XFS_ERROR(EIO);
+ error = -EIO;
xfs_trans_brelse(*trans, child_bp);
break;
}
@@ -360,7 +360,7 @@ xfs_attr3_root_inactive(
error = xfs_attr3_leaf_inactive(trans, dp, bp);
break;
default:
- error = XFS_ERROR(EIO);
+ error = -EIO;
xfs_trans_brelse(*trans, bp);
break;
}
@@ -414,7 +414,7 @@ xfs_attr_inactive(xfs_inode_t *dp)
error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0);
if (error) {
xfs_trans_cancel(trans, 0);
- return(error);
+ return error;
}
xfs_ilock(dp, XFS_ILOCK_EXCL);
@@ -443,10 +443,10 @@ xfs_attr_inactive(xfs_inode_t *dp)
error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
- return(error);
+ return error;
out:
xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
- return(error);
+ return error;
}
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 90e2eeb2120..62db83ab6cb 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -50,11 +50,11 @@ xfs_attr_shortform_compare(const void *a, const void *b)
sa = (xfs_attr_sf_sort_t *)a;
sb = (xfs_attr_sf_sort_t *)b;
if (sa->hash < sb->hash) {
- return(-1);
+ return -1;
} else if (sa->hash > sb->hash) {
- return(1);
+ return 1;
} else {
- return(sa->entno - sb->entno);
+ return sa->entno - sb->entno;
}
}
@@ -86,7 +86,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data;
ASSERT(sf != NULL);
if (!sf->hdr.count)
- return(0);
+ return 0;
cursor = context->cursor;
ASSERT(cursor != NULL);
@@ -124,7 +124,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
}
trace_xfs_attr_list_sf_all(context);
- return(0);
+ return 0;
}
/* do no more for a search callback */
@@ -150,7 +150,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
XFS_ERRLEVEL_LOW,
context->dp->i_mount, sfe);
kmem_free(sbuf);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
sbp->entno = i;
@@ -188,7 +188,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
}
if (i == nsbuf) {
kmem_free(sbuf);
- return(0);
+ return 0;
}
/*
@@ -213,7 +213,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
}
kmem_free(sbuf);
- return(0);
+ return 0;
}
STATIC int
@@ -243,8 +243,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
if (cursor->blkno > 0) {
error = xfs_da3_node_read(NULL, dp, cursor->blkno, -1,
&bp, XFS_ATTR_FORK);
- if ((error != 0) && (error != EFSCORRUPTED))
- return(error);
+ if ((error != 0) && (error != -EFSCORRUPTED))
+ return error;
if (bp) {
struct xfs_attr_leaf_entry *entries;
@@ -295,7 +295,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
cursor->blkno, -1, &bp,
XFS_ATTR_FORK);
if (error)
- return(error);
+ return error;
node = bp->b_addr;
magic = be16_to_cpu(node->hdr.info.magic);
if (magic == XFS_ATTR_LEAF_MAGIC ||
@@ -308,7 +308,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
context->dp->i_mount,
node);
xfs_trans_brelse(NULL, bp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
dp->d_ops->node_hdr_from_disk(&nodehdr, node);
@@ -496,11 +496,11 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
context->cursor->blkno = 0;
error = xfs_attr3_leaf_read(NULL, context->dp, 0, -1, &bp);
if (error)
- return XFS_ERROR(error);
+ return error;
error = xfs_attr3_leaf_list_int(bp, context);
xfs_trans_brelse(NULL, bp);
- return XFS_ERROR(error);
+ return error;
}
int
@@ -514,7 +514,7 @@ xfs_attr_list_int(
XFS_STATS_INC(xs_attr_list);
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
- return EIO;
+ return -EIO;
/*
* Decide on what work routines to call based on the inode size.
@@ -616,16 +616,16 @@ xfs_attr_list(
* Validate the cursor.
*/
if (cursor->pad1 || cursor->pad2)
- return(XFS_ERROR(EINVAL));
+ return -EINVAL;
if ((cursor->initted == 0) &&
(cursor->hashval || cursor->blkno || cursor->offset))
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
/*
* Check for a properly aligned buffer.
*/
if (((long)buffer) & (sizeof(int)-1))
- return XFS_ERROR(EFAULT);
+ return -EFAULT;
if (flags & ATTR_KERNOVAL)
bufsize = 0;
@@ -648,6 +648,6 @@ xfs_attr_list(
alist->al_offset[0] = context.bufsize;
error = xfs_attr_list_int(&context);
- ASSERT(error >= 0);
+ ASSERT(error <= 0);
return error;
}
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 64731ef3324..1707980f9a4 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -133,7 +133,7 @@ xfs_bmap_finish(
mp = ntp->t_mountp;
if (!XFS_FORCED_SHUTDOWN(mp))
xfs_force_shutdown(mp,
- (error == EFSCORRUPTED) ?
+ (error == -EFSCORRUPTED) ?
SHUTDOWN_CORRUPT_INCORE :
SHUTDOWN_META_IO_ERROR);
return error;
@@ -365,7 +365,7 @@ xfs_bmap_count_tree(
xfs_trans_brelse(tp, bp);
XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
xfs_trans_brelse(tp, bp);
} else {
@@ -425,14 +425,14 @@ xfs_bmap_count_blocks(
ASSERT(level > 0);
pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
bno = be64_to_cpu(*pp);
- ASSERT(bno != NULLDFSBNO);
+ ASSERT(bno != NULLFSBLOCK);
ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
return 0;
@@ -524,13 +524,13 @@ xfs_getbmap(
if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
} else if (unlikely(
ip->i_d.di_aformat != 0 &&
ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
ip->i_mount);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
prealloced = 0;
@@ -539,7 +539,7 @@ xfs_getbmap(
if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
if (xfs_get_extsz_hint(ip) ||
ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
@@ -559,26 +559,26 @@ xfs_getbmap(
bmv->bmv_entries = 0;
return 0;
} else if (bmv->bmv_length < 0) {
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
nex = bmv->bmv_count - 1;
if (nex <= 0)
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
bmvend = bmv->bmv_offset + bmv->bmv_length;
if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
- return XFS_ERROR(ENOMEM);
+ return -ENOMEM;
out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
if (!out)
- return XFS_ERROR(ENOMEM);
+ return -ENOMEM;
xfs_ilock(ip, XFS_IOLOCK_SHARED);
if (whichfork == XFS_DATA_FORK) {
if (!(iflags & BMV_IF_DELALLOC) &&
(ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
- error = -filemap_write_and_wait(VFS_I(ip)->i_mapping);
+ error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
if (error)
goto out_unlock_iolock;
@@ -611,7 +611,7 @@ xfs_getbmap(
/*
* Allocate enough space to handle "subnex" maps at a time.
*/
- error = ENOMEM;
+ error = -ENOMEM;
subnex = 16;
map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
if (!map)
@@ -809,7 +809,7 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
* have speculative prealloc/delalloc blocks to remove.
*/
if (VFS_I(ip)->i_size == 0 &&
- VN_CACHED(VFS_I(ip)) == 0 &&
+ VFS_I(ip)->i_mapping->nrpages == 0 &&
ip->i_delayed_blks == 0)
return false;
@@ -882,7 +882,7 @@ xfs_free_eofblocks(
if (need_iolock) {
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
xfs_trans_cancel(tp, 0);
- return EAGAIN;
+ return -EAGAIN;
}
}
@@ -955,14 +955,14 @@ xfs_alloc_file_space(
trace_xfs_alloc_file_space(ip);
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
error = xfs_qm_dqattach(ip, 0);
if (error)
return error;
if (len <= 0)
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
rt = XFS_IS_REALTIME_INODE(ip);
extsz = xfs_get_extsz_hint(ip);
@@ -1028,7 +1028,7 @@ xfs_alloc_file_space(
/*
* Free the transaction structure.
*/
- ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
+ ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp, 0);
break;
}
@@ -1065,7 +1065,7 @@ xfs_alloc_file_space(
allocated_fsb = imapp->br_blockcount;
if (nimaps == 0) {
- error = XFS_ERROR(ENOSPC);
+ error = -ENOSPC;
break;
}
@@ -1126,7 +1126,7 @@ xfs_zero_remaining_bytes(
mp->m_rtdev_targp : mp->m_ddev_targp,
BTOBB(mp->m_sb.sb_blocksize), 0);
if (!bp)
- return XFS_ERROR(ENOMEM);
+ return -ENOMEM;
xfs_buf_unlock(bp);
@@ -1158,7 +1158,7 @@ xfs_zero_remaining_bytes(
XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
if (XFS_FORCED_SHUTDOWN(mp)) {
- error = XFS_ERROR(EIO);
+ error = -EIO;
break;
}
xfs_buf_iorequest(bp);
@@ -1176,7 +1176,7 @@ xfs_zero_remaining_bytes(
XFS_BUF_WRITE(bp);
if (XFS_FORCED_SHUTDOWN(mp)) {
- error = XFS_ERROR(EIO);
+ error = -EIO;
break;
}
xfs_buf_iorequest(bp);
@@ -1234,7 +1234,7 @@ xfs_free_file_space(
rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
ioffset = offset & ~(rounding - 1);
- error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+ error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
ioffset, -1);
if (error)
goto out;
@@ -1315,7 +1315,7 @@ xfs_free_file_space(
/*
* Free the transaction structure.
*/
- ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
+ ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp, 0);
break;
}
@@ -1470,6 +1470,26 @@ xfs_collapse_file_space(
start_fsb = XFS_B_TO_FSB(mp, offset + len);
shift_fsb = XFS_B_TO_FSB(mp, len);
+ /*
+ * Writeback the entire file and force remove any post-eof blocks. The
+ * writeback prevents changes to the extent list via concurrent
+ * writeback and the eofblocks trim prevents the extent shift algorithm
+ * from running into a post-eof delalloc extent.
+ *
+ * XXX: This is a temporary fix until the extent shift loop below is
+ * converted to use offsets and lookups within the ILOCK rather than
+ * carrying around the index into the extent list for the next
+ * iteration.
+ */
+ error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
+ if (error)
+ return error;
+ if (xfs_can_free_eofblocks(ip, true)) {
+ error = xfs_free_eofblocks(mp, ip, false);
+ if (error)
+ return error;
+ }
+
error = xfs_free_file_space(ip, offset, len);
if (error)
return error;
@@ -1557,14 +1577,14 @@ xfs_swap_extents_check_format(
/* Should never get a local format */
if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- return EINVAL;
+ return -EINVAL;
/*
* if the target inode has less extents that then temporary inode then
* why did userspace call us?
*/
if (ip->i_d.di_nextents < tip->i_d.di_nextents)
- return EINVAL;
+ return -EINVAL;
/*
* if the target inode is in extent form and the temp inode is in btree
@@ -1573,19 +1593,19 @@ xfs_swap_extents_check_format(
*/
if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
- return EINVAL;
+ return -EINVAL;
/* Check temp in extent form to max in target */
if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) >
XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
- return EINVAL;
+ return -EINVAL;
/* Check target in extent form to max in temp */
if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) >
XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
- return EINVAL;
+ return -EINVAL;
/*
* If we are in a btree format, check that the temp root block will fit
@@ -1599,26 +1619,50 @@ xfs_swap_extents_check_format(
if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
if (XFS_IFORK_BOFF(ip) &&
XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip))
- return EINVAL;
+ return -EINVAL;
if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
- return EINVAL;
+ return -EINVAL;
}
/* Reciprocal target->temp btree format checks */
if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
if (XFS_IFORK_BOFF(tip) &&
XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip))
- return EINVAL;
+ return -EINVAL;
if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
- return EINVAL;
+ return -EINVAL;
}
return 0;
}
int
+xfs_swap_extent_flush(
+ struct xfs_inode *ip)
+{
+ int error;
+
+ error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
+ if (error)
+ return error;
+ truncate_pagecache_range(VFS_I(ip), 0, -1);
+
+ /* Verify O_DIRECT for ftmp */
+ if (VFS_I(ip)->i_mapping->nrpages)
+ return -EINVAL;
+
+ /*
+ * Don't try to swap extents on mmap()d files because we can't lock
+ * out races against page faults safely.
+ */
+ if (mapping_mapped(VFS_I(ip)->i_mapping))
+ return -EBUSY;
+ return 0;
+}
+
+int
xfs_swap_extents(
xfs_inode_t *ip, /* target inode */
xfs_inode_t *tip, /* tmp inode */
@@ -1633,51 +1677,57 @@ xfs_swap_extents(
int aforkblks = 0;
int taforkblks = 0;
__uint64_t tmp;
+ int lock_flags;
tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
if (!tempifp) {
- error = XFS_ERROR(ENOMEM);
+ error = -ENOMEM;
goto out;
}
/*
- * we have to do two separate lock calls here to keep lockdep
- * happy. If we try to get all the locks in one call, lock will
- * report false positives when we drop the ILOCK and regain them
- * below.
+ * Lock up the inodes against other IO and truncate to begin with.
+ * Then we can ensure the inodes are flushed and have no page cache
+ * safely. Once we have done this we can take the ilocks and do the rest
+ * of the checks.
*/
+ lock_flags = XFS_IOLOCK_EXCL;
xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
- xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
/* Verify that both files have the same format */
if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out_unlock;
}
/* Verify both files are either real-time or non-realtime */
if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) {
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out_unlock;
}
- error = -filemap_write_and_wait(VFS_I(tip)->i_mapping);
+ error = xfs_swap_extent_flush(ip);
+ if (error)
+ goto out_unlock;
+ error = xfs_swap_extent_flush(tip);
if (error)
goto out_unlock;
- truncate_pagecache_range(VFS_I(tip), 0, -1);
- /* Verify O_DIRECT for ftmp */
- if (VN_CACHED(VFS_I(tip)) != 0) {
- error = XFS_ERROR(EINVAL);
+ tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
+ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
goto out_unlock;
}
+ xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
+ lock_flags |= XFS_ILOCK_EXCL;
/* Verify all data are being swapped */
if (sxp->sx_offset != 0 ||
sxp->sx_length != ip->i_d.di_size ||
sxp->sx_length != tip->i_d.di_size) {
- error = XFS_ERROR(EFAULT);
- goto out_unlock;
+ error = -EFAULT;
+ goto out_trans_cancel;
}
trace_xfs_swap_extent_before(ip, 0);
@@ -1689,7 +1739,7 @@ xfs_swap_extents(
xfs_notice(mp,
"%s: inode 0x%llx format is incompatible for exchanging.",
__func__, ip->i_ino);
- goto out_unlock;
+ goto out_trans_cancel;
}
/*
@@ -1703,43 +1753,9 @@ xfs_swap_extents(
(sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) ||
(sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
(sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
- error = XFS_ERROR(EBUSY);
- goto out_unlock;
+ error = -EBUSY;
+ goto out_trans_cancel;
}
-
- /* We need to fail if the file is memory mapped. Once we have tossed
- * all existing pages, the page fault will have no option
- * but to go to the filesystem for pages. By making the page fault call
- * vop_read (or write in the case of autogrow) they block on the iolock
- * until we have switched the extents.
- */
- if (VN_MAPPED(VFS_I(ip))) {
- error = XFS_ERROR(EBUSY);
- goto out_unlock;
- }
-
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_iunlock(tip, XFS_ILOCK_EXCL);
-
- /*
- * There is a race condition here since we gave up the
- * ilock. However, the data fork will not change since
- * we have the iolock (locked for truncation too) so we
- * are safe. We don't really care if non-io related
- * fields change.
- */
- truncate_pagecache_range(VFS_I(ip), 0, -1);
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
- if (error) {
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- xfs_iunlock(tip, XFS_IOLOCK_EXCL);
- xfs_trans_cancel(tp, 0);
- goto out;
- }
- xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
-
/*
* Count the number of extended attribute blocks
*/
@@ -1757,8 +1773,8 @@ xfs_swap_extents(
goto out_trans_cancel;
}
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
- xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, lock_flags);
+ xfs_trans_ijoin(tp, tip, lock_flags);
/*
* Before we've swapped the forks, lets set the owners of the forks
@@ -1887,8 +1903,8 @@ out:
return error;
out_unlock:
- xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
- xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+ xfs_iunlock(ip, lock_flags);
+ xfs_iunlock(tip, lock_flags);
goto out;
out_trans_cancel:
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 7a34a1ae655..cd7b8ca9b06 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -130,7 +130,7 @@ xfs_buf_get_maps(
bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map),
KM_NOFS);
if (!bp->b_maps)
- return ENOMEM;
+ return -ENOMEM;
return 0;
}
@@ -344,7 +344,7 @@ retry:
if (unlikely(page == NULL)) {
if (flags & XBF_READ_AHEAD) {
bp->b_page_count = i;
- error = ENOMEM;
+ error = -ENOMEM;
goto out_free_pages;
}
@@ -465,7 +465,7 @@ _xfs_buf_find(
eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
if (blkno >= eofs) {
/*
- * XXX (dgc): we should really be returning EFSCORRUPTED here,
+ * XXX (dgc): we should really be returning -EFSCORRUPTED here,
* but none of the higher level infrastructure supports
* returning a specific error on buffer lookup failures.
*/
@@ -1052,8 +1052,8 @@ xfs_buf_ioerror(
xfs_buf_t *bp,
int error)
{
- ASSERT(error >= 0 && error <= 0xffff);
- bp->b_error = (unsigned short)error;
+ ASSERT(error <= 0 && error >= -1000);
+ bp->b_error = error;
trace_xfs_buf_ioerror(bp, error, _RET_IP_);
}
@@ -1064,7 +1064,7 @@ xfs_buf_ioerror_alert(
{
xfs_alert(bp->b_target->bt_mount,
"metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d",
- (__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length);
+ (__uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length);
}
/*
@@ -1083,7 +1083,7 @@ xfs_bioerror(
/*
* No need to wait until the buffer is unpinned, we aren't flushing it.
*/
- xfs_buf_ioerror(bp, EIO);
+ xfs_buf_ioerror(bp, -EIO);
/*
* We're calling xfs_buf_ioend, so delete XBF_DONE flag.
@@ -1094,7 +1094,7 @@ xfs_bioerror(
xfs_buf_ioend(bp, 0);
- return EIO;
+ return -EIO;
}
/*
@@ -1127,13 +1127,13 @@ xfs_bioerror_relse(
* There's no reason to mark error for
* ASYNC buffers.
*/
- xfs_buf_ioerror(bp, EIO);
+ xfs_buf_ioerror(bp, -EIO);
complete(&bp->b_iowait);
} else {
xfs_buf_relse(bp);
}
- return EIO;
+ return -EIO;
}
STATIC int
@@ -1199,7 +1199,7 @@ xfs_buf_bio_end_io(
* buffers that require multiple bios to complete.
*/
if (!bp->b_error)
- xfs_buf_ioerror(bp, -error);
+ xfs_buf_ioerror(bp, error);
if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
@@ -1286,7 +1286,7 @@ next_chunk:
* because the caller (xfs_buf_iorequest) holds a count itself.
*/
atomic_dec(&bp->b_io_remaining);
- xfs_buf_ioerror(bp, EIO);
+ xfs_buf_ioerror(bp, -EIO);
bio_put(bio);
}
@@ -1330,6 +1330,20 @@ _xfs_buf_ioapply(
SHUTDOWN_CORRUPT_INCORE);
return;
}
+ } else if (bp->b_bn != XFS_BUF_DADDR_NULL) {
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+
+ /*
+ * non-crc filesystems don't attach verifiers during
+ * log recovery, so don't warn for such filesystems.
+ */
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ xfs_warn(mp,
+ "%s: no ops on block 0x%llx/0x%x",
+ __func__, bp->b_bn, bp->b_length);
+ xfs_hex_dump(bp->b_addr, 64);
+ dump_stack();
+ }
}
} else if (bp->b_flags & XBF_READ_AHEAD) {
rw = READA;
@@ -1628,7 +1642,7 @@ xfs_setsize_buftarg(
xfs_warn(btp->bt_mount,
"Cannot set_blocksize to %u on device %s",
sectorsize, name);
- return EINVAL;
+ return -EINVAL;
}
/* Set up device logical sector size mask */
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 3a7a5523d3d..c753183900b 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -178,7 +178,7 @@ typedef struct xfs_buf {
atomic_t b_io_remaining; /* #outstanding I/O requests */
unsigned int b_page_count; /* size of page array */
unsigned int b_offset; /* page offset in first page */
- unsigned short b_error; /* error code on I/O */
+ int b_error; /* error code on I/O */
const struct xfs_buf_ops *b_ops;
#ifdef XFS_BUF_LOCK_TRACKING
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 4654338b03f..76007deed31 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -488,7 +488,7 @@ xfs_buf_item_unpin(
xfs_buf_lock(bp);
xfs_buf_hold(bp);
bp->b_flags |= XBF_ASYNC;
- xfs_buf_ioerror(bp, EIO);
+ xfs_buf_ioerror(bp, -EIO);
XFS_BUF_UNDONE(bp);
xfs_buf_stale(bp);
xfs_buf_ioend(bp, 0);
@@ -725,7 +725,7 @@ xfs_buf_item_get_format(
bip->bli_formats = kmem_zalloc(count * sizeof(struct xfs_buf_log_format),
KM_SLEEP);
if (!bip->bli_formats)
- return ENOMEM;
+ return -ENOMEM;
return 0;
}
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 48e99afb9cb..f1b69edcdf3 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -95,7 +95,7 @@ xfs_dir2_sf_getdents(
*/
if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
- return XFS_ERROR(EIO);
+ return -EIO;
}
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
@@ -677,7 +677,7 @@ xfs_readdir(
trace_xfs_readdir(dp);
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
- return XFS_ERROR(EIO);
+ return -EIO;
ASSERT(S_ISDIR(dp->i_d.di_mode));
XFS_STATS_INC(xs_dir_getdents);
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 4f11ef01113..13d08a1b390 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -124,7 +124,7 @@ xfs_trim_extents(
}
trace_xfs_discard_extent(mp, agno, fbno, flen);
- error = -blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS, 0);
+ error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS, 0);
if (error)
goto out_del_cursor;
*blocks_trimmed += flen;
@@ -166,11 +166,11 @@ xfs_ioc_trim(
int error, last_error = 0;
if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
if (!blk_queue_discard(q))
- return -XFS_ERROR(EOPNOTSUPP);
+ return -EOPNOTSUPP;
if (copy_from_user(&range, urange, sizeof(range)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
/*
* Truncating down the len isn't actually quite correct, but using
@@ -182,7 +182,7 @@ xfs_ioc_trim(
if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) ||
range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)) ||
range.len < mp->m_sb.sb_blocksize)
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
start = BTOBB(range.start);
end = start + BTOBBT(range.len) - 1;
@@ -195,7 +195,7 @@ xfs_ioc_trim(
end_agno = xfs_daddr_to_agno(mp, end);
for (agno = start_agno; agno <= end_agno; agno++) {
- error = -xfs_trim_extents(mp, agno, start, end, minlen,
+ error = xfs_trim_extents(mp, agno, start, end, minlen,
&blocks_trimmed);
if (error)
last_error = error;
@@ -206,7 +206,7 @@ xfs_ioc_trim(
range.len = XFS_FSB_TO_B(mp, blocks_trimmed);
if (copy_to_user(urange, &range, sizeof(range)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -222,11 +222,11 @@ xfs_discard_extents(
trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
busyp->length);
- error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
+ error = blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
XFS_FSB_TO_BB(mp, busyp->length),
GFP_NOFS, 0);
- if (error && error != EOPNOTSUPP) {
+ if (error && error != -EOPNOTSUPP) {
xfs_info(mp,
"discard failed for extent [0x%llu,%u], error %d",
(unsigned long long)busyp->bno,
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 3ee0cd43edc..63c2de49f61 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -327,7 +327,7 @@ xfs_qm_dqalloc(
*/
if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
xfs_iunlock(quotip, XFS_ILOCK_EXCL);
- return (ESRCH);
+ return -ESRCH;
}
xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
@@ -354,7 +354,7 @@ xfs_qm_dqalloc(
mp->m_quotainfo->qi_dqchunklen,
0);
if (!bp) {
- error = ENOMEM;
+ error = -ENOMEM;
goto error1;
}
bp->b_ops = &xfs_dquot_buf_ops;
@@ -400,7 +400,7 @@ xfs_qm_dqalloc(
error0:
xfs_iunlock(quotip, XFS_ILOCK_EXCL);
- return (error);
+ return error;
}
STATIC int
@@ -426,7 +426,7 @@ xfs_qm_dqrepair(
if (error) {
ASSERT(*bpp == NULL);
- return XFS_ERROR(error);
+ return error;
}
(*bpp)->b_ops = &xfs_dquot_buf_ops;
@@ -442,7 +442,7 @@ xfs_qm_dqrepair(
if (error) {
/* repair failed, we're screwed */
xfs_trans_brelse(tp, *bpp);
- return XFS_ERROR(EIO);
+ return -EIO;
}
}
@@ -480,7 +480,7 @@ xfs_qm_dqtobp(
* didn't have the quota inode lock.
*/
xfs_iunlock(quotip, lock_mode);
- return ESRCH;
+ return -ESRCH;
}
/*
@@ -508,7 +508,7 @@ xfs_qm_dqtobp(
* We don't allocate unless we're asked to
*/
if (!(flags & XFS_QMOPT_DQALLOC))
- return ENOENT;
+ return -ENOENT;
ASSERT(tp);
error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
@@ -530,7 +530,7 @@ xfs_qm_dqtobp(
mp->m_quotainfo->qi_dqchunklen,
0, &bp, &xfs_dquot_buf_ops);
- if (error == EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) {
+ if (error == -EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) {
xfs_dqid_t firstid = (xfs_dqid_t)map.br_startoff *
mp->m_quotainfo->qi_dqperchunk;
ASSERT(bp == NULL);
@@ -539,7 +539,7 @@ xfs_qm_dqtobp(
if (error) {
ASSERT(bp == NULL);
- return XFS_ERROR(error);
+ return error;
}
}
@@ -547,7 +547,7 @@ xfs_qm_dqtobp(
*O_bpp = bp;
*O_ddpp = bp->b_addr + dqp->q_bufoffset;
- return (0);
+ return 0;
}
@@ -715,7 +715,7 @@ xfs_qm_dqget(
if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
(! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
(! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
- return (ESRCH);
+ return -ESRCH;
}
#ifdef DEBUG
@@ -723,7 +723,7 @@ xfs_qm_dqget(
if ((xfs_dqerror_target == mp->m_ddev_targp) &&
(xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
xfs_debug(mp, "Returning error in dqget");
- return (EIO);
+ return -EIO;
}
}
@@ -796,14 +796,14 @@ restart:
} else {
/* inode stays locked on return */
xfs_qm_dqdestroy(dqp);
- return XFS_ERROR(ESRCH);
+ return -ESRCH;
}
}
mutex_lock(&qi->qi_tree_lock);
- error = -radix_tree_insert(tree, id, dqp);
+ error = radix_tree_insert(tree, id, dqp);
if (unlikely(error)) {
- WARN_ON(error != EEXIST);
+ WARN_ON(error != -EEXIST);
/*
* Duplicate found. Just throw away the new dquot and start
@@ -829,7 +829,7 @@ restart:
ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
trace_xfs_dqget_miss(dqp);
*O_dqpp = dqp;
- return (0);
+ return 0;
}
/*
@@ -966,7 +966,7 @@ xfs_qm_dqflush(
SHUTDOWN_CORRUPT_INCORE);
else
spin_unlock(&mp->m_ail->xa_lock);
- error = XFS_ERROR(EIO);
+ error = -EIO;
goto out_unlock;
}
@@ -974,7 +974,8 @@ xfs_qm_dqflush(
* Get the buffer containing the on-disk dquot
*/
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
- mp->m_quotainfo->qi_dqchunklen, 0, &bp, NULL);
+ mp->m_quotainfo->qi_dqchunklen, 0, &bp,
+ &xfs_dquot_buf_ops);
if (error)
goto out_unlock;
@@ -992,7 +993,7 @@ xfs_qm_dqflush(
xfs_buf_relse(bp);
xfs_dqfunlock(dqp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
- return XFS_ERROR(EIO);
+ return -EIO;
}
/* This is the only portion of data that needs to persist */
@@ -1045,7 +1046,7 @@ xfs_qm_dqflush(
out_unlock:
xfs_dqfunlock(dqp);
- return XFS_ERROR(EIO);
+ return -EIO;
}
/*
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 68a68f70483..c24c67e22a2 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -139,6 +139,21 @@ static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type)
}
}
+/*
+ * Check whether a dquot is under low free space conditions. We assume the quota
+ * is enabled and enforced.
+ */
+static inline bool xfs_dquot_lowsp(struct xfs_dquot *dqp)
+{
+ int64_t freesp;
+
+ freesp = be64_to_cpu(dqp->q_core.d_blk_hardlimit) - dqp->q_res_bcount;
+ if (freesp < dqp->q_low_space[XFS_QLOWSP_1_PCNT])
+ return true;
+
+ return false;
+}
+
#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock)))
#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index edac5b057d2..b92fd7bc49e 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -27,29 +27,6 @@
#ifdef DEBUG
-int xfs_etrap[XFS_ERROR_NTRAP] = {
- 0,
-};
-
-int
-xfs_error_trap(int e)
-{
- int i;
-
- if (!e)
- return 0;
- for (i = 0; i < XFS_ERROR_NTRAP; i++) {
- if (xfs_etrap[i] == 0)
- break;
- if (e != xfs_etrap[i])
- continue;
- xfs_notice(NULL, "%s: error %d", __func__, e);
- BUG();
- break;
- }
- return e;
-}
-
int xfs_etest[XFS_NUM_INJECT_ERROR];
int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR];
char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
@@ -190,7 +167,7 @@ xfs_verifier_error(
struct xfs_mount *mp = bp->b_target->bt_mount;
xfs_alert(mp, "Metadata %s detected at %pF, block 0x%llx",
- bp->b_error == EFSBADCRC ? "CRC error" : "corruption",
+ bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
__return_address, bp->b_bn);
xfs_alert(mp, "Unmount and run xfs_repair");
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index c1c57d4a4b5..279a76e5279 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -18,15 +18,6 @@
#ifndef __XFS_ERROR_H__
#define __XFS_ERROR_H__
-#ifdef DEBUG
-#define XFS_ERROR_NTRAP 10
-extern int xfs_etrap[XFS_ERROR_NTRAP];
-extern int xfs_error_trap(int);
-#define XFS_ERROR(e) xfs_error_trap(e)
-#else
-#define XFS_ERROR(e) (e)
-#endif
-
struct xfs_mount;
extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
@@ -56,7 +47,7 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
if (unlikely(!fs_is_ok)) { \
XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO", \
XFS_ERRLEVEL_LOW, NULL); \
- error = XFS_ERROR(EFSCORRUPTED); \
+ error = -EFSCORRUPTED; \
goto l; \
} \
}
@@ -68,7 +59,7 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
if (unlikely(!fs_is_ok)) { \
XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN", \
XFS_ERRLEVEL_LOW, NULL); \
- return XFS_ERROR(EFSCORRUPTED); \
+ return -EFSCORRUPTED; \
} \
}
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index 753e467aa1a..5a6bd5d8779 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -147,9 +147,9 @@ xfs_nfs_get_inode(
* We don't use ESTALE directly down the chain to not
* confuse applications using bulkstat that expect EINVAL.
*/
- if (error == EINVAL || error == ENOENT)
- error = ESTALE;
- return ERR_PTR(-error);
+ if (error == -EINVAL || error == -ENOENT)
+ error = -ESTALE;
+ return ERR_PTR(error);
}
if (ip->i_d.di_gen != generation) {
@@ -217,7 +217,7 @@ xfs_fs_get_parent(
error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
if (unlikely(error))
- return ERR_PTR(-error);
+ return ERR_PTR(error);
return d_obtain_alias(VFS_I(cip));
}
@@ -237,7 +237,7 @@ xfs_fs_nfs_commit_metadata(
if (!lsn)
return 0;
- return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
+ return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
}
const struct export_operations xfs_export_operations = {
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index fb7a4c1ce1c..c4327419dc5 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -298,7 +298,7 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
}
return 0;
}
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
}
/*
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 1f66779d7a4..de5368c803f 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -38,6 +38,7 @@
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_dinode.h"
+#include "xfs_icache.h"
#include <linux/aio.h>
#include <linux/dcache.h>
@@ -155,7 +156,7 @@ xfs_dir_fsync(
if (!lsn)
return 0;
- return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
+ return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
}
STATIC int
@@ -179,7 +180,7 @@ xfs_file_fsync(
return error;
if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
+ return -EIO;
xfs_iflags_clear(ip, XFS_ITRUNCATED);
@@ -225,7 +226,7 @@ xfs_file_fsync(
!log_flushed)
xfs_blkdev_issue_flush(mp->m_ddev_targp);
- return -error;
+ return error;
}
STATIC ssize_t
@@ -246,11 +247,11 @@ xfs_file_read_iter(
XFS_STATS_INC(xs_read_calls);
if (unlikely(file->f_flags & O_DIRECT))
- ioflags |= IO_ISDIRECT;
+ ioflags |= XFS_IO_ISDIRECT;
if (file->f_mode & FMODE_NOCMTIME)
- ioflags |= IO_INVIS;
+ ioflags |= XFS_IO_INVIS;
- if (unlikely(ioflags & IO_ISDIRECT)) {
+ if (unlikely(ioflags & XFS_IO_ISDIRECT)) {
xfs_buftarg_t *target =
XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
@@ -258,7 +259,7 @@ xfs_file_read_iter(
if ((pos | size) & target->bt_logical_sectormask) {
if (pos == i_size_read(inode))
return 0;
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
}
}
@@ -283,19 +284,29 @@ xfs_file_read_iter(
* proceeed concurrently without serialisation.
*/
xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
- if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) {
+ if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) {
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
if (inode->i_mapping->nrpages) {
ret = filemap_write_and_wait_range(
VFS_I(ip)->i_mapping,
- pos, -1);
+ pos, pos + size - 1);
if (ret) {
xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
}
- truncate_pagecache_range(VFS_I(ip), pos, -1);
+
+ /*
+ * Invalidate whole pages. This can return an error if
+ * we fail to invalidate a page, but this should never
+ * happen on XFS. Warn if it does fail.
+ */
+ ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
+ pos >> PAGE_CACHE_SHIFT,
+ (pos + size - 1) >> PAGE_CACHE_SHIFT);
+ WARN_ON_ONCE(ret);
+ ret = 0;
}
xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
}
@@ -325,7 +336,7 @@ xfs_file_splice_read(
XFS_STATS_INC(xs_read_calls);
if (infilp->f_mode & FMODE_NOCMTIME)
- ioflags |= IO_INVIS;
+ ioflags |= XFS_IO_INVIS;
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
@@ -524,7 +535,7 @@ restart:
xfs_rw_ilock(ip, *iolock);
goto restart;
}
- error = -xfs_zero_eof(ip, *pos, i_size_read(inode));
+ error = xfs_zero_eof(ip, *pos, i_size_read(inode));
if (error)
return error;
}
@@ -594,7 +605,7 @@ xfs_file_dio_aio_write(
/* DIO must be aligned to device logical sector size */
if ((pos | count) & target->bt_logical_sectormask)
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
/* "unaligned" here means not aligned to a filesystem block */
if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
@@ -631,10 +642,19 @@ xfs_file_dio_aio_write(
if (mapping->nrpages) {
ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
- pos, -1);
+ pos, pos + count - 1);
if (ret)
goto out;
- truncate_pagecache_range(VFS_I(ip), pos, -1);
+ /*
+ * Invalidate whole pages. This can return an error if
+ * we fail to invalidate a page, but this should never
+ * happen on XFS. Warn if it does fail.
+ */
+ ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
+ pos >> PAGE_CACHE_SHIFT,
+ (pos + count - 1) >> PAGE_CACHE_SHIFT);
+ WARN_ON_ONCE(ret);
+ ret = 0;
}
/*
@@ -689,14 +709,28 @@ write_retry:
ret = generic_perform_write(file, from, pos);
if (likely(ret >= 0))
iocb->ki_pos = pos + ret;
+
/*
- * If we just got an ENOSPC, try to write back all dirty inodes to
- * convert delalloc space to free up some of the excess reserved
- * metadata space.
+ * If we hit a space limit, try to free up some lingering preallocated
+ * space before returning an error. In the case of ENOSPC, first try to
+ * write back all dirty inodes to free up some of the excess reserved
+ * metadata space. This reduces the chances that the eofblocks scan
+ * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
+ * also behaves as a filter to prevent too many eofblocks scans from
+ * running at the same time.
*/
- if (ret == -ENOSPC && !enospc) {
+ if (ret == -EDQUOT && !enospc) {
+ enospc = xfs_inode_free_quota_eofblocks(ip);
+ if (enospc)
+ goto write_retry;
+ } else if (ret == -ENOSPC && !enospc) {
+ struct xfs_eofblocks eofb = {0};
+
enospc = 1;
xfs_flush_inodes(ip->i_mount);
+ eofb.eof_scan_owner = ip->i_ino; /* for locking */
+ eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
+ xfs_icache_free_eofblocks(ip->i_mount, &eofb);
goto write_retry;
}
@@ -772,7 +806,7 @@ xfs_file_fallocate(
unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
if (offset & blksize_mask || len & blksize_mask) {
- error = EINVAL;
+ error = -EINVAL;
goto out_unlock;
}
@@ -781,7 +815,7 @@ xfs_file_fallocate(
* in which case it is effectively a truncate operation
*/
if (offset + len >= i_size_read(inode)) {
- error = EINVAL;
+ error = -EINVAL;
goto out_unlock;
}
@@ -794,7 +828,7 @@ xfs_file_fallocate(
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
offset + len > i_size_read(inode)) {
new_size = offset + len;
- error = -inode_newsize_ok(inode, new_size);
+ error = inode_newsize_ok(inode, new_size);
if (error)
goto out_unlock;
}
@@ -844,7 +878,7 @@ xfs_file_fallocate(
out_unlock:
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- return -error;
+ return error;
}
@@ -889,7 +923,7 @@ xfs_file_release(
struct inode *inode,
struct file *filp)
{
- return -xfs_release(XFS_I(inode));
+ return xfs_release(XFS_I(inode));
}
STATIC int
@@ -918,7 +952,7 @@ xfs_file_readdir(
error = xfs_readdir(ip, ctx, bufsize);
if (error)
- return -error;
+ return error;
return 0;
}
@@ -1184,7 +1218,7 @@ xfs_seek_data(
isize = i_size_read(inode);
if (start >= isize) {
- error = ENXIO;
+ error = -ENXIO;
goto out_unlock;
}
@@ -1206,7 +1240,7 @@ xfs_seek_data(
/* No extents at given offset, must be beyond EOF */
if (nmap == 0) {
- error = ENXIO;
+ error = -ENXIO;
goto out_unlock;
}
@@ -1237,7 +1271,7 @@ xfs_seek_data(
* we are reading after EOF if nothing in map[1].
*/
if (nmap == 1) {
- error = ENXIO;
+ error = -ENXIO;
goto out_unlock;
}
@@ -1250,7 +1284,7 @@ xfs_seek_data(
fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
start = XFS_FSB_TO_B(mp, fsbno);
if (start >= isize) {
- error = ENXIO;
+ error = -ENXIO;
goto out_unlock;
}
}
@@ -1262,7 +1296,7 @@ out_unlock:
xfs_iunlock(ip, lock);
if (error)
- return -error;
+ return error;
return offset;
}
@@ -1282,13 +1316,13 @@ xfs_seek_hole(
int error;
if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
+ return -EIO;
lock = xfs_ilock_data_map_shared(ip);
isize = i_size_read(inode);
if (start >= isize) {
- error = ENXIO;
+ error = -ENXIO;
goto out_unlock;
}
@@ -1307,7 +1341,7 @@ xfs_seek_hole(
/* No extents at given offset, must be beyond EOF */
if (nmap == 0) {
- error = ENXIO;
+ error = -ENXIO;
goto out_unlock;
}
@@ -1370,7 +1404,7 @@ out_unlock:
xfs_iunlock(ip, lock);
if (error)
- return -error;
+ return error;
return offset;
}
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 8ec81bed799..e92730c1d3c 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -258,7 +258,7 @@ next_ag:
if (*agp == NULLAGNUMBER)
return 0;
- err = ENOMEM;
+ err = -ENOMEM;
item = kmem_alloc(sizeof(*item), KM_MAYFAIL);
if (!item)
goto out_put_ag;
@@ -268,7 +268,7 @@ next_ag:
err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru);
if (err) {
- if (err == EEXIST)
+ if (err == -EEXIST)
err = 0;
goto out_free_item;
}
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index d34703dbcb4..18dc721ca19 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -255,8 +255,8 @@ typedef struct xfs_fsop_resblks {
((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES)
/* Used for sanity checks on superblock */
-#define XFS_MAX_DBLOCKS(s) ((xfs_drfsbno_t)(s)->sb_agcount * (s)->sb_agblocks)
-#define XFS_MIN_DBLOCKS(s) ((xfs_drfsbno_t)((s)->sb_agcount - 1) * \
+#define XFS_MAX_DBLOCKS(s) ((xfs_rfsblock_t)(s)->sb_agcount * (s)->sb_agblocks)
+#define XFS_MIN_DBLOCKS(s) ((xfs_rfsblock_t)((s)->sb_agcount - 1) * \
(s)->sb_agblocks + XFS_MIN_AG_BLOCKS)
/*
@@ -375,6 +375,9 @@ struct xfs_fs_eofblocks {
#define XFS_EOF_FLAGS_GID (1 << 2) /* filter by gid */
#define XFS_EOF_FLAGS_PRID (1 << 3) /* filter by project id */
#define XFS_EOF_FLAGS_MINFILESIZE (1 << 4) /* filter by min file size */
+#define XFS_EOF_FLAGS_UNION (1 << 5) /* union filter algorithm;
+ * kernel only, not included in
+ * valid mask */
#define XFS_EOF_FLAGS_VALID \
(XFS_EOF_FLAGS_SYNC | \
XFS_EOF_FLAGS_UID | \
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index d2295561570..f91de1ef05e 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -168,7 +168,7 @@ xfs_growfs_data_private(
nb = in->newblocks;
pct = in->imaxpct;
if (nb < mp->m_sb.sb_dblocks || pct < 0 || pct > 100)
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
return error;
dpct = pct - mp->m_sb.sb_imax_pct;
@@ -176,7 +176,7 @@ xfs_growfs_data_private(
XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
XFS_FSS_TO_BB(mp, 1), 0, NULL);
if (!bp)
- return EIO;
+ return -EIO;
if (bp->b_error) {
error = bp->b_error;
xfs_buf_relse(bp);
@@ -191,7 +191,7 @@ xfs_growfs_data_private(
nagcount--;
nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks;
if (nb < mp->m_sb.sb_dblocks)
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
new = nb - mp->m_sb.sb_dblocks;
oagcount = mp->m_sb.sb_agcount;
@@ -229,7 +229,7 @@ xfs_growfs_data_private(
XFS_FSS_TO_BB(mp, 1), 0,
&xfs_agf_buf_ops);
if (!bp) {
- error = ENOMEM;
+ error = -ENOMEM;
goto error0;
}
@@ -270,7 +270,7 @@ xfs_growfs_data_private(
XFS_FSS_TO_BB(mp, 1), 0,
&xfs_agfl_buf_ops);
if (!bp) {
- error = ENOMEM;
+ error = -ENOMEM;
goto error0;
}
@@ -298,7 +298,7 @@ xfs_growfs_data_private(
XFS_FSS_TO_BB(mp, 1), 0,
&xfs_agi_buf_ops);
if (!bp) {
- error = ENOMEM;
+ error = -ENOMEM;
goto error0;
}
@@ -336,7 +336,7 @@ xfs_growfs_data_private(
&xfs_allocbt_buf_ops);
if (!bp) {
- error = ENOMEM;
+ error = -ENOMEM;
goto error0;
}
@@ -365,7 +365,7 @@ xfs_growfs_data_private(
BTOBB(mp->m_sb.sb_blocksize), 0,
&xfs_allocbt_buf_ops);
if (!bp) {
- error = ENOMEM;
+ error = -ENOMEM;
goto error0;
}
@@ -395,7 +395,7 @@ xfs_growfs_data_private(
BTOBB(mp->m_sb.sb_blocksize), 0,
&xfs_inobt_buf_ops);
if (!bp) {
- error = ENOMEM;
+ error = -ENOMEM;
goto error0;
}
@@ -420,7 +420,7 @@ xfs_growfs_data_private(
BTOBB(mp->m_sb.sb_blocksize), 0,
&xfs_inobt_buf_ops);
if (!bp) {
- error = ENOMEM;
+ error = -ENOMEM;
goto error0;
}
@@ -531,7 +531,7 @@ xfs_growfs_data_private(
bp->b_ops = &xfs_sb_buf_ops;
xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
} else
- error = ENOMEM;
+ error = -ENOMEM;
}
/*
@@ -576,17 +576,17 @@ xfs_growfs_log_private(
nb = in->newblocks;
if (nb < XFS_MIN_LOG_BLOCKS || nb < XFS_B_TO_FSB(mp, XFS_MIN_LOG_BYTES))
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
if (nb == mp->m_sb.sb_logblocks &&
in->isint == (mp->m_sb.sb_logstart != 0))
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
/*
* Moving the log is hard, need new interfaces to sync
* the log first, hold off all activity while moving it.
* Can have shorter or longer log in the same space,
* or transform internal to external log or vice versa.
*/
- return XFS_ERROR(ENOSYS);
+ return -ENOSYS;
}
/*
@@ -604,9 +604,9 @@ xfs_growfs_data(
int error;
if (!capable(CAP_SYS_ADMIN))
- return XFS_ERROR(EPERM);
+ return -EPERM;
if (!mutex_trylock(&mp->m_growlock))
- return XFS_ERROR(EWOULDBLOCK);
+ return -EWOULDBLOCK;
error = xfs_growfs_data_private(mp, in);
mutex_unlock(&mp->m_growlock);
return error;
@@ -620,9 +620,9 @@ xfs_growfs_log(
int error;
if (!capable(CAP_SYS_ADMIN))
- return XFS_ERROR(EPERM);
+ return -EPERM;
if (!mutex_trylock(&mp->m_growlock))
- return XFS_ERROR(EWOULDBLOCK);
+ return -EWOULDBLOCK;
error = xfs_growfs_log_private(mp, in);
mutex_unlock(&mp->m_growlock);
return error;
@@ -674,7 +674,7 @@ xfs_reserve_blocks(
/* If inval is null, report current values and return */
if (inval == (__uint64_t *)NULL) {
if (!outval)
- return EINVAL;
+ return -EINVAL;
outval->resblks = mp->m_resblks;
outval->resblks_avail = mp->m_resblks_avail;
return 0;
@@ -757,7 +757,7 @@ out:
int error;
error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
fdblks_delta, 0);
- if (error == ENOSPC)
+ if (error == -ENOSPC)
goto retry;
}
return 0;
@@ -818,7 +818,7 @@ xfs_fs_goingdown(
SHUTDOWN_FORCE_UMOUNT | SHUTDOWN_LOG_IO_ERROR);
break;
default:
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
return 0;
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index c48df5f25b9..981b2cf5198 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -33,6 +33,9 @@
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_bmap_util.h"
+#include "xfs_quota.h"
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
#include <linux/kthread.h>
#include <linux/freezer.h>
@@ -158,7 +161,7 @@ xfs_iget_cache_hit(
if (ip->i_ino != ino) {
trace_xfs_iget_skip(ip);
XFS_STATS_INC(xs_ig_frecycle);
- error = EAGAIN;
+ error = -EAGAIN;
goto out_error;
}
@@ -176,7 +179,7 @@ xfs_iget_cache_hit(
if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
trace_xfs_iget_skip(ip);
XFS_STATS_INC(xs_ig_frecycle);
- error = EAGAIN;
+ error = -EAGAIN;
goto out_error;
}
@@ -184,7 +187,7 @@ xfs_iget_cache_hit(
* If lookup is racing with unlink return an error immediately.
*/
if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
- error = ENOENT;
+ error = -ENOENT;
goto out_error;
}
@@ -206,7 +209,7 @@ xfs_iget_cache_hit(
spin_unlock(&ip->i_flags_lock);
rcu_read_unlock();
- error = -inode_init_always(mp->m_super, inode);
+ error = inode_init_always(mp->m_super, inode);
if (error) {
/*
* Re-initializing the inode failed, and we are in deep
@@ -243,7 +246,7 @@ xfs_iget_cache_hit(
/* If the VFS inode is being torn down, pause and try again. */
if (!igrab(inode)) {
trace_xfs_iget_skip(ip);
- error = EAGAIN;
+ error = -EAGAIN;
goto out_error;
}
@@ -285,7 +288,7 @@ xfs_iget_cache_miss(
ip = xfs_inode_alloc(mp, ino);
if (!ip)
- return ENOMEM;
+ return -ENOMEM;
error = xfs_iread(mp, tp, ip, flags);
if (error)
@@ -294,7 +297,7 @@ xfs_iget_cache_miss(
trace_xfs_iget_miss(ip);
if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
- error = ENOENT;
+ error = -ENOENT;
goto out_destroy;
}
@@ -305,7 +308,7 @@ xfs_iget_cache_miss(
* recurse into the file system.
*/
if (radix_tree_preload(GFP_NOFS)) {
- error = EAGAIN;
+ error = -EAGAIN;
goto out_destroy;
}
@@ -341,7 +344,7 @@ xfs_iget_cache_miss(
if (unlikely(error)) {
WARN_ON(error != -EEXIST);
XFS_STATS_INC(xs_ig_dup);
- error = EAGAIN;
+ error = -EAGAIN;
goto out_preload_end;
}
spin_unlock(&pag->pag_ici_lock);
@@ -408,7 +411,7 @@ xfs_iget(
/* reject inode numbers outside existing AGs */
if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
- return EINVAL;
+ return -EINVAL;
/* get the perag structure and ensure that it's inode capable */
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
@@ -445,7 +448,7 @@ again:
return 0;
out_error_or_again:
- if (error == EAGAIN) {
+ if (error == -EAGAIN) {
delay(1);
goto again;
}
@@ -489,18 +492,18 @@ xfs_inode_ag_walk_grab(
/* nothing to sync during shutdown */
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
/* If we can't grab the inode, it must be on its way to reclaim. */
if (!igrab(inode))
- return ENOENT;
+ return -ENOENT;
/* inode is valid */
return 0;
out_unlock_noent:
spin_unlock(&ip->i_flags_lock);
- return ENOENT;
+ return -ENOENT;
}
STATIC int
@@ -583,16 +586,16 @@ restart:
continue;
error = execute(batch[i], flags, args);
IRELE(batch[i]);
- if (error == EAGAIN) {
+ if (error == -EAGAIN) {
skipped++;
continue;
}
- if (error && last_error != EFSCORRUPTED)
+ if (error && last_error != -EFSCORRUPTED)
last_error = error;
}
/* bail out if the filesystem is corrupted. */
- if (error == EFSCORRUPTED)
+ if (error == -EFSCORRUPTED)
break;
cond_resched();
@@ -652,11 +655,11 @@ xfs_inode_ag_iterator(
xfs_perag_put(pag);
if (error) {
last_error = error;
- if (error == EFSCORRUPTED)
+ if (error == -EFSCORRUPTED)
break;
}
}
- return XFS_ERROR(last_error);
+ return last_error;
}
int
@@ -680,11 +683,11 @@ xfs_inode_ag_iterator_tag(
xfs_perag_put(pag);
if (error) {
last_error = error;
- if (error == EFSCORRUPTED)
+ if (error == -EFSCORRUPTED)
break;
}
}
- return XFS_ERROR(last_error);
+ return last_error;
}
/*
@@ -944,7 +947,7 @@ restart:
* see the stale flag set on the inode.
*/
error = xfs_iflush(ip, &bp);
- if (error == EAGAIN) {
+ if (error == -EAGAIN) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
/* backoff longer than in xfs_ifree_cluster */
delay(2);
@@ -997,7 +1000,7 @@ out:
xfs_iflags_clear(ip, XFS_IRECLAIM);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
/*
- * We could return EAGAIN here to make reclaim rescan the inode tree in
+ * We could return -EAGAIN here to make reclaim rescan the inode tree in
* a short while. However, this just burns CPU time scanning the tree
* waiting for IO to complete and the reclaim work never goes back to
* the idle state. Instead, return 0 to let the next scheduled
@@ -1100,7 +1103,7 @@ restart:
if (!batch[i])
continue;
error = xfs_reclaim_inode(batch[i], pag, flags);
- if (error && last_error != EFSCORRUPTED)
+ if (error && last_error != -EFSCORRUPTED)
last_error = error;
}
@@ -1129,7 +1132,7 @@ restart:
trylock = 0;
goto restart;
}
- return XFS_ERROR(last_error);
+ return last_error;
}
int
@@ -1203,6 +1206,30 @@ xfs_inode_match_id(
return 1;
}
+/*
+ * A union-based inode filtering algorithm. Process the inode if any of the
+ * criteria match. This is for global/internal scans only.
+ */
+STATIC int
+xfs_inode_match_id_union(
+ struct xfs_inode *ip,
+ struct xfs_eofblocks *eofb)
+{
+ if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
+ uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
+ return 1;
+
+ if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
+ gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
+ return 1;
+
+ if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
+ xfs_get_projid(ip) == eofb->eof_prid)
+ return 1;
+
+ return 0;
+}
+
STATIC int
xfs_inode_free_eofblocks(
struct xfs_inode *ip,
@@ -1211,6 +1238,10 @@ xfs_inode_free_eofblocks(
{
int ret;
struct xfs_eofblocks *eofb = args;
+ bool need_iolock = true;
+ int match;
+
+ ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
if (!xfs_can_free_eofblocks(ip, false)) {
/* inode could be preallocated or append-only */
@@ -1228,19 +1259,31 @@ xfs_inode_free_eofblocks(
return 0;
if (eofb) {
- if (!xfs_inode_match_id(ip, eofb))
+ if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
+ match = xfs_inode_match_id_union(ip, eofb);
+ else
+ match = xfs_inode_match_id(ip, eofb);
+ if (!match)
return 0;
/* skip the inode if the file size is too small */
if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
XFS_ISIZE(ip) < eofb->eof_min_file_size)
return 0;
+
+ /*
+ * A scan owner implies we already hold the iolock. Skip it in
+ * xfs_free_eofblocks() to avoid deadlock. This also eliminates
+ * the possibility of -EAGAIN being returned.
+ */
+ if (eofb->eof_scan_owner == ip->i_ino)
+ need_iolock = false;
}
- ret = xfs_free_eofblocks(ip->i_mount, ip, true);
+ ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock);
/* don't revisit the inode if we're not waiting */
- if (ret == EAGAIN && !(flags & SYNC_WAIT))
+ if (ret == -EAGAIN && !(flags & SYNC_WAIT))
ret = 0;
return ret;
@@ -1260,6 +1303,55 @@ xfs_icache_free_eofblocks(
eofb, XFS_ICI_EOFBLOCKS_TAG);
}
+/*
+ * Run eofblocks scans on the quotas applicable to the inode. For inodes with
+ * multiple quotas, we don't know exactly which quota caused an allocation
+ * failure. We make a best effort by including each quota under low free space
+ * conditions (less than 1% free space) in the scan.
+ */
+int
+xfs_inode_free_quota_eofblocks(
+ struct xfs_inode *ip)
+{
+ int scan = 0;
+ struct xfs_eofblocks eofb = {0};
+ struct xfs_dquot *dq;
+
+ ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+
+ /*
+ * Set the scan owner to avoid a potential livelock. Otherwise, the scan
+ * can repeatedly trylock on the inode we're currently processing. We
+ * run a sync scan to increase effectiveness and use the union filter to
+ * cover all applicable quotas in a single scan.
+ */
+ eofb.eof_scan_owner = ip->i_ino;
+ eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC;
+
+ if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) {
+ dq = xfs_inode_dquot(ip, XFS_DQ_USER);
+ if (dq && xfs_dquot_lowsp(dq)) {
+ eofb.eof_uid = VFS_I(ip)->i_uid;
+ eofb.eof_flags |= XFS_EOF_FLAGS_UID;
+ scan = 1;
+ }
+ }
+
+ if (XFS_IS_GQUOTA_ENFORCED(ip->i_mount)) {
+ dq = xfs_inode_dquot(ip, XFS_DQ_GROUP);
+ if (dq && xfs_dquot_lowsp(dq)) {
+ eofb.eof_gid = VFS_I(ip)->i_gid;
+ eofb.eof_flags |= XFS_EOF_FLAGS_GID;
+ scan = 1;
+ }
+ }
+
+ if (scan)
+ xfs_icache_free_eofblocks(ip->i_mount, &eofb);
+
+ return scan;
+}
+
void
xfs_inode_set_eofblocks_tag(
xfs_inode_t *ip)
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 9cf017b899b..46748b86b12 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -27,6 +27,7 @@ struct xfs_eofblocks {
kgid_t eof_gid;
prid_t eof_prid;
__u64 eof_min_file_size;
+ xfs_ino_t eof_scan_owner;
};
#define SYNC_WAIT 0x0001 /* wait for i/o to complete */
@@ -57,6 +58,7 @@ void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *);
+int xfs_inode_free_quota_eofblocks(struct xfs_inode *ip);
void xfs_eofblocks_worker(struct work_struct *);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
@@ -72,31 +74,32 @@ xfs_fs_eofblocks_from_user(
struct xfs_eofblocks *dst)
{
if (src->eof_version != XFS_EOFBLOCKS_VERSION)
- return EINVAL;
+ return -EINVAL;
if (src->eof_flags & ~XFS_EOF_FLAGS_VALID)
- return EINVAL;
+ return -EINVAL;
if (memchr_inv(&src->pad32, 0, sizeof(src->pad32)) ||
memchr_inv(src->pad64, 0, sizeof(src->pad64)))
- return EINVAL;
+ return -EINVAL;
dst->eof_flags = src->eof_flags;
dst->eof_prid = src->eof_prid;
dst->eof_min_file_size = src->eof_min_file_size;
+ dst->eof_scan_owner = NULLFSINO;
dst->eof_uid = INVALID_UID;
if (src->eof_flags & XFS_EOF_FLAGS_UID) {
dst->eof_uid = make_kuid(current_user_ns(), src->eof_uid);
if (!uid_valid(dst->eof_uid))
- return EINVAL;
+ return -EINVAL;
}
dst->eof_gid = INVALID_GID;
if (src->eof_flags & XFS_EOF_FLAGS_GID) {
dst->eof_gid = make_kgid(current_user_ns(), src->eof_gid);
if (!gid_valid(dst->eof_gid))
- return EINVAL;
+ return -EINVAL;
}
return 0;
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a6115fe1ac9..fea3c92fb3f 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -583,7 +583,7 @@ xfs_lookup(
trace_xfs_lookup(dp, name);
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
- return XFS_ERROR(EIO);
+ return -EIO;
lock_mode = xfs_ilock_data_map_shared(dp);
error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
@@ -893,7 +893,7 @@ xfs_dir_ialloc(
}
if (!ialloc_context && !ip) {
*ipp = NULL;
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
}
/*
@@ -1088,7 +1088,7 @@ xfs_create(
trace_xfs_create(dp, name);
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
prid = xfs_get_initial_prid(dp);
@@ -1125,12 +1125,12 @@ xfs_create(
*/
tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
error = xfs_trans_reserve(tp, &tres, resblks, 0);
- if (error == ENOSPC) {
+ if (error == -ENOSPC) {
/* flush outstanding delalloc blocks and retry */
xfs_flush_inodes(mp);
error = xfs_trans_reserve(tp, &tres, resblks, 0);
}
- if (error == ENOSPC) {
+ if (error == -ENOSPC) {
/* No space at all so try a "no-allocation" reservation */
resblks = 0;
error = xfs_trans_reserve(tp, &tres, 0, 0);
@@ -1165,7 +1165,7 @@ xfs_create(
error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
prid, resblks > 0, &ip, &committed);
if (error) {
- if (error == ENOSPC)
+ if (error == -ENOSPC)
goto out_trans_cancel;
goto out_trans_abort;
}
@@ -1184,7 +1184,7 @@ xfs_create(
&first_block, &free_list, resblks ?
resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
if (error) {
- ASSERT(error != ENOSPC);
+ ASSERT(error != -ENOSPC);
goto out_trans_abort;
}
xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -1274,7 +1274,7 @@ xfs_create_tmpfile(
uint resblks;
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
prid = xfs_get_initial_prid(dp);
@@ -1293,7 +1293,7 @@ xfs_create_tmpfile(
tres = &M_RES(mp)->tr_create_tmpfile;
error = xfs_trans_reserve(tp, tres, resblks, 0);
- if (error == ENOSPC) {
+ if (error == -ENOSPC) {
/* No space at all so try a "no-allocation" reservation */
resblks = 0;
error = xfs_trans_reserve(tp, tres, 0, 0);
@@ -1311,7 +1311,7 @@ xfs_create_tmpfile(
error = xfs_dir_ialloc(&tp, dp, mode, 1, 0,
prid, resblks > 0, &ip, NULL);
if (error) {
- if (error == ENOSPC)
+ if (error == -ENOSPC)
goto out_trans_cancel;
goto out_trans_abort;
}
@@ -1382,7 +1382,7 @@ xfs_link(
ASSERT(!S_ISDIR(sip->i_d.di_mode));
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
error = xfs_qm_dqattach(sip, 0);
if (error)
@@ -1396,7 +1396,7 @@ xfs_link(
cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0);
- if (error == ENOSPC) {
+ if (error == -ENOSPC) {
resblks = 0;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0);
}
@@ -1417,7 +1417,7 @@ xfs_link(
*/
if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
(xfs_get_projid(tdp) != xfs_get_projid(sip)))) {
- error = XFS_ERROR(EXDEV);
+ error = -EXDEV;
goto error_return;
}
@@ -1635,8 +1635,8 @@ xfs_release(
truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
if (truncated) {
xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
- if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) {
- error = -filemap_flush(VFS_I(ip)->i_mapping);
+ if (ip->i_delayed_blks > 0) {
+ error = filemap_flush(VFS_I(ip)->i_mapping);
if (error)
return error;
}
@@ -1673,7 +1673,7 @@ xfs_release(
return 0;
error = xfs_free_eofblocks(mp, ip, true);
- if (error && error != EAGAIN)
+ if (error && error != -EAGAIN)
return error;
/* delalloc blocks after truncation means it really is dirty */
@@ -1772,7 +1772,7 @@ xfs_inactive_ifree(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree,
XFS_IFREE_SPACE_RES(mp), 0);
if (error) {
- if (error == ENOSPC) {
+ if (error == -ENOSPC) {
xfs_warn_ratelimited(mp,
"Failed to remove inode(s) from unlinked list. "
"Please free space, unmount and run xfs_repair.");
@@ -2219,7 +2219,7 @@ xfs_ifree_cluster(
XBF_UNMAPPED);
if (!bp)
- return ENOMEM;
+ return -ENOMEM;
/*
* This buffer may not have been correctly initialised as we
@@ -2491,7 +2491,7 @@ xfs_remove(
trace_xfs_remove(dp, name);
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
error = xfs_qm_dqattach(dp, 0);
if (error)
@@ -2521,12 +2521,12 @@ xfs_remove(
*/
resblks = XFS_REMOVE_SPACE_RES(mp);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, resblks, 0);
- if (error == ENOSPC) {
+ if (error == -ENOSPC) {
resblks = 0;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, 0, 0);
}
if (error) {
- ASSERT(error != ENOSPC);
+ ASSERT(error != -ENOSPC);
cancel_flags = 0;
goto out_trans_cancel;
}
@@ -2543,11 +2543,11 @@ xfs_remove(
if (is_dir) {
ASSERT(ip->i_d.di_nlink >= 2);
if (ip->i_d.di_nlink != 2) {
- error = XFS_ERROR(ENOTEMPTY);
+ error = -ENOTEMPTY;
goto out_trans_cancel;
}
if (!xfs_dir_isempty(ip)) {
- error = XFS_ERROR(ENOTEMPTY);
+ error = -ENOTEMPTY;
goto out_trans_cancel;
}
@@ -2582,7 +2582,7 @@ xfs_remove(
error = xfs_dir_removename(tp, dp, name, ip->i_ino,
&first_block, &free_list, resblks);
if (error) {
- ASSERT(error != ENOENT);
+ ASSERT(error != -ENOENT);
goto out_bmap_cancel;
}
@@ -2702,7 +2702,7 @@ xfs_rename(
cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0);
- if (error == ENOSPC) {
+ if (error == -ENOSPC) {
spaceres = 0;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0);
}
@@ -2747,7 +2747,7 @@ xfs_rename(
*/
if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
(xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
- error = XFS_ERROR(EXDEV);
+ error = -EXDEV;
goto error_return;
}
@@ -2770,7 +2770,7 @@ xfs_rename(
error = xfs_dir_createname(tp, target_dp, target_name,
src_ip->i_ino, &first_block,
&free_list, spaceres);
- if (error == ENOSPC)
+ if (error == -ENOSPC)
goto error_return;
if (error)
goto abort_return;
@@ -2795,7 +2795,7 @@ xfs_rename(
*/
if (!(xfs_dir_isempty(target_ip)) ||
(target_ip->i_d.di_nlink > 2)) {
- error = XFS_ERROR(EEXIST);
+ error = -EEXIST;
goto error_return;
}
}
@@ -2847,7 +2847,7 @@ xfs_rename(
error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
target_dp->i_ino,
&first_block, &free_list, spaceres);
- ASSERT(error != EEXIST);
+ ASSERT(error != -EEXIST);
if (error)
goto abort_return;
}
@@ -3055,7 +3055,7 @@ cluster_corrupt_out:
if (bp->b_iodone) {
XFS_BUF_UNDONE(bp);
xfs_buf_stale(bp);
- xfs_buf_ioerror(bp, EIO);
+ xfs_buf_ioerror(bp, -EIO);
xfs_buf_ioend(bp, 0);
} else {
xfs_buf_stale(bp);
@@ -3069,7 +3069,7 @@ cluster_corrupt_out:
xfs_iflush_abort(iq, false);
kmem_free(ilist);
xfs_perag_put(pag);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
/*
@@ -3124,7 +3124,7 @@ xfs_iflush(
* as we wait for an empty AIL as part of the unmount process.
*/
if (XFS_FORCED_SHUTDOWN(mp)) {
- error = XFS_ERROR(EIO);
+ error = -EIO;
goto abort_out;
}
@@ -3167,7 +3167,7 @@ corrupt_out:
xfs_buf_relse(bp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
cluster_corrupt_out:
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
abort_out:
/*
* Unlocks the flush lock
@@ -3331,5 +3331,5 @@ xfs_iflush_int(
return 0;
corrupt_out:
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f72bffa6726..c10e3fadd9a 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -398,4 +398,14 @@ do { \
extern struct kmem_zone *xfs_inode_zone;
+/*
+ * Flags for read/write calls
+ */
+#define XFS_IO_ISDIRECT 0x00001 /* bypass page cache */
+#define XFS_IO_INVIS 0x00002 /* don't update inode timestamps */
+
+#define XFS_IO_FLAGS \
+ { XFS_IO_ISDIRECT, "DIRECT" }, \
+ { XFS_IO_INVIS, "INVIS"}
+
#endif /* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index a640137b357..de5a7be36e6 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -788,5 +788,5 @@ xfs_inode_item_format_convert(
in_f->ilf_boffset = in_f64->ilf_boffset;
return 0;
}
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
}
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 8bc1bbce745..3799695b924 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -207,7 +207,7 @@ xfs_open_by_handle(
struct path path;
if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
dentry = xfs_handlereq_to_dentry(parfilp, hreq);
if (IS_ERR(dentry))
@@ -216,7 +216,7 @@ xfs_open_by_handle(
/* Restrict xfs_open_by_handle to directories & regular files. */
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
- error = -XFS_ERROR(EPERM);
+ error = -EPERM;
goto out_dput;
}
@@ -228,18 +228,18 @@ xfs_open_by_handle(
fmode = OPEN_FMODE(permflag);
if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) &&
(fmode & FMODE_WRITE) && IS_APPEND(inode)) {
- error = -XFS_ERROR(EPERM);
+ error = -EPERM;
goto out_dput;
}
if ((fmode & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
- error = -XFS_ERROR(EACCES);
+ error = -EACCES;
goto out_dput;
}
/* Can't write directories. */
if (S_ISDIR(inode->i_mode) && (fmode & FMODE_WRITE)) {
- error = -XFS_ERROR(EISDIR);
+ error = -EISDIR;
goto out_dput;
}
@@ -282,7 +282,7 @@ xfs_readlink_by_handle(
int error;
if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
dentry = xfs_handlereq_to_dentry(parfilp, hreq);
if (IS_ERR(dentry))
@@ -290,22 +290,22 @@ xfs_readlink_by_handle(
/* Restrict this handle operation to symlinks only. */
if (!S_ISLNK(dentry->d_inode->i_mode)) {
- error = -XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out_dput;
}
if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) {
- error = -XFS_ERROR(EFAULT);
+ error = -EFAULT;
goto out_dput;
}
link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
if (!link) {
- error = -XFS_ERROR(ENOMEM);
+ error = -ENOMEM;
goto out_dput;
}
- error = -xfs_readlink(XFS_I(dentry->d_inode), link);
+ error = xfs_readlink(XFS_I(dentry->d_inode), link);
if (error)
goto out_kfree;
error = readlink_copy(hreq->ohandle, olen, link);
@@ -330,10 +330,10 @@ xfs_set_dmattrs(
int error;
if (!capable(CAP_SYS_ADMIN))
- return XFS_ERROR(EPERM);
+ return -EPERM;
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
@@ -364,9 +364,9 @@ xfs_fssetdm_by_handle(
struct dentry *dentry;
if (!capable(CAP_MKNOD))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
error = mnt_want_write_file(parfilp);
if (error)
@@ -379,16 +379,16 @@ xfs_fssetdm_by_handle(
}
if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
- error = -XFS_ERROR(EPERM);
+ error = -EPERM;
goto out;
}
if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) {
- error = -XFS_ERROR(EFAULT);
+ error = -EFAULT;
goto out;
}
- error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
+ error = xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
fsd.fsd_dmstate);
out:
@@ -409,18 +409,18 @@ xfs_attrlist_by_handle(
char *kbuf;
if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
if (al_hreq.buflen < sizeof(struct attrlist) ||
al_hreq.buflen > XATTR_LIST_MAX)
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
/*
* Reject flags, only allow namespaces.
*/
if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq);
if (IS_ERR(dentry))
@@ -431,7 +431,7 @@ xfs_attrlist_by_handle(
goto out_dput;
cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
- error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
+ error = xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
al_hreq.flags, cursor);
if (error)
goto out_kfree;
@@ -455,20 +455,20 @@ xfs_attrmulti_attr_get(
__uint32_t flags)
{
unsigned char *kbuf;
- int error = EFAULT;
+ int error = -EFAULT;
if (*len > XATTR_SIZE_MAX)
- return EINVAL;
+ return -EINVAL;
kbuf = kmem_zalloc_large(*len, KM_SLEEP);
if (!kbuf)
- return ENOMEM;
+ return -ENOMEM;
error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
if (error)
goto out_kfree;
if (copy_to_user(ubuf, kbuf, *len))
- error = EFAULT;
+ error = -EFAULT;
out_kfree:
kmem_free(kbuf);
@@ -484,20 +484,17 @@ xfs_attrmulti_attr_set(
__uint32_t flags)
{
unsigned char *kbuf;
- int error = EFAULT;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
- return EPERM;
+ return -EPERM;
if (len > XATTR_SIZE_MAX)
- return EINVAL;
+ return -EINVAL;
kbuf = memdup_user(ubuf, len);
if (IS_ERR(kbuf))
return PTR_ERR(kbuf);
- error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
-
- return error;
+ return xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
}
int
@@ -507,7 +504,7 @@ xfs_attrmulti_attr_remove(
__uint32_t flags)
{
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
- return EPERM;
+ return -EPERM;
return xfs_attr_remove(XFS_I(inode), name, flags);
}
@@ -524,9 +521,9 @@ xfs_attrmulti_by_handle(
unsigned char *attr_name;
if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
/* overflow check */
if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t))
@@ -536,18 +533,18 @@ xfs_attrmulti_by_handle(
if (IS_ERR(dentry))
return PTR_ERR(dentry);
- error = E2BIG;
+ error = -E2BIG;
size = am_hreq.opcount * sizeof(xfs_attr_multiop_t);
if (!size || size > 16 * PAGE_SIZE)
goto out_dput;
ops = memdup_user(am_hreq.ops, size);
if (IS_ERR(ops)) {
- error = -PTR_ERR(ops);
+ error = PTR_ERR(ops);
goto out_dput;
}
- error = ENOMEM;
+ error = -ENOMEM;
attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
if (!attr_name)
goto out_kfree_ops;
@@ -557,7 +554,7 @@ xfs_attrmulti_by_handle(
ops[i].am_error = strncpy_from_user((char *)attr_name,
ops[i].am_attrname, MAXNAMELEN);
if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
- error = ERANGE;
+ error = -ERANGE;
if (ops[i].am_error < 0)
break;
@@ -588,19 +585,19 @@ xfs_attrmulti_by_handle(
mnt_drop_write_file(parfilp);
break;
default:
- ops[i].am_error = EINVAL;
+ ops[i].am_error = -EINVAL;
}
}
if (copy_to_user(am_hreq.ops, ops, size))
- error = XFS_ERROR(EFAULT);
+ error = -EFAULT;
kfree(attr_name);
out_kfree_ops:
kfree(ops);
out_dput:
dput(dentry);
- return -error;
+ return error;
}
int
@@ -625,16 +622,16 @@ xfs_ioc_space(
*/
if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) &&
!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
if (!(filp->f_mode & FMODE_WRITE))
- return -XFS_ERROR(EBADF);
+ return -EBADF;
if (!S_ISREG(inode->i_mode))
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
error = mnt_want_write_file(filp);
if (error)
@@ -652,7 +649,7 @@ xfs_ioc_space(
bf->l_start += XFS_ISIZE(ip);
break;
default:
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out_unlock;
}
@@ -669,7 +666,7 @@ xfs_ioc_space(
case XFS_IOC_UNRESVSP:
case XFS_IOC_UNRESVSP64:
if (bf->l_len <= 0) {
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out_unlock;
}
break;
@@ -682,7 +679,7 @@ xfs_ioc_space(
bf->l_start > mp->m_super->s_maxbytes ||
bf->l_start + bf->l_len < 0 ||
bf->l_start + bf->l_len >= mp->m_super->s_maxbytes) {
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out_unlock;
}
@@ -723,7 +720,7 @@ xfs_ioc_space(
break;
default:
ASSERT(0);
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
}
if (error)
@@ -739,7 +736,7 @@ xfs_ioc_space(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
- if (!(ioflags & IO_INVIS)) {
+ if (!(ioflags & XFS_IO_INVIS)) {
ip->i_d.di_mode &= ~S_ISUID;
if (ip->i_d.di_mode & S_IXGRP)
ip->i_d.di_mode &= ~S_ISGID;
@@ -759,7 +756,7 @@ xfs_ioc_space(
out_unlock:
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
mnt_drop_write_file(filp);
- return -error;
+ return error;
}
STATIC int
@@ -781,41 +778,41 @@ xfs_ioc_bulkstat(
return -EPERM;
if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
+ return -EIO;
if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
if ((count = bulkreq.icount) <= 0)
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
if (bulkreq.ubuffer == NULL)
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
if (cmd == XFS_IOC_FSINUMBERS)
error = xfs_inumbers(mp, &inlast, &count,
bulkreq.ubuffer, xfs_inumbers_fmt);
else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
- error = xfs_bulkstat_single(mp, &inlast,
- bulkreq.ubuffer, &done);
+ error = xfs_bulkstat_one(mp, inlast, bulkreq.ubuffer,
+ sizeof(xfs_bstat_t), NULL, &done);
else /* XFS_IOC_FSBULKSTAT */
error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
sizeof(xfs_bstat_t), bulkreq.ubuffer,
&done);
if (error)
- return -error;
+ return error;
if (bulkreq.ocount != NULL) {
if (copy_to_user(bulkreq.lastip, &inlast,
sizeof(xfs_ino_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
}
return 0;
@@ -831,7 +828,7 @@ xfs_ioc_fsgeometry_v1(
error = xfs_fs_geometry(mp, &fsgeo, 3);
if (error)
- return -error;
+ return error;
/*
* Caller should have passed an argument of type
@@ -839,7 +836,7 @@ xfs_ioc_fsgeometry_v1(
* xfs_fsop_geom_t that xfs_fs_geometry() fills in.
*/
if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -853,10 +850,10 @@ xfs_ioc_fsgeometry(
error = xfs_fs_geometry(mp, &fsgeo, 4);
if (error)
- return -error;
+ return error;
if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -1041,16 +1038,16 @@ xfs_ioctl_setattr(
trace_xfs_ioctl_setattr(ip);
if (mp->m_flags & XFS_MOUNT_RDONLY)
- return XFS_ERROR(EROFS);
+ return -EROFS;
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
/*
* Disallow 32bit project ids when projid32bit feature is not enabled.
*/
if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) &&
!xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
/*
* If disk quotas is on, we make sure that the dquots do exist on disk,
@@ -1088,7 +1085,7 @@ xfs_ioctl_setattr(
* CAP_FSETID capability is applicable.
*/
if (!inode_owner_or_capable(VFS_I(ip))) {
- code = XFS_ERROR(EPERM);
+ code = -EPERM;
goto error_return;
}
@@ -1099,7 +1096,7 @@ xfs_ioctl_setattr(
*/
if (mask & FSX_PROJID) {
if (current_user_ns() != &init_user_ns) {
- code = XFS_ERROR(EINVAL);
+ code = -EINVAL;
goto error_return;
}
@@ -1122,7 +1119,7 @@ xfs_ioctl_setattr(
if (ip->i_d.di_nextents &&
((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
fa->fsx_extsize)) {
- code = XFS_ERROR(EINVAL); /* EFBIG? */
+ code = -EINVAL; /* EFBIG? */
goto error_return;
}
@@ -1141,7 +1138,7 @@ xfs_ioctl_setattr(
extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
if (extsize_fsb > MAXEXTLEN) {
- code = XFS_ERROR(EINVAL);
+ code = -EINVAL;
goto error_return;
}
@@ -1153,13 +1150,13 @@ xfs_ioctl_setattr(
} else {
size = mp->m_sb.sb_blocksize;
if (extsize_fsb > mp->m_sb.sb_agblocks / 2) {
- code = XFS_ERROR(EINVAL);
+ code = -EINVAL;
goto error_return;
}
}
if (fa->fsx_extsize % size) {
- code = XFS_ERROR(EINVAL);
+ code = -EINVAL;
goto error_return;
}
}
@@ -1173,7 +1170,7 @@ xfs_ioctl_setattr(
if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
(XFS_IS_REALTIME_INODE(ip)) !=
(fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
- code = XFS_ERROR(EINVAL); /* EFBIG? */
+ code = -EINVAL; /* EFBIG? */
goto error_return;
}
@@ -1184,7 +1181,7 @@ xfs_ioctl_setattr(
if ((mp->m_sb.sb_rblocks == 0) ||
(mp->m_sb.sb_rextsize == 0) ||
(ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
- code = XFS_ERROR(EINVAL);
+ code = -EINVAL;
goto error_return;
}
}
@@ -1198,7 +1195,7 @@ xfs_ioctl_setattr(
(fa->fsx_xflags &
(XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
!capable(CAP_LINUX_IMMUTABLE)) {
- code = XFS_ERROR(EPERM);
+ code = -EPERM;
goto error_return;
}
}
@@ -1301,7 +1298,7 @@ xfs_ioc_fssetxattr(
return error;
error = xfs_ioctl_setattr(ip, &fa, mask);
mnt_drop_write_file(filp);
- return -error;
+ return error;
}
STATIC int
@@ -1346,7 +1343,7 @@ xfs_ioc_setxflags(
return error;
error = xfs_ioctl_setattr(ip, &fa, mask);
mnt_drop_write_file(filp);
- return -error;
+ return error;
}
STATIC int
@@ -1356,7 +1353,7 @@ xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
/* copy only getbmap portion (not getbmapx) */
if (copy_to_user(base, bmv, sizeof(struct getbmap)))
- return XFS_ERROR(EFAULT);
+ return -EFAULT;
*ap += sizeof(struct getbmap);
return 0;
@@ -1373,23 +1370,23 @@ xfs_ioc_getbmap(
int error;
if (copy_from_user(&bmx, arg, sizeof(struct getbmapx)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
if (bmx.bmv_count < 2)
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
- if (ioflags & IO_INVIS)
+ if (ioflags & XFS_IO_INVIS)
bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
(struct getbmap *)arg+1);
if (error)
- return -error;
+ return error;
/* copy back header - only size of getbmap */
if (copy_to_user(arg, &bmx, sizeof(struct getbmap)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -1399,7 +1396,7 @@ xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full)
struct getbmapx __user *base = *ap;
if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
- return XFS_ERROR(EFAULT);
+ return -EFAULT;
*ap += sizeof(struct getbmapx);
return 0;
@@ -1414,22 +1411,22 @@ xfs_ioc_getbmapx(
int error;
if (copy_from_user(&bmx, arg, sizeof(bmx)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
if (bmx.bmv_count < 2)
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
if (bmx.bmv_iflags & (~BMV_IF_VALID))
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format,
(struct getbmapx *)arg+1);
if (error)
- return -error;
+ return error;
/* copy back header */
if (copy_to_user(arg, &bmx, sizeof(struct getbmapx)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -1445,33 +1442,33 @@ xfs_ioc_swapext(
/* Pull information for the target fd */
f = fdget((int)sxp->sx_fdtarget);
if (!f.file) {
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out;
}
if (!(f.file->f_mode & FMODE_WRITE) ||
!(f.file->f_mode & FMODE_READ) ||
(f.file->f_flags & O_APPEND)) {
- error = XFS_ERROR(EBADF);
+ error = -EBADF;
goto out_put_file;
}
tmp = fdget((int)sxp->sx_fdtmp);
if (!tmp.file) {
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out_put_file;
}
if (!(tmp.file->f_mode & FMODE_WRITE) ||
!(tmp.file->f_mode & FMODE_READ) ||
(tmp.file->f_flags & O_APPEND)) {
- error = XFS_ERROR(EBADF);
+ error = -EBADF;
goto out_put_tmp_file;
}
if (IS_SWAPFILE(file_inode(f.file)) ||
IS_SWAPFILE(file_inode(tmp.file))) {
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out_put_tmp_file;
}
@@ -1479,17 +1476,17 @@ xfs_ioc_swapext(
tip = XFS_I(file_inode(tmp.file));
if (ip->i_mount != tip->i_mount) {
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out_put_tmp_file;
}
if (ip->i_ino == tip->i_ino) {
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto out_put_tmp_file;
}
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
- error = XFS_ERROR(EIO);
+ error = -EIO;
goto out_put_tmp_file;
}
@@ -1523,7 +1520,7 @@ xfs_file_ioctl(
int error;
if (filp->f_mode & FMODE_NOCMTIME)
- ioflags |= IO_INVIS;
+ ioflags |= XFS_IO_INVIS;
trace_xfs_file_ioctl(ip);
@@ -1542,7 +1539,7 @@ xfs_file_ioctl(
xfs_flock64_t bf;
if (copy_from_user(&bf, arg, sizeof(bf)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
}
case XFS_IOC_DIOINFO: {
@@ -1555,7 +1552,7 @@ xfs_file_ioctl(
da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
if (copy_to_user(arg, &da, sizeof(da)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -1588,7 +1585,7 @@ xfs_file_ioctl(
struct fsdmidata dmi;
if (copy_from_user(&dmi, arg, sizeof(dmi)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
error = mnt_want_write_file(filp);
if (error)
@@ -1597,7 +1594,7 @@ xfs_file_ioctl(
error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
dmi.fsd_dmstate);
mnt_drop_write_file(filp);
- return -error;
+ return error;
}
case XFS_IOC_GETBMAP:
@@ -1613,14 +1610,14 @@ xfs_file_ioctl(
xfs_fsop_handlereq_t hreq;
if (copy_from_user(&hreq, arg, sizeof(hreq)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return xfs_find_handle(cmd, &hreq);
}
case XFS_IOC_OPEN_BY_HANDLE: {
xfs_fsop_handlereq_t hreq;
if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return xfs_open_by_handle(filp, &hreq);
}
case XFS_IOC_FSSETDM_BY_HANDLE:
@@ -1630,7 +1627,7 @@ xfs_file_ioctl(
xfs_fsop_handlereq_t hreq;
if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return xfs_readlink_by_handle(filp, &hreq);
}
case XFS_IOC_ATTRLIST_BY_HANDLE:
@@ -1643,13 +1640,13 @@ xfs_file_ioctl(
struct xfs_swapext sxp;
if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
error = mnt_want_write_file(filp);
if (error)
return error;
error = xfs_ioc_swapext(&sxp);
mnt_drop_write_file(filp);
- return -error;
+ return error;
}
case XFS_IOC_FSCOUNTS: {
@@ -1657,10 +1654,10 @@ xfs_file_ioctl(
error = xfs_fs_counts(mp, &out);
if (error)
- return -error;
+ return error;
if (copy_to_user(arg, &out, sizeof(out)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -1672,10 +1669,10 @@ xfs_file_ioctl(
return -EPERM;
if (mp->m_flags & XFS_MOUNT_RDONLY)
- return -XFS_ERROR(EROFS);
+ return -EROFS;
if (copy_from_user(&inout, arg, sizeof(inout)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
error = mnt_want_write_file(filp);
if (error)
@@ -1686,10 +1683,10 @@ xfs_file_ioctl(
error = xfs_reserve_blocks(mp, &in, &inout);
mnt_drop_write_file(filp);
if (error)
- return -error;
+ return error;
if (copy_to_user(arg, &inout, sizeof(inout)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -1701,10 +1698,10 @@ xfs_file_ioctl(
error = xfs_reserve_blocks(mp, NULL, &out);
if (error)
- return -error;
+ return error;
if (copy_to_user(arg, &out, sizeof(out)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -1713,42 +1710,42 @@ xfs_file_ioctl(
xfs_growfs_data_t in;
if (copy_from_user(&in, arg, sizeof(in)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
error = mnt_want_write_file(filp);
if (error)
return error;
error = xfs_growfs_data(mp, &in);
mnt_drop_write_file(filp);
- return -error;
+ return error;
}
case XFS_IOC_FSGROWFSLOG: {
xfs_growfs_log_t in;
if (copy_from_user(&in, arg, sizeof(in)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
error = mnt_want_write_file(filp);
if (error)
return error;
error = xfs_growfs_log(mp, &in);
mnt_drop_write_file(filp);
- return -error;
+ return error;
}
case XFS_IOC_FSGROWFSRT: {
xfs_growfs_rt_t in;
if (copy_from_user(&in, arg, sizeof(in)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
error = mnt_want_write_file(filp);
if (error)
return error;
error = xfs_growfs_rt(mp, &in);
mnt_drop_write_file(filp);
- return -error;
+ return error;
}
case XFS_IOC_GOINGDOWN: {
@@ -1758,10 +1755,9 @@ xfs_file_ioctl(
return -EPERM;
if (get_user(in, (__uint32_t __user *)arg))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
- error = xfs_fs_goingdown(mp, in);
- return -error;
+ return xfs_fs_goingdown(mp, in);
}
case XFS_IOC_ERROR_INJECTION: {
@@ -1771,18 +1767,16 @@ xfs_file_ioctl(
return -EPERM;
if (copy_from_user(&in, arg, sizeof(in)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
- error = xfs_errortag_add(in.errtag, mp);
- return -error;
+ return xfs_errortag_add(in.errtag, mp);
}
case XFS_IOC_ERROR_CLEARALL:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- error = xfs_errortag_clearall(mp, 1);
- return -error;
+ return xfs_errortag_clearall(mp, 1);
case XFS_IOC_FREE_EOFBLOCKS: {
struct xfs_fs_eofblocks eofb;
@@ -1792,16 +1786,16 @@ xfs_file_ioctl(
return -EPERM;
if (mp->m_flags & XFS_MOUNT_RDONLY)
- return -XFS_ERROR(EROFS);
+ return -EROFS;
if (copy_from_user(&eofb, arg, sizeof(eofb)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
error = xfs_fs_eofblocks_from_user(&eofb, &keofb);
if (error)
- return -error;
+ return error;
- return -xfs_icache_free_eofblocks(mp, &keofb);
+ return xfs_icache_free_eofblocks(mp, &keofb);
}
default:
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 944d5baa710..a554646ff14 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -28,7 +28,6 @@
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_vnode.h"
#include "xfs_inode.h"
#include "xfs_itable.h"
#include "xfs_error.h"
@@ -56,7 +55,7 @@ xfs_compat_flock64_copyin(
get_user(bf->l_sysid, &arg32->l_sysid) ||
get_user(bf->l_pid, &arg32->l_pid) ||
copy_from_user(bf->l_pad, &arg32->l_pad, 4*sizeof(u32)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -70,10 +69,10 @@ xfs_compat_ioc_fsgeometry_v1(
error = xfs_fs_geometry(mp, &fsgeo, 3);
if (error)
- return -error;
+ return error;
/* The 32-bit variant simply has some padding at the end */
if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -84,7 +83,7 @@ xfs_compat_growfs_data_copyin(
{
if (get_user(in->newblocks, &arg32->newblocks) ||
get_user(in->imaxpct, &arg32->imaxpct))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -95,14 +94,14 @@ xfs_compat_growfs_rt_copyin(
{
if (get_user(in->newblocks, &arg32->newblocks) ||
get_user(in->extsize, &arg32->extsize))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
STATIC int
xfs_inumbers_fmt_compat(
void __user *ubuffer,
- const xfs_inogrp_t *buffer,
+ const struct xfs_inogrp *buffer,
long count,
long *written)
{
@@ -113,7 +112,7 @@ xfs_inumbers_fmt_compat(
if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) ||
put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
}
*written = count * sizeof(*p32);
return 0;
@@ -132,7 +131,7 @@ xfs_ioctl32_bstime_copyin(
if (get_user(sec32, &bstime32->tv_sec) ||
get_user(bstime->tv_nsec, &bstime32->tv_nsec))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
bstime->tv_sec = sec32;
return 0;
}
@@ -164,7 +163,7 @@ xfs_ioctl32_bstat_copyin(
get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) ||
get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) ||
get_user(bstat->bs_aextents, &bstat32->bs_aextents))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -180,7 +179,7 @@ xfs_bstime_store_compat(
sec32 = p->tv_sec;
if (put_user(sec32, &p32->tv_sec) ||
put_user(p->tv_nsec, &p32->tv_nsec))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return 0;
}
@@ -195,7 +194,7 @@ xfs_bulkstat_one_fmt_compat(
compat_xfs_bstat_t __user *p32 = ubuffer;
if (ubsize < sizeof(*p32))
- return XFS_ERROR(ENOMEM);
+ return -ENOMEM;
if (put_user(buffer->bs_ino, &p32->bs_ino) ||
put_user(buffer->bs_mode, &p32->bs_mode) ||
@@ -218,7 +217,7 @@ xfs_bulkstat_one_fmt_compat(
put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) ||
put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
put_user(buffer->bs_aextents, &p32->bs_aextents))
- return XFS_ERROR(EFAULT);
+ return -EFAULT;
if (ubused)
*ubused = sizeof(*p32);
return 0;
@@ -256,30 +255,30 @@ xfs_compat_ioc_bulkstat(
/* should be called again (unused here, but used in dmapi) */
if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
+ return -EIO;
if (get_user(addr, &p32->lastip))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
bulkreq.lastip = compat_ptr(addr);
if (get_user(bulkreq.icount, &p32->icount) ||
get_user(addr, &p32->ubuffer))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
bulkreq.ubuffer = compat_ptr(addr);
if (get_user(addr, &p32->ocount))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
bulkreq.ocount = compat_ptr(addr);
if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
if ((count = bulkreq.icount) <= 0)
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
if (bulkreq.ubuffer == NULL)
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
if (cmd == XFS_IOC_FSINUMBERS_32) {
error = xfs_inumbers(mp, &inlast, &count,
@@ -294,17 +293,17 @@ xfs_compat_ioc_bulkstat(
xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
bulkreq.ubuffer, &done);
} else
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
if (error)
- return -error;
+ return error;
if (bulkreq.ocount != NULL) {
if (copy_to_user(bulkreq.lastip, &inlast,
sizeof(xfs_ino_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
}
return 0;
@@ -318,7 +317,7 @@ xfs_compat_handlereq_copyin(
compat_xfs_fsop_handlereq_t hreq32;
if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
hreq->fd = hreq32.fd;
hreq->path = compat_ptr(hreq32.path);
@@ -352,19 +351,19 @@ xfs_compat_attrlist_by_handle(
char *kbuf;
if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
if (copy_from_user(&al_hreq, arg,
sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
if (al_hreq.buflen < sizeof(struct attrlist) ||
al_hreq.buflen > XATTR_LIST_MAX)
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
/*
* Reject flags, only allow namespaces.
*/
if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
- return -XFS_ERROR(EINVAL);
+ return -EINVAL;
dentry = xfs_compat_handlereq_to_dentry(parfilp, &al_hreq.hreq);
if (IS_ERR(dentry))
@@ -376,7 +375,7 @@ xfs_compat_attrlist_by_handle(
goto out_dput;
cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
- error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
+ error = xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
al_hreq.flags, cursor);
if (error)
goto out_kfree;
@@ -404,10 +403,10 @@ xfs_compat_attrmulti_by_handle(
unsigned char *attr_name;
if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
if (copy_from_user(&am_hreq, arg,
sizeof(compat_xfs_fsop_attrmulti_handlereq_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
/* overflow check */
if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t))
@@ -417,7 +416,7 @@ xfs_compat_attrmulti_by_handle(
if (IS_ERR(dentry))
return PTR_ERR(dentry);
- error = E2BIG;
+ error = -E2BIG;
size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t);
if (!size || size > 16 * PAGE_SIZE)
goto out_dput;
@@ -428,7 +427,7 @@ xfs_compat_attrmulti_by_handle(
goto out_dput;
}
- error = ENOMEM;
+ error = -ENOMEM;
attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
if (!attr_name)
goto out_kfree_ops;
@@ -439,7 +438,7 @@ xfs_compat_attrmulti_by_handle(
compat_ptr(ops[i].am_attrname),
MAXNAMELEN);
if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
- error = ERANGE;
+ error = -ERANGE;
if (ops[i].am_error < 0)
break;
@@ -470,19 +469,19 @@ xfs_compat_attrmulti_by_handle(
mnt_drop_write_file(parfilp);
break;
default:
- ops[i].am_error = EINVAL;
+ ops[i].am_error = -EINVAL;
}
}
if (copy_to_user(compat_ptr(am_hreq.ops), ops, size))
- error = XFS_ERROR(EFAULT);
+ error = -EFAULT;
kfree(attr_name);
out_kfree_ops:
kfree(ops);
out_dput:
dput(dentry);
- return -error;
+ return error;
}
STATIC int
@@ -496,26 +495,26 @@ xfs_compat_fssetdm_by_handle(
struct dentry *dentry;
if (!capable(CAP_MKNOD))
- return -XFS_ERROR(EPERM);
+ return -EPERM;
if (copy_from_user(&dmhreq, arg,
sizeof(compat_xfs_fsop_setdm_handlereq_t)))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
dentry = xfs_compat_handlereq_to_dentry(parfilp, &dmhreq.hreq);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
- error = -XFS_ERROR(EPERM);
+ error = -EPERM;
goto out;
}
if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) {
- error = -XFS_ERROR(EFAULT);
+ error = -EFAULT;
goto out;
}
- error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
+ error = xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
fsd.fsd_dmstate);
out:
@@ -537,7 +536,7 @@ xfs_file_compat_ioctl(
int error;
if (filp->f_mode & FMODE_NOCMTIME)
- ioflags |= IO_INVIS;
+ ioflags |= XFS_IO_INVIS;
trace_xfs_file_compat_ioctl(ip);
@@ -588,7 +587,7 @@ xfs_file_compat_ioctl(
struct xfs_flock64 bf;
if (xfs_compat_flock64_copyin(&bf, arg))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
}
@@ -598,25 +597,25 @@ xfs_file_compat_ioctl(
struct xfs_growfs_data in;
if (xfs_compat_growfs_data_copyin(&in, arg))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
error = mnt_want_write_file(filp);
if (error)
return error;
error = xfs_growfs_data(mp, &in);
mnt_drop_write_file(filp);
- return -error;
+ return error;
}
case XFS_IOC_FSGROWFSRT_32: {
struct xfs_growfs_rt in;
if (xfs_compat_growfs_rt_copyin(&in, arg))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
error = mnt_want_write_file(filp);
if (error)
return error;
error = xfs_growfs_rt(mp, &in);
mnt_drop_write_file(filp);
- return -error;
+ return error;
}
#endif
/* long changes size, but xfs only copiese out 32 bits */
@@ -633,13 +632,13 @@ xfs_file_compat_ioctl(
if (copy_from_user(&sxp, sxu,
offsetof(struct xfs_swapext, sx_stat)) ||
xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
error = mnt_want_write_file(filp);
if (error)
return error;
error = xfs_ioc_swapext(&sxp);
mnt_drop_write_file(filp);
- return -error;
+ return error;
}
case XFS_IOC_FSBULKSTAT_32:
case XFS_IOC_FSBULKSTAT_SINGLE_32:
@@ -651,7 +650,7 @@ xfs_file_compat_ioctl(
struct xfs_fsop_handlereq hreq;
if (xfs_compat_handlereq_copyin(&hreq, arg))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq);
return xfs_find_handle(cmd, &hreq);
}
@@ -659,14 +658,14 @@ xfs_file_compat_ioctl(
struct xfs_fsop_handlereq hreq;
if (xfs_compat_handlereq_copyin(&hreq, arg))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return xfs_open_by_handle(filp, &hreq);
}
case XFS_IOC_READLINK_BY_HANDLE_32: {
struct xfs_fsop_handlereq hreq;
if (xfs_compat_handlereq_copyin(&hreq, arg))
- return -XFS_ERROR(EFAULT);
+ return -EFAULT;
return xfs_readlink_by_handle(filp, &hreq);
}
case XFS_IOC_ATTRLIST_BY_HANDLE_32:
@@ -676,6 +675,6 @@ xfs_file_compat_ioctl(
case XFS_IOC_FSSETDM_BY_HANDLE_32:
return xfs_compat_fssetdm_by_handle(filp, arg);
default:
- return -XFS_ERROR(ENOIOCTLCMD);
+ return -ENOIOCTLCMD;
}
}
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 6d3ec2b6ee2..e9c47b6f5e5 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -110,7 +110,7 @@ xfs_alert_fsblock_zero(
(unsigned long long)imap->br_startoff,
(unsigned long long)imap->br_blockcount,
imap->br_state);
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
}
int
@@ -138,7 +138,7 @@ xfs_iomap_write_direct(
error = xfs_qm_dqattach(ip, 0);
if (error)
- return XFS_ERROR(error);
+ return error;
rt = XFS_IS_REALTIME_INODE(ip);
extsz = xfs_get_extsz_hint(ip);
@@ -148,7 +148,7 @@ xfs_iomap_write_direct(
if ((offset + count) > XFS_ISIZE(ip)) {
error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
if (error)
- return XFS_ERROR(error);
+ return error;
} else {
if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
last_fsb = MIN(last_fsb, (xfs_fileoff_t)
@@ -188,7 +188,7 @@ xfs_iomap_write_direct(
*/
if (error) {
xfs_trans_cancel(tp, 0);
- return XFS_ERROR(error);
+ return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -225,7 +225,7 @@ xfs_iomap_write_direct(
* Copy any maps to caller's array and return any error.
*/
if (nimaps == 0) {
- error = XFS_ERROR(ENOSPC);
+ error = -ENOSPC;
goto out_unlock;
}
@@ -397,7 +397,8 @@ xfs_quota_calc_throttle(
struct xfs_inode *ip,
int type,
xfs_fsblock_t *qblocks,
- int *qshift)
+ int *qshift,
+ int64_t *qfreesp)
{
int64_t freesp;
int shift = 0;
@@ -406,6 +407,7 @@ xfs_quota_calc_throttle(
/* over hi wmark, squash the prealloc completely */
if (dq->q_res_bcount >= dq->q_prealloc_hi_wmark) {
*qblocks = 0;
+ *qfreesp = 0;
return;
}
@@ -418,6 +420,9 @@ xfs_quota_calc_throttle(
shift += 2;
}
+ if (freesp < *qfreesp)
+ *qfreesp = freesp;
+
/* only overwrite the throttle values if we are more aggressive */
if ((freesp >> shift) < (*qblocks >> *qshift)) {
*qblocks = freesp;
@@ -476,15 +481,18 @@ xfs_iomap_prealloc_size(
}
/*
- * Check each quota to cap the prealloc size and provide a shift
- * value to throttle with.
+ * Check each quota to cap the prealloc size, provide a shift value to
+ * throttle with and adjust amount of available space.
*/
if (xfs_quota_need_throttle(ip, XFS_DQ_USER, alloc_blocks))
- xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift);
+ xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift,
+ &freesp);
if (xfs_quota_need_throttle(ip, XFS_DQ_GROUP, alloc_blocks))
- xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift);
+ xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift,
+ &freesp);
if (xfs_quota_need_throttle(ip, XFS_DQ_PROJ, alloc_blocks))
- xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift);
+ xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift,
+ &freesp);
/*
* The final prealloc size is set to the minimum of free space available
@@ -552,7 +560,7 @@ xfs_iomap_write_delay(
*/
error = xfs_qm_dqattach_locked(ip, 0);
if (error)
- return XFS_ERROR(error);
+ return error;
extsz = xfs_get_extsz_hint(ip);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
@@ -596,11 +604,11 @@ retry:
imap, &nimaps, XFS_BMAPI_ENTIRE);
switch (error) {
case 0:
- case ENOSPC:
- case EDQUOT:
+ case -ENOSPC:
+ case -EDQUOT:
break;
default:
- return XFS_ERROR(error);
+ return error;
}
/*
@@ -614,7 +622,7 @@ retry:
error = 0;
goto retry;
}
- return XFS_ERROR(error ? error : ENOSPC);
+ return error ? error : -ENOSPC;
}
if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
@@ -663,7 +671,7 @@ xfs_iomap_write_allocate(
*/
error = xfs_qm_dqattach(ip, 0);
if (error)
- return XFS_ERROR(error);
+ return error;
offset_fsb = XFS_B_TO_FSBT(mp, offset);
count_fsb = imap->br_blockcount;
@@ -690,7 +698,7 @@ xfs_iomap_write_allocate(
nres, 0);
if (error) {
xfs_trans_cancel(tp, 0);
- return XFS_ERROR(error);
+ return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
@@ -739,7 +747,7 @@ xfs_iomap_write_allocate(
if ((map_start_fsb + count_fsb) > last_block) {
count_fsb = last_block - map_start_fsb;
if (count_fsb == 0) {
- error = EAGAIN;
+ error = -EAGAIN;
goto trans_cancel;
}
}
@@ -793,7 +801,7 @@ trans_cancel:
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
error0:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return XFS_ERROR(error);
+ return error;
}
int
@@ -853,7 +861,7 @@ xfs_iomap_write_unwritten(
resblks, 0);
if (error) {
xfs_trans_cancel(tp, 0);
- return XFS_ERROR(error);
+ return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -892,7 +900,7 @@ xfs_iomap_write_unwritten(
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
if (error)
- return XFS_ERROR(error);
+ return error;
if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
return xfs_alert_fsblock_zero(ip, &imap);
@@ -915,5 +923,5 @@ error_on_bmapi_transaction:
xfs_bmap_cancel(&free_list);
xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT));
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return XFS_ERROR(error);
+ return error;
}
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 205613a0606..72129493e9d 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -72,7 +72,7 @@ xfs_initxattrs(
int error = 0;
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
- error = -xfs_attr_set(ip, xattr->name, xattr->value,
+ error = xfs_attr_set(ip, xattr->name, xattr->value,
xattr->value_len, ATTR_SECURE);
if (error < 0)
break;
@@ -93,7 +93,7 @@ xfs_init_security(
struct inode *dir,
const struct qstr *qstr)
{
- return -security_inode_init_security(inode, dir, qstr,
+ return security_inode_init_security(inode, dir, qstr,
&xfs_initxattrs, NULL);
}
@@ -173,12 +173,12 @@ xfs_generic_create(
#ifdef CONFIG_XFS_POSIX_ACL
if (default_acl) {
- error = -xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
+ error = xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
if (error)
goto out_cleanup_inode;
}
if (acl) {
- error = -xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
+ error = xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
if (error)
goto out_cleanup_inode;
}
@@ -194,7 +194,7 @@ xfs_generic_create(
posix_acl_release(default_acl);
if (acl)
posix_acl_release(acl);
- return -error;
+ return error;
out_cleanup_inode:
if (!tmpfile)
@@ -248,8 +248,8 @@ xfs_vn_lookup(
xfs_dentry_to_name(&name, dentry, 0);
error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
if (unlikely(error)) {
- if (unlikely(error != ENOENT))
- return ERR_PTR(-error);
+ if (unlikely(error != -ENOENT))
+ return ERR_PTR(error);
d_add(dentry, NULL);
return NULL;
}
@@ -275,8 +275,8 @@ xfs_vn_ci_lookup(
xfs_dentry_to_name(&xname, dentry, 0);
error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
if (unlikely(error)) {
- if (unlikely(error != ENOENT))
- return ERR_PTR(-error);
+ if (unlikely(error != -ENOENT))
+ return ERR_PTR(error);
/*
* call d_add(dentry, NULL) here when d_drop_negative_children
* is called in xfs_vn_mknod (ie. allow negative dentries
@@ -311,7 +311,7 @@ xfs_vn_link(
error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
if (unlikely(error))
- return -error;
+ return error;
ihold(inode);
d_instantiate(dentry, inode);
@@ -328,7 +328,7 @@ xfs_vn_unlink(
xfs_dentry_to_name(&name, dentry, 0);
- error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
+ error = xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
if (error)
return error;
@@ -375,7 +375,7 @@ xfs_vn_symlink(
xfs_cleanup_inode(dir, inode, dentry);
iput(inode);
out:
- return -error;
+ return error;
}
STATIC int
@@ -392,8 +392,8 @@ xfs_vn_rename(
xfs_dentry_to_name(&oname, odentry, 0);
xfs_dentry_to_name(&nname, ndentry, odentry->d_inode->i_mode);
- return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
- XFS_I(ndir), &nname, new_inode ?
+ return xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
+ XFS_I(ndir), &nname, new_inode ?
XFS_I(new_inode) : NULL);
}
@@ -414,7 +414,7 @@ xfs_vn_follow_link(
if (!link)
goto out_err;
- error = -xfs_readlink(XFS_I(dentry->d_inode), link);
+ error = xfs_readlink(XFS_I(dentry->d_inode), link);
if (unlikely(error))
goto out_kfree;
@@ -441,7 +441,7 @@ xfs_vn_getattr(
trace_xfs_getattr(ip);
if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
+ return -EIO;
stat->size = XFS_ISIZE(ip);
stat->dev = inode->i_sb->s_dev;
@@ -546,14 +546,14 @@ xfs_setattr_nonsize(
/* If acls are being inherited, we already have this checked */
if (!(flags & XFS_ATTR_NOACL)) {
if (mp->m_flags & XFS_MOUNT_RDONLY)
- return XFS_ERROR(EROFS);
+ return -EROFS;
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
- error = -inode_change_ok(inode, iattr);
+ error = inode_change_ok(inode, iattr);
if (error)
- return XFS_ERROR(error);
+ return error;
}
ASSERT((mask & ATTR_SIZE) == 0);
@@ -703,7 +703,7 @@ xfs_setattr_nonsize(
xfs_qm_dqrele(gdqp);
if (error)
- return XFS_ERROR(error);
+ return error;
/*
* XXX(hch): Updating the ACL entries is not atomic vs the i_mode
@@ -713,9 +713,9 @@ xfs_setattr_nonsize(
* Posix ACL code seems to care about this issue either.
*/
if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
- error = -posix_acl_chmod(inode, inode->i_mode);
+ error = posix_acl_chmod(inode, inode->i_mode);
if (error)
- return XFS_ERROR(error);
+ return error;
}
return 0;
@@ -748,14 +748,14 @@ xfs_setattr_size(
trace_xfs_setattr(ip);
if (mp->m_flags & XFS_MOUNT_RDONLY)
- return XFS_ERROR(EROFS);
+ return -EROFS;
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
- error = -inode_change_ok(inode, iattr);
+ error = inode_change_ok(inode, iattr);
if (error)
- return XFS_ERROR(error);
+ return error;
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
ASSERT(S_ISREG(ip->i_d.di_mode));
@@ -818,7 +818,7 @@ xfs_setattr_size(
* care about here.
*/
if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) {
- error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+ error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
ip->i_d.di_size, newsize);
if (error)
return error;
@@ -844,7 +844,7 @@ xfs_setattr_size(
* much we can do about this, except to hope that the caller sees ENOMEM
* and retries the truncate operation.
*/
- error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
+ error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
if (error)
return error;
truncate_setsize(inode, newsize);
@@ -950,7 +950,7 @@ xfs_vn_setattr(
error = xfs_setattr_nonsize(ip, iattr, 0);
}
- return -error;
+ return error;
}
STATIC int
@@ -970,7 +970,7 @@ xfs_vn_update_time(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
if (error) {
xfs_trans_cancel(tp, 0);
- return -error;
+ return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -991,7 +991,7 @@ xfs_vn_update_time(
}
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
- return -xfs_trans_commit(tp, 0);
+ return xfs_trans_commit(tp, 0);
}
#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
@@ -1036,7 +1036,7 @@ xfs_fiemap_format(
*full = 1; /* user array now full */
}
- return -error;
+ return error;
}
STATIC int
@@ -1055,12 +1055,12 @@ xfs_vn_fiemap(
return error;
/* Set up bmap header for xfs internal routine */
- bm.bmv_offset = BTOBB(start);
+ bm.bmv_offset = BTOBBT(start);
/* Special case for whole file */
if (length == FIEMAP_MAX_OFFSET)
bm.bmv_length = -1LL;
else
- bm.bmv_length = BTOBB(length);
+ bm.bmv_length = BTOBB(start + length) - bm.bmv_offset;
/* We add one because in getbmap world count includes the header */
bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
@@ -1075,7 +1075,7 @@ xfs_vn_fiemap(
error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
if (error)
- return -error;
+ return error;
return 0;
}
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index cb64f222d60..f71be9c6801 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -67,19 +67,17 @@ xfs_bulkstat_one_int(
*stat = BULKSTAT_RV_NOTHING;
if (!buffer || xfs_internal_inum(mp, ino))
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
buf = kmem_alloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL);
if (!buf)
- return XFS_ERROR(ENOMEM);
+ return -ENOMEM;
error = xfs_iget(mp, NULL, ino,
(XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED),
XFS_ILOCK_SHARED, &ip);
- if (error) {
- *stat = BULKSTAT_RV_NOTHING;
+ if (error)
goto out_free;
- }
ASSERT(ip != NULL);
ASSERT(ip->i_imap.im_blkno != 0);
@@ -136,7 +134,6 @@ xfs_bulkstat_one_int(
IRELE(ip);
error = formatter(buffer, ubsize, ubused, buf);
-
if (!error)
*stat = BULKSTAT_RV_DIDONE;
@@ -154,9 +151,9 @@ xfs_bulkstat_one_fmt(
const xfs_bstat_t *buffer)
{
if (ubsize < sizeof(*buffer))
- return XFS_ERROR(ENOMEM);
+ return -ENOMEM;
if (copy_to_user(ubuffer, buffer, sizeof(*buffer)))
- return XFS_ERROR(EFAULT);
+ return -EFAULT;
if (ubused)
*ubused = sizeof(*buffer);
return 0;
@@ -175,9 +172,170 @@ xfs_bulkstat_one(
xfs_bulkstat_one_fmt, ubused, stat);
}
+/*
+ * Loop over all clusters in a chunk for a given incore inode allocation btree
+ * record. Do a readahead if there are any allocated inodes in that cluster.
+ */
+STATIC void
+xfs_bulkstat_ichunk_ra(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ struct xfs_inobt_rec_incore *irec)
+{
+ xfs_agblock_t agbno;
+ struct blk_plug plug;
+ int blks_per_cluster;
+ int inodes_per_cluster;
+ int i; /* inode chunk index */
+
+ agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino);
+ blks_per_cluster = xfs_icluster_size_fsb(mp);
+ inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
+
+ blk_start_plug(&plug);
+ for (i = 0; i < XFS_INODES_PER_CHUNK;
+ i += inodes_per_cluster, agbno += blks_per_cluster) {
+ if (xfs_inobt_maskn(i, inodes_per_cluster) & ~irec->ir_free) {
+ xfs_btree_reada_bufs(mp, agno, agbno, blks_per_cluster,
+ &xfs_inode_buf_ops);
+ }
+ }
+ blk_finish_plug(&plug);
+}
+
+/*
+ * Lookup the inode chunk that the given inode lives in and then get the record
+ * if we found the chunk. If the inode was not the last in the chunk and there
+ * are some left allocated, update the data for the pointed-to record as well as
+ * return the count of grabbed inodes.
+ */
+STATIC int
+xfs_bulkstat_grab_ichunk(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agino_t agino, /* starting inode of chunk */
+ int *icount,/* return # of inodes grabbed */
+ struct xfs_inobt_rec_incore *irec) /* btree record */
+{
+ int idx; /* index into inode chunk */
+ int stat;
+ int error = 0;
+
+ /* Lookup the inode chunk that this inode lives in */
+ error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &stat);
+ if (error)
+ return error;
+ if (!stat) {
+ *icount = 0;
+ return error;
+ }
+
+ /* Get the record, should always work */
+ error = xfs_inobt_get_rec(cur, irec, &stat);
+ if (error)
+ return error;
+ XFS_WANT_CORRUPTED_RETURN(stat == 1);
+
+ /* Check if the record contains the inode in request */
+ if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino)
+ return -EINVAL;
+
+ idx = agino - irec->ir_startino + 1;
+ if (idx < XFS_INODES_PER_CHUNK &&
+ (xfs_inobt_maskn(idx, XFS_INODES_PER_CHUNK - idx) & ~irec->ir_free)) {
+ int i;
+
+ /* We got a right chunk with some left inodes allocated at it.
+ * Grab the chunk record. Mark all the uninteresting inodes
+ * free -- because they're before our start point.
+ */
+ for (i = 0; i < idx; i++) {
+ if (XFS_INOBT_MASK(i) & ~irec->ir_free)
+ irec->ir_freecount++;
+ }
+
+ irec->ir_free |= xfs_inobt_maskn(0, idx);
+ *icount = XFS_INODES_PER_CHUNK - irec->ir_freecount;
+ }
+
+ return 0;
+}
+
#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size)
/*
+ * Process inodes in chunk with a pointer to a formatter function
+ * that will iget the inode and fill in the appropriate structure.
+ */
+int
+xfs_bulkstat_ag_ichunk(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ struct xfs_inobt_rec_incore *irbp,
+ bulkstat_one_pf formatter,
+ size_t statstruct_size,
+ struct xfs_bulkstat_agichunk *acp)
+{
+ xfs_ino_t lastino = acp->ac_lastino;
+ char __user **ubufp = acp->ac_ubuffer;
+ int ubleft = acp->ac_ubleft;
+ int ubelem = acp->ac_ubelem;
+ int chunkidx, clustidx;
+ int error = 0;
+ xfs_agino_t agino;
+
+ for (agino = irbp->ir_startino, chunkidx = clustidx = 0;
+ XFS_BULKSTAT_UBLEFT(ubleft) &&
+ irbp->ir_freecount < XFS_INODES_PER_CHUNK;
+ chunkidx++, clustidx++, agino++) {
+ int fmterror; /* bulkstat formatter result */
+ int ubused;
+ xfs_ino_t ino = XFS_AGINO_TO_INO(mp, agno, agino);
+
+ ASSERT(chunkidx < XFS_INODES_PER_CHUNK);
+
+ /* Skip if this inode is free */
+ if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
+ lastino = ino;
+ continue;
+ }
+
+ /*
+ * Count used inodes as free so we can tell when the
+ * chunk is used up.
+ */
+ irbp->ir_freecount++;
+
+ /* Get the inode and fill in a single buffer */
+ ubused = statstruct_size;
+ error = formatter(mp, ino, *ubufp, ubleft, &ubused, &fmterror);
+ if (fmterror == BULKSTAT_RV_NOTHING) {
+ if (error && error != -ENOENT && error != -EINVAL) {
+ ubleft = 0;
+ break;
+ }
+ lastino = ino;
+ continue;
+ }
+ if (fmterror == BULKSTAT_RV_GIVEUP) {
+ ubleft = 0;
+ ASSERT(error);
+ break;
+ }
+ if (*ubufp)
+ *ubufp += ubused;
+ ubleft -= ubused;
+ ubelem++;
+ lastino = ino;
+ }
+
+ acp->ac_lastino = lastino;
+ acp->ac_ubleft = ubleft;
+ acp->ac_ubelem = ubelem;
+
+ return error;
+}
+
+/*
* Return stat information in bulk (by-inode) for the filesystem.
*/
int /* error status */
@@ -190,13 +348,10 @@ xfs_bulkstat(
char __user *ubuffer, /* buffer with inode stats */
int *done) /* 1 if there are more stats to get */
{
- xfs_agblock_t agbno=0;/* allocation group block number */
xfs_buf_t *agbp; /* agi header buffer */
xfs_agi_t *agi; /* agi header data */
xfs_agino_t agino; /* inode # in allocation group */
xfs_agnumber_t agno; /* allocation group number */
- int chunkidx; /* current index into inode chunk */
- int clustidx; /* current index into inode cluster */
xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
int end_of_ag; /* set if we've seen the ag end */
int error; /* error code */
@@ -209,8 +364,6 @@ xfs_bulkstat(
xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */
xfs_ino_t lastino; /* last inode number returned */
- int blks_per_cluster; /* # of blocks per cluster */
- int inodes_per_cluster;/* # of inodes per cluster */
int nirbuf; /* size of irbuf */
int rval; /* return value error code */
int tmp; /* result value from btree calls */
@@ -218,7 +371,6 @@ xfs_bulkstat(
int ubleft; /* bytes left in user's buffer */
char __user *ubufp; /* pointer into user's buffer */
int ubelem; /* spaces used in user's buffer */
- int ubused; /* bytes used by formatter */
/*
* Get the last inode value, see if there's nothing to do.
@@ -233,20 +385,16 @@ xfs_bulkstat(
*ubcountp = 0;
return 0;
}
- if (!ubcountp || *ubcountp <= 0) {
- return EINVAL;
- }
+
ubcount = *ubcountp; /* statstruct's */
ubleft = ubcount * statstruct_size; /* bytes */
*ubcountp = ubelem = 0;
*done = 0;
fmterror = 0;
ubufp = ubuffer;
- blks_per_cluster = xfs_icluster_size_fsb(mp);
- inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
if (!irbuf)
- return ENOMEM;
+ return -ENOMEM;
nirbuf = irbsize / sizeof(*irbuf);
@@ -258,14 +406,8 @@ xfs_bulkstat(
while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) {
cond_resched();
error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
- if (error) {
- /*
- * Skip this allocation group and go to the next one.
- */
- agno++;
- agino = 0;
- continue;
- }
+ if (error)
+ break;
agi = XFS_BUF_TO_AGI(agbp);
/*
* Allocate and initialize a btree cursor for ialloc btree.
@@ -275,96 +417,39 @@ xfs_bulkstat(
irbp = irbuf;
irbufend = irbuf + nirbuf;
end_of_ag = 0;
- /*
- * If we're returning in the middle of an allocation group,
- * we need to get the remainder of the chunk we're in.
- */
+ icount = 0;
if (agino > 0) {
- xfs_inobt_rec_incore_t r;
-
/*
- * Lookup the inode chunk that this inode lives in.
+ * In the middle of an allocation group, we need to get
+ * the remainder of the chunk we're in.
*/
- error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE,
- &tmp);
- if (!error && /* no I/O error */
- tmp && /* lookup succeeded */
- /* got the record, should always work */
- !(error = xfs_inobt_get_rec(cur, &r, &i)) &&
- i == 1 &&
- /* this is the right chunk */
- agino < r.ir_startino + XFS_INODES_PER_CHUNK &&
- /* lastino was not last in chunk */
- (chunkidx = agino - r.ir_startino + 1) <
- XFS_INODES_PER_CHUNK &&
- /* there are some left allocated */
- xfs_inobt_maskn(chunkidx,
- XFS_INODES_PER_CHUNK - chunkidx) &
- ~r.ir_free) {
- /*
- * Grab the chunk record. Mark all the
- * uninteresting inodes (because they're
- * before our start point) free.
- */
- for (i = 0; i < chunkidx; i++) {
- if (XFS_INOBT_MASK(i) & ~r.ir_free)
- r.ir_freecount++;
- }
- r.ir_free |= xfs_inobt_maskn(0, chunkidx);
+ struct xfs_inobt_rec_incore r;
+
+ error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r);
+ if (error)
+ break;
+ if (icount) {
irbp->ir_startino = r.ir_startino;
irbp->ir_freecount = r.ir_freecount;
irbp->ir_free = r.ir_free;
irbp++;
agino = r.ir_startino + XFS_INODES_PER_CHUNK;
- icount = XFS_INODES_PER_CHUNK - r.ir_freecount;
- } else {
- /*
- * If any of those tests failed, bump the
- * inode number (just in case).
- */
- agino++;
- icount = 0;
}
- /*
- * In any case, increment to the next record.
- */
- if (!error)
- error = xfs_btree_increment(cur, 0, &tmp);
+ /* Increment to the next record */
+ error = xfs_btree_increment(cur, 0, &tmp);
} else {
- /*
- * Start of ag. Lookup the first inode chunk.
- */
+ /* Start of ag. Lookup the first inode chunk */
error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp);
- icount = 0;
}
+ if (error)
+ break;
+
/*
* Loop through inode btree records in this ag,
* until we run out of inodes or space in the buffer.
*/
while (irbp < irbufend && icount < ubcount) {
- xfs_inobt_rec_incore_t r;
-
- /*
- * Loop as long as we're unable to read the
- * inode btree.
- */
- while (error) {
- agino += XFS_INODES_PER_CHUNK;
- if (XFS_AGINO_TO_AGBNO(mp, agino) >=
- be32_to_cpu(agi->agi_length))
- break;
- error = xfs_inobt_lookup(cur, agino,
- XFS_LOOKUP_GE, &tmp);
- cond_resched();
- }
- /*
- * If ran off the end of the ag either with an error,
- * or the normal way, set end and stop collecting.
- */
- if (error) {
- end_of_ag = 1;
- break;
- }
+ struct xfs_inobt_rec_incore r;
error = xfs_inobt_get_rec(cur, &r, &i);
if (error || i == 0) {
@@ -377,25 +462,7 @@ xfs_bulkstat(
* Also start read-ahead now for this chunk.
*/
if (r.ir_freecount < XFS_INODES_PER_CHUNK) {
- struct blk_plug plug;
- /*
- * Loop over all clusters in the next chunk.
- * Do a readahead if there are any allocated
- * inodes in that cluster.
- */
- blk_start_plug(&plug);
- agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino);
- for (chunkidx = 0;
- chunkidx < XFS_INODES_PER_CHUNK;
- chunkidx += inodes_per_cluster,
- agbno += blks_per_cluster) {
- if (xfs_inobt_maskn(chunkidx,
- inodes_per_cluster) & ~r.ir_free)
- xfs_btree_reada_bufs(mp, agno,
- agbno, blks_per_cluster,
- &xfs_inode_buf_ops);
- }
- blk_finish_plug(&plug);
+ xfs_bulkstat_ichunk_ra(mp, agno, &r);
irbp->ir_startino = r.ir_startino;
irbp->ir_freecount = r.ir_freecount;
irbp->ir_free = r.ir_free;
@@ -422,57 +489,20 @@ xfs_bulkstat(
irbufend = irbp;
for (irbp = irbuf;
irbp < irbufend && XFS_BULKSTAT_UBLEFT(ubleft); irbp++) {
- /*
- * Now process this chunk of inodes.
- */
- for (agino = irbp->ir_startino, chunkidx = clustidx = 0;
- XFS_BULKSTAT_UBLEFT(ubleft) &&
- irbp->ir_freecount < XFS_INODES_PER_CHUNK;
- chunkidx++, clustidx++, agino++) {
- ASSERT(chunkidx < XFS_INODES_PER_CHUNK);
-
- ino = XFS_AGINO_TO_INO(mp, agno, agino);
- /*
- * Skip if this inode is free.
- */
- if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
- lastino = ino;
- continue;
- }
- /*
- * Count used inodes as free so we can tell
- * when the chunk is used up.
- */
- irbp->ir_freecount++;
-
- /*
- * Get the inode and fill in a single buffer.
- */
- ubused = statstruct_size;
- error = formatter(mp, ino, ubufp, ubleft,
- &ubused, &fmterror);
- if (fmterror == BULKSTAT_RV_NOTHING) {
- if (error && error != ENOENT &&
- error != EINVAL) {
- ubleft = 0;
- rval = error;
- break;
- }
- lastino = ino;
- continue;
- }
- if (fmterror == BULKSTAT_RV_GIVEUP) {
- ubleft = 0;
- ASSERT(error);
- rval = error;
- break;
- }
- if (ubufp)
- ubufp += ubused;
- ubleft -= ubused;
- ubelem++;
- lastino = ino;
- }
+ struct xfs_bulkstat_agichunk ac;
+
+ ac.ac_lastino = lastino;
+ ac.ac_ubuffer = &ubuffer;
+ ac.ac_ubleft = ubleft;
+ ac.ac_ubelem = ubelem;
+ error = xfs_bulkstat_ag_ichunk(mp, agno, irbp,
+ formatter, statstruct_size, &ac);
+ if (error)
+ rval = error;
+
+ lastino = ac.ac_lastino;
+ ubleft = ac.ac_ubleft;
+ ubelem = ac.ac_ubelem;
cond_resched();
}
@@ -512,58 +542,10 @@ xfs_bulkstat(
return rval;
}
-/*
- * Return stat information in bulk (by-inode) for the filesystem.
- * Special case for non-sequential one inode bulkstat.
- */
-int /* error status */
-xfs_bulkstat_single(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t *lastinop, /* inode to return */
- char __user *buffer, /* buffer with inode stats */
- int *done) /* 1 if there are more stats to get */
-{
- int count; /* count value for bulkstat call */
- int error; /* return value */
- xfs_ino_t ino; /* filesystem inode number */
- int res; /* result from bs1 */
-
- /*
- * note that requesting valid inode numbers which are not allocated
- * to inodes will most likely cause xfs_imap_to_bp to generate warning
- * messages about bad magic numbers. This is ok. The fact that
- * the inode isn't actually an inode is handled by the
- * error check below. Done this way to make the usual case faster
- * at the expense of the error case.
- */
-
- ino = *lastinop;
- error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t),
- NULL, &res);
- if (error) {
- /*
- * Special case way failed, do it the "long" way
- * to see if that works.
- */
- (*lastinop)--;
- count = 1;
- if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one,
- sizeof(xfs_bstat_t), buffer, done))
- return error;
- if (count == 0 || (xfs_ino_t)*lastinop != ino)
- return error == EFSCORRUPTED ?
- XFS_ERROR(EINVAL) : error;
- else
- return 0;
- }
- *done = 0;
- return 0;
-}
-
int
xfs_inumbers_fmt(
void __user *ubuffer, /* buffer to write to */
- const xfs_inogrp_t *buffer, /* buffer to read from */
+ const struct xfs_inogrp *buffer, /* buffer to read from */
long count, /* # of elements to read */
long *written) /* # of bytes written */
{
@@ -578,127 +560,104 @@ xfs_inumbers_fmt(
*/
int /* error status */
xfs_inumbers(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t *lastino, /* last inode returned */
- int *count, /* size of buffer/count returned */
- void __user *ubuffer,/* buffer with inode descriptions */
- inumbers_fmt_pf formatter)
+ struct xfs_mount *mp,/* mount point for filesystem */
+ xfs_ino_t *lastino,/* last inode returned */
+ int *count,/* size of buffer/count returned */
+ void __user *ubuffer,/* buffer with inode descriptions */
+ inumbers_fmt_pf formatter)
{
- xfs_buf_t *agbp;
- xfs_agino_t agino;
- xfs_agnumber_t agno;
- int bcount;
- xfs_inogrp_t *buffer;
- int bufidx;
- xfs_btree_cur_t *cur;
- int error;
- xfs_inobt_rec_incore_t r;
- int i;
- xfs_ino_t ino;
- int left;
- int tmp;
-
- ino = (xfs_ino_t)*lastino;
- agno = XFS_INO_TO_AGNO(mp, ino);
- agino = XFS_INO_TO_AGINO(mp, ino);
- left = *count;
+ xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, *lastino);
+ xfs_agino_t agino = XFS_INO_TO_AGINO(mp, *lastino);
+ struct xfs_btree_cur *cur = NULL;
+ struct xfs_buf *agbp = NULL;
+ struct xfs_inogrp *buffer;
+ int bcount;
+ int left = *count;
+ int bufidx = 0;
+ int error = 0;
+
*count = 0;
+ if (agno >= mp->m_sb.sb_agcount ||
+ *lastino != XFS_AGINO_TO_INO(mp, agno, agino))
+ return error;
+
bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer)));
buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP);
- error = bufidx = 0;
- cur = NULL;
- agbp = NULL;
- while (left > 0 && agno < mp->m_sb.sb_agcount) {
- if (agbp == NULL) {
+ do {
+ struct xfs_inobt_rec_incore r;
+ int stat;
+
+ if (!agbp) {
error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
- if (error) {
- /*
- * If we can't read the AGI of this ag,
- * then just skip to the next one.
- */
- ASSERT(cur == NULL);
- agbp = NULL;
- agno++;
- agino = 0;
- continue;
- }
+ if (error)
+ break;
+
cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
XFS_BTNUM_INO);
error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE,
- &tmp);
- if (error) {
- xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
- cur = NULL;
- xfs_buf_relse(agbp);
- agbp = NULL;
- /*
- * Move up the last inode in the current
- * chunk. The lookup_ge will always get
- * us the first inode in the next chunk.
- */
- agino += XFS_INODES_PER_CHUNK - 1;
- continue;
- }
- }
- error = xfs_inobt_get_rec(cur, &r, &i);
- if (error || i == 0) {
- xfs_buf_relse(agbp);
- agbp = NULL;
- xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
- cur = NULL;
- agno++;
- agino = 0;
- continue;
+ &stat);
+ if (error)
+ break;
+ if (!stat)
+ goto next_ag;
}
+
+ error = xfs_inobt_get_rec(cur, &r, &stat);
+ if (error)
+ break;
+ if (!stat)
+ goto next_ag;
+
agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1;
buffer[bufidx].xi_startino =
XFS_AGINO_TO_INO(mp, agno, r.ir_startino);
buffer[bufidx].xi_alloccount =
XFS_INODES_PER_CHUNK - r.ir_freecount;
buffer[bufidx].xi_allocmask = ~r.ir_free;
- bufidx++;
- left--;
- if (bufidx == bcount) {
- long written;
- if (formatter(ubuffer, buffer, bufidx, &written)) {
- error = XFS_ERROR(EFAULT);
+ if (++bufidx == bcount) {
+ long written;
+
+ error = formatter(ubuffer, buffer, bufidx, &written);
+ if (error)
break;
- }
ubuffer += written;
*count += bufidx;
bufidx = 0;
}
- if (left) {
- error = xfs_btree_increment(cur, 0, &tmp);
- if (error) {
- xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
- cur = NULL;
- xfs_buf_relse(agbp);
- agbp = NULL;
- /*
- * The agino value has already been bumped.
- * Just try to skip up to it.
- */
- agino += XFS_INODES_PER_CHUNK;
- continue;
- }
- }
- }
+ if (!--left)
+ break;
+
+ error = xfs_btree_increment(cur, 0, &stat);
+ if (error)
+ break;
+ if (stat)
+ continue;
+
+next_ag:
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+ cur = NULL;
+ xfs_buf_relse(agbp);
+ agbp = NULL;
+ agino = 0;
+ } while (++agno < mp->m_sb.sb_agcount);
+
if (!error) {
if (bufidx) {
- long written;
- if (formatter(ubuffer, buffer, bufidx, &written))
- error = XFS_ERROR(EFAULT);
- else
+ long written;
+
+ error = formatter(ubuffer, buffer, bufidx, &written);
+ if (!error)
*count += bufidx;
}
*lastino = XFS_AGINO_TO_INO(mp, agno, agino);
}
+
kmem_free(buffer);
if (cur)
xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR :
XFS_BTREE_NOERROR));
if (agbp)
xfs_buf_relse(agbp);
+
return error;
}
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 97295d91d17..aaed08022eb 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -30,6 +30,22 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
int *ubused,
int *stat);
+struct xfs_bulkstat_agichunk {
+ xfs_ino_t ac_lastino; /* last inode returned */
+ char __user **ac_ubuffer;/* pointer into user's buffer */
+ int ac_ubleft; /* bytes left in user's buffer */
+ int ac_ubelem; /* spaces used in user's buffer */
+};
+
+int
+xfs_bulkstat_ag_ichunk(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ struct xfs_inobt_rec_incore *irbp,
+ bulkstat_one_pf formatter,
+ size_t statstruct_size,
+ struct xfs_bulkstat_agichunk *acp);
+
/*
* Values for stat return value.
*/
@@ -50,13 +66,6 @@ xfs_bulkstat(
char __user *ubuffer,/* buffer with inode stats */
int *done); /* 1 if there are more stats to get */
-int
-xfs_bulkstat_single(
- xfs_mount_t *mp,
- xfs_ino_t *lastinop,
- char __user *buffer,
- int *done);
-
typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */
void __user *ubuffer, /* buffer to write to */
int ubsize, /* remaining user buffer sz */
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 825249d2dfc..d10dc8f397c 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -21,18 +21,6 @@
#include <linux/types.h>
/*
- * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
- * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set.
- */
-#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64)
-# define XFS_BIG_BLKNOS 1
-# define XFS_BIG_INUMS 1
-#else
-# define XFS_BIG_BLKNOS 0
-# define XFS_BIG_INUMS 0
-#endif
-
-/*
* Kernel specific type declarations for XFS
*/
typedef signed char __int8_t;
@@ -113,7 +101,7 @@ typedef __uint64_t __psunsigned_t;
#include <asm/byteorder.h>
#include <asm/unaligned.h>
-#include "xfs_vnode.h"
+#include "xfs_fs.h"
#include "xfs_stats.h"
#include "xfs_sysctl.h"
#include "xfs_iops.h"
@@ -191,6 +179,17 @@ typedef __uint64_t __psunsigned_t;
#define MAX(a,b) (max(a,b))
#define howmany(x, y) (((x)+((y)-1))/(y))
+/*
+ * XFS wrapper structure for sysfs support. It depends on external data
+ * structures and is embedded in various internal data structures to implement
+ * the XFS sysfs object heirarchy. Define it here for broad access throughout
+ * the codebase.
+ */
+struct xfs_kobj {
+ struct kobject kobject;
+ struct completion complete;
+};
+
/* Kernel uid/gid conversion. These are used to convert to/from the on disk
* uid_t/gid_t types to the kuid_t/kgid_t types that the kernel uses internally.
* The conversion here is type only, the value will remain the same since we
@@ -331,7 +330,7 @@ static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
{
x += y - 1;
do_div(x, y);
- return(x * y);
+ return x * y;
}
static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 292308dede6..ca4fd5bd852 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -34,6 +34,7 @@
#include "xfs_trace.h"
#include "xfs_fsops.h"
#include "xfs_cksum.h"
+#include "xfs_sysfs.h"
kmem_zone_t *xfs_log_ticket_zone;
@@ -283,7 +284,7 @@ xlog_grant_head_wait(
return 0;
shutdown:
list_del_init(&tic->t_queue);
- return XFS_ERROR(EIO);
+ return -EIO;
}
/*
@@ -377,7 +378,7 @@ xfs_log_regrant(
int error = 0;
if (XLOG_FORCED_SHUTDOWN(log))
- return XFS_ERROR(EIO);
+ return -EIO;
XFS_STATS_INC(xs_try_logspace);
@@ -446,7 +447,7 @@ xfs_log_reserve(
ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
if (XLOG_FORCED_SHUTDOWN(log))
- return XFS_ERROR(EIO);
+ return -EIO;
XFS_STATS_INC(xs_try_logspace);
@@ -454,7 +455,7 @@ xfs_log_reserve(
tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent,
KM_SLEEP | KM_MAYFAIL);
if (!tic)
- return XFS_ERROR(ENOMEM);
+ return -ENOMEM;
tic->t_trans_type = t_type;
*ticp = tic;
@@ -590,7 +591,7 @@ xfs_log_release_iclog(
{
if (xlog_state_release_iclog(mp->m_log, iclog)) {
xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
- return EIO;
+ return -EIO;
}
return 0;
@@ -628,7 +629,7 @@ xfs_log_mount(
mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks);
if (IS_ERR(mp->m_log)) {
- error = -PTR_ERR(mp->m_log);
+ error = PTR_ERR(mp->m_log);
goto out;
}
@@ -652,18 +653,18 @@ xfs_log_mount(
xfs_warn(mp,
"Log size %d blocks too small, minimum size is %d blocks",
mp->m_sb.sb_logblocks, min_logfsbs);
- error = EINVAL;
+ error = -EINVAL;
} else if (mp->m_sb.sb_logblocks > XFS_MAX_LOG_BLOCKS) {
xfs_warn(mp,
"Log size %d blocks too large, maximum size is %lld blocks",
mp->m_sb.sb_logblocks, XFS_MAX_LOG_BLOCKS);
- error = EINVAL;
+ error = -EINVAL;
} else if (XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks) > XFS_MAX_LOG_BYTES) {
xfs_warn(mp,
"log size %lld bytes too large, maximum size is %lld bytes",
XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks),
XFS_MAX_LOG_BYTES);
- error = EINVAL;
+ error = -EINVAL;
}
if (error) {
if (xfs_sb_version_hascrc(&mp->m_sb)) {
@@ -707,6 +708,11 @@ xfs_log_mount(
}
}
+ error = xfs_sysfs_init(&mp->m_log->l_kobj, &xfs_log_ktype, &mp->m_kobj,
+ "log");
+ if (error)
+ goto out_destroy_ail;
+
/* Normal transactions can now occur */
mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY;
@@ -947,6 +953,9 @@ xfs_log_unmount(
xfs_log_quiesce(mp);
xfs_trans_ail_destroy(mp);
+
+ xfs_sysfs_del(&mp->m_log->l_kobj);
+
xlog_dealloc_log(mp->m_log);
}
@@ -1313,7 +1322,7 @@ xlog_alloc_log(
xlog_in_core_t *iclog, *prev_iclog=NULL;
xfs_buf_t *bp;
int i;
- int error = ENOMEM;
+ int error = -ENOMEM;
uint log2_size = 0;
log = kmem_zalloc(sizeof(struct xlog), KM_MAYFAIL);
@@ -1340,7 +1349,7 @@ xlog_alloc_log(
xlog_grant_head_init(&log->l_reserve_head);
xlog_grant_head_init(&log->l_write_head);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
if (xfs_sb_version_hassector(&mp->m_sb)) {
log2_size = mp->m_sb.sb_logsectlog;
if (log2_size < BBSHIFT) {
@@ -1369,8 +1378,14 @@ xlog_alloc_log(
xlog_get_iclog_buffer_size(mp, log);
- error = ENOMEM;
- bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0);
+ /*
+ * Use a NULL block for the extra log buffer used during splits so that
+ * it will trigger errors if we ever try to do IO on it without first
+ * having set it up properly.
+ */
+ error = -ENOMEM;
+ bp = xfs_buf_alloc(mp->m_logdev_targp, XFS_BUF_DADDR_NULL,
+ BTOBB(log->l_iclog_size), 0);
if (!bp)
goto out_free_log;
@@ -1463,7 +1478,7 @@ out_free_iclog:
out_free_log:
kmem_free(log);
out:
- return ERR_PTR(-error);
+ return ERR_PTR(error);
} /* xlog_alloc_log */
@@ -1661,7 +1676,7 @@ xlog_bdstrat(
xfs_buf_lock(bp);
if (iclog->ic_state & XLOG_STATE_IOERROR) {
- xfs_buf_ioerror(bp, EIO);
+ xfs_buf_ioerror(bp, -EIO);
xfs_buf_stale(bp);
xfs_buf_ioend(bp, 0);
/*
@@ -2360,7 +2375,7 @@ xlog_write(
ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags);
if (!ophdr)
- return XFS_ERROR(EIO);
+ return -EIO;
xlog_write_adv_cnt(&ptr, &len, &log_offset,
sizeof(struct xlog_op_header));
@@ -2859,7 +2874,7 @@ restart:
spin_lock(&log->l_icloglock);
if (XLOG_FORCED_SHUTDOWN(log)) {
spin_unlock(&log->l_icloglock);
- return XFS_ERROR(EIO);
+ return -EIO;
}
iclog = log->l_iclog;
@@ -3047,7 +3062,7 @@ xlog_state_release_iclog(
int sync = 0; /* do we sync? */
if (iclog->ic_state & XLOG_STATE_IOERROR)
- return XFS_ERROR(EIO);
+ return -EIO;
ASSERT(atomic_read(&iclog->ic_refcnt) > 0);
if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock))
@@ -3055,7 +3070,7 @@ xlog_state_release_iclog(
if (iclog->ic_state & XLOG_STATE_IOERROR) {
spin_unlock(&log->l_icloglock);
- return XFS_ERROR(EIO);
+ return -EIO;
}
ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
iclog->ic_state == XLOG_STATE_WANT_SYNC);
@@ -3172,7 +3187,7 @@ _xfs_log_force(
iclog = log->l_iclog;
if (iclog->ic_state & XLOG_STATE_IOERROR) {
spin_unlock(&log->l_icloglock);
- return XFS_ERROR(EIO);
+ return -EIO;
}
/* If the head iclog is not active nor dirty, we just attach
@@ -3210,7 +3225,7 @@ _xfs_log_force(
spin_unlock(&log->l_icloglock);
if (xlog_state_release_iclog(log, iclog))
- return XFS_ERROR(EIO);
+ return -EIO;
if (log_flushed)
*log_flushed = 1;
@@ -3246,7 +3261,7 @@ maybe_sleep:
*/
if (iclog->ic_state & XLOG_STATE_IOERROR) {
spin_unlock(&log->l_icloglock);
- return XFS_ERROR(EIO);
+ return -EIO;
}
XFS_STATS_INC(xs_log_force_sleep);
xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
@@ -3256,7 +3271,7 @@ maybe_sleep:
* and the memory read should be atomic.
*/
if (iclog->ic_state & XLOG_STATE_IOERROR)
- return XFS_ERROR(EIO);
+ return -EIO;
if (log_flushed)
*log_flushed = 1;
} else {
@@ -3324,7 +3339,7 @@ try_again:
iclog = log->l_iclog;
if (iclog->ic_state & XLOG_STATE_IOERROR) {
spin_unlock(&log->l_icloglock);
- return XFS_ERROR(EIO);
+ return -EIO;
}
do {
@@ -3375,7 +3390,7 @@ try_again:
xlog_state_switch_iclogs(log, iclog, 0);
spin_unlock(&log->l_icloglock);
if (xlog_state_release_iclog(log, iclog))
- return XFS_ERROR(EIO);
+ return -EIO;
if (log_flushed)
*log_flushed = 1;
spin_lock(&log->l_icloglock);
@@ -3390,7 +3405,7 @@ try_again:
*/
if (iclog->ic_state & XLOG_STATE_IOERROR) {
spin_unlock(&log->l_icloglock);
- return XFS_ERROR(EIO);
+ return -EIO;
}
XFS_STATS_INC(xs_log_force_sleep);
xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
@@ -3400,7 +3415,7 @@ try_again:
* and the memory read should be atomic.
*/
if (iclog->ic_state & XLOG_STATE_IOERROR)
- return XFS_ERROR(EIO);
+ return -EIO;
if (log_flushed)
*log_flushed = 1;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index b3425b34e3d..f6b79e5325d 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -78,8 +78,6 @@ xlog_cil_init_post_recovery(
{
log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log);
log->l_cilp->xc_ctx->sequence = 1;
- log->l_cilp->xc_ctx->commit_lsn = xlog_assign_lsn(log->l_curr_cycle,
- log->l_curr_block);
}
/*
@@ -634,7 +632,7 @@ out_abort_free_ticket:
xfs_log_ticket_put(tic);
out_abort:
xlog_cil_committed(ctx, XFS_LI_ABORTED);
- return XFS_ERROR(EIO);
+ return -EIO;
}
static void
@@ -928,12 +926,12 @@ xlog_cil_init(
cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
if (!cil)
- return ENOMEM;
+ return -ENOMEM;
ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL);
if (!ctx) {
kmem_free(cil);
- return ENOMEM;
+ return -ENOMEM;
}
INIT_WORK(&cil->xc_push_work, xlog_cil_push_work);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 9bc403a9e54..db7cbdeb2b4 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -405,6 +405,8 @@ struct xlog {
struct xlog_grant_head l_reserve_head;
struct xlog_grant_head l_write_head;
+ struct xfs_kobj l_kobj;
+
/* The following field are used for debugging; need to hold icloglock */
#ifdef DEBUG
char *l_iclog_bak[XLOG_MAX_ICLOGS];
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 981af0f6504..1fd5787add9 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -179,7 +179,7 @@ xlog_bread_noalign(
xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
nbblks);
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
}
blk_no = round_down(blk_no, log->l_sectBBsize);
@@ -194,7 +194,7 @@ xlog_bread_noalign(
bp->b_error = 0;
if (XFS_FORCED_SHUTDOWN(log->l_mp))
- return XFS_ERROR(EIO);
+ return -EIO;
xfs_buf_iorequest(bp);
error = xfs_buf_iowait(bp);
@@ -268,7 +268,7 @@ xlog_bwrite(
xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
nbblks);
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
}
blk_no = round_down(blk_no, log->l_sectBBsize);
@@ -330,14 +330,14 @@ xlog_header_check_recover(
xlog_header_check_dump(mp, head);
XFS_ERROR_REPORT("xlog_header_check_recover(1)",
XFS_ERRLEVEL_HIGH, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
} else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
xfs_warn(mp,
"dirty log entry has mismatched uuid - can't recover");
xlog_header_check_dump(mp, head);
XFS_ERROR_REPORT("xlog_header_check_recover(2)",
XFS_ERRLEVEL_HIGH, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
return 0;
}
@@ -364,7 +364,7 @@ xlog_header_check_mount(
xlog_header_check_dump(mp, head);
XFS_ERROR_REPORT("xlog_header_check_mount",
XFS_ERRLEVEL_HIGH, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
return 0;
}
@@ -462,7 +462,7 @@ xlog_find_verify_cycle(
while (!(bp = xlog_get_bp(log, bufblks))) {
bufblks >>= 1;
if (bufblks < log->l_sectBBsize)
- return ENOMEM;
+ return -ENOMEM;
}
for (i = start_blk; i < start_blk + nbblks; i += bufblks) {
@@ -524,7 +524,7 @@ xlog_find_verify_log_record(
if (!(bp = xlog_get_bp(log, num_blks))) {
if (!(bp = xlog_get_bp(log, 1)))
- return ENOMEM;
+ return -ENOMEM;
smallmem = 1;
} else {
error = xlog_bread(log, start_blk, num_blks, bp, &offset);
@@ -539,7 +539,7 @@ xlog_find_verify_log_record(
xfs_warn(log->l_mp,
"Log inconsistent (didn't find previous header)");
ASSERT(0);
- error = XFS_ERROR(EIO);
+ error = -EIO;
goto out;
}
@@ -564,7 +564,7 @@ xlog_find_verify_log_record(
* will be called again for the end of the physical log.
*/
if (i == -1) {
- error = -1;
+ error = 1;
goto out;
}
@@ -628,7 +628,12 @@ xlog_find_head(
int error, log_bbnum = log->l_logBBsize;
/* Is the end of the log device zeroed? */
- if ((error = xlog_find_zeroed(log, &first_blk)) == -1) {
+ error = xlog_find_zeroed(log, &first_blk);
+ if (error < 0) {
+ xfs_warn(log->l_mp, "empty log check failed");
+ return error;
+ }
+ if (error == 1) {
*return_head_blk = first_blk;
/* Is the whole lot zeroed? */
@@ -641,15 +646,12 @@ xlog_find_head(
}
return 0;
- } else if (error) {
- xfs_warn(log->l_mp, "empty log check failed");
- return error;
}
first_blk = 0; /* get cycle # of 1st block */
bp = xlog_get_bp(log, 1);
if (!bp)
- return ENOMEM;
+ return -ENOMEM;
error = xlog_bread(log, 0, 1, bp, &offset);
if (error)
@@ -818,29 +820,29 @@ validate_head:
start_blk = head_blk - num_scan_bblks; /* don't read head_blk */
/* start ptr at last block ptr before head_blk */
- if ((error = xlog_find_verify_log_record(log, start_blk,
- &head_blk, 0)) == -1) {
- error = XFS_ERROR(EIO);
- goto bp_err;
- } else if (error)
+ error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0);
+ if (error == 1)
+ error = -EIO;
+ if (error)
goto bp_err;
} else {
start_blk = 0;
ASSERT(head_blk <= INT_MAX);
- if ((error = xlog_find_verify_log_record(log, start_blk,
- &head_blk, 0)) == -1) {
+ error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0);
+ if (error < 0)
+ goto bp_err;
+ if (error == 1) {
/* We hit the beginning of the log during our search */
start_blk = log_bbnum - (num_scan_bblks - head_blk);
new_blk = log_bbnum;
ASSERT(start_blk <= INT_MAX &&
(xfs_daddr_t) log_bbnum-start_blk >= 0);
ASSERT(head_blk <= INT_MAX);
- if ((error = xlog_find_verify_log_record(log,
- start_blk, &new_blk,
- (int)head_blk)) == -1) {
- error = XFS_ERROR(EIO);
- goto bp_err;
- } else if (error)
+ error = xlog_find_verify_log_record(log, start_blk,
+ &new_blk, (int)head_blk);
+ if (error == 1)
+ error = -EIO;
+ if (error)
goto bp_err;
if (new_blk != log_bbnum)
head_blk = new_blk;
@@ -911,7 +913,7 @@ xlog_find_tail(
bp = xlog_get_bp(log, 1);
if (!bp)
- return ENOMEM;
+ return -ENOMEM;
if (*head_blk == 0) { /* special case */
error = xlog_bread(log, 0, 1, bp, &offset);
if (error)
@@ -961,7 +963,7 @@ xlog_find_tail(
xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
xlog_put_bp(bp);
ASSERT(0);
- return XFS_ERROR(EIO);
+ return -EIO;
}
/* find blk_no of tail of log */
@@ -1092,8 +1094,8 @@ done:
*
* Return:
* 0 => the log is completely written to
- * -1 => use *blk_no as the first block of the log
- * >0 => error has occurred
+ * 1 => use *blk_no as the first block of the log
+ * <0 => error has occurred
*/
STATIC int
xlog_find_zeroed(
@@ -1112,7 +1114,7 @@ xlog_find_zeroed(
/* check totally zeroed log */
bp = xlog_get_bp(log, 1);
if (!bp)
- return ENOMEM;
+ return -ENOMEM;
error = xlog_bread(log, 0, 1, bp, &offset);
if (error)
goto bp_err;
@@ -1121,7 +1123,7 @@ xlog_find_zeroed(
if (first_cycle == 0) { /* completely zeroed log */
*blk_no = 0;
xlog_put_bp(bp);
- return -1;
+ return 1;
}
/* check partially zeroed log */
@@ -1141,7 +1143,7 @@ xlog_find_zeroed(
*/
xfs_warn(log->l_mp,
"Log inconsistent or not a log (last==0, first!=1)");
- error = XFS_ERROR(EINVAL);
+ error = -EINVAL;
goto bp_err;
}
@@ -1179,19 +1181,18 @@ xlog_find_zeroed(
* Potentially backup over partial log record write. We don't need
* to search the end of the log because we know it is zero.
*/
- if ((error = xlog_find_verify_log_record(log, start_blk,
- &last_blk, 0)) == -1) {
- error = XFS_ERROR(EIO);
- goto bp_err;
- } else if (error)
- goto bp_err;
+ error = xlog_find_verify_log_record(log, start_blk, &last_blk, 0);
+ if (error == 1)
+ error = -EIO;
+ if (error)
+ goto bp_err;
*blk_no = last_blk;
bp_err:
xlog_put_bp(bp);
if (error)
return error;
- return -1;
+ return 1;
}
/*
@@ -1251,7 +1252,7 @@ xlog_write_log_records(
while (!(bp = xlog_get_bp(log, bufblks))) {
bufblks >>= 1;
if (bufblks < sectbb)
- return ENOMEM;
+ return -ENOMEM;
}
/* We may need to do a read at the start to fill in part of
@@ -1354,7 +1355,7 @@ xlog_clear_stale_blocks(
if (unlikely(head_block < tail_block || head_block >= log->l_logBBsize)) {
XFS_ERROR_REPORT("xlog_clear_stale_blocks(1)",
XFS_ERRLEVEL_LOW, log->l_mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
tail_distance = tail_block + (log->l_logBBsize - head_block);
} else {
@@ -1366,7 +1367,7 @@ xlog_clear_stale_blocks(
if (unlikely(head_block >= tail_block || head_cycle != (tail_cycle + 1))){
XFS_ERROR_REPORT("xlog_clear_stale_blocks(2)",
XFS_ERRLEVEL_LOW, log->l_mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
tail_distance = tail_block - head_block;
}
@@ -1551,7 +1552,7 @@ xlog_recover_add_to_trans(
xfs_warn(log->l_mp, "%s: bad header magic number",
__func__);
ASSERT(0);
- return XFS_ERROR(EIO);
+ return -EIO;
}
if (len == sizeof(xfs_trans_header_t))
xlog_recover_add_item(&trans->r_itemq);
@@ -1581,7 +1582,7 @@ xlog_recover_add_to_trans(
in_f->ilf_size);
ASSERT(0);
kmem_free(ptr);
- return XFS_ERROR(EIO);
+ return -EIO;
}
item->ri_total = in_f->ilf_size;
@@ -1702,7 +1703,7 @@ xlog_recover_reorder_trans(
*/
if (!list_empty(&sort_list))
list_splice_init(&sort_list, &trans->r_itemq);
- error = XFS_ERROR(EIO);
+ error = -EIO;
goto out;
}
}
@@ -1943,7 +1944,7 @@ xlog_recover_do_inode_buffer(
item, bp);
XFS_ERROR_REPORT("xlog_recover_do_inode_buf",
XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
@@ -2125,6 +2126,17 @@ xlog_recover_validate_buf_type(
__uint16_t magic16;
__uint16_t magicda;
+ /*
+ * We can only do post recovery validation on items on CRC enabled
+ * filesystems as we need to know when the buffer was written to be able
+ * to determine if we should have replayed the item. If we replay old
+ * metadata over a newer buffer, then it will enter a temporarily
+ * inconsistent state resulting in verification failures. Hence for now
+ * just avoid the verification stage for non-crc filesystems
+ */
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return;
+
magic32 = be32_to_cpu(*(__be32 *)bp->b_addr);
magic16 = be16_to_cpu(*(__be16*)bp->b_addr);
magicda = be16_to_cpu(info->magic);
@@ -2162,8 +2174,6 @@ xlog_recover_validate_buf_type(
bp->b_ops = &xfs_agf_buf_ops;
break;
case XFS_BLFT_AGFL_BUF:
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- break;
if (magic32 != XFS_AGFL_MAGIC) {
xfs_warn(mp, "Bad AGFL block magic!");
ASSERT(0);
@@ -2196,10 +2206,6 @@ xlog_recover_validate_buf_type(
#endif
break;
case XFS_BLFT_DINO_BUF:
- /*
- * we get here with inode allocation buffers, not buffers that
- * track unlinked list changes.
- */
if (magic16 != XFS_DINODE_MAGIC) {
xfs_warn(mp, "Bad INODE block magic!");
ASSERT(0);
@@ -2279,8 +2285,6 @@ xlog_recover_validate_buf_type(
bp->b_ops = &xfs_attr3_leaf_buf_ops;
break;
case XFS_BLFT_ATTR_RMT_BUF:
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- break;
if (magic32 != XFS_ATTR3_RMT_MAGIC) {
xfs_warn(mp, "Bad attr remote magic!");
ASSERT(0);
@@ -2387,16 +2391,7 @@ xlog_recover_do_reg_buffer(
/* Shouldn't be any more regions */
ASSERT(i == item->ri_total);
- /*
- * We can only do post recovery validation on items on CRC enabled
- * fielsystems as we need to know when the buffer was written to be able
- * to determine if we should have replayed the item. If we replay old
- * metadata over a newer buffer, then it will enter a temporarily
- * inconsistent state resulting in verification failures. Hence for now
- * just avoid the verification stage for non-crc filesystems
- */
- if (xfs_sb_version_hascrc(&mp->m_sb))
- xlog_recover_validate_buf_type(mp, bp, buf_f);
+ xlog_recover_validate_buf_type(mp, bp, buf_f);
}
/*
@@ -2404,8 +2399,11 @@ xlog_recover_do_reg_buffer(
* Simple algorithm: if we have found a QUOTAOFF log item of the same type
* (ie. USR or GRP), then just toss this buffer away; don't recover it.
* Else, treat it as a regular buffer and do recovery.
+ *
+ * Return false if the buffer was tossed and true if we recovered the buffer to
+ * indicate to the caller if the buffer needs writing.
*/
-STATIC void
+STATIC bool
xlog_recover_do_dquot_buffer(
struct xfs_mount *mp,
struct xlog *log,
@@ -2420,9 +2418,8 @@ xlog_recover_do_dquot_buffer(
/*
* Filesystems are required to send in quota flags at mount time.
*/
- if (mp->m_qflags == 0) {
- return;
- }
+ if (!mp->m_qflags)
+ return false;
type = 0;
if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF)
@@ -2435,9 +2432,10 @@ xlog_recover_do_dquot_buffer(
* This type of quotas was turned off, so ignore this buffer
*/
if (log->l_quotaoffs_flag & type)
- return;
+ return false;
xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
+ return true;
}
/*
@@ -2496,7 +2494,7 @@ xlog_recover_buffer_pass2(
bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
buf_flags, NULL);
if (!bp)
- return XFS_ERROR(ENOMEM);
+ return -ENOMEM;
error = bp->b_error;
if (error) {
xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)");
@@ -2504,23 +2502,44 @@ xlog_recover_buffer_pass2(
}
/*
- * recover the buffer only if we get an LSN from it and it's less than
+ * Recover the buffer only if we get an LSN from it and it's less than
* the lsn of the transaction we are replaying.
+ *
+ * Note that we have to be extremely careful of readahead here.
+ * Readahead does not attach verifiers to the buffers so if we don't
+ * actually do any replay after readahead because the LSN we found
+ * in the buffer is more recent than the current transaction then we
+ * need to attach the verifier directly. Failure to do so means that
+ * future recovery actions (e.g. EFI and unlinked list recovery) can
+ * operate on the buffers and they won't get the verifier attached. This
+ * can lead to blocks on disk having the correct content but a stale
+ * CRC.
+ *
+ * It is safe to assume these clean buffers are currently up to date.
+ * If the buffer is dirtied by a later transaction being replayed, then
+ * the verifier will be reset to match whatever recover turns that
+ * buffer into.
*/
lsn = xlog_recover_get_buf_lsn(mp, bp);
- if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0)
+ if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
+ xlog_recover_validate_buf_type(mp, bp, buf_f);
goto out_release;
+ }
if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
+ if (error)
+ goto out_release;
} else if (buf_f->blf_flags &
(XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
- xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
+ bool dirty;
+
+ dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
+ if (!dirty)
+ goto out_release;
} else {
xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
}
- if (error)
- goto out_release;
/*
* Perform delayed write on the buffer. Asynchronous writes will be
@@ -2598,7 +2617,7 @@ xfs_recover_inode_owner_change(
ip = xfs_inode_alloc(mp, in_f->ilf_ino);
if (!ip)
- return ENOMEM;
+ return -ENOMEM;
/* instantiate the inode */
xfs_dinode_from_disk(&ip->i_d, dip);
@@ -2676,7 +2695,7 @@ xlog_recover_inode_pass2(
bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0,
&xfs_inode_buf_ops);
if (!bp) {
- error = ENOMEM;
+ error = -ENOMEM;
goto error;
}
error = bp->b_error;
@@ -2697,7 +2716,7 @@ xlog_recover_inode_pass2(
__func__, dip, bp, in_f->ilf_ino);
XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)",
XFS_ERRLEVEL_LOW, mp);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto out_release;
}
dicp = item->ri_buf[1].i_addr;
@@ -2707,7 +2726,7 @@ xlog_recover_inode_pass2(
__func__, item, in_f->ilf_ino);
XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)",
XFS_ERRLEVEL_LOW, mp);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto out_release;
}
@@ -2764,7 +2783,7 @@ xlog_recover_inode_pass2(
"%s: Bad regular inode log record, rec ptr 0x%p, "
"ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
__func__, item, dip, bp, in_f->ilf_ino);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto out_release;
}
} else if (unlikely(S_ISDIR(dicp->di_mode))) {
@@ -2777,7 +2796,7 @@ xlog_recover_inode_pass2(
"%s: Bad dir inode log record, rec ptr 0x%p, "
"ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
__func__, item, dip, bp, in_f->ilf_ino);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto out_release;
}
}
@@ -2790,7 +2809,7 @@ xlog_recover_inode_pass2(
__func__, item, dip, bp, in_f->ilf_ino,
dicp->di_nextents + dicp->di_anextents,
dicp->di_nblocks);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto out_release;
}
if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
@@ -2800,7 +2819,7 @@ xlog_recover_inode_pass2(
"%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
"dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__,
item, dip, bp, in_f->ilf_ino, dicp->di_forkoff);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto out_release;
}
isize = xfs_icdinode_size(dicp->di_version);
@@ -2810,7 +2829,7 @@ xlog_recover_inode_pass2(
xfs_alert(mp,
"%s: Bad inode log record length %d, rec ptr 0x%p",
__func__, item->ri_buf[1].i_len, item);
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
goto out_release;
}
@@ -2898,7 +2917,7 @@ xlog_recover_inode_pass2(
default:
xfs_warn(log->l_mp, "%s: Invalid flag", __func__);
ASSERT(0);
- error = EIO;
+ error = -EIO;
goto out_release;
}
}
@@ -2919,7 +2938,7 @@ out_release:
error:
if (need_free)
kmem_free(in_f);
- return XFS_ERROR(error);
+ return error;
}
/*
@@ -2946,7 +2965,7 @@ xlog_recover_quotaoff_pass1(
if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
log->l_quotaoffs_flag |= XFS_DQ_GROUP;
- return (0);
+ return 0;
}
/*
@@ -2971,17 +2990,17 @@ xlog_recover_dquot_pass2(
* Filesystems are required to send in quota flags at mount time.
*/
if (mp->m_qflags == 0)
- return (0);
+ return 0;
recddq = item->ri_buf[1].i_addr;
if (recddq == NULL) {
xfs_alert(log->l_mp, "NULL dquot in %s.", __func__);
- return XFS_ERROR(EIO);
+ return -EIO;
}
if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) {
xfs_alert(log->l_mp, "dquot too small (%d) in %s.",
item->ri_buf[1].i_len, __func__);
- return XFS_ERROR(EIO);
+ return -EIO;
}
/*
@@ -2990,7 +3009,7 @@ xlog_recover_dquot_pass2(
type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
ASSERT(type);
if (log->l_quotaoffs_flag & type)
- return (0);
+ return 0;
/*
* At this point we know that quota was _not_ turned off.
@@ -3007,12 +3026,19 @@ xlog_recover_dquot_pass2(
error = xfs_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
"xlog_recover_dquot_pass2 (log copy)");
if (error)
- return XFS_ERROR(EIO);
+ return -EIO;
ASSERT(dq_f->qlf_len == 1);
+ /*
+ * At this point we are assuming that the dquots have been allocated
+ * and hence the buffer has valid dquots stamped in it. It should,
+ * therefore, pass verifier validation. If the dquot is bad, then
+ * we'll return an error here, so we don't need to specifically check
+ * the dquot in the buffer after the verifier has run.
+ */
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno,
XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp,
- NULL);
+ &xfs_dquot_buf_ops);
if (error)
return error;
@@ -3020,18 +3046,6 @@ xlog_recover_dquot_pass2(
ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset);
/*
- * At least the magic num portion should be on disk because this
- * was among a chunk of dquots created earlier, and we did some
- * minimal initialization then.
- */
- error = xfs_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
- "xlog_recover_dquot_pass2");
- if (error) {
- xfs_buf_relse(bp);
- return XFS_ERROR(EIO);
- }
-
- /*
* If the dquot has an LSN in it, recover the dquot only if it's less
* than the lsn of the transaction we are replaying.
*/
@@ -3178,38 +3192,38 @@ xlog_recover_do_icreate_pass2(
icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr;
if (icl->icl_type != XFS_LI_ICREATE) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type");
- return EINVAL;
+ return -EINVAL;
}
if (icl->icl_size != 1) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size");
- return EINVAL;
+ return -EINVAL;
}
agno = be32_to_cpu(icl->icl_ag);
if (agno >= mp->m_sb.sb_agcount) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno");
- return EINVAL;
+ return -EINVAL;
}
agbno = be32_to_cpu(icl->icl_agbno);
if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno");
- return EINVAL;
+ return -EINVAL;
}
isize = be32_to_cpu(icl->icl_isize);
if (isize != mp->m_sb.sb_inodesize) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize");
- return EINVAL;
+ return -EINVAL;
}
count = be32_to_cpu(icl->icl_count);
if (!count) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count");
- return EINVAL;
+ return -EINVAL;
}
length = be32_to_cpu(icl->icl_length);
if (!length || length >= mp->m_sb.sb_agblocks) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length");
- return EINVAL;
+ return -EINVAL;
}
/* existing allocation is fixed value */
@@ -3218,7 +3232,7 @@ xlog_recover_do_icreate_pass2(
if (count != mp->m_ialloc_inos ||
length != mp->m_ialloc_blks) {
xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2");
- return EINVAL;
+ return -EINVAL;
}
/*
@@ -3389,7 +3403,7 @@ xlog_recover_commit_pass1(
xfs_warn(log->l_mp, "%s: invalid item type (%d)",
__func__, ITEM_TYPE(item));
ASSERT(0);
- return XFS_ERROR(EIO);
+ return -EIO;
}
}
@@ -3425,7 +3439,7 @@ xlog_recover_commit_pass2(
xfs_warn(log->l_mp, "%s: invalid item type (%d)",
__func__, ITEM_TYPE(item));
ASSERT(0);
- return XFS_ERROR(EIO);
+ return -EIO;
}
}
@@ -3560,7 +3574,7 @@ xlog_recover_process_data(
/* check the log format matches our own - else we can't recover */
if (xlog_header_check_recover(log->l_mp, rhead))
- return (XFS_ERROR(EIO));
+ return -EIO;
while ((dp < lp) && num_logops) {
ASSERT(dp + sizeof(xlog_op_header_t) <= lp);
@@ -3571,7 +3585,7 @@ xlog_recover_process_data(
xfs_warn(log->l_mp, "%s: bad clientid 0x%x",
__func__, ohead->oh_clientid);
ASSERT(0);
- return (XFS_ERROR(EIO));
+ return -EIO;
}
tid = be32_to_cpu(ohead->oh_tid);
hash = XLOG_RHASH(tid);
@@ -3585,7 +3599,7 @@ xlog_recover_process_data(
xfs_warn(log->l_mp, "%s: bad length 0x%x",
__func__, be32_to_cpu(ohead->oh_len));
WARN_ON(1);
- return (XFS_ERROR(EIO));
+ return -EIO;
}
flags = ohead->oh_flags & ~XLOG_END_TRANS;
if (flags & XLOG_WAS_CONT_TRANS)
@@ -3607,7 +3621,7 @@ xlog_recover_process_data(
xfs_warn(log->l_mp, "%s: bad transaction",
__func__);
ASSERT(0);
- error = XFS_ERROR(EIO);
+ error = -EIO;
break;
case 0:
case XLOG_CONTINUE_TRANS:
@@ -3618,7 +3632,7 @@ xlog_recover_process_data(
xfs_warn(log->l_mp, "%s: bad flag 0x%x",
__func__, flags);
ASSERT(0);
- error = XFS_ERROR(EIO);
+ error = -EIO;
break;
}
if (error) {
@@ -3669,7 +3683,7 @@ xlog_recover_process_efi(
*/
set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
xfs_efi_release(efip, efip->efi_format.efi_nextents);
- return XFS_ERROR(EIO);
+ return -EIO;
}
}
@@ -3969,7 +3983,7 @@ xlog_unpack_data_crc(
* CRC protection by punting an error back up the stack.
*/
if (xfs_sb_version_hascrc(&log->l_mp->m_sb))
- return EFSCORRUPTED;
+ return -EFSCORRUPTED;
}
return 0;
@@ -4018,14 +4032,14 @@ xlog_valid_rec_header(
if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) {
XFS_ERROR_REPORT("xlog_valid_rec_header(1)",
XFS_ERRLEVEL_LOW, log->l_mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (unlikely(
(!rhead->h_version ||
(be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) {
xfs_warn(log->l_mp, "%s: unrecognised log version (%d).",
__func__, be32_to_cpu(rhead->h_version));
- return XFS_ERROR(EIO);
+ return -EIO;
}
/* LR body must have data or it wouldn't have been written */
@@ -4033,12 +4047,12 @@ xlog_valid_rec_header(
if (unlikely( hlen <= 0 || hlen > INT_MAX )) {
XFS_ERROR_REPORT("xlog_valid_rec_header(2)",
XFS_ERRLEVEL_LOW, log->l_mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) {
XFS_ERROR_REPORT("xlog_valid_rec_header(3)",
XFS_ERRLEVEL_LOW, log->l_mp);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
return 0;
}
@@ -4081,7 +4095,7 @@ xlog_do_recovery_pass(
*/
hbp = xlog_get_bp(log, 1);
if (!hbp)
- return ENOMEM;
+ return -ENOMEM;
error = xlog_bread(log, tail_blk, 1, hbp, &offset);
if (error)
@@ -4110,11 +4124,11 @@ xlog_do_recovery_pass(
}
if (!hbp)
- return ENOMEM;
+ return -ENOMEM;
dbp = xlog_get_bp(log, BTOBB(h_size));
if (!dbp) {
xlog_put_bp(hbp);
- return ENOMEM;
+ return -ENOMEM;
}
memset(rhash, 0, sizeof(rhash));
@@ -4388,7 +4402,7 @@ xlog_do_recover(
* If IO errors happened during recovery, bail out.
*/
if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
- return (EIO);
+ return -EIO;
}
/*
@@ -4415,7 +4429,7 @@ xlog_do_recover(
if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
xfs_buf_relse(bp);
- return XFS_ERROR(EIO);
+ return -EIO;
}
xfs_buf_iorequest(bp);
@@ -4492,7 +4506,7 @@ xlog_recover(
"Please recover the log on a kernel that supports the unknown features.",
(log->l_mp->m_sb.sb_features_log_incompat &
XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN));
- return EINVAL;
+ return -EINVAL;
}
xfs_notice(log->l_mp, "Starting recovery (logdev: %s)",
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 3507cd0ec40..fbf0384a466 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -42,6 +42,7 @@
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_dinode.h"
+#include "xfs_sysfs.h"
#ifdef HAVE_PERCPU_SB
@@ -60,6 +61,8 @@ static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size;
static uuid_t *xfs_uuid_table;
+extern struct kset *xfs_kset;
+
/*
* See if the UUID is unique among mounted XFS filesystems.
* Mount fails if UUID is nil or a FS with the same UUID is already mounted.
@@ -76,7 +79,7 @@ xfs_uuid_mount(
if (uuid_is_nil(uuid)) {
xfs_warn(mp, "Filesystem has nil UUID - can't mount");
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mutex_lock(&xfs_uuid_table_mutex);
@@ -104,7 +107,7 @@ xfs_uuid_mount(
out_duplicate:
mutex_unlock(&xfs_uuid_table_mutex);
xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
STATIC void
@@ -173,13 +176,9 @@ xfs_sb_validate_fsb_count(
ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
ASSERT(sbp->sb_blocklog >= BBSHIFT);
-#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */
+ /* Limited by ULONG_MAX of page cache index */
if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
- return EFBIG;
-#else /* Limited by UINT_MAX of sectors */
- if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX)
- return EFBIG;
-#endif
+ return -EFBIG;
return 0;
}
@@ -250,9 +249,9 @@ xfs_initialize_perag(
mp->m_flags &= ~XFS_MOUNT_32BITINODES;
if (mp->m_flags & XFS_MOUNT_32BITINODES)
- index = xfs_set_inode32(mp);
+ index = xfs_set_inode32(mp, agcount);
else
- index = xfs_set_inode64(mp);
+ index = xfs_set_inode64(mp, agcount);
if (maxagi)
*maxagi = index;
@@ -308,15 +307,15 @@ reread:
if (!bp) {
if (loud)
xfs_warn(mp, "SB buffer read failed");
- return EIO;
+ return -EIO;
}
if (bp->b_error) {
error = bp->b_error;
if (loud)
xfs_warn(mp, "SB validate failed with error %d.", error);
/* bad CRC means corrupted metadata */
- if (error == EFSBADCRC)
- error = EFSCORRUPTED;
+ if (error == -EFSBADCRC)
+ error = -EFSCORRUPTED;
goto release_buf;
}
@@ -324,7 +323,6 @@ reread:
* Initialize the mount structure from the superblock.
*/
xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
- xfs_sb_quota_from_disk(sbp);
/*
* If we haven't validated the superblock, do so now before we try
@@ -333,7 +331,7 @@ reread:
if (sbp->sb_magicnum != XFS_SB_MAGIC) {
if (loud)
xfs_warn(mp, "Invalid superblock magic number");
- error = EINVAL;
+ error = -EINVAL;
goto release_buf;
}
@@ -344,7 +342,7 @@ reread:
if (loud)
xfs_warn(mp, "device supports %u byte sectors (not %u)",
sector_size, sbp->sb_sectsize);
- error = ENOSYS;
+ error = -ENOSYS;
goto release_buf;
}
@@ -392,7 +390,7 @@ xfs_update_alignment(xfs_mount_t *mp)
xfs_warn(mp,
"alignment check failed: sunit/swidth vs. blocksize(%d)",
sbp->sb_blocksize);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
} else {
/*
* Convert the stripe unit and width to FSBs.
@@ -402,14 +400,14 @@ xfs_update_alignment(xfs_mount_t *mp)
xfs_warn(mp,
"alignment check failed: sunit/swidth vs. agsize(%d)",
sbp->sb_agblocks);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
} else if (mp->m_dalign) {
mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
} else {
xfs_warn(mp,
"alignment check failed: sunit(%d) less than bsize(%d)",
mp->m_dalign, sbp->sb_blocksize);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
}
@@ -429,7 +427,7 @@ xfs_update_alignment(xfs_mount_t *mp)
} else {
xfs_warn(mp,
"cannot change alignment: superblock does not support data alignment");
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
xfs_sb_version_hasdalign(&mp->m_sb)) {
@@ -556,14 +554,14 @@ xfs_check_sizes(xfs_mount_t *mp)
d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
xfs_warn(mp, "filesystem size mismatch detected");
- return XFS_ERROR(EFBIG);
+ return -EFBIG;
}
bp = xfs_buf_read_uncached(mp->m_ddev_targp,
d - XFS_FSS_TO_BB(mp, 1),
XFS_FSS_TO_BB(mp, 1), 0, NULL);
if (!bp) {
xfs_warn(mp, "last sector read failed");
- return EIO;
+ return -EIO;
}
xfs_buf_relse(bp);
@@ -571,14 +569,14 @@ xfs_check_sizes(xfs_mount_t *mp)
d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
xfs_warn(mp, "log size mismatch detected");
- return XFS_ERROR(EFBIG);
+ return -EFBIG;
}
bp = xfs_buf_read_uncached(mp->m_logdev_targp,
d - XFS_FSB_TO_BB(mp, 1),
XFS_FSB_TO_BB(mp, 1), 0, NULL);
if (!bp) {
xfs_warn(mp, "log device read failed");
- return EIO;
+ return -EIO;
}
xfs_buf_relse(bp);
}
@@ -731,10 +729,15 @@ xfs_mountfs(
xfs_set_maxicount(mp);
- error = xfs_uuid_mount(mp);
+ mp->m_kobj.kobject.kset = xfs_kset;
+ error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
if (error)
goto out;
+ error = xfs_uuid_mount(mp);
+ if (error)
+ goto out_remove_sysfs;
+
/*
* Set the minimum read and write sizes
*/
@@ -816,7 +819,7 @@ xfs_mountfs(
if (!sbp->sb_logblocks) {
xfs_warn(mp, "no log defined");
XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp);
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
goto out_free_perag;
}
@@ -855,7 +858,7 @@ xfs_mountfs(
!mp->m_sb.sb_inprogress) {
error = xfs_initialize_perag_data(mp, sbp->sb_agcount);
if (error)
- goto out_fail_wait;
+ goto out_log_dealloc;
}
/*
@@ -876,7 +879,7 @@ xfs_mountfs(
xfs_iunlock(rip, XFS_ILOCK_EXCL);
XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
mp);
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
goto out_rele_rip;
}
mp->m_rootip = rip; /* save it */
@@ -927,7 +930,7 @@ xfs_mountfs(
xfs_notice(mp, "resetting quota flags");
error = xfs_mount_reset_sbqflags(mp);
if (error)
- return error;
+ goto out_rtunmount;
}
}
@@ -989,6 +992,8 @@ xfs_mountfs(
xfs_da_unmount(mp);
out_remove_uuid:
xfs_uuid_unmount(mp);
+ out_remove_sysfs:
+ xfs_sysfs_del(&mp->m_kobj);
out:
return error;
}
@@ -1071,6 +1076,8 @@ xfs_unmountfs(
xfs_errortag_clearall(mp, 0);
#endif
xfs_free_perag(mp);
+
+ xfs_sysfs_del(&mp->m_kobj);
}
int
@@ -1152,7 +1159,7 @@ xfs_mod_incore_sb_unlocked(
lcounter += delta;
if (lcounter < 0) {
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mp->m_sb.sb_icount = lcounter;
return 0;
@@ -1161,7 +1168,7 @@ xfs_mod_incore_sb_unlocked(
lcounter += delta;
if (lcounter < 0) {
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mp->m_sb.sb_ifree = lcounter;
return 0;
@@ -1191,7 +1198,7 @@ xfs_mod_incore_sb_unlocked(
* blocks if were allowed to.
*/
if (!rsvd)
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
lcounter = (long long)mp->m_resblks_avail + delta;
if (lcounter >= 0) {
@@ -1202,7 +1209,7 @@ xfs_mod_incore_sb_unlocked(
"Filesystem \"%s\": reserve blocks depleted! "
"Consider increasing reserve pool size.",
mp->m_fsname);
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
}
mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
@@ -1211,7 +1218,7 @@ xfs_mod_incore_sb_unlocked(
lcounter = (long long)mp->m_sb.sb_frextents;
lcounter += delta;
if (lcounter < 0) {
- return XFS_ERROR(ENOSPC);
+ return -ENOSPC;
}
mp->m_sb.sb_frextents = lcounter;
return 0;
@@ -1220,7 +1227,7 @@ xfs_mod_incore_sb_unlocked(
lcounter += delta;
if (lcounter < 0) {
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mp->m_sb.sb_dblocks = lcounter;
return 0;
@@ -1229,7 +1236,7 @@ xfs_mod_incore_sb_unlocked(
scounter += delta;
if (scounter < 0) {
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mp->m_sb.sb_agcount = scounter;
return 0;
@@ -1238,7 +1245,7 @@ xfs_mod_incore_sb_unlocked(
scounter += delta;
if (scounter < 0) {
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mp->m_sb.sb_imax_pct = scounter;
return 0;
@@ -1247,7 +1254,7 @@ xfs_mod_incore_sb_unlocked(
scounter += delta;
if (scounter < 0) {
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mp->m_sb.sb_rextsize = scounter;
return 0;
@@ -1256,7 +1263,7 @@ xfs_mod_incore_sb_unlocked(
scounter += delta;
if (scounter < 0) {
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mp->m_sb.sb_rbmblocks = scounter;
return 0;
@@ -1265,7 +1272,7 @@ xfs_mod_incore_sb_unlocked(
lcounter += delta;
if (lcounter < 0) {
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mp->m_sb.sb_rblocks = lcounter;
return 0;
@@ -1274,7 +1281,7 @@ xfs_mod_incore_sb_unlocked(
lcounter += delta;
if (lcounter < 0) {
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mp->m_sb.sb_rextents = lcounter;
return 0;
@@ -1283,13 +1290,13 @@ xfs_mod_incore_sb_unlocked(
scounter += delta;
if (scounter < 0) {
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mp->m_sb.sb_rextslog = scounter;
return 0;
default:
ASSERT(0);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
}
@@ -1452,7 +1459,7 @@ xfs_dev_is_read_only(
(mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
xfs_notice(mp, "%s required on read-only device.", message);
xfs_notice(mp, "write access unavailable, cannot proceed.");
- return EROFS;
+ return -EROFS;
}
return 0;
}
@@ -1995,7 +2002,7 @@ slow_path:
* (e.g. lots of space just got freed). After that
* we are done.
*/
- if (ret != ENOSPC)
+ if (ret != -ENOSPC)
xfs_icsb_balance_counter(mp, field, 0);
xfs_icsb_unlock(mp);
return ret;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 7295a0b7c34..b0447c86e7e 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -166,6 +166,7 @@ typedef struct xfs_mount {
on the next remount,rw */
int64_t m_low_space[XFS_LOWSP_MAX];
/* low free space thresholds */
+ struct xfs_kobj m_kobj;
struct workqueue_struct *m_data_workqueue;
struct workqueue_struct *m_unwritten_workqueue;
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index f99b4933dc2..1eb6f3df698 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -337,20 +337,20 @@ xfs_mru_cache_create(
*mrup = NULL;
if (!mrup || !grp_count || !lifetime_ms || !free_func)
- return EINVAL;
+ return -EINVAL;
if (!(grp_time = msecs_to_jiffies(lifetime_ms) / grp_count))
- return EINVAL;
+ return -EINVAL;
if (!(mru = kmem_zalloc(sizeof(*mru), KM_SLEEP)))
- return ENOMEM;
+ return -ENOMEM;
/* An extra list is needed to avoid reaping up to a grp_time early. */
mru->grp_count = grp_count + 1;
mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
if (!mru->lists) {
- err = ENOMEM;
+ err = -ENOMEM;
goto exit;
}
@@ -434,16 +434,16 @@ xfs_mru_cache_insert(
ASSERT(mru && mru->lists);
if (!mru || !mru->lists)
- return EINVAL;
+ return -EINVAL;
if (radix_tree_preload(GFP_KERNEL))
- return ENOMEM;
+ return -ENOMEM;
INIT_LIST_HEAD(&elem->list_node);
elem->key = key;
spin_lock(&mru->lock);
- error = -radix_tree_insert(&mru->store, key, elem);
+ error = radix_tree_insert(&mru->store, key, elem);
radix_tree_preload_end();
if (!error)
_xfs_mru_cache_list_insert(mru, elem);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 6d26759c779..10232102b4a 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -98,18 +98,18 @@ restart:
next_index = be32_to_cpu(dqp->q_core.d_id) + 1;
error = execute(batch[i], data);
- if (error == EAGAIN) {
+ if (error == -EAGAIN) {
skipped++;
continue;
}
- if (error && last_error != EFSCORRUPTED)
+ if (error && last_error != -EFSCORRUPTED)
last_error = error;
}
mutex_unlock(&qi->qi_tree_lock);
/* bail out if the filesystem is corrupted. */
- if (last_error == EFSCORRUPTED) {
+ if (last_error == -EFSCORRUPTED) {
skipped = 0;
break;
}
@@ -138,7 +138,7 @@ xfs_qm_dqpurge(
xfs_dqlock(dqp);
if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) {
xfs_dqunlock(dqp);
- return EAGAIN;
+ return -EAGAIN;
}
dqp->dq_flags |= XFS_DQ_FREEING;
@@ -221,100 +221,6 @@ xfs_qm_unmount(
}
}
-
-/*
- * This is called from xfs_mountfs to start quotas and initialize all
- * necessary data structures like quotainfo. This is also responsible for
- * running a quotacheck as necessary. We are guaranteed that the superblock
- * is consistently read in at this point.
- *
- * If we fail here, the mount will continue with quota turned off. We don't
- * need to inidicate success or failure at all.
- */
-void
-xfs_qm_mount_quotas(
- xfs_mount_t *mp)
-{
- int error = 0;
- uint sbf;
-
- /*
- * If quotas on realtime volumes is not supported, we disable
- * quotas immediately.
- */
- if (mp->m_sb.sb_rextents) {
- xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
- mp->m_qflags = 0;
- goto write_changes;
- }
-
- ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
- /*
- * Allocate the quotainfo structure inside the mount struct, and
- * create quotainode(s), and change/rev superblock if necessary.
- */
- error = xfs_qm_init_quotainfo(mp);
- if (error) {
- /*
- * We must turn off quotas.
- */
- ASSERT(mp->m_quotainfo == NULL);
- mp->m_qflags = 0;
- goto write_changes;
- }
- /*
- * If any of the quotas are not consistent, do a quotacheck.
- */
- if (XFS_QM_NEED_QUOTACHECK(mp)) {
- error = xfs_qm_quotacheck(mp);
- if (error) {
- /* Quotacheck failed and disabled quotas. */
- return;
- }
- }
- /*
- * If one type of quotas is off, then it will lose its
- * quotachecked status, since we won't be doing accounting for
- * that type anymore.
- */
- if (!XFS_IS_UQUOTA_ON(mp))
- mp->m_qflags &= ~XFS_UQUOTA_CHKD;
- if (!XFS_IS_GQUOTA_ON(mp))
- mp->m_qflags &= ~XFS_GQUOTA_CHKD;
- if (!XFS_IS_PQUOTA_ON(mp))
- mp->m_qflags &= ~XFS_PQUOTA_CHKD;
-
- write_changes:
- /*
- * We actually don't have to acquire the m_sb_lock at all.
- * This can only be called from mount, and that's single threaded. XXX
- */
- spin_lock(&mp->m_sb_lock);
- sbf = mp->m_sb.sb_qflags;
- mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
- spin_unlock(&mp->m_sb_lock);
-
- if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
- if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
- /*
- * We could only have been turning quotas off.
- * We aren't in very good shape actually because
- * the incore structures are convinced that quotas are
- * off, but the on disk superblock doesn't know that !
- */
- ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
- xfs_alert(mp, "%s: Superblock update failed!",
- __func__);
- }
- }
-
- if (error) {
- xfs_warn(mp, "Failed to initialize disk quotas.");
- return;
- }
-}
-
/*
* Called from the vfsops layer.
*/
@@ -671,7 +577,7 @@ xfs_qm_init_quotainfo(
qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
- error = -list_lru_init(&qinf->qi_lru);
+ error = list_lru_init(&qinf->qi_lru);
if (error)
goto out_free_qinf;
@@ -995,7 +901,7 @@ xfs_qm_dqiter_bufs(
* will leave a trace in the log indicating corruption has
* been detected.
*/
- if (error == EFSCORRUPTED) {
+ if (error == -EFSCORRUPTED) {
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
XFS_FSB_TO_DADDR(mp, bno),
mp->m_quotainfo->qi_dqchunklen, 0, &bp,
@@ -1005,6 +911,12 @@ xfs_qm_dqiter_bufs(
if (error)
break;
+ /*
+ * A corrupt buffer might not have a verifier attached, so
+ * make sure we have the correct one attached before writeback
+ * occurs.
+ */
+ bp->b_ops = &xfs_dquot_buf_ops;
xfs_qm_reset_dqcounts(mp, bp, firstid, type);
xfs_buf_delwri_queue(bp, buffer_list);
xfs_buf_relse(bp);
@@ -1090,7 +1002,7 @@ xfs_qm_dqiterate(
xfs_buf_readahead(mp->m_ddev_targp,
XFS_FSB_TO_DADDR(mp, rablkno),
mp->m_quotainfo->qi_dqchunklen,
- NULL);
+ &xfs_dquot_buf_ops);
rablkno++;
}
}
@@ -1138,8 +1050,8 @@ xfs_qm_quotacheck_dqadjust(
/*
* Shouldn't be able to turn off quotas here.
*/
- ASSERT(error != ESRCH);
- ASSERT(error != ENOENT);
+ ASSERT(error != -ESRCH);
+ ASSERT(error != -ENOENT);
return error;
}
@@ -1226,7 +1138,7 @@ xfs_qm_dqusage_adjust(
*/
if (xfs_is_quota_inode(&mp->m_sb, ino)) {
*res = BULKSTAT_RV_NOTHING;
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
/*
@@ -1330,7 +1242,7 @@ out_unlock:
* Walk thru all the filesystem inodes and construct a consistent view
* of the disk quota world. If the quotacheck fails, disable quotas.
*/
-int
+STATIC int
xfs_qm_quotacheck(
xfs_mount_t *mp)
{
@@ -1463,7 +1375,100 @@ xfs_qm_quotacheck(
}
} else
xfs_notice(mp, "Quotacheck: Done.");
- return (error);
+ return error;
+}
+
+/*
+ * This is called from xfs_mountfs to start quotas and initialize all
+ * necessary data structures like quotainfo. This is also responsible for
+ * running a quotacheck as necessary. We are guaranteed that the superblock
+ * is consistently read in at this point.
+ *
+ * If we fail here, the mount will continue with quota turned off. We don't
+ * need to indicate success or failure at all.
+ */
+void
+xfs_qm_mount_quotas(
+ struct xfs_mount *mp)
+{
+ int error = 0;
+ uint sbf;
+
+ /*
+ * If quotas on realtime volumes is not supported, we disable
+ * quotas immediately.
+ */
+ if (mp->m_sb.sb_rextents) {
+ xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
+ mp->m_qflags = 0;
+ goto write_changes;
+ }
+
+ ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+ /*
+ * Allocate the quotainfo structure inside the mount struct, and
+ * create quotainode(s), and change/rev superblock if necessary.
+ */
+ error = xfs_qm_init_quotainfo(mp);
+ if (error) {
+ /*
+ * We must turn off quotas.
+ */
+ ASSERT(mp->m_quotainfo == NULL);
+ mp->m_qflags = 0;
+ goto write_changes;
+ }
+ /*
+ * If any of the quotas are not consistent, do a quotacheck.
+ */
+ if (XFS_QM_NEED_QUOTACHECK(mp)) {
+ error = xfs_qm_quotacheck(mp);
+ if (error) {
+ /* Quotacheck failed and disabled quotas. */
+ return;
+ }
+ }
+ /*
+ * If one type of quotas is off, then it will lose its
+ * quotachecked status, since we won't be doing accounting for
+ * that type anymore.
+ */
+ if (!XFS_IS_UQUOTA_ON(mp))
+ mp->m_qflags &= ~XFS_UQUOTA_CHKD;
+ if (!XFS_IS_GQUOTA_ON(mp))
+ mp->m_qflags &= ~XFS_GQUOTA_CHKD;
+ if (!XFS_IS_PQUOTA_ON(mp))
+ mp->m_qflags &= ~XFS_PQUOTA_CHKD;
+
+ write_changes:
+ /*
+ * We actually don't have to acquire the m_sb_lock at all.
+ * This can only be called from mount, and that's single threaded. XXX
+ */
+ spin_lock(&mp->m_sb_lock);
+ sbf = mp->m_sb.sb_qflags;
+ mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
+ spin_unlock(&mp->m_sb_lock);
+
+ if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
+ if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
+ /*
+ * We could only have been turning quotas off.
+ * We aren't in very good shape actually because
+ * the incore structures are convinced that quotas are
+ * off, but the on disk superblock doesn't know that !
+ */
+ ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
+ xfs_alert(mp, "%s: Superblock update failed!",
+ __func__);
+ }
+ }
+
+ if (error) {
+ xfs_warn(mp, "Failed to initialize disk quotas.");
+ return;
+ }
}
/*
@@ -1493,7 +1498,7 @@ xfs_qm_init_quotainos(
error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
0, 0, &uip);
if (error)
- return XFS_ERROR(error);
+ return error;
}
if (XFS_IS_GQUOTA_ON(mp) &&
mp->m_sb.sb_gquotino != NULLFSINO) {
@@ -1563,7 +1568,7 @@ error_rele:
IRELE(gip);
if (pip)
IRELE(pip);
- return XFS_ERROR(error);
+ return error;
}
STATIC void
@@ -1679,7 +1684,7 @@ xfs_qm_vop_dqalloc(
XFS_QMOPT_DOWARN,
&uq);
if (error) {
- ASSERT(error != ENOENT);
+ ASSERT(error != -ENOENT);
return error;
}
/*
@@ -1706,7 +1711,7 @@ xfs_qm_vop_dqalloc(
XFS_QMOPT_DOWARN,
&gq);
if (error) {
- ASSERT(error != ENOENT);
+ ASSERT(error != -ENOENT);
goto error_rele;
}
xfs_dqunlock(gq);
@@ -1726,7 +1731,7 @@ xfs_qm_vop_dqalloc(
XFS_QMOPT_DOWARN,
&pq);
if (error) {
- ASSERT(error != ENOENT);
+ ASSERT(error != -ENOENT);
goto error_rele;
}
xfs_dqunlock(pq);
@@ -1895,7 +1900,7 @@ xfs_qm_vop_chown_reserve(
-((xfs_qcnt_t)delblks), 0, blkflags);
}
- return (0);
+ return 0;
}
int
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 797fd463627..3a07a937e23 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -157,7 +157,6 @@ struct xfs_dquot_acct {
#define XFS_QM_RTBWARNLIMIT 5
extern void xfs_qm_destroy_quotainfo(struct xfs_mount *);
-extern int xfs_qm_quotacheck(struct xfs_mount *);
extern int xfs_qm_write_sb_changes(struct xfs_mount *, __int64_t);
/* dquot stuff */
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index e9be63abd8d..2c61e61b020 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -117,7 +117,7 @@ xfs_qm_newmount(
(uquotaondisk ? " usrquota" : ""),
(gquotaondisk ? " grpquota" : ""),
(pquotaondisk ? " prjquota" : ""));
- return XFS_ERROR(EPERM);
+ return -EPERM;
}
if (XFS_IS_QUOTA_ON(mp) || quotaondisk) {
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index bbc813caba4..80f2d77d929 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -64,10 +64,10 @@ xfs_qm_scall_quotaoff(
/*
* No file system can have quotas enabled on disk but not in core.
* Note that quota utilities (like quotaoff) _expect_
- * errno == EEXIST here.
+ * errno == EEXIST here (returned from this function as -EEXIST).
*/
if ((mp->m_qflags & flags) == 0)
- return XFS_ERROR(EEXIST);
+ return -EEXIST;
error = 0;
flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
@@ -94,7 +94,7 @@ xfs_qm_scall_quotaoff(
/* XXX what to do if error ? Revert back to old vals incore ? */
error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS);
- return (error);
+ return error;
}
dqtype = 0;
@@ -198,7 +198,7 @@ xfs_qm_scall_quotaoff(
if (mp->m_qflags == 0) {
mutex_unlock(&q->qi_quotaofflock);
xfs_qm_destroy_quotainfo(mp);
- return (0);
+ return 0;
}
/*
@@ -278,13 +278,13 @@ xfs_qm_scall_trunc_qfiles(
xfs_mount_t *mp,
uint flags)
{
- int error = EINVAL;
+ int error = -EINVAL;
if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0 ||
(flags & ~XFS_DQ_ALLTYPES)) {
xfs_debug(mp, "%s: flags=%x m_qflags=%x",
__func__, flags, mp->m_qflags);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
if (flags & XFS_DQ_USER) {
@@ -328,7 +328,7 @@ xfs_qm_scall_quotaon(
if (flags == 0) {
xfs_debug(mp, "%s: zero flags, m_qflags=%x",
__func__, mp->m_qflags);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
/* No fs can turn on quotas with a delayed effect */
@@ -351,13 +351,13 @@ xfs_qm_scall_quotaon(
xfs_debug(mp,
"%s: Can't enforce without acct, flags=%x sbflags=%x",
__func__, flags, mp->m_sb.sb_qflags);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
/*
* If everything's up to-date incore, then don't waste time.
*/
if ((mp->m_qflags & flags) == flags)
- return XFS_ERROR(EEXIST);
+ return -EEXIST;
/*
* Change sb_qflags on disk but not incore mp->qflags
@@ -372,11 +372,11 @@ xfs_qm_scall_quotaon(
* There's nothing to change if it's the same.
*/
if ((qf & flags) == flags && sbflags == 0)
- return XFS_ERROR(EEXIST);
+ return -EEXIST;
sbflags |= XFS_SB_QFLAGS;
if ((error = xfs_qm_write_sb_changes(mp, sbflags)))
- return (error);
+ return error;
/*
* If we aren't trying to switch on quota enforcement, we are done.
*/
@@ -387,10 +387,10 @@ xfs_qm_scall_quotaon(
((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) !=
(mp->m_qflags & XFS_GQUOTA_ACCT)) ||
(flags & XFS_ALL_QUOTA_ENFD) == 0)
- return (0);
+ return 0;
if (! XFS_IS_QUOTA_RUNNING(mp))
- return XFS_ERROR(ESRCH);
+ return -ESRCH;
/*
* Switch on quota enforcement in core.
@@ -399,7 +399,7 @@ xfs_qm_scall_quotaon(
mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD);
mutex_unlock(&mp->m_quotainfo->qi_quotaofflock);
- return (0);
+ return 0;
}
@@ -426,7 +426,7 @@ xfs_qm_scall_getqstat(
if (!xfs_sb_version_hasquota(&mp->m_sb)) {
out->qs_uquota.qfs_ino = NULLFSINO;
out->qs_gquota.qfs_ino = NULLFSINO;
- return (0);
+ return 0;
}
out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
@@ -514,7 +514,7 @@ xfs_qm_scall_getqstatv(
out->qs_uquota.qfs_ino = NULLFSINO;
out->qs_gquota.qfs_ino = NULLFSINO;
out->qs_pquota.qfs_ino = NULLFSINO;
- return (0);
+ return 0;
}
out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
@@ -595,7 +595,7 @@ xfs_qm_scall_setqlim(
xfs_qcnt_t hard, soft;
if (newlim->d_fieldmask & ~XFS_DQ_MASK)
- return EINVAL;
+ return -EINVAL;
if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
return 0;
@@ -615,7 +615,7 @@ xfs_qm_scall_setqlim(
*/
error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp);
if (error) {
- ASSERT(error != ENOENT);
+ ASSERT(error != -ENOENT);
goto out_unlock;
}
xfs_dqunlock(dqp);
@@ -758,7 +758,7 @@ xfs_qm_log_quotaoff_end(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_equotaoff, 0, 0);
if (error) {
xfs_trans_cancel(tp, 0);
- return (error);
+ return error;
}
qoffi = xfs_trans_get_qoff_item(tp, startqoff,
@@ -772,7 +772,7 @@ xfs_qm_log_quotaoff_end(
*/
xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp, 0);
- return (error);
+ return error;
}
@@ -822,7 +822,7 @@ error0:
spin_unlock(&mp->m_sb_lock);
}
*qoffstartp = qoffi;
- return (error);
+ return error;
}
@@ -850,7 +850,7 @@ xfs_qm_scall_getquota(
* our utility programs are concerned.
*/
if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
- error = XFS_ERROR(ENOENT);
+ error = -ENOENT;
goto out_put;
}
@@ -953,7 +953,7 @@ xfs_qm_export_flags(
uflags |= FS_QUOTA_GDQ_ENFD;
if (flags & XFS_PQUOTA_ENFD)
uflags |= FS_QUOTA_PDQ_ENFD;
- return (uflags);
+ return uflags;
}
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index 2ad1b9822e9..b238027df98 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -51,7 +51,7 @@ xfs_fs_get_xstate(
if (!XFS_IS_QUOTA_RUNNING(mp))
return -ENOSYS;
- return -xfs_qm_scall_getqstat(mp, fqs);
+ return xfs_qm_scall_getqstat(mp, fqs);
}
STATIC int
@@ -63,7 +63,7 @@ xfs_fs_get_xstatev(
if (!XFS_IS_QUOTA_RUNNING(mp))
return -ENOSYS;
- return -xfs_qm_scall_getqstatv(mp, fqs);
+ return xfs_qm_scall_getqstatv(mp, fqs);
}
STATIC int
@@ -95,11 +95,11 @@ xfs_fs_set_xstate(
switch (op) {
case Q_XQUOTAON:
- return -xfs_qm_scall_quotaon(mp, flags);
+ return xfs_qm_scall_quotaon(mp, flags);
case Q_XQUOTAOFF:
if (!XFS_IS_QUOTA_ON(mp))
return -EINVAL;
- return -xfs_qm_scall_quotaoff(mp, flags);
+ return xfs_qm_scall_quotaoff(mp, flags);
}
return -EINVAL;
@@ -112,7 +112,7 @@ xfs_fs_rm_xquota(
{
struct xfs_mount *mp = XFS_M(sb);
unsigned int flags = 0;
-
+
if (sb->s_flags & MS_RDONLY)
return -EROFS;
@@ -123,11 +123,11 @@ xfs_fs_rm_xquota(
flags |= XFS_DQ_USER;
if (uflags & FS_GROUP_QUOTA)
flags |= XFS_DQ_GROUP;
- if (uflags & FS_USER_QUOTA)
+ if (uflags & FS_PROJ_QUOTA)
flags |= XFS_DQ_PROJ;
- return -xfs_qm_scall_trunc_qfiles(mp, flags);
-}
+ return xfs_qm_scall_trunc_qfiles(mp, flags);
+}
STATIC int
xfs_fs_get_dqblk(
@@ -142,7 +142,7 @@ xfs_fs_get_dqblk(
if (!XFS_IS_QUOTA_ON(mp))
return -ESRCH;
- return -xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid),
+ return xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid),
xfs_quota_type(qid.type), fdq);
}
@@ -161,7 +161,7 @@ xfs_fs_set_dqblk(
if (!XFS_IS_QUOTA_ON(mp))
return -ESRCH;
- return -xfs_qm_scall_setqlim(mp, from_kqid(&init_user_ns, qid),
+ return xfs_qm_scall_setqlim(mp, from_kqid(&init_user_ns, qid),
xfs_quota_type(qid.type), fdq);
}
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index ec5ca65c621..909e143b87a 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -863,7 +863,7 @@ xfs_growfs_rt_alloc(
XFS_BMAPI_METADATA, &firstblock,
resblks, &map, &nmap, &flist);
if (!error && nmap < 1)
- error = XFS_ERROR(ENOSPC);
+ error = -ENOSPC;
if (error)
goto error_cancel;
/*
@@ -903,7 +903,7 @@ xfs_growfs_rt_alloc(
bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
mp->m_bsize, 0);
if (bp == NULL) {
- error = XFS_ERROR(EIO);
+ error = -EIO;
error_cancel:
xfs_trans_cancel(tp, cancelflags);
goto error;
@@ -944,9 +944,9 @@ xfs_growfs_rt(
xfs_buf_t *bp; /* temporary buffer */
int error; /* error return value */
xfs_mount_t *nmp; /* new (fake) mount structure */
- xfs_drfsbno_t nrblocks; /* new number of realtime blocks */
+ xfs_rfsblock_t nrblocks; /* new number of realtime blocks */
xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */
- xfs_drtbno_t nrextents; /* new number of realtime extents */
+ xfs_rtblock_t nrextents; /* new number of realtime extents */
uint8_t nrextslog; /* new log2 of sb_rextents */
xfs_extlen_t nrsumblocks; /* new number of summary blocks */
uint nrsumlevels; /* new rt summary levels */
@@ -962,11 +962,11 @@ xfs_growfs_rt(
* Initial error checking.
*/
if (!capable(CAP_SYS_ADMIN))
- return XFS_ERROR(EPERM);
+ return -EPERM;
if (mp->m_rtdev_targp == NULL || mp->m_rbmip == NULL ||
(nrblocks = in->newblocks) <= sbp->sb_rblocks ||
(sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize)))
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
if ((error = xfs_sb_validate_fsb_count(sbp, nrblocks)))
return error;
/*
@@ -976,7 +976,7 @@ xfs_growfs_rt(
XFS_FSB_TO_BB(mp, nrblocks - 1),
XFS_FSB_TO_BB(mp, 1), 0, NULL);
if (!bp)
- return EIO;
+ return -EIO;
if (bp->b_error) {
error = bp->b_error;
xfs_buf_relse(bp);
@@ -1001,7 +1001,7 @@ xfs_growfs_rt(
* since we'll log basically the whole summary file at once.
*/
if (nrsumblocks > (mp->m_sb.sb_logblocks >> 1))
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
/*
* Get the old block counts for bitmap and summary inodes.
* These can't change since other growfs callers are locked out.
@@ -1208,7 +1208,7 @@ xfs_rtallocate_extent(
len, &sumbp, &sb, prod, &r);
break;
default:
- error = EIO;
+ error = -EIO;
ASSERT(0);
}
if (error)
@@ -1247,7 +1247,7 @@ xfs_rtmount_init(
if (mp->m_rtdev_targp == NULL) {
xfs_warn(mp,
"Filesystem has a realtime volume, use rtdev=device option");
- return XFS_ERROR(ENODEV);
+ return -ENODEV;
}
mp->m_rsumlevels = sbp->sb_rextslog + 1;
mp->m_rsumsize =
@@ -1263,7 +1263,7 @@ xfs_rtmount_init(
xfs_warn(mp, "realtime mount -- %llu != %llu",
(unsigned long long) XFS_BB_TO_FSB(mp, d),
(unsigned long long) mp->m_sb.sb_rblocks);
- return XFS_ERROR(EFBIG);
+ return -EFBIG;
}
bp = xfs_buf_read_uncached(mp->m_rtdev_targp,
d - XFS_FSB_TO_BB(mp, 1),
@@ -1272,7 +1272,7 @@ xfs_rtmount_init(
xfs_warn(mp, "realtime device size check failed");
if (bp)
xfs_buf_relse(bp);
- return EIO;
+ return -EIO;
}
xfs_buf_relse(bp);
return 0;
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index 752b63d1030..c642795324a 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -132,7 +132,7 @@ xfs_rtmount_init(
return 0;
xfs_warn(mp, "Not built with CONFIG_XFS_RT");
- return ENOSYS;
+ return -ENOSYS;
}
# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
# define xfs_rtunmount_inodes(m)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 8f0333b3f7a..b194652033c 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -61,6 +61,7 @@
static const struct super_operations xfs_super_operations;
static kmem_zone_t *xfs_ioend_zone;
mempool_t *xfs_ioend_pool;
+struct kset *xfs_kset;
#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */
#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */
@@ -185,7 +186,7 @@ xfs_parseargs(
*/
mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
if (!mp->m_fsname)
- return ENOMEM;
+ return -ENOMEM;
mp->m_fsname_len = strlen(mp->m_fsname) + 1;
/*
@@ -204,9 +205,6 @@ xfs_parseargs(
*/
mp->m_flags |= XFS_MOUNT_BARRIER;
mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-#if !XFS_BIG_INUMS
- mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
-#endif
/*
* These can be overridden by the mount option parsing.
@@ -227,57 +225,57 @@ xfs_parseargs(
if (!value || !*value) {
xfs_warn(mp, "%s option requires an argument",
this_char);
- return EINVAL;
+ return -EINVAL;
}
if (kstrtoint(value, 10, &mp->m_logbufs))
- return EINVAL;
+ return -EINVAL;
} else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
if (!value || !*value) {
xfs_warn(mp, "%s option requires an argument",
this_char);
- return EINVAL;
+ return -EINVAL;
}
if (suffix_kstrtoint(value, 10, &mp->m_logbsize))
- return EINVAL;
+ return -EINVAL;
} else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
if (!value || !*value) {
xfs_warn(mp, "%s option requires an argument",
this_char);
- return EINVAL;
+ return -EINVAL;
}
mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
if (!mp->m_logname)
- return ENOMEM;
+ return -ENOMEM;
} else if (!strcmp(this_char, MNTOPT_MTPT)) {
xfs_warn(mp, "%s option not allowed on this system",
this_char);
- return EINVAL;
+ return -EINVAL;
} else if (!strcmp(this_char, MNTOPT_RTDEV)) {
if (!value || !*value) {
xfs_warn(mp, "%s option requires an argument",
this_char);
- return EINVAL;
+ return -EINVAL;
}
mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
if (!mp->m_rtname)
- return ENOMEM;
+ return -ENOMEM;
} else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
if (!value || !*value) {
xfs_warn(mp, "%s option requires an argument",
this_char);
- return EINVAL;
+ return -EINVAL;
}
if (kstrtoint(value, 10, &iosize))
- return EINVAL;
+ return -EINVAL;
iosizelog = ffs(iosize) - 1;
} else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
if (!value || !*value) {
xfs_warn(mp, "%s option requires an argument",
this_char);
- return EINVAL;
+ return -EINVAL;
}
if (suffix_kstrtoint(value, 10, &iosize))
- return EINVAL;
+ return -EINVAL;
iosizelog = ffs(iosize) - 1;
} else if (!strcmp(this_char, MNTOPT_GRPID) ||
!strcmp(this_char, MNTOPT_BSDGROUPS)) {
@@ -297,27 +295,22 @@ xfs_parseargs(
if (!value || !*value) {
xfs_warn(mp, "%s option requires an argument",
this_char);
- return EINVAL;
+ return -EINVAL;
}
if (kstrtoint(value, 10, &dsunit))
- return EINVAL;
+ return -EINVAL;
} else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
if (!value || !*value) {
xfs_warn(mp, "%s option requires an argument",
this_char);
- return EINVAL;
+ return -EINVAL;
}
if (kstrtoint(value, 10, &dswidth))
- return EINVAL;
+ return -EINVAL;
} else if (!strcmp(this_char, MNTOPT_32BITINODE)) {
mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
} else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
-#if !XFS_BIG_INUMS
- xfs_warn(mp, "%s option not allowed on this system",
- this_char);
- return EINVAL;
-#endif
} else if (!strcmp(this_char, MNTOPT_NOUUID)) {
mp->m_flags |= XFS_MOUNT_NOUUID;
} else if (!strcmp(this_char, MNTOPT_BARRIER)) {
@@ -390,7 +383,7 @@ xfs_parseargs(
"irixsgid is now a sysctl(2) variable, option is deprecated.");
} else {
xfs_warn(mp, "unknown mount option [%s].", this_char);
- return EINVAL;
+ return -EINVAL;
}
}
@@ -400,32 +393,32 @@ xfs_parseargs(
if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
!(mp->m_flags & XFS_MOUNT_RDONLY)) {
xfs_warn(mp, "no-recovery mounts must be read-only.");
- return EINVAL;
+ return -EINVAL;
}
if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
xfs_warn(mp,
"sunit and swidth options incompatible with the noalign option");
- return EINVAL;
+ return -EINVAL;
}
#ifndef CONFIG_XFS_QUOTA
if (XFS_IS_QUOTA_RUNNING(mp)) {
xfs_warn(mp, "quota support not available in this kernel.");
- return EINVAL;
+ return -EINVAL;
}
#endif
if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
xfs_warn(mp, "sunit and swidth must be specified together");
- return EINVAL;
+ return -EINVAL;
}
if (dsunit && (dswidth % dsunit != 0)) {
xfs_warn(mp,
"stripe width (%d) must be a multiple of the stripe unit (%d)",
dswidth, dsunit);
- return EINVAL;
+ return -EINVAL;
}
done:
@@ -446,7 +439,7 @@ done:
mp->m_logbufs > XLOG_MAX_ICLOGS)) {
xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
if (mp->m_logbsize != -1 &&
mp->m_logbsize != 0 &&
@@ -456,7 +449,7 @@ done:
xfs_warn(mp,
"invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
mp->m_logbsize);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
if (iosizelog) {
@@ -465,7 +458,7 @@ done:
xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
iosizelog, XFS_MIN_IO_LOG,
XFS_MAX_IO_LOG);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
@@ -597,15 +590,20 @@ xfs_max_file_offset(
return (((__uint64_t)pagefactor) << bitshift) - 1;
}
+/*
+ * xfs_set_inode32() and xfs_set_inode64() are passed an agcount
+ * because in the growfs case, mp->m_sb.sb_agcount is not updated
+ * yet to the potentially higher ag count.
+ */
xfs_agnumber_t
-xfs_set_inode32(struct xfs_mount *mp)
+xfs_set_inode32(struct xfs_mount *mp, xfs_agnumber_t agcount)
{
xfs_agnumber_t index = 0;
xfs_agnumber_t maxagi = 0;
xfs_sb_t *sbp = &mp->m_sb;
xfs_agnumber_t max_metadata;
- xfs_agino_t agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks -1, 0);
- xfs_ino_t ino = XFS_AGINO_TO_INO(mp, sbp->sb_agcount -1, agino);
+ xfs_agino_t agino;
+ xfs_ino_t ino;
xfs_perag_t *pag;
/* Calculate how much should be reserved for inodes to meet
@@ -620,10 +618,12 @@ xfs_set_inode32(struct xfs_mount *mp)
do_div(icount, sbp->sb_agblocks);
max_metadata = icount;
} else {
- max_metadata = sbp->sb_agcount;
+ max_metadata = agcount;
}
- for (index = 0; index < sbp->sb_agcount; index++) {
+ agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
+
+ for (index = 0; index < agcount; index++) {
ino = XFS_AGINO_TO_INO(mp, index, agino);
if (ino > XFS_MAXINUMBER_32) {
@@ -648,11 +648,11 @@ xfs_set_inode32(struct xfs_mount *mp)
}
xfs_agnumber_t
-xfs_set_inode64(struct xfs_mount *mp)
+xfs_set_inode64(struct xfs_mount *mp, xfs_agnumber_t agcount)
{
xfs_agnumber_t index = 0;
- for (index = 0; index < mp->m_sb.sb_agcount; index++) {
+ for (index = 0; index < agcount; index++) {
struct xfs_perag *pag;
pag = xfs_perag_get(mp, index);
@@ -686,7 +686,7 @@ xfs_blkdev_get(
xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error);
}
- return -error;
+ return error;
}
STATIC void
@@ -756,7 +756,7 @@ xfs_open_devices(
if (rtdev == ddev || rtdev == logdev) {
xfs_warn(mp,
"Cannot mount filesystem with identical rtdev and ddev/logdev.");
- error = EINVAL;
+ error = -EINVAL;
goto out_close_rtdev;
}
}
@@ -764,7 +764,7 @@ xfs_open_devices(
/*
* Setup xfs_mount buffer target pointers
*/
- error = ENOMEM;
+ error = -ENOMEM;
mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev);
if (!mp->m_ddev_targp)
goto out_close_rtdev;
@@ -1188,6 +1188,7 @@ xfs_fs_remount(
char *options)
{
struct xfs_mount *mp = XFS_M(sb);
+ xfs_sb_t *sbp = &mp->m_sb;
substring_t args[MAX_OPT_ARGS];
char *p;
int error;
@@ -1208,10 +1209,10 @@ xfs_fs_remount(
mp->m_flags &= ~XFS_MOUNT_BARRIER;
break;
case Opt_inode64:
- mp->m_maxagi = xfs_set_inode64(mp);
+ mp->m_maxagi = xfs_set_inode64(mp, sbp->sb_agcount);
break;
case Opt_inode32:
- mp->m_maxagi = xfs_set_inode32(mp);
+ mp->m_maxagi = xfs_set_inode32(mp, sbp->sb_agcount);
break;
default:
/*
@@ -1295,7 +1296,7 @@ xfs_fs_freeze(
xfs_save_resvblks(mp);
xfs_quiesce_attr(mp);
- return -xfs_fs_log_dummy(mp);
+ return xfs_fs_log_dummy(mp);
}
STATIC int
@@ -1314,7 +1315,7 @@ xfs_fs_show_options(
struct seq_file *m,
struct dentry *root)
{
- return -xfs_showargs(XFS_M(root->d_sb), m);
+ return xfs_showargs(XFS_M(root->d_sb), m);
}
/*
@@ -1336,14 +1337,14 @@ xfs_finish_flags(
mp->m_logbsize < mp->m_sb.sb_logsunit) {
xfs_warn(mp,
"logbuf size must be greater than or equal to log stripe size");
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
} else {
/* Fail a mount if the logbuf is larger than 32K */
if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
xfs_warn(mp,
"logbuf size for version 1 logs must be 16K or 32K");
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
}
@@ -1355,7 +1356,7 @@ xfs_finish_flags(
xfs_warn(mp,
"Cannot mount a V5 filesystem as %s. %s is always enabled for V5 filesystems.",
MNTOPT_NOATTR2, MNTOPT_ATTR2);
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
/*
@@ -1372,7 +1373,7 @@ xfs_finish_flags(
if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
xfs_warn(mp,
"cannot mount a read-only filesystem as read-write");
- return XFS_ERROR(EROFS);
+ return -EROFS;
}
if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
@@ -1380,7 +1381,7 @@ xfs_finish_flags(
!xfs_sb_version_has_pquotino(&mp->m_sb)) {
xfs_warn(mp,
"Super block does not support project and group quota together");
- return XFS_ERROR(EINVAL);
+ return -EINVAL;
}
return 0;
@@ -1394,7 +1395,7 @@ xfs_fs_fill_super(
{
struct inode *root;
struct xfs_mount *mp = NULL;
- int flags = 0, error = ENOMEM;
+ int flags = 0, error = -ENOMEM;
mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
if (!mp)
@@ -1428,11 +1429,11 @@ xfs_fs_fill_super(
if (error)
goto out_free_fsname;
- error = -xfs_init_mount_workqueues(mp);
+ error = xfs_init_mount_workqueues(mp);
if (error)
goto out_close_devices;
- error = -xfs_icsb_init_counters(mp);
+ error = xfs_icsb_init_counters(mp);
if (error)
goto out_destroy_workqueues;
@@ -1474,12 +1475,12 @@ xfs_fs_fill_super(
root = igrab(VFS_I(mp->m_rootip));
if (!root) {
- error = ENOENT;
+ error = -ENOENT;
goto out_unmount;
}
sb->s_root = d_make_root(root);
if (!sb->s_root) {
- error = ENOMEM;
+ error = -ENOMEM;
goto out_unmount;
}
@@ -1499,7 +1500,7 @@ out_destroy_workqueues:
xfs_free_fsname(mp);
kfree(mp);
out:
- return -error;
+ return error;
out_unmount:
xfs_filestream_unmount(mp);
@@ -1761,9 +1762,15 @@ init_xfs_fs(void)
if (error)
goto out_cleanup_procfs;
+ xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
+ if (!xfs_kset) {
+ error = -ENOMEM;
+ goto out_sysctl_unregister; /* no kset exists yet, so skip kset_unregister() */
+ }
+
error = xfs_qm_init();
if (error)
- goto out_sysctl_unregister;
+ goto out_kset_unregister;
error = register_filesystem(&xfs_fs_type);
if (error)
@@ -1772,6 +1779,8 @@ init_xfs_fs(void)
out_qm_exit:
xfs_qm_exit();
+ out_kset_unregister:
+ kset_unregister(xfs_kset);
out_sysctl_unregister:
xfs_sysctl_unregister();
out_cleanup_procfs:
@@ -1793,6 +1802,7 @@ exit_xfs_fs(void)
{
xfs_qm_exit();
unregister_filesystem(&xfs_fs_type);
+ kset_unregister(xfs_kset);
xfs_sysctl_unregister();
xfs_cleanup_procfs();
xfs_buf_terminate();
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index bbe3d15a790..2b830c2f322 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -44,16 +44,6 @@ extern void xfs_qm_exit(void);
# define XFS_REALTIME_STRING
#endif
-#if XFS_BIG_BLKNOS
-# if XFS_BIG_INUMS
-# define XFS_BIGFS_STRING "large block/inode numbers, "
-# else
-# define XFS_BIGFS_STRING "large block numbers, "
-# endif
-#else
-# define XFS_BIGFS_STRING
-#endif
-
#ifdef DEBUG
# define XFS_DBG_STRING "debug"
#else
@@ -64,7 +54,6 @@ extern void xfs_qm_exit(void);
#define XFS_BUILD_OPTIONS XFS_ACL_STRING \
XFS_SECURITY_STRING \
XFS_REALTIME_STRING \
- XFS_BIGFS_STRING \
XFS_DBG_STRING /* DBG must be last */
struct xfs_inode;
@@ -76,8 +65,8 @@ extern __uint64_t xfs_max_file_offset(unsigned int);
extern void xfs_flush_inodes(struct xfs_mount *mp);
extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
-extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *);
-extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *);
+extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *, xfs_agnumber_t agcount);
+extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *, xfs_agnumber_t agcount);
extern const struct export_operations xfs_export_operations;
extern const struct xattr_handler *xfs_xattr_handlers[];
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index d69363c833e..6a944a2cd36 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -76,15 +76,15 @@ xfs_readlink_bmap(
bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0,
&xfs_symlink_buf_ops);
if (!bp)
- return XFS_ERROR(ENOMEM);
+ return -ENOMEM;
error = bp->b_error;
if (error) {
xfs_buf_ioerror_alert(bp, __func__);
xfs_buf_relse(bp);
/* bad CRC means corrupted metadata */
- if (error == EFSBADCRC)
- error = EFSCORRUPTED;
+ if (error == -EFSBADCRC)
+ error = -EFSCORRUPTED;
goto out;
}
byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
@@ -95,7 +95,7 @@ xfs_readlink_bmap(
if (xfs_sb_version_hascrc(&mp->m_sb)) {
if (!xfs_symlink_hdr_ok(ip->i_ino, offset,
byte_cnt, bp)) {
- error = EFSCORRUPTED;
+ error = -EFSCORRUPTED;
xfs_alert(mp,
"symlink header does not match required off/len/owner (0x%x/Ox%x,0x%llx)",
offset, byte_cnt, ip->i_ino);
@@ -135,7 +135,7 @@ xfs_readlink(
trace_xfs_readlink(ip);
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
xfs_ilock(ip, XFS_ILOCK_SHARED);
@@ -148,7 +148,7 @@ xfs_readlink(
__func__, (unsigned long long) ip->i_ino,
(long long) pathlen);
ASSERT(0);
- error = XFS_ERROR(EFSCORRUPTED);
+ error = -EFSCORRUPTED;
goto out;
}
@@ -203,14 +203,14 @@ xfs_symlink(
trace_xfs_symlink(dp, link_name);
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
/*
* Check component lengths of the target path name.
*/
pathlen = strlen(target_path);
if (pathlen >= MAXPATHLEN) /* total string too long */
- return XFS_ERROR(ENAMETOOLONG);
+ return -ENAMETOOLONG;
udqp = gdqp = NULL;
prid = xfs_get_initial_prid(dp);
@@ -238,7 +238,7 @@ xfs_symlink(
fs_blocks = xfs_symlink_blocks(mp, pathlen);
resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, resblks, 0);
- if (error == ENOSPC && fs_blocks == 0) {
+ if (error == -ENOSPC && fs_blocks == 0) {
resblks = 0;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, 0, 0);
}
@@ -254,7 +254,7 @@ xfs_symlink(
* Check whether the directory allows new symlinks or not.
*/
if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
- error = XFS_ERROR(EPERM);
+ error = -EPERM;
goto error_return;
}
@@ -284,7 +284,7 @@ xfs_symlink(
error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
prid, resblks > 0, &ip, NULL);
if (error) {
- if (error == ENOSPC)
+ if (error == -ENOSPC)
goto error_return;
goto error1;
}
@@ -348,7 +348,7 @@ xfs_symlink(
bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
BTOBB(byte_cnt), 0);
if (!bp) {
- error = ENOMEM;
+ error = -ENOMEM;
goto error2;
}
bp->b_ops = &xfs_symlink_buf_ops;
@@ -489,7 +489,7 @@ xfs_inactive_symlink_rmt(
XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
if (!bp) {
- error = ENOMEM;
+ error = -ENOMEM;
goto error_bmap_cancel;
}
xfs_trans_binval(tp, bp);
@@ -562,7 +562,7 @@ xfs_inactive_symlink(
trace_xfs_inactive_symlink(ip);
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -EIO;
xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -580,7 +580,7 @@ xfs_inactive_symlink(
__func__, (unsigned long long)ip->i_ino, pathlen);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
ASSERT(0);
- return XFS_ERROR(EFSCORRUPTED);
+ return -EFSCORRUPTED;
}
if (ip->i_df.if_flags & XFS_IFINLINE) {
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
new file mode 100644
index 00000000000..9835139ce1e
--- /dev/null
+++ b/fs/xfs/xfs_sysfs.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "xfs.h"
+#include "xfs_sysfs.h"
+#include "xfs_log_format.h"
+#include "xfs_log.h"
+#include "xfs_log_priv.h"
+
+struct xfs_sysfs_attr {
+ struct attribute attr;
+ ssize_t (*show)(char *buf, void *data);
+ ssize_t (*store)(const char *buf, size_t count, void *data);
+};
+
+static inline struct xfs_sysfs_attr *
+to_attr(struct attribute *attr)
+{
+ return container_of(attr, struct xfs_sysfs_attr, attr);
+}
+
+#define XFS_SYSFS_ATTR_RW(name) \
+ static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RW(name)
+#define XFS_SYSFS_ATTR_RO(name) \
+ static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RO(name)
+
+#define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr
+
+/*
+ * xfs_mount kobject. This currently has no attributes and thus no need for show
+ * and store helpers. The mp kobject serves as the per-mount parent object that
+ * is identified by the fsname under sysfs.
+ */
+
+struct kobj_type xfs_mp_ktype = {
+ .release = xfs_sysfs_release,
+};
+
+/* xlog */
+
+STATIC ssize_t
+log_head_lsn_show(
+ char *buf,
+ void *data)
+{
+ struct xlog *log = data;
+ int cycle;
+ int block;
+
+ spin_lock(&log->l_icloglock);
+ cycle = log->l_curr_cycle;
+ block = log->l_curr_block;
+ spin_unlock(&log->l_icloglock);
+
+ return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block);
+}
+XFS_SYSFS_ATTR_RO(log_head_lsn);
+
+STATIC ssize_t
+log_tail_lsn_show(
+ char *buf,
+ void *data)
+{
+ struct xlog *log = data;
+ int cycle;
+ int block;
+
+ xlog_crack_atomic_lsn(&log->l_tail_lsn, &cycle, &block);
+ return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block);
+}
+XFS_SYSFS_ATTR_RO(log_tail_lsn);
+
+STATIC ssize_t
+reserve_grant_head_show(
+ char *buf,
+ void *data)
+{
+ struct xlog *log = data;
+ int cycle;
+ int bytes;
+
+ xlog_crack_grant_head(&log->l_reserve_head.grant, &cycle, &bytes);
+ return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes);
+}
+XFS_SYSFS_ATTR_RO(reserve_grant_head);
+
+STATIC ssize_t
+write_grant_head_show(
+ char *buf,
+ void *data)
+{
+ struct xlog *log = data;
+ int cycle;
+ int bytes;
+
+ xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &bytes);
+ return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes);
+}
+XFS_SYSFS_ATTR_RO(write_grant_head);
+
+static struct attribute *xfs_log_attrs[] = {
+ ATTR_LIST(log_head_lsn),
+ ATTR_LIST(log_tail_lsn),
+ ATTR_LIST(reserve_grant_head),
+ ATTR_LIST(write_grant_head),
+ NULL,
+};
+
+static inline struct xlog *
+to_xlog(struct kobject *kobject)
+{
+ struct xfs_kobj *kobj = to_kobj(kobject);
+ return container_of(kobj, struct xlog, l_kobj);
+}
+
+STATIC ssize_t
+xfs_log_show(
+ struct kobject *kobject,
+ struct attribute *attr,
+ char *buf)
+{
+ struct xlog *log = to_xlog(kobject);
+ struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
+
+ return xfs_attr->show ? xfs_attr->show(buf, log) : 0;
+}
+
+STATIC ssize_t
+xfs_log_store(
+ struct kobject *kobject,
+ struct attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct xlog *log = to_xlog(kobject);
+ struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
+
+ return xfs_attr->store ? xfs_attr->store(buf, count, log) : 0;
+}
+
+static const struct sysfs_ops xfs_log_ops = {
+ .show = xfs_log_show, /* forwards to the attribute's ->show, if set */
+ .store = xfs_log_store, /* forwards to the attribute's ->store, if set */
+};
+
+struct kobj_type xfs_log_ktype = {
+ .release = xfs_sysfs_release,
+ .sysfs_ops = &xfs_log_ops,
+ .default_attrs = xfs_log_attrs,
+};
diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h
new file mode 100644
index 00000000000..54a2091183c
--- /dev/null
+++ b/fs/xfs/xfs_sysfs.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef __XFS_SYSFS_H__
+#define __XFS_SYSFS_H__
+
+extern struct kobj_type xfs_mp_ktype; /* xfs_mount */
+extern struct kobj_type xfs_log_ktype; /* xlog */
+
+static inline struct xfs_kobj *
+to_kobj(struct kobject *kobject)
+{
+ return container_of(kobject, struct xfs_kobj, kobject);
+}
+
+static inline void
+xfs_sysfs_release(struct kobject *kobject)
+{
+ struct xfs_kobj *kobj = to_kobj(kobject);
+ complete(&kobj->complete);
+}
+
+static inline int
+xfs_sysfs_init(
+ struct xfs_kobj *kobj,
+ struct kobj_type *ktype,
+ struct xfs_kobj *parent_kobj,
+ const char *name)
+{
+ init_completion(&kobj->complete);
+ return kobject_init_and_add(&kobj->kobject, ktype,
+ &parent_kobj->kobject, "%s", name);
+}
+
+static inline void
+xfs_sysfs_del(
+ struct xfs_kobj *kobj)
+{
+ kobject_del(&kobj->kobject);
+ kobject_put(&kobj->kobject);
+ wait_for_completion(&kobj->complete);
+}
+
+#endif /* __XFS_SYSFS_H__ */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index d03932564cc..30e8e341095 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -190,7 +190,7 @@ xfs_trans_reserve(
-((int64_t)blocks), rsvd);
if (error != 0) {
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
- return (XFS_ERROR(ENOSPC));
+ return -ENOSPC;
}
tp->t_blk_res += blocks;
}
@@ -241,7 +241,7 @@ xfs_trans_reserve(
error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FREXTENTS,
-((int64_t)rtextents), rsvd);
if (error) {
- error = XFS_ERROR(ENOSPC);
+ error = -ENOSPC;
goto undo_log;
}
tp->t_rtx_res += rtextents;
@@ -874,7 +874,7 @@ xfs_trans_commit(
goto out_unreserve;
if (XFS_FORCED_SHUTDOWN(mp)) {
- error = XFS_ERROR(EIO);
+ error = -EIO;
goto out_unreserve;
}
@@ -917,7 +917,7 @@ out_unreserve:
if (tp->t_ticket) {
commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
if (commit_lsn == -1 && !error)
- error = XFS_ERROR(EIO);
+ error = -EIO;
}
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
xfs_trans_free_items(tp, NULLCOMMITLSN, error ? XFS_TRANS_ABORT : 0);
@@ -1024,7 +1024,7 @@ xfs_trans_roll(
*/
error = xfs_trans_commit(trans, 0);
if (error)
- return (error);
+ return error;
trans = *tpp;
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index cb0f3a84cc6..859482f53b5 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -762,7 +762,7 @@ xfs_trans_ail_init(
ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL);
if (!ailp)
- return ENOMEM;
+ return -ENOMEM;
ailp->xa_mount = mp;
INIT_LIST_HEAD(&ailp->xa_ail);
@@ -781,7 +781,7 @@ xfs_trans_ail_init(
out_free_ailp:
kmem_free(ailp);
- return ENOMEM;
+ return -ENOMEM;
}
void
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index b8eef0549f3..96c898e7ac9 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -166,7 +166,7 @@ xfs_trans_get_buf_map(
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_recur++;
trace_xfs_trans_get_buf_recur(bip);
- return (bp);
+ return bp;
}
bp = xfs_buf_get_map(target, map, nmaps, flags);
@@ -178,7 +178,7 @@ xfs_trans_get_buf_map(
_xfs_trans_bjoin(tp, bp, 1);
trace_xfs_trans_get_buf(bp->b_fspriv);
- return (bp);
+ return bp;
}
/*
@@ -201,9 +201,8 @@ xfs_trans_getsb(xfs_trans_t *tp,
* Default to just trying to lock the superblock buffer
* if tp is NULL.
*/
- if (tp == NULL) {
- return (xfs_getsb(mp, flags));
- }
+ if (tp == NULL)
+ return xfs_getsb(mp, flags);
/*
* If the superblock buffer already has this transaction
@@ -218,7 +217,7 @@ xfs_trans_getsb(xfs_trans_t *tp,
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_recur++;
trace_xfs_trans_getsb_recur(bip);
- return (bp);
+ return bp;
}
bp = xfs_getsb(mp, flags);
@@ -227,7 +226,7 @@ xfs_trans_getsb(xfs_trans_t *tp,
_xfs_trans_bjoin(tp, bp, 1);
trace_xfs_trans_getsb(bp->b_fspriv);
- return (bp);
+ return bp;
}
#ifdef DEBUG
@@ -267,7 +266,7 @@ xfs_trans_read_buf_map(
bp = xfs_buf_read_map(target, map, nmaps, flags, ops);
if (!bp)
return (flags & XBF_TRYLOCK) ?
- EAGAIN : XFS_ERROR(ENOMEM);
+ -EAGAIN : -ENOMEM;
if (bp->b_error) {
error = bp->b_error;
@@ -277,8 +276,8 @@ xfs_trans_read_buf_map(
xfs_buf_relse(bp);
/* bad CRC means corrupted metadata */
- if (error == EFSBADCRC)
- error = EFSCORRUPTED;
+ if (error == -EFSBADCRC)
+ error = -EFSCORRUPTED;
return error;
}
#ifdef DEBUG
@@ -287,7 +286,7 @@ xfs_trans_read_buf_map(
if (((xfs_req_num++) % xfs_error_mod) == 0) {
xfs_buf_relse(bp);
xfs_debug(mp, "Returning error!");
- return XFS_ERROR(EIO);
+ return -EIO;
}
}
}
@@ -343,8 +342,8 @@ xfs_trans_read_buf_map(
xfs_force_shutdown(tp->t_mountp,
SHUTDOWN_META_IO_ERROR);
/* bad CRC means corrupted metadata */
- if (error == EFSBADCRC)
- error = EFSCORRUPTED;
+ if (error == -EFSBADCRC)
+ error = -EFSCORRUPTED;
return error;
}
}
@@ -355,7 +354,7 @@ xfs_trans_read_buf_map(
if (XFS_FORCED_SHUTDOWN(mp)) {
trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
*bpp = NULL;
- return XFS_ERROR(EIO);
+ return -EIO;
}
@@ -372,7 +371,7 @@ xfs_trans_read_buf_map(
if (bp == NULL) {
*bpp = NULL;
return (flags & XBF_TRYLOCK) ?
- 0 : XFS_ERROR(ENOMEM);
+ 0 : -ENOMEM;
}
if (bp->b_error) {
error = bp->b_error;
@@ -384,8 +383,8 @@ xfs_trans_read_buf_map(
xfs_buf_relse(bp);
/* bad CRC means corrupted metadata */
- if (error == EFSBADCRC)
- error = EFSCORRUPTED;
+ if (error == -EFSBADCRC)
+ error = -EFSCORRUPTED;
return error;
}
#ifdef DEBUG
@@ -396,7 +395,7 @@ xfs_trans_read_buf_map(
SHUTDOWN_META_IO_ERROR);
xfs_buf_relse(bp);
xfs_debug(mp, "Returning trans error!");
- return XFS_ERROR(EIO);
+ return -EIO;
}
}
}
@@ -414,7 +413,7 @@ shutdown_abort:
trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
xfs_buf_relse(bp);
*bpp = NULL;
- return XFS_ERROR(EIO);
+ return -EIO;
}
/*
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 41172861e85..846e061c2e9 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -722,8 +722,8 @@ xfs_trans_dqresv(
error_return:
xfs_dqunlock(dqp);
if (flags & XFS_QMOPT_ENOSPC)
- return ENOSPC;
- return EDQUOT;
+ return -ENOSPC;
+ return -EDQUOT;
}
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 65c6e6650b1..b79dc66b2ec 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -38,43 +38,18 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */
typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */
typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */
-/*
- * These types are 64 bits on disk but are either 32 or 64 bits in memory.
- * Disk based types:
- */
-typedef __uint64_t xfs_dfsbno_t; /* blockno in filesystem (agno|agbno) */
-typedef __uint64_t xfs_drfsbno_t; /* blockno in filesystem (raw) */
-typedef __uint64_t xfs_drtbno_t; /* extent (block) in realtime area */
-typedef __uint64_t xfs_dfiloff_t; /* block number in a file */
-typedef __uint64_t xfs_dfilblks_t; /* number of blocks in a file */
-
-/*
- * Memory based types are conditional.
- */
-#if XFS_BIG_BLKNOS
typedef __uint64_t xfs_fsblock_t; /* blockno in filesystem (agno|agbno) */
typedef __uint64_t xfs_rfsblock_t; /* blockno in filesystem (raw) */
typedef __uint64_t xfs_rtblock_t; /* extent (block) in realtime area */
-typedef __int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */
-#else
-typedef __uint32_t xfs_fsblock_t; /* blockno in filesystem (agno|agbno) */
-typedef __uint32_t xfs_rfsblock_t; /* blockno in filesystem (raw) */
-typedef __uint32_t xfs_rtblock_t; /* extent (block) in realtime area */
-typedef __int32_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */
-#endif
typedef __uint64_t xfs_fileoff_t; /* block number in a file */
-typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */
typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */
+typedef __int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */
+typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */
/*
* Null values for the types.
*/
-#define NULLDFSBNO ((xfs_dfsbno_t)-1)
-#define NULLDRFSBNO ((xfs_drfsbno_t)-1)
-#define NULLDRTBNO ((xfs_drtbno_t)-1)
-#define NULLDFILOFF ((xfs_dfiloff_t)-1)
-
#define NULLFSBLOCK ((xfs_fsblock_t)-1)
#define NULLRFSBLOCK ((xfs_rfsblock_t)-1)
#define NULLRTBLOCK ((xfs_rtblock_t)-1)
diff --git a/fs/xfs/xfs_vnode.h b/fs/xfs/xfs_vnode.h
deleted file mode 100644
index e8a77383c0d..00000000000
--- a/fs/xfs/xfs_vnode.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_VNODE_H__
-#define __XFS_VNODE_H__
-
-#include "xfs_fs.h"
-
-struct file;
-struct xfs_inode;
-struct attrlist_cursor_kern;
-
-/*
- * Flags for read/write calls - same values as IRIX
- */
-#define IO_ISDIRECT 0x00004 /* bypass page cache */
-#define IO_INVIS 0x00020 /* don't update inode timestamps */
-
-#define XFS_IO_FLAGS \
- { IO_ISDIRECT, "DIRECT" }, \
- { IO_INVIS, "INVIS"}
-
-/*
- * Some useful predicates.
- */
-#define VN_MAPPED(vp) mapping_mapped(vp->i_mapping)
-#define VN_CACHED(vp) (vp->i_mapping->nrpages)
-#define VN_DIRTY(vp) mapping_tagged(vp->i_mapping, \
- PAGECACHE_TAG_DIRTY)
-
-
-#endif /* __XFS_VNODE_H__ */
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 78ed92a46fd..93455b99804 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -49,7 +49,7 @@ xfs_xattr_get(struct dentry *dentry, const char *name,
value = NULL;
}
- error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags);
+ error = xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags);
if (error)
return error;
return asize;
@@ -71,8 +71,8 @@ xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
xflags |= ATTR_REPLACE;
if (!value)
- return -xfs_attr_remove(ip, (unsigned char *)name, xflags);
- return -xfs_attr_set(ip, (unsigned char *)name,
+ return xfs_attr_remove(ip, (unsigned char *)name, xflags);
+ return xfs_attr_set(ip, (unsigned char *)name,
(void *)value, size, xflags);
}