diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/Makefile | 2 | ||||
-rw-r--r-- | fs/aio.c | 174 | ||||
-rw-r--r-- | fs/bad_inode.c | 7 | ||||
-rw-r--r-- | fs/btrfs/async-thread.c | 44 | ||||
-rw-r--r-- | fs/btrfs/async-thread.h | 28 | ||||
-rw-r--r-- | fs/btrfs/backref.c | 14 | ||||
-rw-r--r-- | fs/btrfs/btrfs_inode.h | 6 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 20 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 4 | ||||
-rw-r--r-- | fs/btrfs/delayed-inode.c | 4 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 88 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 308 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 5 | ||||
-rw-r--r-- | fs/btrfs/file-item.c | 2 | ||||
-rw-r--r-- | fs/btrfs/file.c | 31 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 371 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 68 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 124 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.h | 5 | ||||
-rw-r--r-- | fs/btrfs/qgroup.c | 170 | ||||
-rw-r--r-- | fs/btrfs/qgroup.h | 1 | ||||
-rw-r--r-- | fs/btrfs/raid56.c | 9 | ||||
-rw-r--r-- | fs/btrfs/reada.c | 3 | ||||
-rw-r--r-- | fs/btrfs/scrub.c | 25 | ||||
-rw-r--r-- | fs/btrfs/super.c | 60 | ||||
-rw-r--r-- | fs/btrfs/sysfs.c | 2 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 33 | ||||
-rw-r--r-- | fs/btrfs/transaction.h | 1 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 94 | ||||
-rw-r--r-- | fs/btrfs/tree-log.h | 2 | ||||
-rw-r--r-- | fs/btrfs/ulist.h | 15 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 65 | ||||
-rw-r--r-- | fs/ceph/acl.c | 14 | ||||
-rw-r--r-- | fs/ceph/caps.c | 2 | ||||
-rw-r--r-- | fs/ceph/file.c | 24 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 16 | ||||
-rw-r--r-- | fs/ceph/super.c | 2 | ||||
-rw-r--r-- | fs/ceph/xattr.c | 4 | ||||
-rw-r--r-- | fs/cifs/Kconfig | 35 | ||||
-rw-r--r-- | fs/cifs/cifsfs.c | 26 | ||||
-rw-r--r-- | fs/cifs/cifsfs.h | 4 | ||||
-rw-r--r-- | fs/cifs/cifsglob.h | 10 | ||||
-rw-r--r-- | fs/cifs/cifspdu.h | 23 | ||||
-rw-r--r-- | fs/cifs/connect.c | 21 | ||||
-rw-r--r-- | fs/cifs/dir.c | 8 | ||||
-rw-r--r-- | fs/cifs/file.c | 10 | ||||
-rw-r--r-- | fs/cifs/inode.c | 25 | ||||
-rw-r--r-- | fs/cifs/misc.c | 7 | ||||
-rw-r--r-- | fs/cifs/readdir.c | 6 | ||||
-rw-r--r-- | fs/cifs/sess.c | 7 | ||||
-rw-r--r-- | fs/cifs/smb1ops.c | 7 | ||||
-rw-r--r-- | fs/cifs/smb2file.c | 2 | ||||
-rw-r--r-- | fs/cifs/smb2inode.c | 2 | ||||
-rw-r--r-- | fs/cifs/smb2maperror.c | 4 | ||||
-rw-r--r-- | fs/cifs/smb2misc.c | 17 | ||||
-rw-r--r-- | fs/cifs/smb2ops.c | 172 | ||||
-rw-r--r-- | fs/cifs/smb2pdu.c | 23 | ||||
-rw-r--r-- | fs/cifs/smb2pdu.h | 6 | ||||
-rw-r--r-- | fs/cifs/smbfsctl.h | 2 | ||||
-rw-r--r-- | fs/dcache.c | 196 | ||||
-rw-r--r-- | fs/direct-io.c | 2 | ||||
-rw-r--r-- | fs/eventpoll.c | 3 | ||||
-rw-r--r-- | fs/ext2/super.c | 2 | ||||
-rw-r--r-- | fs/ext3/super.c | 5 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 18 | ||||
-rw-r--r-- | fs/ext4/extents.c | 88 | ||||
-rw-r--r-- | fs/ext4/inode.c | 44 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 5 | ||||
-rw-r--r-- | fs/ext4/namei.c | 59 | ||||
-rw-r--r-- | fs/ext4/resize.c | 2 | ||||
-rw-r--r-- | fs/ext4/super.c | 5 | ||||
-rw-r--r-- | fs/f2fs/Kconfig | 4 | ||||
-rw-r--r-- | fs/f2fs/checkpoint.c | 80 | ||||
-rw-r--r-- | fs/f2fs/data.c | 19 | ||||
-rw-r--r-- | fs/f2fs/debug.c | 4 | ||||
-rw-r--r-- | fs/f2fs/dir.c | 6 | ||||
-rw-r--r-- | fs/f2fs/f2fs.h | 26 | ||||
-rw-r--r-- | fs/f2fs/file.c | 60 | ||||
-rw-r--r-- | fs/f2fs/gc.c | 8 | ||||
-rw-r--r-- | fs/f2fs/gc.h | 2 | ||||
-rw-r--r-- | fs/f2fs/hash.c | 7 | ||||
-rw-r--r-- | fs/f2fs/inline.c | 38 | ||||
-rw-r--r-- | fs/f2fs/namei.c | 23 | ||||
-rw-r--r-- | fs/f2fs/node.c | 80 | ||||
-rw-r--r-- | fs/f2fs/recovery.c | 30 | ||||
-rw-r--r-- | fs/f2fs/segment.c | 53 | ||||
-rw-r--r-- | fs/f2fs/segment.h | 2 | ||||
-rw-r--r-- | fs/f2fs/super.c | 32 | ||||
-rw-r--r-- | fs/f2fs/xattr.c | 2 | ||||
-rw-r--r-- | fs/fs_pin.c | 78 | ||||
-rw-r--r-- | fs/fuse/dir.c | 7 | ||||
-rw-r--r-- | fs/fuse/file.c | 4 | ||||
-rw-r--r-- | fs/hostfs/hostfs.h | 1 | ||||
-rw-r--r-- | fs/hostfs/hostfs_kern.c | 30 | ||||
-rw-r--r-- | fs/hostfs/hostfs_user.c | 28 | ||||
-rw-r--r-- | fs/internal.h | 7 | ||||
-rw-r--r-- | fs/isofs/inode.c | 15 | ||||
-rw-r--r-- | fs/isofs/isofs.h | 23 | ||||
-rw-r--r-- | fs/isofs/rock.c | 39 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 21 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 56 | ||||
-rw-r--r-- | fs/jbd2/recovery.c | 33 | ||||
-rw-r--r-- | fs/jbd2/revoke.c | 6 | ||||
-rw-r--r-- | fs/lockd/svc.c | 4 | ||||
-rw-r--r-- | fs/locks.c | 88 | ||||
-rw-r--r-- | fs/mount.h | 2 | ||||
-rw-r--r-- | fs/namei.c | 34 | ||||
-rw-r--r-- | fs/namespace.c | 77 | ||||
-rw-r--r-- | fs/nfs/blocklayout/blocklayout.c | 101 | ||||
-rw-r--r-- | fs/nfs/callback.c | 12 | ||||
-rw-r--r-- | fs/nfs/client.c | 30 | ||||
-rw-r--r-- | fs/nfs/delegation.c | 34 | ||||
-rw-r--r-- | fs/nfs/delegation.h | 1 | ||||
-rw-r--r-- | fs/nfs/dir.c | 208 | ||||
-rw-r--r-- | fs/nfs/direct.c | 33 | ||||
-rw-r--r-- | fs/nfs/filelayout/filelayout.c | 299 | ||||
-rw-r--r-- | fs/nfs/filelayout/filelayoutdev.c | 2 | ||||
-rw-r--r-- | fs/nfs/getroot.c | 2 | ||||
-rw-r--r-- | fs/nfs/inode.c | 9 | ||||
-rw-r--r-- | fs/nfs/internal.h | 11 | ||||
-rw-r--r-- | fs/nfs/nfs3acl.c | 7 | ||||
-rw-r--r-- | fs/nfs/nfs3proc.c | 21 | ||||
-rw-r--r-- | fs/nfs/nfs4_fs.h | 21 | ||||
-rw-r--r-- | fs/nfs/nfs4client.c | 5 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 274 | ||||
-rw-r--r-- | fs/nfs/nfs4state.c | 45 | ||||
-rw-r--r-- | fs/nfs/nfs4trace.h | 28 | ||||
-rw-r--r-- | fs/nfs/nfs4xdr.c | 2 | ||||
-rw-r--r-- | fs/nfs/objlayout/objio_osd.c | 24 | ||||
-rw-r--r-- | fs/nfs/objlayout/objlayout.c | 81 | ||||
-rw-r--r-- | fs/nfs/objlayout/objlayout.h | 8 | ||||
-rw-r--r-- | fs/nfs/pagelist.c | 316 | ||||
-rw-r--r-- | fs/nfs/pnfs.c | 178 | ||||
-rw-r--r-- | fs/nfs/pnfs.h | 45 | ||||
-rw-r--r-- | fs/nfs/proc.c | 27 | ||||
-rw-r--r-- | fs/nfs/read.c | 54 | ||||
-rw-r--r-- | fs/nfs/super.c | 12 | ||||
-rw-r--r-- | fs/nfs/write.c | 163 | ||||
-rw-r--r-- | fs/nfs_common/nfsacl.c | 5 | ||||
-rw-r--r-- | fs/nfsd/nfs4xdr.c | 14 | ||||
-rw-r--r-- | fs/nilfs2/super.c | 2 | ||||
-rw-r--r-- | fs/notify/fdinfo.c | 4 | ||||
-rw-r--r-- | fs/ocfs2/cluster/quorum.c | 13 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp.c | 45 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp.h | 1 | ||||
-rw-r--r-- | fs/ocfs2/ioctl.c | 129 | ||||
-rw-r--r-- | fs/pnode.c | 1 | ||||
-rw-r--r-- | fs/quota/dquot.c | 180 | ||||
-rw-r--r-- | fs/quota/kqid.c | 2 | ||||
-rw-r--r-- | fs/quota/netlink.c | 3 | ||||
-rw-r--r-- | fs/quota/quota.c | 6 | ||||
-rw-r--r-- | fs/reiserfs/do_balan.c | 111 | ||||
-rw-r--r-- | fs/reiserfs/journal.c | 22 | ||||
-rw-r--r-- | fs/reiserfs/lbalance.c | 5 | ||||
-rw-r--r-- | fs/reiserfs/reiserfs.h | 9 | ||||
-rw-r--r-- | fs/reiserfs/super.c | 6 | ||||
-rw-r--r-- | fs/super.c | 20 | ||||
-rw-r--r-- | fs/sync.c | 2 | ||||
-rw-r--r-- | fs/ubifs/commit.c | 2 | ||||
-rw-r--r-- | fs/ubifs/io.c | 2 | ||||
-rw-r--r-- | fs/ubifs/log.c | 12 | ||||
-rw-r--r-- | fs/ubifs/lpt.c | 5 | ||||
-rw-r--r-- | fs/ubifs/lpt_commit.c | 7 | ||||
-rw-r--r-- | fs/ubifs/master.c | 7 | ||||
-rw-r--r-- | fs/ubifs/orphan.c | 1 | ||||
-rw-r--r-- | fs/ubifs/recovery.c | 5 | ||||
-rw-r--r-- | fs/ubifs/sb.c | 4 | ||||
-rw-r--r-- | fs/ubifs/scan.c | 14 | ||||
-rw-r--r-- | fs/ubifs/super.c | 19 | ||||
-rw-r--r-- | fs/ubifs/tnc.c | 1 | ||||
-rw-r--r-- | fs/ubifs/tnc_commit.c | 1 | ||||
-rw-r--r-- | fs/ubifs/ubifs.h | 4 | ||||
-rw-r--r-- | fs/udf/file.c | 22 | ||||
-rw-r--r-- | fs/udf/ialloc.c | 28 | ||||
-rw-r--r-- | fs/udf/inode.c | 161 | ||||
-rw-r--r-- | fs/udf/lowlevel.c | 2 | ||||
-rw-r--r-- | fs/udf/namei.c | 156 | ||||
-rw-r--r-- | fs/udf/super.c | 71 | ||||
-rw-r--r-- | fs/udf/symlink.c | 2 | ||||
-rw-r--r-- | fs/udf/udfdecl.h | 3 | ||||
-rw-r--r-- | fs/udf/unicode.c | 9 | ||||
-rw-r--r-- | fs/ufs/inode.c | 7 | ||||
-rw-r--r-- | fs/ufs/namei.c | 14 | ||||
-rw-r--r-- | fs/xfs/Kconfig | 1 | ||||
-rw-r--r-- | fs/xfs/Makefile | 71 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ag.h (renamed from fs/xfs/xfs_ag.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_alloc.c (renamed from fs/xfs/xfs_alloc.c) | 20 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_alloc.h (renamed from fs/xfs/xfs_alloc.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_alloc_btree.c (renamed from fs/xfs/xfs_alloc_btree.c) | 6 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_alloc_btree.h (renamed from fs/xfs/xfs_alloc_btree.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr.c (renamed from fs/xfs/xfs_attr.c) | 92 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr_leaf.c (renamed from fs/xfs/xfs_attr_leaf.c) | 78 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr_leaf.h (renamed from fs/xfs/xfs_attr_leaf.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr_remote.c (renamed from fs/xfs/xfs_attr_remote.c) | 22 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr_remote.h (renamed from fs/xfs/xfs_attr_remote.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr_sf.h (renamed from fs/xfs/xfs_attr_sf.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bit.h (renamed from fs/xfs/xfs_bit.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c (renamed from fs/xfs/xfs_bmap.c) | 78 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.h (renamed from fs/xfs/xfs_bmap.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap_btree.c (renamed from fs/xfs/xfs_bmap_btree.c) | 99 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap_btree.h (renamed from fs/xfs/xfs_bmap_btree.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_btree.c (renamed from fs/xfs/xfs_btree.c) | 46 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_btree.h (renamed from fs/xfs/xfs_btree.h) | 2 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_cksum.h (renamed from fs/xfs/xfs_cksum.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_da_btree.c (renamed from fs/xfs/xfs_da_btree.c) | 112 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_da_btree.h (renamed from fs/xfs/xfs_da_btree.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_da_format.c (renamed from fs/xfs/xfs_da_format.c) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_da_format.h (renamed from fs/xfs/xfs_da_format.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dinode.h (renamed from fs/xfs/xfs_dinode.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dir2.c (renamed from fs/xfs/xfs_dir2.c) | 24 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dir2.h (renamed from fs/xfs/xfs_dir2.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dir2_block.c (renamed from fs/xfs/xfs_dir2_block.c) | 18 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dir2_data.c (renamed from fs/xfs/xfs_dir2_data.c) | 10 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dir2_leaf.c (renamed from fs/xfs/xfs_dir2_leaf.c) | 24 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dir2_node.c (renamed from fs/xfs/xfs_dir2_node.c) | 40 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dir2_priv.h (renamed from fs/xfs/xfs_dir2_priv.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dir2_sf.c (renamed from fs/xfs/xfs_dir2_sf.c) | 75 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dquot_buf.c (renamed from fs/xfs/xfs_dquot_buf.c) | 6 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_format.h (renamed from fs/xfs/xfs_format.h) | 14 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ialloc.c (renamed from fs/xfs/xfs_ialloc.c) | 34 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ialloc.h (renamed from fs/xfs/xfs_ialloc.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ialloc_btree.c (renamed from fs/xfs/xfs_ialloc_btree.c) | 6 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ialloc_btree.h (renamed from fs/xfs/xfs_ialloc_btree.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_inode_buf.c (renamed from fs/xfs/xfs_inode_buf.c) | 10 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_inode_buf.h (renamed from fs/xfs/xfs_inode_buf.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_inode_fork.c (renamed from fs/xfs/xfs_inode_fork.c) | 36 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_inode_fork.h (renamed from fs/xfs/xfs_inode_fork.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_inum.h (renamed from fs/xfs/xfs_inum.h) | 4 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_log_format.h (renamed from fs/xfs/xfs_log_format.h) | 4 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_log_recover.h (renamed from fs/xfs/xfs_log_recover.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_log_rlimit.c (renamed from fs/xfs/xfs_log_rlimit.c) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_quota_defs.h (renamed from fs/xfs/xfs_quota_defs.h) | 2 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_rtbitmap.c (renamed from fs/xfs/xfs_rtbitmap.c) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_sb.c (renamed from fs/xfs/xfs_sb.c) | 56 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_sb.h (renamed from fs/xfs/xfs_sb.h) | 8 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_shared.h (renamed from fs/xfs/xfs_shared.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_symlink_remote.c (renamed from fs/xfs/xfs_symlink_remote.c) | 6 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_trans_resv.c (renamed from fs/xfs/xfs_trans_resv.c) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_trans_resv.h (renamed from fs/xfs/xfs_trans_resv.h) | 0 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_trans_space.h (renamed from fs/xfs/xfs_trans_space.h) | 0 | ||||
-rw-r--r-- | fs/xfs/xfs_acl.c | 8 | ||||
-rw-r--r-- | fs/xfs/xfs_aops.c | 79 | ||||
-rw-r--r-- | fs/xfs/xfs_attr_inactive.c | 22 | ||||
-rw-r--r-- | fs/xfs/xfs_attr_list.c | 38 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_util.c | 194 | ||||
-rw-r--r-- | fs/xfs/xfs_buf.c | 40 | ||||
-rw-r--r-- | fs/xfs/xfs_buf.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_buf_item.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_dir2_readdir.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_discard.c | 18 | ||||
-rw-r--r-- | fs/xfs/xfs_dquot.c | 41 | ||||
-rw-r--r-- | fs/xfs/xfs_dquot.h | 15 | ||||
-rw-r--r-- | fs/xfs/xfs_error.c | 25 | ||||
-rw-r--r-- | fs/xfs/xfs_error.h | 13 | ||||
-rw-r--r-- | fs/xfs/xfs_export.c | 10 | ||||
-rw-r--r-- | fs/xfs/xfs_extfree_item.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_file.c | 102 | ||||
-rw-r--r-- | fs/xfs/xfs_filestream.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_fs.h | 7 | ||||
-rw-r--r-- | fs/xfs/xfs_fsops.c | 42 | ||||
-rw-r--r-- | fs/xfs/xfs_icache.c | 148 | ||||
-rw-r--r-- | fs/xfs/xfs_icache.h | 13 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 68 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 10 | ||||
-rw-r--r-- | fs/xfs/xfs_inode_item.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_ioctl.c | 266 | ||||
-rw-r--r-- | fs/xfs/xfs_ioctl32.c | 111 | ||||
-rw-r--r-- | fs/xfs/xfs_iomap.c | 54 | ||||
-rw-r--r-- | fs/xfs/xfs_iops.c | 72 | ||||
-rw-r--r-- | fs/xfs/xfs_itable.c | 579 | ||||
-rw-r--r-- | fs/xfs/xfs_itable.h | 23 | ||||
-rw-r--r-- | fs/xfs/xfs_linux.h | 27 | ||||
-rw-r--r-- | fs/xfs/xfs_log.c | 69 | ||||
-rw-r--r-- | fs/xfs/xfs_log_cil.c | 8 | ||||
-rw-r--r-- | fs/xfs/xfs_log_priv.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 284 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.c | 97 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_mru_cache.c | 14 | ||||
-rw-r--r-- | fs/xfs/xfs_qm.c | 229 | ||||
-rw-r--r-- | fs/xfs/xfs_qm.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_qm_bhv.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_qm_syscalls.c | 46 | ||||
-rw-r--r-- | fs/xfs/xfs_quotaops.c | 20 | ||||
-rw-r--r-- | fs/xfs/xfs_rtalloc.c | 24 | ||||
-rw-r--r-- | fs/xfs/xfs_rtalloc.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_super.c | 132 | ||||
-rw-r--r-- | fs/xfs/xfs_super.h | 15 | ||||
-rw-r--r-- | fs/xfs/xfs_symlink.c | 30 | ||||
-rw-r--r-- | fs/xfs/xfs_sysfs.c | 165 | ||||
-rw-r--r-- | fs/xfs/xfs_sysfs.h | 59 | ||||
-rw-r--r-- | fs/xfs/xfs_trans.c | 10 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_ail.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_buf.c | 37 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_dquot.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_types.h | 29 | ||||
-rw-r--r-- | fs/xfs/xfs_vnode.h | 46 | ||||
-rw-r--r-- | fs/xfs/xfs_xattr.c | 6 |
298 files changed, 6511 insertions, 4786 deletions
diff --git a/fs/Makefile b/fs/Makefile index 4030cbfbc9a..90c88529892 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \ attr.o bad_inode.o file.o filesystems.o namespace.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o splice.o sync.o utimes.o \ - stack.o fs_struct.o statfs.o + stack.o fs_struct.o statfs.o fs_pin.o ifeq ($(CONFIG_BLOCK),y) obj-y += buffer.o block_dev.o direct-io.o mpage.o @@ -141,6 +141,7 @@ struct kioctx { struct { unsigned tail; + unsigned completed_events; spinlock_t completion_lock; } ____cacheline_aligned_in_smp; @@ -192,7 +193,6 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) } file->f_flags = O_RDWR; - file->private_data = ctx; return file; } @@ -202,7 +202,7 @@ static struct dentry *aio_mount(struct file_system_type *fs_type, static const struct dentry_operations ops = { .d_dname = simple_dname, }; - return mount_pseudo(fs_type, "aio:", NULL, &ops, 0xa10a10a1); + return mount_pseudo(fs_type, "aio:", NULL, &ops, AIO_RING_MAGIC); } /* aio_setup @@ -556,8 +556,7 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) struct aio_ring *ring; spin_lock(&mm->ioctx_lock); - rcu_read_lock(); - table = rcu_dereference(mm->ioctx_table); + table = rcu_dereference_raw(mm->ioctx_table); while (1) { if (table) @@ -565,7 +564,6 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) if (!table->table[i]) { ctx->id = i; table->table[i] = ctx; - rcu_read_unlock(); spin_unlock(&mm->ioctx_lock); /* While kioctx setup is in progress, @@ -579,8 +577,6 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) } new_nr = (table ? table->nr : 1) * 4; - - rcu_read_unlock(); spin_unlock(&mm->ioctx_lock); table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) * @@ -591,8 +587,7 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) table->nr = new_nr; spin_lock(&mm->ioctx_lock); - rcu_read_lock(); - old = rcu_dereference(mm->ioctx_table); + old = rcu_dereference_raw(mm->ioctx_table); if (!old) { rcu_assign_pointer(mm->ioctx_table, table); @@ -739,12 +734,9 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, spin_lock(&mm->ioctx_lock); - rcu_read_lock(); - table = rcu_dereference(mm->ioctx_table); - + table = rcu_dereference_raw(mm->ioctx_table); WARN_ON(ctx != table->table[ctx->id]); table->table[ctx->id] = NULL; - rcu_read_unlock(); spin_unlock(&mm->ioctx_lock); /* percpu_ref_kill() will do the necessary call_rcu() */ @@ -793,40 +785,35 @@ EXPORT_SYMBOL(wait_on_sync_kiocb); */ void exit_aio(struct mm_struct *mm) { - struct kioctx_table *table; - struct kioctx *ctx; - unsigned i = 0; - - while (1) { - rcu_read_lock(); - table = rcu_dereference(mm->ioctx_table); - - do { - if (!table || i >= table->nr) { - rcu_read_unlock(); - rcu_assign_pointer(mm->ioctx_table, NULL); - if (table) - kfree(table); - return; - } + struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table); + int i; - ctx = table->table[i++]; - } while (!ctx); + if (!table) + return; - rcu_read_unlock(); + for (i = 0; i < table->nr; ++i) { + struct kioctx *ctx = table->table[i]; + struct completion requests_done = + COMPLETION_INITIALIZER_ONSTACK(requests_done); + if (!ctx) + continue; /* - * We don't need to bother with munmap() here - - * exit_mmap(mm) is coming and it'll unmap everything. - * Since aio_free_ring() uses non-zero ->mmap_size - * as indicator that it needs to unmap the area, - * just set it to 0; aio_free_ring() is the only - * place that uses ->mmap_size, so it's safe. + * We don't need to bother with munmap() here - exit_mmap(mm) + * is coming and it'll unmap everything. And we simply can't, + * this is not necessarily our ->mm. + * Since kill_ioctx() uses non-zero ->mmap_size as indicator + * that it needs to unmap the area, just set it to 0. */ ctx->mmap_size = 0; + kill_ioctx(mm, ctx, &requests_done); - kill_ioctx(mm, ctx, NULL); + /* Wait until all IO for the context are done. */ + wait_for_completion(&requests_done); } + + RCU_INIT_POINTER(mm->ioctx_table, NULL); + kfree(table); } static void put_reqs_available(struct kioctx *ctx, unsigned nr) @@ -834,10 +821,8 @@ static void put_reqs_available(struct kioctx *ctx, unsigned nr) struct kioctx_cpu *kcpu; unsigned long flags; - preempt_disable(); - kcpu = this_cpu_ptr(ctx->cpu); - local_irq_save(flags); + kcpu = this_cpu_ptr(ctx->cpu); kcpu->reqs_available += nr; while (kcpu->reqs_available >= ctx->req_batch * 2) { @@ -846,7 +831,6 @@ static void put_reqs_available(struct kioctx *ctx, unsigned nr) } local_irq_restore(flags); - preempt_enable(); } static bool get_reqs_available(struct kioctx *ctx) @@ -855,10 +839,8 @@ static bool get_reqs_available(struct kioctx *ctx) bool ret = false; unsigned long flags; - preempt_disable(); - kcpu = this_cpu_ptr(ctx->cpu); - local_irq_save(flags); + kcpu = this_cpu_ptr(ctx->cpu); if (!kcpu->reqs_available) { int old, avail = atomic_read(&ctx->reqs_available); @@ -878,10 +860,71 @@ static bool get_reqs_available(struct kioctx *ctx) kcpu->reqs_available--; out: local_irq_restore(flags); - preempt_enable(); return ret; } +/* refill_reqs_available + * Updates the reqs_available reference counts used for tracking the + * number of free slots in the completion ring. This can be called + * from aio_complete() (to optimistically update reqs_available) or + * from aio_get_req() (the we're out of events case). It must be + * called holding ctx->completion_lock. + */ +static void refill_reqs_available(struct kioctx *ctx, unsigned head, + unsigned tail) +{ + unsigned events_in_ring, completed; + + /* Clamp head since userland can write to it. */ + head %= ctx->nr_events; + if (head <= tail) + events_in_ring = tail - head; + else + events_in_ring = ctx->nr_events - (head - tail); + + completed = ctx->completed_events; + if (events_in_ring < completed) + completed -= events_in_ring; + else + completed = 0; + + if (!completed) + return; + + ctx->completed_events -= completed; + put_reqs_available(ctx, completed); +} + +/* user_refill_reqs_available + * Called to refill reqs_available when aio_get_req() encounters an + * out of space in the completion ring. + */ +static void user_refill_reqs_available(struct kioctx *ctx) +{ + spin_lock_irq(&ctx->completion_lock); + if (ctx->completed_events) { + struct aio_ring *ring; + unsigned head; + + /* Access of ring->head may race with aio_read_events_ring() + * here, but that's okay since whether we read the old version + * or the new version, and either will be valid. The important + * part is that head cannot pass tail since we prevent + * aio_complete() from updating tail by holding + * ctx->completion_lock. Even if head is invalid, the check + * against ctx->completed_events below will make sure we do the + * safe/right thing. + */ + ring = kmap_atomic(ctx->ring_pages[0]); + head = ring->head; + kunmap_atomic(ring); + + refill_reqs_available(ctx, head, ctx->tail); + } + + spin_unlock_irq(&ctx->completion_lock); +} + /* aio_get_req * Allocate a slot for an aio request. * Returns NULL if no requests are free. @@ -890,8 +933,11 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx) { struct kiocb *req; - if (!get_reqs_available(ctx)) - return NULL; + if (!get_reqs_available(ctx)) { + user_refill_reqs_available(ctx); + if (!get_reqs_available(ctx)) + return NULL; + } req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO); if (unlikely(!req)) @@ -950,8 +996,8 @@ void aio_complete(struct kiocb *iocb, long res, long res2) struct kioctx *ctx = iocb->ki_ctx; struct aio_ring *ring; struct io_event *ev_page, *event; + unsigned tail, pos, head; unsigned long flags; - unsigned tail, pos; /* * Special case handling for sync iocbs: @@ -1012,10 +1058,14 @@ void aio_complete(struct kiocb *iocb, long res, long res2) ctx->tail = tail; ring = kmap_atomic(ctx->ring_pages[0]); + head = ring->head; ring->tail = tail; kunmap_atomic(ring); flush_dcache_page(ctx->ring_pages[0]); + ctx->completed_events++; + if (ctx->completed_events > 1) + refill_reqs_available(ctx, head, tail); spin_unlock_irqrestore(&ctx->completion_lock, flags); pr_debug("added to ring %p at [%u]\n", iocb, tail); @@ -1030,7 +1080,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2) /* everything turned out well, dispose of the aiocb. */ kiocb_free(iocb); - put_reqs_available(ctx, 1); /* * We have to order our ring_info tail store above and test @@ -1047,7 +1096,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) } EXPORT_SYMBOL(aio_complete); -/* aio_read_events +/* aio_read_events_ring * Pull an event off of the ioctx's event ring. Returns the number of * events fetched */ @@ -1067,6 +1116,12 @@ static long aio_read_events_ring(struct kioctx *ctx, tail = ring->tail; kunmap_atomic(ring); + /* + * Ensure that once we've read the current tail pointer, that + * we also see the events that were stored up to the tail. + */ + smp_rmb(); + pr_debug("h%u t%u m%u\n", head, tail, ctx->nr_events); if (head == tail) @@ -1270,12 +1325,12 @@ static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb, if (compat) ret = compat_rw_copy_check_uvector(rw, (struct compat_iovec __user *)buf, - *nr_segs, 1, *iovec, iovec); + *nr_segs, UIO_FASTIOV, *iovec, iovec); else #endif ret = rw_copy_check_uvector(rw, (struct iovec __user *)buf, - *nr_segs, 1, *iovec, iovec); + *nr_segs, UIO_FASTIOV, *iovec, iovec); if (ret < 0) return ret; @@ -1299,9 +1354,8 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb, } /* - * aio_setup_iocb: - * Performs the initial checks and aio retry method - * setup for the kiocb at the time of io submission. + * aio_run_iocb: + * Performs the initial checks and io submission. */ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, char __user *buf, bool compat) @@ -1313,7 +1367,7 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, fmode_t mode; aio_rw_op *rw_op; rw_iter_op *iter_op; - struct iovec inline_vec, *iovec = &inline_vec; + struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct iov_iter iter; switch (opcode) { @@ -1348,7 +1402,7 @@ rw_common: if (!ret) ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes); if (ret < 0) { - if (iovec != &inline_vec) + if (iovec != inline_vecs) kfree(iovec); return ret; } @@ -1395,7 +1449,7 @@ rw_common: return -EINVAL; } - if (iovec != &inline_vec) + if (iovec != inline_vecs) kfree(iovec); if (ret != -EIOCBQUEUED) { diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 7c93953030f..afd2b4408ad 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -218,8 +218,9 @@ static int bad_inode_mknod (struct inode *dir, struct dentry *dentry, return -EIO; } -static int bad_inode_rename (struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) +static int bad_inode_rename2(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) { return -EIO; } @@ -279,7 +280,7 @@ static const struct inode_operations bad_inode_ops = .mkdir = bad_inode_mkdir, .rmdir = bad_inode_rmdir, .mknod = bad_inode_mknod, - .rename = bad_inode_rename, + .rename2 = bad_inode_rename2, .readlink = bad_inode_readlink, /* follow_link must be no-op, otherwise unmounting this inode won't work */ diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 5a201d81049..fbd76ded9a3 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -22,7 +22,6 @@ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/freezer.h> -#include <linux/workqueue.h> #include "async-thread.h" #include "ctree.h" @@ -55,8 +54,39 @@ struct btrfs_workqueue { struct __btrfs_workqueue *high; }; -static inline struct __btrfs_workqueue -*__btrfs_alloc_workqueue(const char *name, int flags, int max_active, +static void normal_work_helper(struct btrfs_work *work); + +#define BTRFS_WORK_HELPER(name) \ +void btrfs_##name(struct work_struct *arg) \ +{ \ + struct btrfs_work *work = container_of(arg, struct btrfs_work, \ + normal_work); \ + normal_work_helper(work); \ +} + +BTRFS_WORK_HELPER(worker_helper); +BTRFS_WORK_HELPER(delalloc_helper); +BTRFS_WORK_HELPER(flush_delalloc_helper); +BTRFS_WORK_HELPER(cache_helper); +BTRFS_WORK_HELPER(submit_helper); +BTRFS_WORK_HELPER(fixup_helper); +BTRFS_WORK_HELPER(endio_helper); +BTRFS_WORK_HELPER(endio_meta_helper); +BTRFS_WORK_HELPER(endio_meta_write_helper); +BTRFS_WORK_HELPER(endio_raid56_helper); +BTRFS_WORK_HELPER(rmw_helper); +BTRFS_WORK_HELPER(endio_write_helper); +BTRFS_WORK_HELPER(freespace_write_helper); +BTRFS_WORK_HELPER(delayed_meta_helper); +BTRFS_WORK_HELPER(readahead_helper); +BTRFS_WORK_HELPER(qgroup_rescan_helper); +BTRFS_WORK_HELPER(extent_refs_helper); +BTRFS_WORK_HELPER(scrub_helper); +BTRFS_WORK_HELPER(scrubwrc_helper); +BTRFS_WORK_HELPER(scrubnc_helper); + +static struct __btrfs_workqueue * +__btrfs_alloc_workqueue(const char *name, int flags, int max_active, int thresh) { struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); @@ -232,13 +262,11 @@ static void run_ordered_work(struct __btrfs_workqueue *wq) spin_unlock_irqrestore(lock, flags); } -static void normal_work_helper(struct work_struct *arg) +static void normal_work_helper(struct btrfs_work *work) { - struct btrfs_work *work; struct __btrfs_workqueue *wq; int need_order = 0; - work = container_of(arg, struct btrfs_work, normal_work); /* * We should not touch things inside work in the following cases: * 1) after work->func() if it has no ordered_free @@ -262,7 +290,7 @@ static void normal_work_helper(struct work_struct *arg) trace_btrfs_all_work_done(work); } -void btrfs_init_work(struct btrfs_work *work, +void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func, btrfs_func_t func, btrfs_func_t ordered_func, btrfs_func_t ordered_free) @@ -270,7 +298,7 @@ void btrfs_init_work(struct btrfs_work *work, work->func = func; work->ordered_func = ordered_func; work->ordered_free = ordered_free; - INIT_WORK(&work->normal_work, normal_work_helper); + INIT_WORK(&work->normal_work, uniq_func); INIT_LIST_HEAD(&work->ordered_list); work->flags = 0; } diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 9c6b66d15fb..e9e31c94758 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -19,12 +19,14 @@ #ifndef __BTRFS_ASYNC_THREAD_ #define __BTRFS_ASYNC_THREAD_ +#include <linux/workqueue.h> struct btrfs_workqueue; /* Internal use only */ struct __btrfs_workqueue; struct btrfs_work; typedef void (*btrfs_func_t)(struct btrfs_work *arg); +typedef void (*btrfs_work_func_t)(struct work_struct *arg); struct btrfs_work { btrfs_func_t func; @@ -38,11 +40,35 @@ struct btrfs_work { unsigned long flags; }; +#define BTRFS_WORK_HELPER_PROTO(name) \ +void btrfs_##name(struct work_struct *arg) + +BTRFS_WORK_HELPER_PROTO(worker_helper); +BTRFS_WORK_HELPER_PROTO(delalloc_helper); +BTRFS_WORK_HELPER_PROTO(flush_delalloc_helper); +BTRFS_WORK_HELPER_PROTO(cache_helper); +BTRFS_WORK_HELPER_PROTO(submit_helper); +BTRFS_WORK_HELPER_PROTO(fixup_helper); +BTRFS_WORK_HELPER_PROTO(endio_helper); +BTRFS_WORK_HELPER_PROTO(endio_meta_helper); +BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper); +BTRFS_WORK_HELPER_PROTO(endio_raid56_helper); +BTRFS_WORK_HELPER_PROTO(rmw_helper); +BTRFS_WORK_HELPER_PROTO(endio_write_helper); +BTRFS_WORK_HELPER_PROTO(freespace_write_helper); +BTRFS_WORK_HELPER_PROTO(delayed_meta_helper); +BTRFS_WORK_HELPER_PROTO(readahead_helper); +BTRFS_WORK_HELPER_PROTO(qgroup_rescan_helper); +BTRFS_WORK_HELPER_PROTO(extent_refs_helper); +BTRFS_WORK_HELPER_PROTO(scrub_helper); +BTRFS_WORK_HELPER_PROTO(scrubwrc_helper); +BTRFS_WORK_HELPER_PROTO(scrubnc_helper); + struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, int flags, int max_active, int thresh); -void btrfs_init_work(struct btrfs_work *work, +void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper, btrfs_func_t func, btrfs_func_t ordered_func, btrfs_func_t ordered_free); diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index e25564bfcb4..54a201dac7f 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -276,9 +276,8 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, } if (ret > 0) goto next; - ret = ulist_add_merge(parents, eb->start, - (uintptr_t)eie, - (u64 *)&old, GFP_NOFS); + ret = ulist_add_merge_ptr(parents, eb->start, + eie, (void **)&old, GFP_NOFS); if (ret < 0) break; if (!ret && extent_item_pos) { @@ -1001,16 +1000,19 @@ again: ret = -EIO; goto out; } + btrfs_tree_read_lock(eb); + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); ret = find_extent_in_eb(eb, bytenr, *extent_item_pos, &eie); + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); if (ret < 0) goto out; ref->inode_list = eie; } - ret = ulist_add_merge(refs, ref->parent, - (uintptr_t)ref->inode_list, - (u64 *)&eie, GFP_NOFS); + ret = ulist_add_merge_ptr(refs, ref->parent, + ref->inode_list, + (void **)&eie, GFP_NOFS); if (ret < 0) goto out; if (!ret && extent_item_pos) { diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 4794923c410..43527fd7882 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -84,12 +84,6 @@ struct btrfs_inode { */ struct list_head delalloc_inodes; - /* - * list for tracking inodes that must be sent to disk before a - * rename or truncate commit - */ - struct list_head ordered_operations; - /* node for the red-black tree that links inodes in subvolume root */ struct rb_node rb_node; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index aeab453b8e2..44ee5d2e52a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -280,9 +280,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, WARN_ON(btrfs_header_generation(buf) > trans->transid); if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) - ret = btrfs_inc_ref(trans, root, cow, 1, 1); + ret = btrfs_inc_ref(trans, root, cow, 1); else - ret = btrfs_inc_ref(trans, root, cow, 0, 1); + ret = btrfs_inc_ref(trans, root, cow, 0); if (ret) return ret; @@ -1035,14 +1035,14 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, if ((owner == root->root_key.objectid || root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) { - ret = btrfs_inc_ref(trans, root, buf, 1, 1); + ret = btrfs_inc_ref(trans, root, buf, 1); BUG_ON(ret); /* -ENOMEM */ if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { - ret = btrfs_dec_ref(trans, root, buf, 0, 1); + ret = btrfs_dec_ref(trans, root, buf, 0); BUG_ON(ret); /* -ENOMEM */ - ret = btrfs_inc_ref(trans, root, cow, 1, 1); + ret = btrfs_inc_ref(trans, root, cow, 1); BUG_ON(ret); /* -ENOMEM */ } new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; @@ -1050,9 +1050,9 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) - ret = btrfs_inc_ref(trans, root, cow, 1, 1); + ret = btrfs_inc_ref(trans, root, cow, 1); else - ret = btrfs_inc_ref(trans, root, cow, 0, 1); + ret = btrfs_inc_ref(trans, root, cow, 0); BUG_ON(ret); /* -ENOMEM */ } if (new_flags != 0) { @@ -1069,11 +1069,11 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) - ret = btrfs_inc_ref(trans, root, cow, 1, 1); + ret = btrfs_inc_ref(trans, root, cow, 1); else - ret = btrfs_inc_ref(trans, root, cow, 0, 1); + ret = btrfs_inc_ref(trans, root, cow, 0); BUG_ON(ret); /* -ENOMEM */ - ret = btrfs_dec_ref(trans, root, buf, 1, 1); + ret = btrfs_dec_ref(trans, root, buf, 1); BUG_ON(ret); /* -ENOMEM */ } clean_tree_block(trans, root, buf); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index be91397f4e9..8e29b614fe9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3326,9 +3326,9 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, u64 min_alloc_size, u64 empty_size, u64 hint_byte, struct btrfs_key *ins, int is_data, int delalloc); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf, int full_backref, int no_quota); + struct extent_buffer *buf, int full_backref); int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf, int full_backref, int no_quota); + struct extent_buffer *buf, int full_backref); int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 flags, diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index da775bfdebc..a2e90f855d7 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1395,8 +1395,8 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, return -ENOMEM; async_work->delayed_root = delayed_root; - btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, - NULL, NULL); + btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper, + btrfs_async_run_delayed_root, NULL, NULL); async_work->nr = nr; btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 08e65e9cf2a..a1d36e62179 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -39,7 +39,6 @@ #include "btrfs_inode.h" #include "volumes.h" #include "print-tree.h" -#include "async-thread.h" #include "locking.h" #include "tree-log.h" #include "free-space-cache.h" @@ -60,8 +59,6 @@ static void end_workqueue_fn(struct btrfs_work *work); static void free_fs_root(struct btrfs_root *root); static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, int read_only); -static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, - struct btrfs_root *root); static void btrfs_destroy_ordered_extents(struct btrfs_root *root); static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, struct btrfs_root *root); @@ -695,35 +692,41 @@ static void end_workqueue_bio(struct bio *bio, int err) { struct end_io_wq *end_io_wq = bio->bi_private; struct btrfs_fs_info *fs_info; + struct btrfs_workqueue *wq; + btrfs_work_func_t func; fs_info = end_io_wq->info; end_io_wq->error = err; - btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL); if (bio->bi_rw & REQ_WRITE) { - if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) - btrfs_queue_work(fs_info->endio_meta_write_workers, - &end_io_wq->work); - else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) - btrfs_queue_work(fs_info->endio_freespace_worker, - &end_io_wq->work); - else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) - btrfs_queue_work(fs_info->endio_raid56_workers, - &end_io_wq->work); - else - btrfs_queue_work(fs_info->endio_write_workers, - &end_io_wq->work); + if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) { + wq = fs_info->endio_meta_write_workers; + func = btrfs_endio_meta_write_helper; + } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) { + wq = fs_info->endio_freespace_worker; + func = btrfs_freespace_write_helper; + } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) { + wq = fs_info->endio_raid56_workers; + func = btrfs_endio_raid56_helper; + } else { + wq = fs_info->endio_write_workers; + func = btrfs_endio_write_helper; + } } else { - if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) - btrfs_queue_work(fs_info->endio_raid56_workers, - &end_io_wq->work); - else if (end_io_wq->metadata) - btrfs_queue_work(fs_info->endio_meta_workers, - &end_io_wq->work); - else - btrfs_queue_work(fs_info->endio_workers, - &end_io_wq->work); + if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) { + wq = fs_info->endio_raid56_workers; + func = btrfs_endio_raid56_helper; + } else if (end_io_wq->metadata) { + wq = fs_info->endio_meta_workers; + func = btrfs_endio_meta_helper; + } else { + wq = fs_info->endio_workers; + func = btrfs_endio_helper; + } } + + btrfs_init_work(&end_io_wq->work, func, end_workqueue_fn, NULL, NULL); + btrfs_queue_work(wq, &end_io_wq->work); } /* @@ -830,7 +833,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, async->submit_bio_start = submit_bio_start; async->submit_bio_done = submit_bio_done; - btrfs_init_work(&async->work, run_one_async_start, + btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start, run_one_async_done, run_one_async_free); async->bio_flags = bio_flags; @@ -3452,7 +3455,8 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) btrfs_set_stack_device_generation(dev_item, 0); btrfs_set_stack_device_type(dev_item, dev->type); btrfs_set_stack_device_id(dev_item, dev->devid); - btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes); + btrfs_set_stack_device_total_bytes(dev_item, + dev->disk_total_bytes); btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); btrfs_set_stack_device_io_align(dev_item, dev->io_align); btrfs_set_stack_device_io_width(dev_item, dev->io_width); @@ -3829,34 +3833,6 @@ static void btrfs_error_commit_super(struct btrfs_root *root) btrfs_cleanup_transaction(root); } -static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, - struct btrfs_root *root) -{ - struct btrfs_inode *btrfs_inode; - struct list_head splice; - - INIT_LIST_HEAD(&splice); - - mutex_lock(&root->fs_info->ordered_operations_mutex); - spin_lock(&root->fs_info->ordered_root_lock); - - list_splice_init(&t->ordered_operations, &splice); - while (!list_empty(&splice)) { - btrfs_inode = list_entry(splice.next, struct btrfs_inode, - ordered_operations); - - list_del_init(&btrfs_inode->ordered_operations); - spin_unlock(&root->fs_info->ordered_root_lock); - - btrfs_invalidate_inodes(btrfs_inode->root); - - spin_lock(&root->fs_info->ordered_root_lock); - } - - spin_unlock(&root->fs_info->ordered_root_lock); - mutex_unlock(&root->fs_info->ordered_operations_mutex); -} - static void btrfs_destroy_ordered_extents(struct btrfs_root *root) { struct btrfs_ordered_extent *ordered; @@ -4093,8 +4069,6 @@ again: void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, struct btrfs_root *root) { - btrfs_destroy_ordered_operations(cur_trans, root); - btrfs_destroy_delayed_refs(cur_trans, root); cur_trans->state = TRANS_STATE_COMMIT_START; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 813537f362f..3efe1c3877b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -552,7 +552,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, caching_ctl->block_group = cache; caching_ctl->progress = cache->key.objectid; atomic_set(&caching_ctl->count, 1); - btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL); + btrfs_init_work(&caching_ctl->work, btrfs_cache_helper, + caching_thread, NULL, NULL); spin_lock(&cache->lock); /* @@ -2749,8 +2750,8 @@ int btrfs_async_run_delayed_refs(struct btrfs_root *root, async->sync = 0; init_completion(&async->wait); - btrfs_init_work(&async->work, delayed_ref_async_start, - NULL, NULL); + btrfs_init_work(&async->work, btrfs_extent_refs_helper, + delayed_ref_async_start, NULL, NULL); btrfs_queue_work(root->fs_info->extent_workers, &async->work); @@ -3057,7 +3058,7 @@ out: static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, - int full_backref, int inc, int no_quota) + int full_backref, int inc) { u64 bytenr; u64 num_bytes; @@ -3111,7 +3112,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, key.offset -= btrfs_file_extent_offset(buf, fi); ret = process_func(trans, root, bytenr, num_bytes, parent, ref_root, key.objectid, - key.offset, no_quota); + key.offset, 1); if (ret) goto fail; } else { @@ -3119,7 +3120,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, num_bytes = btrfs_level_size(root, level - 1); ret = process_func(trans, root, bytenr, num_bytes, parent, ref_root, level - 1, 0, - no_quota); + 1); if (ret) goto fail; } @@ -3130,15 +3131,15 @@ fail: } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf, int full_backref, int no_quota) + struct extent_buffer *buf, int full_backref) { - return __btrfs_mod_ref(trans, root, buf, full_backref, 1, no_quota); + return __btrfs_mod_ref(trans, root, buf, full_backref, 1); } int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf, int full_backref, int no_quota) + struct extent_buffer *buf, int full_backref) { - return __btrfs_mod_ref(trans, root, buf, full_backref, 0, no_quota); + return __btrfs_mod_ref(trans, root, buf, full_backref, 0); } static int write_one_cache_group(struct btrfs_trans_handle *trans, @@ -3586,13 +3587,7 @@ static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags) */ static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) { - /* - * we add in the count of missing devices because we want - * to make sure that any RAID levels on a degraded FS - * continue to be honored. - */ - u64 num_devices = root->fs_info->fs_devices->rw_devices + - root->fs_info->fs_devices->missing_devices; + u64 num_devices = root->fs_info->fs_devices->rw_devices; u64 target; u64 tmp; @@ -7478,6 +7473,220 @@ reada: wc->reada_slot = slot; } +static int account_leaf_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *eb) +{ + int nr = btrfs_header_nritems(eb); + int i, extent_type, ret; + struct btrfs_key key; + struct btrfs_file_extent_item *fi; + u64 bytenr, num_bytes; + + for (i = 0; i < nr; i++) { + btrfs_item_key_to_cpu(eb, &key, i); + + if (key.type != BTRFS_EXTENT_DATA_KEY) + continue; + + fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); + /* filter out non qgroup-accountable extents */ + extent_type = btrfs_file_extent_type(eb, fi); + + if (extent_type == BTRFS_FILE_EXTENT_INLINE) + continue; + + bytenr = btrfs_file_extent_disk_bytenr(eb, fi); + if (!bytenr) + continue; + + num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); + + ret = btrfs_qgroup_record_ref(trans, root->fs_info, + root->objectid, + bytenr, num_bytes, + BTRFS_QGROUP_OPER_SUB_SUBTREE, 0); + if (ret) + return ret; + } + return 0; +} + +/* + * Walk up the tree from the bottom, freeing leaves and any interior + * nodes which have had all slots visited. If a node (leaf or + * interior) is freed, the node above it will have it's slot + * incremented. The root node will never be freed. + * + * At the end of this function, we should have a path which has all + * slots incremented to the next position for a search. If we need to + * read a new node it will be NULL and the node above it will have the + * correct slot selected for a later read. + * + * If we increment the root nodes slot counter past the number of + * elements, 1 is returned to signal completion of the search. + */ +static int adjust_slots_upwards(struct btrfs_root *root, + struct btrfs_path *path, int root_level) +{ + int level = 0; + int nr, slot; + struct extent_buffer *eb; + + if (root_level == 0) + return 1; + + while (level <= root_level) { + eb = path->nodes[level]; + nr = btrfs_header_nritems(eb); + path->slots[level]++; + slot = path->slots[level]; + if (slot >= nr || level == 0) { + /* + * Don't free the root - we will detect this + * condition after our loop and return a + * positive value for caller to stop walking the tree. + */ + if (level != root_level) { + btrfs_tree_unlock_rw(eb, path->locks[level]); + path->locks[level] = 0; + + free_extent_buffer(eb); + path->nodes[level] = NULL; + path->slots[level] = 0; + } + } else { + /* + * We have a valid slot to walk back down + * from. Stop here so caller can process these + * new nodes. + */ + break; + } + + level++; + } + + eb = path->nodes[root_level]; + if (path->slots[root_level] >= btrfs_header_nritems(eb)) + return 1; + + return 0; +} + +/* + * root_eb is the subtree root and is locked before this function is called. + */ +static int account_shared_subtree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *root_eb, + u64 root_gen, + int root_level) +{ + int ret = 0; + int level; + struct extent_buffer *eb = root_eb; + struct btrfs_path *path = NULL; + + BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL); + BUG_ON(root_eb == NULL); + + if (!root->fs_info->quota_enabled) + return 0; + + if (!extent_buffer_uptodate(root_eb)) { + ret = btrfs_read_buffer(root_eb, root_gen); + if (ret) + goto out; + } + + if (root_level == 0) { + ret = account_leaf_items(trans, root, root_eb); + goto out; + } + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + /* + * Walk down the tree. Missing extent blocks are filled in as + * we go. Metadata is accounted every time we read a new + * extent block. + * + * When we reach a leaf, we account for file extent items in it, + * walk back up the tree (adjusting slot pointers as we go) + * and restart the search process. + */ + extent_buffer_get(root_eb); /* For path */ + path->nodes[root_level] = root_eb; + path->slots[root_level] = 0; + path->locks[root_level] = 0; /* so release_path doesn't try to unlock */ +walk_down: + level = root_level; + while (level >= 0) { + if (path->nodes[level] == NULL) { + int child_bsize = root->nodesize; + int parent_slot; + u64 child_gen; + u64 child_bytenr; + + /* We need to get child blockptr/gen from + * parent before we can read it. */ + eb = path->nodes[level + 1]; + parent_slot = path->slots[level + 1]; + child_bytenr = btrfs_node_blockptr(eb, parent_slot); + child_gen = btrfs_node_ptr_generation(eb, parent_slot); + + eb = read_tree_block(root, child_bytenr, child_bsize, + child_gen); + if (!eb || !extent_buffer_uptodate(eb)) { + ret = -EIO; + goto out; + } + + path->nodes[level] = eb; + path->slots[level] = 0; + + btrfs_tree_read_lock(eb); + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); + path->locks[level] = BTRFS_READ_LOCK_BLOCKING; + + ret = btrfs_qgroup_record_ref(trans, root->fs_info, + root->objectid, + child_bytenr, + child_bsize, + BTRFS_QGROUP_OPER_SUB_SUBTREE, + 0); + if (ret) + goto out; + + } + + if (level == 0) { + ret = account_leaf_items(trans, root, path->nodes[level]); + if (ret) + goto out; + + /* Nonzero return here means we completed our search */ + ret = adjust_slots_upwards(root, path, root_level); + if (ret) + break; + + /* Restart search with new slots */ + goto walk_down; + } + + level--; + } + + ret = 0; +out: + btrfs_free_path(path); + + return ret; +} + /* * helper to process tree block while walking down the tree. * @@ -7532,9 +7741,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, /* wc->stage == UPDATE_BACKREF */ if (!(wc->flags[level] & flag)) { BUG_ON(!path->locks[level]); - ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc); + ret = btrfs_inc_ref(trans, root, eb, 1); BUG_ON(ret); /* -ENOMEM */ - ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc); + ret = btrfs_dec_ref(trans, root, eb, 0); BUG_ON(ret); /* -ENOMEM */ ret = btrfs_set_disk_extent_flags(trans, root, eb->start, eb->len, flag, @@ -7581,6 +7790,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, int level = wc->level; int reada = 0; int ret = 0; + bool need_account = false; generation = btrfs_node_ptr_generation(path->nodes[level], path->slots[level]); @@ -7626,6 +7836,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, if (wc->stage == DROP_REFERENCE) { if (wc->refs[level - 1] > 1) { + need_account = true; if (level == 1 && (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) goto skip; @@ -7689,6 +7900,16 @@ skip: parent = 0; } + if (need_account) { + ret = account_shared_subtree(trans, root, next, + generation, level - 1); + if (ret) { + printk_ratelimited(KERN_ERR "BTRFS: %s Error " + "%d accounting shared subtree. Quota " + "is out of sync, rescan required.\n", + root->fs_info->sb->s_id, ret); + } + } ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, root->root_key.objectid, level - 1, 0, 0); BUG_ON(ret); /* -ENOMEM */ @@ -7769,12 +7990,17 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, if (wc->refs[level] == 1) { if (level == 0) { if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) - ret = btrfs_dec_ref(trans, root, eb, 1, - wc->for_reloc); + ret = btrfs_dec_ref(trans, root, eb, 1); else - ret = btrfs_dec_ref(trans, root, eb, 0, - wc->for_reloc); + ret = btrfs_dec_ref(trans, root, eb, 0); BUG_ON(ret); /* -ENOMEM */ + ret = account_leaf_items(trans, root, eb); + if (ret) { + printk_ratelimited(KERN_ERR "BTRFS: %s Error " + "%d accounting leaf items. Quota " + "is out of sync, rescan required.\n", + root->fs_info->sb->s_id, ret); + } } /* make block locked assertion in clean_tree_block happy */ if (!path->locks[level] && @@ -7900,6 +8126,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int level; bool root_dropped = false; + btrfs_debug(root->fs_info, "Drop subvolume %llu", root->objectid); + path = btrfs_alloc_path(); if (!path) { err = -ENOMEM; @@ -8025,6 +8253,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root, goto out_end_trans; } + /* + * Qgroup update accounting is run from + * delayed ref handling. This usually works + * out because delayed refs are normally the + * only way qgroup updates are added. However, + * we may have added updates during our tree + * walk so run qgroups here to make sure we + * don't lose any updates. + */ + ret = btrfs_delayed_qgroup_accounting(trans, + root->fs_info); + if (ret) + printk_ratelimited(KERN_ERR "BTRFS: Failure %d " + "running qgroup updates " + "during snapshot delete. " + "Quota is out of sync, " + "rescan required.\n", ret); + btrfs_end_transaction_throttle(trans, tree_root); if (!for_reloc && btrfs_need_cleaner_sleep(root)) { pr_debug("BTRFS: drop snapshot early exit\n"); @@ -8078,6 +8324,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root, } root_dropped = true; out_end_trans: + ret = btrfs_delayed_qgroup_accounting(trans, tree_root->fs_info); + if (ret) + printk_ratelimited(KERN_ERR "BTRFS: Failure %d " + "running qgroup updates " + "during snapshot delete. " + "Quota is out of sync, " + "rescan required.\n", ret); + btrfs_end_transaction_throttle(trans, tree_root); out_free: kfree(wc); @@ -8181,13 +8435,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) if (stripped) return extended_to_chunk(stripped); - /* - * we add in the count of missing devices because we want - * to make sure that any RAID levels on a degraded FS - * continue to be honored. - */ - num_devices = root->fs_info->fs_devices->rw_devices + - root->fs_info->fs_devices->missing_devices; + num_devices = root->fs_info->fs_devices->rw_devices; stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3e11aab9f39..af0359dcf33 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2532,6 +2532,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) test_bit(BIO_UPTODATE, &bio->bi_flags); if (err) uptodate = 0; + offset += len; continue; } } @@ -4207,8 +4208,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, return -ENOMEM; path->leave_spinning = 1; - start = ALIGN(start, BTRFS_I(inode)->root->sectorsize); - len = ALIGN(len, BTRFS_I(inode)->root->sectorsize); + start = round_down(start, BTRFS_I(inode)->root->sectorsize); + len = round_up(max, BTRFS_I(inode)->root->sectorsize) - start; /* * lookup the last file extent. We're not using i_size here diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index f46cfe45d68..54c84daec9b 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -756,7 +756,7 @@ again: found_next = 1; if (ret != 0) goto insert; - slot = 0; + slot = path->slots[0]; } btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot); if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1f2b99cb55e..ff1cc0399b9 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1838,6 +1838,8 @@ out: int btrfs_release_file(struct inode *inode, struct file *filp) { + if (filp->private_data) + btrfs_ioctl_trans_end(filp); /* * ordered_data_close is set by settattr when we are about to truncate * a file from a non-zero size to a zero size. This tries to @@ -1845,26 +1847,8 @@ int btrfs_release_file(struct inode *inode, struct file *filp) * application were using truncate to replace a file in place. */ if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, - &BTRFS_I(inode)->runtime_flags)) { - struct btrfs_trans_handle *trans; - struct btrfs_root *root = BTRFS_I(inode)->root; - - /* - * We need to block on a committing transaction to keep us from - * throwing a ordered operation on to the list and causing - * something like sync to deadlock trying to flush out this - * inode. - */ - trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) - return PTR_ERR(trans); - btrfs_add_ordered_operation(trans, BTRFS_I(inode)->root, inode); - btrfs_end_transaction(trans, root); - if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) + &BTRFS_I(inode)->runtime_flags)) filemap_flush(inode->i_mapping); - } - if (filp->private_data) - btrfs_ioctl_trans_end(filp); return 0; } @@ -1982,7 +1966,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) btrfs_init_log_ctx(&ctx); - ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx); + ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx); if (ret < 0) { /* Fallthrough and commit/free transaction. */ ret = 1; @@ -2112,10 +2096,9 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, goto out; } - if (hole_mergeable(inode, leaf, path->slots[0]+1, offset, end)) { + if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) { u64 num_bytes; - path->slots[0]++; key.offset = offset; btrfs_set_item_key_safe(root, path, &key); fi = btrfs_item_ptr(leaf, path->slots[0], @@ -2240,7 +2223,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) goto out_only_mutex; } - lockstart = round_up(offset , BTRFS_I(inode)->root->sectorsize); + lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize); lockend = round_down(offset + len, BTRFS_I(inode)->root->sectorsize) - 1; same_page = ((offset >> PAGE_CACHE_SHIFT) == @@ -2301,7 +2284,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) tail_start + tail_len, 0, 1); if (ret) goto out_only_mutex; - } + } } } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3668048e16f..016c403bfe7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -709,6 +709,18 @@ retry: unlock_extent(io_tree, async_extent->start, async_extent->start + async_extent->ram_size - 1); + + /* + * we need to redirty the pages if we decide to + * fallback to uncompressed IO, otherwise we + * will not submit these pages down to lower + * layers. + */ + extent_range_redirty_for_io(inode, + async_extent->start, + async_extent->start + + async_extent->ram_size - 1); + goto retry; } goto out_free; @@ -766,8 +778,12 @@ retry: ins.offset, BTRFS_ORDERED_COMPRESSED, async_extent->compress_type); - if (ret) + if (ret) { + btrfs_drop_extent_cache(inode, async_extent->start, + async_extent->start + + async_extent->ram_size - 1, 0); goto out_free_reserve; + } /* * clear dirty, set writeback and unlock the pages. @@ -959,14 +975,14 @@ static noinline int cow_file_range(struct inode *inode, ret = btrfs_add_ordered_extent(inode, start, ins.objectid, ram_size, cur_alloc_size, 0); if (ret) - goto out_reserve; + goto out_drop_extent_cache; if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) { ret = btrfs_reloc_clone_csums(inode, start, cur_alloc_size); if (ret) - goto out_reserve; + goto out_drop_extent_cache; } if (disk_num_bytes < cur_alloc_size) @@ -994,6 +1010,8 @@ static noinline int cow_file_range(struct inode *inode, out: return ret; +out_drop_extent_cache: + btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0); out_reserve: btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); out_unlock: @@ -1084,8 +1102,10 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, async_cow->end = cur_end; INIT_LIST_HEAD(&async_cow->extents); - btrfs_init_work(&async_cow->work, async_cow_start, - async_cow_submit, async_cow_free); + btrfs_init_work(&async_cow->work, + btrfs_delalloc_helper, + async_cow_start, async_cow_submit, + async_cow_free); nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> PAGE_CACHE_SHIFT; @@ -1869,7 +1889,8 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) SetPageChecked(page); page_cache_get(page); - btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL); + btrfs_init_work(&fixup->work, btrfs_fixup_helper, + btrfs_writepage_fixup_worker, NULL, NULL); fixup->page = page; btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work); return -EBUSY; @@ -2810,7 +2831,8 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, struct inode *inode = page->mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ordered_extent *ordered_extent = NULL; - struct btrfs_workqueue *workers; + struct btrfs_workqueue *wq; + btrfs_work_func_t func; trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); @@ -2819,13 +2841,17 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, end - start + 1, uptodate)) return 0; - btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL); + if (btrfs_is_free_space_inode(inode)) { + wq = root->fs_info->endio_freespace_worker; + func = btrfs_freespace_write_helper; + } else { + wq = root->fs_info->endio_write_workers; + func = btrfs_endio_write_helper; + } - if (btrfs_is_free_space_inode(inode)) - workers = root->fs_info->endio_freespace_worker; - else - workers = root->fs_info->endio_write_workers; - btrfs_queue_work(workers, &ordered_extent->work); + btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL, + NULL); + btrfs_queue_work(wq, &ordered_extent->work); return 0; } @@ -4222,7 +4248,8 @@ out: btrfs_abort_transaction(trans, root, ret); } error: - if (last_size != (u64)-1) + if (last_size != (u64)-1 && + root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) btrfs_ordered_update_i_size(inode, last_size, NULL); btrfs_free_path(path); return err; @@ -4662,6 +4689,11 @@ static void evict_inode_truncate_pages(struct inode *inode) clear_bit(EXTENT_FLAG_LOGGING, &em->flags); remove_extent_mapping(map_tree, em); free_extent_map(em); + if (need_resched()) { + write_unlock(&map_tree->lock); + cond_resched(); + write_lock(&map_tree->lock); + } } write_unlock(&map_tree->lock); @@ -4684,6 +4716,7 @@ static void evict_inode_truncate_pages(struct inode *inode) &cached_state, GFP_NOFS); free_extent_state(state); + cond_resched(); spin_lock(&io_tree->lock); } spin_unlock(&io_tree->lock); @@ -5169,6 +5202,42 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) iput(inode); inode = ERR_PTR(ret); } + /* + * If orphan cleanup did remove any orphans, it means the tree + * was modified and therefore the commit root is not the same as + * the current root anymore. This is a problem, because send + * uses the commit root and therefore can see inode items that + * don't exist in the current root anymore, and for example make + * calls to btrfs_iget, which will do tree lookups based on the + * current root and not on the commit root. Those lookups will + * fail, returning a -ESTALE error, and making send fail with + * that error. So make sure a send does not see any orphans we + * have just removed, and that it will see the same inodes + * regardless of whether a transaction commit happened before + * it started (meaning that the commit root will be the same as + * the current root) or not. + */ + if (sub_root->node != sub_root->commit_root) { + u64 sub_flags = btrfs_root_flags(&sub_root->root_item); + + if (sub_flags & BTRFS_ROOT_SUBVOL_RDONLY) { + struct extent_buffer *eb; + + /* + * Assert we can't have races between dentry + * lookup called through the snapshot creation + * ioctl and the VFS. + */ + ASSERT(mutex_is_locked(&dir->i_mutex)); + + down_write(&root->fs_info->commit_root_sem); + eb = sub_root->commit_root; + sub_root->commit_root = + btrfs_root_node(sub_root); + up_write(&root->fs_info->commit_root_sem); + free_extent_buffer(eb); + } + } } return inode; @@ -5565,6 +5634,17 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index) return ret; } +static int btrfs_insert_inode_locked(struct inode *inode) +{ + struct btrfs_iget_args args; + args.location = &BTRFS_I(inode)->location; + args.root = BTRFS_I(inode)->root; + + return insert_inode_locked4(inode, + btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root), + btrfs_find_actor, &args); +} + static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *dir, @@ -5594,6 +5674,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, } /* + * O_TMPFILE, set link count to 0, so that after this point, + * we fill in an inode item with the correct link count. + */ + if (!name) + set_nlink(inode, 0); + + /* * we have to initialize this early, so we can reclaim the inode * number if we fail afterwards in this function. */ @@ -5650,10 +5737,19 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, sizes[1] = name_len + sizeof(*ref); } + location = &BTRFS_I(inode)->location; + location->objectid = objectid; + location->offset = 0; + btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); + + ret = btrfs_insert_inode_locked(inode); + if (ret < 0) + goto fail; + path->leave_spinning = 1; ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems); if (ret != 0) - goto fail; + goto fail_unlock; inode_init_owner(inode, dir, mode); inode_set_bytes(inode, 0); @@ -5676,11 +5772,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); - location = &BTRFS_I(inode)->location; - location->objectid = objectid; - location->offset = 0; - btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); - btrfs_inherit_iflags(inode, dir); if (S_ISREG(mode)) { @@ -5691,7 +5782,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, BTRFS_INODE_NODATASUM; } - btrfs_insert_inode_hash(inode); inode_tree_add(inode); trace_btrfs_inode_new(inode); @@ -5706,6 +5796,9 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, btrfs_ino(inode), root->root_key.objectid, ret); return inode; + +fail_unlock: + unlock_new_inode(inode); fail: if (dir && name) BTRFS_I(dir)->index_cnt--; @@ -5840,28 +5933,28 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, goto out_unlock; } - err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); - if (err) { - drop_inode = 1; - goto out_unlock; - } - /* * If the active LSM wants to access the inode during * d_instantiate it needs these. Smack checks to see * if the filesystem supports xattrs by looking at the * ops vector. */ - inode->i_op = &btrfs_special_inode_operations; - err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); + init_special_inode(inode, inode->i_mode, rdev); + + err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); if (err) - drop_inode = 1; - else { - init_special_inode(inode, inode->i_mode, rdev); + goto out_unlock_inode; + + err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); + if (err) { + goto out_unlock_inode; + } else { btrfs_update_inode(trans, root, inode); + unlock_new_inode(inode); d_instantiate(dentry, inode); } + out_unlock: btrfs_end_transaction(trans, root); btrfs_balance_delayed_items(root); @@ -5871,6 +5964,12 @@ out_unlock: iput(inode); } return err; + +out_unlock_inode: + drop_inode = 1; + unlock_new_inode(inode); + goto out_unlock; + } static int btrfs_create(struct inode *dir, struct dentry *dentry, @@ -5905,15 +6004,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, goto out_unlock; } drop_inode_on_err = 1; - - err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); - if (err) - goto out_unlock; - - err = btrfs_update_inode(trans, root, inode); - if (err) - goto out_unlock; - /* * If the active LSM wants to access the inode during * d_instantiate it needs these. Smack checks to see @@ -5922,14 +6012,23 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, */ inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; + inode->i_mapping->a_ops = &btrfs_aops; + inode->i_mapping->backing_dev_info = &root->fs_info->bdi; + + err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); + if (err) + goto out_unlock_inode; + + err = btrfs_update_inode(trans, root, inode); + if (err) + goto out_unlock_inode; err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) - goto out_unlock; + goto out_unlock_inode; - inode->i_mapping->a_ops = &btrfs_aops; - inode->i_mapping->backing_dev_info = &root->fs_info->bdi; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; + unlock_new_inode(inode); d_instantiate(dentry, inode); out_unlock: @@ -5941,6 +6040,11 @@ out_unlock: btrfs_balance_delayed_items(root); btrfs_btree_balance_dirty(root); return err; + +out_unlock_inode: + unlock_new_inode(inode); + goto out_unlock; + } static int btrfs_link(struct dentry *old_dentry, struct inode *dir, @@ -6048,25 +6152,30 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) } drop_on_err = 1; + /* these must be set before we unlock the inode */ + inode->i_op = &btrfs_dir_inode_operations; + inode->i_fop = &btrfs_dir_file_operations; err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); if (err) - goto out_fail; - - inode->i_op = &btrfs_dir_inode_operations; - inode->i_fop = &btrfs_dir_file_operations; + goto out_fail_inode; btrfs_i_size_write(inode, 0); err = btrfs_update_inode(trans, root, inode); if (err) - goto out_fail; + goto out_fail_inode; err = btrfs_add_link(trans, dir, inode, dentry->d_name.name, dentry->d_name.len, 0, index); if (err) - goto out_fail; + goto out_fail_inode; d_instantiate(dentry, inode); + /* + * mkdir is special. We're unlocking after we call d_instantiate + * to avoid a race with nfsd calling d_instantiate. + */ + unlock_new_inode(inode); drop_on_err = 0; out_fail: @@ -6076,6 +6185,10 @@ out_fail: btrfs_balance_delayed_items(root); btrfs_btree_balance_dirty(root); return err; + +out_fail_inode: + unlock_new_inode(inode); + goto out_fail; } /* helper for btfs_get_extent. Given an existing extent in the tree, @@ -6085,14 +6198,14 @@ out_fail: static int merge_extent_mapping(struct extent_map_tree *em_tree, struct extent_map *existing, struct extent_map *em, - u64 map_start, u64 map_len) + u64 map_start) { u64 start_diff; BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); start_diff = map_start - em->start; em->start = map_start; - em->len = map_len; + em->len = existing->start - em->start; if (em->block_start < EXTENT_MAP_LAST_BYTE && !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { em->block_start += start_diff; @@ -6263,6 +6376,8 @@ next: goto not_found; if (start + len <= found_key.offset) goto not_found; + if (start > found_key.offset) + goto next; em->start = start; em->orig_start = start; em->len = found_key.offset - start; @@ -6378,8 +6493,7 @@ insert: em->len); if (existing) { err = merge_extent_mapping(em_tree, existing, - em, start, - root->sectorsize); + em, start); free_extent_map(existing); if (err) { free_extent_map(em); @@ -7146,7 +7260,8 @@ again: if (!ret) goto out_test; - btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL); + btrfs_init_work(&ordered->work, btrfs_endio_write_helper, + finish_ordered_fn, NULL, NULL); btrfs_queue_work(root->fs_info->endio_write_workers, &ordered->work); out_test: @@ -7294,10 +7409,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, map_length = orig_bio->bi_iter.bi_size; ret = btrfs_map_block(root->fs_info, rw, start_sector << 9, &map_length, NULL, 0); - if (ret) { - bio_put(orig_bio); + if (ret) return -EIO; - } if (map_length >= orig_bio->bi_iter.bi_size) { bio = orig_bio; @@ -7314,6 +7427,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); if (!bio) return -ENOMEM; + bio->bi_private = dip; bio->bi_end_io = btrfs_end_dio_bio; atomic_inc(&dip->pending_bios); @@ -7522,7 +7636,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, count = iov_iter_count(iter); if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &BTRFS_I(inode)->runtime_flags)) - filemap_fdatawrite_range(inode->i_mapping, offset, count); + filemap_fdatawrite_range(inode->i_mapping, offset, + offset + count - 1); if (rw & WRITE) { /* @@ -7939,27 +8054,6 @@ static int btrfs_truncate(struct inode *inode) BUG_ON(ret); /* - * setattr is responsible for setting the ordered_data_close flag, - * but that is only tested during the last file release. That - * could happen well after the next commit, leaving a great big - * window where new writes may get lost if someone chooses to write - * to this file after truncating to zero - * - * The inode doesn't have any dirty data here, and so if we commit - * this is a noop. If someone immediately starts writing to the inode - * it is very likely we'll catch some of their writes in this - * transaction, and the commit will find this file on the ordered - * data list with good things to send down. - * - * This is a best effort solution, there is still a window where - * using truncate to replace the contents of the file will - * end up with a zero length file after a crash. - */ - if (inode->i_size == 0 && test_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, - &BTRFS_I(inode)->runtime_flags)) - btrfs_add_ordered_operation(trans, root, inode); - - /* * So if we truncate and then write and fsync we normally would just * write the extents that changed, which is a problem if we need to * first truncate that entire inode. So set this flag so we write out @@ -8050,6 +8144,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, set_nlink(inode, 1); btrfs_i_size_write(inode, 0); + unlock_new_inode(inode); err = btrfs_subvol_inherit_props(trans, new_root, parent_root); if (err) @@ -8106,7 +8201,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) mutex_init(&ei->delalloc_mutex); btrfs_ordered_inode_tree_init(&ei->ordered_tree); INIT_LIST_HEAD(&ei->delalloc_inodes); - INIT_LIST_HEAD(&ei->ordered_operations); RB_CLEAR_NODE(&ei->rb_node); return inode; @@ -8146,17 +8240,6 @@ void btrfs_destroy_inode(struct inode *inode) if (!root) goto free; - /* - * Make sure we're properly removed from the ordered operation - * lists. - */ - smp_mb(); - if (!list_empty(&BTRFS_I(inode)->ordered_operations)) { - spin_lock(&root->fs_info->ordered_root_lock); - list_del_init(&BTRFS_I(inode)->ordered_operations); - spin_unlock(&root->fs_info->ordered_root_lock); - } - if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, &BTRFS_I(inode)->runtime_flags)) { btrfs_info(root->fs_info, "inode %llu still on the orphan list", @@ -8338,12 +8421,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, ret = 0; /* - * we're using rename to replace one file with another. - * and the replacement file is large. Start IO on it now so - * we don't add too much work to the end of the transaction + * we're using rename to replace one file with another. Start IO on it + * now so we don't add too much work to the end of the transaction */ - if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size && - old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) + if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size) filemap_flush(old_inode->i_mapping); /* close the racy window with snapshot create/destroy ioctl */ @@ -8391,12 +8472,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, */ btrfs_pin_log_trans(root); } - /* - * make sure the inode gets flushed if it is replacing - * something. - */ - if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode)) - btrfs_add_ordered_operation(trans, root, old_inode); inode_inc_iversion(old_dir); inode_inc_iversion(new_dir); @@ -8476,6 +8551,16 @@ out_notrans: return ret; } +static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + if (flags & ~RENAME_NOREPLACE) + return -EINVAL; + + return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry); +} + static void btrfs_run_delalloc_work(struct btrfs_work *work) { struct btrfs_delalloc_work *delalloc_work; @@ -8514,7 +8599,9 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, work->inode = inode; work->wait = wait; work->delay_iput = delay_iput; - btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL); + WARN_ON_ONCE(!inode); + btrfs_init_work(&work->work, btrfs_flush_delalloc_helper, + btrfs_run_delalloc_work, NULL, NULL); return work; } @@ -8718,12 +8805,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, goto out_unlock; } - err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); - if (err) { - drop_inode = 1; - goto out_unlock; - } - /* * If the active LSM wants to access the inode during * d_instantiate it needs these. Smack checks to see @@ -8732,23 +8813,22 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, */ inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; + inode->i_mapping->a_ops = &btrfs_aops; + inode->i_mapping->backing_dev_info = &root->fs_info->bdi; + BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; + + err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); + if (err) + goto out_unlock_inode; err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) - drop_inode = 1; - else { - inode->i_mapping->a_ops = &btrfs_aops; - inode->i_mapping->backing_dev_info = &root->fs_info->bdi; - BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; - } - if (drop_inode) - goto out_unlock; + goto out_unlock_inode; path = btrfs_alloc_path(); if (!path) { err = -ENOMEM; - drop_inode = 1; - goto out_unlock; + goto out_unlock_inode; } key.objectid = btrfs_ino(inode); key.offset = 0; @@ -8757,9 +8837,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, err = btrfs_insert_empty_item(trans, root, path, &key, datasize); if (err) { - drop_inode = 1; btrfs_free_path(path); - goto out_unlock; + goto out_unlock_inode; } leaf = path->nodes[0]; ei = btrfs_item_ptr(leaf, path->slots[0], @@ -8783,12 +8862,15 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode_set_bytes(inode, name_len); btrfs_i_size_write(inode, name_len); err = btrfs_update_inode(trans, root, inode); - if (err) + if (err) { drop_inode = 1; + goto out_unlock_inode; + } + + unlock_new_inode(inode); + d_instantiate(dentry, inode); out_unlock: - if (!err) - d_instantiate(dentry, inode); btrfs_end_transaction(trans, root); if (drop_inode) { inode_dec_link_count(inode); @@ -8796,6 +8878,11 @@ out_unlock: } btrfs_btree_balance_dirty(root); return err; + +out_unlock_inode: + drop_inode = 1; + unlock_new_inode(inode); + goto out_unlock; } static int __btrfs_prealloc_file_range(struct inode *inode, int mode, @@ -8979,14 +9066,6 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) goto out; } - ret = btrfs_init_inode_security(trans, inode, dir, NULL); - if (ret) - goto out; - - ret = btrfs_update_inode(trans, root, inode); - if (ret) - goto out; - inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; @@ -8994,10 +9073,26 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_mapping->backing_dev_info = &root->fs_info->bdi; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; + ret = btrfs_init_inode_security(trans, inode, dir, NULL); + if (ret) + goto out_inode; + + ret = btrfs_update_inode(trans, root, inode); + if (ret) + goto out_inode; ret = btrfs_orphan_add(trans, inode); if (ret) - goto out; + goto out_inode; + /* + * We set number of links to 0 in btrfs_new_inode(), and here we set + * it to 1 because d_tmpfile() will issue a warning if the count is 0, + * through: + * + * d_tmpfile() -> inode_dec_link_count() -> drop_nlink() + */ + set_nlink(inode, 1); + unlock_new_inode(inode); d_tmpfile(dentry, inode); mark_inode_dirty(inode); @@ -9007,8 +9102,12 @@ out: iput(inode); btrfs_balance_delayed_items(root); btrfs_btree_balance_dirty(root); - return ret; + +out_inode: + unlock_new_inode(inode); + goto out; + } static const struct inode_operations btrfs_dir_inode_operations = { @@ -9019,7 +9118,7 @@ static const struct inode_operations btrfs_dir_inode_operations = { .link = btrfs_link, .mkdir = btrfs_mkdir, .rmdir = btrfs_rmdir, - .rename = btrfs_rename, + .rename2 = btrfs_rename2, .symlink = btrfs_symlink, .setattr = btrfs_setattr, .mknod = btrfs_mknod, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 47aceb494d1..8a8e29878c3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -711,39 +711,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, if (ret) goto fail; - ret = btrfs_orphan_cleanup(pending_snapshot->snap); - if (ret) - goto fail; - - /* - * If orphan cleanup did remove any orphans, it means the tree was - * modified and therefore the commit root is not the same as the - * current root anymore. This is a problem, because send uses the - * commit root and therefore can see inode items that don't exist - * in the current root anymore, and for example make calls to - * btrfs_iget, which will do tree lookups based on the current root - * and not on the commit root. Those lookups will fail, returning a - * -ESTALE error, and making send fail with that error. So make sure - * a send does not see any orphans we have just removed, and that it - * will see the same inodes regardless of whether a transaction - * commit happened before it started (meaning that the commit root - * will be the same as the current root) or not. - */ - if (readonly && pending_snapshot->snap->node != - pending_snapshot->snap->commit_root) { - trans = btrfs_join_transaction(pending_snapshot->snap); - if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) { - ret = PTR_ERR(trans); - goto fail; - } - if (!IS_ERR(trans)) { - ret = btrfs_commit_transaction(trans, - pending_snapshot->snap); - if (ret) - goto fail; - } - } - inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); if (IS_ERR(inode)) { ret = PTR_ERR(inode); @@ -1052,8 +1019,10 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) return false; next = defrag_lookup_extent(inode, em->start + em->len); - if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE || - (em->block_start + em->block_len == next->block_start)) + if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) + ret = false; + else if ((em->block_start + em->block_len == next->block_start) && + (em->block_len > 128 * 1024 && next->block_len > 128 * 1024)) ret = false; free_extent_map(next); @@ -1088,7 +1057,6 @@ static int should_defrag_range(struct inode *inode, u64 start, int thresh, } next_mergeable = defrag_check_next_extent(inode, em); - /* * we hit a real extent, if it is big or the next extent is not a * real extent, don't bother defragging it @@ -1735,7 +1703,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY | BTRFS_SUBVOL_QGROUP_INHERIT)) { ret = -EOPNOTSUPP; - goto out; + goto free_args; } if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) @@ -1745,27 +1713,31 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { if (vol_args->size > PAGE_CACHE_SIZE) { ret = -EINVAL; - goto out; + goto free_args; } inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size); if (IS_ERR(inherit)) { ret = PTR_ERR(inherit); - goto out; + goto free_args; } } ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, vol_args->fd, subvol, ptr, readonly, inherit); + if (ret) + goto free_inherit; - if (ret == 0 && ptr && - copy_to_user(arg + - offsetof(struct btrfs_ioctl_vol_args_v2, - transid), ptr, sizeof(*ptr))) + if (ptr && copy_to_user(arg + + offsetof(struct btrfs_ioctl_vol_args_v2, + transid), + ptr, sizeof(*ptr))) ret = -EFAULT; -out: - kfree(vol_args); + +free_inherit: kfree(inherit); +free_args: + kfree(vol_args); return ret; } @@ -2685,7 +2657,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) { ret = PTR_ERR(vol_args); - goto out; + goto err_drop; } vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; @@ -2703,6 +2675,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) out: kfree(vol_args); +err_drop: mnt_drop_write_file(file); return ret; } @@ -3527,7 +3500,8 @@ process_slot: btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); - last_dest_end = new_key.offset + datal; + last_dest_end = ALIGN(new_key.offset + datal, + root->sectorsize); ret = clone_finish_inode_update(trans, inode, last_dest_end, destoff, olen); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 7187b14faa6..ac734ec4cc2 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -571,18 +571,6 @@ void btrfs_remove_ordered_extent(struct inode *inode, trace_btrfs_ordered_extent_remove(inode, entry); - /* - * we have no more ordered extents for this inode and - * no dirty pages. We can safely remove it from the - * list of ordered extents - */ - if (RB_EMPTY_ROOT(&tree->tree) && - !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { - spin_lock(&root->fs_info->ordered_root_lock); - list_del_init(&BTRFS_I(inode)->ordered_operations); - spin_unlock(&root->fs_info->ordered_root_lock); - } - if (!root->nr_ordered_extents) { spin_lock(&root->fs_info->ordered_root_lock); BUG_ON(list_empty(&root->ordered_root)); @@ -627,6 +615,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) spin_unlock(&root->ordered_extent_lock); btrfs_init_work(&ordered->flush_work, + btrfs_flush_delalloc_helper, btrfs_run_ordered_extent_work, NULL, NULL); list_add_tail(&ordered->work_list, &works); btrfs_queue_work(root->fs_info->flush_workers, @@ -687,81 +676,6 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) } /* - * this is used during transaction commit to write all the inodes - * added to the ordered operation list. These files must be fully on - * disk before the transaction commits. - * - * we have two modes here, one is to just start the IO via filemap_flush - * and the other is to wait for all the io. When we wait, we have an - * extra check to make sure the ordered operation list really is empty - * before we return - */ -int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, - struct btrfs_root *root, int wait) -{ - struct btrfs_inode *btrfs_inode; - struct inode *inode; - struct btrfs_transaction *cur_trans = trans->transaction; - struct list_head splice; - struct list_head works; - struct btrfs_delalloc_work *work, *next; - int ret = 0; - - INIT_LIST_HEAD(&splice); - INIT_LIST_HEAD(&works); - - mutex_lock(&root->fs_info->ordered_extent_flush_mutex); - spin_lock(&root->fs_info->ordered_root_lock); - list_splice_init(&cur_trans->ordered_operations, &splice); - while (!list_empty(&splice)) { - btrfs_inode = list_entry(splice.next, struct btrfs_inode, - ordered_operations); - inode = &btrfs_inode->vfs_inode; - - list_del_init(&btrfs_inode->ordered_operations); - - /* - * the inode may be getting freed (in sys_unlink path). - */ - inode = igrab(inode); - if (!inode) - continue; - - if (!wait) - list_add_tail(&BTRFS_I(inode)->ordered_operations, - &cur_trans->ordered_operations); - spin_unlock(&root->fs_info->ordered_root_lock); - - work = btrfs_alloc_delalloc_work(inode, wait, 1); - if (!work) { - spin_lock(&root->fs_info->ordered_root_lock); - if (list_empty(&BTRFS_I(inode)->ordered_operations)) - list_add_tail(&btrfs_inode->ordered_operations, - &splice); - list_splice_tail(&splice, - &cur_trans->ordered_operations); - spin_unlock(&root->fs_info->ordered_root_lock); - ret = -ENOMEM; - goto out; - } - list_add_tail(&work->list, &works); - btrfs_queue_work(root->fs_info->flush_workers, - &work->work); - - cond_resched(); - spin_lock(&root->fs_info->ordered_root_lock); - } - spin_unlock(&root->fs_info->ordered_root_lock); -out: - list_for_each_entry_safe(work, next, &works, list) { - list_del_init(&work->list); - btrfs_wait_and_free_delalloc_work(work); - } - mutex_unlock(&root->fs_info->ordered_extent_flush_mutex); - return ret; -} - -/* * Used to start IO or wait for a given ordered extent to finish. * * If wait is one, this effectively waits on page writeback for all the pages @@ -1120,42 +1034,6 @@ out: return index; } - -/* - * add a given inode to the list of inodes that must be fully on - * disk before a transaction commit finishes. - * - * This basically gives us the ext3 style data=ordered mode, and it is mostly - * used to make sure renamed files are fully on disk. - * - * It is a noop if the inode is already fully on disk. - * - * If trans is not null, we'll do a friendly check for a transaction that - * is already flushing things and force the IO down ourselves. - */ -void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode) -{ - struct btrfs_transaction *cur_trans = trans->transaction; - u64 last_mod; - - last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans); - - /* - * if this file hasn't been changed since the last transaction - * commit, we can safely return without doing anything - */ - if (last_mod <= root->fs_info->last_trans_committed) - return; - - spin_lock(&root->fs_info->ordered_root_lock); - if (list_empty(&BTRFS_I(inode)->ordered_operations)) { - list_add_tail(&BTRFS_I(inode)->ordered_operations, - &cur_trans->ordered_operations); - } - spin_unlock(&root->fs_info->ordered_root_lock); -} - int __init ordered_data_init(void) { btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent", diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 246897058ef..d81a274d621 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -190,11 +190,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, struct btrfs_ordered_extent *ordered); int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum, int len); -int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, - struct btrfs_root *root, int wait); -void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode); int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr); void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr); void btrfs_get_logged_extents(struct inode *inode, diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 98cb6b2630f..ded5c601d91 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1201,6 +1201,50 @@ out: mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } + +static int comp_oper_exist(struct btrfs_qgroup_operation *oper1, + struct btrfs_qgroup_operation *oper2) +{ + /* + * Ignore seq and type here, we're looking for any operation + * at all related to this extent on that root. + */ + if (oper1->bytenr < oper2->bytenr) + return -1; + if (oper1->bytenr > oper2->bytenr) + return 1; + if (oper1->ref_root < oper2->ref_root) + return -1; + if (oper1->ref_root > oper2->ref_root) + return 1; + return 0; +} + +static int qgroup_oper_exists(struct btrfs_fs_info *fs_info, + struct btrfs_qgroup_operation *oper) +{ + struct rb_node *n; + struct btrfs_qgroup_operation *cur; + int cmp; + + spin_lock(&fs_info->qgroup_op_lock); + n = fs_info->qgroup_op_tree.rb_node; + while (n) { + cur = rb_entry(n, struct btrfs_qgroup_operation, n); + cmp = comp_oper_exist(cur, oper); + if (cmp < 0) { + n = n->rb_right; + } else if (cmp) { + n = n->rb_left; + } else { + spin_unlock(&fs_info->qgroup_op_lock); + return -EEXIST; + } + } + spin_unlock(&fs_info->qgroup_op_lock); + return 0; +} + static int comp_oper(struct btrfs_qgroup_operation *oper1, struct btrfs_qgroup_operation *oper2) { @@ -1290,6 +1334,23 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq); INIT_LIST_HEAD(&oper->elem.list); oper->elem.seq = 0; + + if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) { + /* + * If any operation for this bytenr/ref_root combo + * exists, then we know it's not exclusively owned and + * shouldn't be queued up. + * + * This also catches the case where we have a cloned + * extent that gets queued up multiple times during + * drop snapshot. + */ + if (qgroup_oper_exists(fs_info, oper)) { + kfree(oper); + return 0; + } + } + ret = insert_qgroup_oper(fs_info, oper); if (ret) { /* Shouldn't happen so have an assert for developers */ @@ -1884,6 +1945,111 @@ out: } /* + * Process a reference to a shared subtree. This type of operation is + * queued during snapshot removal when we encounter extents which are + * shared between more than one root. + */ +static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_qgroup_operation *oper) +{ + struct ulist *roots = NULL; + struct ulist_node *unode; + struct ulist_iterator uiter; + struct btrfs_qgroup_list *glist; + struct ulist *parents; + int ret = 0; + int err; + struct btrfs_qgroup *qg; + u64 root_obj = 0; + struct seq_list elem = {}; + + parents = ulist_alloc(GFP_NOFS); + if (!parents) + return -ENOMEM; + + btrfs_get_tree_mod_seq(fs_info, &elem); + ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, + elem.seq, &roots); + btrfs_put_tree_mod_seq(fs_info, &elem); + if (ret < 0) + goto out; + + if (roots->nnodes != 1) + goto out; + + ULIST_ITER_INIT(&uiter); + unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */ + /* + * If we find our ref root then that means all refs + * this extent has to the root have not yet been + * deleted. In that case, we do nothing and let the + * last ref for this bytenr drive our update. + * + * This can happen for example if an extent is + * referenced multiple times in a snapshot (clone, + * etc). If we are in the middle of snapshot removal, + * queued updates for such an extent will find the + * root if we have not yet finished removing the + * snapshot. + */ + if (unode->val == oper->ref_root) + goto out; + + root_obj = unode->val; + BUG_ON(!root_obj); + + spin_lock(&fs_info->qgroup_lock); + qg = find_qgroup_rb(fs_info, root_obj); + if (!qg) + goto out_unlock; + + qg->excl += oper->num_bytes; + qg->excl_cmpr += oper->num_bytes; + qgroup_dirty(fs_info, qg); + + /* + * Adjust counts for parent groups. First we find all + * parents, then in the 2nd loop we do the adjustment + * while adding parents of the parents to our ulist. + */ + list_for_each_entry(glist, &qg->groups, next_group) { + err = ulist_add(parents, glist->group->qgroupid, + ptr_to_u64(glist->group), GFP_ATOMIC); + if (err < 0) { + ret = err; + goto out_unlock; + } + } + + ULIST_ITER_INIT(&uiter); + while ((unode = ulist_next(parents, &uiter))) { + qg = u64_to_ptr(unode->aux); + qg->excl += oper->num_bytes; + qg->excl_cmpr += oper->num_bytes; + qgroup_dirty(fs_info, qg); + + /* Add any parents of the parents */ + list_for_each_entry(glist, &qg->groups, next_group) { + err = ulist_add(parents, glist->group->qgroupid, + ptr_to_u64(glist->group), GFP_ATOMIC); + if (err < 0) { + ret = err; + goto out_unlock; + } + } + } + +out_unlock: + spin_unlock(&fs_info->qgroup_lock); + +out: + ulist_free(roots); + ulist_free(parents); + return ret; +} + +/* * btrfs_qgroup_account_ref is called for every ref that is added to or deleted * from the fs. First, all roots referencing the extent are searched, and * then the space is accounted accordingly to the different roots. The @@ -1920,6 +2086,9 @@ static int btrfs_qgroup_account(struct btrfs_trans_handle *trans, case BTRFS_QGROUP_OPER_SUB_SHARED: ret = qgroup_shared_accounting(trans, fs_info, oper); break; + case BTRFS_QGROUP_OPER_SUB_SUBTREE: + ret = qgroup_subtree_accounting(trans, fs_info, oper); + break; default: ASSERT(0); } @@ -2551,6 +2720,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, memset(&fs_info->qgroup_rescan_work, 0, sizeof(fs_info->qgroup_rescan_work)); btrfs_init_work(&fs_info->qgroup_rescan_work, + btrfs_qgroup_rescan_helper, btrfs_qgroup_rescan_worker, NULL, NULL); if (ret) { diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 5952ff1fbd7..18cc68ca309 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -44,6 +44,7 @@ enum btrfs_qgroup_operation_type { BTRFS_QGROUP_OPER_ADD_SHARED, BTRFS_QGROUP_OPER_SUB_EXCL, BTRFS_QGROUP_OPER_SUB_SHARED, + BTRFS_QGROUP_OPER_SUB_SUBTREE, }; struct btrfs_qgroup_operation { diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 4a88f073fdd..0a6b6e4bcbb 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1416,7 +1416,8 @@ cleanup: static void async_rmw_stripe(struct btrfs_raid_bio *rbio) { - btrfs_init_work(&rbio->work, rmw_work, NULL, NULL); + btrfs_init_work(&rbio->work, btrfs_rmw_helper, + rmw_work, NULL, NULL); btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); @@ -1424,7 +1425,8 @@ static void async_rmw_stripe(struct btrfs_raid_bio *rbio) static void async_read_rebuild(struct btrfs_raid_bio *rbio) { - btrfs_init_work(&rbio->work, read_rebuild_work, NULL, NULL); + btrfs_init_work(&rbio->work, btrfs_rmw_helper, + read_rebuild_work, NULL, NULL); btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); @@ -1665,7 +1667,8 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule) plug = container_of(cb, struct btrfs_plug_cb, cb); if (from_schedule) { - btrfs_init_work(&plug->work, unplug_work, NULL, NULL); + btrfs_init_work(&plug->work, btrfs_rmw_helper, + unplug_work, NULL, NULL); btrfs_queue_work(plug->info->rmw_workers, &plug->work); return; diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 09230cf3a24..20408c6b665 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -798,7 +798,8 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info) /* FIXME we cannot handle this properly right now */ BUG(); } - btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL); + btrfs_init_work(&rmw->work, btrfs_readahead_helper, + reada_start_machine_worker, NULL, NULL); rmw->fs_info = fs_info; btrfs_queue_work(fs_info->readahead_workers, &rmw->work); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index b6d198f5181..f4a41f37be2 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -428,8 +428,8 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) sbio->index = i; sbio->sctx = sctx; sbio->page_count = 0; - btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, - NULL, NULL); + btrfs_init_work(&sbio->work, btrfs_scrub_helper, + scrub_bio_end_io_worker, NULL, NULL); if (i != SCRUB_BIOS_PER_SCTX - 1) sctx->bios[i]->next_free = i + 1; @@ -999,8 +999,8 @@ nodatasum_case: fixup_nodatasum->root = fs_info->extent_root; fixup_nodatasum->mirror_num = failed_mirror_index + 1; scrub_pending_trans_workers_inc(sctx); - btrfs_init_work(&fixup_nodatasum->work, scrub_fixup_nodatasum, - NULL, NULL); + btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper, + scrub_fixup_nodatasum, NULL, NULL); btrfs_queue_work(fs_info->scrub_workers, &fixup_nodatasum->work); goto out; @@ -1616,7 +1616,8 @@ static void scrub_wr_bio_end_io(struct bio *bio, int err) sbio->err = err; sbio->bio = bio; - btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL); + btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper, + scrub_wr_bio_end_io_worker, NULL, NULL); btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work); } @@ -2904,6 +2905,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, struct scrub_ctx *sctx; int ret; struct btrfs_device *dev; + struct rcu_string *name; if (btrfs_fs_closing(fs_info)) return -EINVAL; @@ -2965,6 +2967,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, return -ENODEV; } + if (!is_dev_replace && !readonly && !dev->writeable) { + mutex_unlock(&fs_info->fs_devices->device_list_mutex); + rcu_read_lock(); + name = rcu_dereference(dev->name); + btrfs_err(fs_info, "scrub: device %s is not writable", + name->str); + rcu_read_unlock(); + return -EROFS; + } + mutex_lock(&fs_info->scrub_lock); if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) { mutex_unlock(&fs_info->scrub_lock); @@ -3203,7 +3215,8 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, nocow_ctx->len = len; nocow_ctx->mirror_num = mirror_num; nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; - btrfs_init_work(&nocow_ctx->work, copy_nocow_pages_worker, NULL, NULL); + btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper, + copy_nocow_pages_worker, NULL, NULL); INIT_LIST_HEAD(&nocow_ctx->inodes); btrfs_queue_work(fs_info->scrub_nocow_workers, &nocow_ctx->work); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8e16bca69c5..c4124de4435 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -851,7 +851,6 @@ static struct dentry *get_default_root(struct super_block *sb, struct btrfs_path *path; struct btrfs_key location; struct inode *inode; - struct dentry *dentry; u64 dir_id; int new = 0; @@ -922,13 +921,7 @@ setup_root: return dget(sb->s_root); } - dentry = d_obtain_alias(inode); - if (!IS_ERR(dentry)) { - spin_lock(&dentry->d_lock); - dentry->d_flags &= ~DCACHE_DISCONNECTED; - spin_unlock(&dentry->d_lock); - } - return dentry; + return d_obtain_root(inode); } static int btrfs_fill_super(struct super_block *sb, @@ -1672,6 +1665,21 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) return 0; } +/* + * Calculate numbers for 'df', pessimistic in case of mixed raid profiles. + * + * If there's a redundant raid level at DATA block groups, use the respective + * multiplier to scale the sizes. + * + * Unused device space usage is based on simulating the chunk allocator + * algorithm that respects the device sizes, order of allocations and the + * 'alloc_start' value, this is a close approximation of the actual use but + * there are other factors that may change the result (like a new metadata + * chunk). + * + * FIXME: not accurate for mixed block groups, total and free/used are ok, + * available appears slightly larger. + */ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb); @@ -1682,6 +1690,8 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) u64 total_free_data = 0; int bits = dentry->d_sb->s_blocksize_bits; __be32 *fsid = (__be32 *)fs_info->fsid; + unsigned factor = 1; + struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; int ret; /* holding chunk_muext to avoid allocating new chunks */ @@ -1689,30 +1699,52 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) rcu_read_lock(); list_for_each_entry_rcu(found, head, list) { if (found->flags & BTRFS_BLOCK_GROUP_DATA) { + int i; + total_free_data += found->disk_total - found->disk_used; total_free_data -= btrfs_account_ro_block_groups_free_space(found); + + for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { + if (!list_empty(&found->block_groups[i])) { + switch (i) { + case BTRFS_RAID_DUP: + case BTRFS_RAID_RAID1: + case BTRFS_RAID_RAID10: + factor = 2; + } + } + } } total_used += found->disk_used; } + rcu_read_unlock(); - buf->f_namelen = BTRFS_NAME_LEN; - buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; - buf->f_bfree = buf->f_blocks - (total_used >> bits); - buf->f_bsize = dentry->d_sb->s_blocksize; - buf->f_type = BTRFS_SUPER_MAGIC; + buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor); + buf->f_blocks >>= bits; + buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits); + + /* Account global block reserve as used, it's in logical size already */ + spin_lock(&block_rsv->lock); + buf->f_bfree -= block_rsv->size >> bits; + spin_unlock(&block_rsv->lock); + buf->f_bavail = total_free_data; ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data); if (ret) { mutex_unlock(&fs_info->chunk_mutex); return ret; } - buf->f_bavail += total_free_data; + buf->f_bavail += div_u64(total_free_data, factor); buf->f_bavail = buf->f_bavail >> bits; mutex_unlock(&fs_info->chunk_mutex); + buf->f_type = BTRFS_SUPER_MAGIC; + buf->f_bsize = dentry->d_sb->s_blocksize; + buf->f_namelen = BTRFS_NAME_LEN; + /* We treat it as constant endianness (it doesn't matter _which_) because we want the fsid to come out the same whether mounted on a big-endian or little-endian host */ diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 78699364f53..12e53556e21 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -614,7 +614,7 @@ int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info, if (!fs_info->device_dir_kobj) return -EINVAL; - if (one_device) { + if (one_device && one_device->bdev) { disk = one_device->bdev->bd_part; disk_kobj = &part_to_dev(disk)->kobj; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 5f379affdf2..d89c6d3542c 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -218,7 +218,6 @@ loop: spin_lock_init(&cur_trans->delayed_refs.lock); INIT_LIST_HEAD(&cur_trans->pending_snapshots); - INIT_LIST_HEAD(&cur_trans->ordered_operations); INIT_LIST_HEAD(&cur_trans->pending_chunks); INIT_LIST_HEAD(&cur_trans->switch_commits); list_add_tail(&cur_trans->list, &fs_info->trans_list); @@ -1612,27 +1611,6 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_trans_handle_cachep, trans); } -static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - int ret; - - ret = btrfs_run_delayed_items(trans, root); - if (ret) - return ret; - - /* - * rename don't use btrfs_join_transaction, so, once we - * set the transaction to blocked above, we aren't going - * to get any new ordered operations. We can safely run - * it here and no for sure that nothing new will be added - * to the list - */ - ret = btrfs_run_ordered_operations(trans, root, 1); - - return ret; -} - static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) { if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) @@ -1653,13 +1631,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *prev_trans = NULL; int ret; - ret = btrfs_run_ordered_operations(trans, root, 0); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - btrfs_end_transaction(trans, root); - return ret; - } - /* Stop the commit early if ->aborted is set */ if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { ret = cur_trans->aborted; @@ -1740,7 +1711,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, if (ret) goto cleanup_transaction; - ret = btrfs_flush_all_pending_stuffs(trans, root); + ret = btrfs_run_delayed_items(trans, root); if (ret) goto cleanup_transaction; @@ -1748,7 +1719,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, extwriter_counter_read(cur_trans) == 0); /* some pending stuffs might be added after the previous flush. */ - ret = btrfs_flush_all_pending_stuffs(trans, root); + ret = btrfs_run_delayed_items(trans, root); if (ret) goto cleanup_transaction; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 7dd558ed071..579be51b27e 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -55,7 +55,6 @@ struct btrfs_transaction { wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; struct list_head pending_snapshots; - struct list_head ordered_operations; struct list_head pending_chunks; struct list_head switch_commits; struct btrfs_delayed_ref_root delayed_refs; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9e1f2cd5e67..d296efe2d3e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -94,8 +94,10 @@ #define LOG_WALK_REPLAY_ALL 3 static int btrfs_log_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, - int inode_only); + struct btrfs_root *root, struct inode *inode, + int inode_only, + const loff_t start, + const loff_t end); static int link_to_fixup_dir(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid); @@ -3298,7 +3300,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, struct list_head ordered_sums; int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; bool has_extents = false; - bool need_find_last_extent = (*last_extent == 0); + bool need_find_last_extent = true; bool done = false; INIT_LIST_HEAD(&ordered_sums); @@ -3352,8 +3354,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, */ if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) { has_extents = true; - if (need_find_last_extent && - first_key.objectid == (u64)-1) + if (first_key.objectid == (u64)-1) first_key = ins_keys[i]; } else { need_find_last_extent = false; @@ -3427,6 +3428,16 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, if (!has_extents) return ret; + if (need_find_last_extent && *last_extent == first_key.offset) { + /* + * We don't have any leafs between our current one and the one + * we processed before that can have file extent items for our + * inode (and have a generation number smaller than our current + * transaction id). + */ + need_find_last_extent = false; + } + /* * Because we use btrfs_search_forward we could skip leaves that were * not modified and then assume *last_extent is valid when it really @@ -3537,7 +3548,7 @@ fill_holes: 0, 0); if (ret) break; - *last_extent = offset + len; + *last_extent = extent_end; } /* * Need to let the callers know we dropped the path so they should @@ -3849,8 +3860,10 @@ process: * This handles both files and directories. */ static int btrfs_log_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, - int inode_only) + struct btrfs_root *root, struct inode *inode, + int inode_only, + const loff_t start, + const loff_t end) { struct btrfs_path *path; struct btrfs_path *dst_path; @@ -3867,6 +3880,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, int ins_nr; bool fast_search = false; u64 ino = btrfs_ino(inode); + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; path = btrfs_alloc_path(); if (!path) @@ -4040,13 +4054,35 @@ log_extents: goto out_unlock; } } else if (inode_only == LOG_INODE_ALL) { - struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; struct extent_map *em, *n; - write_lock(&tree->lock); - list_for_each_entry_safe(em, n, &tree->modified_extents, list) - list_del_init(&em->list); - write_unlock(&tree->lock); + write_lock(&em_tree->lock); + /* + * We can't just remove every em if we're called for a ranged + * fsync - that is, one that doesn't cover the whole possible + * file range (0 to LLONG_MAX). This is because we can have + * em's that fall outside the range we're logging and therefore + * their ordered operations haven't completed yet + * (btrfs_finish_ordered_io() not invoked yet). This means we + * didn't get their respective file extent item in the fs/subvol + * tree yet, and need to let the next fast fsync (one which + * consults the list of modified extent maps) find the em so + * that it logs a matching file extent item and waits for the + * respective ordered operation to complete (if it's still + * running). + * + * Removing every em outside the range we're logging would make + * the next fast fsync not log their matching file extent items, + * therefore making us lose data after a log replay. + */ + list_for_each_entry_safe(em, n, &em_tree->modified_extents, + list) { + const u64 mod_end = em->mod_start + em->mod_len - 1; + + if (em->mod_start >= start && mod_end <= end) + list_del_init(&em->list); + } + write_unlock(&em_tree->lock); } if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { @@ -4056,8 +4092,19 @@ log_extents: goto out_unlock; } } - BTRFS_I(inode)->logged_trans = trans->transid; - BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; + + write_lock(&em_tree->lock); + /* + * If we're doing a ranged fsync and there are still modified extents + * in the list, we must run on the next fsync call as it might cover + * those extents (a full fsync or an fsync for other range). + */ + if (list_empty(&em_tree->modified_extents)) { + BTRFS_I(inode)->logged_trans = trans->transid; + BTRFS_I(inode)->last_log_commit = + BTRFS_I(inode)->last_sub_trans; + } + write_unlock(&em_tree->lock); out_unlock: if (unlikely(err)) btrfs_put_logged_extents(&logged_list); @@ -4152,7 +4199,10 @@ out: */ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - struct dentry *parent, int exists_only, + struct dentry *parent, + const loff_t start, + const loff_t end, + int exists_only, struct btrfs_log_ctx *ctx) { int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; @@ -4198,7 +4248,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (ret) goto end_no_trans; - ret = btrfs_log_inode(trans, root, inode, inode_only); + ret = btrfs_log_inode(trans, root, inode, inode_only, start, end); if (ret) goto end_trans; @@ -4226,7 +4276,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) { - ret = btrfs_log_inode(trans, root, inode, inode_only); + ret = btrfs_log_inode(trans, root, inode, inode_only, + 0, LLONG_MAX); if (ret) goto end_trans; } @@ -4260,13 +4311,15 @@ end_no_trans: */ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct dentry *dentry, + const loff_t start, + const loff_t end, struct btrfs_log_ctx *ctx) { struct dentry *parent = dget_parent(dentry); int ret; ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, - 0, ctx); + start, end, 0, ctx); dput(parent); return ret; @@ -4503,6 +4556,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans, root->fs_info->last_trans_committed)) return 0; - return btrfs_log_inode_parent(trans, root, inode, parent, 1, NULL); + return btrfs_log_inode_parent(trans, root, inode, parent, 0, + LLONG_MAX, 1, NULL); } diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 7f5b41bd537..e2e798ae7cd 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -59,6 +59,8 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, int btrfs_recover_log_trees(struct btrfs_root *tree_root); int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct dentry *dentry, + const loff_t start, + const loff_t end, struct btrfs_log_ctx *ctx); int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h index 7f78cbf5cf4..4c29db604bb 100644 --- a/fs/btrfs/ulist.h +++ b/fs/btrfs/ulist.h @@ -57,6 +57,21 @@ void ulist_free(struct ulist *ulist); int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask); int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, u64 *old_aux, gfp_t gfp_mask); + +/* just like ulist_add_merge() but take a pointer for the aux data */ +static inline int ulist_add_merge_ptr(struct ulist *ulist, u64 val, void *aux, + void **old_aux, gfp_t gfp_mask) +{ +#if BITS_PER_LONG == 32 + u64 old64 = (uintptr_t)*old_aux; + int ret = ulist_add_merge(ulist, val, (uintptr_t)aux, &old64, gfp_mask); + *old_aux = (void *)((uintptr_t)old64); + return ret; +#else + return ulist_add_merge(ulist, val, (u64)aux, (u64 *)old_aux, gfp_mask); +#endif +} + struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_iterator *uiter); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6cb82f62cb7..340a92d08e8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -508,6 +508,44 @@ static noinline int device_list_add(const char *path, ret = 1; device->fs_devices = fs_devices; } else if (!device->name || strcmp(device->name->str, path)) { + /* + * When FS is already mounted. + * 1. If you are here and if the device->name is NULL that + * means this device was missing at time of FS mount. + * 2. If you are here and if the device->name is different + * from 'path' that means either + * a. The same device disappeared and reappeared with + * different name. or + * b. The missing-disk-which-was-replaced, has + * reappeared now. + * + * We must allow 1 and 2a above. But 2b would be a spurious + * and unintentional. + * + * Further in case of 1 and 2a above, the disk at 'path' + * would have missed some transaction when it was away and + * in case of 2a the stale bdev has to be updated as well. + * 2b must not be allowed at all time. + */ + + /* + * As of now don't allow update to btrfs_fs_device through + * the btrfs dev scan cli, after FS has been mounted. + */ + if (fs_devices->opened) { + return -EBUSY; + } else { + /* + * That is if the FS is _not_ mounted and if you + * are here, that means there is more than one + * disk with same uuid and devid.We keep the one + * with larger generation number or the last-in if + * generation are equal. + */ + if (found_transid < device->generation) + return -EEXIST; + } + name = rcu_string_strdup(path, GFP_NOFS); if (!name) return -ENOMEM; @@ -519,6 +557,15 @@ static noinline int device_list_add(const char *path, } } + /* + * Unmount does not free the btrfs_device struct but would zero + * generation along with most of the other members. So just update + * it back. We need it to pick the disk with largest generation + * (as above). + */ + if (!fs_devices->opened) + device->generation = found_transid; + if (found_transid > fs_devices->latest_trans) { fs_devices->latest_devid = devid; fs_devices->latest_trans = found_transid; @@ -1436,7 +1483,7 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans, btrfs_set_device_io_align(leaf, dev_item, device->io_align); btrfs_set_device_io_width(leaf, dev_item, device->io_width); btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); - btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); + btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes); btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); btrfs_set_device_group(leaf, dev_item, 0); btrfs_set_device_seek_speed(leaf, dev_item, 0); @@ -1671,7 +1718,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) device->fs_devices->total_devices--; if (device->missing) - root->fs_info->fs_devices->missing_devices--; + device->fs_devices->missing_devices--; next_device = list_entry(root->fs_info->fs_devices->devices.next, struct btrfs_device, dev_list); @@ -1801,8 +1848,12 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, if (srcdev->bdev) { fs_info->fs_devices->open_devices--; - /* zero out the old super */ - btrfs_scratch_superblock(srcdev); + /* + * zero out the old super if it is not writable + * (e.g. seed device) + */ + if (srcdev->writeable) + btrfs_scratch_superblock(srcdev); } call_rcu(&srcdev->rcu, free_device); @@ -1941,6 +1992,9 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) fs_devices->seeding = 0; fs_devices->num_devices = 0; fs_devices->open_devices = 0; + fs_devices->missing_devices = 0; + fs_devices->num_can_discard = 0; + fs_devices->rotating = 0; fs_devices->seed = seed_devices; generate_random_uuid(fs_devices->fsid); @@ -5800,7 +5854,8 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, else generate_random_uuid(dev->uuid); - btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL); + btrfs_init_work(&dev->work, btrfs_submit_helper, + pending_bios_fn, NULL, NULL); return dev; } diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 469f2e8657e..cebf2ebefb5 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -172,14 +172,24 @@ out: int ceph_init_acl(struct dentry *dentry, struct inode *inode, struct inode *dir) { struct posix_acl *default_acl, *acl; + umode_t new_mode = inode->i_mode; int error; - error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); + error = posix_acl_create(dir, &new_mode, &default_acl, &acl); if (error) return error; - if (!default_acl && !acl) + if (!default_acl && !acl) { cache_no_acl(inode); + if (new_mode != inode->i_mode) { + struct iattr newattrs = { + .ia_mode = new_mode, + .ia_valid = ATTR_MODE, + }; + error = ceph_setattr(dentry, &newattrs); + } + return error; + } if (default_acl) { error = ceph_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 1fde164b74b..6d1cd45dca8 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3277,7 +3277,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode, rel->ino = cpu_to_le64(ceph_ino(inode)); rel->cap_id = cpu_to_le64(cap->cap_id); rel->seq = cpu_to_le32(cap->seq); - rel->issue_seq = cpu_to_le32(cap->issue_seq), + rel->issue_seq = cpu_to_le32(cap->issue_seq); rel->mseq = cpu_to_le32(cap->mseq); rel->caps = cpu_to_le32(cap->implemented); rel->wanted = cpu_to_le32(cap->mds_wanted); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 302085100c2..2eb02f80a0a 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -423,6 +423,9 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, dout("sync_read on file %p %llu~%u %s\n", file, off, (unsigned)len, (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); + + if (!len) + return 0; /* * flush any page cache pages in this range. this * will make concurrent normal and sync io slow, @@ -470,8 +473,11 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, size_t left = ret; while (left) { - int copy = min_t(size_t, PAGE_SIZE, left); - l = copy_page_to_iter(pages[k++], 0, copy, i); + size_t page_off = off & ~PAGE_MASK; + size_t copy = min_t(size_t, + PAGE_SIZE - page_off, left); + l = copy_page_to_iter(pages[k++], page_off, + copy, i); off += l; left -= l; if (l < copy) @@ -531,7 +537,7 @@ static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe) * objects, rollback on failure, etc.) */ static ssize_t -ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from) +ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); @@ -547,7 +553,6 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from) int check_caps = 0; int ret; struct timespec mtime = CURRENT_TIME; - loff_t pos = iocb->ki_pos; size_t count = iov_iter_count(from); if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) @@ -646,7 +651,8 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from) * correct atomic write, we should e.g. take write locks on all * objects, rollback on failure, etc.) */ -static ssize_t ceph_sync_write(struct kiocb *iocb, struct iov_iter *from) +static ssize_t +ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); @@ -663,7 +669,6 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, struct iov_iter *from) int check_caps = 0; int ret; struct timespec mtime = CURRENT_TIME; - loff_t pos = iocb->ki_pos; size_t count = iov_iter_count(from); if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) @@ -918,9 +923,9 @@ retry_snap: /* we might need to revert back to that point */ data = *from; if (file->f_flags & O_DIRECT) - written = ceph_sync_direct_write(iocb, &data); + written = ceph_sync_direct_write(iocb, &data, pos); else - written = ceph_sync_write(iocb, &data); + written = ceph_sync_write(iocb, &data, pos); if (written == -EOLDSNAPC) { dout("aio_write %p %llx.%llx %llu~%u" "got EOLDSNAPC, retrying\n", @@ -1177,6 +1182,9 @@ static long ceph_fallocate(struct file *file, int mode, loff_t endoff = 0; loff_t size; + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + return -EOPNOTSUPP; + if (!S_ISREG(inode->i_mode)) return -EOPNOTSUPP; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 92a2548278f..bad07c09f91 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1904,6 +1904,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); if (req->r_got_unsafe) { + void *p; /* * Replay. Do not regenerate message (and rebuild * paths, etc.); just use the original message. @@ -1924,8 +1925,13 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, /* remove cap/dentry releases from message */ rhead->num_releases = 0; - msg->hdr.front_len = cpu_to_le32(req->r_request_release_offset); - msg->front.iov_len = req->r_request_release_offset; + + /* time stamp */ + p = msg->front.iov_base + req->r_request_release_offset; + ceph_encode_copy(&p, &req->r_stamp, sizeof(req->r_stamp)); + + msg->front.iov_len = p - msg->front.iov_base; + msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); return 0; } @@ -2061,11 +2067,12 @@ static void __wake_requests(struct ceph_mds_client *mdsc, static void kick_requests(struct ceph_mds_client *mdsc, int mds) { struct ceph_mds_request *req; - struct rb_node *p; + struct rb_node *p = rb_first(&mdsc->request_tree); dout("kick_requests mds%d\n", mds); - for (p = rb_first(&mdsc->request_tree); p; p = rb_next(p)) { + while (p) { req = rb_entry(p, struct ceph_mds_request, r_node); + p = rb_next(p); if (req->r_got_unsafe) continue; if (req->r_session && @@ -2248,6 +2255,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) */ if (result == -ESTALE) { dout("got ESTALE on request %llu", req->r_tid); + req->r_resend_mds = -1; if (req->r_direct_mode != USE_AUTH_MDS) { dout("not using auth, setting for that now"); req->r_direct_mode = USE_AUTH_MDS; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 06150fd745a..f6e12377335 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -755,7 +755,7 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, goto out; } } else { - root = d_obtain_alias(inode); + root = d_obtain_root(inode); } ceph_init_dentry(root); dout("open_root_inode success, root dentry is %p\n", root); diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index c9c2b887381..12f58d22e01 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -592,12 +592,12 @@ start: xattr_version = ci->i_xattrs.version; spin_unlock(&ci->i_ceph_lock); - xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *), + xattrs = kcalloc(numattr, sizeof(struct ceph_inode_xattr *), GFP_NOFS); err = -ENOMEM; if (!xattrs) goto bad_lock; - memset(xattrs, 0, numattr*sizeof(struct ceph_xattr *)); + for (i = 0; i < numattr; i++) { xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS); diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 603f18a65c1..a2172f3f69e 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -22,6 +22,11 @@ config CIFS support for OS/2 and Windows ME and similar servers is provided as well. + The module also provides optional support for the followon + protocols for CIFS including SMB3, which enables + useful performance and security features (see the description + of CONFIG_CIFS_SMB2). + The cifs module provides an advanced network file system client for mounting to CIFS compliant servers. It includes support for DFS (hierarchical name space), secure per-user @@ -121,7 +126,8 @@ config CIFS_ACL depends on CIFS_XATTR && KEYS help Allows fetching CIFS/NTFS ACL from the server. The DACL blob - is handed over to the application/caller. + is handed over to the application/caller. See the man + page for getcifsacl for more information. config CIFS_DEBUG bool "Enable CIFS debugging routines" @@ -162,7 +168,7 @@ config CIFS_NFSD_EXPORT Allows NFS server to export a CIFS mounted share (nfsd over cifs) config CIFS_SMB2 - bool "SMB2 network file system support" + bool "SMB2 and SMB3 network file system support" depends on CIFS && INET select NLS select KEYS @@ -170,16 +176,21 @@ config CIFS_SMB2 select DNS_RESOLVER help - This enables experimental support for the SMB2 (Server Message Block - version 2) protocol. The SMB2 protocol is the successor to the - popular CIFS and SMB network file sharing protocols. SMB2 is the - native file sharing mechanism for recent versions of Windows - operating systems (since Vista). SMB2 enablement will eventually - allow users better performance, security and features, than would be - possible with cifs. Note that smb2 mount options also are simpler - (compared to cifs) due to protocol improvements. - - Unless you are a developer or tester, say N. + This enables support for the Server Message Block version 2 + family of protocols, including SMB3. SMB3 support is + enabled on mount by specifying "vers=3.0" in the mount + options. These protocols are the successors to the popular + CIFS and SMB network file sharing protocols. SMB3 is the + native file sharing mechanism for the more recent + versions of Windows (Windows 8 and Windows 2012 and + later) and Samba server and many others support SMB3 well. + In general SMB3 enables better performance, security + and features, than would be possible with CIFS (Note that + when mounting to Samba, due to the CIFS POSIX extensions, + CIFS mounts can provide slightly better POSIX compatibility + than SMB3 mounts do though). Note that SMB2/SMB3 mount + options are also slightly simpler (compared to CIFS) due + to protocol improvements. config CIFS_FSCACHE bool "Provide CIFS client caching support" diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 88839806742..889b9845575 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -207,6 +207,19 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } +static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len) +{ + struct super_block *sb = file->f_path.dentry->d_sb; + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + struct TCP_Server_Info *server = tcon->ses->server; + + if (server->ops->fallocate) + return server->ops->fallocate(file, tcon, mode, off, len); + + return -EOPNOTSUPP; +} + static int cifs_permission(struct inode *inode, int mask) { struct cifs_sb_info *cifs_sb; @@ -812,8 +825,9 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) if (!(S_ISREG(inode->i_mode))) return -EINVAL; - /* check if file is oplocked */ - if (((arg == F_RDLCK) && CIFS_CACHE_READ(CIFS_I(inode))) || + /* Check if file is oplocked if this is request for new lease */ + if (arg == F_UNLCK || + ((arg == F_RDLCK) && CIFS_CACHE_READ(CIFS_I(inode))) || ((arg == F_WRLCK) && CIFS_CACHE_WRITE(CIFS_I(inode)))) return generic_setlease(file, arg, lease); else if (tlink_tcon(cfile->tlink)->local_lease && @@ -848,7 +862,7 @@ const struct inode_operations cifs_dir_inode_ops = { .link = cifs_hardlink, .mkdir = cifs_mkdir, .rmdir = cifs_rmdir, - .rename = cifs_rename, + .rename2 = cifs_rename2, .permission = cifs_permission, /* revalidate:cifs_revalidate, */ .setattr = cifs_setattr, @@ -908,6 +922,7 @@ const struct file_operations cifs_file_ops = { .unlocked_ioctl = cifs_ioctl, #endif /* CONFIG_CIFS_POSIX */ .setlease = cifs_setlease, + .fallocate = cifs_fallocate, }; const struct file_operations cifs_file_strict_ops = { @@ -927,6 +942,7 @@ const struct file_operations cifs_file_strict_ops = { .unlocked_ioctl = cifs_ioctl, #endif /* CONFIG_CIFS_POSIX */ .setlease = cifs_setlease, + .fallocate = cifs_fallocate, }; const struct file_operations cifs_file_direct_ops = { @@ -947,6 +963,7 @@ const struct file_operations cifs_file_direct_ops = { #endif /* CONFIG_CIFS_POSIX */ .llseek = cifs_llseek, .setlease = cifs_setlease, + .fallocate = cifs_fallocate, }; const struct file_operations cifs_file_nobrl_ops = { @@ -965,6 +982,7 @@ const struct file_operations cifs_file_nobrl_ops = { .unlocked_ioctl = cifs_ioctl, #endif /* CONFIG_CIFS_POSIX */ .setlease = cifs_setlease, + .fallocate = cifs_fallocate, }; const struct file_operations cifs_file_strict_nobrl_ops = { @@ -983,6 +1001,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = { .unlocked_ioctl = cifs_ioctl, #endif /* CONFIG_CIFS_POSIX */ .setlease = cifs_setlease, + .fallocate = cifs_fallocate, }; const struct file_operations cifs_file_direct_nobrl_ops = { @@ -1002,6 +1021,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { #endif /* CONFIG_CIFS_POSIX */ .llseek = cifs_llseek, .setlease = cifs_setlease, + .fallocate = cifs_fallocate, }; const struct file_operations cifs_dir_ops = { diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 56048026333..b0fafa49950 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -68,8 +68,8 @@ extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *); extern int cifs_mknod(struct inode *, struct dentry *, umode_t, dev_t); extern int cifs_mkdir(struct inode *, struct dentry *, umode_t); extern int cifs_rmdir(struct inode *, struct dentry *); -extern int cifs_rename(struct inode *, struct dentry *, struct inode *, - struct dentry *); +extern int cifs_rename2(struct inode *, struct dentry *, struct inode *, + struct dentry *, unsigned int); extern int cifs_revalidate_file_attr(struct file *filp); extern int cifs_revalidate_dentry_attr(struct dentry *); extern int cifs_revalidate_file(struct file *filp); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 0012e1e291d..25b8392bfdd 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -70,11 +70,6 @@ #define SERVER_NAME_LENGTH 40 #define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1) -/* used to define string lengths for reversing unicode strings */ -/* (256+1)*2 = 514 */ -/* (max path length + 1 for null) * 2 for unicode */ -#define MAX_NAME 514 - /* SMB echo "timeout" -- FIXME: tunable? */ #define SMB_ECHO_INTERVAL (60 * HZ) @@ -409,6 +404,10 @@ struct smb_version_operations { /* get mtu credits */ int (*wait_mtu_credits)(struct TCP_Server_Info *, unsigned int, unsigned int *, unsigned int *); + /* check if we need to issue closedir */ + bool (*dir_needs_close)(struct cifsFileInfo *); + long (*fallocate)(struct file *, struct cifs_tcon *, int, loff_t, + loff_t); }; struct smb_version_values { @@ -883,6 +882,7 @@ struct cifs_tcon { for this mount even if server would support */ bool local_lease:1; /* check leases (only) on local system not remote */ bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */ + bool broken_sparse_sup; /* if server or share does not support sparse */ bool need_reconnect:1; /* connection reset, tid now invalid */ #ifdef CONFIG_CIFS_SMB2 bool print:1; /* set if connection to printer share */ diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 33df36ef9d5..5f9822ac024 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -2253,6 +2253,29 @@ typedef struct { /* minimum includes first three fields, and empty FS Name */ #define MIN_FS_ATTR_INFO_SIZE 12 + +/* List of FileSystemAttributes - see 2.5.1 of MS-FSCC */ +#define FILE_SUPPORT_INTEGRITY_STREAMS 0x04000000 +#define FILE_SUPPORTS_USN_JOURNAL 0x02000000 +#define FILE_SUPPORTS_OPEN_BY_FILE_ID 0x01000000 +#define FILE_SUPPORTS_EXTENDED_ATTRIBUTES 0x00800000 +#define FILE_SUPPORTS_HARD_LINKS 0x00400000 +#define FILE_SUPPORTS_TRANSACTIONS 0x00200000 +#define FILE_SEQUENTIAL_WRITE_ONCE 0x00100000 +#define FILE_READ_ONLY_VOLUME 0x00080000 +#define FILE_NAMED_STREAMS 0x00040000 +#define FILE_SUPPORTS_ENCRYPTION 0x00020000 +#define FILE_SUPPORTS_OBJECT_IDS 0x00010000 +#define FILE_VOLUME_IS_COMPRESSED 0x00008000 +#define FILE_SUPPORTS_REMOTE_STORAGE 0x00000100 +#define FILE_SUPPORTS_REPARSE_POINTS 0x00000080 +#define FILE_SUPPORTS_SPARSE_FILES 0x00000040 +#define FILE_VOLUME_QUOTAS 0x00000020 +#define FILE_FILE_COMPRESSION 0x00000010 +#define FILE_PERSISTENT_ACLS 0x00000008 +#define FILE_UNICODE_ON_DISK 0x00000004 +#define FILE_CASE_PRESERVED_NAMES 0x00000002 +#define FILE_CASE_SENSITIVE_SEARCH 0x00000001 typedef struct { __le32 Attributes; __le32 MaxPathNameComponentLength; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 03ed8a09581..8a9fded7c13 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -837,7 +837,6 @@ cifs_demultiplex_thread(void *p) struct TCP_Server_Info *server = p; unsigned int pdu_length; char *buf = NULL; - struct task_struct *task_to_wake = NULL; struct mid_q_entry *mid_entry; current->flags |= PF_MEMALLOC; @@ -928,19 +927,7 @@ cifs_demultiplex_thread(void *p) if (server->smallbuf) /* no sense logging a debug message if NULL */ cifs_small_buf_release(server->smallbuf); - task_to_wake = xchg(&server->tsk, NULL); clean_demultiplex_info(server); - - /* if server->tsk was NULL then wait for a signal before exiting */ - if (!task_to_wake) { - set_current_state(TASK_INTERRUPTIBLE); - while (!signal_pending(current)) { - schedule(); - set_current_state(TASK_INTERRUPTIBLE); - } - set_current_state(TASK_RUNNING); - } - module_put_and_exit(0); } @@ -1600,6 +1587,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, tmp_end++; if (!(tmp_end < end && tmp_end[1] == delim)) { /* No it is not. Set the password to NULL */ + kfree(vol->password); vol->password = NULL; break; } @@ -1637,6 +1625,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, options = end; } + kfree(vol->password); /* Now build new password string */ temp_len = strlen(value); vol->password = kzalloc(temp_len+1, GFP_KERNEL); @@ -2061,8 +2050,6 @@ cifs_find_tcp_session(struct smb_vol *vol) static void cifs_put_tcp_session(struct TCP_Server_Info *server) { - struct task_struct *task; - spin_lock(&cifs_tcp_ses_lock); if (--server->srv_count > 0) { spin_unlock(&cifs_tcp_ses_lock); @@ -2086,10 +2073,6 @@ cifs_put_tcp_session(struct TCP_Server_Info *server) kfree(server->session_key.response); server->session_key.response = NULL; server->session_key.len = 0; - - task = xchg(&server->tsk, NULL); - if (task) - force_sig(SIGKILL, task); } static struct TCP_Server_Info * diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 3db0c5fd9a1..6cbd9c688cf 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -497,6 +497,14 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, goto out; } + if (file->f_flags & O_DIRECT && + CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) { + if (CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + file->f_op = &cifs_file_direct_nobrl_ops; + else + file->f_op = &cifs_file_direct_ops; + } + file_info = cifs_new_fileinfo(&fid, file, tlink, oplock); if (file_info == NULL) { if (server->ops->close) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 4ab2f79ffa7..7c018a1c52f 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -467,6 +467,14 @@ int cifs_open(struct inode *inode, struct file *file) cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n", inode, file->f_flags, full_path); + if (file->f_flags & O_DIRECT && + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) { + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + file->f_op = &cifs_file_direct_nobrl_ops; + else + file->f_op = &cifs_file_direct_ops; + } + if (server->oplocks) oplock = REQ_OPLOCK; else @@ -762,7 +770,7 @@ int cifs_closedir(struct inode *inode, struct file *file) cifs_dbg(FYI, "Freeing private data in close dir\n"); spin_lock(&cifs_file_list_lock); - if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) { + if (server->ops->dir_needs_close(cfile)) { cfile->invalidHandle = true; spin_unlock(&cifs_file_list_lock); if (server->ops->close_dir) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 41de3935caa..7899a40465b 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1627,8 +1627,9 @@ do_rename_exit: } int -cifs_rename(struct inode *source_dir, struct dentry *source_dentry, - struct inode *target_dir, struct dentry *target_dentry) +cifs_rename2(struct inode *source_dir, struct dentry *source_dentry, + struct inode *target_dir, struct dentry *target_dentry, + unsigned int flags) { char *from_name = NULL; char *to_name = NULL; @@ -1640,6 +1641,9 @@ cifs_rename(struct inode *source_dir, struct dentry *source_dentry, unsigned int xid; int rc, tmprc; + if (flags & ~RENAME_NOREPLACE) + return -EINVAL; + cifs_sb = CIFS_SB(source_dir->i_sb); tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) @@ -1667,6 +1671,12 @@ cifs_rename(struct inode *source_dir, struct dentry *source_dentry, rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry, to_name); + /* + * No-replace is the natural behavior for CIFS, so skip unlink hacks. + */ + if (flags & RENAME_NOREPLACE) + goto cifs_rename_exit; + if (rc == -EEXIST && tcon->unix_ext) { /* * Are src and dst hardlinks of same inode? We can only tell @@ -1710,13 +1720,22 @@ cifs_rename(struct inode *source_dir, struct dentry *source_dentry, unlink_target: /* Try unlinking the target dentry if it's not negative */ if (target_dentry->d_inode && (rc == -EACCES || rc == -EEXIST)) { - tmprc = cifs_unlink(target_dir, target_dentry); + if (d_is_dir(target_dentry)) + tmprc = cifs_rmdir(target_dir, target_dentry); + else + tmprc = cifs_unlink(target_dir, target_dentry); if (tmprc) goto cifs_rename_exit; rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry, to_name); } + /* force revalidate to go get info when needed */ + CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0; + + source_dir->i_ctime = source_dir->i_mtime = target_dir->i_ctime = + target_dir->i_mtime = current_fs_time(source_dir->i_sb); + cifs_rename_exit: kfree(info_buf_source); kfree(from_name); diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 81340c6253e..b7415d596db 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -574,13 +574,6 @@ void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock) cinode->oplock = 0; } -static int -cifs_oplock_break_wait(void *unused) -{ - schedule(); - return signal_pending(current) ? -ERESTARTSYS : 0; -} - /* * We wait for oplock breaks to be processed before we attempt to perform * writes. diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index b15862e0f68..b334a89d6a6 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -593,11 +593,11 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos, /* close and restart search */ cifs_dbg(FYI, "search backing up - close and restart search\n"); spin_lock(&cifs_file_list_lock); - if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) { + if (server->ops->dir_needs_close(cfile)) { cfile->invalidHandle = true; spin_unlock(&cifs_file_list_lock); - if (server->ops->close) - server->ops->close(xid, tcon, &cfile->fid); + if (server->ops->close_dir) + server->ops->close_dir(xid, tcon, &cfile->fid); } else spin_unlock(&cifs_file_list_lock); if (cfile->srch_inf.ntwrk_buf_start) { diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 39ee32688ea..3a5e8331768 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -243,10 +243,11 @@ static void decode_ascii_ssetup(char **pbcc_area, __u16 bleft, kfree(ses->serverOS); ses->serverOS = kzalloc(len + 1, GFP_KERNEL); - if (ses->serverOS) + if (ses->serverOS) { strncpy(ses->serverOS, bcc_ptr, len); - if (strncmp(ses->serverOS, "OS/2", 4) == 0) - cifs_dbg(FYI, "OS/2 server\n"); + if (strncmp(ses->serverOS, "OS/2", 4) == 0) + cifs_dbg(FYI, "OS/2 server\n"); + } bcc_ptr += len + 1; bleft -= len + 1; diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 5e8c22d6c7b..1a6df4b03f6 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -1015,6 +1015,12 @@ cifs_wp_retry_size(struct inode *inode) return CIFS_SB(inode->i_sb)->wsize; } +static bool +cifs_dir_needs_close(struct cifsFileInfo *cfile) +{ + return !cfile->srch_inf.endOfSearch && !cfile->invalidHandle; +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -1086,6 +1092,7 @@ struct smb_version_operations smb1_operations = { .create_mf_symlink = cifs_create_mf_symlink, .is_read_op = cifs_is_read_op, .wp_retry_size = cifs_wp_retry_size, + .dir_needs_close = cifs_dir_needs_close, #ifdef CONFIG_CIFS_XATTR .query_all_EAs = CIFSSMBQAllEAs, .set_EA = CIFSSMBSetEA, diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 3f17b455083..45992944e23 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -50,7 +50,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, goto out; } - smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2, GFP_KERNEL); if (smb2_data == NULL) { rc = -ENOMEM; diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 0150182a449..899bbc86f73 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -131,7 +131,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, *adjust_tz = false; *symlink = false; - smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2, GFP_KERNEL); if (smb2_data == NULL) return -ENOMEM; diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index e31a9dfdcd3..af59d03db49 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -214,7 +214,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_BREAKPOINT, -EIO, "STATUS_BREAKPOINT"}, {STATUS_SINGLE_STEP, -EIO, "STATUS_SINGLE_STEP"}, {STATUS_BUFFER_OVERFLOW, -EIO, "STATUS_BUFFER_OVERFLOW"}, - {STATUS_NO_MORE_FILES, -EIO, "STATUS_NO_MORE_FILES"}, + {STATUS_NO_MORE_FILES, -ENODATA, "STATUS_NO_MORE_FILES"}, {STATUS_WAKE_SYSTEM_DEBUGGER, -EIO, "STATUS_WAKE_SYSTEM_DEBUGGER"}, {STATUS_HANDLES_CLOSED, -EIO, "STATUS_HANDLES_CLOSED"}, {STATUS_NO_INHERITANCE, -EIO, "STATUS_NO_INHERITANCE"}, @@ -298,7 +298,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_INVALID_PARAMETER, -EINVAL, "STATUS_INVALID_PARAMETER"}, {STATUS_NO_SUCH_DEVICE, -ENODEV, "STATUS_NO_SUCH_DEVICE"}, {STATUS_NO_SUCH_FILE, -ENOENT, "STATUS_NO_SUCH_FILE"}, - {STATUS_INVALID_DEVICE_REQUEST, -EIO, "STATUS_INVALID_DEVICE_REQUEST"}, + {STATUS_INVALID_DEVICE_REQUEST, -EOPNOTSUPP, "STATUS_INVALID_DEVICE_REQUEST"}, {STATUS_END_OF_FILE, -ENODATA, "STATUS_END_OF_FILE"}, {STATUS_WRONG_VOLUME, -EIO, "STATUS_WRONG_VOLUME"}, {STATUS_NO_MEDIA_IN_DEVICE, -EIO, "STATUS_NO_MEDIA_IN_DEVICE"}, diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index f2e6ac29a8d..4aa7a0f07d6 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -178,9 +178,24 @@ smb2_check_message(char *buf, unsigned int length) /* Windows 7 server returns 24 bytes more */ if (clc_len + 20 == len && command == SMB2_OPLOCK_BREAK_HE) return 0; - /* server can return one byte more */ + /* server can return one byte more due to implied bcc[0] */ if (clc_len == 4 + len + 1) return 0; + + /* + * MacOS server pads after SMB2.1 write response with 3 bytes + * of junk. Other servers match RFC1001 len to actual + * SMB2/SMB3 frame length (header + smb2 response specific data) + * Log the server error (once), but allow it and continue + * since the frame is parseable. + */ + if (clc_len < 4 /* RFC1001 header size */ + len) { + printk_once(KERN_WARNING + "SMB2 server sent bad RFC1001 len %d not %d\n", + len, clc_len - 4); + return 0; + } + return 1; } return 0; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 77f8aeb9c2f..f522193b718 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -389,7 +389,7 @@ smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, int rc; struct smb2_file_all_info *smb2_data; - smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2, GFP_KERNEL); if (smb2_data == NULL) return -ENOMEM; @@ -731,11 +731,72 @@ smb2_sync_write(const unsigned int xid, struct cifsFileInfo *cfile, return SMB2_write(xid, parms, written, iov, nr_segs); } +/* Set or clear the SPARSE_FILE attribute based on value passed in setsparse */ +static bool smb2_set_sparse(const unsigned int xid, struct cifs_tcon *tcon, + struct cifsFileInfo *cfile, struct inode *inode, __u8 setsparse) +{ + struct cifsInodeInfo *cifsi; + int rc; + + cifsi = CIFS_I(inode); + + /* if file already sparse don't bother setting sparse again */ + if ((cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) && setsparse) + return true; /* already sparse */ + + if (!(cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) && !setsparse) + return true; /* already not sparse */ + + /* + * Can't check for sparse support on share the usual way via the + * FS attribute info (FILE_SUPPORTS_SPARSE_FILES) on the share + * since Samba server doesn't set the flag on the share, yet + * supports the set sparse FSCTL and returns sparse correctly + * in the file attributes. If we fail setting sparse though we + * mark that server does not support sparse files for this share + * to avoid repeatedly sending the unsupported fsctl to server + * if the file is repeatedly extended. + */ + if (tcon->broken_sparse_sup) + return false; + + rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, FSCTL_SET_SPARSE, + true /* is_fctl */, &setsparse, 1, NULL, NULL); + if (rc) { + tcon->broken_sparse_sup = true; + cifs_dbg(FYI, "set sparse rc = %d\n", rc); + return false; + } + + if (setsparse) + cifsi->cifsAttrs |= FILE_ATTRIBUTE_SPARSE_FILE; + else + cifsi->cifsAttrs &= (~FILE_ATTRIBUTE_SPARSE_FILE); + + return true; +} + static int smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *cfile, __u64 size, bool set_alloc) { __le64 eof = cpu_to_le64(size); + struct inode *inode; + + /* + * If extending file more than one page make sparse. Many Linux fs + * make files sparse by default when extending via ftruncate + */ + inode = cfile->dentry->d_inode; + + if (!set_alloc && (size > inode->i_size + 8192)) { + __u8 set_sparse = 1; + + /* whether set sparse succeeds or not, extend the file */ + smb2_set_sparse(xid, tcon, cfile, inode, set_sparse); + } + return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, cfile->pid, &eof, false); } @@ -954,6 +1015,105 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, return rc; } +static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, + loff_t offset, loff_t len, bool keep_size) +{ + struct inode *inode; + struct cifsInodeInfo *cifsi; + struct cifsFileInfo *cfile = file->private_data; + struct file_zero_data_information fsctl_buf; + long rc; + unsigned int xid; + + xid = get_xid(); + + inode = cfile->dentry->d_inode; + cifsi = CIFS_I(inode); + + /* if file not oplocked can't be sure whether asking to extend size */ + if (!CIFS_CACHE_READ(cifsi)) + if (keep_size == false) + return -EOPNOTSUPP; + + /* + * Must check if file sparse since fallocate -z (zero range) assumes + * non-sparse allocation + */ + if (!(cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE)) + return -EOPNOTSUPP; + + /* + * need to make sure we are not asked to extend the file since the SMB3 + * fsctl does not change the file size. In the future we could change + * this to zero the first part of the range then set the file size + * which for a non sparse file would zero the newly extended range + */ + if (keep_size == false) + if (i_size_read(inode) < offset + len) + return -EOPNOTSUPP; + + cifs_dbg(FYI, "offset %lld len %lld", offset, len); + + fsctl_buf.FileOffset = cpu_to_le64(offset); + fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len); + + rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, + true /* is_fctl */, (char *)&fsctl_buf, + sizeof(struct file_zero_data_information), NULL, NULL); + free_xid(xid); + return rc; +} + +static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, + loff_t offset, loff_t len) +{ + struct inode *inode; + struct cifsInodeInfo *cifsi; + struct cifsFileInfo *cfile = file->private_data; + struct file_zero_data_information fsctl_buf; + long rc; + unsigned int xid; + __u8 set_sparse = 1; + + xid = get_xid(); + + inode = cfile->dentry->d_inode; + cifsi = CIFS_I(inode); + + /* Need to make file sparse, if not already, before freeing range. */ + /* Consider adding equivalent for compressed since it could also work */ + if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) + return -EOPNOTSUPP; + + cifs_dbg(FYI, "offset %lld len %lld", offset, len); + + fsctl_buf.FileOffset = cpu_to_le64(offset); + fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len); + + rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, + true /* is_fctl */, (char *)&fsctl_buf, + sizeof(struct file_zero_data_information), NULL, NULL); + free_xid(xid); + return rc; +} + +static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode, + loff_t off, loff_t len) +{ + /* KEEP_SIZE already checked for by do_fallocate */ + if (mode & FALLOC_FL_PUNCH_HOLE) + return smb3_punch_hole(file, tcon, off, len); + else if (mode & FALLOC_FL_ZERO_RANGE) { + if (mode & FALLOC_FL_KEEP_SIZE) + return smb3_zero_range(file, tcon, off, len, true); + return smb3_zero_range(file, tcon, off, len, false); + } + + return -EOPNOTSUPP; +} + static void smb2_downgrade_oplock(struct TCP_Server_Info *server, struct cifsInodeInfo *cinode, bool set_level2) @@ -1161,6 +1321,12 @@ smb2_wp_retry_size(struct inode *inode) SMB2_MAX_BUFFER_SIZE); } +static bool +smb2_dir_needs_close(struct cifsFileInfo *cfile) +{ + return !cfile->invalidHandle; +} + struct smb_version_operations smb20_operations = { .compare_fids = smb2_compare_fids, .setup_request = smb2_setup_request, @@ -1236,6 +1402,7 @@ struct smb_version_operations smb20_operations = { .parse_lease_buf = smb2_parse_lease_buf, .clone_range = smb2_clone_range, .wp_retry_size = smb2_wp_retry_size, + .dir_needs_close = smb2_dir_needs_close, }; struct smb_version_operations smb21_operations = { @@ -1313,6 +1480,7 @@ struct smb_version_operations smb21_operations = { .parse_lease_buf = smb2_parse_lease_buf, .clone_range = smb2_clone_range, .wp_retry_size = smb2_wp_retry_size, + .dir_needs_close = smb2_dir_needs_close, }; struct smb_version_operations smb30_operations = { @@ -1393,6 +1561,8 @@ struct smb_version_operations smb30_operations = { .clone_range = smb2_clone_range, .validate_negotiate = smb3_validate_negotiate, .wp_retry_size = smb2_wp_retry_size, + .dir_needs_close = smb2_dir_needs_close, + .fallocate = smb3_fallocate, }; struct smb_version_values smb20_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 42ebc1a8be6..74b3a668438 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -530,7 +530,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, struct smb2_sess_setup_rsp *rsp = NULL; struct kvec iov[2]; int rc = 0; - int resp_buftype; + int resp_buftype = CIFS_NO_BUFFER; __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ struct TCP_Server_Info *server = ses->server; u16 blob_length = 0; @@ -907,7 +907,8 @@ tcon_exit: tcon_error_exit: if (rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) { cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree); - tcon->bad_network_name = true; + if (tcon) + tcon->bad_network_name = true; } goto tcon_exit; } @@ -1224,7 +1225,9 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, cifs_dbg(FYI, "SMB2 IOCTL\n"); - *out_data = NULL; + if (out_data != NULL) + *out_data = NULL; + /* zero out returned data len, in case of error */ if (plen) *plen = 0; @@ -1400,8 +1403,7 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, rsp = (struct smb2_close_rsp *)iov[0].iov_base; if (rc != 0) { - if (tcon) - cifs_stats_fail_inc(tcon, SMB2_CLOSE_HE); + cifs_stats_fail_inc(tcon, SMB2_CLOSE_HE); goto close_exit; } @@ -1530,7 +1532,7 @@ SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, { return query_info(xid, tcon, persistent_fid, volatile_fid, FILE_ALL_INFORMATION, - sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + sizeof(struct smb2_file_all_info) + PATH_MAX * 2, sizeof(struct smb2_file_all_info), data); } @@ -2177,6 +2179,10 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base; if (rc) { + if (rc == -ENODATA && rsp->hdr.Status == STATUS_NO_MORE_FILES) { + srch_inf->endOfSearch = true; + rc = 0; + } cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE); goto qdir_exit; } @@ -2214,11 +2220,6 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, else cifs_dbg(VFS, "illegal search buffer type\n"); - if (rsp->hdr.Status == STATUS_NO_MORE_FILES) - srch_inf->endOfSearch = 1; - else - srch_inf->endOfSearch = 0; - return rc; qdir_exit: diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 69f3595d395..fbe486c285a 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -573,6 +573,12 @@ struct copychunk_ioctl { __u32 Reserved2; } __packed; +/* this goes in the ioctl buffer when doing FSCTL_SET_ZERO_DATA */ +struct file_zero_data_information { + __le64 FileOffset; + __le64 BeyondFinalZero; +} __packed; + struct copychunk_ioctl_rsp { __le32 ChunksWritten; __le32 ChunkBytesWritten; diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h index 0e538b5c962..83efa59535b 100644 --- a/fs/cifs/smbfsctl.h +++ b/fs/cifs/smbfsctl.h @@ -63,7 +63,7 @@ #define FSCTL_SET_OBJECT_ID_EXTENDED 0x000900BC /* BB add struct */ #define FSCTL_CREATE_OR_GET_OBJECT_ID 0x000900C0 /* BB add struct */ #define FSCTL_SET_SPARSE 0x000900C4 /* BB add struct */ -#define FSCTL_SET_ZERO_DATA 0x000900C8 /* BB add struct */ +#define FSCTL_SET_ZERO_DATA 0x000980C8 #define FSCTL_SET_ENCRYPTION 0x000900D7 /* BB add struct */ #define FSCTL_ENCRYPTION_FSCTL_IO 0x000900DB /* BB add struct */ #define FSCTL_WRITE_RAW_ENCRYPTED 0x000900DF /* BB add struct */ diff --git a/fs/dcache.c b/fs/dcache.c index 06f65857a85..d30ce699ae4 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -731,8 +731,6 @@ EXPORT_SYMBOL(dget_parent); /** * d_find_alias - grab a hashed alias of inode * @inode: inode in question - * @want_discon: flag, used by d_splice_alias, to request - * that only a DISCONNECTED alias be returned. * * If inode has a hashed alias, or is a directory and has any alias, * acquire the reference to alias and return it. Otherwise return NULL. @@ -741,10 +739,9 @@ EXPORT_SYMBOL(dget_parent); * of a filesystem. * * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer - * any other hashed alias over that one unless @want_discon is set, - * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. + * any other hashed alias over that one. */ -static struct dentry *__d_find_alias(struct inode *inode, int want_discon) +static struct dentry *__d_find_alias(struct inode *inode) { struct dentry *alias, *discon_alias; @@ -756,7 +753,7 @@ again: if (IS_ROOT(alias) && (alias->d_flags & DCACHE_DISCONNECTED)) { discon_alias = alias; - } else if (!want_discon) { + } else { __dget_dlock(alias); spin_unlock(&alias->d_lock); return alias; @@ -768,12 +765,9 @@ again: alias = discon_alias; spin_lock(&alias->d_lock); if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { - if (IS_ROOT(alias) && - (alias->d_flags & DCACHE_DISCONNECTED)) { - __dget_dlock(alias); - spin_unlock(&alias->d_lock); - return alias; - } + __dget_dlock(alias); + spin_unlock(&alias->d_lock); + return alias; } spin_unlock(&alias->d_lock); goto again; @@ -787,7 +781,7 @@ struct dentry *d_find_alias(struct inode *inode) if (!hlist_empty(&inode->i_dentry)) { spin_lock(&inode->i_lock); - de = __d_find_alias(inode, 0); + de = __d_find_alias(inode); spin_unlock(&inode->i_lock); } return de; @@ -1781,25 +1775,7 @@ struct dentry *d_find_any_alias(struct inode *inode) } EXPORT_SYMBOL(d_find_any_alias); -/** - * d_obtain_alias - find or allocate a dentry for a given inode - * @inode: inode to allocate the dentry for - * - * Obtain a dentry for an inode resulting from NFS filehandle conversion or - * similar open by handle operations. The returned dentry may be anonymous, - * or may have a full name (if the inode was already in the cache). - * - * When called on a directory inode, we must ensure that the inode only ever - * has one dentry. If a dentry is found, that is returned instead of - * allocating a new one. - * - * On successful return, the reference to the inode has been transferred - * to the dentry. In case of an error the reference on the inode is released. - * To make it easier to use in export operations a %NULL or IS_ERR inode may - * be passed in and will be the error will be propagate to the return value, - * with a %NULL @inode replaced by ERR_PTR(-ESTALE). - */ -struct dentry *d_obtain_alias(struct inode *inode) +static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) { static const struct qstr anonstring = QSTR_INIT("/", 1); struct dentry *tmp; @@ -1830,7 +1806,10 @@ struct dentry *d_obtain_alias(struct inode *inode) } /* attach a disconnected dentry */ - add_flags = d_flags_for_inode(inode) | DCACHE_DISCONNECTED; + add_flags = d_flags_for_inode(inode); + + if (disconnected) + add_flags |= DCACHE_DISCONNECTED; spin_lock(&tmp->d_lock); tmp->d_inode = inode; @@ -1851,59 +1830,51 @@ struct dentry *d_obtain_alias(struct inode *inode) iput(inode); return res; } -EXPORT_SYMBOL(d_obtain_alias); /** - * d_splice_alias - splice a disconnected dentry into the tree if one exists - * @inode: the inode which may have a disconnected dentry - * @dentry: a negative dentry which we want to point to the inode. - * - * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and - * DCACHE_DISCONNECTED), then d_move that in place of the given dentry - * and return it, else simply d_add the inode to the dentry and return NULL. + * d_obtain_alias - find or allocate a DISCONNECTED dentry for a given inode + * @inode: inode to allocate the dentry for * - * This is needed in the lookup routine of any filesystem that is exportable - * (via knfsd) so that we can build dcache paths to directories effectively. + * Obtain a dentry for an inode resulting from NFS filehandle conversion or + * similar open by handle operations. The returned dentry may be anonymous, + * or may have a full name (if the inode was already in the cache). * - * If a dentry was found and moved, then it is returned. Otherwise NULL - * is returned. This matches the expected return value of ->lookup. + * When called on a directory inode, we must ensure that the inode only ever + * has one dentry. If a dentry is found, that is returned instead of + * allocating a new one. * - * Cluster filesystems may call this function with a negative, hashed dentry. - * In that case, we know that the inode will be a regular file, and also this - * will only occur during atomic_open. So we need to check for the dentry - * being already hashed only in the final case. + * On successful return, the reference to the inode has been transferred + * to the dentry. In case of an error the reference on the inode is released. + * To make it easier to use in export operations a %NULL or IS_ERR inode may + * be passed in and the error will be propagated to the return value, + * with a %NULL @inode replaced by ERR_PTR(-ESTALE). */ -struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) +struct dentry *d_obtain_alias(struct inode *inode) { - struct dentry *new = NULL; - - if (IS_ERR(inode)) - return ERR_CAST(inode); + return __d_obtain_alias(inode, 1); +} +EXPORT_SYMBOL(d_obtain_alias); - if (inode && S_ISDIR(inode->i_mode)) { - spin_lock(&inode->i_lock); - new = __d_find_alias(inode, 1); - if (new) { - BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); - spin_unlock(&inode->i_lock); - security_d_instantiate(new, inode); - d_move(new, dentry); - iput(inode); - } else { - /* already taking inode->i_lock, so d_add() by hand */ - __d_instantiate(dentry, inode); - spin_unlock(&inode->i_lock); - security_d_instantiate(dentry, inode); - d_rehash(dentry); - } - } else { - d_instantiate(dentry, inode); - if (d_unhashed(dentry)) - d_rehash(dentry); - } - return new; +/** + * d_obtain_root - find or allocate a dentry for a given inode + * @inode: inode to allocate the dentry for + * + * Obtain an IS_ROOT dentry for the root of a filesystem. + * + * We must ensure that directory inodes only ever have one dentry. If a + * dentry is found, that is returned instead of allocating a new one. + * + * On successful return, the reference to the inode has been transferred + * to the dentry. In case of an error the reference on the inode is + * released. A %NULL or IS_ERR inode may be passed in and will be the + * error will be propagate to the return value, with a %NULL @inode + * replaced by ERR_PTR(-ESTALE). + */ +struct dentry *d_obtain_root(struct inode *inode) +{ + return __d_obtain_alias(inode, 0); } -EXPORT_SYMBOL(d_splice_alias); +EXPORT_SYMBOL(d_obtain_root); /** * d_add_ci - lookup or allocate new dentry with case-exact name @@ -2697,6 +2668,75 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) } /** + * d_splice_alias - splice a disconnected dentry into the tree if one exists + * @inode: the inode which may have a disconnected dentry + * @dentry: a negative dentry which we want to point to the inode. + * + * If inode is a directory and has an IS_ROOT alias, then d_move that in + * place of the given dentry and return it, else simply d_add the inode + * to the dentry and return NULL. + * + * If a non-IS_ROOT directory is found, the filesystem is corrupt, and + * we should error out: directories can't have multiple aliases. + * + * This is needed in the lookup routine of any filesystem that is exportable + * (via knfsd) so that we can build dcache paths to directories effectively. + * + * If a dentry was found and moved, then it is returned. Otherwise NULL + * is returned. This matches the expected return value of ->lookup. + * + * Cluster filesystems may call this function with a negative, hashed dentry. + * In that case, we know that the inode will be a regular file, and also this + * will only occur during atomic_open. So we need to check for the dentry + * being already hashed only in the final case. + */ +struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) +{ + struct dentry *new = NULL; + + if (IS_ERR(inode)) + return ERR_CAST(inode); + + if (inode && S_ISDIR(inode->i_mode)) { + spin_lock(&inode->i_lock); + new = __d_find_any_alias(inode); + if (new) { + if (!IS_ROOT(new)) { + spin_unlock(&inode->i_lock); + dput(new); + return ERR_PTR(-EIO); + } + if (d_ancestor(new, dentry)) { + spin_unlock(&inode->i_lock); + dput(new); + return ERR_PTR(-EIO); + } + write_seqlock(&rename_lock); + __d_materialise_dentry(dentry, new); + write_sequnlock(&rename_lock); + __d_drop(new); + _d_rehash(new); + spin_unlock(&new->d_lock); + spin_unlock(&inode->i_lock); + security_d_instantiate(new, inode); + iput(inode); + } else { + /* already taking inode->i_lock, so d_add() by hand */ + __d_instantiate(dentry, inode); + spin_unlock(&inode->i_lock); + security_d_instantiate(dentry, inode); + d_rehash(dentry); + } + } else { + d_instantiate(dentry, inode); + if (d_unhashed(dentry)) + d_rehash(dentry); + } + return new; +} +EXPORT_SYMBOL(d_splice_alias); + +/** * d_materialise_unique - introduce an inode into the tree * @dentry: candidate dentry * @inode: inode to bind to the dentry, to which aliases may be attached @@ -2724,7 +2764,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) struct dentry *alias; /* Does an aliased dentry already exist? */ - alias = __d_find_alias(inode, 0); + alias = __d_find_alias(inode); if (alias) { actual = alias; write_seqlock(&rename_lock); diff --git a/fs/direct-io.c b/fs/direct-io.c index 17e39b047de..c3116404ab4 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -158,7 +158,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) { ssize_t ret; - ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES * PAGE_SIZE, + ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES, &sdio->from); if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) { diff --git a/fs/eventpoll.c b/fs/eventpoll.c index b10b48c2a7a..7bcfff900f0 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1852,7 +1852,8 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, goto error_tgt_fput; /* Check if EPOLLWAKEUP is allowed */ - ep_take_care_of_epollwakeup(&epds); + if (ep_op_has_event(op)) + ep_take_care_of_epollwakeup(&epds); /* * We have to check that the file structure underneath the file descriptor diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 3750031cfa2..b88edc05c23 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -161,7 +161,7 @@ static struct kmem_cache * ext2_inode_cachep; static struct inode *ext2_alloc_inode(struct super_block *sb) { struct ext2_inode_info *ei; - ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL); + ei = kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL); if (!ei) return NULL; ei->i_block_alloc_info = NULL; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 08cdfe5461e..622e8824902 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2828,8 +2828,9 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) */ overhead += ngroups * (2 + sbi->s_itb_per_group); - /* Add the journal blocks as well */ - overhead += sbi->s_journal->j_maxlen; + /* Add the internal journal blocks as well */ + if (sbi->s_journal && !sbi->journal_bdev) + overhead += sbi->s_journal->j_maxlen; sbi->s_overhead_last = overhead; smp_wmb(); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 5b19760b1de..b0c225cdb52 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1825,7 +1825,7 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) /* * Special error return code only used by dx_probe() and its callers. */ -#define ERR_BAD_DX_DIR -75000 +#define ERR_BAD_DX_DIR (-(MAX_ERRNO - 1)) /* * Timeout and state flag for lazy initialization inode thread. @@ -2454,6 +2454,22 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) up_write(&EXT4_I(inode)->i_data_sem); } +/* Update i_size, i_disksize. Requires i_mutex to avoid races with truncate */ +static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize) +{ + int changed = 0; + + if (newsize > inode->i_size) { + i_size_write(inode, newsize); + changed = 1; + } + if (newsize > EXT4_I(inode)->i_disksize) { + ext4_update_i_disksize(inode, newsize); + changed |= 2; + } + return changed; +} + struct ext4_group_info { unsigned long bb_state; struct rb_root bb_free_root; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 76c2df382b7..74292a71b38 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4665,7 +4665,8 @@ retry: } static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, - ext4_lblk_t len, int flags, int mode) + ext4_lblk_t len, loff_t new_size, + int flags, int mode) { struct inode *inode = file_inode(file); handle_t *handle; @@ -4674,8 +4675,10 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, int retries = 0; struct ext4_map_blocks map; unsigned int credits; + loff_t epos; map.m_lblk = offset; + map.m_len = len; /* * Don't normalize the request if it can fit in one extent so * that it doesn't get unnecessarily split into multiple @@ -4690,9 +4693,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, credits = ext4_chunk_trans_blocks(inode, len); retry: - while (ret >= 0 && ret < len) { - map.m_lblk = map.m_lblk + ret; - map.m_len = len = len - ret; + while (ret >= 0 && len) { handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits); if (IS_ERR(handle)) { @@ -4709,6 +4710,21 @@ retry: ret2 = ext4_journal_stop(handle); break; } + map.m_lblk += ret; + map.m_len = len = len - ret; + epos = (loff_t)map.m_lblk << inode->i_blkbits; + inode->i_ctime = ext4_current_time(inode); + if (new_size) { + if (epos > new_size) + epos = new_size; + if (ext4_update_inode_size(inode, epos) & 0x1) + inode->i_mtime = inode->i_ctime; + } else { + if (epos > inode->i_size) + ext4_set_inode_flag(inode, + EXT4_INODE_EOFBLOCKS); + } + ext4_mark_inode_dirty(handle, inode); ret2 = ext4_journal_stop(handle); if (ret2) break; @@ -4731,7 +4747,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, loff_t new_size = 0; int ret = 0; int flags; - int partial; + int credits; + int partial_begin, partial_end; loff_t start, end; ext4_lblk_t lblk; struct address_space *mapping = inode->i_mapping; @@ -4771,7 +4788,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, if (start < offset || end > offset + len) return -EINVAL; - partial = (offset + len) & ((1 << blkbits) - 1); + partial_begin = offset & ((1 << blkbits) - 1); + partial_end = (offset + len) & ((1 << blkbits) - 1); lblk = start >> blkbits; max_blocks = (end >> blkbits); @@ -4805,7 +4823,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, * If we have a partial block after EOF we have to allocate * the entire block. */ - if (partial) + if (partial_end) max_blocks += 1; } @@ -4813,6 +4831,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, /* Now release the pages and zero block aligned part of pages*/ truncate_pagecache_range(inode, start, end - 1); + inode->i_mtime = inode->i_ctime = ext4_current_time(inode); /* Wait all existing dio workers, newcomers will block on i_mutex */ ext4_inode_block_unlocked_dio(inode); @@ -4825,13 +4844,22 @@ static long ext4_zero_range(struct file *file, loff_t offset, if (ret) goto out_dio; - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, - mode); + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, + flags, mode); if (ret) goto out_dio; } + if (!partial_begin && !partial_end) + goto out_dio; - handle = ext4_journal_start(inode, EXT4_HT_MISC, 4); + /* + * In worst case we have to writeout two nonadjacent unwritten + * blocks and update the inode + */ + credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1; + if (ext4_should_journal_data(inode)) + credits += 2; + handle = ext4_journal_start(inode, EXT4_HT_MISC, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); ext4_std_error(inode->i_sb, ret); @@ -4839,12 +4867,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, } inode->i_mtime = inode->i_ctime = ext4_current_time(inode); - if (new_size) { - if (new_size > i_size_read(inode)) - i_size_write(inode, new_size); - if (new_size > EXT4_I(inode)->i_disksize) - ext4_update_i_disksize(inode, new_size); + ext4_update_inode_size(inode, new_size); } else { /* * Mark that we allocate beyond EOF so the subsequent truncate @@ -4853,7 +4877,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, if ((offset + len) > i_size_read(inode)) ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); } - ext4_mark_inode_dirty(handle, inode); /* Zero out partial block at the edges of the range */ @@ -4880,13 +4903,11 @@ out_mutex: long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); - handle_t *handle; loff_t new_size = 0; unsigned int max_blocks; int ret = 0; int flags; ext4_lblk_t lblk; - struct timespec tv; unsigned int blkbits = inode->i_blkbits; /* Return error if mode is not supported */ @@ -4937,36 +4958,15 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) goto out; } - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode); + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, + flags, mode); if (ret) goto out; - handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); - if (IS_ERR(handle)) - goto out; - - tv = inode->i_ctime = ext4_current_time(inode); - - if (new_size) { - if (new_size > i_size_read(inode)) { - i_size_write(inode, new_size); - inode->i_mtime = tv; - } - if (new_size > EXT4_I(inode)->i_disksize) - ext4_update_i_disksize(inode, new_size); - } else { - /* - * Mark that we allocate beyond EOF so the subsequent truncate - * can proceed even if the new size is the same as i_size. - */ - if ((offset + len) > i_size_read(inode)) - ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); + if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) { + ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal, + EXT4_I(inode)->i_sync_tid); } - ext4_mark_inode_dirty(handle, inode); - if (file->f_flags & O_SYNC) - ext4_handle_sync(handle); - - ext4_journal_stop(handle); out: mutex_unlock(&inode->i_mutex); trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 367a60c07cf..3aa26e9117c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1055,27 +1055,11 @@ static int ext4_write_end(struct file *file, } else copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); - /* - * No need to use i_size_read() here, the i_size - * cannot change under us because we hole i_mutex. - * - * But it's important to update i_size while still holding page lock: + * it's important to update i_size while still holding page lock: * page writeout could otherwise come in and zero beyond i_size. */ - if (pos + copied > inode->i_size) { - i_size_write(inode, pos + copied); - i_size_changed = 1; - } - - if (pos + copied > EXT4_I(inode)->i_disksize) { - /* We need to mark inode dirty even if - * new_i_size is less that inode->i_size - * but greater than i_disksize. (hint delalloc) - */ - ext4_update_i_disksize(inode, (pos + copied)); - i_size_changed = 1; - } + i_size_changed = ext4_update_inode_size(inode, pos + copied); unlock_page(page); page_cache_release(page); @@ -1123,7 +1107,7 @@ static int ext4_journalled_write_end(struct file *file, int ret = 0, ret2; int partial = 0; unsigned from, to; - loff_t new_i_size; + int size_changed = 0; trace_ext4_journalled_write_end(inode, pos, len, copied); from = pos & (PAGE_CACHE_SIZE - 1); @@ -1146,20 +1130,18 @@ static int ext4_journalled_write_end(struct file *file, if (!partial) SetPageUptodate(page); } - new_i_size = pos + copied; - if (new_i_size > inode->i_size) - i_size_write(inode, pos+copied); + size_changed = ext4_update_inode_size(inode, pos + copied); ext4_set_inode_state(inode, EXT4_STATE_JDATA); EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; - if (new_i_size > EXT4_I(inode)->i_disksize) { - ext4_update_i_disksize(inode, new_i_size); + unlock_page(page); + page_cache_release(page); + + if (size_changed) { ret2 = ext4_mark_inode_dirty(handle, inode); if (!ret) ret = ret2; } - unlock_page(page); - page_cache_release(page); if (pos + len > inode->i_size && ext4_can_truncate(inode)) /* if we have allocated more blocks and copied * less. We will have blocks allocated outside @@ -2095,6 +2077,7 @@ static int mpage_map_and_submit_extent(handle_t *handle, struct ext4_map_blocks *map = &mpd->map; int err; loff_t disksize; + int progress = 0; mpd->io_submit.io_end->offset = ((loff_t)map->m_lblk) << inode->i_blkbits; @@ -2111,8 +2094,11 @@ static int mpage_map_and_submit_extent(handle_t *handle, * is non-zero, a commit should free up blocks. */ if ((err == -ENOMEM) || - (err == -ENOSPC && ext4_count_free_clusters(sb))) + (err == -ENOSPC && ext4_count_free_clusters(sb))) { + if (progress) + goto update_disksize; return err; + } ext4_msg(sb, KERN_CRIT, "Delayed block allocation failed for " "inode %lu at logical offset %llu with" @@ -2129,15 +2115,17 @@ static int mpage_map_and_submit_extent(handle_t *handle, *give_up_on_write = true; return err; } + progress = 1; /* * Update buffer state, submit mapped pages, and get us new * extent to map */ err = mpage_map_and_submit_buffers(mpd); if (err < 0) - return err; + goto update_disksize; } while (map->m_len); +update_disksize: /* * Update on-disk size after IO is submitted. Races with * truncate are avoided by checking i_size under i_data_sem. diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 956027711fa..8b0f9ef517d 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1412,6 +1412,8 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, int last = first + count - 1; struct super_block *sb = e4b->bd_sb; + if (WARN_ON(count == 0)) + return; BUG_ON(last >= (sb->s_blocksize << 3)); assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); /* Don't bother if the block group is corrupt. */ @@ -3221,6 +3223,8 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) int err; if (pa == NULL) { + if (ac->ac_f_ex.fe_len == 0) + return; err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b); if (err) { /* @@ -3235,6 +3239,7 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start, ac->ac_f_ex.fe_len); ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group); + ext4_mb_unload_buddy(&e4b); return; } if (pa->pa_type == MB_INODE_PA) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 3520ab8a663..603e4ebbd0a 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1227,7 +1227,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, buffer */ int num = 0; ext4_lblk_t nblocks; - int i, err; + int i, err = 0; int namelen; *res_dir = NULL; @@ -1264,7 +1264,11 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, * return. Otherwise, fall back to doing a search the * old fashioned way. */ - if (bh || (err != ERR_BAD_DX_DIR)) + if (err == -ENOENT) + return NULL; + if (err && err != ERR_BAD_DX_DIR) + return ERR_PTR(err); + if (bh) return bh; dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " "falling back\n")); @@ -1295,6 +1299,11 @@ restart: } num++; bh = ext4_getblk(NULL, dir, b++, 0, &err); + if (unlikely(err)) { + if (ra_max == 0) + return ERR_PTR(err); + break; + } bh_use[ra_max] = bh; if (bh) ll_rw_block(READ | REQ_META | REQ_PRIO, @@ -1417,6 +1426,8 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi return ERR_PTR(-ENAMETOOLONG); bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); + if (IS_ERR(bh)) + return (struct dentry *) bh; inode = NULL; if (bh) { __u32 ino = le32_to_cpu(de->inode); @@ -1450,6 +1461,8 @@ struct dentry *ext4_get_parent(struct dentry *child) struct buffer_head *bh; bh = ext4_find_entry(child->d_inode, &dotdot, &de, NULL); + if (IS_ERR(bh)) + return (struct dentry *) bh; if (!bh) return ERR_PTR(-ENOENT); ino = le32_to_cpu(de->inode); @@ -2727,6 +2740,8 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) retval = -ENOENT; bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); + if (IS_ERR(bh)) + return PTR_ERR(bh); if (!bh) goto end_rmdir; @@ -2794,6 +2809,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) retval = -ENOENT; bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); + if (IS_ERR(bh)) + return PTR_ERR(bh); if (!bh) goto end_unlink; @@ -3121,6 +3138,8 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir, struct ext4_dir_entry_2 *de; bh = ext4_find_entry(dir, d_name, &de, NULL); + if (IS_ERR(bh)) + return PTR_ERR(bh); if (bh) { retval = ext4_delete_entry(handle, dir, de, bh); brelse(bh); @@ -3128,7 +3147,8 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir, return retval; } -static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent) +static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent, + int force_reread) { int retval; /* @@ -3140,7 +3160,8 @@ static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent) if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino || ent->de->name_len != ent->dentry->d_name.len || strncmp(ent->de->name, ent->dentry->d_name.name, - ent->de->name_len)) { + ent->de->name_len) || + force_reread) { retval = ext4_find_delete_entry(handle, ent->dir, &ent->dentry->d_name); } else { @@ -3191,6 +3212,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, .dentry = new_dentry, .inode = new_dentry->d_inode, }; + int force_reread; int retval; dquot_initialize(old.dir); @@ -3202,6 +3224,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, dquot_initialize(new.inode); old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); + if (IS_ERR(old.bh)) + return PTR_ERR(old.bh); /* * Check for inode number is _not_ due to possible IO errors. * We might rmdir the source, keep it as pwd of some process @@ -3214,6 +3238,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, &new.de, &new.inlined); + if (IS_ERR(new.bh)) { + retval = PTR_ERR(new.bh); + new.bh = NULL; + goto end_rename; + } if (new.bh) { if (!new.inode) { brelse(new.bh); @@ -3246,6 +3275,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, if (retval) goto end_rename; } + /* + * If we're renaming a file within an inline_data dir and adding or + * setting the new dirent causes a conversion from inline_data to + * extents/blockmap, we need to force the dirent delete code to + * re-read the directory, or else we end up trying to delete a dirent + * from what is now the extent tree root (or a block map). + */ + force_reread = (new.dir->i_ino == old.dir->i_ino && + ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA)); if (!new.bh) { retval = ext4_add_entry(handle, new.dentry, old.inode); if (retval) @@ -3256,6 +3294,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, if (retval) goto end_rename; } + if (force_reread) + force_reread = !ext4_test_inode_flag(new.dir, + EXT4_INODE_INLINE_DATA); /* * Like most other Unix systems, set the ctime for inodes on a @@ -3267,7 +3308,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, /* * ok, that's it */ - ext4_rename_delete(handle, &old); + ext4_rename_delete(handle, &old, force_reread); if (new.inode) { ext4_dec_count(handle, new.inode); @@ -3330,6 +3371,8 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, &old.inlined); + if (IS_ERR(old.bh)) + return PTR_ERR(old.bh); /* * Check for inode number is _not_ due to possible IO errors. * We might rmdir the source, keep it as pwd of some process @@ -3342,6 +3385,11 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, &new.de, &new.inlined); + if (IS_ERR(new.bh)) { + retval = PTR_ERR(new.bh); + new.bh = NULL; + goto end_rename; + } /* RENAME_EXCHANGE case: old *and* new must both exist */ if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino) @@ -3455,7 +3503,6 @@ const struct inode_operations ext4_dir_inode_operations = { .rmdir = ext4_rmdir, .mknod = ext4_mknod, .tmpfile = ext4_tmpfile, - .rename = ext4_rename, .rename2 = ext4_rename2, .setattr = ext4_setattr, .setxattr = generic_setxattr, diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index bb0e80f03e2..1e43b905ff9 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -575,6 +575,7 @@ handle_bb: bh = bclean(handle, sb, block); if (IS_ERR(bh)) { err = PTR_ERR(bh); + bh = NULL; goto out; } overhead = ext4_group_overhead_blocks(sb, group); @@ -603,6 +604,7 @@ handle_ib: bh = bclean(handle, sb, block); if (IS_ERR(bh)) { err = PTR_ERR(bh); + bh = NULL; goto out; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 32b43ad154b..0b28b36e791 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3181,9 +3181,9 @@ static int set_journal_csum_feature_set(struct super_block *sb) if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { - /* journal checksum v2 */ + /* journal checksum v3 */ compat = 0; - incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2; + incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3; } else { /* journal checksum v1 */ compat = JBD2_FEATURE_COMPAT_CHECKSUM; @@ -3205,6 +3205,7 @@ static int set_journal_csum_feature_set(struct super_block *sb) jbd2_journal_clear_features(sbi->s_journal, JBD2_FEATURE_COMPAT_CHECKSUM, 0, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | + JBD2_FEATURE_INCOMPAT_CSUM_V3 | JBD2_FEATURE_INCOMPAT_CSUM_V2); } diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 214fe1054fc..736a348509f 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -23,7 +23,7 @@ config F2FS_STAT_FS mounted as f2fs. Each file shows the whole f2fs information. /sys/kernel/debug/f2fs/status includes: - - major file system information managed by f2fs currently + - major filesystem information managed by f2fs currently - average SIT information about whole segments - current memory footprint consumed by f2fs. @@ -68,6 +68,6 @@ config F2FS_CHECK_FS bool "F2FS consistency checking feature" depends on F2FS_FS help - Enables BUG_ONs which check the file system consistency in runtime. + Enables BUG_ONs which check the filesystem consistency in runtime. If you want to improve the performance, say N. diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6aeed5bada5..ec3b7a5381f 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -160,14 +160,11 @@ static int f2fs_write_meta_page(struct page *page, goto redirty_out; if (wbc->for_reclaim) goto redirty_out; - - /* Should not write any meta pages, if any IO error was occurred */ - if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG))) - goto no_write; + if (unlikely(f2fs_cp_error(sbi))) + goto redirty_out; f2fs_wait_on_page_writeback(page, META); write_meta_page(sbi, page); -no_write: dec_page_count(sbi, F2FS_DIRTY_META); unlock_page(page); return 0; @@ -348,7 +345,7 @@ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) return e ? true : false; } -static void release_dirty_inode(struct f2fs_sb_info *sbi) +void release_dirty_inode(struct f2fs_sb_info *sbi) { struct ino_entry *e, *tmp; int i; @@ -446,8 +443,8 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) struct f2fs_orphan_block *orphan_blk = NULL; unsigned int nentries = 0; unsigned short index; - unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans + - (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); + unsigned short orphan_blocks = + (unsigned short)GET_ORPHAN_BLOCKS(sbi->n_orphans); struct page *page = NULL; struct ino_entry *orphan = NULL; @@ -737,7 +734,7 @@ retry: /* * Freeze all the FS-operations for checkpoint. */ -static void block_operations(struct f2fs_sb_info *sbi) +static int block_operations(struct f2fs_sb_info *sbi) { struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, @@ -745,6 +742,7 @@ static void block_operations(struct f2fs_sb_info *sbi) .for_reclaim = 0, }; struct blk_plug plug; + int err = 0; blk_start_plug(&plug); @@ -754,11 +752,15 @@ retry_flush_dents: if (get_pages(sbi, F2FS_DIRTY_DENTS)) { f2fs_unlock_all(sbi); sync_dirty_dir_inodes(sbi); + if (unlikely(f2fs_cp_error(sbi))) { + err = -EIO; + goto out; + } goto retry_flush_dents; } /* - * POR: we should ensure that there is no dirty node pages + * POR: we should ensure that there are no dirty node pages * until finishing nat/sit flush. */ retry_flush_nodes: @@ -767,9 +769,16 @@ retry_flush_nodes: if (get_pages(sbi, F2FS_DIRTY_NODES)) { up_write(&sbi->node_write); sync_node_pages(sbi, 0, &wbc); + if (unlikely(f2fs_cp_error(sbi))) { + f2fs_unlock_all(sbi); + err = -EIO; + goto out; + } goto retry_flush_nodes; } +out: blk_finish_plug(&plug); + return err; } static void unblock_operations(struct f2fs_sb_info *sbi) @@ -813,8 +822,11 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg)); /* Flush all the NAT/SIT pages */ - while (get_pages(sbi, F2FS_DIRTY_META)) + while (get_pages(sbi, F2FS_DIRTY_META)) { sync_meta_pages(sbi, META, LONG_MAX); + if (unlikely(f2fs_cp_error(sbi))) + return; + } next_free_nid(sbi, &last_nid); @@ -825,7 +837,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi)); ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); - for (i = 0; i < 3; i++) { + for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { ckpt->cur_node_segno[i] = cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE)); ckpt->cur_node_blkoff[i] = @@ -833,7 +845,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) ckpt->alloc_type[i + CURSEG_HOT_NODE] = curseg_alloc_type(sbi, i + CURSEG_HOT_NODE); } - for (i = 0; i < 3; i++) { + for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) { ckpt->cur_data_segno[i] = cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA)); ckpt->cur_data_blkoff[i] = @@ -848,24 +860,23 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) /* 2 cp + n data seg summary + orphan inode blocks */ data_sum_blocks = npages_for_summary_flush(sbi); - if (data_sum_blocks < 3) + if (data_sum_blocks < NR_CURSEG_DATA_TYPE) set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); else clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); - orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1) - / F2FS_ORPHANS_PER_BLOCK; + orphan_blocks = GET_ORPHAN_BLOCKS(sbi->n_orphans); ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + orphan_blocks); if (is_umount) { set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); - ckpt->cp_pack_total_block_count = cpu_to_le32(2 + + ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+ cp_payload_blks + data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); } else { clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); - ckpt->cp_pack_total_block_count = cpu_to_le32(2 + + ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS + cp_payload_blks + data_sum_blocks + orphan_blocks); } @@ -924,6 +935,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) /* wait for previous submitted node/meta pages writeback */ wait_on_all_pages_writeback(sbi); + if (unlikely(f2fs_cp_error(sbi))) + return; + filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX); filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX); @@ -934,15 +948,17 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) /* Here, we only have one bio having CP pack */ sync_meta_pages(sbi, META_FLUSH, LONG_MAX); - if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { - clear_prefree_segments(sbi); - release_dirty_inode(sbi); - F2FS_RESET_SB_DIRT(sbi); - } + release_dirty_inode(sbi); + + if (unlikely(f2fs_cp_error(sbi))) + return; + + clear_prefree_segments(sbi); + F2FS_RESET_SB_DIRT(sbi); } /* - * We guarantee that this checkpoint procedure should not fail. + * We guarantee that this checkpoint procedure will not fail. */ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) { @@ -952,7 +968,13 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops"); mutex_lock(&sbi->cp_mutex); - block_operations(sbi); + + if (!sbi->s_dirty) + goto out; + if (unlikely(f2fs_cp_error(sbi))) + goto out; + if (block_operations(sbi)) + goto out; trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops"); @@ -976,9 +998,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) do_checkpoint(sbi, is_umount); unblock_operations(sbi); - mutex_unlock(&sbi->cp_mutex); - stat_inc_cp_count(sbi->stat_info); +out: + mutex_unlock(&sbi->cp_mutex); trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); } @@ -999,8 +1021,8 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) * for cp pack we can have max 1020*504 orphan entries */ sbi->n_orphans = 0; - sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE) - * F2FS_ORPHANS_PER_BLOCK; + sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - + NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK; } int __init create_checkpoint_caches(void) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 03313099c51..76de83e25a8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -53,7 +53,7 @@ static void f2fs_write_end_io(struct bio *bio, int err) struct page *page = bvec->bv_page; if (unlikely(err)) { - SetPageError(page); + set_page_dirty(page); set_bit(AS_EIO, &page->mapping->flags); f2fs_stop_checkpoint(sbi); } @@ -691,7 +691,7 @@ get_next: allocated = true; blkaddr = dn.data_blkaddr; } - /* Give more consecutive addresses for the read ahead */ + /* Give more consecutive addresses for the readahead */ if (blkaddr == (bh_result->b_blocknr + ofs)) { ofs++; dn.ofs_in_node++; @@ -739,7 +739,7 @@ static int f2fs_read_data_page(struct file *file, struct page *page) trace_f2fs_readpage(page, DATA); - /* If the file has inline data, try to read it directlly */ + /* If the file has inline data, try to read it directly */ if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); else @@ -836,10 +836,19 @@ write: /* Dentry blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode)) { + if (unlikely(f2fs_cp_error(sbi))) + goto redirty_out; err = do_write_data_page(page, &fio); goto done; } + /* we should bypass data pages to proceed the kworkder jobs */ + if (unlikely(f2fs_cp_error(sbi))) { + SetPageError(page); + unlock_page(page); + return 0; + } + if (!wbc->for_reclaim) need_balance_fs = true; else if (has_not_enough_free_secs(sbi, 0)) @@ -927,7 +936,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) if (to > inode->i_size) { truncate_pagecache(inode, inode->i_size); - truncate_blocks(inode, inode->i_size); + truncate_blocks(inode, inode->i_size, true); } } @@ -946,7 +955,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, f2fs_balance_fs(sbi); repeat: - err = f2fs_convert_inline_data(inode, pos + len); + err = f2fs_convert_inline_data(inode, pos + len, NULL); if (err) goto fail; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index a441ba33be1..fecebdbfd78 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -32,7 +32,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) struct f2fs_stat_info *si = F2FS_STAT(sbi); int i; - /* valid check of the segment numbers */ + /* validation check of the segment numbers */ si->hit_ext = sbi->read_hit_ext; si->total_ext = sbi->total_hit_ext; si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); @@ -152,7 +152,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi)); si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); - /* buld nm */ + /* build nm */ si->base_mem += sizeof(struct f2fs_nm_info); si->base_mem += __bitmap_size(sbi, NAT_BITMAP); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index bcf893c3d90..155fb056b7f 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -124,7 +124,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, /* * For the most part, it should be a bug when name_len is zero. - * We stop here for figuring out where the bugs are occurred. + * We stop here for figuring out where the bugs has occurred. */ f2fs_bug_on(!de->name_len); @@ -391,7 +391,7 @@ put_error: error: /* once the failed inode becomes a bad inode, i_mode is S_IFREG */ truncate_inode_pages(&inode->i_data, 0); - truncate_blocks(inode, 0); + truncate_blocks(inode, 0, false); remove_dirty_dir_inode(inode); remove_inode_page(inode); return ERR_PTR(err); @@ -563,7 +563,7 @@ fail: } /* - * It only removes the dentry from the dentry page,corresponding name + * It only removes the dentry from the dentry page, corresponding name * entry in name page does not need to be touched during deletion. */ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4dab5338a97..e921242186f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -24,7 +24,7 @@ #define f2fs_bug_on(condition) BUG_ON(condition) #define f2fs_down_write(x, y) down_write_nest_lock(x, y) #else -#define f2fs_bug_on(condition) +#define f2fs_bug_on(condition) WARN_ON(condition) #define f2fs_down_write(x, y) down_write(x) #endif @@ -395,7 +395,7 @@ enum count_type { }; /* - * The below are the page types of bios used in submti_bio(). + * The below are the page types of bios used in submit_bio(). * The available types are: * DATA User data pages. It operates as async mode. * NODE Node pages. It operates as async mode. @@ -470,7 +470,7 @@ struct f2fs_sb_info { struct list_head dir_inode_list; /* dir inode list */ spinlock_t dir_inode_lock; /* for dir inode list lock */ - /* basic file system units */ + /* basic filesystem units */ unsigned int log_sectors_per_block; /* log2 sectors per block */ unsigned int log_blocksize; /* log2 block size */ unsigned int blocksize; /* block size */ @@ -799,7 +799,7 @@ static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) /* * odd numbered checkpoint should at cp segment 0 - * and even segent must be at cp segment 1 + * and even segment must be at cp segment 1 */ if (!(ckpt_version & 1)) start_addr += sbi->blocks_per_seg; @@ -1096,6 +1096,11 @@ static inline int f2fs_readonly(struct super_block *sb) return sb->s_flags & MS_RDONLY; } +static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi) +{ + return is_set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); +} + static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi) { set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); @@ -1117,7 +1122,7 @@ static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi) */ int f2fs_sync_file(struct file *, loff_t, loff_t, int); void truncate_data_blocks(struct dnode_of_data *); -int truncate_blocks(struct inode *, u64); +int truncate_blocks(struct inode *, u64, bool); void f2fs_truncate(struct inode *); int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); int f2fs_setattr(struct dentry *, struct iattr *); @@ -1202,10 +1207,8 @@ int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *); bool alloc_nid(struct f2fs_sb_info *, nid_t *); void alloc_nid_done(struct f2fs_sb_info *, nid_t); void alloc_nid_failed(struct f2fs_sb_info *, nid_t); -void recover_node_page(struct f2fs_sb_info *, struct page *, - struct f2fs_summary *, struct node_info *, block_t); void recover_inline_xattr(struct inode *, struct page *); -bool recover_xattr_data(struct inode *, struct page *, block_t); +void recover_xattr_data(struct inode *, struct page *, block_t); int recover_inode_page(struct f2fs_sb_info *, struct page *); int restore_node_summary(struct f2fs_sb_info *, unsigned int, struct f2fs_summary_block *); @@ -1238,8 +1241,6 @@ void write_data_page(struct page *, struct dnode_of_data *, block_t *, void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *); void recover_data_page(struct f2fs_sb_info *, struct page *, struct f2fs_summary *, block_t, block_t); -void rewrite_node_page(struct f2fs_sb_info *, struct page *, - struct f2fs_summary *, block_t, block_t); void allocate_data_block(struct f2fs_sb_info *, struct page *, block_t, block_t *, struct f2fs_summary *, int); void f2fs_wait_on_page_writeback(struct page *, enum page_type); @@ -1262,6 +1263,7 @@ int ra_meta_pages(struct f2fs_sb_info *, int, int, int); long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); +void release_dirty_inode(struct f2fs_sb_info *); bool exist_written_data(struct f2fs_sb_info *, nid_t, int); int acquire_orphan_inode(struct f2fs_sb_info *); void release_orphan_inode(struct f2fs_sb_info *); @@ -1439,8 +1441,8 @@ extern const struct inode_operations f2fs_special_inode_operations; */ bool f2fs_may_inline(struct inode *); int f2fs_read_inline_data(struct inode *, struct page *); -int f2fs_convert_inline_data(struct inode *, pgoff_t); +int f2fs_convert_inline_data(struct inode *, pgoff_t, struct page *); int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); void truncate_inline_data(struct inode *, u64); -int recover_inline_data(struct inode *, struct page *); +bool recover_inline_data(struct inode *, struct page *); #endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 208f1a9bd56..060aee65aee 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -41,6 +41,11 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, sb_start_pagefault(inode->i_sb); + /* force to convert with normal data indices */ + err = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, page); + if (err) + goto out; + /* block allocation */ f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -110,6 +115,25 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) return 1; } +static inline bool need_do_checkpoint(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + bool need_cp = false; + + if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) + need_cp = true; + else if (file_wrong_pino(inode)) + need_cp = true; + else if (!space_for_roll_forward(sbi)) + need_cp = true; + else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) + need_cp = true; + else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) + need_cp = true; + + return need_cp; +} + int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; @@ -154,23 +178,12 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) /* guarantee free sections for fsync */ f2fs_balance_fs(sbi); - down_read(&fi->i_sem); - /* * Both of fdatasync() and fsync() are able to be recovered from * sudden-power-off. */ - if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) - need_cp = true; - else if (file_wrong_pino(inode)) - need_cp = true; - else if (!space_for_roll_forward(sbi)) - need_cp = true; - else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) - need_cp = true; - else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) - need_cp = true; - + down_read(&fi->i_sem); + need_cp = need_do_checkpoint(inode); up_read(&fi->i_sem); if (need_cp) { @@ -288,7 +301,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) if (err && err != -ENOENT) { goto fail; } else if (err == -ENOENT) { - /* direct node is not exist */ + /* direct node does not exists */ if (whence == SEEK_DATA) { pgofs = PGOFS_OF_NEXT_DNODE(pgofs, F2FS_I(inode)); @@ -417,7 +430,7 @@ out: f2fs_put_page(page, 1); } -int truncate_blocks(struct inode *inode, u64 from) +int truncate_blocks(struct inode *inode, u64 from, bool lock) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); unsigned int blocksize = inode->i_sb->s_blocksize; @@ -433,14 +446,16 @@ int truncate_blocks(struct inode *inode, u64 from) free_from = (pgoff_t) ((from + blocksize - 1) >> (sbi->log_blocksize)); - f2fs_lock_op(sbi); + if (lock) + f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); if (err) { if (err == -ENOENT) goto free_next; - f2fs_unlock_op(sbi); + if (lock) + f2fs_unlock_op(sbi); trace_f2fs_truncate_blocks_exit(inode, err); return err; } @@ -458,7 +473,8 @@ int truncate_blocks(struct inode *inode, u64 from) f2fs_put_dnode(&dn); free_next: err = truncate_inode_blocks(inode, free_from); - f2fs_unlock_op(sbi); + if (lock) + f2fs_unlock_op(sbi); done: /* lastly zero out the first data page */ truncate_partial_data_page(inode, from); @@ -475,7 +491,7 @@ void f2fs_truncate(struct inode *inode) trace_f2fs_truncate(inode); - if (!truncate_blocks(inode, i_size_read(inode))) { + if (!truncate_blocks(inode, i_size_read(inode), true)) { inode->i_mtime = inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); } @@ -533,7 +549,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size != i_size_read(inode)) { - err = f2fs_convert_inline_data(inode, attr->ia_size); + err = f2fs_convert_inline_data(inode, attr->ia_size, NULL); if (err) return err; @@ -622,7 +638,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) loff_t off_start, off_end; int ret = 0; - ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1); + ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); if (ret) return ret; @@ -678,7 +694,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (ret) return ret; - ret = f2fs_convert_inline_data(inode, offset + len); + ret = f2fs_convert_inline_data(inode, offset + len, NULL); if (ret) return ret; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d7947d90ccc..943a31db7cc 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -58,7 +58,7 @@ static int gc_thread_func(void *data) * 3. IO subsystem is idle by checking the # of requests in * bdev's request list. * - * Note) We have to avoid triggering GCs too much frequently. + * Note) We have to avoid triggering GCs frequently. * Because it is possible that some segments can be * invalidated soon after by user update or deletion. * So, I'd like to wait some time to collect dirty segments. @@ -222,7 +222,7 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) u = (vblocks * 100) >> sbi->log_blocks_per_seg; - /* Handle if the system time is changed by user */ + /* Handle if the system time has changed by the user */ if (mtime < sit_i->min_mtime) sit_i->min_mtime = mtime; if (mtime > sit_i->max_mtime) @@ -593,7 +593,7 @@ next_step: if (phase == 2) { inode = f2fs_iget(sb, dni.ino); - if (IS_ERR(inode)) + if (IS_ERR(inode) || is_bad_inode(inode)) continue; start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); @@ -693,7 +693,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi) gc_more: if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) goto stop; - if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG))) + if (unlikely(f2fs_cp_error(sbi))) goto stop; if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 5d5eb6047bf..16f0b2b2299 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -91,7 +91,7 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) block_t invalid_user_blocks = sbi->user_block_count - written_block_count(sbi); /* - * Background GC is triggered with the following condition. + * Background GC is triggered with the following conditions. * 1. There are a number of invalid blocks. * 2. There is not enough free space. */ diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index 948d17bf728..a844fcfb9a8 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -42,7 +42,8 @@ static void TEA_transform(unsigned int buf[4], unsigned int const in[]) buf[1] += b1; } -static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num) +static void str2hashbuf(const unsigned char *msg, size_t len, + unsigned int *buf, int num) { unsigned pad, val; int i; @@ -73,9 +74,9 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info) { __u32 hash; f2fs_hash_t f2fs_hash; - const char *p; + const unsigned char *p; __u32 in[8], buf[4]; - const char *name = name_info->name; + const unsigned char *name = name_info->name; size_t len = name_info->len; if ((len <= 2) && (name[0] == '.') && diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 5beeccef9ae..3e8ecdf3742 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -68,7 +68,7 @@ out: static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) { - int err; + int err = 0; struct page *ipage; struct dnode_of_data dn; void *src_addr, *dst_addr; @@ -86,6 +86,10 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) goto out; } + /* someone else converted inline_data already */ + if (!f2fs_has_inline_data(inode)) + goto out; + /* * i_addr[0] is not used for inline data, * so reserving new block will not destroy inline data @@ -124,9 +128,10 @@ out: return err; } -int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size) +int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size, + struct page *page) { - struct page *page; + struct page *new_page = page; int err; if (!f2fs_has_inline_data(inode)) @@ -134,17 +139,20 @@ int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size) else if (to_size <= MAX_INLINE_DATA) return 0; - page = grab_cache_page(inode->i_mapping, 0); - if (!page) - return -ENOMEM; + if (!page || page->index != 0) { + new_page = grab_cache_page(inode->i_mapping, 0); + if (!new_page) + return -ENOMEM; + } - err = __f2fs_convert_inline_data(inode, page); - f2fs_put_page(page, 1); + err = __f2fs_convert_inline_data(inode, new_page); + if (!page || page->index != 0) + f2fs_put_page(new_page, 1); return err; } int f2fs_write_inline_data(struct inode *inode, - struct page *page, unsigned size) + struct page *page, unsigned size) { void *src_addr, *dst_addr; struct page *ipage; @@ -199,7 +207,7 @@ void truncate_inline_data(struct inode *inode, u64 from) f2fs_put_page(ipage, 1); } -int recover_inline_data(struct inode *inode, struct page *npage) +bool recover_inline_data(struct inode *inode, struct page *npage) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct f2fs_inode *ri = NULL; @@ -218,7 +226,7 @@ int recover_inline_data(struct inode *inode, struct page *npage) ri = F2FS_INODE(npage); if (f2fs_has_inline_data(inode) && - ri && ri->i_inline & F2FS_INLINE_DATA) { + ri && (ri->i_inline & F2FS_INLINE_DATA)) { process_inline: ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(IS_ERR(ipage)); @@ -230,7 +238,7 @@ process_inline: memcpy(dst_addr, src_addr, MAX_INLINE_DATA); update_inode(inode, ipage); f2fs_put_page(ipage, 1); - return -1; + return true; } if (f2fs_has_inline_data(inode)) { @@ -242,10 +250,10 @@ process_inline: clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); update_inode(inode, ipage); f2fs_put_page(ipage, 1); - } else if (ri && ri->i_inline & F2FS_INLINE_DATA) { - truncate_blocks(inode, 0); + } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { + truncate_blocks(inode, 0, false); set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); goto process_inline; } - return 0; + return false; } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 27b03776ffd..ee103fd7283 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -134,9 +134,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, return 0; out: clear_nlink(inode); - unlock_new_inode(inode); - make_bad_inode(inode); - iput(inode); + iget_failed(inode); alloc_nid_failed(sbi, ino); return err; } @@ -229,7 +227,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) f2fs_delete_entry(de, page, inode); f2fs_unlock_op(sbi); - /* In order to evict this inode, we set it dirty */ + /* In order to evict this inode, we set it dirty */ mark_inode_dirty(inode); fail: trace_f2fs_unlink_exit(inode, err); @@ -267,9 +265,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, return err; out: clear_nlink(inode); - unlock_new_inode(inode); - make_bad_inode(inode); - iput(inode); + iget_failed(inode); alloc_nid_failed(sbi, inode->i_ino); return err; } @@ -308,9 +304,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) out_fail: clear_inode_flag(F2FS_I(inode), FI_INC_LINK); clear_nlink(inode); - unlock_new_inode(inode); - make_bad_inode(inode); - iput(inode); + iget_failed(inode); alloc_nid_failed(sbi, inode->i_ino); return err; } @@ -354,9 +348,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, return 0; out: clear_nlink(inode); - unlock_new_inode(inode); - make_bad_inode(inode); - iput(inode); + iget_failed(inode); alloc_nid_failed(sbi, inode->i_ino); return err; } @@ -688,9 +680,7 @@ release_out: out: f2fs_unlock_op(sbi); clear_nlink(inode); - unlock_new_inode(inode); - make_bad_inode(inode); - iput(inode); + iget_failed(inode); alloc_nid_failed(sbi, inode->i_ino); return err; } @@ -704,7 +694,6 @@ const struct inode_operations f2fs_dir_inode_operations = { .mkdir = f2fs_mkdir, .rmdir = f2fs_rmdir, .mknod = f2fs_mknod, - .rename = f2fs_rename, .rename2 = f2fs_rename2, .tmpfile = f2fs_tmpfile, .getattr = f2fs_getattr, diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d3d90d28463..45378196e19 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -237,7 +237,7 @@ retry: nat_get_blkaddr(e) != NULL_ADDR && new_blkaddr == NEW_ADDR); - /* increament version no as node is removed */ + /* increment version no as node is removed */ if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { unsigned char version = nat_get_version(e); nat_set_version(e, inc_node_version(version)); @@ -274,7 +274,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) } /* - * This function returns always success + * This function always returns success */ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) { @@ -650,7 +650,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, /* get indirect nodes in the path */ for (i = 0; i < idx + 1; i++) { - /* refernece count'll be increased */ + /* reference count'll be increased */ pages[i] = get_node_page(sbi, nid[i]); if (IS_ERR(pages[i])) { err = PTR_ERR(pages[i]); @@ -823,22 +823,26 @@ int truncate_xattr_node(struct inode *inode, struct page *page) */ void remove_inode_page(struct inode *inode) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct page *page; - nid_t ino = inode->i_ino; struct dnode_of_data dn; - page = get_node_page(sbi, ino); - if (IS_ERR(page)) + set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); + if (get_dnode_of_data(&dn, 0, LOOKUP_NODE)) return; - if (truncate_xattr_node(inode, page)) { - f2fs_put_page(page, 1); + if (truncate_xattr_node(inode, dn.inode_page)) { + f2fs_put_dnode(&dn); return; } - /* 0 is possible, after f2fs_new_inode() is failed */ + + /* remove potential inline_data blocks */ + if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode)) + truncate_data_blocks_range(&dn, 1); + + /* 0 is possible, after f2fs_new_inode() has failed */ f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1); - set_new_dnode(&dn, inode, page, page, ino); + + /* will put inode & node pages */ truncate_node(&dn); } @@ -1129,8 +1133,11 @@ continue_unlock: set_fsync_mark(page, 0); set_dentry_mark(page, 0); } - NODE_MAPPING(sbi)->a_ops->writepage(page, wbc); - wrote++; + + if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc)) + unlock_page(page); + else + wrote++; if (--wbc->nr_to_write == 0) break; @@ -1212,6 +1219,8 @@ static int f2fs_write_node_page(struct page *page, if (unlikely(sbi->por_doing)) goto redirty_out; + if (unlikely(f2fs_cp_error(sbi))) + goto redirty_out; f2fs_wait_on_page_writeback(page, NODE); @@ -1540,15 +1549,6 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) kmem_cache_free(free_nid_slab, i); } -void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, - struct f2fs_summary *sum, struct node_info *ni, - block_t new_blkaddr) -{ - rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr); - set_node_addr(sbi, ni, new_blkaddr, false); - clear_node_page_dirty(page); -} - void recover_inline_xattr(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); @@ -1557,40 +1557,33 @@ void recover_inline_xattr(struct inode *inode, struct page *page) struct page *ipage; struct f2fs_inode *ri; - if (!f2fs_has_inline_xattr(inode)) - return; - - if (!IS_INODE(page)) - return; - - ri = F2FS_INODE(page); - if (!(ri->i_inline & F2FS_INLINE_XATTR)) - return; - ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(IS_ERR(ipage)); + ri = F2FS_INODE(page); + if (!(ri->i_inline & F2FS_INLINE_XATTR)) { + clear_inode_flag(F2FS_I(inode), FI_INLINE_XATTR); + goto update_inode; + } + dst_addr = inline_xattr_addr(ipage); src_addr = inline_xattr_addr(page); inline_size = inline_xattr_size(inode); f2fs_wait_on_page_writeback(ipage, NODE); memcpy(dst_addr, src_addr, inline_size); - +update_inode: update_inode(inode, ipage); f2fs_put_page(ipage, 1); } -bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) +void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; nid_t new_xnid = nid_of_node(page); struct node_info ni; - if (!f2fs_has_xattr_block(ofs_of_node(page))) - return false; - /* 1: invalidate the previous xattr nid */ if (!prev_xnid) goto recover_xnid; @@ -1618,7 +1611,6 @@ recover_xnid: set_node_addr(sbi, &ni, blkaddr, false); update_inode_page(inode); - return true; } int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) @@ -1637,7 +1629,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) if (!ipage) return -ENOMEM; - /* Should not use this inode from free nid list */ + /* Should not use this inode from free nid list */ remove_free_nid(NM_I(sbi), ino); SetPageUptodate(ipage); @@ -1651,6 +1643,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) dst->i_blocks = cpu_to_le64(1); dst->i_links = cpu_to_le32(1); dst->i_xattr_nid = 0; + dst->i_inline = src->i_inline & F2FS_INLINE_XATTR; new_ni = old_ni; new_ni.ino = ino; @@ -1659,13 +1652,14 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) WARN_ON(1); set_node_addr(sbi, &new_ni, NEW_ADDR, false); inc_valid_inode_count(sbi); + set_page_dirty(ipage); f2fs_put_page(ipage, 1); return 0; } /* * ra_sum_pages() merge contiguous pages into one bio and submit. - * these pre-readed pages are alloced in bd_inode's mapping tree. + * these pre-read pages are allocated in bd_inode's mapping tree. */ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages, int start, int nrpages) @@ -1709,7 +1703,7 @@ int restore_node_summary(struct f2fs_sb_info *sbi, for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) { nrpages = min(last_offset - i, bio_blocks); - /* read ahead node pages */ + /* readahead node pages */ nrpages = ra_sum_pages(sbi, pages, addr, nrpages); if (!nrpages) return -ENOMEM; @@ -1967,7 +1961,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; /* not used nids: 0, node, meta, (and root counted as valid node) */ - nm_i->available_nids = nm_i->max_nid - 3; + nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM; nm_i->fcnt = 0; nm_i->nat_cnt = 0; nm_i->ram_thresh = DEF_RAM_THRESHOLD; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index fe1c6d921ba..756c41cd258 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -62,8 +62,10 @@ static int recover_dentry(struct page *ipage, struct inode *inode) } retry: de = f2fs_find_entry(dir, &name, &page); - if (de && inode->i_ino == le32_to_cpu(de->ino)) + if (de && inode->i_ino == le32_to_cpu(de->ino)) { + clear_inode_flag(F2FS_I(inode), FI_INC_LINK); goto out_unmap_put; + } if (de) { einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); if (IS_ERR(einode)) { @@ -300,14 +302,19 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct node_info ni; int err = 0, recovered = 0; - recover_inline_xattr(inode, page); - - if (recover_inline_data(inode, page)) + /* step 1: recover xattr */ + if (IS_INODE(page)) { + recover_inline_xattr(inode, page); + } else if (f2fs_has_xattr_block(ofs_of_node(page))) { + recover_xattr_data(inode, page, blkaddr); goto out; + } - if (recover_xattr_data(inode, page, blkaddr)) + /* step 2: recover inline data */ + if (recover_inline_data(inode, page)) goto out; + /* step 3: recover data indices */ start = start_bidx_of_node(ofs_of_node(page), fi); end = start + ADDRS_PER_PAGE(page, fi); @@ -364,8 +371,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, fill_node_footer(dn.node_page, dn.nid, ni.ino, ofs_of_node(page), false); set_page_dirty(dn.node_page); - - recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); err: f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); @@ -452,6 +457,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) /* step #1: find fsynced inode numbers */ sbi->por_doing = true; + /* prevent checkpoint */ + mutex_lock(&sbi->cp_mutex); + blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); err = find_fsync_dnodes(sbi, &inode_list); @@ -465,7 +473,8 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) /* step #2: recover data */ err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); - f2fs_bug_on(!list_empty(&inode_list)); + if (!err) + f2fs_bug_on(!list_empty(&inode_list)); out: destroy_fsync_dnodes(&inode_list); kmem_cache_destroy(fsync_entry_slab); @@ -482,8 +491,13 @@ out: /* Flush all the NAT/SIT pages */ while (get_pages(sbi, F2FS_DIRTY_META)) sync_meta_pages(sbi, META, LONG_MAX); + set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); + mutex_unlock(&sbi->cp_mutex); } else if (need_writecp) { + mutex_unlock(&sbi->cp_mutex); write_checkpoint(sbi, false); + } else { + mutex_unlock(&sbi->cp_mutex); } return err; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0dfeebae2a5..0aa337cd5bb 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -62,7 +62,7 @@ static inline unsigned long __reverse_ffs(unsigned long word) } /* - * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c becasue + * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because * f2fs_set_bit makes MSB and LSB reversed in a byte. * Example: * LSB <--> MSB @@ -808,7 +808,7 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, } /* - * This function always allocates a used segment (from dirty seglist) by SSR + * This function always allocates a used segment(from dirty seglist) by SSR * manner, so it should recover the existing segment information of valid blocks */ static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) @@ -1103,55 +1103,6 @@ void recover_data_page(struct f2fs_sb_info *sbi, mutex_unlock(&curseg->curseg_mutex); } -void rewrite_node_page(struct f2fs_sb_info *sbi, - struct page *page, struct f2fs_summary *sum, - block_t old_blkaddr, block_t new_blkaddr) -{ - struct sit_info *sit_i = SIT_I(sbi); - int type = CURSEG_WARM_NODE; - struct curseg_info *curseg; - unsigned int segno, old_cursegno; - block_t next_blkaddr = next_blkaddr_of_node(page); - unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr); - struct f2fs_io_info fio = { - .type = NODE, - .rw = WRITE_SYNC, - }; - - curseg = CURSEG_I(sbi, type); - - mutex_lock(&curseg->curseg_mutex); - mutex_lock(&sit_i->sentry_lock); - - segno = GET_SEGNO(sbi, new_blkaddr); - old_cursegno = curseg->segno; - - /* change the current segment */ - if (segno != curseg->segno) { - curseg->next_segno = segno; - change_curseg(sbi, type, true); - } - curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr); - __add_sum_entry(sbi, type, sum); - - /* change the current log to the next block addr in advance */ - if (next_segno != segno) { - curseg->next_segno = next_segno; - change_curseg(sbi, type, true); - } - curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, next_blkaddr); - - /* rewrite node page */ - set_page_writeback(page); - f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio); - f2fs_submit_merged_bio(sbi, NODE, WRITE); - refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); - locate_dirty_segment(sbi, old_cursegno); - - mutex_unlock(&sit_i->sentry_lock); - mutex_unlock(&curseg->curseg_mutex); -} - static inline bool is_merged_page(struct f2fs_sb_info *sbi, struct page *page, enum page_type type) { diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 55973f7b033..ff483257283 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -549,7 +549,7 @@ static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) } /* - * Summary block is always treated as invalid block + * Summary block is always treated as an invalid block */ static inline void check_block_count(struct f2fs_sb_info *sbi, int segno, struct f2fs_sit_entry *raw_sit) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 657582fc760..41bdf511003 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -432,9 +432,15 @@ static void f2fs_put_super(struct super_block *sb) stop_gc_thread(sbi); /* We don't need to do checkpoint when it's clean */ - if (sbi->s_dirty && get_pages(sbi, F2FS_DIRTY_NODES)) + if (sbi->s_dirty) write_checkpoint(sbi, true); + /* + * normally superblock is clean, so we need to release this. + * In addition, EIO will skip do checkpoint, we need this as well. + */ + release_dirty_inode(sbi); + iput(sbi->node_inode); iput(sbi->meta_inode); @@ -457,9 +463,6 @@ int f2fs_sync_fs(struct super_block *sb, int sync) trace_f2fs_sync_fs(sb, sync); - if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES)) - return 0; - if (sync) { mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, false); @@ -505,8 +508,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count; buf->f_bavail = user_block_count - valid_user_blocks(sbi); - buf->f_files = sbi->total_node_count; - buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi); + buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; + buf->f_ffree = buf->f_files - valid_inode_count(sbi); buf->f_namelen = F2FS_NAME_LEN; buf->f_fsid.val[0] = (u32)id; @@ -663,7 +666,7 @@ restore_gc: if (need_restart_gc) { if (start_gc_thread(sbi)) f2fs_msg(sbi->sb, KERN_WARNING, - "background gc thread is stop"); + "background gc thread has stopped"); } else if (need_stop_gc) { stop_gc_thread(sbi); } @@ -812,7 +815,7 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi) if (unlikely(fsmeta >= total)) return 1; - if (unlikely(is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) { + if (unlikely(f2fs_cp_error(sbi))) { f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); return 1; } @@ -899,8 +902,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) struct buffer_head *raw_super_buf; struct inode *root; long err = -EINVAL; + bool retry = true; int i; +try_onemore: /* allocate memory for f2fs-specific super block info */ sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); if (!sbi) @@ -1080,9 +1085,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { err = recover_fsync_data(sbi); - if (err) + if (err) { f2fs_msg(sb, KERN_ERR, "Cannot recover all fsync data errno=%ld", err); + goto free_kobj; + } } /* @@ -1123,6 +1130,13 @@ free_sb_buf: brelse(raw_super_buf); free_sbi: kfree(sbi); + + /* give only one another chance */ + if (retry) { + retry = 0; + shrink_dcache_sb(sb); + goto try_onemore; + } return err; } diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 8bea941ee30..728a5dc3dc1 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -528,7 +528,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, int free; /* * If value is NULL, it is remove operation. - * In case of update operation, we caculate free. + * In case of update operation, we calculate free. */ free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr); if (found) diff --git a/fs/fs_pin.c b/fs/fs_pin.c new file mode 100644 index 00000000000..9368236ca10 --- /dev/null +++ b/fs/fs_pin.c @@ -0,0 +1,78 @@ +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/fs_pin.h> +#include "internal.h" +#include "mount.h" + +static void pin_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct fs_pin, rcu)); +} + +static DEFINE_SPINLOCK(pin_lock); + +void pin_put(struct fs_pin *p) +{ + if (atomic_long_dec_and_test(&p->count)) + call_rcu(&p->rcu, pin_free_rcu); +} + +void pin_remove(struct fs_pin *pin) +{ + spin_lock(&pin_lock); + hlist_del(&pin->m_list); + hlist_del(&pin->s_list); + spin_unlock(&pin_lock); +} + +void pin_insert(struct fs_pin *pin, struct vfsmount *m) +{ + spin_lock(&pin_lock); + hlist_add_head(&pin->s_list, &m->mnt_sb->s_pins); + hlist_add_head(&pin->m_list, &real_mount(m)->mnt_pins); + spin_unlock(&pin_lock); +} + +void mnt_pin_kill(struct mount *m) +{ + while (1) { + struct hlist_node *p; + struct fs_pin *pin; + rcu_read_lock(); + p = ACCESS_ONCE(m->mnt_pins.first); + if (!p) { + rcu_read_unlock(); + break; + } + pin = hlist_entry(p, struct fs_pin, m_list); + if (!atomic_long_inc_not_zero(&pin->count)) { + rcu_read_unlock(); + cpu_relax(); + continue; + } + rcu_read_unlock(); + pin->kill(pin); + } +} + +void sb_pin_kill(struct super_block *sb) +{ + while (1) { + struct hlist_node *p; + struct fs_pin *pin; + rcu_read_lock(); + p = ACCESS_ONCE(sb->s_pins.first); + if (!p) { + rcu_read_unlock(); + break; + } + pin = hlist_entry(p, struct fs_pin, s_list); + if (!atomic_long_inc_not_zero(&pin->count)) { + rcu_read_unlock(); + cpu_relax(); + continue; + } + rcu_read_unlock(); + pin->kill(pin); + } +} diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 0c6048247a3..de1d84af9f7 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -845,12 +845,6 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent, return err; } -static int fuse_rename(struct inode *olddir, struct dentry *oldent, - struct inode *newdir, struct dentry *newent) -{ - return fuse_rename2(olddir, oldent, newdir, newent, 0); -} - static int fuse_link(struct dentry *entry, struct inode *newdir, struct dentry *newent) { @@ -2024,7 +2018,6 @@ static const struct inode_operations fuse_dir_inode_operations = { .symlink = fuse_symlink, .unlink = fuse_unlink, .rmdir = fuse_rmdir, - .rename = fuse_rename, .rename2 = fuse_rename2, .link = fuse_link, .setattr = fuse_setattr, diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 40ac2628ddc..912061ac4ba 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1303,10 +1303,10 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, while (nbytes < *nbytesp && req->num_pages < req->max_pages) { unsigned npages; size_t start; - unsigned n = req->max_pages - req->num_pages; ssize_t ret = iov_iter_get_pages(ii, &req->pages[req->num_pages], - n * PAGE_SIZE, &start); + req->max_pages - req->num_pages, + &start); if (ret < 0) return ret; diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index 9c88da0e855..4fcd40d6f30 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h @@ -89,6 +89,7 @@ extern int do_mknod(const char *file, int mode, unsigned int major, extern int link_file(const char *from, const char *to); extern int hostfs_do_readlink(char *file, char *buf, int size); extern int rename_file(char *from, char *to); +extern int rename2_file(char *from, char *to, unsigned int flags); extern int do_statfs(char *root, long *bsize_out, long long *blocks_out, long long *bfree_out, long long *bavail_out, long long *files_out, long long *ffree_out, diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index bb529f3b7f2..fd62cae0fdc 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -741,21 +741,31 @@ static int hostfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, return err; } -static int hostfs_rename(struct inode *from_ino, struct dentry *from, - struct inode *to_ino, struct dentry *to) +static int hostfs_rename2(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) { - char *from_name, *to_name; + char *old_name, *new_name; int err; - if ((from_name = dentry_name(from)) == NULL) + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) + return -EINVAL; + + old_name = dentry_name(old_dentry); + if (old_name == NULL) return -ENOMEM; - if ((to_name = dentry_name(to)) == NULL) { - __putname(from_name); + new_name = dentry_name(new_dentry); + if (new_name == NULL) { + __putname(old_name); return -ENOMEM; } - err = rename_file(from_name, to_name); - __putname(from_name); - __putname(to_name); + if (!flags) + err = rename_file(old_name, new_name); + else + err = rename2_file(old_name, new_name, flags); + + __putname(old_name); + __putname(new_name); return err; } @@ -867,7 +877,7 @@ static const struct inode_operations hostfs_dir_iops = { .mkdir = hostfs_mkdir, .rmdir = hostfs_rmdir, .mknod = hostfs_mknod, - .rename = hostfs_rename, + .rename2 = hostfs_rename2, .permission = hostfs_permission, .setattr = hostfs_setattr, }; diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index 67838f3aa20..9765dab95cb 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c @@ -14,6 +14,7 @@ #include <sys/time.h> #include <sys/types.h> #include <sys/vfs.h> +#include <sys/syscall.h> #include "hostfs.h" #include <utime.h> @@ -360,6 +361,33 @@ int rename_file(char *from, char *to) return 0; } +int rename2_file(char *from, char *to, unsigned int flags) +{ + int err; + +#ifndef SYS_renameat2 +# ifdef __x86_64__ +# define SYS_renameat2 316 +# endif +# ifdef __i386__ +# define SYS_renameat2 353 +# endif +#endif + +#ifdef SYS_renameat2 + err = syscall(SYS_renameat2, AT_FDCWD, from, AT_FDCWD, to, flags); + if (err < 0) { + if (errno != ENOSYS) + return -errno; + else + return -EINVAL; + } + return 0; +#else + return -EINVAL; +#endif +} + int do_statfs(char *root, long *bsize_out, long long *blocks_out, long long *bfree_out, long long *bavail_out, long long *files_out, long long *ffree_out, diff --git a/fs/internal.h b/fs/internal.h index 46574240746..e325b4f9c79 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -131,7 +131,6 @@ extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan, /* * read_write.c */ -extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); extern int rw_verify_area(int, struct file *, const loff_t *, size_t); /* @@ -144,3 +143,9 @@ extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, * pipe.c */ extern const struct file_operations pipefifo_fops; + +/* + * fs_pin.c + */ +extern void sb_pin_kill(struct super_block *sb); +extern void mnt_pin_kill(struct mount *m); diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 4556ce1af5b..5ddaf8625d3 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -61,7 +61,7 @@ static void isofs_put_super(struct super_block *sb) return; } -static int isofs_read_inode(struct inode *); +static int isofs_read_inode(struct inode *, int relocated); static int isofs_statfs (struct dentry *, struct kstatfs *); static struct kmem_cache *isofs_inode_cachep; @@ -1259,7 +1259,7 @@ out_toomany: goto out; } -static int isofs_read_inode(struct inode *inode) +static int isofs_read_inode(struct inode *inode, int relocated) { struct super_block *sb = inode->i_sb; struct isofs_sb_info *sbi = ISOFS_SB(sb); @@ -1404,7 +1404,7 @@ static int isofs_read_inode(struct inode *inode) */ if (!high_sierra) { - parse_rock_ridge_inode(de, inode); + parse_rock_ridge_inode(de, inode, relocated); /* if we want uid/gid set, override the rock ridge setting */ if (sbi->s_uid_set) inode->i_uid = sbi->s_uid; @@ -1483,9 +1483,10 @@ static int isofs_iget5_set(struct inode *ino, void *data) * offset that point to the underlying meta-data for the inode. The * code below is otherwise similar to the iget() code in * include/linux/fs.h */ -struct inode *isofs_iget(struct super_block *sb, - unsigned long block, - unsigned long offset) +struct inode *__isofs_iget(struct super_block *sb, + unsigned long block, + unsigned long offset, + int relocated) { unsigned long hashval; struct inode *inode; @@ -1507,7 +1508,7 @@ struct inode *isofs_iget(struct super_block *sb, return ERR_PTR(-ENOMEM); if (inode->i_state & I_NEW) { - ret = isofs_read_inode(inode); + ret = isofs_read_inode(inode, relocated); if (ret < 0) { iget_failed(inode); inode = ERR_PTR(ret); diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index 99167238518..0ac4c1f73fb 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -107,7 +107,7 @@ extern int iso_date(char *, int); struct inode; /* To make gcc happy */ -extern int parse_rock_ridge_inode(struct iso_directory_record *, struct inode *); +extern int parse_rock_ridge_inode(struct iso_directory_record *, struct inode *, int relocated); extern int get_rock_ridge_filename(struct iso_directory_record *, char *, struct inode *); extern int isofs_name_translate(struct iso_directory_record *, char *, struct inode *); @@ -118,9 +118,24 @@ extern struct dentry *isofs_lookup(struct inode *, struct dentry *, unsigned int extern struct buffer_head *isofs_bread(struct inode *, sector_t); extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long); -extern struct inode *isofs_iget(struct super_block *sb, - unsigned long block, - unsigned long offset); +struct inode *__isofs_iget(struct super_block *sb, + unsigned long block, + unsigned long offset, + int relocated); + +static inline struct inode *isofs_iget(struct super_block *sb, + unsigned long block, + unsigned long offset) +{ + return __isofs_iget(sb, block, offset, 0); +} + +static inline struct inode *isofs_iget_reloc(struct super_block *sb, + unsigned long block, + unsigned long offset) +{ + return __isofs_iget(sb, block, offset, 1); +} /* Because the inode number is no longer relevant to finding the * underlying meta-data for an inode, we are free to choose a more diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index c0bf42472e4..f488bbae541 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -288,12 +288,16 @@ eio: goto out; } +#define RR_REGARD_XA 1 +#define RR_RELOC_DE 2 + static int parse_rock_ridge_inode_internal(struct iso_directory_record *de, - struct inode *inode, int regard_xa) + struct inode *inode, int flags) { int symlink_len = 0; int cnt, sig; + unsigned int reloc_block; struct inode *reloc; struct rock_ridge *rr; int rootflag; @@ -305,7 +309,7 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de, init_rock_state(&rs, inode); setup_rock_ridge(de, inode, &rs); - if (regard_xa) { + if (flags & RR_REGARD_XA) { rs.chr += 14; rs.len -= 14; if (rs.len < 0) @@ -485,12 +489,22 @@ repeat: "relocated directory\n"); goto out; case SIG('C', 'L'): - ISOFS_I(inode)->i_first_extent = - isonum_733(rr->u.CL.location); - reloc = - isofs_iget(inode->i_sb, - ISOFS_I(inode)->i_first_extent, - 0); + if (flags & RR_RELOC_DE) { + printk(KERN_ERR + "ISOFS: Recursive directory relocation " + "is not supported\n"); + goto eio; + } + reloc_block = isonum_733(rr->u.CL.location); + if (reloc_block == ISOFS_I(inode)->i_iget5_block && + ISOFS_I(inode)->i_iget5_offset == 0) { + printk(KERN_ERR + "ISOFS: Directory relocation points to " + "itself\n"); + goto eio; + } + ISOFS_I(inode)->i_first_extent = reloc_block; + reloc = isofs_iget_reloc(inode->i_sb, reloc_block, 0); if (IS_ERR(reloc)) { ret = PTR_ERR(reloc); goto out; @@ -637,9 +651,11 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit) return rpnt; } -int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode) +int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode, + int relocated) { - int result = parse_rock_ridge_inode_internal(de, inode, 0); + int flags = relocated ? RR_RELOC_DE : 0; + int result = parse_rock_ridge_inode_internal(de, inode, flags); /* * if rockridge flag was reset and we didn't look for attributes @@ -647,7 +663,8 @@ int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode) */ if ((ISOFS_SB(inode->i_sb)->s_rock_offset == -1) && (ISOFS_SB(inode->i_sb)->s_rock == 2)) { - result = parse_rock_ridge_inode_internal(de, inode, 14); + result = parse_rock_ridge_inode_internal(de, inode, + flags | RR_REGARD_XA); } return result; } diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6fac7434985..b73e0215baa 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -97,7 +97,7 @@ static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh) struct commit_header *h; __u32 csum; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return; h = (struct commit_header *)(bh->b_data); @@ -313,11 +313,11 @@ static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) return checksum; } -static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, +static void write_tag_block(journal_t *j, journal_block_tag_t *tag, unsigned long long block) { tag->t_blocknr = cpu_to_be32(block & (u32)~0); - if (tag_bytes > JBD2_TAG_SIZE32) + if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_64BIT)) tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); } @@ -327,7 +327,7 @@ static void jbd2_descr_block_csum_set(journal_t *j, struct jbd2_journal_block_tail *tail; __u32 csum; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return; tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize - @@ -340,12 +340,13 @@ static void jbd2_descr_block_csum_set(journal_t *j, static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, struct buffer_head *bh, __u32 sequence) { + journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag; struct page *page = bh->b_page; __u8 *addr; __u32 csum32; __be32 seq; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return; seq = cpu_to_be32(sequence); @@ -355,8 +356,10 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, bh->b_size); kunmap_atomic(addr); - /* We only have space to store the lower 16 bits of the crc32c. */ - tag->t_checksum = cpu_to_be16(csum32); + if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3)) + tag3->t_checksum = cpu_to_be32(csum32); + else + tag->t_checksum = cpu_to_be16(csum32); } /* * jbd2_journal_commit_transaction @@ -396,7 +399,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) LIST_HEAD(io_bufs); LIST_HEAD(log_bufs); - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) csum_size = sizeof(struct jbd2_journal_block_tail); /* @@ -690,7 +693,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) tag_flag |= JBD2_FLAG_SAME_UUID; tag = (journal_block_tag_t *) tagp; - write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); + write_tag_block(journal, tag, jh2bh(jh)->b_blocknr); tag->t_flags = cpu_to_be16(tag_flag); jbd2_block_tag_csum_set(journal, tag, wbuf[bufs], commit_transaction->t_tid); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 67b8e303946..19d74d86d99 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -124,7 +124,7 @@ EXPORT_SYMBOL(__jbd2_debug); /* Checksumming functions */ static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) { - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; @@ -145,7 +145,7 @@ static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) { - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; return sb->s_checksum == jbd2_superblock_csum(j, sb); @@ -153,7 +153,7 @@ static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) { - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return; sb->s_checksum = jbd2_superblock_csum(j, sb); @@ -1522,21 +1522,29 @@ static int journal_get_superblock(journal_t *journal) goto out; } - if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && - JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { + if (jbd2_journal_has_csum_v2or3(journal) && + JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) { /* Can't have checksum v1 and v2 on at the same time! */ printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 " "at the same time!\n"); goto out; } + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) && + JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) { + /* Can't have checksum v2 and v3 at the same time! */ + printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 " + "at the same time!\n"); + goto out; + } + if (!jbd2_verify_csum_type(journal, sb)) { printk(KERN_ERR "JBD2: Unknown checksum type\n"); goto out; } /* Load the checksum driver */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { + if (jbd2_journal_has_csum_v2or3(journal)) { journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); if (IS_ERR(journal->j_chksum_driver)) { printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); @@ -1553,7 +1561,7 @@ static int journal_get_superblock(journal_t *journal) } /* Precompute checksum seed for all metadata */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, sizeof(sb->s_uuid)); @@ -1813,8 +1821,14 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) return 0; - /* Asking for checksumming v2 and v1? Only give them v2. */ - if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2 && + /* If enabling v2 checksums, turn on v3 instead */ + if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2) { + incompat &= ~JBD2_FEATURE_INCOMPAT_CSUM_V2; + incompat |= JBD2_FEATURE_INCOMPAT_CSUM_V3; + } + + /* Asking for checksumming v3 and v1? Only give them v3. */ + if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V3 && compat & JBD2_FEATURE_COMPAT_CHECKSUM) compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM; @@ -1823,8 +1837,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, sb = journal->j_superblock; - /* If enabling v2 checksums, update superblock */ - if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V2)) { + /* If enabling v3 checksums, update superblock */ + if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) { sb->s_checksum_type = JBD2_CRC32C_CHKSUM; sb->s_feature_compat &= ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); @@ -1842,8 +1856,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, } /* Precompute checksum seed for all metadata */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, sizeof(sb->s_uuid)); @@ -1852,7 +1865,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, /* If enabling v1 checksums, downgrade superblock */ if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM)) sb->s_feature_incompat &= - ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2); + ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2 | + JBD2_FEATURE_INCOMPAT_CSUM_V3); sb->s_feature_compat |= cpu_to_be32(compat); sb->s_feature_ro_compat |= cpu_to_be32(ro); @@ -2165,16 +2179,20 @@ int jbd2_journal_blocks_per_page(struct inode *inode) */ size_t journal_tag_bytes(journal_t *journal) { - journal_block_tag_t tag; - size_t x = 0; + size_t sz; + + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) + return sizeof(journal_block_tag3_t); + + sz = sizeof(journal_block_tag_t); if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - x += sizeof(tag.t_checksum); + sz += sizeof(__u16); if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) - return x + JBD2_TAG_SIZE64; + return sz; else - return x + JBD2_TAG_SIZE32; + return sz - sizeof(__u32); } /* diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 3b6bb19d60b..9b329b55ffe 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -181,7 +181,7 @@ static int jbd2_descr_block_csum_verify(journal_t *j, __be32 provided; __u32 calculated; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize - @@ -205,7 +205,7 @@ static int count_tags(journal_t *journal, struct buffer_head *bh) int nr = 0, size = journal->j_blocksize; int tag_bytes = journal_tag_bytes(journal); - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) size -= sizeof(struct jbd2_journal_block_tail); tagp = &bh->b_data[sizeof(journal_header_t)]; @@ -338,10 +338,11 @@ int jbd2_journal_skip_recovery(journal_t *journal) return err; } -static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag) +static inline unsigned long long read_tag_block(journal_t *journal, + journal_block_tag_t *tag) { unsigned long long block = be32_to_cpu(tag->t_blocknr); - if (tag_bytes > JBD2_TAG_SIZE32) + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; return block; } @@ -384,7 +385,7 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) __be32 provided; __u32 calculated; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; h = buf; @@ -399,17 +400,21 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, void *buf, __u32 sequence) { + journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag; __u32 csum32; __be32 seq; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; seq = cpu_to_be32(sequence); csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); - return tag->t_checksum == cpu_to_be16(csum32); + if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3)) + return tag3->t_checksum == cpu_to_be32(csum32); + else + return tag->t_checksum == cpu_to_be16(csum32); } static int do_one_pass(journal_t *journal, @@ -426,6 +431,7 @@ static int do_one_pass(journal_t *journal, int tag_bytes = journal_tag_bytes(journal); __u32 crc32_sum = ~0; /* Transactional Checksums */ int descr_csum_size = 0; + int block_error = 0; /* * First thing is to establish what we expect to find in the log @@ -512,8 +518,7 @@ static int do_one_pass(journal_t *journal, switch(blocktype) { case JBD2_DESCRIPTOR_BLOCK: /* Verify checksum first */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) descr_csum_size = sizeof(struct jbd2_journal_block_tail); if (descr_csum_size > 0 && @@ -574,7 +579,7 @@ static int do_one_pass(journal_t *journal, unsigned long long blocknr; J_ASSERT(obh != NULL); - blocknr = read_tag_block(tag_bytes, + blocknr = read_tag_block(journal, tag); /* If the block has been @@ -598,7 +603,8 @@ static int do_one_pass(journal_t *journal, "checksum recovering " "block %llu in log\n", blocknr); - continue; + block_error = 1; + goto skip_write; } /* Find a buffer for the new @@ -797,7 +803,8 @@ static int do_one_pass(journal_t *journal, success = -EIO; } } - + if (block_error && success == 0) + success = -EIO; return success; failed: @@ -811,7 +818,7 @@ static int jbd2_revoke_block_csum_verify(journal_t *j, __be32 provided; __u32 calculated; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize - diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 198c9c10276..d5e95a175c9 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -91,8 +91,8 @@ #include <linux/list.h> #include <linux/init.h> #include <linux/bio.h> -#endif #include <linux/log2.h> +#endif static struct kmem_cache *jbd2_revoke_record_cache; static struct kmem_cache *jbd2_revoke_table_cache; @@ -597,7 +597,7 @@ static void write_one_revoke_record(journal_t *journal, offset = *offsetp; /* Do we need to leave space at the end for a checksum? */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) csum_size = sizeof(struct jbd2_journal_revoke_tail); /* Make sure we have a descriptor with space left for the record */ @@ -644,7 +644,7 @@ static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh) struct jbd2_journal_revoke_tail *tail; __u32 csum; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return; tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize - diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 8f27c93f8d2..ec9e082f9ec 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -253,13 +253,11 @@ static int lockd_up_net(struct svc_serv *serv, struct net *net) error = make_socks(serv, net); if (error < 0) - goto err_socks; + goto err_bind; set_grace_period(net); dprintk("lockd_up_net: per-net data created; net=%p\n", net); return 0; -err_socks: - svc_rpcb_cleanup(serv, net); err_bind: ln->nlmsvc_users--; return error; diff --git a/fs/locks.c b/fs/locks.c index a6f54802d27..bb08857f90b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -247,6 +247,18 @@ void locks_free_lock(struct file_lock *fl) } EXPORT_SYMBOL(locks_free_lock); +static void +locks_dispose_list(struct list_head *dispose) +{ + struct file_lock *fl; + + while (!list_empty(dispose)) { + fl = list_first_entry(dispose, struct file_lock, fl_block); + list_del_init(&fl->fl_block); + locks_free_lock(fl); + } +} + void locks_init_lock(struct file_lock *fl) { memset(fl, 0, sizeof(struct file_lock)); @@ -285,7 +297,8 @@ EXPORT_SYMBOL(__locks_copy_lock); void locks_copy_lock(struct file_lock *new, struct file_lock *fl) { - locks_release_private(new); + /* "new" must be a freshly-initialized lock */ + WARN_ON_ONCE(new->fl_ops); __locks_copy_lock(new, fl); new->fl_file = fl->fl_file; @@ -650,12 +663,16 @@ static void locks_unlink_lock(struct file_lock **thisfl_p) * * Must be called with i_lock held! */ -static void locks_delete_lock(struct file_lock **thisfl_p) +static void locks_delete_lock(struct file_lock **thisfl_p, + struct list_head *dispose) { struct file_lock *fl = *thisfl_p; locks_unlink_lock(thisfl_p); - locks_free_lock(fl); + if (dispose) + list_add(&fl->fl_block, dispose); + else + locks_free_lock(fl); } /* Determine if lock sys_fl blocks lock caller_fl. Common functionality @@ -811,6 +828,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) struct inode * inode = file_inode(filp); int error = 0; int found = 0; + LIST_HEAD(dispose); if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) { new_fl = locks_alloc_lock(); @@ -833,7 +851,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) if (request->fl_type == fl->fl_type) goto out; found = 1; - locks_delete_lock(before); + locks_delete_lock(before, &dispose); break; } @@ -880,6 +898,7 @@ out: spin_unlock(&inode->i_lock); if (new_fl) locks_free_lock(new_fl); + locks_dispose_list(&dispose); return error; } @@ -893,6 +912,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str struct file_lock **before; int error; bool added = false; + LIST_HEAD(dispose); /* * We may need two file_lock structures for this operation, @@ -988,7 +1008,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str else request->fl_end = fl->fl_end; if (added) { - locks_delete_lock(before); + locks_delete_lock(before, &dispose); continue; } request = fl; @@ -1018,21 +1038,24 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str * one (This may happen several times). */ if (added) { - locks_delete_lock(before); + locks_delete_lock(before, &dispose); continue; } - /* Replace the old lock with the new one. - * Wake up anybody waiting for the old one, - * as the change in lock type might satisfy - * their needs. + /* + * Replace the old lock with new_fl, and + * remove the old one. It's safe to do the + * insert here since we know that we won't be + * using new_fl later, and that the lock is + * just replacing an existing lock. */ - locks_wake_up_blocks(fl); - fl->fl_start = request->fl_start; - fl->fl_end = request->fl_end; - fl->fl_type = request->fl_type; - locks_release_private(fl); - locks_copy_private(fl, request); - request = fl; + error = -ENOLCK; + if (!new_fl) + goto out; + locks_copy_lock(new_fl, request); + request = new_fl; + new_fl = NULL; + locks_delete_lock(before, &dispose); + locks_insert_lock(before, request); added = true; } } @@ -1093,6 +1116,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str locks_free_lock(new_fl); if (new_fl2) locks_free_lock(new_fl2); + locks_dispose_list(&dispose); return error; } @@ -1268,7 +1292,7 @@ int lease_modify(struct file_lock **before, int arg) printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); fl->fl_fasync = NULL; } - locks_delete_lock(before); + locks_delete_lock(before, NULL); } return 0; } @@ -1595,7 +1619,7 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp smp_mb(); error = check_conflicting_open(dentry, arg); if (error) - locks_unlink_lock(flp); + locks_unlink_lock(before); out: if (is_deleg) mutex_unlock(&inode->i_mutex); @@ -1737,13 +1761,10 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) ret = fl; spin_lock(&inode->i_lock); error = __vfs_setlease(filp, arg, &ret); - if (error) { - spin_unlock(&inode->i_lock); - locks_free_lock(fl); - goto out_free_fasync; - } - if (ret != fl) - locks_free_lock(fl); + if (error) + goto out_unlock; + if (ret == fl) + fl = NULL; /* * fasync_insert_entry() returns the old entry if any. @@ -1755,9 +1776,10 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) new = NULL; error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); +out_unlock: spin_unlock(&inode->i_lock); - -out_free_fasync: + if (fl) + locks_free_lock(fl); if (new) fasync_free(new); return error; @@ -2320,6 +2342,7 @@ void locks_remove_file(struct file *filp) struct inode * inode = file_inode(filp); struct file_lock *fl; struct file_lock **before; + LIST_HEAD(dispose); if (!inode->i_flock) return; @@ -2365,12 +2388,13 @@ void locks_remove_file(struct file *filp) fl->fl_type, fl->fl_flags, fl->fl_start, fl->fl_end); - locks_delete_lock(before); + locks_delete_lock(before, &dispose); continue; } before = &fl->fl_next; } spin_unlock(&inode->i_lock); + locks_dispose_list(&dispose); } /** @@ -2452,7 +2476,11 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, seq_puts(f, "FLOCK ADVISORY "); } } else if (IS_LEASE(fl)) { - seq_puts(f, "LEASE "); + if (fl->fl_flags & FL_DELEG) + seq_puts(f, "DELEG "); + else + seq_puts(f, "LEASE "); + if (lease_breaking(fl)) seq_puts(f, "BREAKING "); else if (fl->fl_file) diff --git a/fs/mount.h b/fs/mount.h index d55297f2fa0..6740a621552 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -55,7 +55,7 @@ struct mount { int mnt_id; /* mount identifier */ int mnt_group_id; /* peer group identifier */ int mnt_expiry_mark; /* true if marked for expiry */ - int mnt_pinned; + struct hlist_head mnt_pins; struct path mnt_ex_mountpoint; }; diff --git a/fs/namei.c b/fs/namei.c index 9eb787e5c16..a996bb48dfa 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1091,10 +1091,10 @@ int follow_down_one(struct path *path) } EXPORT_SYMBOL(follow_down_one); -static inline bool managed_dentry_might_block(struct dentry *dentry) +static inline int managed_dentry_rcu(struct dentry *dentry) { - return (dentry->d_flags & DCACHE_MANAGE_TRANSIT && - dentry->d_op->d_manage(dentry, true) < 0); + return (dentry->d_flags & DCACHE_MANAGE_TRANSIT) ? + dentry->d_op->d_manage(dentry, true) : 0; } /* @@ -1110,11 +1110,18 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, * Don't forget we might have a non-mountpoint managed dentry * that wants to block transit. */ - if (unlikely(managed_dentry_might_block(path->dentry))) + switch (managed_dentry_rcu(path->dentry)) { + case -ECHILD: + default: return false; + case -EISDIR: + return true; + case 0: + break; + } if (!d_mountpoint(path->dentry)) - return true; + return !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT); mounted = __lookup_mnt(path->mnt, path->dentry); if (!mounted) @@ -1130,7 +1137,8 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, */ *inode = path->dentry->d_inode; } - return read_seqretry(&mount_lock, nd->m_seq); + return read_seqretry(&mount_lock, nd->m_seq) && + !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT); } static int follow_dotdot_rcu(struct nameidata *nd) @@ -1402,11 +1410,8 @@ static int lookup_fast(struct nameidata *nd, } path->mnt = mnt; path->dentry = dentry; - if (unlikely(!__follow_mount_rcu(nd, path, inode))) - goto unlazy; - if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) - goto unlazy; - return 0; + if (likely(__follow_mount_rcu(nd, path, inode))) + return 0; unlazy: if (unlazy_walk(nd, dentry)) return -ECHILD; @@ -4019,7 +4024,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname * The worst of all namespace operations - renaming directory. "Perverted" * doesn't even start to describe it. Somebody in UCB had a heck of a trip... * Problems: - * a) we can get into loop creation. Check is done in is_subdir(). + * a) we can get into loop creation. * b) race potential - two innocent renames can create a loop together. * That's where 4.4 screws up. Current fix: serialization on * sb->s_vfs_rename_mutex. We might be more accurate, but that's another @@ -4075,7 +4080,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (error) return error; - if (!old_dir->i_op->rename) + if (!old_dir->i_op->rename && !old_dir->i_op->rename2) return -EPERM; if (flags && !old_dir->i_op->rename2) @@ -4134,10 +4139,11 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (error) goto out; } - if (!flags) { + if (!old_dir->i_op->rename2) { error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); } else { + WARN_ON(old_dir->i_op->rename != NULL); error = old_dir->i_op->rename2(old_dir, old_dentry, new_dir, new_dentry, flags); } diff --git a/fs/namespace.c b/fs/namespace.c index 0acabea5831..ef42d9bee21 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -16,7 +16,6 @@ #include <linux/namei.h> #include <linux/security.h> #include <linux/idr.h> -#include <linux/acct.h> /* acct_auto_close_mnt */ #include <linux/init.h> /* init_rootfs */ #include <linux/fs_struct.h> /* get_fs_root et.al. */ #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ @@ -779,6 +778,20 @@ static void attach_mnt(struct mount *mnt, list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); } +static void attach_shadowed(struct mount *mnt, + struct mount *parent, + struct mount *shadows) +{ + if (shadows) { + hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash); + list_add(&mnt->mnt_child, &shadows->mnt_child); + } else { + hlist_add_head_rcu(&mnt->mnt_hash, + m_hash(&parent->mnt, mnt->mnt_mountpoint)); + list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); + } +} + /* * vfsmount lock must be held for write */ @@ -797,12 +810,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows) list_splice(&head, n->list.prev); - if (shadows) - hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash); - else - hlist_add_head_rcu(&mnt->mnt_hash, - m_hash(&parent->mnt, mnt->mnt_mountpoint)); - list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); + attach_shadowed(mnt, parent, shadows); touch_mnt_namespace(n); } @@ -951,7 +959,6 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, static void mntput_no_expire(struct mount *mnt) { -put_again: rcu_read_lock(); mnt_add_count(mnt, -1); if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */ @@ -964,14 +971,6 @@ put_again: unlock_mount_hash(); return; } - if (unlikely(mnt->mnt_pinned)) { - mnt_add_count(mnt, mnt->mnt_pinned + 1); - mnt->mnt_pinned = 0; - rcu_read_unlock(); - unlock_mount_hash(); - acct_auto_close_mnt(&mnt->mnt); - goto put_again; - } if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) { rcu_read_unlock(); unlock_mount_hash(); @@ -994,6 +993,8 @@ put_again: * so mnt_get_writers() below is safe. */ WARN_ON(mnt_get_writers(mnt)); + if (unlikely(mnt->mnt_pins.first)) + mnt_pin_kill(mnt); fsnotify_vfsmount_delete(&mnt->mnt); dput(mnt->mnt.mnt_root); deactivate_super(mnt->mnt.mnt_sb); @@ -1021,25 +1022,15 @@ struct vfsmount *mntget(struct vfsmount *mnt) } EXPORT_SYMBOL(mntget); -void mnt_pin(struct vfsmount *mnt) +struct vfsmount *mnt_clone_internal(struct path *path) { - lock_mount_hash(); - real_mount(mnt)->mnt_pinned++; - unlock_mount_hash(); -} -EXPORT_SYMBOL(mnt_pin); - -void mnt_unpin(struct vfsmount *m) -{ - struct mount *mnt = real_mount(m); - lock_mount_hash(); - if (mnt->mnt_pinned) { - mnt_add_count(mnt, 1); - mnt->mnt_pinned--; - } - unlock_mount_hash(); + struct mount *p; + p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE); + if (IS_ERR(p)) + return ERR_CAST(p); + p->mnt.mnt_flags |= MNT_INTERNAL; + return &p->mnt; } -EXPORT_SYMBOL(mnt_unpin); static inline void mangle(struct seq_file *m, const char *s) { @@ -1226,6 +1217,11 @@ static void namespace_unlock(void) head.first->pprev = &head.first; INIT_HLIST_HEAD(&unmounted); + /* undo decrements we'd done in umount_tree() */ + hlist_for_each_entry(mnt, &head, mnt_hash) + if (mnt->mnt_ex_mountpoint.mnt) + mntget(mnt->mnt_ex_mountpoint.mnt); + up_write(&namespace_sem); synchronize_rcu(); @@ -1262,6 +1258,9 @@ void umount_tree(struct mount *mnt, int how) hlist_add_head(&p->mnt_hash, &tmp_list); } + hlist_for_each_entry(p, &tmp_list, mnt_hash) + list_del_init(&p->mnt_child); + if (how) propagate_umount(&tmp_list); @@ -1272,9 +1271,9 @@ void umount_tree(struct mount *mnt, int how) p->mnt_ns = NULL; if (how < 2) p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; - list_del_init(&p->mnt_child); if (mnt_has_parent(p)) { put_mountpoint(p->mnt_mp); + mnt_add_count(p->mnt_parent, -1); /* move the reference to mountpoint into ->mnt_ex_mountpoint */ p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint; p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt; @@ -1505,6 +1504,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, continue; for (s = r; s; s = next_mnt(s, r)) { + struct mount *t = NULL; if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(s)) { s = skip_mnt_tree(s); @@ -1526,7 +1526,14 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, goto out; lock_mount_hash(); list_add_tail(&q->mnt_list, &res->mnt_list); - attach_mnt(q, parent, p->mnt_mp); + mnt_set_mountpoint(parent, p->mnt_mp, q); + if (!list_empty(&parent->mnt_mounts)) { + t = list_last_entry(&parent->mnt_mounts, + struct mount, mnt_child); + if (t->mnt_mp != p->mnt_mp) + t = NULL; + } + attach_shadowed(q, parent, t); unlock_mount_hash(); } } diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 9b431f44fad..cbb1797149d 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -210,8 +210,7 @@ static void bl_end_io_read(struct bio *bio, int err) SetPageUptodate(bvec->bv_page); if (err) { - struct nfs_pgio_data *rdata = par->data; - struct nfs_pgio_header *header = rdata->header; + struct nfs_pgio_header *header = par->data; if (!header->pnfs_error) header->pnfs_error = -EIO; @@ -224,43 +223,44 @@ static void bl_end_io_read(struct bio *bio, int err) static void bl_read_cleanup(struct work_struct *work) { struct rpc_task *task; - struct nfs_pgio_data *rdata; + struct nfs_pgio_header *hdr; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - rdata = container_of(task, struct nfs_pgio_data, task); - pnfs_ld_read_done(rdata); + hdr = container_of(task, struct nfs_pgio_header, task); + pnfs_ld_read_done(hdr); } static void bl_end_par_io_read(void *data, int unused) { - struct nfs_pgio_data *rdata = data; + struct nfs_pgio_header *hdr = data; - rdata->task.tk_status = rdata->header->pnfs_error; - INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); - schedule_work(&rdata->task.u.tk_work); + hdr->task.tk_status = hdr->pnfs_error; + INIT_WORK(&hdr->task.u.tk_work, bl_read_cleanup); + schedule_work(&hdr->task.u.tk_work); } static enum pnfs_try_status -bl_read_pagelist(struct nfs_pgio_data *rdata) +bl_read_pagelist(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *header = rdata->header; + struct nfs_pgio_header *header = hdr; int i, hole; struct bio *bio = NULL; struct pnfs_block_extent *be = NULL, *cow_read = NULL; sector_t isect, extent_length = 0; struct parallel_io *par; - loff_t f_offset = rdata->args.offset; - size_t bytes_left = rdata->args.count; + loff_t f_offset = hdr->args.offset; + size_t bytes_left = hdr->args.count; unsigned int pg_offset, pg_len; - struct page **pages = rdata->args.pages; - int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; + struct page **pages = hdr->args.pages; + int pg_index = hdr->args.pgbase >> PAGE_CACHE_SHIFT; const bool is_dio = (header->dreq != NULL); dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, - rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); + hdr->page_array.npages, f_offset, + (unsigned int)hdr->args.count); - par = alloc_parallel(rdata); + par = alloc_parallel(hdr); if (!par) goto use_mds; par->pnfs_callback = bl_end_par_io_read; @@ -268,7 +268,7 @@ bl_read_pagelist(struct nfs_pgio_data *rdata) isect = (sector_t) (f_offset >> SECTOR_SHIFT); /* Code assumes extents are page-aligned */ - for (i = pg_index; i < rdata->pages.npages; i++) { + for (i = pg_index; i < hdr->page_array.npages; i++) { if (!extent_length) { /* We've used up the previous extent */ bl_put_extent(be); @@ -317,7 +317,8 @@ bl_read_pagelist(struct nfs_pgio_data *rdata) struct pnfs_block_extent *be_read; be_read = (hole && cow_read) ? cow_read : be; - bio = do_add_page_to_bio(bio, rdata->pages.npages - i, + bio = do_add_page_to_bio(bio, + hdr->page_array.npages - i, READ, isect, pages[i], be_read, bl_end_io_read, par, @@ -332,10 +333,10 @@ bl_read_pagelist(struct nfs_pgio_data *rdata) extent_length -= PAGE_CACHE_SECTORS; } if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { - rdata->res.eof = 1; - rdata->res.count = header->inode->i_size - rdata->args.offset; + hdr->res.eof = 1; + hdr->res.count = header->inode->i_size - hdr->args.offset; } else { - rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset; + hdr->res.count = (isect << SECTOR_SHIFT) - hdr->args.offset; } out: bl_put_extent(be); @@ -390,8 +391,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err) } if (unlikely(err)) { - struct nfs_pgio_data *data = par->data; - struct nfs_pgio_header *header = data->header; + struct nfs_pgio_header *header = par->data; if (!header->pnfs_error) header->pnfs_error = -EIO; @@ -405,8 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err) { struct parallel_io *par = bio->bi_private; const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct nfs_pgio_data *data = par->data; - struct nfs_pgio_header *header = data->header; + struct nfs_pgio_header *header = par->data; if (!uptodate) { if (!header->pnfs_error) @@ -423,32 +422,32 @@ static void bl_end_io_write(struct bio *bio, int err) static void bl_write_cleanup(struct work_struct *work) { struct rpc_task *task; - struct nfs_pgio_data *wdata; + struct nfs_pgio_header *hdr; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - wdata = container_of(task, struct nfs_pgio_data, task); - if (likely(!wdata->header->pnfs_error)) { + hdr = container_of(task, struct nfs_pgio_header, task); + if (likely(!hdr->pnfs_error)) { /* Marks for LAYOUTCOMMIT */ - mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg), - wdata->args.offset, wdata->args.count); + mark_extents_written(BLK_LSEG2EXT(hdr->lseg), + hdr->args.offset, hdr->args.count); } - pnfs_ld_write_done(wdata); + pnfs_ld_write_done(hdr); } /* Called when last of bios associated with a bl_write_pagelist call finishes */ static void bl_end_par_io_write(void *data, int num_se) { - struct nfs_pgio_data *wdata = data; + struct nfs_pgio_header *hdr = data; - if (unlikely(wdata->header->pnfs_error)) { - bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval, + if (unlikely(hdr->pnfs_error)) { + bl_free_short_extents(&BLK_LSEG2EXT(hdr->lseg)->bl_inval, num_se); } - wdata->task.tk_status = wdata->header->pnfs_error; - wdata->verf.committed = NFS_FILE_SYNC; - INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); - schedule_work(&wdata->task.u.tk_work); + hdr->task.tk_status = hdr->pnfs_error; + hdr->verf.committed = NFS_FILE_SYNC; + INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup); + schedule_work(&hdr->task.u.tk_work); } /* FIXME STUB - mark intersection of layout and page as bad, so is not @@ -673,18 +672,17 @@ check_page: } static enum pnfs_try_status -bl_write_pagelist(struct nfs_pgio_data *wdata, int sync) +bl_write_pagelist(struct nfs_pgio_header *header, int sync) { - struct nfs_pgio_header *header = wdata->header; int i, ret, npg_zero, pg_index, last = 0; struct bio *bio = NULL; struct pnfs_block_extent *be = NULL, *cow_read = NULL; sector_t isect, last_isect = 0, extent_length = 0; struct parallel_io *par = NULL; - loff_t offset = wdata->args.offset; - size_t count = wdata->args.count; + loff_t offset = header->args.offset; + size_t count = header->args.count; unsigned int pg_offset, pg_len, saved_len; - struct page **pages = wdata->args.pages; + struct page **pages = header->args.pages; struct page *page; pgoff_t index; u64 temp; @@ -699,11 +697,11 @@ bl_write_pagelist(struct nfs_pgio_data *wdata, int sync) dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n"); goto out_mds; } - /* At this point, wdata->pages is a (sequential) list of nfs_pages. + /* At this point, header->page_aray is a (sequential) list of nfs_pages. * We want to write each, and if there is an error set pnfs_error * to have it redone using nfs. */ - par = alloc_parallel(wdata); + par = alloc_parallel(header); if (!par) goto out_mds; par->pnfs_callback = bl_end_par_io_write; @@ -790,8 +788,8 @@ next_page: bio = bl_submit_bio(WRITE, bio); /* Middle pages */ - pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT; - for (i = pg_index; i < wdata->pages.npages; i++) { + pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; + for (i = pg_index; i < header->page_array.npages; i++) { if (!extent_length) { /* We've used up the previous extent */ bl_put_extent(be); @@ -862,7 +860,8 @@ next_page: } - bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE, + bio = do_add_page_to_bio(bio, header->page_array.npages - i, + WRITE, isect, pages[i], be, bl_end_io_write, par, pg_offset, pg_len); @@ -890,7 +889,7 @@ next_page: } write_done: - wdata->res.count = wdata->args.count; + header->res.count = header->args.count; out: bl_put_extent(be); bl_put_extent(cow_read); @@ -1063,7 +1062,7 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh, return ERR_PTR(-ENOMEM); } - pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS); + pages = kcalloc(max_pages, sizeof(struct page *), GFP_NOFS); if (pages == NULL) { kfree(dev); return ERR_PTR(-ENOMEM); diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 073b4cf67ed..54de482143c 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -428,6 +428,18 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) if (p == NULL) return 0; + /* + * Did we get the acceptor from userland during the SETCLIENID + * negotiation? + */ + if (clp->cl_acceptor) + return !strcmp(p, clp->cl_acceptor); + + /* + * Otherwise try to verify it using the cl_hostname. Note that this + * doesn't work if a non-canonical hostname was used in the devname. + */ + /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */ if (memcmp(p, "nfs@", 4) != 0) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 180d1ec9c32..6a4f3666e27 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -110,8 +110,8 @@ struct nfs_subversion *get_nfs_version(unsigned int version) mutex_unlock(&nfs_version_mutex); } - if (!IS_ERR(nfs)) - try_module_get(nfs->owner); + if (!IS_ERR(nfs) && !try_module_get(nfs->owner)) + return ERR_PTR(-EAGAIN); return nfs; } @@ -158,7 +158,8 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) goto error_0; clp->cl_nfs_mod = cl_init->nfs_mod; - try_module_get(clp->cl_nfs_mod->owner); + if (!try_module_get(clp->cl_nfs_mod->owner)) + goto error_dealloc; clp->rpc_ops = clp->cl_nfs_mod->rpc_ops; @@ -190,6 +191,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) error_cleanup: put_nfs_version(clp->cl_nfs_mod); +error_dealloc: kfree(clp); error_0: return ERR_PTR(err); @@ -252,6 +254,7 @@ void nfs_free_client(struct nfs_client *clp) put_net(clp->cl_net); put_nfs_version(clp->cl_nfs_mod); kfree(clp->cl_hostname); + kfree(clp->cl_acceptor); kfree(clp); dprintk("<-- nfs_free_client()\n"); @@ -482,8 +485,13 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops; + if (cl_init->hostname == NULL) { + WARN_ON(1); + return NULL; + } + dprintk("--> nfs_get_client(%s,v%u)\n", - cl_init->hostname ?: "", rpc_ops->version); + cl_init->hostname, rpc_ops->version); /* see if the client already exists */ do { @@ -510,7 +518,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, } while (!IS_ERR(new)); dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n", - cl_init->hostname ?: "", PTR_ERR(new)); + cl_init->hostname, PTR_ERR(new)); return new; } EXPORT_SYMBOL_GPL(nfs_get_client); @@ -1404,24 +1412,18 @@ int nfs_fs_proc_net_init(struct net *net) p = proc_create("volumes", S_IFREG|S_IRUGO, nn->proc_nfsfs, &nfs_volume_list_fops); if (!p) - goto error_2; + goto error_1; return 0; -error_2: - remove_proc_entry("servers", nn->proc_nfsfs); error_1: - remove_proc_entry("fs/nfsfs", NULL); + remove_proc_subtree("nfsfs", net->proc_net); error_0: return -ENOMEM; } void nfs_fs_proc_net_exit(struct net *net) { - struct nfs_net *nn = net_generic(net, nfs_net_id); - - remove_proc_entry("volumes", nn->proc_nfsfs); - remove_proc_entry("servers", nn->proc_nfsfs); - remove_proc_entry("fs/nfsfs", NULL); + remove_proc_subtree("nfsfs", net->proc_net); } /* diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 5d8ccecf5f5..5853f53db73 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -41,14 +41,8 @@ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation) set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags); } -/** - * nfs_have_delegation - check if inode has a delegation - * @inode: inode to check - * @flags: delegation types to check for - * - * Returns one if inode has the indicated delegation, otherwise zero. - */ -int nfs4_have_delegation(struct inode *inode, fmode_t flags) +static int +nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark) { struct nfs_delegation *delegation; int ret = 0; @@ -58,12 +52,34 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags) delegation = rcu_dereference(NFS_I(inode)->delegation); if (delegation != NULL && (delegation->type & flags) == flags && !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { - nfs_mark_delegation_referenced(delegation); + if (mark) + nfs_mark_delegation_referenced(delegation); ret = 1; } rcu_read_unlock(); return ret; } +/** + * nfs_have_delegation - check if inode has a delegation, mark it + * NFS_DELEGATION_REFERENCED if there is one. + * @inode: inode to check + * @flags: delegation types to check for + * + * Returns one if inode has the indicated delegation, otherwise zero. + */ +int nfs4_have_delegation(struct inode *inode, fmode_t flags) +{ + return nfs4_do_check_delegation(inode, flags, true); +} + +/* + * nfs4_check_delegation - check if inode has a delegation, do not mark + * NFS_DELEGATION_REFERENCED if it has one. + */ +int nfs4_check_delegation(struct inode *inode, fmode_t flags) +{ + return nfs4_do_check_delegation(inode, flags, false); +} static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) { diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 9a79c7a99d6..5c1cce39297 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -59,6 +59,7 @@ bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); int nfs4_have_delegation(struct inode *inode, fmode_t flags); +int nfs4_check_delegation(struct inode *inode, fmode_t flags); #endif diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 4a3d4ef7612..36d921f0c60 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -988,9 +988,13 @@ EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate); * A check for whether or not the parent directory has changed. * In the case it has, we assume that the dentries are untrustworthy * and may need to be looked up again. + * If rcu_walk prevents us from performing a full check, return 0. */ -static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) +static int nfs_check_verifier(struct inode *dir, struct dentry *dentry, + int rcu_walk) { + int ret; + if (IS_ROOT(dentry)) return 1; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) @@ -998,7 +1002,11 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) if (!nfs_verify_change_attribute(dir, dentry->d_time)) return 0; /* Revalidate nfsi->cache_change_attribute before we declare a match */ - if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) + if (rcu_walk) + ret = nfs_revalidate_inode_rcu(NFS_SERVER(dir), dir); + else + ret = nfs_revalidate_inode(NFS_SERVER(dir), dir); + if (ret < 0) return 0; if (!nfs_verify_change_attribute(dir, dentry->d_time)) return 0; @@ -1042,6 +1050,8 @@ int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags) out: return (inode->i_nlink == 0) ? -ENOENT : 0; out_force: + if (flags & LOOKUP_RCU) + return -ECHILD; ret = __nfs_revalidate_inode(server, inode); if (ret != 0) return ret; @@ -1054,6 +1064,9 @@ out_force: * * If parent mtime has changed, we revalidate, else we wait for a * period corresponding to the parent's attribute cache timeout value. + * + * If LOOKUP_RCU prevents us from performing a full check, return 1 + * suggesting a reval is needed. */ static inline int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, @@ -1064,7 +1077,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, return 0; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) return 1; - return !nfs_check_verifier(dir, dentry); + return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU); } /* @@ -1088,21 +1101,30 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) struct nfs4_label *label = NULL; int error; - if (flags & LOOKUP_RCU) - return -ECHILD; - - parent = dget_parent(dentry); - dir = parent->d_inode; + if (flags & LOOKUP_RCU) { + parent = ACCESS_ONCE(dentry->d_parent); + dir = ACCESS_ONCE(parent->d_inode); + if (!dir) + return -ECHILD; + } else { + parent = dget_parent(dentry); + dir = parent->d_inode; + } nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); inode = dentry->d_inode; if (!inode) { - if (nfs_neg_need_reval(dir, dentry, flags)) + if (nfs_neg_need_reval(dir, dentry, flags)) { + if (flags & LOOKUP_RCU) + return -ECHILD; goto out_bad; + } goto out_valid_noent; } if (is_bad_inode(inode)) { + if (flags & LOOKUP_RCU) + return -ECHILD; dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n", __func__, dentry); goto out_bad; @@ -1112,12 +1134,20 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) goto out_set_verifier; /* Force a full look up iff the parent directory has changed */ - if (!nfs_is_exclusive_create(dir, flags) && nfs_check_verifier(dir, dentry)) { - if (nfs_lookup_verify_inode(inode, flags)) + if (!nfs_is_exclusive_create(dir, flags) && + nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) { + + if (nfs_lookup_verify_inode(inode, flags)) { + if (flags & LOOKUP_RCU) + return -ECHILD; goto out_zap_parent; + } goto out_valid; } + if (flags & LOOKUP_RCU) + return -ECHILD; + if (NFS_STALE(inode)) goto out_bad; @@ -1153,13 +1183,18 @@ out_set_verifier: /* Success: notify readdir to use READDIRPLUS */ nfs_advise_use_readdirplus(dir); out_valid_noent: - dput(parent); + if (flags & LOOKUP_RCU) { + if (parent != ACCESS_ONCE(dentry->d_parent)) + return -ECHILD; + } else + dput(parent); dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n", __func__, dentry); return 1; out_zap_parent: nfs_zap_caches(dir); out_bad: + WARN_ON(flags & LOOKUP_RCU); nfs_free_fattr(fattr); nfs_free_fhandle(fhandle); nfs4_label_free(label); @@ -1185,6 +1220,7 @@ out_zap_parent: __func__, dentry); return 0; out_error: + WARN_ON(flags & LOOKUP_RCU); nfs_free_fattr(fattr); nfs_free_fhandle(fhandle); nfs4_label_free(label); @@ -1529,14 +1565,9 @@ EXPORT_SYMBOL_GPL(nfs_atomic_open); static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) { - struct dentry *parent = NULL; struct inode *inode; - struct inode *dir; int ret = 0; - if (flags & LOOKUP_RCU) - return -ECHILD; - if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY)) goto no_open; if (d_mountpoint(dentry)) @@ -1545,34 +1576,47 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) goto no_open; inode = dentry->d_inode; - parent = dget_parent(dentry); - dir = parent->d_inode; /* We can't create new files in nfs_open_revalidate(), so we * optimize away revalidation of negative dentries. */ if (inode == NULL) { + struct dentry *parent; + struct inode *dir; + + if (flags & LOOKUP_RCU) { + parent = ACCESS_ONCE(dentry->d_parent); + dir = ACCESS_ONCE(parent->d_inode); + if (!dir) + return -ECHILD; + } else { + parent = dget_parent(dentry); + dir = parent->d_inode; + } if (!nfs_neg_need_reval(dir, dentry, flags)) ret = 1; + else if (flags & LOOKUP_RCU) + ret = -ECHILD; + if (!(flags & LOOKUP_RCU)) + dput(parent); + else if (parent != ACCESS_ONCE(dentry->d_parent)) + return -ECHILD; goto out; } /* NFS only supports OPEN on regular files */ if (!S_ISREG(inode->i_mode)) - goto no_open_dput; + goto no_open; /* We cannot do exclusive creation on a positive dentry */ if (flags & LOOKUP_EXCL) - goto no_open_dput; + goto no_open; /* Let f_op->open() actually open (and revalidate) the file */ ret = 1; out: - dput(parent); return ret; -no_open_dput: - dput(parent); no_open: return nfs_lookup_revalidate(dentry, flags); } @@ -2028,10 +2072,14 @@ static DEFINE_SPINLOCK(nfs_access_lru_lock); static LIST_HEAD(nfs_access_lru_list); static atomic_long_t nfs_access_nr_entries; +static unsigned long nfs_access_max_cachesize = ULONG_MAX; +module_param(nfs_access_max_cachesize, ulong, 0644); +MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache length"); + static void nfs_access_free_entry(struct nfs_access_entry *entry) { put_rpccred(entry->cred); - kfree(entry); + kfree_rcu(entry, rcu_head); smp_mb__before_atomic(); atomic_long_dec(&nfs_access_nr_entries); smp_mb__after_atomic(); @@ -2048,19 +2096,14 @@ static void nfs_access_free_list(struct list_head *head) } } -unsigned long -nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long +nfs_do_access_cache_scan(unsigned int nr_to_scan) { LIST_HEAD(head); struct nfs_inode *nfsi, *next; struct nfs_access_entry *cache; - int nr_to_scan = sc->nr_to_scan; - gfp_t gfp_mask = sc->gfp_mask; long freed = 0; - if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) - return SHRINK_STOP; - spin_lock(&nfs_access_lru_lock); list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) { struct inode *inode; @@ -2094,11 +2137,39 @@ remove_lru_entry: } unsigned long +nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + int nr_to_scan = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; + + if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) + return SHRINK_STOP; + return nfs_do_access_cache_scan(nr_to_scan); +} + + +unsigned long nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc) { return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries)); } +static void +nfs_access_cache_enforce_limit(void) +{ + long nr_entries = atomic_long_read(&nfs_access_nr_entries); + unsigned long diff; + unsigned int nr_to_scan; + + if (nr_entries < 0 || nr_entries <= nfs_access_max_cachesize) + return; + nr_to_scan = 100; + diff = nr_entries - nfs_access_max_cachesize; + if (diff < nr_to_scan) + nr_to_scan = diff; + nfs_do_access_cache_scan(nr_to_scan); +} + static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head) { struct rb_root *root_node = &nfsi->access_cache; @@ -2186,6 +2257,38 @@ out_zap: return -ENOENT; } +static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) +{ + /* Only check the most recently returned cache entry, + * but do it without locking. + */ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_access_entry *cache; + int err = -ECHILD; + struct list_head *lh; + + rcu_read_lock(); + if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS) + goto out; + lh = rcu_dereference(nfsi->access_cache_entry_lru.prev); + cache = list_entry(lh, struct nfs_access_entry, lru); + if (lh == &nfsi->access_cache_entry_lru || + cred != cache->cred) + cache = NULL; + if (cache == NULL) + goto out; + if (!nfs_have_delegated_attributes(inode) && + !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo)) + goto out; + res->jiffies = cache->jiffies; + res->cred = cache->cred; + res->mask = cache->mask; + err = 0; +out: + rcu_read_unlock(); + return err; +} + static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set) { struct nfs_inode *nfsi = NFS_I(inode); @@ -2229,6 +2332,11 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) cache->cred = get_rpccred(set->cred); cache->mask = set->mask; + /* The above field assignments must be visible + * before this item appears on the lru. We cannot easily + * use rcu_assign_pointer, so just force the memory barrier. + */ + smp_wmb(); nfs_access_add_rbtree(inode, cache); /* Update accounting */ @@ -2244,6 +2352,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) &nfs_access_lru_list); spin_unlock(&nfs_access_lru_lock); } + nfs_access_cache_enforce_limit(); } EXPORT_SYMBOL_GPL(nfs_access_add_cache); @@ -2267,10 +2376,16 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) trace_nfs_access_enter(inode); - status = nfs_access_get_cached(inode, cred, &cache); + status = nfs_access_get_cached_rcu(inode, cred, &cache); + if (status != 0) + status = nfs_access_get_cached(inode, cred, &cache); if (status == 0) goto out_cached; + status = -ECHILD; + if (mask & MAY_NOT_BLOCK) + goto out; + /* Be clever: ask server to check for all possible rights */ cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; cache.cred = cred; @@ -2321,9 +2436,6 @@ int nfs_permission(struct inode *inode, int mask) struct rpc_cred *cred; int res = 0; - if (mask & MAY_NOT_BLOCK) - return -ECHILD; - nfs_inc_stats(inode, NFSIOS_VFSACCESS); if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) @@ -2350,12 +2462,23 @@ force_lookup: if (!NFS_PROTO(inode)->access) goto out_notsup; - cred = rpc_lookup_cred(); - if (!IS_ERR(cred)) { - res = nfs_do_access(inode, cred, mask); - put_rpccred(cred); - } else + /* Always try fast lookups first */ + rcu_read_lock(); + cred = rpc_lookup_cred_nonblock(); + if (!IS_ERR(cred)) + res = nfs_do_access(inode, cred, mask|MAY_NOT_BLOCK); + else res = PTR_ERR(cred); + rcu_read_unlock(); + if (res == -ECHILD && !(mask & MAY_NOT_BLOCK)) { + /* Fast lookup failed, try the slow way */ + cred = rpc_lookup_cred(); + if (!IS_ERR(cred)) { + res = nfs_do_access(inode, cred, mask); + put_rpccred(cred); + } else + res = PTR_ERR(cred); + } out: if (!res && (mask & MAY_EXEC) && !execute_ok(inode)) res = -EACCES; @@ -2364,6 +2487,9 @@ out: inode->i_sb->s_id, inode->i_ino, mask, res); return res; out_notsup: + if (mask & MAY_NOT_BLOCK) + return -ECHILD; + res = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (res == 0) res = generic_permission(inode, mask); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index f11b9eed0de..65ef6e00dee 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -148,8 +148,8 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq, { struct nfs_writeverf *verfp; - verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, - hdr->data->ds_idx); + verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, + hdr->ds_idx); WARN_ON_ONCE(verfp->committed >= 0); memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); WARN_ON_ONCE(verfp->committed < 0); @@ -169,8 +169,8 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, { struct nfs_writeverf *verfp; - verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, - hdr->data->ds_idx); + verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, + hdr->ds_idx); if (verfp->committed < 0) { nfs_direct_set_hdr_verf(dreq, hdr); return 0; @@ -715,7 +715,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) { struct nfs_direct_req *dreq = hdr->dreq; struct nfs_commit_info cinfo; - int bit = -1; + bool request_commit = false; struct nfs_page *req = nfs_list_entry(hdr->pages.next); if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) @@ -729,27 +729,20 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) dreq->flags = 0; dreq->error = hdr->error; } - if (dreq->error != 0) - bit = NFS_IOHDR_ERROR; - else { + if (dreq->error == 0) { dreq->count += hdr->good_bytes; - if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { - dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - bit = NFS_IOHDR_NEED_RESCHED; - } else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { + if (nfs_write_need_commit(hdr)) { if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) - bit = NFS_IOHDR_NEED_RESCHED; + request_commit = true; else if (dreq->flags == 0) { nfs_direct_set_hdr_verf(dreq, hdr); - bit = NFS_IOHDR_NEED_COMMIT; + request_commit = true; dreq->flags = NFS_ODIRECT_DO_COMMIT; } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { - if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) { + request_commit = true; + if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - bit = NFS_IOHDR_NEED_RESCHED; - } else - bit = NFS_IOHDR_NEED_COMMIT; } } } @@ -759,9 +752,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) req = nfs_list_entry(hdr->pages.next); nfs_list_remove_request(req); - switch (bit) { - case NFS_IOHDR_NEED_RESCHED: - case NFS_IOHDR_NEED_COMMIT: + if (request_commit) { kref_get(&req->wb_kref); nfs_mark_request_commit(req, hdr->lseg, &cinfo); } diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index d2eba1c13b7..90978075f73 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -84,45 +84,37 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset) BUG(); } -static void filelayout_reset_write(struct nfs_pgio_data *data) +static void filelayout_reset_write(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - struct rpc_task *task = &data->task; + struct rpc_task *task = &hdr->task; if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { dprintk("%s Reset task %5u for i/o through MDS " "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, - data->task.tk_pid, + hdr->task.tk_pid, hdr->inode->i_sb->s_id, (unsigned long long)NFS_FILEID(hdr->inode), - data->args.count, - (unsigned long long)data->args.offset); + hdr->args.count, + (unsigned long long)hdr->args.offset); - task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode, - &hdr->pages, - hdr->completion_ops, - hdr->dreq); + task->tk_status = pnfs_write_done_resend_to_mds(hdr); } } -static void filelayout_reset_read(struct nfs_pgio_data *data) +static void filelayout_reset_read(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - struct rpc_task *task = &data->task; + struct rpc_task *task = &hdr->task; if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { dprintk("%s Reset task %5u for i/o through MDS " "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, - data->task.tk_pid, + hdr->task.tk_pid, hdr->inode->i_sb->s_id, (unsigned long long)NFS_FILEID(hdr->inode), - data->args.count, - (unsigned long long)data->args.offset); + hdr->args.count, + (unsigned long long)hdr->args.offset); - task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode, - &hdr->pages, - hdr->completion_ops, - hdr->dreq); + task->tk_status = pnfs_read_done_resend_to_mds(hdr); } } @@ -243,18 +235,17 @@ wait_on_recovery: /* NFS_PROTO call done callback routines */ static int filelayout_read_done_cb(struct rpc_task *task, - struct nfs_pgio_data *data) + struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; int err; - trace_nfs4_pnfs_read(data, task->tk_status); - err = filelayout_async_handle_error(task, data->args.context->state, - data->ds_clp, hdr->lseg); + trace_nfs4_pnfs_read(hdr, task->tk_status); + err = filelayout_async_handle_error(task, hdr->args.context->state, + hdr->ds_clp, hdr->lseg); switch (err) { case -NFS4ERR_RESET_TO_MDS: - filelayout_reset_read(data); + filelayout_reset_read(hdr); return task->tk_status; case -EAGAIN: rpc_restart_call_prepare(task); @@ -270,15 +261,14 @@ static int filelayout_read_done_cb(struct rpc_task *task, * rfc5661 is not clear about which credential should be used. */ static void -filelayout_set_layoutcommit(struct nfs_pgio_data *wdata) +filelayout_set_layoutcommit(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = wdata->header; if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds || - wdata->res.verf->committed == NFS_FILE_SYNC) + hdr->res.verf->committed == NFS_FILE_SYNC) return; - pnfs_set_layoutcommit(wdata); + pnfs_set_layoutcommit(hdr); dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); } @@ -305,83 +295,82 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg) */ static void filelayout_read_prepare(struct rpc_task *task, void *data) { - struct nfs_pgio_data *rdata = data; + struct nfs_pgio_header *hdr = data; - if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) { + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return; } - if (filelayout_reset_to_mds(rdata->header->lseg)) { + if (filelayout_reset_to_mds(hdr->lseg)) { dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); - filelayout_reset_read(rdata); + filelayout_reset_read(hdr); rpc_exit(task, 0); return; } - rdata->pgio_done_cb = filelayout_read_done_cb; + hdr->pgio_done_cb = filelayout_read_done_cb; - if (nfs41_setup_sequence(rdata->ds_clp->cl_session, - &rdata->args.seq_args, - &rdata->res.seq_res, + if (nfs41_setup_sequence(hdr->ds_clp->cl_session, + &hdr->args.seq_args, + &hdr->res.seq_res, task)) return; - if (nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, - rdata->args.lock_context, FMODE_READ) == -EIO) + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, + hdr->args.lock_context, FMODE_READ) == -EIO) rpc_exit(task, -EIO); /* lost lock, terminate I/O */ } static void filelayout_read_call_done(struct rpc_task *task, void *data) { - struct nfs_pgio_data *rdata = data; + struct nfs_pgio_header *hdr = data; dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); - if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) && + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { - nfs41_sequence_done(task, &rdata->res.seq_res); + nfs41_sequence_done(task, &hdr->res.seq_res); return; } /* Note this may cause RPC to be resent */ - rdata->header->mds_ops->rpc_call_done(task, data); + hdr->mds_ops->rpc_call_done(task, data); } static void filelayout_read_count_stats(struct rpc_task *task, void *data) { - struct nfs_pgio_data *rdata = data; + struct nfs_pgio_header *hdr = data; - rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics); + rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics); } static void filelayout_read_release(void *data) { - struct nfs_pgio_data *rdata = data; - struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout; + struct nfs_pgio_header *hdr = data; + struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout; filelayout_fenceme(lo->plh_inode, lo); - nfs_put_client(rdata->ds_clp); - rdata->header->mds_ops->rpc_release(data); + nfs_put_client(hdr->ds_clp); + hdr->mds_ops->rpc_release(data); } static int filelayout_write_done_cb(struct rpc_task *task, - struct nfs_pgio_data *data) + struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; int err; - trace_nfs4_pnfs_write(data, task->tk_status); - err = filelayout_async_handle_error(task, data->args.context->state, - data->ds_clp, hdr->lseg); + trace_nfs4_pnfs_write(hdr, task->tk_status); + err = filelayout_async_handle_error(task, hdr->args.context->state, + hdr->ds_clp, hdr->lseg); switch (err) { case -NFS4ERR_RESET_TO_MDS: - filelayout_reset_write(data); + filelayout_reset_write(hdr); return task->tk_status; case -EAGAIN: rpc_restart_call_prepare(task); return -EAGAIN; } - filelayout_set_layoutcommit(data); + filelayout_set_layoutcommit(hdr); return 0; } @@ -419,57 +408,57 @@ static int filelayout_commit_done_cb(struct rpc_task *task, static void filelayout_write_prepare(struct rpc_task *task, void *data) { - struct nfs_pgio_data *wdata = data; + struct nfs_pgio_header *hdr = data; - if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) { + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return; } - if (filelayout_reset_to_mds(wdata->header->lseg)) { + if (filelayout_reset_to_mds(hdr->lseg)) { dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); - filelayout_reset_write(wdata); + filelayout_reset_write(hdr); rpc_exit(task, 0); return; } - if (nfs41_setup_sequence(wdata->ds_clp->cl_session, - &wdata->args.seq_args, - &wdata->res.seq_res, + if (nfs41_setup_sequence(hdr->ds_clp->cl_session, + &hdr->args.seq_args, + &hdr->res.seq_res, task)) return; - if (nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, - wdata->args.lock_context, FMODE_WRITE) == -EIO) + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, + hdr->args.lock_context, FMODE_WRITE) == -EIO) rpc_exit(task, -EIO); /* lost lock, terminate I/O */ } static void filelayout_write_call_done(struct rpc_task *task, void *data) { - struct nfs_pgio_data *wdata = data; + struct nfs_pgio_header *hdr = data; - if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) && + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { - nfs41_sequence_done(task, &wdata->res.seq_res); + nfs41_sequence_done(task, &hdr->res.seq_res); return; } /* Note this may cause RPC to be resent */ - wdata->header->mds_ops->rpc_call_done(task, data); + hdr->mds_ops->rpc_call_done(task, data); } static void filelayout_write_count_stats(struct rpc_task *task, void *data) { - struct nfs_pgio_data *wdata = data; + struct nfs_pgio_header *hdr = data; - rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics); + rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics); } static void filelayout_write_release(void *data) { - struct nfs_pgio_data *wdata = data; - struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout; + struct nfs_pgio_header *hdr = data; + struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout; filelayout_fenceme(lo->plh_inode, lo); - nfs_put_client(wdata->ds_clp); - wdata->header->mds_ops->rpc_release(data); + nfs_put_client(hdr->ds_clp); + hdr->mds_ops->rpc_release(data); } static void filelayout_commit_prepare(struct rpc_task *task, void *data) @@ -529,19 +518,18 @@ static const struct rpc_call_ops filelayout_commit_call_ops = { }; static enum pnfs_try_status -filelayout_read_pagelist(struct nfs_pgio_data *data) +filelayout_read_pagelist(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; - loff_t offset = data->args.offset; + loff_t offset = hdr->args.offset; u32 j, idx; struct nfs_fh *fh; dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", __func__, hdr->inode->i_ino, - data->args.pgbase, (size_t)data->args.count, offset); + hdr->args.pgbase, (size_t)hdr->args.count, offset); /* Retrieve the correct rpc_client for the byte range */ j = nfs4_fl_calc_j_index(lseg, offset); @@ -559,30 +547,29 @@ filelayout_read_pagelist(struct nfs_pgio_data *data) /* No multipath support. Use first DS */ atomic_inc(&ds->ds_clp->cl_count); - data->ds_clp = ds->ds_clp; - data->ds_idx = idx; + hdr->ds_clp = ds->ds_clp; + hdr->ds_idx = idx; fh = nfs4_fl_select_ds_fh(lseg, j); if (fh) - data->args.fh = fh; + hdr->args.fh = fh; - data->args.offset = filelayout_get_dserver_offset(lseg, offset); - data->mds_offset = offset; + hdr->args.offset = filelayout_get_dserver_offset(lseg, offset); + hdr->mds_offset = offset; /* Perform an asynchronous read to ds */ - nfs_initiate_pgio(ds_clnt, data, + nfs_initiate_pgio(ds_clnt, hdr, &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN); return PNFS_ATTEMPTED; } /* Perform async writes. */ static enum pnfs_try_status -filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) +filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync) { - struct nfs_pgio_header *hdr = data->header; struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; - loff_t offset = data->args.offset; + loff_t offset = hdr->args.offset; u32 j, idx; struct nfs_fh *fh; @@ -598,21 +585,20 @@ filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) return PNFS_NOT_ATTEMPTED; dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n", - __func__, hdr->inode->i_ino, sync, (size_t) data->args.count, + __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count, offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); - data->pgio_done_cb = filelayout_write_done_cb; + hdr->pgio_done_cb = filelayout_write_done_cb; atomic_inc(&ds->ds_clp->cl_count); - data->ds_clp = ds->ds_clp; - data->ds_idx = idx; + hdr->ds_clp = ds->ds_clp; + hdr->ds_idx = idx; fh = nfs4_fl_select_ds_fh(lseg, j); if (fh) - data->args.fh = fh; - - data->args.offset = filelayout_get_dserver_offset(lseg, offset); + hdr->args.fh = fh; + hdr->args.offset = filelayout_get_dserver_offset(lseg, offset); /* Perform an asynchronous write */ - nfs_initiate_pgio(ds_clnt, data, + nfs_initiate_pgio(ds_clnt, hdr, &filelayout_write_call_ops, sync, RPC_TASK_SOFTCONN); return PNFS_ATTEMPTED; @@ -1023,6 +1009,7 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) /* The generic layer is about to remove the req from the commit list. * If this will make the bucket empty, it will need to put the lseg reference. + * Note this is must be called holding the inode (/cinfo) lock */ static void filelayout_clear_request_commit(struct nfs_page *req, @@ -1030,7 +1017,6 @@ filelayout_clear_request_commit(struct nfs_page *req, { struct pnfs_layout_segment *freeme = NULL; - spin_lock(cinfo->lock); if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) goto out; cinfo->ds->nwritten--; @@ -1045,22 +1031,25 @@ filelayout_clear_request_commit(struct nfs_page *req, } out: nfs_request_remove_commit_list(req, cinfo); - spin_unlock(cinfo->lock); - pnfs_put_lseg(freeme); + pnfs_put_lseg_async(freeme); } -static struct list_head * -filelayout_choose_commit_list(struct nfs_page *req, - struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo) +static void +filelayout_mark_request_commit(struct nfs_page *req, + struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo) + { struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); u32 i, j; struct list_head *list; struct pnfs_commit_bucket *buckets; - if (fl->commit_through_mds) - return &cinfo->mds->list; + if (fl->commit_through_mds) { + list = &cinfo->mds->list; + spin_lock(cinfo->lock); + goto mds_commit; + } /* Note that we are calling nfs4_fl_calc_j_index on each page * that ends up being committed to a data server. An attractive @@ -1084,19 +1073,22 @@ filelayout_choose_commit_list(struct nfs_page *req, } set_bit(PG_COMMIT_TO_DS, &req->wb_flags); cinfo->ds->nwritten++; - spin_unlock(cinfo->lock); - return list; -} -static void -filelayout_mark_request_commit(struct nfs_page *req, - struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo) -{ - struct list_head *list; - - list = filelayout_choose_commit_list(req, lseg, cinfo); - nfs_request_add_commit_list(req, list, cinfo); +mds_commit: + /* nfs_request_add_commit_list(). We need to add req to list without + * dropping cinfo lock. + */ + set_bit(PG_CLEAN, &(req)->wb_flags); + nfs_list_add_request(req, list); + cinfo->mds->ncommit++; + spin_unlock(cinfo->lock); + if (!cinfo->dreq) { + inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); + inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, + BDI_RECLAIMABLE); + __mark_inode_dirty(req->wb_context->dentry->d_inode, + I_DIRTY_DATASYNC); + } } static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) @@ -1244,15 +1236,64 @@ restart: spin_unlock(cinfo->lock); } +/* filelayout_search_commit_reqs - Search lists in @cinfo for the head reqest + * for @page + * @cinfo - commit info for current inode + * @page - page to search for matching head request + * + * Returns a the head request if one is found, otherwise returns NULL. + */ +static struct nfs_page * +filelayout_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page) +{ + struct nfs_page *freq, *t; + struct pnfs_commit_bucket *b; + int i; + + /* Linearly search the commit lists for each bucket until a matching + * request is found */ + for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { + list_for_each_entry_safe(freq, t, &b->written, wb_list) { + if (freq->wb_page == page) + return freq->wb_head; + } + list_for_each_entry_safe(freq, t, &b->committing, wb_list) { + if (freq->wb_page == page) + return freq->wb_head; + } + } + + return NULL; +} + +static void filelayout_retry_commit(struct nfs_commit_info *cinfo, int idx) +{ + struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; + struct pnfs_commit_bucket *bucket; + struct pnfs_layout_segment *freeme; + int i; + + for (i = idx; i < fl_cinfo->nbuckets; i++) { + bucket = &fl_cinfo->buckets[i]; + if (list_empty(&bucket->committing)) + continue; + nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); + spin_lock(cinfo->lock); + freeme = bucket->clseg; + bucket->clseg = NULL; + spin_unlock(cinfo->lock); + pnfs_put_lseg(freeme); + } +} + static unsigned int alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) { struct pnfs_ds_commit_info *fl_cinfo; struct pnfs_commit_bucket *bucket; struct nfs_commit_data *data; - int i, j; + int i; unsigned int nreq = 0; - struct pnfs_layout_segment *freeme; fl_cinfo = cinfo->ds; bucket = fl_cinfo->buckets; @@ -1272,16 +1313,7 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) } /* Clean up on error */ - for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) { - if (list_empty(&bucket->committing)) - continue; - nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); - spin_lock(cinfo->lock); - freeme = bucket->clseg; - bucket->clseg = NULL; - spin_unlock(cinfo->lock); - pnfs_put_lseg(freeme); - } + filelayout_retry_commit(cinfo, i); /* Caller will clean up entries put on list */ return nreq; } @@ -1301,8 +1333,12 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, data->lseg = NULL; list_add(&data->pages, &list); nreq++; - } else + } else { nfs_retry_commit(mds_pages, NULL, cinfo); + filelayout_retry_commit(cinfo, 0); + cinfo->completion_ops->error_cleanup(NFS_I(inode)); + return -ENOMEM; + } } nreq += alloc_ds_commits(cinfo, &list); @@ -1380,6 +1416,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { .clear_request_commit = filelayout_clear_request_commit, .scan_commit_lists = filelayout_scan_commit_lists, .recover_commit_reqs = filelayout_recover_commit_reqs, + .search_commit_reqs = filelayout_search_commit_reqs, .commit_pagelist = filelayout_commit_pagelist, .read_pagelist = filelayout_read_pagelist, .write_pagelist = filelayout_write_pagelist, diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index e2a0361e24c..8540516f4d7 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -695,7 +695,7 @@ filelayout_get_device_info(struct inode *inode, if (pdev == NULL) return NULL; - pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); + pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags); if (pages == NULL) { kfree(pdev); return NULL; diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index b94f80420a5..880618a8b04 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -112,7 +112,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh, * if the dentry tree reaches them; however if the dentry already * exists, we'll pick it up at this point and use it as the root */ - ret = d_obtain_alias(inode); + ret = d_obtain_root(inode); if (IS_ERR(ret)) { dprintk("nfs_get_root: get root dentry failed\n"); goto out; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 68921b01b79..577a36f0a51 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1002,6 +1002,15 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) } EXPORT_SYMBOL_GPL(nfs_revalidate_inode); +int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode) +{ + if (!(NFS_I(inode)->cache_validity & + (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) + && !nfs_attribute_cache_expired(inode)) + return NFS_STALE(inode) ? -ESTALE : 0; + return -ECHILD; +} + static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index e2a45ae5014..9056622d223 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -247,11 +247,11 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); int nfs_iocounter_wait(struct nfs_io_counter *c); extern const struct nfs_pageio_ops nfs_pgio_rw_ops; -struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *); -void nfs_rw_header_free(struct nfs_pgio_header *); -void nfs_pgio_data_release(struct nfs_pgio_data *); +struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *); +void nfs_pgio_header_free(struct nfs_pgio_header *); +void nfs_pgio_data_destroy(struct nfs_pgio_header *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); -int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, +int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_header *, const struct rpc_call_ops *, int, int); void nfs_free_request(struct nfs_page *req); @@ -451,6 +451,7 @@ int nfs_scan_commit(struct inode *inode, struct list_head *dst, void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo); +int nfs_write_need_commit(struct nfs_pgio_header *); int nfs_generic_commit_list(struct inode *inode, struct list_head *head, int how, struct nfs_commit_info *cinfo); void nfs_retry_commit(struct list_head *page_list, @@ -491,7 +492,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode) extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); /* nfs4proc.c */ -extern void __nfs4_read_done_cb(struct nfs_pgio_data *); +extern void __nfs4_read_done_cb(struct nfs_pgio_header *); extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, const char *ip_addr); diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 8f854dde415..24c6898159c 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -129,7 +129,10 @@ static int __nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, .rpc_argp = &args, .rpc_resp = &fattr, }; - int status; + int status = 0; + + if (acl == NULL && (!S_ISDIR(inode->i_mode) || dfacl == NULL)) + goto out; status = -EOPNOTSUPP; if (!nfs_server_capable(inode, NFS_CAP_ACLS)) @@ -256,7 +259,7 @@ nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data, char *p = data + *result; acl = get_acl(inode, type); - if (!acl) + if (IS_ERR_OR_NULL(acl)) return 0; posix_acl_release(acl); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index f0afa291fd5..809670eba52 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -795,41 +795,44 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return status; } -static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; nfs_invalidate_atime(inode); - nfs_refresh_inode(inode, &data->fattr); + nfs_refresh_inode(inode, &hdr->fattr); return 0; } -static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs3_proc_read_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; } -static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, + struct nfs_pgio_header *hdr) { rpc_call_start(task); return 0; } -static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; if (task->tk_status >= 0) - nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); + nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr); return 0; } -static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs3_proc_write_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index ba2affa5194..a8b855ab4e2 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -54,7 +54,7 @@ struct nfs4_minor_version_ops { const nfs4_stateid *); int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); - int (*free_lock_state)(struct nfs_server *, + void (*free_lock_state)(struct nfs_server *, struct nfs4_lock_state *); const struct rpc_call_ops *call_sync_ops; const struct nfs4_state_recovery_ops *reboot_recovery_ops; @@ -129,17 +129,6 @@ enum { * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) */ -struct nfs4_lock_owner { - unsigned int lo_type; -#define NFS4_ANY_LOCK_TYPE (0U) -#define NFS4_FLOCK_LOCK_TYPE (1U << 0) -#define NFS4_POSIX_LOCK_TYPE (1U << 1) - union { - fl_owner_t posix_owner; - pid_t flock_owner; - } lo_u; -}; - struct nfs4_lock_state { struct list_head ls_locks; /* Other lock stateids */ struct nfs4_state * ls_state; /* Pointer to open state */ @@ -149,7 +138,7 @@ struct nfs4_lock_state { struct nfs_seqid_counter ls_seqid; nfs4_stateid ls_stateid; atomic_t ls_count; - struct nfs4_lock_owner ls_owner; + fl_owner_t ls_owner; }; /* bits for nfs4_state->flags */ @@ -337,11 +326,11 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode, */ static inline void nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, - struct rpc_message *msg, struct nfs_pgio_data *wdata) + struct rpc_message *msg, struct nfs_pgio_header *hdr) { if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) && !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags)) - wdata->args.stable = NFS_FILE_SYNC; + hdr->args.stable = NFS_FILE_SYNC; } #else /* CONFIG_NFS_v4_1 */ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) @@ -369,7 +358,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags, static inline void nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, - struct rpc_message *msg, struct nfs_pgio_data *wdata) + struct rpc_message *msg, struct nfs_pgio_header *hdr) { } #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index aa9ef487604..53e435a9526 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -855,6 +855,11 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, }; struct rpc_timeout ds_timeout; struct nfs_client *clp; + char buf[INET6_ADDRSTRLEN + 1]; + + if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0) + return ERR_PTR(-EINVAL); + cl_init.hostname = buf; /* * Set an authflavor equual to the MDS value. Use the MDS nfs_client diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4bf3d97cc5a..7dd8aca31c2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1952,6 +1952,14 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data) return status; } +/* + * Additional permission checks in order to distinguish between an + * open for read, and an open for execute. This works around the + * fact that NFSv4 OPEN treats read and execute permissions as being + * the same. + * Note that in the non-execute case, we want to turn off permission + * checking if we just created a new file (POSIX open() semantics). + */ static int nfs4_opendata_access(struct rpc_cred *cred, struct nfs4_opendata *opendata, struct nfs4_state *state, fmode_t fmode, @@ -1966,14 +1974,14 @@ static int nfs4_opendata_access(struct rpc_cred *cred, return 0; mask = 0; - /* don't check MAY_WRITE - a newly created file may not have - * write mode bits, but POSIX allows the creating process to write. - * use openflags to check for exec, because fmode won't - * always have FMODE_EXEC set when file open for exec. */ + /* + * Use openflags to check for exec, because fmode won't + * always have FMODE_EXEC set when file open for exec. + */ if (openflags & __FMODE_EXEC) { /* ONLY check for exec rights */ mask = MAY_EXEC; - } else if (fmode & FMODE_READ) + } else if ((fmode & FMODE_READ) && !opendata->file_created) mask = MAY_READ; cache.cred = cred; @@ -2216,8 +2224,15 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); ret = _nfs4_proc_open(opendata); - if (ret != 0) + if (ret != 0) { + if (ret == -ENOENT) { + d_drop(opendata->dentry); + d_add(opendata->dentry, NULL); + nfs_set_verifier(opendata->dentry, + nfs_save_change_attribute(opendata->dir->d_inode)); + } goto out; + } state = nfs4_opendata_to_nfs4_state(opendata); ret = PTR_ERR(state); @@ -2545,6 +2560,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) struct nfs4_closedata *calldata = data; struct nfs4_state *state = calldata->state; struct nfs_server *server = NFS_SERVER(calldata->inode); + nfs4_stateid *res_stateid = NULL; dprintk("%s: begin!\n", __func__); if (!nfs4_sequence_done(task, &calldata->res.seq_res)) @@ -2555,12 +2571,12 @@ static void nfs4_close_done(struct rpc_task *task, void *data) */ switch (task->tk_status) { case 0: - if (calldata->roc) + res_stateid = &calldata->res.stateid; + if (calldata->arg.fmode == 0 && calldata->roc) pnfs_roc_set_barrier(state->inode, calldata->roc_barrier); - nfs_clear_open_stateid(state, &calldata->res.stateid, 0); renew_lease(server, calldata->timestamp); - goto out_release; + break; case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_OLD_STATEID: @@ -2574,7 +2590,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) goto out_release; } } - nfs_clear_open_stateid(state, NULL, calldata->arg.fmode); + nfs_clear_open_stateid(state, res_stateid, calldata->arg.fmode); out_release: nfs_release_seqid(calldata->arg.seqid); nfs_refresh_inode(calldata->inode, calldata->res.fattr); @@ -2586,6 +2602,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) struct nfs4_closedata *calldata = data; struct nfs4_state *state = calldata->state; struct inode *inode = calldata->inode; + bool is_rdonly, is_wronly, is_rdwr; int call_close = 0; dprintk("%s: begin!\n", __func__); @@ -2593,18 +2610,24 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) goto out_wait; task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; - calldata->arg.fmode = FMODE_READ|FMODE_WRITE; spin_lock(&state->owner->so_lock); + is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags); + is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags); + is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags); + /* Calculate the current open share mode */ + calldata->arg.fmode = 0; + if (is_rdonly || is_rdwr) + calldata->arg.fmode |= FMODE_READ; + if (is_wronly || is_rdwr) + calldata->arg.fmode |= FMODE_WRITE; /* Calculate the change in open mode */ if (state->n_rdwr == 0) { if (state->n_rdonly == 0) { - call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags); - call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); + call_close |= is_rdonly || is_rdwr; calldata->arg.fmode &= ~FMODE_READ; } if (state->n_wronly == 0) { - call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags); - call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); + call_close |= is_wronly || is_rdwr; calldata->arg.fmode &= ~FMODE_WRITE; } } @@ -2647,6 +2670,48 @@ static const struct rpc_call_ops nfs4_close_ops = { .rpc_release = nfs4_free_closedata, }; +static bool nfs4_state_has_opener(struct nfs4_state *state) +{ + /* first check existing openers */ + if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0 && + state->n_rdonly != 0) + return true; + + if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0 && + state->n_wronly != 0) + return true; + + if (test_bit(NFS_O_RDWR_STATE, &state->flags) != 0 && + state->n_rdwr != 0) + return true; + + return false; +} + +static bool nfs4_roc(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_open_context *ctx; + struct nfs4_state *state; + + spin_lock(&inode->i_lock); + list_for_each_entry(ctx, &nfsi->open_files, list) { + state = ctx->state; + if (state == NULL) + continue; + if (nfs4_state_has_opener(state)) { + spin_unlock(&inode->i_lock); + return false; + } + } + spin_unlock(&inode->i_lock); + + if (nfs4_check_delegation(inode, FMODE_READ)) + return false; + + return pnfs_roc(inode); +} + /* * It is possible for data to be read/written from a mem-mapped file * after the sys_close call (which hits the vfs layer as a flush). @@ -2697,7 +2762,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) calldata->res.fattr = &calldata->fattr; calldata->res.seqid = calldata->arg.seqid; calldata->res.server = server; - calldata->roc = pnfs_roc(state->inode); + calldata->roc = nfs4_roc(state->inode); nfs_sb_active(calldata->inode->i_sb); msg.rpc_argp = &calldata->arg; @@ -4033,24 +4098,25 @@ static bool nfs4_error_stateid_expired(int err) return false; } -void __nfs4_read_done_cb(struct nfs_pgio_data *data) +void __nfs4_read_done_cb(struct nfs_pgio_header *hdr) { - nfs_invalidate_atime(data->header->inode); + nfs_invalidate_atime(hdr->inode); } -static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct nfs_server *server = NFS_SERVER(data->header->inode); + struct nfs_server *server = NFS_SERVER(hdr->inode); - trace_nfs4_read(data, task->tk_status); - if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { + trace_nfs4_read(hdr, task->tk_status); + if (nfs4_async_handle_error(task, server, + hdr->args.context->state) == -EAGAIN) { rpc_restart_call_prepare(task); return -EAGAIN; } - __nfs4_read_done_cb(data); + __nfs4_read_done_cb(hdr); if (task->tk_status > 0) - renew_lease(server, data->timestamp); + renew_lease(server, hdr->timestamp); return 0; } @@ -4068,54 +4134,59 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task, return true; } -static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { dprintk("--> %s\n", __func__); - if (!nfs4_sequence_done(task, &data->res.seq_res)) + if (!nfs4_sequence_done(task, &hdr->res.seq_res)) return -EAGAIN; - if (nfs4_read_stateid_changed(task, &data->args)) + if (nfs4_read_stateid_changed(task, &hdr->args)) return -EAGAIN; - return data->pgio_done_cb ? data->pgio_done_cb(task, data) : - nfs4_read_done_cb(task, data); + return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) : + nfs4_read_done_cb(task, hdr); } -static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { - data->timestamp = jiffies; - data->pgio_done_cb = nfs4_read_done_cb; + hdr->timestamp = jiffies; + hdr->pgio_done_cb = nfs4_read_done_cb; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); + nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0); } -static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, + struct nfs_pgio_header *hdr) { - if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), - &data->args.seq_args, - &data->res.seq_res, + if (nfs4_setup_sequence(NFS_SERVER(hdr->inode), + &hdr->args.seq_args, + &hdr->res.seq_res, task)) return 0; - if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, - data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO) + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, + hdr->args.lock_context, + hdr->rw_ops->rw_mode) == -EIO) return -EIO; - if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) return -EIO; return 0; } -static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs4_write_done_cb(struct rpc_task *task, + struct nfs_pgio_header *hdr) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; - trace_nfs4_write(data, task->tk_status); - if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { + trace_nfs4_write(hdr, task->tk_status); + if (nfs4_async_handle_error(task, NFS_SERVER(inode), + hdr->args.context->state) == -EAGAIN) { rpc_restart_call_prepare(task); return -EAGAIN; } if (task->tk_status >= 0) { - renew_lease(NFS_SERVER(inode), data->timestamp); - nfs_post_op_update_inode_force_wcc(inode, &data->fattr); + renew_lease(NFS_SERVER(inode), hdr->timestamp); + nfs_post_op_update_inode_force_wcc(inode, &hdr->fattr); } return 0; } @@ -4134,23 +4205,21 @@ static bool nfs4_write_stateid_changed(struct rpc_task *task, return true; } -static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - if (!nfs4_sequence_done(task, &data->res.seq_res)) + if (!nfs4_sequence_done(task, &hdr->res.seq_res)) return -EAGAIN; - if (nfs4_write_stateid_changed(task, &data->args)) + if (nfs4_write_stateid_changed(task, &hdr->args)) return -EAGAIN; - return data->pgio_done_cb ? data->pgio_done_cb(task, data) : - nfs4_write_done_cb(task, data); + return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) : + nfs4_write_done_cb(task, hdr); } static -bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data) +bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr) { - const struct nfs_pgio_header *hdr = data->header; - /* Don't request attributes for pNFS or O_DIRECT writes */ - if (data->ds_clp != NULL || hdr->dreq != NULL) + if (hdr->ds_clp != NULL || hdr->dreq != NULL) return false; /* Otherwise, request attributes if and only if we don't hold * a delegation @@ -4158,23 +4227,24 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data) return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; } -static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { - struct nfs_server *server = NFS_SERVER(data->header->inode); + struct nfs_server *server = NFS_SERVER(hdr->inode); - if (!nfs4_write_need_cache_consistency_data(data)) { - data->args.bitmask = NULL; - data->res.fattr = NULL; + if (!nfs4_write_need_cache_consistency_data(hdr)) { + hdr->args.bitmask = NULL; + hdr->res.fattr = NULL; } else - data->args.bitmask = server->cache_consistency_bitmask; + hdr->args.bitmask = server->cache_consistency_bitmask; - if (!data->pgio_done_cb) - data->pgio_done_cb = nfs4_write_done_cb; - data->res.server = server; - data->timestamp = jiffies; + if (!hdr->pgio_done_cb) + hdr->pgio_done_cb = nfs4_write_done_cb; + hdr->res.server = server; + hdr->timestamp = jiffies; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1); } static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) @@ -4881,6 +4951,18 @@ nfs4_init_callback_netid(const struct nfs_client *clp, char *buf, size_t len) return scnprintf(buf, len, "tcp"); } +static void nfs4_setclientid_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_setclientid *sc = calldata; + + if (task->tk_status == 0) + sc->sc_cred = get_rpccred(task->tk_rqstp->rq_cred); +} + +static const struct rpc_call_ops nfs4_setclientid_ops = { + .rpc_call_done = nfs4_setclientid_done, +}; + /** * nfs4_proc_setclientid - Negotiate client ID * @clp: state data structure @@ -4907,6 +4989,14 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, .rpc_resp = res, .rpc_cred = cred, }; + struct rpc_task *task; + struct rpc_task_setup task_setup_data = { + .rpc_client = clp->cl_rpcclient, + .rpc_message = &msg, + .callback_ops = &nfs4_setclientid_ops, + .callback_data = &setclientid, + .flags = RPC_TASK_TIMEOUT, + }; int status; /* nfs_client_id4 */ @@ -4933,7 +5023,18 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, dprintk("NFS call setclientid auth=%s, '%.*s'\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, setclientid.sc_name_len, setclientid.sc_name); - status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) { + status = PTR_ERR(task); + goto out; + } + status = task->tk_status; + if (setclientid.sc_cred) { + clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred); + put_rpccred(setclientid.sc_cred); + } + rpc_put_task(task); +out: trace_nfs4_setclientid(clp, status); dprintk("NFS reply setclientid: %d\n", status); return status; @@ -4975,6 +5076,9 @@ struct nfs4_delegreturndata { unsigned long timestamp; struct nfs_fattr fattr; int rpc_status; + struct inode *inode; + bool roc; + u32 roc_barrier; }; static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) @@ -4988,7 +5092,6 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) switch (task->tk_status) { case 0: renew_lease(data->res.server, data->timestamp); - break; case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_BAD_STATEID: @@ -4996,6 +5099,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: task->tk_status = 0; + if (data->roc) + pnfs_roc_set_barrier(data->inode, data->roc_barrier); break; default: if (nfs4_async_handle_error(task, data->res.server, NULL) == @@ -5009,6 +5114,10 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) static void nfs4_delegreturn_release(void *calldata) { + struct nfs4_delegreturndata *data = calldata; + + if (data->roc) + pnfs_roc_release(data->inode); kfree(calldata); } @@ -5018,6 +5127,10 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) d_data = (struct nfs4_delegreturndata *)data; + if (d_data->roc && + pnfs_roc_drain(d_data->inode, &d_data->roc_barrier, task)) + return; + nfs4_setup_sequence(d_data->res.server, &d_data->args.seq_args, &d_data->res.seq_res, @@ -5061,6 +5174,9 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co nfs_fattr_init(data->res.fattr); data->timestamp = jiffies; data->rpc_status = 0; + data->inode = inode; + data->roc = list_empty(&NFS_I(inode)->open_files) ? + pnfs_roc(inode) : false; task_setup_data.callback_data = data; msg.rpc_argp = &data->args; @@ -5834,8 +5950,10 @@ struct nfs_release_lockowner_data { static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata) { struct nfs_release_lockowner_data *data = calldata; - nfs40_setup_sequence(data->server, - &data->args.seq_args, &data->res.seq_res, task); + struct nfs_server *server = data->server; + nfs40_setup_sequence(server, &data->args.seq_args, + &data->res.seq_res, task); + data->args.lock_owner.clientid = server->nfs_client->cl_clientid; data->timestamp = jiffies; } @@ -5852,6 +5970,8 @@ static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata) break; case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_EXPIRED: + nfs4_schedule_lease_recovery(server->nfs_client); + break; case -NFS4ERR_LEASE_MOVED: case -NFS4ERR_DELAY: if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) @@ -5872,7 +5992,8 @@ static const struct rpc_call_ops nfs4_release_lockowner_ops = { .rpc_release = nfs4_release_lockowner_release, }; -static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp) +static void +nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp) { struct nfs_release_lockowner_data *data; struct rpc_message msg = { @@ -5880,11 +6001,11 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st }; if (server->nfs_client->cl_mvops->minor_version != 0) - return -EINVAL; + return; data = kmalloc(sizeof(*data), GFP_NOFS); if (!data) - return -ENOMEM; + return; data->lsp = lsp; data->server = server; data->args.lock_owner.clientid = server->nfs_client->cl_clientid; @@ -5895,7 +6016,6 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st msg.rpc_resp = &data->res; nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); - return 0; } #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" @@ -8182,7 +8302,8 @@ static int nfs41_free_stateid(struct nfs_server *server, return ret; } -static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) +static void +nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) { struct rpc_task *task; struct rpc_cred *cred = lsp->ls_state->owner->so_cred; @@ -8190,9 +8311,8 @@ static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_sta task = _nfs41_free_stateid(server, &lsp->ls_stateid, cred, false); nfs4_free_lock_state(server, lsp); if (IS_ERR(task)) - return PTR_ERR(task); + return; rpc_put_task(task); - return 0; } static bool nfs41_match_stateid(const nfs4_stateid *s1, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 42f12118216..22fe35104c0 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -787,21 +787,12 @@ void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode) * that is compatible with current->files */ static struct nfs4_lock_state * -__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type) +__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) { struct nfs4_lock_state *pos; list_for_each_entry(pos, &state->lock_states, ls_locks) { - if (type != NFS4_ANY_LOCK_TYPE && pos->ls_owner.lo_type != type) + if (pos->ls_owner != fl_owner) continue; - switch (pos->ls_owner.lo_type) { - case NFS4_POSIX_LOCK_TYPE: - if (pos->ls_owner.lo_u.posix_owner != fl_owner) - continue; - break; - case NFS4_FLOCK_LOCK_TYPE: - if (pos->ls_owner.lo_u.flock_owner != fl_pid) - continue; - } atomic_inc(&pos->ls_count); return pos; } @@ -813,7 +804,7 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_p * exists, return an uninitialized one. * */ -static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type) +static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) { struct nfs4_lock_state *lsp; struct nfs_server *server = state->owner->so_server; @@ -824,17 +815,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f nfs4_init_seqid_counter(&lsp->ls_seqid); atomic_set(&lsp->ls_count, 1); lsp->ls_state = state; - lsp->ls_owner.lo_type = type; - switch (lsp->ls_owner.lo_type) { - case NFS4_FLOCK_LOCK_TYPE: - lsp->ls_owner.lo_u.flock_owner = fl_pid; - break; - case NFS4_POSIX_LOCK_TYPE: - lsp->ls_owner.lo_u.posix_owner = fl_owner; - break; - default: - goto out_free; - } + lsp->ls_owner = fl_owner; lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS); if (lsp->ls_seqid.owner_id < 0) goto out_free; @@ -857,13 +838,13 @@ void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp * exists, return an uninitialized one. * */ -static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner, pid_t pid, unsigned int type) +static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) { struct nfs4_lock_state *lsp, *new = NULL; for(;;) { spin_lock(&state->state_lock); - lsp = __nfs4_find_lock_state(state, owner, pid, type); + lsp = __nfs4_find_lock_state(state, owner); if (lsp != NULL) break; if (new != NULL) { @@ -874,7 +855,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_ break; } spin_unlock(&state->state_lock); - new = nfs4_alloc_lock_state(state, owner, pid, type); + new = nfs4_alloc_lock_state(state, owner); if (new == NULL) return NULL; } @@ -935,13 +916,7 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) if (fl->fl_ops != NULL) return 0; - if (fl->fl_flags & FL_POSIX) - lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE); - else if (fl->fl_flags & FL_FLOCK) - lsp = nfs4_get_lock_state(state, NULL, fl->fl_pid, - NFS4_FLOCK_LOCK_TYPE); - else - return -EINVAL; + lsp = nfs4_get_lock_state(state, fl->fl_owner); if (lsp == NULL) return -ENOMEM; fl->fl_u.nfs4_fl.owner = lsp; @@ -955,7 +930,6 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst, { struct nfs4_lock_state *lsp; fl_owner_t fl_owner; - pid_t fl_pid; int ret = -ENOENT; @@ -966,9 +940,8 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst, goto out; fl_owner = lockowner->l_owner; - fl_pid = lockowner->l_pid; spin_lock(&state->state_lock); - lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); + lsp = __nfs4_find_lock_state(state, fl_owner); if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags)) ret = -EIO; else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 0a744f3a86f..1c32adbe728 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -932,11 +932,11 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group); DECLARE_EVENT_CLASS(nfs4_read_event, TP_PROTO( - const struct nfs_pgio_data *data, + const struct nfs_pgio_header *hdr, int error ), - TP_ARGS(data, error), + TP_ARGS(hdr, error), TP_STRUCT__entry( __field(dev_t, dev) @@ -948,12 +948,12 @@ DECLARE_EVENT_CLASS(nfs4_read_event, ), TP_fast_assign( - const struct inode *inode = data->header->inode; + const struct inode *inode = hdr->inode; __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); - __entry->offset = data->args.offset; - __entry->count = data->args.count; + __entry->offset = hdr->args.offset; + __entry->count = hdr->args.count; __entry->error = error; ), @@ -972,10 +972,10 @@ DECLARE_EVENT_CLASS(nfs4_read_event, #define DEFINE_NFS4_READ_EVENT(name) \ DEFINE_EVENT(nfs4_read_event, name, \ TP_PROTO( \ - const struct nfs_pgio_data *data, \ + const struct nfs_pgio_header *hdr, \ int error \ ), \ - TP_ARGS(data, error)) + TP_ARGS(hdr, error)) DEFINE_NFS4_READ_EVENT(nfs4_read); #ifdef CONFIG_NFS_V4_1 DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read); @@ -983,11 +983,11 @@ DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read); DECLARE_EVENT_CLASS(nfs4_write_event, TP_PROTO( - const struct nfs_pgio_data *data, + const struct nfs_pgio_header *hdr, int error ), - TP_ARGS(data, error), + TP_ARGS(hdr, error), TP_STRUCT__entry( __field(dev_t, dev) @@ -999,12 +999,12 @@ DECLARE_EVENT_CLASS(nfs4_write_event, ), TP_fast_assign( - const struct inode *inode = data->header->inode; + const struct inode *inode = hdr->inode; __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); - __entry->offset = data->args.offset; - __entry->count = data->args.count; + __entry->offset = hdr->args.offset; + __entry->count = hdr->args.count; __entry->error = error; ), @@ -1024,10 +1024,10 @@ DECLARE_EVENT_CLASS(nfs4_write_event, #define DEFINE_NFS4_WRITE_EVENT(name) \ DEFINE_EVENT(nfs4_write_event, name, \ TP_PROTO( \ - const struct nfs_pgio_data *data, \ + const struct nfs_pgio_header *hdr, \ int error \ ), \ - TP_ARGS(data, error)) + TP_ARGS(hdr, error)) DEFINE_NFS4_WRITE_EVENT(nfs4_write); #ifdef CONFIG_NFS_V4_1 DEFINE_NFS4_WRITE_EVENT(nfs4_pnfs_write); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 939ae606cfa..e13b59d8d9a 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7092,7 +7092,7 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, if (!status) status = decode_sequence(xdr, &res->seq_res, rqstp); if (!status) - status = decode_reclaim_complete(xdr, (void *)NULL); + status = decode_reclaim_complete(xdr, NULL); return status; } diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 611320753db..ae05278b376 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -439,22 +439,21 @@ static void _read_done(struct ore_io_state *ios, void *private) objlayout_read_done(&objios->oir, status, objios->sync); } -int objio_read_pagelist(struct nfs_pgio_data *rdata) +int objio_read_pagelist(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = rdata->header; struct objio_state *objios; int ret; ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true, - hdr->lseg, rdata->args.pages, rdata->args.pgbase, - rdata->args.offset, rdata->args.count, rdata, + hdr->lseg, hdr->args.pages, hdr->args.pgbase, + hdr->args.offset, hdr->args.count, hdr, GFP_KERNEL, &objios); if (unlikely(ret)) return ret; objios->ios->done = _read_done; dprintk("%s: offset=0x%llx length=0x%x\n", __func__, - rdata->args.offset, rdata->args.count); + hdr->args.offset, hdr->args.count); ret = ore_read(objios->ios); if (unlikely(ret)) objio_free_result(&objios->oir); @@ -487,11 +486,11 @@ static void _write_done(struct ore_io_state *ios, void *private) static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) { struct objio_state *objios = priv; - struct nfs_pgio_data *wdata = objios->oir.rpcdata; - struct address_space *mapping = wdata->header->inode->i_mapping; + struct nfs_pgio_header *hdr = objios->oir.rpcdata; + struct address_space *mapping = hdr->inode->i_mapping; pgoff_t index = offset / PAGE_SIZE; struct page *page; - loff_t i_size = i_size_read(wdata->header->inode); + loff_t i_size = i_size_read(hdr->inode); if (offset >= i_size) { *uptodate = true; @@ -531,15 +530,14 @@ static const struct _ore_r4w_op _r4w_op = { .put_page = &__r4w_put_page, }; -int objio_write_pagelist(struct nfs_pgio_data *wdata, int how) +int objio_write_pagelist(struct nfs_pgio_header *hdr, int how) { - struct nfs_pgio_header *hdr = wdata->header; struct objio_state *objios; int ret; ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false, - hdr->lseg, wdata->args.pages, wdata->args.pgbase, - wdata->args.offset, wdata->args.count, wdata, GFP_NOFS, + hdr->lseg, hdr->args.pages, hdr->args.pgbase, + hdr->args.offset, hdr->args.count, hdr, GFP_NOFS, &objios); if (unlikely(ret)) return ret; @@ -551,7 +549,7 @@ int objio_write_pagelist(struct nfs_pgio_data *wdata, int how) objios->ios->done = _write_done; dprintk("%s: offset=0x%llx length=0x%x\n", __func__, - wdata->args.offset, wdata->args.count); + hdr->args.offset, hdr->args.count); ret = ore_write(objios->ios); if (unlikely(ret)) { objio_free_result(&objios->oir); diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 765d3f54e98..697a16d11fa 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -229,36 +229,36 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index, static void _rpc_read_complete(struct work_struct *work) { struct rpc_task *task; - struct nfs_pgio_data *rdata; + struct nfs_pgio_header *hdr; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - rdata = container_of(task, struct nfs_pgio_data, task); + hdr = container_of(task, struct nfs_pgio_header, task); - pnfs_ld_read_done(rdata); + pnfs_ld_read_done(hdr); } void objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) { - struct nfs_pgio_data *rdata = oir->rpcdata; + struct nfs_pgio_header *hdr = oir->rpcdata; - oir->status = rdata->task.tk_status = status; + oir->status = hdr->task.tk_status = status; if (status >= 0) - rdata->res.count = status; + hdr->res.count = status; else - rdata->header->pnfs_error = status; + hdr->pnfs_error = status; objlayout_iodone(oir); /* must not use oir after this point */ dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__, - status, rdata->res.eof, sync); + status, hdr->res.eof, sync); if (sync) - pnfs_ld_read_done(rdata); + pnfs_ld_read_done(hdr); else { - INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete); - schedule_work(&rdata->task.u.tk_work); + INIT_WORK(&hdr->task.u.tk_work, _rpc_read_complete); + schedule_work(&hdr->task.u.tk_work); } } @@ -266,12 +266,11 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) * Perform sync or async reads. */ enum pnfs_try_status -objlayout_read_pagelist(struct nfs_pgio_data *rdata) +objlayout_read_pagelist(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = rdata->header; struct inode *inode = hdr->inode; - loff_t offset = rdata->args.offset; - size_t count = rdata->args.count; + loff_t offset = hdr->args.offset; + size_t count = hdr->args.count; int err; loff_t eof; @@ -279,23 +278,23 @@ objlayout_read_pagelist(struct nfs_pgio_data *rdata) if (unlikely(offset + count > eof)) { if (offset >= eof) { err = 0; - rdata->res.count = 0; - rdata->res.eof = 1; + hdr->res.count = 0; + hdr->res.eof = 1; /*FIXME: do we need to call pnfs_ld_read_done() */ goto out; } count = eof - offset; } - rdata->res.eof = (offset + count) >= eof; - _fix_verify_io_params(hdr->lseg, &rdata->args.pages, - &rdata->args.pgbase, - rdata->args.offset, rdata->args.count); + hdr->res.eof = (offset + count) >= eof; + _fix_verify_io_params(hdr->lseg, &hdr->args.pages, + &hdr->args.pgbase, + hdr->args.offset, hdr->args.count); dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n", - __func__, inode->i_ino, offset, count, rdata->res.eof); + __func__, inode->i_ino, offset, count, hdr->res.eof); - err = objio_read_pagelist(rdata); + err = objio_read_pagelist(hdr); out: if (unlikely(err)) { hdr->pnfs_error = err; @@ -312,38 +311,38 @@ objlayout_read_pagelist(struct nfs_pgio_data *rdata) static void _rpc_write_complete(struct work_struct *work) { struct rpc_task *task; - struct nfs_pgio_data *wdata; + struct nfs_pgio_header *hdr; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - wdata = container_of(task, struct nfs_pgio_data, task); + hdr = container_of(task, struct nfs_pgio_header, task); - pnfs_ld_write_done(wdata); + pnfs_ld_write_done(hdr); } void objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) { - struct nfs_pgio_data *wdata = oir->rpcdata; + struct nfs_pgio_header *hdr = oir->rpcdata; - oir->status = wdata->task.tk_status = status; + oir->status = hdr->task.tk_status = status; if (status >= 0) { - wdata->res.count = status; - wdata->verf.committed = oir->committed; + hdr->res.count = status; + hdr->verf.committed = oir->committed; } else { - wdata->header->pnfs_error = status; + hdr->pnfs_error = status; } objlayout_iodone(oir); /* must not use oir after this point */ dprintk("%s: Return status %zd committed %d sync=%d\n", __func__, - status, wdata->verf.committed, sync); + status, hdr->verf.committed, sync); if (sync) - pnfs_ld_write_done(wdata); + pnfs_ld_write_done(hdr); else { - INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete); - schedule_work(&wdata->task.u.tk_work); + INIT_WORK(&hdr->task.u.tk_work, _rpc_write_complete); + schedule_work(&hdr->task.u.tk_work); } } @@ -351,17 +350,15 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) * Perform sync or async writes. */ enum pnfs_try_status -objlayout_write_pagelist(struct nfs_pgio_data *wdata, - int how) +objlayout_write_pagelist(struct nfs_pgio_header *hdr, int how) { - struct nfs_pgio_header *hdr = wdata->header; int err; - _fix_verify_io_params(hdr->lseg, &wdata->args.pages, - &wdata->args.pgbase, - wdata->args.offset, wdata->args.count); + _fix_verify_io_params(hdr->lseg, &hdr->args.pages, + &hdr->args.pgbase, + hdr->args.offset, hdr->args.count); - err = objio_write_pagelist(wdata, how); + err = objio_write_pagelist(hdr, how); if (unlikely(err)) { hdr->pnfs_error = err; dprintk("%s: Returned Error %d\n", __func__, err); diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 01e041029a6..fd13f1d2f13 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -119,8 +119,8 @@ extern void objio_free_lseg(struct pnfs_layout_segment *lseg); */ extern void objio_free_result(struct objlayout_io_res *oir); -extern int objio_read_pagelist(struct nfs_pgio_data *rdata); -extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how); +extern int objio_read_pagelist(struct nfs_pgio_header *rdata); +extern int objio_write_pagelist(struct nfs_pgio_header *wdata, int how); /* * callback API @@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg( extern void objlayout_free_lseg(struct pnfs_layout_segment *); extern enum pnfs_try_status objlayout_read_pagelist( - struct nfs_pgio_data *); + struct nfs_pgio_header *); extern enum pnfs_try_status objlayout_write_pagelist( - struct nfs_pgio_data *, + struct nfs_pgio_header *, int how); extern void objlayout_encode_layoutcommit( diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 0be5050638f..be7cbce6e4c 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -116,7 +116,7 @@ __nfs_iocounter_wait(struct nfs_io_counter *c) if (atomic_read(&c->io_count) == 0) break; ret = nfs_wait_bit_killable(&q.key); - } while (atomic_read(&c->io_count) != 0); + } while (atomic_read(&c->io_count) != 0 && !ret); finish_wait(wq, &q.wait); return ret; } @@ -139,18 +139,49 @@ nfs_iocounter_wait(struct nfs_io_counter *c) /* * nfs_page_group_lock - lock the head of the page group * @req - request in group that is to be locked + * @nonblock - if true don't block waiting for lock * * this lock must be held if modifying the page group list + * + * return 0 on success, < 0 on error: -EDELAY if nonblocking or the + * result from wait_on_bit_lock + * + * NOTE: calling with nonblock=false should always have set the + * lock bit (see fs/buffer.c and other uses of wait_on_bit_lock + * with TASK_UNINTERRUPTIBLE), so there is no need to check the result. + */ +int +nfs_page_group_lock(struct nfs_page *req, bool nonblock) +{ + struct nfs_page *head = req->wb_head; + + WARN_ON_ONCE(head != head->wb_head); + + if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags)) + return 0; + + if (!nonblock) + return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, + TASK_UNINTERRUPTIBLE); + + return -EAGAIN; +} + +/* + * nfs_page_group_lock_wait - wait for the lock to clear, but don't grab it + * @req - a request in the group + * + * This is a blocking call to wait for the group lock to be cleared. */ void -nfs_page_group_lock(struct nfs_page *req) +nfs_page_group_lock_wait(struct nfs_page *req) { struct nfs_page *head = req->wb_head; WARN_ON_ONCE(head != head->wb_head); - wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, - TASK_UNINTERRUPTIBLE); + wait_on_bit(&head->wb_flags, PG_HEADLOCK, + TASK_UNINTERRUPTIBLE); } /* @@ -211,7 +242,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) { bool ret; - nfs_page_group_lock(req); + nfs_page_group_lock(req, false); ret = nfs_page_group_sync_on_bit_locked(req, bit); nfs_page_group_unlock(req); @@ -454,123 +485,72 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, } EXPORT_SYMBOL_GPL(nfs_generic_pg_test); -static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr) -{ - return container_of(hdr, struct nfs_rw_header, header); -} - -/** - * nfs_rw_header_alloc - Allocate a header for a read or write - * @ops: Read or write function vector - */ -struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops) +struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops) { - struct nfs_rw_header *header = ops->rw_alloc_header(); - - if (header) { - struct nfs_pgio_header *hdr = &header->header; + struct nfs_pgio_header *hdr = ops->rw_alloc_header(); + if (hdr) { INIT_LIST_HEAD(&hdr->pages); spin_lock_init(&hdr->lock); - atomic_set(&hdr->refcnt, 0); hdr->rw_ops = ops; } - return header; + return hdr; } -EXPORT_SYMBOL_GPL(nfs_rw_header_alloc); +EXPORT_SYMBOL_GPL(nfs_pgio_header_alloc); /* - * nfs_rw_header_free - Free a read or write header + * nfs_pgio_header_free - Free a read or write header * @hdr: The header to free */ -void nfs_rw_header_free(struct nfs_pgio_header *hdr) +void nfs_pgio_header_free(struct nfs_pgio_header *hdr) { - hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr)); + hdr->rw_ops->rw_free_header(hdr); } -EXPORT_SYMBOL_GPL(nfs_rw_header_free); +EXPORT_SYMBOL_GPL(nfs_pgio_header_free); /** - * nfs_pgio_data_alloc - Allocate pageio data - * @hdr: The header making a request - * @pagecount: Number of pages to create - */ -static struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr, - unsigned int pagecount) -{ - struct nfs_pgio_data *data, *prealloc; - - prealloc = &NFS_RW_HEADER(hdr)->rpc_data; - if (prealloc->header == NULL) - data = prealloc; - else - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - goto out; - - if (nfs_pgarray_set(&data->pages, pagecount)) { - data->header = hdr; - atomic_inc(&hdr->refcnt); - } else { - if (data != prealloc) - kfree(data); - data = NULL; - } -out: - return data; -} - -/** - * nfs_pgio_data_release - Properly free pageio data - * @data: The data to release + * nfs_pgio_data_destroy - make @hdr suitable for reuse + * + * Frees memory and releases refs from nfs_generic_pgio, so that it may + * be called again. + * + * @hdr: A header that has had nfs_generic_pgio called */ -void nfs_pgio_data_release(struct nfs_pgio_data *data) +void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - struct nfs_rw_header *pageio_header = NFS_RW_HEADER(hdr); - - put_nfs_open_context(data->args.context); - if (data->pages.pagevec != data->pages.page_array) - kfree(data->pages.pagevec); - if (data == &pageio_header->rpc_data) { - data->header = NULL; - data = NULL; - } - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); - /* Note: we only free the rpc_task after callbacks are done. - * See the comment in rpc_free_task() for why - */ - kfree(data); + put_nfs_open_context(hdr->args.context); + if (hdr->page_array.pagevec != hdr->page_array.page_array) + kfree(hdr->page_array.pagevec); } -EXPORT_SYMBOL_GPL(nfs_pgio_data_release); +EXPORT_SYMBOL_GPL(nfs_pgio_data_destroy); /** * nfs_pgio_rpcsetup - Set up arguments for a pageio call - * @data: The pageio data + * @hdr: The pageio hdr * @count: Number of bytes to read * @offset: Initial offset * @how: How to commit data (writes only) * @cinfo: Commit information for the call (writes only) */ -static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data, +static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr, unsigned int count, unsigned int offset, int how, struct nfs_commit_info *cinfo) { - struct nfs_page *req = data->header->req; + struct nfs_page *req = hdr->req; /* Set up the RPC argument and reply structs - * NB: take care not to mess about with data->commit et al. */ + * NB: take care not to mess about with hdr->commit et al. */ - data->args.fh = NFS_FH(data->header->inode); - data->args.offset = req_offset(req) + offset; + hdr->args.fh = NFS_FH(hdr->inode); + hdr->args.offset = req_offset(req) + offset; /* pnfs_set_layoutcommit needs this */ - data->mds_offset = data->args.offset; - data->args.pgbase = req->wb_pgbase + offset; - data->args.pages = data->pages.pagevec; - data->args.count = count; - data->args.context = get_nfs_open_context(req->wb_context); - data->args.lock_context = req->wb_lock_context; - data->args.stable = NFS_UNSTABLE; + hdr->mds_offset = hdr->args.offset; + hdr->args.pgbase = req->wb_pgbase + offset; + hdr->args.pages = hdr->page_array.pagevec; + hdr->args.count = count; + hdr->args.context = get_nfs_open_context(req->wb_context); + hdr->args.lock_context = req->wb_lock_context; + hdr->args.stable = NFS_UNSTABLE; switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { case 0: break; @@ -578,59 +558,59 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data, if (nfs_reqs_to_commit(cinfo)) break; default: - data->args.stable = NFS_FILE_SYNC; + hdr->args.stable = NFS_FILE_SYNC; } - data->res.fattr = &data->fattr; - data->res.count = count; - data->res.eof = 0; - data->res.verf = &data->verf; - nfs_fattr_init(&data->fattr); + hdr->res.fattr = &hdr->fattr; + hdr->res.count = count; + hdr->res.eof = 0; + hdr->res.verf = &hdr->verf; + nfs_fattr_init(&hdr->fattr); } /** - * nfs_pgio_prepare - Prepare pageio data to go over the wire + * nfs_pgio_prepare - Prepare pageio hdr to go over the wire * @task: The current task - * @calldata: pageio data to prepare + * @calldata: pageio header to prepare */ static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) { - struct nfs_pgio_data *data = calldata; + struct nfs_pgio_header *hdr = calldata; int err; - err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data); + err = NFS_PROTO(hdr->inode)->pgio_rpc_prepare(task, hdr); if (err) rpc_exit(task, err); } -int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data, +int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, const struct rpc_call_ops *call_ops, int how, int flags) { struct rpc_task *task; struct rpc_message msg = { - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = data->header->cred, + .rpc_argp = &hdr->args, + .rpc_resp = &hdr->res, + .rpc_cred = hdr->cred, }; struct rpc_task_setup task_setup_data = { .rpc_client = clnt, - .task = &data->task, + .task = &hdr->task, .rpc_message = &msg, .callback_ops = call_ops, - .callback_data = data, + .callback_data = hdr, .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC | flags, }; int ret = 0; - data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how); + hdr->rw_ops->rw_initiate(hdr, &msg, &task_setup_data, how); dprintk("NFS: %5u initiated pgio call " "(req %s/%llu, %u bytes @ offset %llu)\n", - data->task.tk_pid, - data->header->inode->i_sb->s_id, - (unsigned long long)NFS_FILEID(data->header->inode), - data->args.count, - (unsigned long long)data->args.offset); + hdr->task.tk_pid, + hdr->inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(hdr->inode), + hdr->args.count, + (unsigned long long)hdr->args.offset); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) { @@ -657,22 +637,23 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { set_bit(NFS_IOHDR_REDO, &hdr->flags); - nfs_pgio_data_release(hdr->data); - hdr->data = NULL; + nfs_pgio_data_destroy(hdr); + hdr->completion_ops->completion(hdr); desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } /** * nfs_pgio_release - Release pageio data - * @calldata: The pageio data to release + * @calldata: The pageio header to release */ static void nfs_pgio_release(void *calldata) { - struct nfs_pgio_data *data = calldata; - if (data->header->rw_ops->rw_release) - data->header->rw_ops->rw_release(data); - nfs_pgio_data_release(data); + struct nfs_pgio_header *hdr = calldata; + if (hdr->rw_ops->rw_release) + hdr->rw_ops->rw_release(hdr); + nfs_pgio_data_destroy(hdr); + hdr->completion_ops->completion(hdr); } /** @@ -713,22 +694,22 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init); /** * nfs_pgio_result - Basic pageio error handling * @task: The task that ran - * @calldata: Pageio data to check + * @calldata: Pageio header to check */ static void nfs_pgio_result(struct rpc_task *task, void *calldata) { - struct nfs_pgio_data *data = calldata; - struct inode *inode = data->header->inode; + struct nfs_pgio_header *hdr = calldata; + struct inode *inode = hdr->inode; dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid, task->tk_status); - if (data->header->rw_ops->rw_done(task, data, inode) != 0) + if (hdr->rw_ops->rw_done(task, hdr, inode) != 0) return; if (task->tk_status < 0) - nfs_set_pgio_error(data->header, task->tk_status, data->args.offset); + nfs_set_pgio_error(hdr, task->tk_status, hdr->args.offset); else - data->header->rw_ops->rw_result(task, data); + hdr->rw_ops->rw_result(task, hdr); } /* @@ -743,32 +724,42 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { struct nfs_page *req; - struct page **pages; - struct nfs_pgio_data *data; + struct page **pages, + *last_page; struct list_head *head = &desc->pg_list; struct nfs_commit_info cinfo; + unsigned int pagecount, pageused; - data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base, - desc->pg_count)); - if (!data) + pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count); + if (!nfs_pgarray_set(&hdr->page_array, pagecount)) return nfs_pgio_error(desc, hdr); nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); - pages = data->pages.pagevec; + pages = hdr->page_array.pagevec; + last_page = NULL; + pageused = 0; while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_list_add_request(req, &hdr->pages); - *pages++ = req->wb_page; + + if (WARN_ON_ONCE(pageused >= pagecount)) + return nfs_pgio_error(desc, hdr); + + if (!last_page || last_page != req->wb_page) { + *pages++ = last_page = req->wb_page; + pageused++; + } } + if (WARN_ON_ONCE(pageused != pagecount)) + return nfs_pgio_error(desc, hdr); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) desc->pg_ioflags &= ~FLUSH_COND_STABLE; /* Set up the argument struct */ - nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); - hdr->data = data; + nfs_pgio_rpcsetup(hdr, desc->pg_count, 0, desc->pg_ioflags, &cinfo); desc->pg_rpc_callops = &nfs_pgio_common_ops; return 0; } @@ -776,25 +767,20 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio); static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) { - struct nfs_rw_header *rw_hdr; struct nfs_pgio_header *hdr; int ret; - rw_hdr = nfs_rw_header_alloc(desc->pg_rw_ops); - if (!rw_hdr) { + hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); + if (!hdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } - hdr = &rw_hdr->header; - nfs_pgheader_init(desc, hdr, nfs_rw_header_free); - atomic_inc(&hdr->refcnt); + nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); ret = nfs_generic_pgio(desc, hdr); if (ret == 0) ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), - hdr->data, desc->pg_rpc_callops, + hdr, desc->pg_rpc_callops, desc->pg_ioflags, 0); - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); return ret; } @@ -837,6 +823,14 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, return false; if (req_offset(req) != req_offset(prev) + prev->wb_bytes) return false; + if (req->wb_page == prev->wb_page) { + if (req->wb_pgbase != prev->wb_pgbase + prev->wb_bytes) + return false; + } else { + if (req->wb_pgbase != 0 || + prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) + return false; + } } size = pgio->pg_ops->pg_test(pgio, prev, req); WARN_ON_ONCE(size > req->wb_bytes); @@ -908,7 +902,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, unsigned int bytes_left = 0; unsigned int offset, pgbase; - nfs_page_group_lock(req); + nfs_page_group_lock(req, false); subreq = req; bytes_left = subreq->wb_bytes; @@ -930,7 +924,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, if (desc->pg_recoalesce) return 0; /* retry add_request for this subreq */ - nfs_page_group_lock(req); + nfs_page_group_lock(req, false); continue; } @@ -1005,7 +999,38 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, } while (ret); return ret; } -EXPORT_SYMBOL_GPL(nfs_pageio_add_request); + +/* + * nfs_pageio_resend - Transfer requests to new descriptor and resend + * @hdr - the pgio header to move request from + * @desc - the pageio descriptor to add requests to + * + * Try to move each request (nfs_page) from @hdr to @desc then attempt + * to send them. + * + * Returns 0 on success and < 0 on error. + */ +int nfs_pageio_resend(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) +{ + LIST_HEAD(failed); + + desc->pg_dreq = hdr->dreq; + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + + nfs_list_remove_request(req); + if (!nfs_pageio_add_request(desc, req)) + nfs_list_add_request(req, &failed); + } + nfs_pageio_complete(desc); + if (!list_empty(&failed)) { + list_move(&failed, &hdr->pages); + return -EIO; + } + return 0; +} +EXPORT_SYMBOL_GPL(nfs_pageio_resend); /** * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor @@ -1021,7 +1046,6 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) break; } } -EXPORT_SYMBOL_GPL(nfs_pageio_complete); /** * nfs_pageio_cond_complete - Conditional I/O completion diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a8914b33561..a3851debf8a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -361,6 +361,23 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) } EXPORT_SYMBOL_GPL(pnfs_put_lseg); +static void pnfs_put_lseg_async_work(struct work_struct *work) +{ + struct pnfs_layout_segment *lseg; + + lseg = container_of(work, struct pnfs_layout_segment, pls_work); + + pnfs_put_lseg(lseg); +} + +void +pnfs_put_lseg_async(struct pnfs_layout_segment *lseg) +{ + INIT_WORK(&lseg->pls_work, pnfs_put_lseg_async_work); + schedule_work(&lseg->pls_work); +} +EXPORT_SYMBOL_GPL(pnfs_put_lseg_async); + static u64 end_offset(u64 start, u64 len) { @@ -1470,41 +1487,19 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, } EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); -int pnfs_write_done_resend_to_mds(struct inode *inode, - struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops, - struct nfs_direct_req *dreq) +int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr) { struct nfs_pageio_descriptor pgio; - LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops); - pgio.pg_dreq = dreq; - while (!list_empty(head)) { - struct nfs_page *req = nfs_list_entry(head->next); - - nfs_list_remove_request(req); - if (!nfs_pageio_add_request(&pgio, req)) - nfs_list_add_request(req, &failed); - } - nfs_pageio_complete(&pgio); - - if (!list_empty(&failed)) { - /* For some reason our attempt to resend pages. Mark the - * overall send request as having failed, and let - * nfs_writeback_release_full deal with the error. - */ - list_move(&failed, head); - return -EIO; - } - return 0; + nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true, + hdr->completion_ops); + return nfs_pageio_resend(&pgio, hdr); } EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); -static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data) +static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; dprintk("pnfs write error = %d\n", hdr->pnfs_error); if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & @@ -1512,50 +1507,42 @@ static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data) pnfs_return_layout(hdr->inode); } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) - data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, - &hdr->pages, - hdr->completion_ops, - hdr->dreq); + hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr); } /* * Called by non rpc-based layout drivers */ -void pnfs_ld_write_done(struct nfs_pgio_data *data) +void pnfs_ld_write_done(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - - trace_nfs4_pnfs_write(data, hdr->pnfs_error); + trace_nfs4_pnfs_write(hdr, hdr->pnfs_error); if (!hdr->pnfs_error) { - pnfs_set_layoutcommit(data); - hdr->mds_ops->rpc_call_done(&data->task, data); + pnfs_set_layoutcommit(hdr); + hdr->mds_ops->rpc_call_done(&hdr->task, hdr); } else - pnfs_ld_handle_write_error(data); - hdr->mds_ops->rpc_release(data); + pnfs_ld_handle_write_error(hdr); + hdr->mds_ops->rpc_release(hdr); } EXPORT_SYMBOL_GPL(pnfs_ld_write_done); static void pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_data *data) + struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { list_splice_tail_init(&hdr->pages, &desc->pg_list); nfs_pageio_reset_write_mds(desc); desc->pg_recoalesce = 1; } - nfs_pgio_data_release(data); + nfs_pgio_data_destroy(hdr); } static enum pnfs_try_status -pnfs_try_to_write_data(struct nfs_pgio_data *wdata, +pnfs_try_to_write_data(struct nfs_pgio_header *hdr, const struct rpc_call_ops *call_ops, struct pnfs_layout_segment *lseg, int how) { - struct nfs_pgio_header *hdr = wdata->header; struct inode *inode = hdr->inode; enum pnfs_try_status trypnfs; struct nfs_server *nfss = NFS_SERVER(inode); @@ -1563,8 +1550,8 @@ pnfs_try_to_write_data(struct nfs_pgio_data *wdata, hdr->mds_ops = call_ops; dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, - inode->i_ino, wdata->args.count, wdata->args.offset, how); - trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how); + inode->i_ino, hdr->args.count, hdr->args.offset, how); + trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how); if (trypnfs != PNFS_NOT_ATTEMPTED) nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); @@ -1575,139 +1562,105 @@ static void pnfs_do_write(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr, int how) { - struct nfs_pgio_data *data = hdr->data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; enum pnfs_try_status trypnfs; desc->pg_lseg = NULL; - trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); + trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how); if (trypnfs == PNFS_NOT_ATTEMPTED) - pnfs_write_through_mds(desc, data); + pnfs_write_through_mds(desc, hdr); pnfs_put_lseg(lseg); } static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) { pnfs_put_lseg(hdr->lseg); - nfs_rw_header_free(hdr); + nfs_pgio_header_free(hdr); } EXPORT_SYMBOL_GPL(pnfs_writehdr_free); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { - struct nfs_rw_header *whdr; struct nfs_pgio_header *hdr; int ret; - whdr = nfs_rw_header_alloc(desc->pg_rw_ops); - if (!whdr) { + hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); + if (!hdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return -ENOMEM; } - hdr = &whdr->header; nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); hdr->lseg = pnfs_get_lseg(desc->pg_lseg); - atomic_inc(&hdr->refcnt); ret = nfs_generic_pgio(desc, hdr); if (ret != 0) { pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; } else pnfs_do_write(desc, hdr, desc->pg_ioflags); - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); return ret; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); -int pnfs_read_done_resend_to_mds(struct inode *inode, - struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops, - struct nfs_direct_req *dreq) +int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr) { struct nfs_pageio_descriptor pgio; - LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_read(&pgio, inode, true, compl_ops); - pgio.pg_dreq = dreq; - while (!list_empty(head)) { - struct nfs_page *req = nfs_list_entry(head->next); - - nfs_list_remove_request(req); - if (!nfs_pageio_add_request(&pgio, req)) - nfs_list_add_request(req, &failed); - } - nfs_pageio_complete(&pgio); - - if (!list_empty(&failed)) { - list_move(&failed, head); - return -EIO; - } - return 0; + nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops); + return nfs_pageio_resend(&pgio, hdr); } EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); -static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data) +static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - dprintk("pnfs read error = %d\n", hdr->pnfs_error); if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & PNFS_LAYOUTRET_ON_ERROR) { pnfs_return_layout(hdr->inode); } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) - data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, - &hdr->pages, - hdr->completion_ops, - hdr->dreq); + hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr); } /* * Called by non rpc-based layout drivers */ -void pnfs_ld_read_done(struct nfs_pgio_data *data) +void pnfs_ld_read_done(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - - trace_nfs4_pnfs_read(data, hdr->pnfs_error); + trace_nfs4_pnfs_read(hdr, hdr->pnfs_error); if (likely(!hdr->pnfs_error)) { - __nfs4_read_done_cb(data); - hdr->mds_ops->rpc_call_done(&data->task, data); + __nfs4_read_done_cb(hdr); + hdr->mds_ops->rpc_call_done(&hdr->task, hdr); } else - pnfs_ld_handle_read_error(data); - hdr->mds_ops->rpc_release(data); + pnfs_ld_handle_read_error(hdr); + hdr->mds_ops->rpc_release(hdr); } EXPORT_SYMBOL_GPL(pnfs_ld_read_done); static void pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_data *data) + struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { list_splice_tail_init(&hdr->pages, &desc->pg_list); nfs_pageio_reset_read_mds(desc); desc->pg_recoalesce = 1; } - nfs_pgio_data_release(data); + nfs_pgio_data_destroy(hdr); } /* * Call the appropriate parallel I/O subsystem read function. */ static enum pnfs_try_status -pnfs_try_to_read_data(struct nfs_pgio_data *rdata, +pnfs_try_to_read_data(struct nfs_pgio_header *hdr, const struct rpc_call_ops *call_ops, struct pnfs_layout_segment *lseg) { - struct nfs_pgio_header *hdr = rdata->header; struct inode *inode = hdr->inode; struct nfs_server *nfss = NFS_SERVER(inode); enum pnfs_try_status trypnfs; @@ -1715,9 +1668,9 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata, hdr->mds_ops = call_ops; dprintk("%s: Reading ino:%lu %u@%llu\n", - __func__, inode->i_ino, rdata->args.count, rdata->args.offset); + __func__, inode->i_ino, hdr->args.count, hdr->args.offset); - trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); + trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr); if (trypnfs != PNFS_NOT_ATTEMPTED) nfs_inc_stats(inode, NFSIOS_PNFS_READ); dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); @@ -1727,52 +1680,46 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata, static void pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { - struct nfs_pgio_data *data = hdr->data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; enum pnfs_try_status trypnfs; desc->pg_lseg = NULL; - trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); + trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg); if (trypnfs == PNFS_NOT_ATTEMPTED) - pnfs_read_through_mds(desc, data); + pnfs_read_through_mds(desc, hdr); pnfs_put_lseg(lseg); } static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) { pnfs_put_lseg(hdr->lseg); - nfs_rw_header_free(hdr); + nfs_pgio_header_free(hdr); } EXPORT_SYMBOL_GPL(pnfs_readhdr_free); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { - struct nfs_rw_header *rhdr; struct nfs_pgio_header *hdr; int ret; - rhdr = nfs_rw_header_alloc(desc->pg_rw_ops); - if (!rhdr) { + hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); + if (!hdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); ret = -ENOMEM; pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return ret; } - hdr = &rhdr->header; nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); hdr->lseg = pnfs_get_lseg(desc->pg_lseg); - atomic_inc(&hdr->refcnt); ret = nfs_generic_pgio(desc, hdr); if (ret != 0) { pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; } else pnfs_do_read(desc, hdr); - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); return ret; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); @@ -1820,12 +1767,11 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); void -pnfs_set_layoutcommit(struct nfs_pgio_data *wdata) +pnfs_set_layoutcommit(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = wdata->header; struct inode *inode = hdr->inode; struct nfs_inode *nfsi = NFS_I(inode); - loff_t end_pos = wdata->mds_offset + wdata->res.count; + loff_t end_pos = hdr->mds_offset + hdr->res.count; bool mark_as_dirty = false; spin_lock(&inode->i_lock); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 4fb309a2b4c..aca3dff5dae 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -32,6 +32,7 @@ #include <linux/nfs_fs.h> #include <linux/nfs_page.h> +#include <linux/workqueue.h> enum { NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ @@ -46,6 +47,7 @@ struct pnfs_layout_segment { atomic_t pls_refcount; unsigned long pls_flags; struct pnfs_layout_hdr *pls_layout; + struct work_struct pls_work; }; enum pnfs_try_status { @@ -104,6 +106,8 @@ struct pnfs_layoutdriver_type { int max); void (*recover_commit_reqs) (struct list_head *list, struct nfs_commit_info *cinfo); + struct nfs_page * (*search_commit_reqs)(struct nfs_commit_info *cinfo, + struct page *page); int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how, @@ -113,8 +117,8 @@ struct pnfs_layoutdriver_type { * Return PNFS_ATTEMPTED to indicate the layout code has attempted * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS */ - enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data); - enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how); + enum pnfs_try_status (*read_pagelist)(struct nfs_pgio_header *); + enum pnfs_try_status (*write_pagelist)(struct nfs_pgio_header *, int); void (*free_deviceid_node) (struct nfs4_deviceid_node *); @@ -179,6 +183,7 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); void pnfs_put_lseg(struct pnfs_layout_segment *lseg); +void pnfs_put_lseg_async(struct pnfs_layout_segment *lseg); void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); void unset_pnfs_layoutdriver(struct nfs_server *); @@ -213,13 +218,13 @@ bool pnfs_roc(struct inode *ino); void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); -void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata); +void pnfs_set_layoutcommit(struct nfs_pgio_header *); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int _pnfs_return_layout(struct inode *); int pnfs_commit_and_return_layout(struct inode *); -void pnfs_ld_write_done(struct nfs_pgio_data *); -void pnfs_ld_read_done(struct nfs_pgio_data *); +void pnfs_ld_write_done(struct nfs_pgio_header *); +void pnfs_ld_read_done(struct nfs_pgio_header *); struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, loff_t pos, @@ -228,12 +233,8 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, gfp_t gfp_flags); void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); -int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops, - struct nfs_direct_req *dreq); -int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops, - struct nfs_direct_req *dreq); +int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *); +int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *); struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); /* nfs4_deviceid_flags */ @@ -345,6 +346,17 @@ pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list, NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); } +static inline struct nfs_page * +pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, + struct page *page) +{ + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; + + if (ld == NULL || ld->search_commit_reqs == NULL) + return NULL; + return ld->search_commit_reqs(cinfo, page); +} + /* Should the pNFS client commit and return the layout upon a setattr */ static inline bool pnfs_ld_layoutret_on_setattr(struct inode *inode) @@ -410,6 +422,10 @@ static inline void pnfs_put_lseg(struct pnfs_layout_segment *lseg) { } +static inline void pnfs_put_lseg_async(struct pnfs_layout_segment *lseg) +{ +} + static inline int pnfs_return_layout(struct inode *ino) { return 0; @@ -496,6 +512,13 @@ pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list, { } +static inline struct nfs_page * +pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, + struct page *page) +{ + return NULL; +} + static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) { return 0; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index c171ce1a8a3..b09cc23d6f4 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -578,46 +578,49 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return 0; } -static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; nfs_invalidate_atime(inode); if (task->tk_status >= 0) { - nfs_refresh_inode(inode, data->res.fattr); + nfs_refresh_inode(inode, hdr->res.fattr); /* Emulate the eof flag, which isn't normally needed in NFSv2 * as it is guaranteed to always return the file attributes */ - if (data->args.offset + data->res.count >= data->res.fattr->size) - data->res.eof = 1; + if (hdr->args.offset + hdr->res.count >= hdr->res.fattr->size) + hdr->res.eof = 1; } return 0; } -static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs_proc_read_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; } -static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, + struct nfs_pgio_header *hdr) { rpc_call_start(task); return 0; } -static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; if (task->tk_status >= 0) - nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); + nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr); return 0; } -static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs_proc_write_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ - data->args.stable = NFS_FILE_SYNC; + hdr->args.stable = NFS_FILE_SYNC; msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index e818a475ca6..beff2769c5c 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -33,12 +33,12 @@ static const struct nfs_rw_ops nfs_rw_read_ops; static struct kmem_cache *nfs_rdata_cachep; -static struct nfs_rw_header *nfs_readhdr_alloc(void) +static struct nfs_pgio_header *nfs_readhdr_alloc(void) { return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); } -static void nfs_readhdr_free(struct nfs_rw_header *rhdr) +static void nfs_readhdr_free(struct nfs_pgio_header *rhdr) { kmem_cache_free(nfs_rdata_cachep, rhdr); } @@ -115,12 +115,6 @@ static void nfs_readpage_release(struct nfs_page *req) unlock_page(req->wb_page); } - - dprintk("NFS: read done (%s/%Lu %d@%Ld)\n", - req->wb_context->dentry->d_inode->i_sb->s_id, - (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode), - req->wb_bytes, - (long long)req_offset(req)); nfs_release_request(req); } @@ -172,14 +166,15 @@ out: hdr->release(hdr); } -static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg, +static void nfs_initiate_read(struct nfs_pgio_header *hdr, + struct rpc_message *msg, struct rpc_task_setup *task_setup_data, int how) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; task_setup_data->flags |= swap_flags; - NFS_PROTO(inode)->read_setup(data, msg); + NFS_PROTO(inode)->read_setup(hdr, msg); } static void @@ -203,14 +198,15 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = { * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). */ -static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data, +static int nfs_readpage_done(struct rpc_task *task, + struct nfs_pgio_header *hdr, struct inode *inode) { - int status = NFS_PROTO(inode)->read_done(task, data); + int status = NFS_PROTO(inode)->read_done(task, hdr); if (status != 0) return status; - nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count); + nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count); if (task->tk_status == -ESTALE) { set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); @@ -219,34 +215,34 @@ static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data, return 0; } -static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data) +static void nfs_readpage_retry(struct rpc_task *task, + struct nfs_pgio_header *hdr) { - struct nfs_pgio_args *argp = &data->args; - struct nfs_pgio_res *resp = &data->res; + struct nfs_pgio_args *argp = &hdr->args; + struct nfs_pgio_res *resp = &hdr->res; /* This is a short read! */ - nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); + nfs_inc_stats(hdr->inode, NFSIOS_SHORTREAD); /* Has the server at least made some progress? */ if (resp->count == 0) { - nfs_set_pgio_error(data->header, -EIO, argp->offset); + nfs_set_pgio_error(hdr, -EIO, argp->offset); return; } - /* Yes, so retry the read at the end of the data */ - data->mds_offset += resp->count; + /* Yes, so retry the read at the end of the hdr */ + hdr->mds_offset += resp->count; argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; rpc_restart_call_prepare(task); } -static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data) +static void nfs_readpage_result(struct rpc_task *task, + struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - - if (data->res.eof) { + if (hdr->res.eof) { loff_t bound; - bound = data->args.offset + data->res.count; + bound = hdr->args.offset + hdr->res.count; spin_lock(&hdr->lock); if (bound < hdr->io_start + hdr->good_bytes) { set_bit(NFS_IOHDR_EOF, &hdr->flags); @@ -254,8 +250,8 @@ static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *dat hdr->good_bytes = bound - hdr->io_start; } spin_unlock(&hdr->lock); - } else if (data->res.count != data->args.count) - nfs_readpage_retry(task, data); + } else if (hdr->res.count != hdr->args.count) + nfs_readpage_retry(task, hdr); } /* @@ -404,7 +400,7 @@ out: int __init nfs_init_readpagecache(void) { nfs_rdata_cachep = kmem_cache_create("nfs_read_data", - sizeof(struct nfs_rw_header), + sizeof(struct nfs_pgio_header), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_rdata_cachep == NULL) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 084af1060d7..e4499d5b51e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1027,8 +1027,7 @@ static bool nfs_auth_info_add(struct nfs_auth_info *auth_info, rpc_authflavor_t flavor) { unsigned int i; - unsigned int max_flavor_len = (sizeof(auth_info->flavors) / - sizeof(auth_info->flavors[0])); + unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors); /* make sure this flavor isn't already in the list */ for (i = 0; i < auth_info->flavor_len; i++) { @@ -2180,7 +2179,7 @@ out_no_address: return -EINVAL; } -#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \ +#define NFS_REMOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \ | NFS_MOUNT_SECURE \ | NFS_MOUNT_TCP \ | NFS_MOUNT_VER3 \ @@ -2188,15 +2187,16 @@ out_no_address: | NFS_MOUNT_NONLM \ | NFS_MOUNT_BROKEN_SUID \ | NFS_MOUNT_STRICTLOCK \ - | NFS_MOUNT_UNSHARED \ - | NFS_MOUNT_NORESVPORT \ | NFS_MOUNT_LEGACY_INTERFACE) +#define NFS_MOUNT_CMP_FLAGMASK (NFS_REMOUNT_CMP_FLAGMASK & \ + ~(NFS_MOUNT_UNSHARED | NFS_MOUNT_NORESVPORT)) + static int nfs_compare_remount_data(struct nfs_server *nfss, struct nfs_parsed_mount_data *data) { - if ((data->flags ^ nfss->flags) & NFS_MOUNT_CMP_FLAGMASK || + if ((data->flags ^ nfss->flags) & NFS_REMOUNT_CMP_FLAGMASK || data->rsize != nfss->rsize || data->wsize != nfss->wsize || data->version != nfss->nfs_client->rpc_ops->version || diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 962c9ee758b..175d5d073cc 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -47,6 +47,8 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; static const struct nfs_commit_completion_ops nfs_commit_completion_ops; static const struct nfs_rw_ops nfs_rw_write_ops; static void nfs_clear_request_commit(struct nfs_page *req); +static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, + struct inode *inode); static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; @@ -71,18 +73,18 @@ void nfs_commit_free(struct nfs_commit_data *p) } EXPORT_SYMBOL_GPL(nfs_commit_free); -static struct nfs_rw_header *nfs_writehdr_alloc(void) +static struct nfs_pgio_header *nfs_writehdr_alloc(void) { - struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); + struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); if (p) memset(p, 0, sizeof(*p)); return p; } -static void nfs_writehdr_free(struct nfs_rw_header *whdr) +static void nfs_writehdr_free(struct nfs_pgio_header *hdr) { - mempool_free(whdr, nfs_wdata_mempool); + mempool_free(hdr, nfs_wdata_mempool); } static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) @@ -93,6 +95,38 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) } /* + * nfs_page_search_commits_for_head_request_locked + * + * Search through commit lists on @inode for the head request for @page. + * Must be called while holding the inode (which is cinfo) lock. + * + * Returns the head request if found, or NULL if not found. + */ +static struct nfs_page * +nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, + struct page *page) +{ + struct nfs_page *freq, *t; + struct nfs_commit_info cinfo; + struct inode *inode = &nfsi->vfs_inode; + + nfs_init_cinfo_from_inode(&cinfo, inode); + + /* search through pnfs commit lists */ + freq = pnfs_search_commit_reqs(inode, &cinfo, page); + if (freq) + return freq->wb_head; + + /* Linearly search the commit list for the correct request */ + list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) { + if (freq->wb_page == page) + return freq->wb_head; + } + + return NULL; +} + +/* * nfs_page_find_head_request_locked - find head request associated with @page * * must be called while holding the inode lock. @@ -106,21 +140,12 @@ nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page) if (PagePrivate(page)) req = (struct nfs_page *)page_private(page); - else if (unlikely(PageSwapCache(page))) { - struct nfs_page *freq, *t; - - /* Linearly search the commit list for the correct req */ - list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) { - if (freq->wb_page == page) { - req = freq->wb_head; - break; - } - } - } + else if (unlikely(PageSwapCache(page))) + req = nfs_page_search_commits_for_head_request_locked(nfsi, + page); if (req) { WARN_ON_ONCE(req->wb_head != req); - kref_get(&req->wb_kref); } @@ -216,7 +241,7 @@ static bool nfs_page_group_covers_page(struct nfs_page *req) unsigned int pos = 0; unsigned int len = nfs_page_length(req->wb_page); - nfs_page_group_lock(req); + nfs_page_group_lock(req, false); do { tmp = nfs_page_group_search_locked(req->wb_head, pos); @@ -379,8 +404,6 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, subreq->wb_head = subreq; subreq->wb_this_page = subreq; - nfs_clear_request_commit(subreq); - /* subreq is now totally disconnected from page group or any * write / commit lists. last chance to wake any waiters */ nfs_unlock_request(subreq); @@ -455,8 +478,23 @@ try_again: return NULL; } + /* holding inode lock, so always make a non-blocking call to try the + * page group lock */ + ret = nfs_page_group_lock(head, true); + if (ret < 0) { + spin_unlock(&inode->i_lock); + + if (!nonblock && ret == -EAGAIN) { + nfs_page_group_lock_wait(head); + nfs_release_request(head); + goto try_again; + } + + nfs_release_request(head); + return ERR_PTR(ret); + } + /* lock each request in the page group */ - nfs_page_group_lock(head); subreq = head; do { /* @@ -488,7 +526,7 @@ try_again: * Commit list removal accounting is done after locks are dropped */ subreq = head; do { - nfs_list_remove_request(subreq); + nfs_clear_request_commit(subreq); subreq = subreq->wb_this_page; } while (subreq != head); @@ -518,15 +556,11 @@ try_again: nfs_page_group_unlock(head); - /* drop lock to clear_request_commit the head req and clean up - * requests on destroy list */ + /* drop lock to clean uprequests on destroy list */ spin_unlock(&inode->i_lock); nfs_destroy_unlinked_subrequests(destroy_list, head); - /* clean up commit list state */ - nfs_clear_request_commit(head); - /* still holds ref on head from nfs_page_find_head_request_locked * and still has lock on head from lock loop */ return head; @@ -705,6 +739,8 @@ static void nfs_inode_remove_request(struct nfs_page *req) if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) nfs_release_request(req); + else + WARN_ON_ONCE(1); } static void @@ -808,6 +844,7 @@ nfs_clear_page_commit(struct page *page) dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE); } +/* Called holding inode (/cinfo) lock */ static void nfs_clear_request_commit(struct nfs_page *req) { @@ -817,20 +854,17 @@ nfs_clear_request_commit(struct nfs_page *req) nfs_init_cinfo_from_inode(&cinfo, inode); if (!pnfs_clear_request_commit(req, &cinfo)) { - spin_lock(cinfo.lock); nfs_request_remove_commit_list(req, &cinfo); - spin_unlock(cinfo.lock); } nfs_clear_page_commit(req->wb_page); } } -static inline -int nfs_write_need_commit(struct nfs_pgio_data *data) +int nfs_write_need_commit(struct nfs_pgio_header *hdr) { - if (data->verf.committed == NFS_DATA_SYNC) - return data->header->lseg == NULL; - return data->verf.committed != NFS_FILE_SYNC; + if (hdr->verf.committed == NFS_DATA_SYNC) + return hdr->lseg == NULL; + return hdr->verf.committed != NFS_FILE_SYNC; } #else @@ -856,8 +890,7 @@ nfs_clear_request_commit(struct nfs_page *req) { } -static inline -int nfs_write_need_commit(struct nfs_pgio_data *data) +int nfs_write_need_commit(struct nfs_pgio_header *hdr) { return 0; } @@ -883,11 +916,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) nfs_context_set_write_error(req->wb_context, hdr->error); goto remove_req; } - if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { - nfs_mark_request_dirty(req); - goto next; - } - if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { + if (nfs_write_need_commit(hdr)) { memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo); goto next; @@ -1038,9 +1067,9 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, else req->wb_bytes = rqend - req->wb_offset; out_unlock: - spin_unlock(&inode->i_lock); if (req) nfs_clear_request_commit(req); + spin_unlock(&inode->i_lock); return req; out_flushme: spin_unlock(&inode->i_lock); @@ -1241,17 +1270,18 @@ static int flush_task_priority(int how) return RPC_PRIORITY_NORMAL; } -static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg, +static void nfs_initiate_write(struct nfs_pgio_header *hdr, + struct rpc_message *msg, struct rpc_task_setup *task_setup_data, int how) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; int priority = flush_task_priority(how); task_setup_data->priority = priority; - NFS_PROTO(inode)->write_setup(data, msg); + NFS_PROTO(inode)->write_setup(hdr, msg); nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, - &task_setup_data->rpc_client, msg, data); + &task_setup_data->rpc_client, msg, hdr); } /* If a nfs_flush_* function fails, it should remove reqs from @head and @@ -1313,21 +1343,9 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); } -static void nfs_writeback_release_common(struct nfs_pgio_data *data) +static void nfs_writeback_release_common(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - int status = data->task.tk_status; - - if ((status >= 0) && nfs_write_need_commit(data)) { - spin_lock(&hdr->lock); - if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) - ; /* Do nothing */ - else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) - memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf)); - else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf))) - set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); - spin_unlock(&hdr->lock); - } + /* do nothing! */ } /* @@ -1358,7 +1376,8 @@ static int nfs_should_remove_suid(const struct inode *inode) /* * This function is called when the WRITE call is complete. */ -static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, +static int nfs_writeback_done(struct rpc_task *task, + struct nfs_pgio_header *hdr, struct inode *inode) { int status; @@ -1370,13 +1389,14 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, * another writer had changed the file, but some applications * depend on tighter cache coherency when writing. */ - status = NFS_PROTO(inode)->write_done(task, data); + status = NFS_PROTO(inode)->write_done(task, hdr); if (status != 0) return status; - nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count); + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count); #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) - if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) { + if (hdr->res.verf->committed < hdr->args.stable && + task->tk_status >= 0) { /* We tried a write call, but the server did not * commit data to stable storage even though we * requested it. @@ -1392,7 +1412,7 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", NFS_SERVER(inode)->nfs_client->cl_hostname, - data->res.verf->committed, data->args.stable); + hdr->res.verf->committed, hdr->args.stable); complain = jiffies + 300 * HZ; } } @@ -1407,16 +1427,17 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, /* * This function is called when the WRITE call is complete. */ -static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data) +static void nfs_writeback_result(struct rpc_task *task, + struct nfs_pgio_header *hdr) { - struct nfs_pgio_args *argp = &data->args; - struct nfs_pgio_res *resp = &data->res; + struct nfs_pgio_args *argp = &hdr->args; + struct nfs_pgio_res *resp = &hdr->res; if (resp->count < argp->count) { static unsigned long complain; /* This a short write! */ - nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE); + nfs_inc_stats(hdr->inode, NFSIOS_SHORTWRITE); /* Has the server at least made some progress? */ if (resp->count == 0) { @@ -1426,14 +1447,14 @@ static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *da argp->count); complain = jiffies + 300 * HZ; } - nfs_set_pgio_error(data->header, -EIO, argp->offset); + nfs_set_pgio_error(hdr, -EIO, argp->offset); task->tk_status = -EIO; return; } /* Was this an NFSv2 write or an NFSv3 stable write? */ if (resp->verf->committed != NFS_UNSTABLE) { /* Resend from where the server left off */ - data->mds_offset += resp->count; + hdr->mds_offset += resp->count; argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; @@ -1884,7 +1905,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, int __init nfs_init_writepagecache(void) { nfs_wdata_cachep = kmem_cache_create("nfs_write_data", - sizeof(struct nfs_rw_header), + sizeof(struct nfs_pgio_header), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_wdata_cachep == NULL) diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index ed628f71274..538f142935e 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c @@ -30,9 +30,6 @@ MODULE_LICENSE("GPL"); -EXPORT_SYMBOL_GPL(nfsacl_encode); -EXPORT_SYMBOL_GPL(nfsacl_decode); - struct nfsacl_encode_desc { struct xdr_array2_desc desc; unsigned int count; @@ -136,6 +133,7 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, nfsacl_desc.desc.array_len; return err; } +EXPORT_SYMBOL_GPL(nfsacl_encode); struct nfsacl_decode_desc { struct xdr_array2_desc desc; @@ -295,3 +293,4 @@ int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, return 8 + nfsacl_desc.desc.elem_size * nfsacl_desc.desc.array_len; } +EXPORT_SYMBOL_GPL(nfsacl_decode); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index f9821ce6658..e94457c33ad 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2657,6 +2657,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, struct xdr_stream *xdr = cd->xdr; int start_offset = xdr->buf->len; int cookie_offset; + u32 name_and_cookie; int entry_bytes; __be32 nfserr = nfserr_toosmall; __be64 wire_offset; @@ -2718,7 +2719,14 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, cd->rd_maxcount -= entry_bytes; if (!cd->rd_dircount) goto fail; - cd->rd_dircount--; + /* + * RFC 3530 14.2.24 describes rd_dircount as only a "hint", so + * let's always let through the first entry, at least: + */ + name_and_cookie = 4 * XDR_QUADLEN(namlen) + 8; + if (name_and_cookie > cd->rd_dircount && cd->cookie_offset) + goto fail; + cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie); cd->cookie_offset = cookie_offset; skip_entry: cd->common.err = nfs_ok; @@ -3321,6 +3329,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 } maxcount = min_t(int, maxcount-16, bytes_left); + /* RFC 3530 14.2.24 allows us to ignore dircount when it's 0: */ + if (!readdir->rd_dircount) + readdir->rd_dircount = INT_MAX; + readdir->xdr = xdr; readdir->rd_maxcount = maxcount; readdir->common.err = 0; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index c519927b7b5..228f5bdf077 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -942,7 +942,7 @@ static int nilfs_get_root_dentry(struct super_block *sb, iput(inode); } } else { - dentry = d_obtain_alias(inode); + dentry = d_obtain_root(inode); if (IS_ERR(dentry)) { ret = PTR_ERR(dentry); goto failed_dentry; diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 238a5930cb3..9d7e2b9659c 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -42,7 +42,7 @@ static int show_mark_fhandle(struct seq_file *m, struct inode *inode) { struct { struct file_handle handle; - u8 pad[64]; + u8 pad[MAX_HANDLE_SZ]; } f; int size, ret, i; @@ -50,7 +50,7 @@ static int show_mark_fhandle(struct seq_file *m, struct inode *inode) size = f.handle.handle_bytes >> 2; ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, 0); - if ((ret == 255) || (ret == -ENOSPC)) { + if ((ret == FILEID_INVALID) || (ret < 0)) { WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret); return 0; } diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c index 1ec141e758d..62e8ec619b4 100644 --- a/fs/ocfs2/cluster/quorum.c +++ b/fs/ocfs2/cluster/quorum.c @@ -160,9 +160,18 @@ static void o2quo_make_decision(struct work_struct *work) } out: - spin_unlock(&qs->qs_lock); - if (fence) + if (fence) { + spin_unlock(&qs->qs_lock); o2quo_fence_self(); + } else { + mlog(ML_NOTICE, "not fencing this node, heartbeating: %d, " + "connected: %d, lowest: %d (%sreachable)\n", + qs->qs_heartbeating, qs->qs_connected, lowest_hb, + lowest_reachable ? "" : "un"); + spin_unlock(&qs->qs_lock); + + } + } static void o2quo_set_hold(struct o2quo_state *qs, u8 node) diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 681691bc233..ea34952f949 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1480,6 +1480,14 @@ static int o2net_set_nodelay(struct socket *sock) return ret; } +static int o2net_set_usertimeout(struct socket *sock) +{ + int user_timeout = O2NET_TCP_USER_TIMEOUT; + + return kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT, + (char *)&user_timeout, sizeof(user_timeout)); +} + static void o2net_initialize_handshake(void) { o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( @@ -1536,16 +1544,20 @@ static void o2net_idle_timer(unsigned long data) #endif printk(KERN_NOTICE "o2net: Connection to " SC_NODEF_FMT " has been " - "idle for %lu.%lu secs, shutting it down.\n", SC_NODEF_ARGS(sc), - msecs / 1000, msecs % 1000); + "idle for %lu.%lu secs.\n", + SC_NODEF_ARGS(sc), msecs / 1000, msecs % 1000); - /* - * Initialize the nn_timeout so that the next connection attempt - * will continue in o2net_start_connect. + /* idle timerout happen, don't shutdown the connection, but + * make fence decision. Maybe the connection can recover before + * the decision is made. */ atomic_set(&nn->nn_timeout, 1); + o2quo_conn_err(o2net_num_from_nn(nn)); + queue_delayed_work(o2net_wq, &nn->nn_still_up, + msecs_to_jiffies(O2NET_QUORUM_DELAY_MS)); + + o2net_sc_reset_idle_timer(sc); - o2net_sc_queue_work(sc, &sc->sc_shutdown_work); } static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc) @@ -1560,6 +1572,15 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc) static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) { + struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); + + /* clear fence decision since the connection recover from timeout*/ + if (atomic_read(&nn->nn_timeout)) { + o2quo_conn_up(o2net_num_from_nn(nn)); + cancel_delayed_work(&nn->nn_still_up); + atomic_set(&nn->nn_timeout, 0); + } + /* Only push out an existing timer */ if (timer_pending(&sc->sc_idle_timeout)) o2net_sc_reset_idle_timer(sc); @@ -1650,6 +1671,12 @@ static void o2net_start_connect(struct work_struct *work) goto out; } + ret = o2net_set_usertimeout(sock); + if (ret) { + mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret); + goto out; + } + o2net_register_callbacks(sc->sc_sock->sk, sc); spin_lock(&nn->nn_lock); @@ -1831,6 +1858,12 @@ static int o2net_accept_one(struct socket *sock, int *more) goto out; } + ret = o2net_set_usertimeout(new_sock); + if (ret) { + mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret); + goto out; + } + slen = sizeof(sin); ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, &slen, 1); diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h index 5bada2a69b5..c571e849fda 100644 --- a/fs/ocfs2/cluster/tcp.h +++ b/fs/ocfs2/cluster/tcp.h @@ -63,6 +63,7 @@ typedef void (o2net_post_msg_handler_func)(int status, void *data, #define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 2000 #define O2NET_IDLE_TIMEOUT_MS_DEFAULT 30000 +#define O2NET_TCP_USER_TIMEOUT 0x7fffffff /* TODO: figure this out.... */ static inline int o2net_link_down(int err, struct socket *sock) diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 6f66b3751ac..53e6c40ed4c 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -35,9 +35,8 @@ copy_to_user((typeof(a) __user *)b, &(a), sizeof(a)) /* - * This call is void because we are already reporting an error that may - * be -EFAULT. The error will be returned from the ioctl(2) call. It's - * just a best-effort to tell userspace that this request caused the error. + * This is just a best-effort to tell userspace that this request + * caused the error. */ static inline void o2info_set_request_error(struct ocfs2_info_request *kreq, struct ocfs2_info_request __user *req) @@ -146,136 +145,105 @@ bail: static int ocfs2_info_handle_blocksize(struct inode *inode, struct ocfs2_info_request __user *req) { - int status = -EFAULT; struct ocfs2_info_blocksize oib; if (o2info_from_user(oib, req)) - goto bail; + return -EFAULT; oib.ib_blocksize = inode->i_sb->s_blocksize; o2info_set_request_filled(&oib.ib_req); if (o2info_to_user(oib, req)) - goto bail; - - status = 0; -bail: - if (status) - o2info_set_request_error(&oib.ib_req, req); + return -EFAULT; - return status; + return 0; } static int ocfs2_info_handle_clustersize(struct inode *inode, struct ocfs2_info_request __user *req) { - int status = -EFAULT; struct ocfs2_info_clustersize oic; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); if (o2info_from_user(oic, req)) - goto bail; + return -EFAULT; oic.ic_clustersize = osb->s_clustersize; o2info_set_request_filled(&oic.ic_req); if (o2info_to_user(oic, req)) - goto bail; - - status = 0; -bail: - if (status) - o2info_set_request_error(&oic.ic_req, req); + return -EFAULT; - return status; + return 0; } static int ocfs2_info_handle_maxslots(struct inode *inode, struct ocfs2_info_request __user *req) { - int status = -EFAULT; struct ocfs2_info_maxslots oim; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); if (o2info_from_user(oim, req)) - goto bail; + return -EFAULT; oim.im_max_slots = osb->max_slots; o2info_set_request_filled(&oim.im_req); if (o2info_to_user(oim, req)) - goto bail; + return -EFAULT; - status = 0; -bail: - if (status) - o2info_set_request_error(&oim.im_req, req); - - return status; + return 0; } static int ocfs2_info_handle_label(struct inode *inode, struct ocfs2_info_request __user *req) { - int status = -EFAULT; struct ocfs2_info_label oil; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); if (o2info_from_user(oil, req)) - goto bail; + return -EFAULT; memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN); o2info_set_request_filled(&oil.il_req); if (o2info_to_user(oil, req)) - goto bail; + return -EFAULT; - status = 0; -bail: - if (status) - o2info_set_request_error(&oil.il_req, req); - - return status; + return 0; } static int ocfs2_info_handle_uuid(struct inode *inode, struct ocfs2_info_request __user *req) { - int status = -EFAULT; struct ocfs2_info_uuid oiu; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); if (o2info_from_user(oiu, req)) - goto bail; + return -EFAULT; memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1); o2info_set_request_filled(&oiu.iu_req); if (o2info_to_user(oiu, req)) - goto bail; - - status = 0; -bail: - if (status) - o2info_set_request_error(&oiu.iu_req, req); + return -EFAULT; - return status; + return 0; } static int ocfs2_info_handle_fs_features(struct inode *inode, struct ocfs2_info_request __user *req) { - int status = -EFAULT; struct ocfs2_info_fs_features oif; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); if (o2info_from_user(oif, req)) - goto bail; + return -EFAULT; oif.if_compat_features = osb->s_feature_compat; oif.if_incompat_features = osb->s_feature_incompat; @@ -284,39 +252,28 @@ static int ocfs2_info_handle_fs_features(struct inode *inode, o2info_set_request_filled(&oif.if_req); if (o2info_to_user(oif, req)) - goto bail; + return -EFAULT; - status = 0; -bail: - if (status) - o2info_set_request_error(&oif.if_req, req); - - return status; + return 0; } static int ocfs2_info_handle_journal_size(struct inode *inode, struct ocfs2_info_request __user *req) { - int status = -EFAULT; struct ocfs2_info_journal_size oij; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); if (o2info_from_user(oij, req)) - goto bail; + return -EFAULT; oij.ij_journal_size = i_size_read(osb->journal->j_inode); o2info_set_request_filled(&oij.ij_req); if (o2info_to_user(oij, req)) - goto bail; + return -EFAULT; - status = 0; -bail: - if (status) - o2info_set_request_error(&oij.ij_req, req); - - return status; + return 0; } static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb, @@ -373,7 +330,7 @@ static int ocfs2_info_handle_freeinode(struct inode *inode, u32 i; u64 blkno = -1; char namebuf[40]; - int status = -EFAULT, type = INODE_ALLOC_SYSTEM_INODE; + int status, type = INODE_ALLOC_SYSTEM_INODE; struct ocfs2_info_freeinode *oifi = NULL; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct inode *inode_alloc = NULL; @@ -385,8 +342,10 @@ static int ocfs2_info_handle_freeinode(struct inode *inode, goto out_err; } - if (o2info_from_user(*oifi, req)) - goto bail; + if (o2info_from_user(*oifi, req)) { + status = -EFAULT; + goto out_free; + } oifi->ifi_slotnum = osb->max_slots; @@ -424,14 +383,16 @@ static int ocfs2_info_handle_freeinode(struct inode *inode, o2info_set_request_filled(&oifi->ifi_req); - if (o2info_to_user(*oifi, req)) - goto bail; + if (o2info_to_user(*oifi, req)) { + status = -EFAULT; + goto out_free; + } status = 0; bail: if (status) o2info_set_request_error(&oifi->ifi_req, req); - +out_free: kfree(oifi); out_err: return status; @@ -658,7 +619,7 @@ static int ocfs2_info_handle_freefrag(struct inode *inode, { u64 blkno = -1; char namebuf[40]; - int status = -EFAULT, type = GLOBAL_BITMAP_SYSTEM_INODE; + int status, type = GLOBAL_BITMAP_SYSTEM_INODE; struct ocfs2_info_freefrag *oiff; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); @@ -671,8 +632,10 @@ static int ocfs2_info_handle_freefrag(struct inode *inode, goto out_err; } - if (o2info_from_user(*oiff, req)) - goto bail; + if (o2info_from_user(*oiff, req)) { + status = -EFAULT; + goto out_free; + } /* * chunksize from userspace should be power of 2. */ @@ -711,14 +674,14 @@ static int ocfs2_info_handle_freefrag(struct inode *inode, if (o2info_to_user(*oiff, req)) { status = -EFAULT; - goto bail; + goto out_free; } status = 0; bail: if (status) o2info_set_request_error(&oiff->iff_req, req); - +out_free: kfree(oiff); out_err: return status; @@ -727,23 +690,17 @@ out_err: static int ocfs2_info_handle_unknown(struct inode *inode, struct ocfs2_info_request __user *req) { - int status = -EFAULT; struct ocfs2_info_request oir; if (o2info_from_user(oir, req)) - goto bail; + return -EFAULT; o2info_clear_request_filled(&oir); if (o2info_to_user(oir, req)) - goto bail; + return -EFAULT; - status = 0; -bail: - if (status) - o2info_set_request_error(&oir, req); - - return status; + return 0; } /* diff --git a/fs/pnode.c b/fs/pnode.c index 302bf22c4a3..aae331a5d03 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -381,6 +381,7 @@ static void __propagate_umount(struct mount *mnt) * other children */ if (child && list_empty(&child->mnt_mounts)) { + list_del_init(&child->mnt_child); hlist_del_init_rcu(&child->mnt_hash); hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash); } diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 7f30bdc57d1..f2d0eee9d1f 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -96,13 +96,16 @@ * Note that some things (eg. sb pointer, type, id) doesn't change during * the life of the dquot structure and so needn't to be protected by a lock * - * Any operation working on dquots via inode pointers must hold dqptr_sem. If - * operation is just reading pointers from inode (or not using them at all) the - * read lock is enough. If pointers are altered function must hold write lock. + * Operation accessing dquots via inode pointers are protected by dquot_srcu. + * Operation of reading pointer needs srcu_read_lock(&dquot_srcu), and + * synchronize_srcu(&dquot_srcu) is called after clearing pointers from + * inode and before dropping dquot references to avoid use of dquots after + * they are freed. dq_data_lock is used to serialize the pointer setting and + * clearing operations. * Special care needs to be taken about S_NOQUOTA inode flag (marking that * inode is a quota file). Functions adding pointers from inode to dquots have - * to check this flag under dqptr_sem and then (if S_NOQUOTA is not set) they - * have to do all pointer modifications before dropping dqptr_sem. This makes + * to check this flag under dq_data_lock and then (if S_NOQUOTA is not set) they + * have to do all pointer modifications before dropping dq_data_lock. This makes * sure they cannot race with quotaon which first sets S_NOQUOTA flag and * then drops all pointers to dquots from an inode. * @@ -116,21 +119,15 @@ * spinlock to internal buffers before writing. * * Lock ordering (including related VFS locks) is the following: - * dqonoff_mutex > i_mutex > journal_lock > dqptr_sem > dquot->dq_lock > - * dqio_mutex + * dqonoff_mutex > i_mutex > journal_lock > dquot->dq_lock > dqio_mutex * dqonoff_mutex > i_mutex comes from dquot_quota_sync, dquot_enable, etc. - * The lock ordering of dqptr_sem imposed by quota code is only dqonoff_sem > - * dqptr_sem. But filesystem has to count with the fact that functions such as - * dquot_alloc_space() acquire dqptr_sem and they usually have to be called - * from inside a transaction to keep filesystem consistency after a crash. Also - * filesystems usually want to do some IO on dquot from ->mark_dirty which is - * called with dqptr_sem held. */ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock); EXPORT_SYMBOL(dq_data_lock); +DEFINE_STATIC_SRCU(dquot_srcu); void __quota_error(struct super_block *sb, const char *func, const char *fmt, ...) @@ -733,7 +730,6 @@ static struct shrinker dqcache_shrinker = { /* * Put reference to dquot - * NOTE: If you change this function please check whether dqput_blocks() works right... */ void dqput(struct dquot *dquot) { @@ -963,46 +959,33 @@ static void add_dquot_ref(struct super_block *sb, int type) } /* - * Return 0 if dqput() won't block. - * (note that 1 doesn't necessarily mean blocking) - */ -static inline int dqput_blocks(struct dquot *dquot) -{ - if (atomic_read(&dquot->dq_count) <= 1) - return 1; - return 0; -} - -/* * Remove references to dquots from inode and add dquot to list for freeing * if we have the last reference to dquot - * We can't race with anybody because we hold dqptr_sem for writing... */ -static int remove_inode_dquot_ref(struct inode *inode, int type, - struct list_head *tofree_head) +static void remove_inode_dquot_ref(struct inode *inode, int type, + struct list_head *tofree_head) { struct dquot *dquot = inode->i_dquot[type]; inode->i_dquot[type] = NULL; - if (dquot) { - if (dqput_blocks(dquot)) { -#ifdef CONFIG_QUOTA_DEBUG - if (atomic_read(&dquot->dq_count) != 1) - quota_error(inode->i_sb, "Adding dquot with " - "dq_count %d to dispose list", - atomic_read(&dquot->dq_count)); -#endif - spin_lock(&dq_list_lock); - /* As dquot must have currently users it can't be on - * the free list... */ - list_add(&dquot->dq_free, tofree_head); - spin_unlock(&dq_list_lock); - return 1; - } - else - dqput(dquot); /* We have guaranteed we won't block */ + if (!dquot) + return; + + if (list_empty(&dquot->dq_free)) { + /* + * The inode still has reference to dquot so it can't be in the + * free list + */ + spin_lock(&dq_list_lock); + list_add(&dquot->dq_free, tofree_head); + spin_unlock(&dq_list_lock); + } else { + /* + * Dquot is already in a list to put so we won't drop the last + * reference here. + */ + dqput(dquot); } - return 0; } /* @@ -1037,13 +1020,15 @@ static void remove_dquot_ref(struct super_block *sb, int type, * We have to scan also I_NEW inodes because they can already * have quota pointer initialized. Luckily, we need to touch * only quota pointers and these have separate locking - * (dqptr_sem). + * (dq_data_lock). */ + spin_lock(&dq_data_lock); if (!IS_NOQUOTA(inode)) { if (unlikely(inode_get_rsv_space(inode) > 0)) reserved = 1; remove_inode_dquot_ref(inode, type, tofree_head); } + spin_unlock(&dq_data_lock); } spin_unlock(&inode_sb_list_lock); #ifdef CONFIG_QUOTA_DEBUG @@ -1061,9 +1046,8 @@ static void drop_dquot_ref(struct super_block *sb, int type) LIST_HEAD(tofree_head); if (sb->dq_op) { - down_write(&sb_dqopt(sb)->dqptr_sem); remove_dquot_ref(sb, type, &tofree_head); - up_write(&sb_dqopt(sb)->dqptr_sem); + synchronize_srcu(&dquot_srcu); put_dquot_list(&tofree_head); } } @@ -1394,21 +1378,16 @@ static int dquot_active(const struct inode *inode) /* * Initialize quota pointers in inode * - * We do things in a bit complicated way but by that we avoid calling - * dqget() and thus filesystem callbacks under dqptr_sem. - * * It is better to call this function outside of any transaction as it * might need a lot of space in journal for dquot structure allocation. */ static void __dquot_initialize(struct inode *inode, int type) { - int cnt; + int cnt, init_needed = 0; struct dquot *got[MAXQUOTAS]; struct super_block *sb = inode->i_sb; qsize_t rsv; - /* First test before acquiring mutex - solves deadlocks when we - * re-enter the quota code and are already holding the mutex */ if (!dquot_active(inode)) return; @@ -1418,6 +1397,15 @@ static void __dquot_initialize(struct inode *inode, int type) got[cnt] = NULL; if (type != -1 && cnt != type) continue; + /* + * The i_dquot should have been initialized in most cases, + * we check it without locking here to avoid unnecessary + * dqget()/dqput() calls. + */ + if (inode->i_dquot[cnt]) + continue; + init_needed = 1; + switch (cnt) { case USRQUOTA: qid = make_kqid_uid(inode->i_uid); @@ -1429,7 +1417,11 @@ static void __dquot_initialize(struct inode *inode, int type) got[cnt] = dqget(sb, qid); } - down_write(&sb_dqopt(sb)->dqptr_sem); + /* All required i_dquot has been initialized */ + if (!init_needed) + return; + + spin_lock(&dq_data_lock); if (IS_NOQUOTA(inode)) goto out_err; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1449,15 +1441,12 @@ static void __dquot_initialize(struct inode *inode, int type) * did a write before quota was turned on */ rsv = inode_get_rsv_space(inode); - if (unlikely(rsv)) { - spin_lock(&dq_data_lock); + if (unlikely(rsv)) dquot_resv_space(inode->i_dquot[cnt], rsv); - spin_unlock(&dq_data_lock); - } } } out_err: - up_write(&sb_dqopt(sb)->dqptr_sem); + spin_unlock(&dq_data_lock); /* Drop unused references */ dqput_all(got); } @@ -1469,19 +1458,24 @@ void dquot_initialize(struct inode *inode) EXPORT_SYMBOL(dquot_initialize); /* - * Release all quotas referenced by inode + * Release all quotas referenced by inode. + * + * This function only be called on inode free or converting + * a file to quota file, no other users for the i_dquot in + * both cases, so we needn't call synchronize_srcu() after + * clearing i_dquot. */ static void __dquot_drop(struct inode *inode) { int cnt; struct dquot *put[MAXQUOTAS]; - down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { put[cnt] = inode->i_dquot[cnt]; inode->i_dquot[cnt] = NULL; } - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + spin_unlock(&dq_data_lock); dqput_all(put); } @@ -1599,15 +1593,11 @@ static void inode_decr_space(struct inode *inode, qsize_t number, int reserve) */ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) { - int cnt, ret = 0; + int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; struct dquot **dquots = inode->i_dquot; int reserve = flags & DQUOT_SPACE_RESERVE; - /* - * First test before acquiring mutex - solves deadlocks when we - * re-enter the quota code and are already holding the mutex - */ if (!dquot_active(inode)) { inode_incr_space(inode, number, reserve); goto out; @@ -1616,7 +1606,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) for (cnt = 0; cnt < MAXQUOTAS; cnt++) warn[cnt].w_type = QUOTA_NL_NOWARN; - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!dquots[cnt]) @@ -1643,7 +1633,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) goto out_flush_warn; mark_all_dquot_dirty(dquots); out_flush_warn: - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); out: return ret; @@ -1655,17 +1645,16 @@ EXPORT_SYMBOL(__dquot_alloc_space); */ int dquot_alloc_inode(const struct inode *inode) { - int cnt, ret = 0; + int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; struct dquot * const *dquots = inode->i_dquot; - /* First test before acquiring mutex - solves deadlocks when we - * re-enter the quota code and are already holding the mutex */ if (!dquot_active(inode)) return 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) warn[cnt].w_type = QUOTA_NL_NOWARN; - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + + index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!dquots[cnt]) @@ -1685,7 +1674,7 @@ warn_put_all: spin_unlock(&dq_data_lock); if (ret == 0) mark_all_dquot_dirty(dquots); - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); return ret; } @@ -1696,14 +1685,14 @@ EXPORT_SYMBOL(dquot_alloc_inode); */ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) { - int cnt; + int cnt, index; if (!dquot_active(inode)) { inode_claim_rsv_space(inode, number); return 0; } - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1715,7 +1704,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) inode_claim_rsv_space(inode, number); spin_unlock(&dq_data_lock); mark_all_dquot_dirty(inode->i_dquot); - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + srcu_read_unlock(&dquot_srcu, index); return 0; } EXPORT_SYMBOL(dquot_claim_space_nodirty); @@ -1725,14 +1714,14 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty); */ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) { - int cnt; + int cnt, index; if (!dquot_active(inode)) { inode_reclaim_rsv_space(inode, number); return; } - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1744,7 +1733,7 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) inode_reclaim_rsv_space(inode, number); spin_unlock(&dq_data_lock); mark_all_dquot_dirty(inode->i_dquot); - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + srcu_read_unlock(&dquot_srcu, index); return; } EXPORT_SYMBOL(dquot_reclaim_space_nodirty); @@ -1757,16 +1746,14 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; struct dquot **dquots = inode->i_dquot; - int reserve = flags & DQUOT_SPACE_RESERVE; + int reserve = flags & DQUOT_SPACE_RESERVE, index; - /* First test before acquiring mutex - solves deadlocks when we - * re-enter the quota code and are already holding the mutex */ if (!dquot_active(inode)) { inode_decr_space(inode, number, reserve); return; } - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { int wtype; @@ -1789,7 +1776,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) goto out_unlock; mark_all_dquot_dirty(dquots); out_unlock: - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); } EXPORT_SYMBOL(__dquot_free_space); @@ -1802,13 +1789,12 @@ void dquot_free_inode(const struct inode *inode) unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; struct dquot * const *dquots = inode->i_dquot; + int index; - /* First test before acquiring mutex - solves deadlocks when we - * re-enter the quota code and are already holding the mutex */ if (!dquot_active(inode)) return; - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { int wtype; @@ -1823,7 +1809,7 @@ void dquot_free_inode(const struct inode *inode) } spin_unlock(&dq_data_lock); mark_all_dquot_dirty(dquots); - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); } EXPORT_SYMBOL(dquot_free_inode); @@ -1837,6 +1823,8 @@ EXPORT_SYMBOL(dquot_free_inode); * This operation can block, but only after everything is updated * A transaction must be started when entering this function. * + * We are holding reference on transfer_from & transfer_to, no need to + * protect them by srcu_read_lock(). */ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) { @@ -1849,8 +1837,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) struct dquot_warn warn_from_inodes[MAXQUOTAS]; struct dquot_warn warn_from_space[MAXQUOTAS]; - /* First test before acquiring mutex - solves deadlocks when we - * re-enter the quota code and are already holding the mutex */ if (IS_NOQUOTA(inode)) return 0; /* Initialize the arrays */ @@ -1859,12 +1845,12 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) warn_from_inodes[cnt].w_type = QUOTA_NL_NOWARN; warn_from_space[cnt].w_type = QUOTA_NL_NOWARN; } - down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + + spin_lock(&dq_data_lock); if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + spin_unlock(&dq_data_lock); return 0; } - spin_lock(&dq_data_lock); cur_space = inode_get_bytes(inode); rsv_space = inode_get_rsv_space(inode); space = cur_space + rsv_space; @@ -1918,7 +1904,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) inode->i_dquot[cnt] = transfer_to[cnt]; } spin_unlock(&dq_data_lock); - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); mark_all_dquot_dirty(transfer_from); mark_all_dquot_dirty(transfer_to); @@ -1932,7 +1917,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) return 0; over_quota: spin_unlock(&dq_data_lock); - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); flush_warnings(warn_to); return ret; } diff --git a/fs/quota/kqid.c b/fs/quota/kqid.c index 2f97b0e2c50..ebc5e628580 100644 --- a/fs/quota/kqid.c +++ b/fs/quota/kqid.c @@ -55,7 +55,7 @@ EXPORT_SYMBOL(qid_lt); /** * from_kqid - Create a qid from a kqid user-namespace pair. * @targ: The user namespace we want a qid in. - * @kuid: The kernel internal quota identifier to start with. + * @kqid: The kernel internal quota identifier to start with. * * Map @kqid into the user-namespace specified by @targ and * return the resulting qid. diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c index 72d29177998..bb2869f5dfd 100644 --- a/fs/quota/netlink.c +++ b/fs/quota/netlink.c @@ -32,8 +32,7 @@ static struct genl_family quota_genl_family = { /** * quota_send_warning - Send warning to userspace about exceeded quota - * @type: The quota type: USRQQUOTA, GRPQUOTA,... - * @id: The user or group id of the quota that was exceeded + * @qid: The kernel internal quota identifier. * @dev: The device on which the fs is mounted (sb->s_dev) * @warntype: The type of the warning: QUOTA_NL_... * diff --git a/fs/quota/quota.c b/fs/quota/quota.c index ff3f0b3cfdb..75621649dbd 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -79,13 +79,13 @@ static int quota_getfmt(struct super_block *sb, int type, void __user *addr) { __u32 fmt; - down_read(&sb_dqopt(sb)->dqptr_sem); + mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); if (!sb_has_quota_active(sb, type)) { - up_read(&sb_dqopt(sb)->dqptr_sem); + mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); return -ESRCH; } fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id; - up_read(&sb_dqopt(sb)->dqptr_sem); + mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); if (copy_to_user(addr, &fmt, sizeof(fmt))) return -EFAULT; return 0; diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c index 5739cb99de7..9c02d96d3a4 100644 --- a/fs/reiserfs/do_balan.c +++ b/fs/reiserfs/do_balan.c @@ -286,12 +286,14 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag) return 0; } -static void balance_leaf_insert_left(struct tree_balance *tb, - struct item_head *ih, const char *body) +static unsigned int balance_leaf_insert_left(struct tree_balance *tb, + struct item_head *const ih, + const char * const body) { int ret; struct buffer_info bi; int n = B_NR_ITEMS(tb->L[0]); + unsigned body_shift_bytes = 0; if (tb->item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) { /* part of new item falls into L[0] */ @@ -329,7 +331,7 @@ static void balance_leaf_insert_left(struct tree_balance *tb, put_ih_item_len(ih, new_item_len); if (tb->lbytes > tb->zeroes_num) { - body += (tb->lbytes - tb->zeroes_num); + body_shift_bytes = tb->lbytes - tb->zeroes_num; tb->zeroes_num = 0; } else tb->zeroes_num -= tb->lbytes; @@ -349,11 +351,12 @@ static void balance_leaf_insert_left(struct tree_balance *tb, tb->insert_size[0] = 0; tb->zeroes_num = 0; } + return body_shift_bytes; } static void balance_leaf_paste_left_shift_dirent(struct tree_balance *tb, - struct item_head *ih, - const char *body) + struct item_head * const ih, + const char * const body) { int n = B_NR_ITEMS(tb->L[0]); struct buffer_info bi; @@ -413,17 +416,18 @@ static void balance_leaf_paste_left_shift_dirent(struct tree_balance *tb, tb->pos_in_item -= tb->lbytes; } -static void balance_leaf_paste_left_shift(struct tree_balance *tb, - struct item_head *ih, - const char *body) +static unsigned int balance_leaf_paste_left_shift(struct tree_balance *tb, + struct item_head * const ih, + const char * const body) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); int n = B_NR_ITEMS(tb->L[0]); struct buffer_info bi; + int body_shift_bytes = 0; if (is_direntry_le_ih(item_head(tbS0, tb->item_pos))) { balance_leaf_paste_left_shift_dirent(tb, ih, body); - return; + return 0; } RFALSE(tb->lbytes <= 0, @@ -497,7 +501,7 @@ static void balance_leaf_paste_left_shift(struct tree_balance *tb, * insert_size[0] */ if (l_n > tb->zeroes_num) { - body += (l_n - tb->zeroes_num); + body_shift_bytes = l_n - tb->zeroes_num; tb->zeroes_num = 0; } else tb->zeroes_num -= l_n; @@ -526,13 +530,14 @@ static void balance_leaf_paste_left_shift(struct tree_balance *tb, */ leaf_shift_left(tb, tb->lnum[0], tb->lbytes); } + return body_shift_bytes; } /* appended item will be in L[0] in whole */ static void balance_leaf_paste_left_whole(struct tree_balance *tb, - struct item_head *ih, - const char *body) + struct item_head * const ih, + const char * const body) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); int n = B_NR_ITEMS(tb->L[0]); @@ -584,39 +589,44 @@ static void balance_leaf_paste_left_whole(struct tree_balance *tb, tb->zeroes_num = 0; } -static void balance_leaf_paste_left(struct tree_balance *tb, - struct item_head *ih, const char *body) +static unsigned int balance_leaf_paste_left(struct tree_balance *tb, + struct item_head * const ih, + const char * const body) { /* we must shift the part of the appended item */ if (tb->item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) - balance_leaf_paste_left_shift(tb, ih, body); + return balance_leaf_paste_left_shift(tb, ih, body); else balance_leaf_paste_left_whole(tb, ih, body); + return 0; } /* Shift lnum[0] items from S[0] to the left neighbor L[0] */ -static void balance_leaf_left(struct tree_balance *tb, struct item_head *ih, - const char *body, int flag) +static unsigned int balance_leaf_left(struct tree_balance *tb, + struct item_head * const ih, + const char * const body, int flag) { if (tb->lnum[0] <= 0) - return; + return 0; /* new item or it part falls to L[0], shift it too */ if (tb->item_pos < tb->lnum[0]) { BUG_ON(flag != M_INSERT && flag != M_PASTE); if (flag == M_INSERT) - balance_leaf_insert_left(tb, ih, body); + return balance_leaf_insert_left(tb, ih, body); else /* M_PASTE */ - balance_leaf_paste_left(tb, ih, body); + return balance_leaf_paste_left(tb, ih, body); } else /* new item doesn't fall into L[0] */ leaf_shift_left(tb, tb->lnum[0], tb->lbytes); + return 0; } static void balance_leaf_insert_right(struct tree_balance *tb, - struct item_head *ih, const char *body) + struct item_head * const ih, + const char * const body) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); @@ -704,7 +714,8 @@ static void balance_leaf_insert_right(struct tree_balance *tb, static void balance_leaf_paste_right_shift_dirent(struct tree_balance *tb, - struct item_head *ih, const char *body) + struct item_head * const ih, + const char * const body) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); struct buffer_info bi; @@ -754,7 +765,8 @@ static void balance_leaf_paste_right_shift_dirent(struct tree_balance *tb, } static void balance_leaf_paste_right_shift(struct tree_balance *tb, - struct item_head *ih, const char *body) + struct item_head * const ih, + const char * const body) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); int n_shift, n_rem, r_zeroes_number, version; @@ -831,7 +843,8 @@ static void balance_leaf_paste_right_shift(struct tree_balance *tb, } static void balance_leaf_paste_right_whole(struct tree_balance *tb, - struct item_head *ih, const char *body) + struct item_head * const ih, + const char * const body) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); int n = B_NR_ITEMS(tbS0); @@ -874,7 +887,8 @@ static void balance_leaf_paste_right_whole(struct tree_balance *tb, } static void balance_leaf_paste_right(struct tree_balance *tb, - struct item_head *ih, const char *body) + struct item_head * const ih, + const char * const body) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); int n = B_NR_ITEMS(tbS0); @@ -896,8 +910,9 @@ static void balance_leaf_paste_right(struct tree_balance *tb, } /* shift rnum[0] items from S[0] to the right neighbor R[0] */ -static void balance_leaf_right(struct tree_balance *tb, struct item_head *ih, - const char *body, int flag) +static void balance_leaf_right(struct tree_balance *tb, + struct item_head * const ih, + const char * const body, int flag) { if (tb->rnum[0] <= 0) return; @@ -911,8 +926,8 @@ static void balance_leaf_right(struct tree_balance *tb, struct item_head *ih, } static void balance_leaf_new_nodes_insert(struct tree_balance *tb, - struct item_head *ih, - const char *body, + struct item_head * const ih, + const char * const body, struct item_head *insert_key, struct buffer_head **insert_ptr, int i) @@ -1003,8 +1018,8 @@ static void balance_leaf_new_nodes_insert(struct tree_balance *tb, /* we append to directory item */ static void balance_leaf_new_nodes_paste_dirent(struct tree_balance *tb, - struct item_head *ih, - const char *body, + struct item_head * const ih, + const char * const body, struct item_head *insert_key, struct buffer_head **insert_ptr, int i) @@ -1058,8 +1073,8 @@ static void balance_leaf_new_nodes_paste_dirent(struct tree_balance *tb, } static void balance_leaf_new_nodes_paste_shift(struct tree_balance *tb, - struct item_head *ih, - const char *body, + struct item_head * const ih, + const char * const body, struct item_head *insert_key, struct buffer_head **insert_ptr, int i) @@ -1131,8 +1146,8 @@ static void balance_leaf_new_nodes_paste_shift(struct tree_balance *tb, } static void balance_leaf_new_nodes_paste_whole(struct tree_balance *tb, - struct item_head *ih, - const char *body, + struct item_head * const ih, + const char * const body, struct item_head *insert_key, struct buffer_head **insert_ptr, int i) @@ -1184,8 +1199,8 @@ static void balance_leaf_new_nodes_paste_whole(struct tree_balance *tb, } static void balance_leaf_new_nodes_paste(struct tree_balance *tb, - struct item_head *ih, - const char *body, + struct item_head * const ih, + const char * const body, struct item_head *insert_key, struct buffer_head **insert_ptr, int i) @@ -1214,8 +1229,8 @@ static void balance_leaf_new_nodes_paste(struct tree_balance *tb, /* Fill new nodes that appear in place of S[0] */ static void balance_leaf_new_nodes(struct tree_balance *tb, - struct item_head *ih, - const char *body, + struct item_head * const ih, + const char * const body, struct item_head *insert_key, struct buffer_head **insert_ptr, int flag) @@ -1254,8 +1269,8 @@ static void balance_leaf_new_nodes(struct tree_balance *tb, } static void balance_leaf_finish_node_insert(struct tree_balance *tb, - struct item_head *ih, - const char *body) + struct item_head * const ih, + const char * const body) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); struct buffer_info bi; @@ -1271,8 +1286,8 @@ static void balance_leaf_finish_node_insert(struct tree_balance *tb, } static void balance_leaf_finish_node_paste_dirent(struct tree_balance *tb, - struct item_head *ih, - const char *body) + struct item_head * const ih, + const char * const body) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); struct item_head *pasted = item_head(tbS0, tb->item_pos); @@ -1305,8 +1320,8 @@ static void balance_leaf_finish_node_paste_dirent(struct tree_balance *tb, } static void balance_leaf_finish_node_paste(struct tree_balance *tb, - struct item_head *ih, - const char *body) + struct item_head * const ih, + const char * const body) { struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); struct buffer_info bi; @@ -1349,8 +1364,8 @@ static void balance_leaf_finish_node_paste(struct tree_balance *tb, * of the affected item which remains in S */ static void balance_leaf_finish_node(struct tree_balance *tb, - struct item_head *ih, - const char *body, int flag) + struct item_head * const ih, + const char * const body, int flag) { /* if we must insert or append into buffer S[0] */ if (0 <= tb->item_pos && tb->item_pos < tb->s0num) { @@ -1402,7 +1417,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, && is_indirect_le_ih(item_head(tbS0, tb->item_pos))) tb->pos_in_item *= UNFM_P_SIZE; - balance_leaf_left(tb, ih, body, flag); + body += balance_leaf_left(tb, ih, body, flag); /* tb->lnum[0] > 0 */ /* Calculate new item position */ diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index e8870de4627..a88b1b3e7db 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1947,8 +1947,6 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, } } - /* wait for all commits to finish */ - cancel_delayed_work(&SB_JOURNAL(sb)->j_work); /* * We must release the write lock here because @@ -1956,8 +1954,14 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, */ reiserfs_write_unlock(sb); + /* + * Cancel flushing of old commits. Note that neither of these works + * will be requeued because superblock is being shutdown and doesn't + * have MS_ACTIVE set. + */ cancel_delayed_work_sync(&REISERFS_SB(sb)->old_work); - flush_workqueue(REISERFS_SB(sb)->commit_wq); + /* wait for all commits to finish */ + cancel_delayed_work_sync(&SB_JOURNAL(sb)->j_work); free_journal_ram(sb); @@ -4292,9 +4296,15 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, int flags) if (flush) { flush_commit_list(sb, jl, 1); flush_journal_list(sb, jl, 1); - } else if (!(jl->j_state & LIST_COMMIT_PENDING)) - queue_delayed_work(REISERFS_SB(sb)->commit_wq, - &journal->j_work, HZ / 10); + } else if (!(jl->j_state & LIST_COMMIT_PENDING)) { + /* + * Avoid queueing work when sb is being shut down. Transaction + * will be flushed on journal shutdown. + */ + if (sb->s_flags & MS_ACTIVE) + queue_delayed_work(REISERFS_SB(sb)->commit_wq, + &journal->j_work, HZ / 10); + } /* * if the next transaction has any chance of wrapping, flush diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c index 814dda3ec99..249594a821e 100644 --- a/fs/reiserfs/lbalance.c +++ b/fs/reiserfs/lbalance.c @@ -899,8 +899,9 @@ void leaf_delete_items(struct buffer_info *cur_bi, int last_first, /* insert item into the leaf node in position before */ void leaf_insert_into_buf(struct buffer_info *bi, int before, - struct item_head *inserted_item_ih, - const char *inserted_item_body, int zeros_number) + struct item_head * const inserted_item_ih, + const char * const inserted_item_body, + int zeros_number) { struct buffer_head *bh = bi->bi_bh; int nr, free_space; diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index bf53888c7f5..735c2c2b453 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -3216,11 +3216,12 @@ int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes); void leaf_delete_items(struct buffer_info *cur_bi, int last_first, int first, int del_num, int del_bytes); void leaf_insert_into_buf(struct buffer_info *bi, int before, - struct item_head *inserted_item_ih, - const char *inserted_item_body, int zeros_number); -void leaf_paste_in_buffer(struct buffer_info *bi, int pasted_item_num, - int pos_in_item, int paste_size, const char *body, + struct item_head * const inserted_item_ih, + const char * const inserted_item_body, int zeros_number); +void leaf_paste_in_buffer(struct buffer_info *bi, int pasted_item_num, + int pos_in_item, int paste_size, + const char * const body, int zeros_number); void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, int pos_in_item, int cut_size); void leaf_paste_entries(struct buffer_info *bi, int item_num, int before, diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 709ea92d716..d46e88a33b0 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -100,7 +100,11 @@ void reiserfs_schedule_old_flush(struct super_block *s) struct reiserfs_sb_info *sbi = REISERFS_SB(s); unsigned long delay; - if (s->s_flags & MS_RDONLY) + /* + * Avoid scheduling flush when sb is being shut down. It can race + * with journal shutdown and free still queued delayed work. + */ + if (s->s_flags & MS_RDONLY || !(s->s_flags & MS_ACTIVE)) return; spin_lock(&sbi->old_work_lock); diff --git a/fs/super.c b/fs/super.c index d20d5b11ded..b9a214d2fe9 100644 --- a/fs/super.c +++ b/fs/super.c @@ -22,7 +22,6 @@ #include <linux/export.h> #include <linux/slab.h> -#include <linux/acct.h> #include <linux/blkdev.h> #include <linux/mount.h> #include <linux/security.h> @@ -218,7 +217,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); mutex_init(&s->s_dquot.dqio_mutex); mutex_init(&s->s_dquot.dqonoff_mutex); - init_rwsem(&s->s_dquot.dqptr_sem); s->s_maxbytes = MAX_NON_LFS; s->s_op = &default_op; s->s_time_gran = 1000000000; @@ -702,12 +700,22 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) return -EACCES; #endif - if (flags & MS_RDONLY) - acct_auto_close(sb); - shrink_dcache_sb(sb); - remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); + if (remount_ro) { + if (sb->s_pins.first) { + up_write(&sb->s_umount); + sb_pin_kill(sb); + down_write(&sb->s_umount); + if (!sb->s_root) + return 0; + if (sb->s_writers.frozen != SB_UNFROZEN) + return -EBUSY; + remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); + } + } + shrink_dcache_sb(sb); + /* If we are remounting RDONLY and current sb is read/write, make sure there are no rw files opened */ if (remount_ro) { diff --git a/fs/sync.c b/fs/sync.c index b28d1dd10e8..bdc729d80e5 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -65,7 +65,7 @@ int sync_filesystem(struct super_block *sb) return ret; return __sync_filesystem(sb, 1); } -EXPORT_SYMBOL_GPL(sync_filesystem); +EXPORT_SYMBOL(sync_filesystem); static void sync_inodes_one_sb(struct super_block *sb, void *arg) { diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index ff8229340cd..aa13ad053b1 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -174,7 +174,6 @@ static int do_commit(struct ubifs_info *c) if (err) goto out; - mutex_lock(&c->mst_mutex); c->mst_node->cmt_no = cpu_to_le64(c->cmt_no); c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum); c->mst_node->root_lnum = cpu_to_le32(zroot.lnum); @@ -204,7 +203,6 @@ static int do_commit(struct ubifs_info *c) else c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS); err = ubifs_write_master(c); - mutex_unlock(&c->mst_mutex); if (err) goto out; diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 2290d586672..fb08b0c514b 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -431,7 +431,7 @@ void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last) /** * wbuf_timer_callback - write-buffer timer callback function. - * @data: timer data (write-buffer descriptor) + * @timer: timer data (write-buffer descriptor) * * This function is called when the write-buffer timer expires. */ diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index a902c5919e4..a47ddfc9be6 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c @@ -240,6 +240,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) if (c->lhead_offs > c->leb_size - c->ref_node_alsz) { c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); + ubifs_assert(c->lhead_lnum != c->ltail_lnum); c->lhead_offs = 0; } @@ -404,15 +405,14 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) /* Switch to the next log LEB */ if (c->lhead_offs) { c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); + ubifs_assert(c->lhead_lnum != c->ltail_lnum); c->lhead_offs = 0; } - if (c->lhead_offs == 0) { - /* Must ensure next LEB has been unmapped */ - err = ubifs_leb_unmap(c, c->lhead_lnum); - if (err) - goto out; - } + /* Must ensure next LEB has been unmapped */ + err = ubifs_leb_unmap(c, c->lhead_lnum); + if (err) + goto out; len = ALIGN(len, c->min_io_size); dbg_log("writing commit start at LEB %d:0, len %d", c->lhead_lnum, len); diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index d46b19ec181..421bd0a8042 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -1464,7 +1464,6 @@ struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum) return ERR_CAST(nnode); } iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); - shft -= UBIFS_LPT_FANOUT_SHIFT; pnode = ubifs_get_pnode(c, nnode, iip); if (IS_ERR(pnode)) return ERR_CAST(pnode); @@ -1604,7 +1603,6 @@ struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum) return ERR_CAST(nnode); } iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); - shft -= UBIFS_LPT_FANOUT_SHIFT; pnode = ubifs_get_pnode(c, nnode, iip); if (IS_ERR(pnode)) return ERR_CAST(pnode); @@ -1964,7 +1962,6 @@ again: } } iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); - shft -= UBIFS_LPT_FANOUT_SHIFT; pnode = scan_get_pnode(c, path + h, nnode, iip); if (IS_ERR(pnode)) { err = PTR_ERR(pnode); @@ -2198,6 +2195,7 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, lprops->dirty); return -EINVAL; } + break; case LPROPS_FREEABLE: case LPROPS_FRDI_IDX: if (lprops->free + lprops->dirty != c->leb_size) { @@ -2206,6 +2204,7 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, lprops->dirty); return -EINVAL; } + break; } } return 0; diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 45d4e96a6ba..d9c02928e99 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -304,7 +304,6 @@ static int layout_cnodes(struct ubifs_info *c) ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); } - done_ltab = 1; c->ltab_lnum = lnum; c->ltab_offs = offs; offs += c->ltab_sz; @@ -514,7 +513,6 @@ static int write_cnodes(struct ubifs_info *c) if (err) return err; } - done_ltab = 1; ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); offs += c->ltab_sz; dbg_chk_lpt_sz(c, 1, c->ltab_sz); @@ -1941,6 +1939,11 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) pr_err("LEB %d:%d, nnode, ", lnum, offs); err = ubifs_unpack_nnode(c, p, &nnode); + if (err) { + pr_err("failed to unpack_node, error %d\n", + err); + break; + } for (i = 0; i < UBIFS_LPT_FANOUT; i++) { pr_cont("%d:%d", nnode.nbranch[i].lnum, nnode.nbranch[i].offs); diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c index ab83ace9910..1a4bb9e8b3b 100644 --- a/fs/ubifs/master.c +++ b/fs/ubifs/master.c @@ -352,10 +352,9 @@ int ubifs_read_master(struct ubifs_info *c) * ubifs_write_master - write master node. * @c: UBIFS file-system description object * - * This function writes the master node. The caller has to take the - * @c->mst_mutex lock before calling this function. Returns zero in case of - * success and a negative error code in case of failure. The master node is - * written twice to enable recovery. + * This function writes the master node. Returns zero in case of success and a + * negative error code in case of failure. The master node is written twice to + * enable recovery. */ int ubifs_write_master(struct ubifs_info *c) { diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index f1c3e5a1b31..4409f486ece 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -346,7 +346,6 @@ static int write_orph_nodes(struct ubifs_info *c, int atomic) int lnum; /* Unmap any unused LEBs after consolidation */ - lnum = c->ohead_lnum + 1; for (lnum = c->ohead_lnum + 1; lnum <= c->orph_last; lnum++) { err = ubifs_leb_unmap(c, lnum); if (err) diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index c14adb2f420..c640938f62f 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -596,7 +596,6 @@ static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs) * drop_last_node - drop the last node. * @sleb: scanned LEB information * @offs: offset of dropped nodes is returned here - * @grouped: non-zero if whole group of nodes have to be dropped * * This is a helper function for 'ubifs_recover_leb()' which drops the last * node of the scanned LEB. @@ -629,8 +628,8 @@ static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs) * * This function does a scan of a LEB, but caters for errors that might have * been caused by the unclean unmount from which we are attempting to recover. - * Returns %0 in case of success, %-EUCLEAN if an unrecoverable corruption is - * found, and a negative error code in case of failure. + * Returns the scanned information on success and a negative error code on + * failure. */ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf, int jhead) diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 4c37607a958..79c6dbbc0e0 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -332,6 +332,8 @@ static int create_default_filesystem(struct ubifs_info *c) cs->ch.node_type = UBIFS_CS_NODE; err = ubifs_write_node(c, cs, UBIFS_CS_NODE_SZ, UBIFS_LOG_LNUM, 0); kfree(cs); + if (err) + return err; ubifs_msg("default file-system created"); return 0; @@ -447,7 +449,7 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) goto failed; } - if (c->default_compr < 0 || c->default_compr >= UBIFS_COMPR_TYPES_CNT) { + if (c->default_compr >= UBIFS_COMPR_TYPES_CNT) { err = 13; goto failed; } diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index 58aa05df2bb..89adbc4d08a 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c @@ -131,7 +131,8 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, * @offs: offset to start at (usually zero) * @sbuf: scan buffer (must be c->leb_size) * - * This function returns %0 on success and a negative error code on failure. + * This function returns the scanned information on success and a negative error + * code on failure. */ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, int offs, void *sbuf) @@ -157,9 +158,10 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, return ERR_PTR(err); } - if (err == -EBADMSG) - sleb->ecc = 1; - + /* + * Note, we ignore integrity errors (EBASMSG) because all the nodes are + * protected by CRC checksums. + */ return sleb; } @@ -169,8 +171,6 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, * @sleb: scanning information * @lnum: logical eraseblock number * @offs: offset to start at (usually zero) - * - * This function returns %0 on success and a negative error code on failure. */ void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, int lnum, int offs) @@ -257,7 +257,7 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, * @quiet: print no messages * * This function scans LEB number @lnum and returns complete information about - * its contents. Returns the scaned information in case of success and, + * its contents. Returns the scanned information in case of success and, * %-EUCLEAN if the LEB neads recovery, and other negative error codes in case * of failure. * diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 3904c8574ef..106bf20629c 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -75,7 +75,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode) return 1; } - if (ui->compr_type < 0 || ui->compr_type >= UBIFS_COMPR_TYPES_CNT) { + if (ui->compr_type >= UBIFS_COMPR_TYPES_CNT) { ubifs_err("unknown compression type %d", ui->compr_type); return 2; } @@ -424,19 +424,19 @@ static int ubifs_show_options(struct seq_file *s, struct dentry *root) struct ubifs_info *c = root->d_sb->s_fs_info; if (c->mount_opts.unmount_mode == 2) - seq_printf(s, ",fast_unmount"); + seq_puts(s, ",fast_unmount"); else if (c->mount_opts.unmount_mode == 1) - seq_printf(s, ",norm_unmount"); + seq_puts(s, ",norm_unmount"); if (c->mount_opts.bulk_read == 2) - seq_printf(s, ",bulk_read"); + seq_puts(s, ",bulk_read"); else if (c->mount_opts.bulk_read == 1) - seq_printf(s, ",no_bulk_read"); + seq_puts(s, ",no_bulk_read"); if (c->mount_opts.chk_data_crc == 2) - seq_printf(s, ",chk_data_crc"); + seq_puts(s, ",chk_data_crc"); else if (c->mount_opts.chk_data_crc == 1) - seq_printf(s, ",no_chk_data_crc"); + seq_puts(s, ",no_chk_data_crc"); if (c->mount_opts.override_compr) { seq_printf(s, ",compr=%s", @@ -796,8 +796,8 @@ static int alloc_wbufs(struct ubifs_info *c) { int i, err; - c->jheads = kzalloc(c->jhead_cnt * sizeof(struct ubifs_jhead), - GFP_KERNEL); + c->jheads = kcalloc(c->jhead_cnt, sizeof(struct ubifs_jhead), + GFP_KERNEL); if (!c->jheads) return -ENOMEM; @@ -1963,7 +1963,6 @@ static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi) mutex_init(&c->lp_mutex); mutex_init(&c->tnc_mutex); mutex_init(&c->log_mutex); - mutex_init(&c->mst_mutex); mutex_init(&c->umount_mutex); mutex_init(&c->bu_mutex); mutex_init(&c->write_reserve_mutex); diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 8a40cf9c02d..6793db0754f 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -3294,7 +3294,6 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, goto out_unlock; if (err) { - err = -EINVAL; key = &from_key; goto out_dump; } diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index 3600994f841..7a205e04677 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -389,7 +389,6 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt) ubifs_dump_lprops(c); } /* Try to commit anyway */ - err = 0; break; } p++; diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index c1f71fe17cc..c4fe900c67a 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -314,7 +314,6 @@ struct ubifs_scan_node { * @nodes_cnt: number of nodes scanned * @nodes: list of struct ubifs_scan_node * @endpt: end point (and therefore the start of empty space) - * @ecc: read returned -EBADMSG * @buf: buffer containing entire LEB scanned */ struct ubifs_scan_leb { @@ -322,7 +321,6 @@ struct ubifs_scan_leb { int nodes_cnt; struct list_head nodes; int endpt; - int ecc; void *buf; }; @@ -1051,7 +1049,6 @@ struct ubifs_debug_info; * * @mst_node: master node * @mst_offs: offset of valid master node - * @mst_mutex: protects the master node area, @mst_node, and @mst_offs * * @max_bu_buf_len: maximum bulk-read buffer length * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu @@ -1292,7 +1289,6 @@ struct ubifs_info { struct ubifs_mst_node *mst_node; int mst_offs; - struct mutex mst_mutex; int max_bu_buf_len; struct mutex bu_mutex; diff --git a/fs/udf/file.c b/fs/udf/file.c index d80738fdf42..86c6743ec1f 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -27,7 +27,7 @@ #include "udfdecl.h" #include <linux/fs.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/kernel.h> #include <linux/string.h> /* memset */ #include <linux/capability.h> @@ -100,24 +100,6 @@ static int udf_adinicb_write_begin(struct file *file, return 0; } -static int udf_adinicb_write_end(struct file *file, - struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) -{ - struct inode *inode = mapping->host; - unsigned offset = pos & (PAGE_CACHE_SIZE - 1); - char *kaddr; - struct udf_inode_info *iinfo = UDF_I(inode); - - kaddr = kmap_atomic(page); - memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr + offset, - kaddr + offset, copied); - kunmap_atomic(kaddr); - - return simple_write_end(file, mapping, pos, len, copied, page, fsdata); -} - static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t offset) @@ -130,7 +112,7 @@ const struct address_space_operations udf_adinicb_aops = { .readpage = udf_adinicb_readpage, .writepage = udf_adinicb_writepage, .write_begin = udf_adinicb_write_begin, - .write_end = udf_adinicb_write_end, + .write_end = simple_write_end, .direct_IO = udf_adinicb_direct_IO, }; diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 6eaf5edf1ea..e77db621ec8 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -45,7 +45,7 @@ void udf_free_inode(struct inode *inode) udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1); } -struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err) +struct inode *udf_new_inode(struct inode *dir, umode_t mode) { struct super_block *sb = dir->i_sb; struct udf_sb_info *sbi = UDF_SB(sb); @@ -55,14 +55,12 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err) struct udf_inode_info *iinfo; struct udf_inode_info *dinfo = UDF_I(dir); struct logicalVolIntegrityDescImpUse *lvidiu; + int err; inode = new_inode(sb); - if (!inode) { - *err = -ENOMEM; - return NULL; - } - *err = -ENOSPC; + if (!inode) + return ERR_PTR(-ENOMEM); iinfo = UDF_I(inode); if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) { @@ -80,21 +78,22 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err) } if (!iinfo->i_ext.i_data) { iput(inode); - *err = -ENOMEM; - return NULL; + return ERR_PTR(-ENOMEM); } + err = -ENOSPC; block = udf_new_block(dir->i_sb, NULL, dinfo->i_location.partitionReferenceNum, - start, err); - if (*err) { + start, &err); + if (err) { iput(inode); - return NULL; + return ERR_PTR(err); } lvidiu = udf_sb_lvidiu(sb); if (lvidiu) { iinfo->i_unique = lvid_get_unique_id(sb); + inode->i_generation = iinfo->i_unique; mutex_lock(&sbi->s_alloc_mutex); if (S_ISDIR(mode)) le32_add_cpu(&lvidiu->numDirs, 1); @@ -123,9 +122,12 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err) iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; inode->i_mtime = inode->i_atime = inode->i_ctime = iinfo->i_crtime = current_fs_time(inode->i_sb); - insert_inode_hash(inode); + if (unlikely(insert_inode_locked(inode) < 0)) { + make_bad_inode(inode); + iput(inode); + return ERR_PTR(-EIO); + } mark_inode_dirty(inode); - *err = 0; return inode; } diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 236cd48184c..08598843288 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -51,7 +51,6 @@ MODULE_LICENSE("GPL"); static umode_t udf_convert_permissions(struct fileEntry *); static int udf_update_inode(struct inode *, int); -static void udf_fill_inode(struct inode *, struct buffer_head *); static int udf_sync_inode(struct inode *inode); static int udf_alloc_i_data(struct inode *inode, size_t size); static sector_t inode_getblk(struct inode *, sector_t, int *, int *); @@ -1271,12 +1270,33 @@ update_time: return 0; } -static void __udf_read_inode(struct inode *inode) +/* + * Maximum length of linked list formed by ICB hierarchy. The chosen number is + * arbitrary - just that we hopefully don't limit any real use of rewritten + * inode on write-once media but avoid looping for too long on corrupted media. + */ +#define UDF_MAX_ICB_NESTING 1024 + +static int udf_read_inode(struct inode *inode) { struct buffer_head *bh = NULL; struct fileEntry *fe; + struct extendedFileEntry *efe; uint16_t ident; struct udf_inode_info *iinfo = UDF_I(inode); + struct udf_sb_info *sbi = UDF_SB(inode->i_sb); + struct kernel_lb_addr *iloc = &iinfo->i_location; + unsigned int link_count; + unsigned int indirections = 0; + int ret = -EIO; + +reread: + if (iloc->logicalBlockNum >= + sbi->s_partmaps[iloc->partitionReferenceNum].s_partition_len) { + udf_debug("block=%d, partition=%d out of range\n", + iloc->logicalBlockNum, iloc->partitionReferenceNum); + return -EIO; + } /* * Set defaults, but the inode is still incomplete! @@ -1290,78 +1310,54 @@ static void __udf_read_inode(struct inode *inode) * i_nlink = 1 * i_op = NULL; */ - bh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 0, &ident); + bh = udf_read_ptagged(inode->i_sb, iloc, 0, &ident); if (!bh) { udf_err(inode->i_sb, "(ino %ld) failed !bh\n", inode->i_ino); - make_bad_inode(inode); - return; + return -EIO; } if (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE && ident != TAG_IDENT_USE) { udf_err(inode->i_sb, "(ino %ld) failed ident=%d\n", inode->i_ino, ident); - brelse(bh); - make_bad_inode(inode); - return; + goto out; } fe = (struct fileEntry *)bh->b_data; + efe = (struct extendedFileEntry *)bh->b_data; if (fe->icbTag.strategyType == cpu_to_le16(4096)) { struct buffer_head *ibh; - ibh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 1, - &ident); + ibh = udf_read_ptagged(inode->i_sb, iloc, 1, &ident); if (ident == TAG_IDENT_IE && ibh) { - struct buffer_head *nbh = NULL; struct kernel_lb_addr loc; struct indirectEntry *ie; ie = (struct indirectEntry *)ibh->b_data; loc = lelb_to_cpu(ie->indirectICB.extLocation); - if (ie->indirectICB.extLength && - (nbh = udf_read_ptagged(inode->i_sb, &loc, 0, - &ident))) { - if (ident == TAG_IDENT_FE || - ident == TAG_IDENT_EFE) { - memcpy(&iinfo->i_location, - &loc, - sizeof(struct kernel_lb_addr)); - brelse(bh); - brelse(ibh); - brelse(nbh); - __udf_read_inode(inode); - return; + if (ie->indirectICB.extLength) { + brelse(ibh); + memcpy(&iinfo->i_location, &loc, + sizeof(struct kernel_lb_addr)); + if (++indirections > UDF_MAX_ICB_NESTING) { + udf_err(inode->i_sb, + "too many ICBs in ICB hierarchy" + " (max %d supported)\n", + UDF_MAX_ICB_NESTING); + goto out; } - brelse(nbh); + brelse(bh); + goto reread; } } brelse(ibh); } else if (fe->icbTag.strategyType != cpu_to_le16(4)) { udf_err(inode->i_sb, "unsupported strategy type: %d\n", le16_to_cpu(fe->icbTag.strategyType)); - brelse(bh); - make_bad_inode(inode); - return; + goto out; } - udf_fill_inode(inode, bh); - - brelse(bh); -} - -static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) -{ - struct fileEntry *fe; - struct extendedFileEntry *efe; - struct udf_sb_info *sbi = UDF_SB(inode->i_sb); - struct udf_inode_info *iinfo = UDF_I(inode); - unsigned int link_count; - - fe = (struct fileEntry *)bh->b_data; - efe = (struct extendedFileEntry *)bh->b_data; - if (fe->icbTag.strategyType == cpu_to_le16(4)) iinfo->i_strat4096 = 0; else /* if (fe->icbTag.strategyType == cpu_to_le16(4096)) */ @@ -1378,11 +1374,10 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_EFE)) { iinfo->i_efe = 1; iinfo->i_use = 0; - if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - - sizeof(struct extendedFileEntry))) { - make_bad_inode(inode); - return; - } + ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize - + sizeof(struct extendedFileEntry)); + if (ret) + goto out; memcpy(iinfo->i_ext.i_data, bh->b_data + sizeof(struct extendedFileEntry), inode->i_sb->s_blocksize - @@ -1390,11 +1385,10 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) } else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_FE)) { iinfo->i_efe = 0; iinfo->i_use = 0; - if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - - sizeof(struct fileEntry))) { - make_bad_inode(inode); - return; - } + ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize - + sizeof(struct fileEntry)); + if (ret) + goto out; memcpy(iinfo->i_ext.i_data, bh->b_data + sizeof(struct fileEntry), inode->i_sb->s_blocksize - sizeof(struct fileEntry)); @@ -1404,18 +1398,18 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) iinfo->i_lenAlloc = le32_to_cpu( ((struct unallocSpaceEntry *)bh->b_data)-> lengthAllocDescs); - if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - - sizeof(struct unallocSpaceEntry))) { - make_bad_inode(inode); - return; - } + ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize - + sizeof(struct unallocSpaceEntry)); + if (ret) + goto out; memcpy(iinfo->i_ext.i_data, bh->b_data + sizeof(struct unallocSpaceEntry), inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry)); - return; + return 0; } + ret = -EIO; read_lock(&sbi->s_cred_lock); i_uid_write(inode, le32_to_cpu(fe->uid)); if (!uid_valid(inode->i_uid) || @@ -1441,8 +1435,10 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) read_unlock(&sbi->s_cred_lock); link_count = le16_to_cpu(fe->fileLinkCount); - if (!link_count) - link_count = 1; + if (!link_count) { + ret = -ESTALE; + goto out; + } set_nlink(inode, link_count); inode->i_size = le64_to_cpu(fe->informationLength); @@ -1488,6 +1484,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) iinfo->i_lenAlloc = le32_to_cpu(efe->lengthAllocDescs); iinfo->i_checkpoint = le32_to_cpu(efe->checkpoint); } + inode->i_generation = iinfo->i_unique; switch (fe->icbTag.fileType) { case ICBTAG_FILE_TYPE_DIRECTORY: @@ -1537,8 +1534,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) default: udf_err(inode->i_sb, "(ino %ld) failed unknown file type=%d\n", inode->i_ino, fe->icbTag.fileType); - make_bad_inode(inode); - return; + goto out; } if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { struct deviceSpec *dsea = @@ -1549,8 +1545,12 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) le32_to_cpu(dsea->minorDeviceIdent))); /* Developer ID ??? */ } else - make_bad_inode(inode); + goto out; } + ret = 0; +out: + brelse(bh); + return ret; } static int udf_alloc_i_data(struct inode *inode, size_t size) @@ -1664,7 +1664,7 @@ static int udf_update_inode(struct inode *inode, int do_sync) FE_PERM_U_DELETE | FE_PERM_U_CHATTR)); fe->permissions = cpu_to_le32(udfperms); - if (S_ISDIR(inode->i_mode)) + if (S_ISDIR(inode->i_mode) && inode->i_nlink > 0) fe->fileLinkCount = cpu_to_le16(inode->i_nlink - 1); else fe->fileLinkCount = cpu_to_le16(inode->i_nlink); @@ -1830,32 +1830,23 @@ struct inode *udf_iget(struct super_block *sb, struct kernel_lb_addr *ino) { unsigned long block = udf_get_lb_pblock(sb, ino, 0); struct inode *inode = iget_locked(sb, block); + int err; if (!inode) - return NULL; - - if (inode->i_state & I_NEW) { - memcpy(&UDF_I(inode)->i_location, ino, sizeof(struct kernel_lb_addr)); - __udf_read_inode(inode); - unlock_new_inode(inode); - } + return ERR_PTR(-ENOMEM); - if (is_bad_inode(inode)) - goto out_iput; + if (!(inode->i_state & I_NEW)) + return inode; - if (ino->logicalBlockNum >= UDF_SB(sb)-> - s_partmaps[ino->partitionReferenceNum].s_partition_len) { - udf_debug("block=%d, partition=%d out of range\n", - ino->logicalBlockNum, ino->partitionReferenceNum); - make_bad_inode(inode); - goto out_iput; + memcpy(&UDF_I(inode)->i_location, ino, sizeof(struct kernel_lb_addr)); + err = udf_read_inode(inode); + if (err < 0) { + iget_failed(inode); + return ERR_PTR(err); } + unlock_new_inode(inode); return inode; - - out_iput: - iput(inode); - return NULL; } int udf_add_aext(struct inode *inode, struct extent_position *epos, diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c index 6583fe9b064..6ad5a453af9 100644 --- a/fs/udf/lowlevel.c +++ b/fs/udf/lowlevel.c @@ -21,7 +21,7 @@ #include <linux/blkdev.h> #include <linux/cdrom.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include "udf_sb.h" diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 9737cba1357..c12e260fd6c 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -270,9 +270,8 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, NULL, 0), }; inode = udf_iget(dir->i_sb, lb); - if (!inode) { - return ERR_PTR(-EACCES); - } + if (IS_ERR(inode)) + return inode; } else #endif /* UDF_RECOVERY */ @@ -285,9 +284,8 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, loc = lelb_to_cpu(cfi.icb.extLocation); inode = udf_iget(dir->i_sb, &loc); - if (!inode) { - return ERR_PTR(-EACCES); - } + if (IS_ERR(inode)) + return ERR_CAST(inode); } return d_splice_alias(inode, dentry); @@ -550,32 +548,18 @@ static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi, return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL); } -static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode, - bool excl) +static int udf_add_nondir(struct dentry *dentry, struct inode *inode) { + struct udf_inode_info *iinfo = UDF_I(inode); + struct inode *dir = dentry->d_parent->d_inode; struct udf_fileident_bh fibh; - struct inode *inode; struct fileIdentDesc cfi, *fi; int err; - struct udf_inode_info *iinfo; - - inode = udf_new_inode(dir, mode, &err); - if (!inode) { - return err; - } - - iinfo = UDF_I(inode); - if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) - inode->i_data.a_ops = &udf_adinicb_aops; - else - inode->i_data.a_ops = &udf_aops; - inode->i_op = &udf_file_inode_operations; - inode->i_fop = &udf_file_operations; - mark_inode_dirty(inode); fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); - if (!fi) { + if (unlikely(!fi)) { inode_dec_link_count(inode); + unlock_new_inode(inode); iput(inode); return err; } @@ -589,23 +573,21 @@ static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); + unlock_new_inode(inode); d_instantiate(dentry, inode); return 0; } -static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool excl) { - struct inode *inode; - struct udf_inode_info *iinfo; - int err; + struct inode *inode = udf_new_inode(dir, mode); - inode = udf_new_inode(dir, mode, &err); - if (!inode) - return err; + if (IS_ERR(inode)) + return PTR_ERR(inode); - iinfo = UDF_I(inode); - if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) + if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) inode->i_data.a_ops = &udf_adinicb_aops; else inode->i_data.a_ops = &udf_aops; @@ -613,7 +595,25 @@ static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_fop = &udf_file_operations; mark_inode_dirty(inode); + return udf_add_nondir(dentry, inode); +} + +static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + struct inode *inode = udf_new_inode(dir, mode); + + if (IS_ERR(inode)) + return PTR_ERR(inode); + + if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) + inode->i_data.a_ops = &udf_adinicb_aops; + else + inode->i_data.a_ops = &udf_aops; + inode->i_op = &udf_file_inode_operations; + inode->i_fop = &udf_file_operations; + mark_inode_dirty(inode); d_tmpfile(dentry, inode); + unlock_new_inode(inode); return 0; } @@ -621,44 +621,16 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct inode *inode; - struct udf_fileident_bh fibh; - struct fileIdentDesc cfi, *fi; - int err; - struct udf_inode_info *iinfo; if (!old_valid_dev(rdev)) return -EINVAL; - err = -EIO; - inode = udf_new_inode(dir, mode, &err); - if (!inode) - goto out; + inode = udf_new_inode(dir, mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); - iinfo = UDF_I(inode); init_special_inode(inode, mode, rdev); - fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); - if (!fi) { - inode_dec_link_count(inode); - iput(inode); - return err; - } - cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); - cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location); - *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = - cpu_to_le32(iinfo->i_unique & 0x00000000FFFFFFFFUL); - udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); - if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) - mark_inode_dirty(dir); - mark_inode_dirty(inode); - - if (fibh.sbh != fibh.ebh) - brelse(fibh.ebh); - brelse(fibh.sbh); - d_instantiate(dentry, inode); - err = 0; - -out: - return err; + return udf_add_nondir(dentry, inode); } static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) @@ -670,10 +642,9 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct udf_inode_info *dinfo = UDF_I(dir); struct udf_inode_info *iinfo; - err = -EIO; - inode = udf_new_inode(dir, S_IFDIR | mode, &err); - if (!inode) - goto out; + inode = udf_new_inode(dir, S_IFDIR | mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); iinfo = UDF_I(inode); inode->i_op = &udf_dir_inode_operations; @@ -681,6 +652,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err); if (!fi) { inode_dec_link_count(inode); + unlock_new_inode(inode); iput(inode); goto out; } @@ -699,6 +671,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (!fi) { clear_nlink(inode); mark_inode_dirty(inode); + unlock_new_inode(inode); iput(inode); goto out; } @@ -710,6 +683,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); inc_nlink(dir); mark_inode_dirty(dir); + unlock_new_inode(inode); d_instantiate(dentry, inode); if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); @@ -876,14 +850,11 @@ out: static int udf_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - struct inode *inode; + struct inode *inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO); struct pathComponent *pc; const char *compstart; - struct udf_fileident_bh fibh; struct extent_position epos = {}; int eoffset, elen = 0; - struct fileIdentDesc *fi; - struct fileIdentDesc cfi; uint8_t *ea; int err; int block; @@ -892,9 +863,8 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, struct udf_inode_info *iinfo; struct super_block *sb = dir->i_sb; - inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO, &err); - if (!inode) - goto out; + if (IS_ERR(inode)) + return PTR_ERR(inode); iinfo = UDF_I(inode); down_write(&iinfo->i_data_sem); @@ -1012,24 +982,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, mark_inode_dirty(inode); up_write(&iinfo->i_data_sem); - fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); - if (!fi) - goto out_no_entry; - cfi.icb.extLength = cpu_to_le32(sb->s_blocksize); - cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location); - if (UDF_SB(inode->i_sb)->s_lvid_bh) { - *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = - cpu_to_le32(lvid_get_unique_id(sb)); - } - udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); - if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) - mark_inode_dirty(dir); - if (fibh.sbh != fibh.ebh) - brelse(fibh.ebh); - brelse(fibh.sbh); - d_instantiate(dentry, inode); - err = 0; - + err = udf_add_nondir(dentry, inode); out: kfree(name); return err; @@ -1037,6 +990,7 @@ out: out_no_entry: up_write(&iinfo->i_data_sem); inode_dec_link_count(inode); + unlock_new_inode(inode); iput(inode); goto out; } @@ -1221,7 +1175,7 @@ static struct dentry *udf_get_parent(struct dentry *child) struct udf_fileident_bh fibh; if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi)) - goto out_unlock; + return ERR_PTR(-EACCES); if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); @@ -1229,12 +1183,10 @@ static struct dentry *udf_get_parent(struct dentry *child) tloc = lelb_to_cpu(cfi.icb.extLocation); inode = udf_iget(child->d_inode->i_sb, &tloc); - if (!inode) - goto out_unlock; + if (IS_ERR(inode)) + return ERR_CAST(inode); return d_obtain_alias(inode); -out_unlock: - return ERR_PTR(-EACCES); } @@ -1251,8 +1203,8 @@ static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block, loc.partitionReferenceNum = partref; inode = udf_iget(sb, &loc); - if (inode == NULL) - return ERR_PTR(-ENOMEM); + if (IS_ERR(inode)) + return ERR_CAST(inode); if (generation && inode->i_generation != generation) { iput(inode); diff --git a/fs/udf/super.c b/fs/udf/super.c index 3286db047a4..5401fc33f5c 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -63,7 +63,7 @@ #include "udf_i.h" #include <linux/init.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #define VDS_POS_PRIMARY_VOL_DESC 0 #define VDS_POS_UNALLOC_SPACE_DESC 1 @@ -961,12 +961,14 @@ struct inode *udf_find_metadata_inode_efe(struct super_block *sb, metadata_fe = udf_iget(sb, &addr); - if (metadata_fe == NULL) + if (IS_ERR(metadata_fe)) { udf_warn(sb, "metadata inode efe not found\n"); - else if (UDF_I(metadata_fe)->i_alloc_type != ICBTAG_FLAG_AD_SHORT) { + return metadata_fe; + } + if (UDF_I(metadata_fe)->i_alloc_type != ICBTAG_FLAG_AD_SHORT) { udf_warn(sb, "metadata inode efe does not have short allocation descriptors!\n"); iput(metadata_fe); - metadata_fe = NULL; + return ERR_PTR(-EIO); } return metadata_fe; @@ -978,6 +980,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) struct udf_part_map *map; struct udf_meta_data *mdata; struct kernel_lb_addr addr; + struct inode *fe; map = &sbi->s_partmaps[partition]; mdata = &map->s_type_specific.s_metadata; @@ -986,22 +989,24 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) udf_debug("Metadata file location: block = %d part = %d\n", mdata->s_meta_file_loc, map->s_partition_num); - mdata->s_metadata_fe = udf_find_metadata_inode_efe(sb, - mdata->s_meta_file_loc, map->s_partition_num); - - if (mdata->s_metadata_fe == NULL) { + fe = udf_find_metadata_inode_efe(sb, mdata->s_meta_file_loc, + map->s_partition_num); + if (IS_ERR(fe)) { /* mirror file entry */ udf_debug("Mirror metadata file location: block = %d part = %d\n", mdata->s_mirror_file_loc, map->s_partition_num); - mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb, - mdata->s_mirror_file_loc, map->s_partition_num); + fe = udf_find_metadata_inode_efe(sb, mdata->s_mirror_file_loc, + map->s_partition_num); - if (mdata->s_mirror_fe == NULL) { + if (IS_ERR(fe)) { udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n"); - return -EIO; + return PTR_ERR(fe); } - } + mdata->s_mirror_fe = fe; + } else + mdata->s_metadata_fe = fe; + /* * bitmap file entry @@ -1015,15 +1020,16 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) udf_debug("Bitmap file location: block = %d part = %d\n", addr.logicalBlockNum, addr.partitionReferenceNum); - mdata->s_bitmap_fe = udf_iget(sb, &addr); - if (mdata->s_bitmap_fe == NULL) { + fe = udf_iget(sb, &addr); + if (IS_ERR(fe)) { if (sb->s_flags & MS_RDONLY) udf_warn(sb, "bitmap inode efe not found but it's ok since the disc is mounted read-only\n"); else { udf_err(sb, "bitmap inode efe not found and attempted read-write mount\n"); - return -EIO; + return PTR_ERR(fe); } - } + } else + mdata->s_bitmap_fe = fe; } udf_debug("udf_load_metadata_files Ok\n"); @@ -1111,13 +1117,15 @@ static int udf_fill_partdesc_info(struct super_block *sb, phd->unallocSpaceTable.extPosition), .partitionReferenceNum = p_index, }; + struct inode *inode; - map->s_uspace.s_table = udf_iget(sb, &loc); - if (!map->s_uspace.s_table) { + inode = udf_iget(sb, &loc); + if (IS_ERR(inode)) { udf_debug("cannot load unallocSpaceTable (part %d)\n", p_index); - return -EIO; + return PTR_ERR(inode); } + map->s_uspace.s_table = inode; map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE; udf_debug("unallocSpaceTable (part %d) @ %ld\n", p_index, map->s_uspace.s_table->i_ino); @@ -1144,14 +1152,15 @@ static int udf_fill_partdesc_info(struct super_block *sb, phd->freedSpaceTable.extPosition), .partitionReferenceNum = p_index, }; + struct inode *inode; - map->s_fspace.s_table = udf_iget(sb, &loc); - if (!map->s_fspace.s_table) { + inode = udf_iget(sb, &loc); + if (IS_ERR(inode)) { udf_debug("cannot load freedSpaceTable (part %d)\n", p_index); - return -EIO; + return PTR_ERR(inode); } - + map->s_fspace.s_table = inode; map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE; udf_debug("freedSpaceTable (part %d) @ %ld\n", p_index, map->s_fspace.s_table->i_ino); @@ -1178,6 +1187,7 @@ static void udf_find_vat_block(struct super_block *sb, int p_index, struct udf_part_map *map = &sbi->s_partmaps[p_index]; sector_t vat_block; struct kernel_lb_addr ino; + struct inode *inode; /* * VAT file entry is in the last recorded block. Some broken disks have @@ -1186,10 +1196,13 @@ static void udf_find_vat_block(struct super_block *sb, int p_index, ino.partitionReferenceNum = type1_index; for (vat_block = start_block; vat_block >= map->s_partition_root && - vat_block >= start_block - 3 && - !sbi->s_vat_inode; vat_block--) { + vat_block >= start_block - 3; vat_block--) { ino.logicalBlockNum = vat_block - map->s_partition_root; - sbi->s_vat_inode = udf_iget(sb, &ino); + inode = udf_iget(sb, &ino); + if (!IS_ERR(inode)) { + sbi->s_vat_inode = inode; + break; + } } } @@ -2205,10 +2218,10 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) /* assign inodes by physical block number */ /* perhaps it's not extensible enough, but for now ... */ inode = udf_iget(sb, &rootdir); - if (!inode) { + if (IS_ERR(inode)) { udf_err(sb, "Error in udf_iget, block=%d, partition=%d\n", rootdir.logicalBlockNum, rootdir.partitionReferenceNum); - ret = -EIO; + ret = PTR_ERR(inode); goto error_out; } diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index d7c6dbe4194..6fb7945c1e6 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -20,7 +20,7 @@ */ #include "udfdecl.h" -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/time.h> diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index be7dabbbcb4..742557be993 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -143,7 +143,6 @@ extern int udf_expand_file_adinicb(struct inode *); extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *); extern struct buffer_head *udf_bread(struct inode *, int, int, int *); extern int udf_setsize(struct inode *, loff_t); -extern void udf_read_inode(struct inode *); extern void udf_evict_inode(struct inode *); extern int udf_write_inode(struct inode *, struct writeback_control *wbc); extern long udf_block_map(struct inode *, sector_t); @@ -209,7 +208,7 @@ extern int udf_CS0toUTF8(struct ustr *, const struct ustr *); /* ialloc.c */ extern void udf_free_inode(struct inode *); -extern struct inode *udf_new_inode(struct inode *, umode_t, int *); +extern struct inode *udf_new_inode(struct inode *, umode_t); /* truncate.c */ extern void udf_truncate_tail_extent(struct inode *); diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index 44b815e57f9..afd470e588f 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -412,7 +412,6 @@ static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, int extIndex = 0, newExtIndex = 0, hasExt = 0; unsigned short valueCRC; uint8_t curr; - const uint8_t hexChar[] = "0123456789ABCDEF"; if (udfName[0] == '.' && (udfLen == 1 || (udfLen == 2 && udfName[1] == '.'))) { @@ -477,10 +476,10 @@ static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, newIndex = 250; newName[newIndex++] = CRC_MARK; valueCRC = crc_itu_t(0, fidName, fidNameLen); - newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12]; - newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8]; - newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4]; - newName[newIndex++] = hexChar[(valueCRC & 0x000f)]; + newName[newIndex++] = hex_asc_upper_hi(valueCRC >> 8); + newName[newIndex++] = hex_asc_upper_lo(valueCRC >> 8); + newName[newIndex++] = hex_asc_upper_hi(valueCRC); + newName[newIndex++] = hex_asc_upper_lo(valueCRC); if (hasExt) { newName[newIndex++] = EXT_MARK; diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 7c580c97990..be7d42c7d93 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -902,9 +902,6 @@ void ufs_evict_inode(struct inode * inode) invalidate_inode_buffers(inode); clear_inode(inode); - if (want_delete) { - lock_ufs(inode->i_sb); - ufs_free_inode (inode); - unlock_ufs(inode->i_sb); - } + if (want_delete) + ufs_free_inode(inode); } diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 90d74b8f8eb..2df62a73f20 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -126,12 +126,12 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, if (l > sb->s_blocksize) goto out_notlocked; - lock_ufs(dir->i_sb); inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); err = PTR_ERR(inode); if (IS_ERR(inode)) - goto out; + goto out_notlocked; + lock_ufs(dir->i_sb); if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) { /* slow symlink */ inode->i_op = &ufs_symlink_inode_operations; @@ -181,13 +181,9 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) struct inode * inode; int err; - lock_ufs(dir->i_sb); - inode_inc_link_count(dir); - inode = ufs_new_inode(dir, S_IFDIR|mode); - err = PTR_ERR(inode); if (IS_ERR(inode)) - goto out_dir; + return PTR_ERR(inode); inode->i_op = &ufs_dir_inode_operations; inode->i_fop = &ufs_dir_operations; @@ -195,6 +191,9 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) inode_inc_link_count(inode); + lock_ufs(dir->i_sb); + inode_inc_link_count(dir); + err = ufs_make_empty(inode, dir); if (err) goto out_fail; @@ -212,7 +211,6 @@ out_fail: inode_dec_link_count(inode); inode_dec_link_count(inode); iput (inode); -out_dir: inode_dec_link_count(dir); unlock_ufs(dir->i_sb); goto out; diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 399e8cec6e6..5d47b4df61e 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -1,6 +1,7 @@ config XFS_FS tristate "XFS filesystem support" depends on BLOCK + depends on (64BIT || LBDAF) select EXPORTFS select LIBCRC32C help diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index c21f4350666..d6179994958 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -17,6 +17,7 @@ # ccflags-y += -I$(src) # needed for trace events +ccflags-y += -I$(src)/libxfs ccflags-$(CONFIG_XFS_DEBUG) += -g @@ -25,6 +26,39 @@ obj-$(CONFIG_XFS_FS) += xfs.o # this one should be compiled first, as the tracing macros can easily blow up xfs-y += xfs_trace.o +# build the libxfs code first +xfs-y += $(addprefix libxfs/, \ + xfs_alloc.o \ + xfs_alloc_btree.o \ + xfs_attr.o \ + xfs_attr_leaf.o \ + xfs_attr_remote.o \ + xfs_bmap.o \ + xfs_bmap_btree.o \ + xfs_btree.o \ + xfs_da_btree.o \ + xfs_da_format.o \ + xfs_dir2.o \ + xfs_dir2_block.o \ + xfs_dir2_data.o \ + xfs_dir2_leaf.o \ + xfs_dir2_node.o \ + xfs_dir2_sf.o \ + xfs_dquot_buf.o \ + xfs_ialloc.o \ + xfs_ialloc_btree.o \ + xfs_inode_fork.o \ + xfs_inode_buf.o \ + xfs_log_rlimit.o \ + xfs_sb.o \ + xfs_symlink_remote.o \ + xfs_trans_resv.o \ + ) +# xfs_rtbitmap is shared with libxfs +xfs-$(CONFIG_XFS_RT) += $(addprefix libxfs/, \ + xfs_rtbitmap.o \ + ) + # highlevel code xfs-y += xfs_aops.o \ xfs_attr_inactive.o \ @@ -45,53 +79,27 @@ xfs-y += xfs_aops.o \ xfs_ioctl.o \ xfs_iomap.o \ xfs_iops.o \ + xfs_inode.o \ xfs_itable.o \ xfs_message.o \ xfs_mount.o \ xfs_mru_cache.o \ xfs_super.o \ xfs_symlink.o \ + xfs_sysfs.o \ xfs_trans.o \ xfs_xattr.o \ kmem.o \ uuid.o -# code shared with libxfs -xfs-y += xfs_alloc.o \ - xfs_alloc_btree.o \ - xfs_attr.o \ - xfs_attr_leaf.o \ - xfs_attr_remote.o \ - xfs_bmap.o \ - xfs_bmap_btree.o \ - xfs_btree.o \ - xfs_da_btree.o \ - xfs_da_format.o \ - xfs_dir2.o \ - xfs_dir2_block.o \ - xfs_dir2_data.o \ - xfs_dir2_leaf.o \ - xfs_dir2_node.o \ - xfs_dir2_sf.o \ - xfs_dquot_buf.o \ - xfs_ialloc.o \ - xfs_ialloc_btree.o \ - xfs_icreate_item.o \ - xfs_inode.o \ - xfs_inode_fork.o \ - xfs_inode_buf.o \ - xfs_log_recover.o \ - xfs_log_rlimit.o \ - xfs_sb.o \ - xfs_symlink_remote.o \ - xfs_trans_resv.o - # low-level transaction/log code xfs-y += xfs_log.o \ xfs_log_cil.o \ xfs_buf_item.o \ xfs_extfree_item.o \ + xfs_icreate_item.o \ xfs_inode_item.o \ + xfs_log_recover.o \ xfs_trans_ail.o \ xfs_trans_buf.o \ xfs_trans_extfree.o \ @@ -107,8 +115,7 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \ xfs_quotaops.o # xfs_rtbitmap is shared with libxfs -xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o \ - xfs_rtbitmap.o +xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o xfs-$(CONFIG_PROC_FS) += xfs_stats.o diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 6e247a99f5d..6e247a99f5d 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index d43813267a8..4bffffe038a 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -483,9 +483,9 @@ xfs_agfl_read_verify( return; if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) - xfs_buf_ioerror(bp, EFSBADCRC); + xfs_buf_ioerror(bp, -EFSBADCRC); else if (!xfs_agfl_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); if (bp->b_error) xfs_verifier_error(bp); @@ -503,7 +503,7 @@ xfs_agfl_write_verify( return; if (!xfs_agfl_verify(bp)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); return; } @@ -559,7 +559,7 @@ xfs_alloc_update_counters( xfs_trans_agblocks_delta(tp, len); if (unlikely(be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length))) - return EFSCORRUPTED; + return -EFSCORRUPTED; xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); return 0; @@ -2234,11 +2234,11 @@ xfs_agf_read_verify( if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) - xfs_buf_ioerror(bp, EFSBADCRC); + xfs_buf_ioerror(bp, -EFSBADCRC); else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, XFS_ERRTAG_ALLOC_READ_AGF, XFS_RANDOM_ALLOC_READ_AGF)) - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); if (bp->b_error) xfs_verifier_error(bp); @@ -2252,7 +2252,7 @@ xfs_agf_write_verify( struct xfs_buf_log_item *bip = bp->b_fspriv; if (!xfs_agf_verify(mp, bp)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); return; } @@ -2601,11 +2601,11 @@ xfs_free_extent( */ args.agno = XFS_FSB_TO_AGNO(args.mp, bno); if (args.agno >= args.mp->m_sb.sb_agcount) - return EFSCORRUPTED; + return -EFSCORRUPTED; args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); if (args.agbno >= args.mp->m_sb.sb_agblocks) - return EFSCORRUPTED; + return -EFSCORRUPTED; args.pag = xfs_perag_get(args.mp, args.agno); ASSERT(args.pag); @@ -2617,7 +2617,7 @@ xfs_free_extent( /* validate the extent size is legal now we have the agf locked */ if (args.agbno + len > be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) { - error = EFSCORRUPTED; + error = -EFSCORRUPTED; goto error0; } diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index feacb061bab..feacb061bab 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 8358f1ded94..e0e83e24d3e 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -355,9 +355,9 @@ xfs_allocbt_read_verify( struct xfs_buf *bp) { if (!xfs_btree_sblock_verify_crc(bp)) - xfs_buf_ioerror(bp, EFSBADCRC); + xfs_buf_ioerror(bp, -EFSBADCRC); else if (!xfs_allocbt_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); if (bp->b_error) { trace_xfs_btree_corrupt(bp, _RET_IP_); @@ -371,7 +371,7 @@ xfs_allocbt_write_verify( { if (!xfs_allocbt_verify(bp)) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); return; } diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h index 45e189e7e81..45e189e7e81 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/libxfs/xfs_alloc_btree.h diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index bfe36fc2cdc..353fb425fae 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -85,7 +85,7 @@ xfs_attr_args_init( { if (!name) - return EINVAL; + return -EINVAL; memset(args, 0, sizeof(*args)); args->geo = dp->i_mount->m_attr_geo; @@ -95,7 +95,7 @@ xfs_attr_args_init( args->name = name; args->namelen = strlen((const char *)name); if (args->namelen >= MAXNAMELEN) - return EFAULT; /* match IRIX behaviour */ + return -EFAULT; /* match IRIX behaviour */ args->hashval = xfs_da_hashname(args->name, args->namelen); return 0; @@ -131,10 +131,10 @@ xfs_attr_get( XFS_STATS_INC(xs_attr_get); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return EIO; + return -EIO; if (!xfs_inode_hasattr(ip)) - return ENOATTR; + return -ENOATTR; error = xfs_attr_args_init(&args, ip, name, flags); if (error) @@ -145,7 +145,7 @@ xfs_attr_get( lock_mode = xfs_ilock_attr_map_shared(ip); if (!xfs_inode_hasattr(ip)) - error = ENOATTR; + error = -ENOATTR; else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) error = xfs_attr_shortform_getvalue(&args); else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) @@ -155,7 +155,7 @@ xfs_attr_get( xfs_iunlock(ip, lock_mode); *valuelenp = args.valuelen; - return error == EEXIST ? 0 : error; + return error == -EEXIST ? 0 : error; } /* @@ -213,7 +213,7 @@ xfs_attr_set( XFS_STATS_INC(xs_attr_set); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) - return EIO; + return -EIO; error = xfs_attr_args_init(&args, dp, name, flags); if (error) @@ -304,7 +304,7 @@ xfs_attr_set( * the inode. */ error = xfs_attr_shortform_addname(&args); - if (error != ENOSPC) { + if (error != -ENOSPC) { /* * Commit the shortform mods, and we're done. * NOTE: this is also the error path (EEXIST, etc). @@ -419,10 +419,10 @@ xfs_attr_remove( XFS_STATS_INC(xs_attr_remove); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) - return EIO; + return -EIO; if (!xfs_inode_hasattr(dp)) - return ENOATTR; + return -ENOATTR; error = xfs_attr_args_init(&args, dp, name, flags); if (error) @@ -477,7 +477,7 @@ xfs_attr_remove( xfs_trans_ijoin(args.trans, dp, 0); if (!xfs_inode_hasattr(dp)) { - error = XFS_ERROR(ENOATTR); + error = -ENOATTR; } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { ASSERT(dp->i_afp->if_flags & XFS_IFINLINE); error = xfs_attr_shortform_remove(&args); @@ -534,28 +534,28 @@ xfs_attr_shortform_addname(xfs_da_args_t *args) trace_xfs_attr_sf_addname(args); retval = xfs_attr_shortform_lookup(args); - if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { - return(retval); - } else if (retval == EEXIST) { + if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) { + return retval; + } else if (retval == -EEXIST) { if (args->flags & ATTR_CREATE) - return(retval); + return retval; retval = xfs_attr_shortform_remove(args); ASSERT(retval == 0); } if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX || args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX) - return(XFS_ERROR(ENOSPC)); + return -ENOSPC; newsize = XFS_ATTR_SF_TOTSIZE(args->dp); newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen); forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize); if (!forkoff) - return(XFS_ERROR(ENOSPC)); + return -ENOSPC; xfs_attr_shortform_add(args, forkoff); - return(0); + return 0; } @@ -592,10 +592,10 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) * the given flags produce an error or call for an atomic rename. */ retval = xfs_attr3_leaf_lookup_int(bp, args); - if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { + if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) { xfs_trans_brelse(args->trans, bp); return retval; - } else if (retval == EEXIST) { + } else if (retval == -EEXIST) { if (args->flags & ATTR_CREATE) { /* pure create op */ xfs_trans_brelse(args->trans, bp); return retval; @@ -626,7 +626,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) * if required. */ retval = xfs_attr3_leaf_add(bp, args); - if (retval == ENOSPC) { + if (retval == -ENOSPC) { /* * Promote the attribute list to the Btree format, then * Commit that transaction so that the node_addname() call @@ -642,7 +642,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) ASSERT(committed); args->trans = NULL; xfs_bmap_cancel(args->flist); - return(error); + return error; } /* @@ -658,13 +658,13 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) */ error = xfs_trans_roll(&args->trans, dp); if (error) - return (error); + return error; /* * Fob the whole rest of the problem off on the Btree code. */ error = xfs_attr_node_addname(args); - return(error); + return error; } /* @@ -673,7 +673,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) */ error = xfs_trans_roll(&args->trans, dp); if (error) - return (error); + return error; /* * If there was an out-of-line value, allocate the blocks we @@ -684,7 +684,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) if (args->rmtblkno > 0) { error = xfs_attr_rmtval_set(args); if (error) - return(error); + return error; } /* @@ -700,7 +700,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) */ error = xfs_attr3_leaf_flipflags(args); if (error) - return(error); + return error; /* * Dismantle the "old" attribute/value pair by removing @@ -714,7 +714,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) if (args->rmtblkno) { error = xfs_attr_rmtval_remove(args); if (error) - return(error); + return error; } /* @@ -744,7 +744,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) ASSERT(committed); args->trans = NULL; xfs_bmap_cancel(args->flist); - return(error); + return error; } /* @@ -795,7 +795,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args) return error; error = xfs_attr3_leaf_lookup_int(bp, args); - if (error == ENOATTR) { + if (error == -ENOATTR) { xfs_trans_brelse(args->trans, bp); return error; } @@ -850,7 +850,7 @@ xfs_attr_leaf_get(xfs_da_args_t *args) return error; error = xfs_attr3_leaf_lookup_int(bp, args); - if (error != EEXIST) { + if (error != -EEXIST) { xfs_trans_brelse(args->trans, bp); return error; } @@ -906,9 +906,9 @@ restart: goto out; blk = &state->path.blk[ state->path.active-1 ]; ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); - if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { + if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) { goto out; - } else if (retval == EEXIST) { + } else if (retval == -EEXIST) { if (args->flags & ATTR_CREATE) goto out; @@ -933,7 +933,7 @@ restart: } retval = xfs_attr3_leaf_add(blk->bp, state->args); - if (retval == ENOSPC) { + if (retval == -ENOSPC) { if (state->path.active == 1) { /* * Its really a single leaf node, but it had @@ -1031,7 +1031,7 @@ restart: if (args->rmtblkno > 0) { error = xfs_attr_rmtval_set(args); if (error) - return(error); + return error; } /* @@ -1061,7 +1061,7 @@ restart: if (args->rmtblkno) { error = xfs_attr_rmtval_remove(args); if (error) - return(error); + return error; } /* @@ -1134,8 +1134,8 @@ out: if (state) xfs_da_state_free(state); if (error) - return(error); - return(retval); + return error; + return retval; } /* @@ -1168,7 +1168,7 @@ xfs_attr_node_removename(xfs_da_args_t *args) * Search to see if name exists, and get back a pointer to it. */ error = xfs_da3_node_lookup_int(state, &retval); - if (error || (retval != EEXIST)) { + if (error || (retval != -EEXIST)) { if (error == 0) error = retval; goto out; @@ -1297,7 +1297,7 @@ xfs_attr_node_removename(xfs_da_args_t *args) out: xfs_da_state_free(state); - return(error); + return error; } /* @@ -1345,7 +1345,7 @@ xfs_attr_fillstate(xfs_da_state_t *state) } } - return(0); + return 0; } /* @@ -1376,7 +1376,7 @@ xfs_attr_refillstate(xfs_da_state_t *state) blk->blkno, blk->disk_blkno, &blk->bp, XFS_ATTR_FORK); if (error) - return(error); + return error; } else { blk->bp = NULL; } @@ -1395,13 +1395,13 @@ xfs_attr_refillstate(xfs_da_state_t *state) blk->blkno, blk->disk_blkno, &blk->bp, XFS_ATTR_FORK); if (error) - return(error); + return error; } else { blk->bp = NULL; } } - return(0); + return 0; } /* @@ -1431,7 +1431,7 @@ xfs_attr_node_get(xfs_da_args_t *args) error = xfs_da3_node_lookup_int(state, &retval); if (error) { retval = error; - } else if (retval == EEXIST) { + } else if (retval == -EEXIST) { blk = &state->path.blk[ state->path.active-1 ]; ASSERT(blk->bp != NULL); ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); @@ -1455,5 +1455,5 @@ xfs_attr_node_get(xfs_da_args_t *args) } xfs_da_state_free(state); - return(retval); + return retval; } diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 28712d29e43..b1f73dbbf3d 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -214,7 +214,7 @@ xfs_attr3_leaf_write_verify( struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; if (!xfs_attr3_leaf_verify(bp)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); return; } @@ -242,9 +242,9 @@ xfs_attr3_leaf_read_verify( if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF)) - xfs_buf_ioerror(bp, EFSBADCRC); + xfs_buf_ioerror(bp, -EFSBADCRC); else if (!xfs_attr3_leaf_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); if (bp->b_error) xfs_verifier_error(bp); @@ -547,7 +547,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) break; } if (i == end) - return(XFS_ERROR(ENOATTR)); + return -ENOATTR; /* * Fix up the attribute fork data, covering the hole @@ -582,7 +582,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) xfs_sbversion_add_attr2(mp, args->trans); - return(0); + return 0; } /* @@ -611,9 +611,9 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args) continue; if (!xfs_attr_namesp_match(args->flags, sfe->flags)) continue; - return(XFS_ERROR(EEXIST)); + return -EEXIST; } - return(XFS_ERROR(ENOATTR)); + return -ENOATTR; } /* @@ -640,18 +640,18 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args) continue; if (args->flags & ATTR_KERNOVAL) { args->valuelen = sfe->valuelen; - return(XFS_ERROR(EEXIST)); + return -EEXIST; } if (args->valuelen < sfe->valuelen) { args->valuelen = sfe->valuelen; - return(XFS_ERROR(ERANGE)); + return -ERANGE; } args->valuelen = sfe->valuelen; memcpy(args->value, &sfe->nameval[args->namelen], args->valuelen); - return(XFS_ERROR(EEXIST)); + return -EEXIST; } - return(XFS_ERROR(ENOATTR)); + return -ENOATTR; } /* @@ -691,7 +691,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) * If we hit an IO error middle of the transaction inside * grow_inode(), we may have inconsistent data. Bail out. */ - if (error == EIO) + if (error == -EIO) goto out; xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */ memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */ @@ -730,9 +730,9 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) sfe->namelen); nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags); error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */ - ASSERT(error == ENOATTR); + ASSERT(error == -ENOATTR); error = xfs_attr3_leaf_add(bp, &nargs); - ASSERT(error != ENOSPC); + ASSERT(error != -ENOSPC); if (error) goto out; sfe = XFS_ATTR_SF_NEXTENTRY(sfe); @@ -741,7 +741,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) out: kmem_free(tmpbuffer); - return(error); + return error; } /* @@ -769,12 +769,12 @@ xfs_attr_shortform_allfit( if (entry->flags & XFS_ATTR_INCOMPLETE) continue; /* don't copy partial entries */ if (!(entry->flags & XFS_ATTR_LOCAL)) - return(0); + return 0; name_loc = xfs_attr3_leaf_name_local(leaf, i); if (name_loc->namelen >= XFS_ATTR_SF_ENTSIZE_MAX) - return(0); + return 0; if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX) - return(0); + return 0; bytes += sizeof(struct xfs_attr_sf_entry) - 1 + name_loc->namelen + be16_to_cpu(name_loc->valuelen); @@ -809,7 +809,7 @@ xfs_attr3_leaf_to_shortform( tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP); if (!tmpbuffer) - return ENOMEM; + return -ENOMEM; memcpy(tmpbuffer, bp->b_addr, args->geo->blksize); @@ -1017,10 +1017,10 @@ xfs_attr3_leaf_split( ASSERT(oldblk->magic == XFS_ATTR_LEAF_MAGIC); error = xfs_da_grow_inode(state->args, &blkno); if (error) - return(error); + return error; error = xfs_attr3_leaf_create(state->args, blkno, &newblk->bp); if (error) - return(error); + return error; newblk->blkno = blkno; newblk->magic = XFS_ATTR_LEAF_MAGIC; @@ -1031,7 +1031,7 @@ xfs_attr3_leaf_split( xfs_attr3_leaf_rebalance(state, oldblk, newblk); error = xfs_da3_blk_link(state, oldblk, newblk); if (error) - return(error); + return error; /* * Save info on "old" attribute for "atomic rename" ops, leaf_add() @@ -1053,7 +1053,7 @@ xfs_attr3_leaf_split( */ oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL); newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL); - return(error); + return error; } /* @@ -1108,7 +1108,7 @@ xfs_attr3_leaf_add( * no good and we should just give up. */ if (!ichdr.holes && sum < entsize) - return XFS_ERROR(ENOSPC); + return -ENOSPC; /* * Compact the entries to coalesce free space. @@ -1121,7 +1121,7 @@ xfs_attr3_leaf_add( * free region, in freemap[0]. If it is not big enough, give up. */ if (ichdr.freemap[0].size < (entsize + sizeof(xfs_attr_leaf_entry_t))) { - tmp = ENOSPC; + tmp = -ENOSPC; goto out_log_hdr; } @@ -1692,7 +1692,7 @@ xfs_attr3_leaf_toosmall( ichdr.usedbytes; if (bytes > (state->args->geo->blksize >> 1)) { *action = 0; /* blk over 50%, don't try to join */ - return(0); + return 0; } /* @@ -1711,7 +1711,7 @@ xfs_attr3_leaf_toosmall( error = xfs_da3_path_shift(state, &state->altpath, forward, 0, &retval); if (error) - return(error); + return error; if (retval) { *action = 0; } else { @@ -1740,7 +1740,7 @@ xfs_attr3_leaf_toosmall( error = xfs_attr3_leaf_read(state->args->trans, state->args->dp, blkno, -1, &bp); if (error) - return(error); + return error; xfs_attr3_leaf_hdr_from_disk(&ichdr2, bp->b_addr); @@ -1757,7 +1757,7 @@ xfs_attr3_leaf_toosmall( } if (i >= 2) { *action = 0; - return(0); + return 0; } /* @@ -1773,13 +1773,13 @@ xfs_attr3_leaf_toosmall( 0, &retval); } if (error) - return(error); + return error; if (retval) { *action = 0; } else { *action = 1; } - return(0); + return 0; } /* @@ -2123,7 +2123,7 @@ xfs_attr3_leaf_lookup_int( } if (probe == ichdr.count || be32_to_cpu(entry->hashval) != hashval) { args->index = probe; - return XFS_ERROR(ENOATTR); + return -ENOATTR; } /* @@ -2152,7 +2152,7 @@ xfs_attr3_leaf_lookup_int( if (!xfs_attr_namesp_match(args->flags, entry->flags)) continue; args->index = probe; - return XFS_ERROR(EEXIST); + return -EEXIST; } else { name_rmt = xfs_attr3_leaf_name_remote(leaf, probe); if (name_rmt->namelen != args->namelen) @@ -2168,11 +2168,11 @@ xfs_attr3_leaf_lookup_int( args->rmtblkcnt = xfs_attr3_rmt_blocks( args->dp->i_mount, args->rmtvaluelen); - return XFS_ERROR(EEXIST); + return -EEXIST; } } args->index = probe; - return XFS_ERROR(ENOATTR); + return -ENOATTR; } /* @@ -2208,7 +2208,7 @@ xfs_attr3_leaf_getvalue( } if (args->valuelen < valuelen) { args->valuelen = valuelen; - return XFS_ERROR(ERANGE); + return -ERANGE; } args->valuelen = valuelen; memcpy(args->value, &name_loc->nameval[args->namelen], valuelen); @@ -2226,7 +2226,7 @@ xfs_attr3_leaf_getvalue( } if (args->valuelen < args->rmtvaluelen) { args->valuelen = args->rmtvaluelen; - return XFS_ERROR(ERANGE); + return -ERANGE; } args->valuelen = args->rmtvaluelen; } @@ -2481,7 +2481,7 @@ xfs_attr3_leaf_clearflag( */ error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); if (error) - return(error); + return error; leaf = bp->b_addr; entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; @@ -2548,7 +2548,7 @@ xfs_attr3_leaf_setflag( */ error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); if (error) - return(error); + return error; leaf = bp->b_addr; #ifdef DEBUG diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index e2929da7c3b..e2929da7c3b 100644 --- a/fs/xfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index b5adfecbb8e..7510ab8058a 100644 --- a/fs/xfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -138,11 +138,11 @@ xfs_attr3_rmt_read_verify( while (len > 0) { if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { - xfs_buf_ioerror(bp, EFSBADCRC); + xfs_buf_ioerror(bp, -EFSBADCRC); break; } if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); break; } len -= blksize; @@ -178,7 +178,7 @@ xfs_attr3_rmt_write_verify( while (len > 0) { if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); return; } @@ -257,7 +257,7 @@ xfs_attr_rmtval_copyout( xfs_alert(mp, "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)", bno, *offset, byte_cnt, ino); - return EFSCORRUPTED; + return -EFSCORRUPTED; } hdr_size = sizeof(struct xfs_attr3_rmt_hdr); } @@ -452,7 +452,7 @@ xfs_attr_rmtval_set( ASSERT(committed); args->trans = NULL; xfs_bmap_cancel(args->flist); - return(error); + return error; } /* @@ -473,7 +473,7 @@ xfs_attr_rmtval_set( */ error = xfs_trans_roll(&args->trans, dp); if (error) - return (error); + return error; } /* @@ -498,7 +498,7 @@ xfs_attr_rmtval_set( blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK); if (error) - return(error); + return error; ASSERT(nmap == 1); ASSERT((map.br_startblock != DELAYSTARTBLOCK) && (map.br_startblock != HOLESTARTBLOCK)); @@ -508,7 +508,7 @@ xfs_attr_rmtval_set( bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0); if (!bp) - return ENOMEM; + return -ENOMEM; bp->b_ops = &xfs_attr3_rmt_buf_ops; xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset, @@ -563,7 +563,7 @@ xfs_attr_rmtval_remove( error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK); if (error) - return(error); + return error; ASSERT(nmap == 1); ASSERT((map.br_startblock != DELAYSTARTBLOCK) && (map.br_startblock != HOLESTARTBLOCK)); @@ -622,7 +622,7 @@ xfs_attr_rmtval_remove( */ error = xfs_trans_roll(&args->trans, args->dp); if (error) - return (error); + return error; } - return(0); + return 0; } diff --git a/fs/xfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h index 5a9acfa156d..5a9acfa156d 100644 --- a/fs/xfs/xfs_attr_remote.h +++ b/fs/xfs/libxfs/xfs_attr_remote.h diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h index 919756e3ba5..919756e3ba5 100644 --- a/fs/xfs/xfs_attr_sf.h +++ b/fs/xfs/libxfs/xfs_attr_sf.h diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/libxfs/xfs_bit.h index e1649c0d3e0..e1649c0d3e0 100644 --- a/fs/xfs/xfs_bit.h +++ b/fs/xfs/libxfs/xfs_bit.h diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 75c3fe5f3d9..86df952d3e2 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -392,7 +392,7 @@ xfs_bmap_check_leaf_extents( pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); bno = be64_to_cpu(*pp); - ASSERT(bno != NULLDFSBNO); + ASSERT(bno != NULLFSBLOCK); ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); @@ -1033,7 +1033,7 @@ xfs_bmap_add_attrfork_btree( goto error0; if (stat == 0) { xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - return XFS_ERROR(ENOSPC); + return -ENOSPC; } *firstblock = cur->bc_private.b.firstblock; cur->bc_private.b.allocated = 0; @@ -1115,7 +1115,7 @@ xfs_bmap_add_attrfork_local( /* should only be called for types that support local format data */ ASSERT(0); - return EFSCORRUPTED; + return -EFSCORRUPTED; } /* @@ -1192,7 +1192,7 @@ xfs_bmap_add_attrfork( break; default: ASSERT(0); - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto trans_cancel; } @@ -1299,7 +1299,7 @@ xfs_bmap_read_extents( ASSERT(level > 0); pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); bno = be64_to_cpu(*pp); - ASSERT(bno != NULLDFSBNO); + ASSERT(bno != NULLFSBLOCK); ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); /* @@ -1399,7 +1399,7 @@ xfs_bmap_read_extents( return 0; error0: xfs_trans_brelse(tp, bp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } @@ -1429,11 +1429,7 @@ xfs_bmap_search_multi_extents( gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL; gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL; gotp->br_state = XFS_EXT_INVALID; -#if XFS_BIG_BLKNOS gotp->br_startblock = 0xffffa5a5a5a5a5a5LL; -#else - gotp->br_startblock = 0xffffa5a5; -#endif prevp->br_startoff = NULLFILEOFF; ep = xfs_iext_bno_to_ext(ifp, bno, &lastx); @@ -1576,7 +1572,7 @@ xfs_bmap_last_before( if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) - return XFS_ERROR(EIO); + return -EIO; if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { *last_block = 0; return 0; @@ -1690,7 +1686,7 @@ xfs_bmap_last_offset( if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) - return XFS_ERROR(EIO); + return -EIO; error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty); if (error || is_empty) @@ -3323,7 +3319,7 @@ xfs_bmap_extsize_align( if (orig_off < align_off || orig_end > align_off + align_alen || align_alen - temp < orig_alen) - return XFS_ERROR(EINVAL); + return -EINVAL; /* * Try to fix it by moving the start up. */ @@ -3348,7 +3344,7 @@ xfs_bmap_extsize_align( * Result doesn't cover the request, fail it. */ if (orig_off < align_off || orig_end > align_off + align_alen) - return XFS_ERROR(EINVAL); + return -EINVAL; } else { ASSERT(orig_off >= align_off); ASSERT(orig_end <= align_off + align_alen); @@ -4051,11 +4047,11 @@ xfs_bmapi_read( XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; XFS_STATS_INC(xs_blk_mapr); @@ -4246,11 +4242,11 @@ xfs_bmapi_delay( XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE), mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; XFS_STATS_INC(xs_blk_mapw); @@ -4469,7 +4465,7 @@ xfs_bmapi_convert_unwritten( * so generate another request. */ if (mval->br_blockcount < len) - return EAGAIN; + return -EAGAIN; return 0; } @@ -4540,11 +4536,11 @@ xfs_bmapi_write( XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; ifp = XFS_IFORK_PTR(ip, whichfork); @@ -4620,7 +4616,7 @@ xfs_bmapi_write( /* Execute unwritten extent conversion if necessary */ error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags); - if (error == EAGAIN) + if (error == -EAGAIN) continue; if (error) goto error0; @@ -4922,7 +4918,7 @@ xfs_bmap_del_extent( goto done; cur->bc_rec.b = new; error = xfs_btree_insert(cur, &i); - if (error && error != ENOSPC) + if (error && error != -ENOSPC) goto done; /* * If get no-space back from btree insert, @@ -4930,7 +4926,7 @@ xfs_bmap_del_extent( * block reservation. * Fix up our state and return the error. */ - if (error == ENOSPC) { + if (error == -ENOSPC) { /* * Reset the cursor, don't trust * it after any insert operation. @@ -4958,7 +4954,7 @@ xfs_bmap_del_extent( xfs_bmbt_set_blockcount(ep, got.br_blockcount); flags = 0; - error = XFS_ERROR(ENOSPC); + error = -ENOSPC; goto done; } XFS_WANT_CORRUPTED_GOTO(i == 1, done); @@ -5076,11 +5072,11 @@ xfs_bunmapi( XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW, ip->i_mount); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } mp = ip->i_mount; if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT(len > 0); @@ -5325,7 +5321,7 @@ xfs_bunmapi( del.br_startoff > got.br_startoff && del.br_startoff + del.br_blockcount < got.br_startoff + got.br_blockcount) { - error = XFS_ERROR(ENOSPC); + error = -ENOSPC; goto error0; } error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del, @@ -5428,7 +5424,7 @@ xfs_bmap_shift_extents( struct xfs_bmap_free *flist, int num_exts) { - struct xfs_btree_cur *cur; + struct xfs_btree_cur *cur = NULL; struct xfs_bmbt_rec_host *gotp; struct xfs_bmbt_irec got; struct xfs_bmbt_irec left; @@ -5439,7 +5435,7 @@ xfs_bmap_shift_extents( int error = 0; int i; int whichfork = XFS_DATA_FORK; - int logflags; + int logflags = 0; xfs_filblks_t blockcount = 0; int total_extents; @@ -5449,11 +5445,11 @@ xfs_bmap_shift_extents( mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { XFS_ERROR_REPORT("xfs_bmap_shift_extents", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; ASSERT(current_ext != NULL); @@ -5482,16 +5478,11 @@ xfs_bmap_shift_extents( } } - /* We are going to change core inode */ - logflags = XFS_ILOG_CORE; if (ifp->if_flags & XFS_IFBROOT) { cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); cur->bc_private.b.firstblock = *firstblock; cur->bc_private.b.flist = flist; cur->bc_private.b.flags = 0; - } else { - cur = NULL; - logflags |= XFS_ILOG_DEXT; } /* @@ -5516,14 +5507,14 @@ xfs_bmap_shift_extents( *current_ext - 1), &left); if (startoff < left.br_startoff + left.br_blockcount) - error = XFS_ERROR(EINVAL); + error = -EINVAL; } else if (offset_shift_fsb > got.br_startoff) { /* * When first extent is shifted, offset_shift_fsb * should be less than the stating offset of * the first extent. */ - error = XFS_ERROR(EINVAL); + error = -EINVAL; } if (error) @@ -5549,11 +5540,14 @@ xfs_bmap_shift_extents( blockcount = left.br_blockcount + got.br_blockcount; xfs_iext_remove(ip, *current_ext, 1, 0); + logflags |= XFS_ILOG_CORE; if (cur) { error = xfs_btree_delete(cur, &i); if (error) goto del_cursor; XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); + } else { + logflags |= XFS_ILOG_DEXT; } XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) - 1); @@ -5579,6 +5573,7 @@ xfs_bmap_shift_extents( got.br_startoff = startoff; } + logflags |= XFS_ILOG_CORE; if (cur) { error = xfs_bmbt_update(cur, got.br_startoff, got.br_startblock, @@ -5586,6 +5581,8 @@ xfs_bmap_shift_extents( got.br_state); if (error) goto del_cursor; + } else { + logflags |= XFS_ILOG_DEXT; } (*current_ext)++; @@ -5601,6 +5598,7 @@ del_cursor: xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); - xfs_trans_log_inode(tp, ip, logflags); + if (logflags) + xfs_trans_log_inode(tp, ip, logflags); return error; } diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index b879ca56a64..b879ca56a64 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 948836c4fd9..fba753308f3 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -111,23 +111,8 @@ __xfs_bmbt_get_all( ext_flag = (int)(l0 >> (64 - BMBT_EXNTFLAG_BITLEN)); s->br_startoff = ((xfs_fileoff_t)l0 & xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; -#if XFS_BIG_BLKNOS s->br_startblock = (((xfs_fsblock_t)l0 & xfs_mask64lo(9)) << 43) | (((xfs_fsblock_t)l1) >> 21); -#else -#ifdef DEBUG - { - xfs_dfsbno_t b; - - b = (((xfs_dfsbno_t)l0 & xfs_mask64lo(9)) << 43) | - (((xfs_dfsbno_t)l1) >> 21); - ASSERT((b >> 32) == 0 || isnulldstartblock(b)); - s->br_startblock = (xfs_fsblock_t)b; - } -#else /* !DEBUG */ - s->br_startblock = (xfs_fsblock_t)(((xfs_dfsbno_t)l1) >> 21); -#endif /* DEBUG */ -#endif /* XFS_BIG_BLKNOS */ s->br_blockcount = (xfs_filblks_t)(l1 & xfs_mask64lo(21)); /* This is xfs_extent_state() in-line */ if (ext_flag) { @@ -163,21 +148,8 @@ xfs_fsblock_t xfs_bmbt_get_startblock( xfs_bmbt_rec_host_t *r) { -#if XFS_BIG_BLKNOS return (((xfs_fsblock_t)r->l0 & xfs_mask64lo(9)) << 43) | (((xfs_fsblock_t)r->l1) >> 21); -#else -#ifdef DEBUG - xfs_dfsbno_t b; - - b = (((xfs_dfsbno_t)r->l0 & xfs_mask64lo(9)) << 43) | - (((xfs_dfsbno_t)r->l1) >> 21); - ASSERT((b >> 32) == 0 || isnulldstartblock(b)); - return (xfs_fsblock_t)b; -#else /* !DEBUG */ - return (xfs_fsblock_t)(((xfs_dfsbno_t)r->l1) >> 21); -#endif /* DEBUG */ -#endif /* XFS_BIG_BLKNOS */ } /* @@ -241,7 +213,6 @@ xfs_bmbt_set_allf( ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0); ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0); -#if XFS_BIG_BLKNOS ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0); r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) | @@ -250,23 +221,6 @@ xfs_bmbt_set_allf( r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) | ((xfs_bmbt_rec_base_t)blockcount & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); -#else /* !XFS_BIG_BLKNOS */ - if (isnullstartblock(startblock)) { - r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) | - ((xfs_bmbt_rec_base_t)startoff << 9) | - (xfs_bmbt_rec_base_t)xfs_mask64lo(9); - r->l1 = xfs_mask64hi(11) | - ((xfs_bmbt_rec_base_t)startblock << 21) | - ((xfs_bmbt_rec_base_t)blockcount & - (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); - } else { - r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) | - ((xfs_bmbt_rec_base_t)startoff << 9); - r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) | - ((xfs_bmbt_rec_base_t)blockcount & - (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); - } -#endif /* XFS_BIG_BLKNOS */ } /* @@ -298,8 +252,6 @@ xfs_bmbt_disk_set_allf( ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN); ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0); ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0); - -#if XFS_BIG_BLKNOS ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0); r->l0 = cpu_to_be64( @@ -310,26 +262,6 @@ xfs_bmbt_disk_set_allf( ((xfs_bmbt_rec_base_t)startblock << 21) | ((xfs_bmbt_rec_base_t)blockcount & (xfs_bmbt_rec_base_t)xfs_mask64lo(21))); -#else /* !XFS_BIG_BLKNOS */ - if (isnullstartblock(startblock)) { - r->l0 = cpu_to_be64( - ((xfs_bmbt_rec_base_t)extent_flag << 63) | - ((xfs_bmbt_rec_base_t)startoff << 9) | - (xfs_bmbt_rec_base_t)xfs_mask64lo(9)); - r->l1 = cpu_to_be64(xfs_mask64hi(11) | - ((xfs_bmbt_rec_base_t)startblock << 21) | - ((xfs_bmbt_rec_base_t)blockcount & - (xfs_bmbt_rec_base_t)xfs_mask64lo(21))); - } else { - r->l0 = cpu_to_be64( - ((xfs_bmbt_rec_base_t)extent_flag << 63) | - ((xfs_bmbt_rec_base_t)startoff << 9)); - r->l1 = cpu_to_be64( - ((xfs_bmbt_rec_base_t)startblock << 21) | - ((xfs_bmbt_rec_base_t)blockcount & - (xfs_bmbt_rec_base_t)xfs_mask64lo(21))); - } -#endif /* XFS_BIG_BLKNOS */ } /* @@ -365,24 +297,11 @@ xfs_bmbt_set_startblock( xfs_bmbt_rec_host_t *r, xfs_fsblock_t v) { -#if XFS_BIG_BLKNOS ASSERT((v & xfs_mask64hi(12)) == 0); r->l0 = (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64hi(55)) | (xfs_bmbt_rec_base_t)(v >> 43); r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)) | (xfs_bmbt_rec_base_t)(v << 21); -#else /* !XFS_BIG_BLKNOS */ - if (isnullstartblock(v)) { - r->l0 |= (xfs_bmbt_rec_base_t)xfs_mask64lo(9); - r->l1 = (xfs_bmbt_rec_base_t)xfs_mask64hi(11) | - ((xfs_bmbt_rec_base_t)v << 21) | - (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); - } else { - r->l0 &= ~(xfs_bmbt_rec_base_t)xfs_mask64lo(9); - r->l1 = ((xfs_bmbt_rec_base_t)v << 21) | - (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); - } -#endif /* XFS_BIG_BLKNOS */ } /* @@ -438,8 +357,8 @@ xfs_bmbt_to_bmdr( cpu_to_be64(XFS_BUF_DADDR_NULL)); } else ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC)); - ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)); - ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)); + ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK)); + ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK)); ASSERT(rblock->bb_level != 0); dblock->bb_level = rblock->bb_level; dblock->bb_numrecs = rblock->bb_numrecs; @@ -554,7 +473,7 @@ xfs_bmbt_alloc_block( args.minlen = args.maxlen = args.prod = 1; args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) { - error = XFS_ERROR(ENOSPC); + error = -ENOSPC; goto error0; } error = xfs_alloc_vextent(&args); @@ -763,11 +682,11 @@ xfs_bmbt_verify( /* sibling pointer verification */ if (!block->bb_u.l.bb_leftsib || - (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLDFSBNO) && + (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))) return false; if (!block->bb_u.l.bb_rightsib || - (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLDFSBNO) && + (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))) return false; @@ -779,9 +698,9 @@ xfs_bmbt_read_verify( struct xfs_buf *bp) { if (!xfs_btree_lblock_verify_crc(bp)) - xfs_buf_ioerror(bp, EFSBADCRC); + xfs_buf_ioerror(bp, -EFSBADCRC); else if (!xfs_bmbt_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); if (bp->b_error) { trace_xfs_btree_corrupt(bp, _RET_IP_); @@ -795,7 +714,7 @@ xfs_bmbt_write_verify( { if (!xfs_bmbt_verify(bp)) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); return; } @@ -959,7 +878,7 @@ xfs_bmbt_change_owner( cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork); if (!cur) - return ENOMEM; + return -ENOMEM; error = xfs_btree_change_owner(cur, new_owner, buffer_list); xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h index 819a8a4dee9..819a8a4dee9 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/libxfs/xfs_bmap_btree.h diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index cf893bc1e37..8fe6a93ff47 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -78,11 +78,11 @@ xfs_btree_check_lblock( be16_to_cpu(block->bb_numrecs) <= cur->bc_ops->get_maxrecs(cur, level) && block->bb_u.l.bb_leftsib && - (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) || + (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK) || XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))) && block->bb_u.l.bb_rightsib && - (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) || + (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK) || XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))); @@ -92,7 +92,7 @@ xfs_btree_check_lblock( if (bp) trace_xfs_btree_corrupt(bp, _RET_IP_); XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } return 0; } @@ -140,7 +140,7 @@ xfs_btree_check_sblock( if (bp) trace_xfs_btree_corrupt(bp, _RET_IP_); XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } return 0; } @@ -167,12 +167,12 @@ xfs_btree_check_block( int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_lptr( struct xfs_btree_cur *cur, /* btree cursor */ - xfs_dfsbno_t bno, /* btree block disk address */ + xfs_fsblock_t bno, /* btree block disk address */ int level) /* btree block level */ { XFS_WANT_CORRUPTED_RETURN( level > 0 && - bno != NULLDFSBNO && + bno != NULLFSBLOCK && XFS_FSB_SANITY_CHECK(cur->bc_mp, bno)); return 0; } @@ -595,7 +595,7 @@ xfs_btree_islastblock( block = xfs_btree_get_block(cur, level, &bp); xfs_btree_check_block(cur, block, level, bp); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO); + return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK); else return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK); } @@ -771,16 +771,16 @@ xfs_btree_readahead_lblock( struct xfs_btree_block *block) { int rval = 0; - xfs_dfsbno_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); - xfs_dfsbno_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); + xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); + xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); - if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) { + if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) { xfs_btree_reada_bufl(cur->bc_mp, left, 1, cur->bc_ops->buf_ops); rval++; } - if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) { + if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) { xfs_btree_reada_bufl(cur->bc_mp, right, 1, cur->bc_ops->buf_ops); rval++; @@ -852,7 +852,7 @@ xfs_btree_ptr_to_daddr( union xfs_btree_ptr *ptr) { if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO)); + ASSERT(ptr->l != cpu_to_be64(NULLFSBLOCK)); return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); } else { @@ -900,9 +900,9 @@ xfs_btree_setbuf( b = XFS_BUF_TO_BLOCK(bp); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)) + if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK)) cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; - if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)) + if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK)) cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; } else { if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK)) @@ -918,7 +918,7 @@ xfs_btree_ptr_is_null( union xfs_btree_ptr *ptr) { if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - return ptr->l == cpu_to_be64(NULLDFSBNO); + return ptr->l == cpu_to_be64(NULLFSBLOCK); else return ptr->s == cpu_to_be32(NULLAGBLOCK); } @@ -929,7 +929,7 @@ xfs_btree_set_ptr_null( union xfs_btree_ptr *ptr) { if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - ptr->l = cpu_to_be64(NULLDFSBNO); + ptr->l = cpu_to_be64(NULLFSBLOCK); else ptr->s = cpu_to_be32(NULLAGBLOCK); } @@ -997,8 +997,8 @@ xfs_btree_init_block_int( buf->bb_numrecs = cpu_to_be16(numrecs); if (flags & XFS_BTREE_LONG_PTRS) { - buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); - buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); + buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK); + buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK); if (flags & XFS_BTREE_CRC_BLOCKS) { buf->bb_u.l.bb_blkno = cpu_to_be64(blkno); buf->bb_u.l.bb_owner = cpu_to_be64(owner); @@ -1140,7 +1140,7 @@ xfs_btree_get_buf_block( mp->m_bsize, flags); if (!*bpp) - return ENOMEM; + return -ENOMEM; (*bpp)->b_ops = cur->bc_ops->buf_ops; *block = XFS_BUF_TO_BLOCK(*bpp); @@ -1498,7 +1498,7 @@ xfs_btree_increment( if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) goto out0; ASSERT(0); - error = EFSCORRUPTED; + error = -EFSCORRUPTED; goto error0; } ASSERT(lev < cur->bc_nlevels); @@ -1597,7 +1597,7 @@ xfs_btree_decrement( if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) goto out0; ASSERT(0); - error = EFSCORRUPTED; + error = -EFSCORRUPTED; goto error0; } ASSERT(lev < cur->bc_nlevels); @@ -4018,7 +4018,7 @@ xfs_btree_block_change_owner( /* now read rh sibling block for next iteration */ xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); if (xfs_btree_ptr_is_null(cur, &rptr)) - return ENOENT; + return -ENOENT; return xfs_btree_lookup_get_block(cur, level, &rptr, &block); } @@ -4061,7 +4061,7 @@ xfs_btree_change_owner( buffer_list); } while (!error); - if (error != ENOENT) + if (error != -ENOENT) return error; } diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index a04b69422f6..8f18bab73ea 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -258,7 +258,7 @@ xfs_btree_check_block( int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_lptr( struct xfs_btree_cur *cur, /* btree cursor */ - xfs_dfsbno_t ptr, /* btree block disk address */ + xfs_fsblock_t ptr, /* btree block disk address */ int level); /* btree block level */ /* diff --git a/fs/xfs/xfs_cksum.h b/fs/xfs/libxfs/xfs_cksum.h index fad1676ad8c..fad1676ad8c 100644 --- a/fs/xfs/xfs_cksum.h +++ b/fs/xfs/libxfs/xfs_cksum.h diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index a514ab61665..2c42ae28d02 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -185,7 +185,7 @@ xfs_da3_node_write_verify( struct xfs_da3_node_hdr *hdr3 = bp->b_addr; if (!xfs_da3_node_verify(bp)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); return; } @@ -214,13 +214,13 @@ xfs_da3_node_read_verify( switch (be16_to_cpu(info->magic)) { case XFS_DA3_NODE_MAGIC: if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) { - xfs_buf_ioerror(bp, EFSBADCRC); + xfs_buf_ioerror(bp, -EFSBADCRC); break; } /* fall through */ case XFS_DA_NODE_MAGIC: if (!xfs_da3_node_verify(bp)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); break; } return; @@ -315,7 +315,7 @@ xfs_da3_node_create( error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, whichfork); if (error) - return(error); + return error; bp->b_ops = &xfs_da3_node_buf_ops; xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF); node = bp->b_addr; @@ -337,7 +337,7 @@ xfs_da3_node_create( XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); *bpp = bp; - return(0); + return 0; } /* @@ -385,8 +385,8 @@ xfs_da3_split( switch (oldblk->magic) { case XFS_ATTR_LEAF_MAGIC: error = xfs_attr3_leaf_split(state, oldblk, newblk); - if ((error != 0) && (error != ENOSPC)) { - return(error); /* GROT: attr is inconsistent */ + if ((error != 0) && (error != -ENOSPC)) { + return error; /* GROT: attr is inconsistent */ } if (!error) { addblk = newblk; @@ -408,7 +408,7 @@ xfs_da3_split( &state->extrablk); } if (error) - return(error); /* GROT: attr inconsistent */ + return error; /* GROT: attr inconsistent */ addblk = newblk; break; case XFS_DIR2_LEAFN_MAGIC: @@ -422,7 +422,7 @@ xfs_da3_split( max - i, &action); addblk->bp = NULL; if (error) - return(error); /* GROT: dir is inconsistent */ + return error; /* GROT: dir is inconsistent */ /* * Record the newly split block for the next time thru? */ @@ -439,7 +439,7 @@ xfs_da3_split( xfs_da3_fixhashpath(state, &state->path); } if (!addblk) - return(0); + return 0; /* * Split the root node. @@ -449,7 +449,7 @@ xfs_da3_split( error = xfs_da3_root_split(state, oldblk, addblk); if (error) { addblk->bp = NULL; - return(error); /* GROT: dir is inconsistent */ + return error; /* GROT: dir is inconsistent */ } /* @@ -492,7 +492,7 @@ xfs_da3_split( sizeof(node->hdr.info))); } addblk->bp = NULL; - return(0); + return 0; } /* @@ -670,18 +670,18 @@ xfs_da3_node_split( */ error = xfs_da_grow_inode(state->args, &blkno); if (error) - return(error); /* GROT: dir is inconsistent */ + return error; /* GROT: dir is inconsistent */ error = xfs_da3_node_create(state->args, blkno, treelevel, &newblk->bp, state->args->whichfork); if (error) - return(error); /* GROT: dir is inconsistent */ + return error; /* GROT: dir is inconsistent */ newblk->blkno = blkno; newblk->magic = XFS_DA_NODE_MAGIC; xfs_da3_node_rebalance(state, oldblk, newblk); error = xfs_da3_blk_link(state, oldblk, newblk); if (error) - return(error); + return error; *result = 1; } else { *result = 0; @@ -721,7 +721,7 @@ xfs_da3_node_split( } } - return(0); + return 0; } /* @@ -963,9 +963,9 @@ xfs_da3_join( case XFS_ATTR_LEAF_MAGIC: error = xfs_attr3_leaf_toosmall(state, &action); if (error) - return(error); + return error; if (action == 0) - return(0); + return 0; xfs_attr3_leaf_unbalance(state, drop_blk, save_blk); break; case XFS_DIR2_LEAFN_MAGIC: @@ -985,7 +985,7 @@ xfs_da3_join( xfs_da3_fixhashpath(state, &state->path); error = xfs_da3_node_toosmall(state, &action); if (error) - return(error); + return error; if (action == 0) return 0; xfs_da3_node_unbalance(state, drop_blk, save_blk); @@ -995,12 +995,12 @@ xfs_da3_join( error = xfs_da3_blk_unlink(state, drop_blk, save_blk); xfs_da_state_kill_altpath(state); if (error) - return(error); + return error; error = xfs_da_shrink_inode(state->args, drop_blk->blkno, drop_blk->bp); drop_blk->bp = NULL; if (error) - return(error); + return error; } /* * We joined all the way to the top. If it turns out that @@ -1010,7 +1010,7 @@ xfs_da3_join( xfs_da3_node_remove(state, drop_blk); xfs_da3_fixhashpath(state, &state->path); error = xfs_da3_root_join(state, &state->path.blk[0]); - return(error); + return error; } #ifdef DEBUG @@ -1099,7 +1099,7 @@ xfs_da3_root_join( xfs_trans_log_buf(args->trans, root_blk->bp, 0, args->geo->blksize - 1); error = xfs_da_shrink_inode(args, child, bp); - return(error); + return error; } /* @@ -1142,7 +1142,7 @@ xfs_da3_node_toosmall( dp->d_ops->node_hdr_from_disk(&nodehdr, node); if (nodehdr.count > (state->args->geo->node_ents >> 1)) { *action = 0; /* blk over 50%, don't try to join */ - return(0); /* blk over 50%, don't try to join */ + return 0; /* blk over 50%, don't try to join */ } /* @@ -1161,13 +1161,13 @@ xfs_da3_node_toosmall( error = xfs_da3_path_shift(state, &state->altpath, forward, 0, &retval); if (error) - return(error); + return error; if (retval) { *action = 0; } else { *action = 2; } - return(0); + return 0; } /* @@ -1194,7 +1194,7 @@ xfs_da3_node_toosmall( error = xfs_da3_node_read(state->args->trans, dp, blkno, -1, &bp, state->args->whichfork); if (error) - return(error); + return error; node = bp->b_addr; dp->d_ops->node_hdr_from_disk(&thdr, node); @@ -1486,7 +1486,7 @@ xfs_da3_node_lookup_int( if (error) { blk->blkno = 0; state->path.active--; - return(error); + return error; } curr = blk->bp->b_addr; blk->magic = be16_to_cpu(curr->magic); @@ -1579,25 +1579,25 @@ xfs_da3_node_lookup_int( args->blkno = blk->blkno; } else { ASSERT(0); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } - if (((retval == ENOENT) || (retval == ENOATTR)) && + if (((retval == -ENOENT) || (retval == -ENOATTR)) && (blk->hashval == args->hashval)) { error = xfs_da3_path_shift(state, &state->path, 1, 1, &retval); if (error) - return(error); + return error; if (retval == 0) { continue; } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { /* path_shift() gives ENOENT */ - retval = XFS_ERROR(ENOATTR); + retval = -ENOATTR; } } break; } *result = retval; - return(0); + return 0; } /*======================================================================== @@ -1692,7 +1692,7 @@ xfs_da3_blk_link( be32_to_cpu(old_info->back), -1, &bp, args->whichfork); if (error) - return(error); + return error; ASSERT(bp != NULL); tmp_info = bp->b_addr; ASSERT(tmp_info->magic == old_info->magic); @@ -1713,7 +1713,7 @@ xfs_da3_blk_link( be32_to_cpu(old_info->forw), -1, &bp, args->whichfork); if (error) - return(error); + return error; ASSERT(bp != NULL); tmp_info = bp->b_addr; ASSERT(tmp_info->magic == old_info->magic); @@ -1726,7 +1726,7 @@ xfs_da3_blk_link( xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1); xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1); - return(0); + return 0; } /* @@ -1772,7 +1772,7 @@ xfs_da3_blk_unlink( be32_to_cpu(drop_info->back), -1, &bp, args->whichfork); if (error) - return(error); + return error; ASSERT(bp != NULL); tmp_info = bp->b_addr; ASSERT(tmp_info->magic == save_info->magic); @@ -1789,7 +1789,7 @@ xfs_da3_blk_unlink( be32_to_cpu(drop_info->forw), -1, &bp, args->whichfork); if (error) - return(error); + return error; ASSERT(bp != NULL); tmp_info = bp->b_addr; ASSERT(tmp_info->magic == save_info->magic); @@ -1801,7 +1801,7 @@ xfs_da3_blk_unlink( } xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1); - return(0); + return 0; } /* @@ -1859,9 +1859,9 @@ xfs_da3_path_shift( } } if (level < 0) { - *result = XFS_ERROR(ENOENT); /* we're out of our tree */ + *result = -ENOENT; /* we're out of our tree */ ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); - return(0); + return 0; } /* @@ -1883,7 +1883,7 @@ xfs_da3_path_shift( error = xfs_da3_node_read(args->trans, dp, blkno, -1, &blk->bp, args->whichfork); if (error) - return(error); + return error; info = blk->bp->b_addr; ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) || @@ -2004,7 +2004,7 @@ xfs_da_grow_inode_int( struct xfs_trans *tp = args->trans; struct xfs_inode *dp = args->dp; int w = args->whichfork; - xfs_drfsbno_t nblks = dp->i_d.di_nblocks; + xfs_rfsblock_t nblks = dp->i_d.di_nblocks; struct xfs_bmbt_irec map, *mapp; int nmap, error, got, i, mapi; @@ -2068,7 +2068,7 @@ xfs_da_grow_inode_int( if (got != count || mapp[0].br_startoff != *bno || mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != *bno + count) { - error = XFS_ERROR(ENOSPC); + error = -ENOSPC; goto out_free_map; } @@ -2158,7 +2158,7 @@ xfs_da3_swap_lastblock( if (unlikely(lastoff == 0)) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } /* * Read the last block in the btree space. @@ -2209,7 +2209,7 @@ xfs_da3_swap_lastblock( sib_info->magic != dead_info->magic)) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)", XFS_ERRLEVEL_LOW, mp); - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; goto done; } sib_info->forw = cpu_to_be32(dead_blkno); @@ -2231,7 +2231,7 @@ xfs_da3_swap_lastblock( sib_info->magic != dead_info->magic)) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)", XFS_ERRLEVEL_LOW, mp); - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; goto done; } sib_info->back = cpu_to_be32(dead_blkno); @@ -2254,7 +2254,7 @@ xfs_da3_swap_lastblock( if (level >= 0 && level != par_hdr.level + 1) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", XFS_ERRLEVEL_LOW, mp); - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; goto done; } level = par_hdr.level; @@ -2267,7 +2267,7 @@ xfs_da3_swap_lastblock( if (entno == par_hdr.count) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)", XFS_ERRLEVEL_LOW, mp); - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; goto done; } par_blkno = be32_to_cpu(btree[entno].before); @@ -2294,7 +2294,7 @@ xfs_da3_swap_lastblock( if (unlikely(par_blkno == 0)) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)", XFS_ERRLEVEL_LOW, mp); - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; goto done; } error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w); @@ -2305,7 +2305,7 @@ xfs_da3_swap_lastblock( if (par_hdr.level != level) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)", XFS_ERRLEVEL_LOW, mp); - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; goto done; } btree = dp->d_ops->node_tree_p(par_node); @@ -2359,7 +2359,7 @@ xfs_da_shrink_inode( error = xfs_bunmapi(tp, dp, dead_blkno, count, xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, 0, args->firstblock, args->flist, &done); - if (error == ENOSPC) { + if (error == -ENOSPC) { if (w != XFS_DATA_FORK) break; error = xfs_da3_swap_lastblock(args, &dead_blkno, @@ -2427,7 +2427,7 @@ xfs_buf_map_from_irec( map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), KM_SLEEP | KM_NOFS); if (!map) - return ENOMEM; + return -ENOMEM; *mapp = map; } @@ -2500,8 +2500,8 @@ xfs_dabuf_map( } if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) { - error = mappedbno == -2 ? -1 : XFS_ERROR(EFSCORRUPTED); - if (unlikely(error == EFSCORRUPTED)) { + error = mappedbno == -2 ? -1 : -EFSCORRUPTED; + if (unlikely(error == -EFSCORRUPTED)) { if (xfs_error_level >= XFS_ERRLEVEL_LOW) { int i; xfs_alert(mp, "%s: bno %lld dir: inode %lld", @@ -2561,7 +2561,7 @@ xfs_da_get_buf( bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp, mapp, nmap, 0); - error = bp ? bp->b_error : XFS_ERROR(EIO); + error = bp ? bp->b_error : -EIO; if (error) { xfs_trans_brelse(trans, bp); goto out_free; diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index 6e153e399a7..6e153e399a7 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h diff --git a/fs/xfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c index c9aee52a37e..c9aee52a37e 100644 --- a/fs/xfs/xfs_da_format.c +++ b/fs/xfs/libxfs/xfs_da_format.c diff --git a/fs/xfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 0a49b028637..0a49b028637 100644 --- a/fs/xfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/libxfs/xfs_dinode.h index 623bbe8fd92..623bbe8fd92 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/libxfs/xfs_dinode.h diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 79670cda48a..6cef22152fd 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -108,7 +108,7 @@ xfs_da_mount( if (!mp->m_dir_geo || !mp->m_attr_geo) { kmem_free(mp->m_dir_geo); kmem_free(mp->m_attr_geo); - return ENOMEM; + return -ENOMEM; } /* set up directory geometry */ @@ -202,7 +202,7 @@ xfs_dir_ino_validate( xfs_warn(mp, "Invalid inode number 0x%Lx", (unsigned long long) ino); XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } return 0; } @@ -226,7 +226,7 @@ xfs_dir_init( args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); if (!args) - return ENOMEM; + return -ENOMEM; args->geo = dp->i_mount->m_dir_geo; args->dp = dp; @@ -261,7 +261,7 @@ xfs_dir_createname( args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); if (!args) - return ENOMEM; + return -ENOMEM; args->geo = dp->i_mount->m_dir_geo; args->name = name->name; @@ -314,18 +314,18 @@ xfs_dir_cilookup_result( int len) { if (args->cmpresult == XFS_CMP_DIFFERENT) - return ENOENT; + return -ENOENT; if (args->cmpresult != XFS_CMP_CASE || !(args->op_flags & XFS_DA_OP_CILOOKUP)) - return EEXIST; + return -EEXIST; args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL); if (!args->value) - return ENOMEM; + return -ENOMEM; memcpy(args->value, name, len); args->valuelen = len; - return EEXIST; + return -EEXIST; } /* @@ -392,7 +392,7 @@ xfs_dir_lookup( rval = xfs_dir2_node_lookup(args); out_check_rval: - if (rval == EEXIST) + if (rval == -EEXIST) rval = 0; if (!rval) { *inum = args->inumber; @@ -428,7 +428,7 @@ xfs_dir_removename( args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); if (!args) - return ENOMEM; + return -ENOMEM; args->geo = dp->i_mount->m_dir_geo; args->name = name->name; @@ -493,7 +493,7 @@ xfs_dir_replace( args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); if (!args) - return ENOMEM; + return -ENOMEM; args->geo = dp->i_mount->m_dir_geo; args->name = name->name; @@ -555,7 +555,7 @@ xfs_dir_canenter( args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); if (!args) - return ENOMEM; + return -ENOMEM; args->geo = dp->i_mount->m_dir_geo; args->name = name->name; diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index c8e86b0b5e9..c8e86b0b5e9 100644 --- a/fs/xfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index c7cd3154026..9628ceccfa0 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -91,9 +91,9 @@ xfs_dir3_block_read_verify( if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) - xfs_buf_ioerror(bp, EFSBADCRC); + xfs_buf_ioerror(bp, -EFSBADCRC); else if (!xfs_dir3_block_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); if (bp->b_error) xfs_verifier_error(bp); @@ -108,7 +108,7 @@ xfs_dir3_block_write_verify( struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (!xfs_dir3_block_verify(bp)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); return; } @@ -392,7 +392,7 @@ xfs_dir2_block_addname( if (args->op_flags & XFS_DA_OP_JUSTCHECK) { xfs_trans_brelse(tp, bp); if (!dup) - return XFS_ERROR(ENOSPC); + return -ENOSPC; return 0; } @@ -402,7 +402,7 @@ xfs_dir2_block_addname( if (!dup) { /* Don't have a space reservation: return no-space. */ if (args->total == 0) - return XFS_ERROR(ENOSPC); + return -ENOSPC; /* * Convert to the next larger format. * Then add the new entry in that format. @@ -647,7 +647,7 @@ xfs_dir2_block_lookup( args->filetype = dp->d_ops->data_get_ftype(dep); error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); xfs_trans_brelse(args->trans, bp); - return XFS_ERROR(error); + return error; } /* @@ -703,7 +703,7 @@ xfs_dir2_block_lookup_int( if (low > high) { ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); xfs_trans_brelse(tp, bp); - return XFS_ERROR(ENOENT); + return -ENOENT; } } /* @@ -751,7 +751,7 @@ xfs_dir2_block_lookup_int( * No match, release the buffer and return ENOENT. */ xfs_trans_brelse(tp, bp); - return XFS_ERROR(ENOENT); + return -ENOENT; } /* @@ -1091,7 +1091,7 @@ xfs_dir2_sf_to_block( */ if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { ASSERT(XFS_FORCED_SHUTDOWN(mp)); - return XFS_ERROR(EIO); + return -EIO; } oldsfp = (xfs_dir2_sf_hdr_t *)ifp->if_u1.if_data; diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 8c2f6422648..fdd803fecb8 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -100,7 +100,7 @@ __xfs_dir3_data_check( break; default: XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp); - return EFSCORRUPTED; + return -EFSCORRUPTED; } /* @@ -256,7 +256,7 @@ xfs_dir3_data_reada_verify( xfs_dir3_data_verify(bp); return; default: - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); break; } @@ -270,9 +270,9 @@ xfs_dir3_data_read_verify( if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) - xfs_buf_ioerror(bp, EFSBADCRC); + xfs_buf_ioerror(bp, -EFSBADCRC); else if (!xfs_dir3_data_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); if (bp->b_error) xfs_verifier_error(bp); @@ -287,7 +287,7 @@ xfs_dir3_data_write_verify( struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (!xfs_dir3_data_verify(bp)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); return; } diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index fb0aad4440c..a19174eb3cb 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -183,9 +183,9 @@ __read_verify( if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF)) - xfs_buf_ioerror(bp, EFSBADCRC); + xfs_buf_ioerror(bp, -EFSBADCRC); else if (!xfs_dir3_leaf_verify(bp, magic)) - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); if (bp->b_error) xfs_verifier_error(bp); @@ -201,7 +201,7 @@ __write_verify( struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; if (!xfs_dir3_leaf_verify(bp, magic)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); return; } @@ -731,7 +731,7 @@ xfs_dir2_leaf_addname( if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) { xfs_trans_brelse(tp, lbp); - return XFS_ERROR(ENOSPC); + return -ENOSPC; } /* * Convert to node form. @@ -755,7 +755,7 @@ xfs_dir2_leaf_addname( */ if (args->op_flags & XFS_DA_OP_JUSTCHECK) { xfs_trans_brelse(tp, lbp); - return use_block == -1 ? XFS_ERROR(ENOSPC) : 0; + return use_block == -1 ? -ENOSPC : 0; } /* * If no allocations are allowed, return now before we've @@ -763,7 +763,7 @@ xfs_dir2_leaf_addname( */ if (args->total == 0 && use_block == -1) { xfs_trans_brelse(tp, lbp); - return XFS_ERROR(ENOSPC); + return -ENOSPC; } /* * Need to compact the leaf entries, removing stale ones. @@ -1198,7 +1198,7 @@ xfs_dir2_leaf_lookup( error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); xfs_trans_brelse(tp, dbp); xfs_trans_brelse(tp, lbp); - return XFS_ERROR(error); + return error; } /* @@ -1327,13 +1327,13 @@ xfs_dir2_leaf_lookup_int( return 0; } /* - * No match found, return ENOENT. + * No match found, return -ENOENT. */ ASSERT(cidb == -1); if (dbp) xfs_trans_brelse(tp, dbp); xfs_trans_brelse(tp, lbp); - return XFS_ERROR(ENOENT); + return -ENOENT; } /* @@ -1440,7 +1440,7 @@ xfs_dir2_leaf_removename( * Just go on, returning success, leaving the * empty block in place. */ - if (error == ENOSPC && args->total == 0) + if (error == -ENOSPC && args->total == 0) error = 0; xfs_dir3_leaf_check(dp, lbp); return error; @@ -1641,7 +1641,7 @@ xfs_dir2_leaf_trim_data( * Get rid of the data block. */ if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { - ASSERT(error != ENOSPC); + ASSERT(error != -ENOSPC); xfs_trans_brelse(tp, dbp); return error; } @@ -1815,7 +1815,7 @@ xfs_dir2_node_to_leaf( * punching out the middle of an extent, and this is an * isolated block. */ - ASSERT(error != ENOSPC); + ASSERT(error != -ENOSPC); return error; } fbp = NULL; diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index da43d304fca..2ae6ac2c11a 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -117,9 +117,9 @@ xfs_dir3_free_read_verify( if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF)) - xfs_buf_ioerror(bp, EFSBADCRC); + xfs_buf_ioerror(bp, -EFSBADCRC); else if (!xfs_dir3_free_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); if (bp->b_error) xfs_verifier_error(bp); @@ -134,7 +134,7 @@ xfs_dir3_free_write_verify( struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (!xfs_dir3_free_verify(bp)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); return; } @@ -406,7 +406,7 @@ xfs_dir2_leafn_add( * into other peoples memory */ if (index < 0) - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; /* * If there are already the maximum number of leaf entries in @@ -417,7 +417,7 @@ xfs_dir2_leafn_add( if (leafhdr.count == dp->d_ops->leaf_max_ents(args->geo)) { if (!leafhdr.stale) - return XFS_ERROR(ENOSPC); + return -ENOSPC; compact = leafhdr.stale > 1; } else compact = 0; @@ -629,7 +629,7 @@ xfs_dir2_leafn_lookup_for_addname( XFS_ERRLEVEL_LOW, mp); if (curfdb != newfdb) xfs_trans_brelse(tp, curbp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } curfdb = newfdb; if (be16_to_cpu(bests[fi]) >= length) @@ -660,7 +660,7 @@ out: * Return the index, that will be the insertion point. */ *indexp = index; - return XFS_ERROR(ENOENT); + return -ENOENT; } /* @@ -789,7 +789,7 @@ xfs_dir2_leafn_lookup_for_entry( curbp->b_ops = &xfs_dir3_data_buf_ops; xfs_trans_buf_set_type(tp, curbp, XFS_BLFT_DIR_DATA_BUF); if (cmp == XFS_CMP_EXACT) - return XFS_ERROR(EEXIST); + return -EEXIST; } } ASSERT(index == leafhdr.count || (args->op_flags & XFS_DA_OP_OKNOENT)); @@ -812,7 +812,7 @@ xfs_dir2_leafn_lookup_for_entry( state->extravalid = 0; } *indexp = index; - return XFS_ERROR(ENOENT); + return -ENOENT; } /* @@ -1133,7 +1133,7 @@ xfs_dir3_data_block_free( if (error == 0) { fbp = NULL; logfree = 0; - } else if (error != ENOSPC || args->total != 0) + } else if (error != -ENOSPC || args->total != 0) return error; /* * It's possible to get ENOSPC if there is no @@ -1287,7 +1287,7 @@ xfs_dir2_leafn_remove( * In this case just drop the buffer and some one else * will eventually get rid of the empty block. */ - else if (!(error == ENOSPC && args->total == 0)) + else if (!(error == -ENOSPC && args->total == 0)) return error; } /* @@ -1599,7 +1599,7 @@ xfs_dir2_node_addname( error = xfs_da3_node_lookup_int(state, &rval); if (error) rval = error; - if (rval != ENOENT) { + if (rval != -ENOENT) { goto done; } /* @@ -1628,7 +1628,7 @@ xfs_dir2_node_addname( * It didn't work, we need to split the leaf block. */ if (args->total == 0) { - ASSERT(rval == ENOSPC); + ASSERT(rval == -ENOSPC); goto done; } /* @@ -1815,7 +1815,7 @@ xfs_dir2_node_addname_int( * Not allowed to allocate, return failure. */ if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) - return XFS_ERROR(ENOSPC); + return -ENOSPC; /* * Allocate and initialize the new data block. @@ -1876,7 +1876,7 @@ xfs_dir2_node_addname_int( } XFS_ERROR_REPORT("xfs_dir2_node_addname_int", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } /* @@ -2042,8 +2042,8 @@ xfs_dir2_node_lookup( error = xfs_da3_node_lookup_int(state, &rval); if (error) rval = error; - else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE) { - /* If a CI match, dup the actual name and return EEXIST */ + else if (rval == -ENOENT && args->cmpresult == XFS_CMP_CASE) { + /* If a CI match, dup the actual name and return -EEXIST */ xfs_dir2_data_entry_t *dep; dep = (xfs_dir2_data_entry_t *) @@ -2096,7 +2096,7 @@ xfs_dir2_node_removename( goto out_free; /* Didn't find it, upper layer screwed up. */ - if (rval != EEXIST) { + if (rval != -EEXIST) { error = rval; goto out_free; } @@ -2169,7 +2169,7 @@ xfs_dir2_node_replace( * It should be found, since the vnodeops layer has looked it up * and locked it. But paranoia is good. */ - if (rval == EEXIST) { + if (rval == -EEXIST) { struct xfs_dir2_leaf_entry *ents; /* * Find the leaf entry. @@ -2272,7 +2272,7 @@ xfs_dir2_node_trim_free( * space reservation, when breaking up an extent into two * pieces. This is the last block of an extent. */ - ASSERT(error != ENOSPC); + ASSERT(error != -ENOSPC); xfs_trans_brelse(tp, bp); return error; } diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index 27ce0794d19..27ce0794d19 100644 --- a/fs/xfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index 53c3be619db..5079e051ef0 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -51,10 +51,9 @@ static void xfs_dir2_sf_check(xfs_da_args_t *args); #else #define xfs_dir2_sf_check(args) #endif /* DEBUG */ -#if XFS_BIG_INUMS + static void xfs_dir2_sf_toino4(xfs_da_args_t *args); static void xfs_dir2_sf_toino8(xfs_da_args_t *args); -#endif /* XFS_BIG_INUMS */ /* * Given a block directory (dp/block), calculate its size as a shortform (sf) @@ -117,10 +116,10 @@ xfs_dir2_block_sfsize( isdotdot = dep->namelen == 2 && dep->name[0] == '.' && dep->name[1] == '.'; -#if XFS_BIG_INUMS + if (!isdot) i8count += be64_to_cpu(dep->inumber) > XFS_DIR2_MAX_SHORT_INUM; -#endif + /* take into account the file type field */ if (!isdot && !isdotdot) { count++; @@ -251,7 +250,7 @@ xfs_dir2_block_to_sf( logflags = XFS_ILOG_CORE; error = xfs_dir2_shrink_inode(args, args->geo->datablk, bp); if (error) { - ASSERT(error != ENOSPC); + ASSERT(error != -ENOSPC); goto out; } @@ -299,7 +298,7 @@ xfs_dir2_sf_addname( trace_xfs_dir2_sf_addname(args); - ASSERT(xfs_dir2_sf_lookup(args) == ENOENT); + ASSERT(xfs_dir2_sf_lookup(args) == -ENOENT); dp = args->dp; ASSERT(dp->i_df.if_flags & XFS_IFINLINE); /* @@ -307,7 +306,7 @@ xfs_dir2_sf_addname( */ if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return XFS_ERROR(EIO); + return -EIO; } ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); @@ -318,7 +317,7 @@ xfs_dir2_sf_addname( */ incr_isize = dp->d_ops->sf_entsize(sfp, args->namelen); objchange = 0; -#if XFS_BIG_INUMS + /* * Do we have to change to 8 byte inodes? */ @@ -332,7 +331,7 @@ xfs_dir2_sf_addname( (uint)sizeof(xfs_dir2_ino4_t)); objchange = 1; } -#endif + new_isize = (int)dp->i_d.di_size + incr_isize; /* * Won't fit as shortform any more (due to size), @@ -345,7 +344,7 @@ xfs_dir2_sf_addname( * Just checking or no space reservation, it doesn't fit. */ if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) - return XFS_ERROR(ENOSPC); + return -ENOSPC; /* * Convert to block form then add the name. */ @@ -370,10 +369,8 @@ xfs_dir2_sf_addname( */ else { ASSERT(pick == 2); -#if XFS_BIG_INUMS if (objchange) xfs_dir2_sf_toino8(args); -#endif xfs_dir2_sf_addname_hard(args, objchange, new_isize); } xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); @@ -425,10 +422,8 @@ xfs_dir2_sf_addname_easy( * Update the header and inode. */ sfp->count++; -#if XFS_BIG_INUMS if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) sfp->i8count++; -#endif dp->i_d.di_size = new_isize; xfs_dir2_sf_check(args); } @@ -516,10 +511,8 @@ xfs_dir2_sf_addname_hard( dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); dp->d_ops->sf_put_ftype(sfep, args->filetype); sfp->count++; -#if XFS_BIG_INUMS if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange) sfp->i8count++; -#endif /* * If there's more left to copy, do that. */ @@ -593,13 +586,8 @@ xfs_dir2_sf_addname_pick( /* * If changing the inode number size, do it the hard way. */ -#if XFS_BIG_INUMS - if (objchange) { + if (objchange) return 2; - } -#else - ASSERT(objchange == 0); -#endif /* * If it won't fit at the end then do it the hard way (use the hole). */ @@ -650,7 +638,6 @@ xfs_dir2_sf_check( ASSERT(dp->d_ops->sf_get_ftype(sfep) < XFS_DIR3_FT_MAX); } ASSERT(i8count == sfp->i8count); - ASSERT(XFS_BIG_INUMS || i8count == 0); ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size); ASSERT(offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + @@ -738,7 +725,7 @@ xfs_dir2_sf_lookup( */ if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return XFS_ERROR(EIO); + return -EIO; } ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); @@ -751,7 +738,7 @@ xfs_dir2_sf_lookup( args->inumber = dp->i_ino; args->cmpresult = XFS_CMP_EXACT; args->filetype = XFS_DIR3_FT_DIR; - return XFS_ERROR(EEXIST); + return -EEXIST; } /* * Special case for .. @@ -761,7 +748,7 @@ xfs_dir2_sf_lookup( args->inumber = dp->d_ops->sf_get_parent_ino(sfp); args->cmpresult = XFS_CMP_EXACT; args->filetype = XFS_DIR3_FT_DIR; - return XFS_ERROR(EEXIST); + return -EEXIST; } /* * Loop over all the entries trying to match ours. @@ -781,20 +768,20 @@ xfs_dir2_sf_lookup( args->inumber = dp->d_ops->sf_get_ino(sfp, sfep); args->filetype = dp->d_ops->sf_get_ftype(sfep); if (cmp == XFS_CMP_EXACT) - return XFS_ERROR(EEXIST); + return -EEXIST; ci_sfep = sfep; } } ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); /* * Here, we can only be doing a lookup (not a rename or replace). - * If a case-insensitive match was not found, return ENOENT. + * If a case-insensitive match was not found, return -ENOENT. */ if (!ci_sfep) - return XFS_ERROR(ENOENT); + return -ENOENT; /* otherwise process the CI match as required by the caller */ error = xfs_dir_cilookup_result(args, ci_sfep->name, ci_sfep->namelen); - return XFS_ERROR(error); + return error; } /* @@ -824,7 +811,7 @@ xfs_dir2_sf_removename( */ if (oldsize < offsetof(xfs_dir2_sf_hdr_t, parent)) { ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return XFS_ERROR(EIO); + return -EIO; } ASSERT(dp->i_df.if_bytes == oldsize); ASSERT(dp->i_df.if_u1.if_data != NULL); @@ -847,7 +834,7 @@ xfs_dir2_sf_removename( * Didn't find it. */ if (i == sfp->count) - return XFS_ERROR(ENOENT); + return -ENOENT; /* * Calculate sizes. */ @@ -870,7 +857,6 @@ xfs_dir2_sf_removename( */ xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK); sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; -#if XFS_BIG_INUMS /* * Are we changing inode number size? */ @@ -880,7 +866,6 @@ xfs_dir2_sf_removename( else sfp->i8count--; } -#endif xfs_dir2_sf_check(args); xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); return 0; @@ -895,12 +880,8 @@ xfs_dir2_sf_replace( { xfs_inode_t *dp; /* incore directory inode */ int i; /* entry index */ -#if XFS_BIG_INUMS || defined(DEBUG) xfs_ino_t ino=0; /* entry old inode number */ -#endif -#if XFS_BIG_INUMS int i8elevated; /* sf_toino8 set i8count=1 */ -#endif xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ @@ -914,13 +895,13 @@ xfs_dir2_sf_replace( */ if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return XFS_ERROR(EIO); + return -EIO; } ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); -#if XFS_BIG_INUMS + /* * New inode number is large, and need to convert to 8-byte inodes. */ @@ -951,17 +932,15 @@ xfs_dir2_sf_replace( sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; } else i8elevated = 0; -#endif + ASSERT(args->namelen != 1 || args->name[0] != '.'); /* * Replace ..'s entry. */ if (args->namelen == 2 && args->name[0] == '.' && args->name[1] == '.') { -#if XFS_BIG_INUMS || defined(DEBUG) ino = dp->d_ops->sf_get_parent_ino(sfp); ASSERT(args->inumber != ino); -#endif dp->d_ops->sf_put_parent_ino(sfp, args->inumber); } /* @@ -972,10 +951,8 @@ xfs_dir2_sf_replace( i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { if (xfs_da_compname(args, sfep->name, sfep->namelen) == XFS_CMP_EXACT) { -#if XFS_BIG_INUMS || defined(DEBUG) ino = dp->d_ops->sf_get_ino(sfp, sfep); ASSERT(args->inumber != ino); -#endif dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); dp->d_ops->sf_put_ftype(sfep, args->filetype); break; @@ -986,14 +963,11 @@ xfs_dir2_sf_replace( */ if (i == sfp->count) { ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); -#if XFS_BIG_INUMS if (i8elevated) xfs_dir2_sf_toino4(args); -#endif - return XFS_ERROR(ENOENT); + return -ENOENT; } } -#if XFS_BIG_INUMS /* * See if the old number was large, the new number is small. */ @@ -1020,13 +994,11 @@ xfs_dir2_sf_replace( if (!i8elevated) sfp->i8count++; } -#endif xfs_dir2_sf_check(args); xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA); return 0; } -#if XFS_BIG_INUMS /* * Convert from 8-byte inode numbers to 4-byte inode numbers. * The last 8-byte inode number is gone, but the count is still 1. @@ -1181,4 +1153,3 @@ xfs_dir2_sf_toino8( dp->i_d.di_size = newsize; xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); } -#endif /* XFS_BIG_INUMS */ diff --git a/fs/xfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index c2ac0c611ad..bb969337efc 100644 --- a/fs/xfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -257,9 +257,9 @@ xfs_dquot_buf_read_verify( struct xfs_mount *mp = bp->b_target->bt_mount; if (!xfs_dquot_buf_verify_crc(mp, bp)) - xfs_buf_ioerror(bp, EFSBADCRC); + xfs_buf_ioerror(bp, -EFSBADCRC); else if (!xfs_dquot_buf_verify(mp, bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); if (bp->b_error) xfs_verifier_error(bp); @@ -277,7 +277,7 @@ xfs_dquot_buf_write_verify( struct xfs_mount *mp = bp->b_target->bt_mount; if (!xfs_dquot_buf_verify(mp, bp)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); return; } diff --git a/fs/xfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 34d85aca305..7e42bba9a42 100644 --- a/fs/xfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -68,11 +68,7 @@ struct xfs_ifork; #define XFS_RTLOBIT(w) xfs_lowbit32(w) #define XFS_RTHIBIT(w) xfs_highbit32(w) -#if XFS_BIG_BLKNOS #define XFS_RTBLOCKLOG(b) xfs_highbit64(b) -#else -#define XFS_RTBLOCKLOG(b) xfs_highbit32(b) -#endif /* * Dquot and dquot block format definitions @@ -304,23 +300,15 @@ typedef struct xfs_bmbt_rec_host { * Values and macros for delayed-allocation startblock fields. */ #define STARTBLOCKVALBITS 17 -#define STARTBLOCKMASKBITS (15 + XFS_BIG_BLKNOS * 20) -#define DSTARTBLOCKMASKBITS (15 + 20) +#define STARTBLOCKMASKBITS (15 + 20) #define STARTBLOCKMASK \ (((((xfs_fsblock_t)1) << STARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS) -#define DSTARTBLOCKMASK \ - (((((xfs_dfsbno_t)1) << DSTARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS) static inline int isnullstartblock(xfs_fsblock_t x) { return ((x) & STARTBLOCKMASK) == STARTBLOCKMASK; } -static inline int isnulldstartblock(xfs_dfsbno_t x) -{ - return ((x) & DSTARTBLOCKMASK) == DSTARTBLOCKMASK; -} - static inline xfs_fsblock_t nullstartblock(int k) { ASSERT(k < (1 << STARTBLOCKVALBITS)); diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 5960e5593fe..b62771f1f4b 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -292,7 +292,7 @@ xfs_ialloc_inode_init( mp->m_bsize * blks_per_cluster, XBF_UNMAPPED); if (!fbuf) - return ENOMEM; + return -ENOMEM; /* Initialize the inode buffers and log them appropriately. */ fbuf->b_ops = &xfs_inode_buf_ops; @@ -380,7 +380,7 @@ xfs_ialloc_ag_alloc( newlen = args.mp->m_ialloc_inos; if (args.mp->m_maxicount && args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) - return XFS_ERROR(ENOSPC); + return -ENOSPC; args.minlen = args.maxlen = args.mp->m_ialloc_blks; /* * First try to allocate inodes contiguous with the last-allocated @@ -1385,7 +1385,7 @@ xfs_dialloc( if (error) { xfs_trans_brelse(tp, agbp); - if (error != ENOSPC) + if (error != -ENOSPC) goto out_error; xfs_perag_put(pag); @@ -1416,7 +1416,7 @@ nextag: agno = 0; if (agno == start_agno) { *inop = NULLFSINO; - return noroom ? ENOSPC : 0; + return noroom ? -ENOSPC : 0; } } @@ -1425,7 +1425,7 @@ out_alloc: return xfs_dialloc_ag(tp, agbp, parent, inop); out_error: xfs_perag_put(pag); - return XFS_ERROR(error); + return error; } STATIC int @@ -1682,7 +1682,7 @@ xfs_difree( xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).", __func__, agno, mp->m_sb.sb_agcount); ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } agino = XFS_INO_TO_AGINO(mp, inode); if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { @@ -1690,14 +1690,14 @@ xfs_difree( __func__, (unsigned long long)inode, (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino)); ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } agbno = XFS_AGINO_TO_AGBNO(mp, agino); if (agbno >= mp->m_sb.sb_agblocks) { xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", __func__, agbno, mp->m_sb.sb_agblocks); ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } /* * Get the allocation group header. @@ -1769,7 +1769,7 @@ xfs_imap_lookup( if (i) error = xfs_inobt_get_rec(cur, &rec, &i); if (!error && i == 0) - error = EINVAL; + error = -EINVAL; } xfs_trans_brelse(tp, agbp); @@ -1780,12 +1780,12 @@ xfs_imap_lookup( /* check that the returned record contains the required inode */ if (rec.ir_startino > agino || rec.ir_startino + mp->m_ialloc_inos <= agino) - return EINVAL; + return -EINVAL; /* for untrusted inodes check it is allocated first */ if ((flags & XFS_IGET_UNTRUSTED) && (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) - return EINVAL; + return -EINVAL; *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino); *offset_agbno = agbno - *chunk_agbno; @@ -1829,7 +1829,7 @@ xfs_imap( * as they can be invalid without implying corruption. */ if (flags & XFS_IGET_UNTRUSTED) - return XFS_ERROR(EINVAL); + return -EINVAL; if (agno >= mp->m_sb.sb_agcount) { xfs_alert(mp, "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)", @@ -1849,7 +1849,7 @@ xfs_imap( } xfs_stack_trace(); #endif /* DEBUG */ - return XFS_ERROR(EINVAL); + return -EINVAL; } blks_per_cluster = xfs_icluster_size_fsb(mp); @@ -1922,7 +1922,7 @@ out_map: __func__, (unsigned long long) imap->im_blkno, (unsigned long long) imap->im_len, XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); - return XFS_ERROR(EINVAL); + return -EINVAL; } return 0; } @@ -2072,11 +2072,11 @@ xfs_agi_read_verify( if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) - xfs_buf_ioerror(bp, EFSBADCRC); + xfs_buf_ioerror(bp, -EFSBADCRC); else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp, XFS_ERRTAG_IALLOC_READ_AGI, XFS_RANDOM_IALLOC_READ_AGI)) - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); if (bp->b_error) xfs_verifier_error(bp); @@ -2090,7 +2090,7 @@ xfs_agi_write_verify( struct xfs_buf_log_item *bip = bp->b_fspriv; if (!xfs_agi_verify(bp)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); return; } diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index 95ad1c002d6..95ad1c002d6 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 726f83a681a..c9b06f30fe8 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -272,9 +272,9 @@ xfs_inobt_read_verify( struct xfs_buf *bp) { if (!xfs_btree_sblock_verify_crc(bp)) - xfs_buf_ioerror(bp, EFSBADCRC); + xfs_buf_ioerror(bp, -EFSBADCRC); else if (!xfs_inobt_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); if (bp->b_error) { trace_xfs_btree_corrupt(bp, _RET_IP_); @@ -288,7 +288,7 @@ xfs_inobt_write_verify( { if (!xfs_inobt_verify(bp)) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); return; } diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index d7ebea72c2d..d7ebea72c2d 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index cb35ae41d4a..f18fd2da49f 100644 --- a/fs/xfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -101,7 +101,7 @@ xfs_inode_buf_verify( return; } - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); #ifdef DEBUG xfs_alert(mp, @@ -174,14 +174,14 @@ xfs_imap_to_bp( (int)imap->im_len, buf_flags, &bp, &xfs_inode_buf_ops); if (error) { - if (error == EAGAIN) { + if (error == -EAGAIN) { ASSERT(buf_flags & XBF_TRYLOCK); return error; } - if (error == EFSCORRUPTED && + if (error == -EFSCORRUPTED && (iget_flags & XFS_IGET_UNTRUSTED)) - return XFS_ERROR(EINVAL); + return -EINVAL; xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.", __func__, error); @@ -390,7 +390,7 @@ xfs_iread( __func__, ip->i_ino); XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; goto out_brelse; } diff --git a/fs/xfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index 9308c47f2a5..9308c47f2a5 100644 --- a/fs/xfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h diff --git a/fs/xfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index b031e8d0d92..6a00f7fed69 100644 --- a/fs/xfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -102,7 +102,7 @@ xfs_iformat_fork( be64_to_cpu(dip->di_nblocks)); XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { @@ -111,7 +111,7 @@ xfs_iformat_fork( dip->di_forkoff); XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && @@ -121,7 +121,7 @@ xfs_iformat_fork( ip->i_ino); XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } switch (ip->i_d.di_mode & S_IFMT) { @@ -132,7 +132,7 @@ xfs_iformat_fork( if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) { XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } ip->i_d.di_size = 0; ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); @@ -153,7 +153,7 @@ xfs_iformat_fork( XFS_CORRUPTION_ERROR("xfs_iformat(4)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } di_size = be64_to_cpu(dip->di_size); @@ -166,7 +166,7 @@ xfs_iformat_fork( XFS_CORRUPTION_ERROR("xfs_iformat(5)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } size = (int)di_size; @@ -181,13 +181,13 @@ xfs_iformat_fork( default: XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, ip->i_mount); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } break; default: XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } if (error) { return error; @@ -211,7 +211,7 @@ xfs_iformat_fork( XFS_CORRUPTION_ERROR("xfs_iformat(8)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); @@ -223,7 +223,7 @@ xfs_iformat_fork( error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); break; default: - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; break; } if (error) { @@ -266,7 +266,7 @@ xfs_iformat_local( XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } ifp = XFS_IFORK_PTR(ip, whichfork); real_size = 0; @@ -322,7 +322,7 @@ xfs_iformat_extents( (unsigned long long) ip->i_ino, nex); XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } ifp->if_real_bytes = 0; @@ -350,7 +350,7 @@ xfs_iformat_extents( XFS_ERROR_REPORT("xfs_iformat_extents(2)", XFS_ERRLEVEL_LOW, ip->i_mount); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } } ifp->if_flags |= XFS_IFEXTENTS; @@ -399,7 +399,7 @@ xfs_iformat_btree( (unsigned long long) ip->i_ino); XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, mp, dip); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } ifp->if_broot_bytes = size; @@ -436,7 +436,7 @@ xfs_iread_extents( if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, ip->i_mount); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } nextents = XFS_IFORK_NEXTENTS(ip, whichfork); ifp = XFS_IFORK_PTR(ip, whichfork); @@ -528,7 +528,7 @@ xfs_iroot_realloc( ifp->if_broot_bytes = (int)new_size; ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= XFS_IFORK_SIZE(ip, whichfork)); - memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t)); + memmove(np, op, cur_max * (uint)sizeof(xfs_fsblock_t)); return; } @@ -575,7 +575,7 @@ xfs_iroot_realloc( ifp->if_broot_bytes); np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1, (int)new_size); - memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t)); + memcpy(np, op, new_max * (uint)sizeof(xfs_fsblock_t)); } kmem_free(ifp->if_broot); ifp->if_broot = new_broot; @@ -1692,7 +1692,7 @@ xfs_iext_idx_to_irec( } *idxp = page_idx; *erp_idxp = erp_idx; - return(erp); + return erp; } /* diff --git a/fs/xfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index 7d3b1ed6dcb..7d3b1ed6dcb 100644 --- a/fs/xfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h diff --git a/fs/xfs/xfs_inum.h b/fs/xfs/libxfs/xfs_inum.h index 90efdaf1706..4ff2278e147 100644 --- a/fs/xfs/xfs_inum.h +++ b/fs/xfs/libxfs/xfs_inum.h @@ -54,11 +54,7 @@ struct xfs_mount; #define XFS_OFFBNO_TO_AGINO(mp,b,o) \ ((xfs_agino_t)(((b) << XFS_INO_OFFSET_BITS(mp)) | (o))) -#if XFS_BIG_INUMS #define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 56) - 1ULL)) -#else -#define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 32) - 1ULL)) -#endif #define XFS_MAXINUMBER_32 ((xfs_ino_t)((1ULL << 32) - 1ULL)) #endif /* __XFS_INUM_H__ */ diff --git a/fs/xfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index f0969c77bdb..aff12f2d442 100644 --- a/fs/xfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -380,7 +380,7 @@ typedef struct xfs_icdinode { xfs_ictimestamp_t di_mtime; /* time last modified */ xfs_ictimestamp_t di_ctime; /* time created/inode modified */ xfs_fsize_t di_size; /* number of bytes in file */ - xfs_drfsbno_t di_nblocks; /* # of direct & btree blocks used */ + xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */ xfs_extlen_t di_extsize; /* basic/minimum extent size for file */ xfs_extnum_t di_nextents; /* number of extents in data fork */ xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/ @@ -516,7 +516,7 @@ xfs_blft_from_flags(struct xfs_buf_log_format *blf) * EFI/EFD log format definitions */ typedef struct xfs_extent { - xfs_dfsbno_t ext_start; + xfs_fsblock_t ext_start; xfs_extlen_t ext_len; } xfs_extent_t; diff --git a/fs/xfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h index 1c55ccbb379..1c55ccbb379 100644 --- a/fs/xfs/xfs_log_recover.h +++ b/fs/xfs/libxfs/xfs_log_recover.h diff --git a/fs/xfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c index ee7e0e80246..ee7e0e80246 100644 --- a/fs/xfs/xfs_log_rlimit.c +++ b/fs/xfs/libxfs/xfs_log_rlimit.c diff --git a/fs/xfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h index 137e2093707..1b0a0837975 100644 --- a/fs/xfs/xfs_quota_defs.h +++ b/fs/xfs/libxfs/xfs_quota_defs.h @@ -98,8 +98,6 @@ typedef __uint16_t xfs_qwarncnt_t; #define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \ XFS_GQUOTA_ACTIVE | \ XFS_PQUOTA_ACTIVE)) -#define XFS_IS_OQUOTA_ON(mp) ((mp)->m_qflags & (XFS_GQUOTA_ACTIVE | \ - XFS_PQUOTA_ACTIVE)) #define XFS_IS_UQUOTA_ON(mp) ((mp)->m_qflags & XFS_UQUOTA_ACTIVE) #define XFS_IS_GQUOTA_ON(mp) ((mp)->m_qflags & XFS_GQUOTA_ACTIVE) #define XFS_IS_PQUOTA_ON(mp) ((mp)->m_qflags & XFS_PQUOTA_ACTIVE) diff --git a/fs/xfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index f4dd697cac0..f4dd697cac0 100644 --- a/fs/xfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 7703fa6770f..ad525a5623a 100644 --- a/fs/xfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -186,13 +186,13 @@ xfs_mount_validate_sb( */ if (sbp->sb_magicnum != XFS_SB_MAGIC) { xfs_warn(mp, "bad magic number"); - return XFS_ERROR(EWRONGFS); + return -EWRONGFS; } if (!xfs_sb_good_version(sbp)) { xfs_warn(mp, "bad version"); - return XFS_ERROR(EWRONGFS); + return -EWRONGFS; } /* @@ -220,7 +220,7 @@ xfs_mount_validate_sb( xfs_warn(mp, "Attempted to mount read-only compatible filesystem read-write.\n" "Filesystem can only be safely mounted read only."); - return XFS_ERROR(EINVAL); + return -EINVAL; } } if (xfs_sb_has_incompat_feature(sbp, @@ -230,7 +230,7 @@ xfs_mount_validate_sb( "Filesystem can not be safely mounted by this kernel.", (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_UNKNOWN)); - return XFS_ERROR(EINVAL); + return -EINVAL; } } @@ -238,13 +238,13 @@ xfs_mount_validate_sb( if (sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) { xfs_notice(mp, "Version 5 of Super block has XFS_OQUOTA bits."); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } } else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD | XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) { xfs_notice(mp, "Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits."); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } if (unlikely( @@ -252,7 +252,7 @@ xfs_mount_validate_sb( xfs_warn(mp, "filesystem is marked as having an external log; " "specify logdev on the mount command line."); - return XFS_ERROR(EINVAL); + return -EINVAL; } if (unlikely( @@ -260,7 +260,7 @@ xfs_mount_validate_sb( xfs_warn(mp, "filesystem is marked as having an internal log; " "do not specify logdev on the mount command line."); - return XFS_ERROR(EINVAL); + return -EINVAL; } /* @@ -294,7 +294,7 @@ xfs_mount_validate_sb( sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp) || sbp->sb_shared_vn != 0)) { xfs_notice(mp, "SB sanity check failed"); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } /* @@ -305,7 +305,7 @@ xfs_mount_validate_sb( "File system with blocksize %d bytes. " "Only pagesize (%ld) or less will currently work.", sbp->sb_blocksize, PAGE_SIZE); - return XFS_ERROR(ENOSYS); + return -ENOSYS; } /* @@ -320,19 +320,19 @@ xfs_mount_validate_sb( default: xfs_warn(mp, "inode size of %d bytes not supported", sbp->sb_inodesize); - return XFS_ERROR(ENOSYS); + return -ENOSYS; } if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { xfs_warn(mp, "file system too large to be mounted on this system."); - return XFS_ERROR(EFBIG); + return -EFBIG; } if (check_inprogress && sbp->sb_inprogress) { xfs_warn(mp, "Offline file system operation in progress!"); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } return 0; } @@ -386,10 +386,11 @@ xfs_sb_quota_from_disk(struct xfs_sb *sbp) } } -void -xfs_sb_from_disk( +static void +__xfs_sb_from_disk( struct xfs_sb *to, - xfs_dsb_t *from) + xfs_dsb_t *from, + bool convert_xquota) { to->sb_magicnum = be32_to_cpu(from->sb_magicnum); to->sb_blocksize = be32_to_cpu(from->sb_blocksize); @@ -445,6 +446,17 @@ xfs_sb_from_disk( to->sb_pad = 0; to->sb_pquotino = be64_to_cpu(from->sb_pquotino); to->sb_lsn = be64_to_cpu(from->sb_lsn); + /* Convert on-disk flags to in-memory flags? */ + if (convert_xquota) + xfs_sb_quota_from_disk(to); +} + +void +xfs_sb_from_disk( + struct xfs_sb *to, + xfs_dsb_t *from) +{ + __xfs_sb_from_disk(to, from, true); } static inline void @@ -577,7 +589,11 @@ xfs_sb_verify( struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_sb sb; - xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp)); + /* + * Use call variant which doesn't convert quota flags from disk + * format, because xfs_mount_validate_sb checks the on-disk flags. + */ + __xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp), false); /* * Only check the in progress field for the primary superblock as @@ -620,7 +636,7 @@ xfs_sb_read_verify( /* Only fail bad secondaries on a known V5 filesystem */ if (bp->b_bn == XFS_SB_DADDR || xfs_sb_version_hascrc(&mp->m_sb)) { - error = EFSBADCRC; + error = -EFSBADCRC; goto out_error; } } @@ -630,7 +646,7 @@ xfs_sb_read_verify( out_error: if (error) { xfs_buf_ioerror(bp, error); - if (error == EFSCORRUPTED || error == EFSBADCRC) + if (error == -EFSCORRUPTED || error == -EFSBADCRC) xfs_verifier_error(bp); } } @@ -653,7 +669,7 @@ xfs_sb_quiet_read_verify( return; } /* quietly fail */ - xfs_buf_ioerror(bp, EWRONGFS); + xfs_buf_ioerror(bp, -EWRONGFS); } static void diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index c43c2d609a2..2e739708afd 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -87,11 +87,11 @@ struct xfs_trans; typedef struct xfs_sb { __uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */ __uint32_t sb_blocksize; /* logical block size, bytes */ - xfs_drfsbno_t sb_dblocks; /* number of data blocks */ - xfs_drfsbno_t sb_rblocks; /* number of realtime blocks */ - xfs_drtbno_t sb_rextents; /* number of realtime extents */ + xfs_rfsblock_t sb_dblocks; /* number of data blocks */ + xfs_rfsblock_t sb_rblocks; /* number of realtime blocks */ + xfs_rtblock_t sb_rextents; /* number of realtime extents */ uuid_t sb_uuid; /* file system unique id */ - xfs_dfsbno_t sb_logstart; /* starting block of log if internal */ + xfs_fsblock_t sb_logstart; /* starting block of log if internal */ xfs_ino_t sb_rootino; /* root inode number */ xfs_ino_t sb_rbmino; /* bitmap inode for realtime extents */ xfs_ino_t sb_rsumino; /* summary inode for rt bitmap */ diff --git a/fs/xfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 82404da2ca6..82404da2ca6 100644 --- a/fs/xfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h diff --git a/fs/xfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index 23c2f2577c8..5782f037eab 100644 --- a/fs/xfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -133,9 +133,9 @@ xfs_symlink_read_verify( return; if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF)) - xfs_buf_ioerror(bp, EFSBADCRC); + xfs_buf_ioerror(bp, -EFSBADCRC); else if (!xfs_symlink_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); if (bp->b_error) xfs_verifier_error(bp); @@ -153,7 +153,7 @@ xfs_symlink_write_verify( return; if (!xfs_symlink_verify(bp)) { - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); return; } diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index f2bda7c76b8..f2bda7c76b8 100644 --- a/fs/xfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c diff --git a/fs/xfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h index 1097d14cd58..1097d14cd58 100644 --- a/fs/xfs/xfs_trans_resv.h +++ b/fs/xfs/libxfs/xfs_trans_resv.h diff --git a/fs/xfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h index bf9c4579334..bf9c4579334 100644 --- a/fs/xfs/xfs_trans_space.h +++ b/fs/xfs/libxfs/xfs_trans_space.h diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 6888ad886ff..a65fa5dde6e 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -152,7 +152,7 @@ xfs_get_acl(struct inode *inode, int type) if (!xfs_acl) return ERR_PTR(-ENOMEM); - error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl, + error = xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl, &len, ATTR_ROOT); if (error) { /* @@ -210,7 +210,7 @@ __xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) len -= sizeof(struct xfs_acl_entry) * (XFS_ACL_MAX_ENTRIES(ip->i_mount) - acl->a_count); - error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl, + error = xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl, len, ATTR_ROOT); kmem_free(xfs_acl); @@ -218,7 +218,7 @@ __xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) /* * A NULL ACL argument means we want to remove the ACL. */ - error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT); + error = xfs_attr_remove(ip, ea_name, ATTR_ROOT); /* * If the attribute didn't exist to start with that's fine. @@ -244,7 +244,7 @@ xfs_set_mode(struct inode *inode, umode_t mode) iattr.ia_mode = mode; iattr.ia_ctime = current_fs_time(inode->i_sb); - error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL); + error = xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL); } return error; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index faaf716e208..b984647c24d 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -240,7 +240,7 @@ xfs_end_io( done: if (error) - ioend->io_error = -error; + ioend->io_error = error; xfs_destroy_ioend(ioend); } @@ -308,14 +308,14 @@ xfs_map_blocks( int nimaps = 1; if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); + return -EIO; if (type == XFS_IO_UNWRITTEN) bmapi_flags |= XFS_BMAPI_IGSTATE; if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { if (nonblocking) - return -XFS_ERROR(EAGAIN); + return -EAGAIN; xfs_ilock(ip, XFS_ILOCK_SHARED); } @@ -332,14 +332,14 @@ xfs_map_blocks( xfs_iunlock(ip, XFS_ILOCK_SHARED); if (error) - return -XFS_ERROR(error); + return error; if (type == XFS_IO_DELALLOC && (!nimaps || isnullstartblock(imap->br_startblock))) { error = xfs_iomap_write_allocate(ip, offset, imap); if (!error) trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); - return -XFS_ERROR(error); + return error; } #ifdef DEBUG @@ -502,7 +502,7 @@ xfs_submit_ioend( * time. */ if (fail) { - ioend->io_error = -fail; + ioend->io_error = fail; xfs_finish_ioend(ioend); continue; } @@ -1253,7 +1253,7 @@ __xfs_get_blocks( int new = 0; if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); + return -EIO; offset = (xfs_off_t)iblock << inode->i_blkbits; ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); @@ -1302,7 +1302,7 @@ __xfs_get_blocks( error = xfs_iomap_write_direct(ip, offset, size, &imap, nimaps); if (error) - return -error; + return error; new = 1; } else { /* @@ -1415,7 +1415,7 @@ __xfs_get_blocks( out_unlock: xfs_iunlock(ip, lockmode); - return -error; + return error; } int @@ -1753,11 +1753,72 @@ xfs_vm_readpages( return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); } +/* + * This is basically a copy of __set_page_dirty_buffers() with one + * small tweak: buffers beyond EOF do not get marked dirty. If we mark them + * dirty, we'll never be able to clean them because we don't write buffers + * beyond EOF, and that means we can't invalidate pages that span EOF + * that have been marked dirty. Further, the dirty state can leak into + * the file interior if the file is extended, resulting in all sorts of + * bad things happening as the state does not match the underlying data. + * + * XXX: this really indicates that bufferheads in XFS need to die. Warts like + * this only exist because of bufferheads and how the generic code manages them. + */ +STATIC int +xfs_vm_set_page_dirty( + struct page *page) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + loff_t end_offset; + loff_t offset; + int newly_dirty; + + if (unlikely(!mapping)) + return !TestSetPageDirty(page); + + end_offset = i_size_read(inode); + offset = page_offset(page); + + spin_lock(&mapping->private_lock); + if (page_has_buffers(page)) { + struct buffer_head *head = page_buffers(page); + struct buffer_head *bh = head; + + do { + if (offset < end_offset) + set_buffer_dirty(bh); + bh = bh->b_this_page; + offset += 1 << inode->i_blkbits; + } while (bh != head); + } + newly_dirty = !TestSetPageDirty(page); + spin_unlock(&mapping->private_lock); + + if (newly_dirty) { + /* sigh - __set_page_dirty() is static, so copy it here, too */ + unsigned long flags; + + spin_lock_irqsave(&mapping->tree_lock, flags); + if (page->mapping) { /* Race with truncate? */ + WARN_ON_ONCE(!PageUptodate(page)); + account_page_dirtied(page, mapping); + radix_tree_tag_set(&mapping->page_tree, + page_index(page), PAGECACHE_TAG_DIRTY); + } + spin_unlock_irqrestore(&mapping->tree_lock, flags); + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + } + return newly_dirty; +} + const struct address_space_operations xfs_address_space_operations = { .readpage = xfs_vm_readpage, .readpages = xfs_vm_readpages, .writepage = xfs_vm_writepage, .writepages = xfs_vm_writepages, + .set_page_dirty = xfs_vm_set_page_dirty, .releasepage = xfs_vm_releasepage, .invalidatepage = xfs_vm_invalidatepage, .write_begin = xfs_vm_write_begin, diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 09480c57f06..aa2a8b1838a 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -76,7 +76,7 @@ xfs_attr3_leaf_freextent( error = xfs_bmapi_read(dp, (xfs_fileoff_t)tblkno, tblkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK); if (error) { - return(error); + return error; } ASSERT(nmap == 1); ASSERT(map.br_startblock != DELAYSTARTBLOCK); @@ -95,21 +95,21 @@ xfs_attr3_leaf_freextent( dp->i_mount->m_ddev_targp, dblkno, dblkcnt, 0); if (!bp) - return ENOMEM; + return -ENOMEM; xfs_trans_binval(*trans, bp); /* * Roll to next transaction. */ error = xfs_trans_roll(trans, dp); if (error) - return (error); + return error; } tblkno += map.br_blockcount; tblkcnt -= map.br_blockcount; } - return(0); + return 0; } /* @@ -227,7 +227,7 @@ xfs_attr3_node_inactive( */ if (level > XFS_DA_NODE_MAXDEPTH) { xfs_trans_brelse(*trans, bp); /* no locks for later trans */ - return XFS_ERROR(EIO); + return -EIO; } node = bp->b_addr; @@ -256,7 +256,7 @@ xfs_attr3_node_inactive( error = xfs_da3_node_read(*trans, dp, child_fsb, -2, &child_bp, XFS_ATTR_FORK); if (error) - return(error); + return error; if (child_bp) { /* save for re-read later */ child_blkno = XFS_BUF_ADDR(child_bp); @@ -277,7 +277,7 @@ xfs_attr3_node_inactive( child_bp); break; default: - error = XFS_ERROR(EIO); + error = -EIO; xfs_trans_brelse(*trans, child_bp); break; } @@ -360,7 +360,7 @@ xfs_attr3_root_inactive( error = xfs_attr3_leaf_inactive(trans, dp, bp); break; default: - error = XFS_ERROR(EIO); + error = -EIO; xfs_trans_brelse(*trans, bp); break; } @@ -414,7 +414,7 @@ xfs_attr_inactive(xfs_inode_t *dp) error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0); if (error) { xfs_trans_cancel(trans, 0); - return(error); + return error; } xfs_ilock(dp, XFS_ILOCK_EXCL); @@ -443,10 +443,10 @@ xfs_attr_inactive(xfs_inode_t *dp) error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, XFS_ILOCK_EXCL); - return(error); + return error; out: xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); xfs_iunlock(dp, XFS_ILOCK_EXCL); - return(error); + return error; } diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 90e2eeb2120..62db83ab6cb 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -50,11 +50,11 @@ xfs_attr_shortform_compare(const void *a, const void *b) sa = (xfs_attr_sf_sort_t *)a; sb = (xfs_attr_sf_sort_t *)b; if (sa->hash < sb->hash) { - return(-1); + return -1; } else if (sa->hash > sb->hash) { - return(1); + return 1; } else { - return(sa->entno - sb->entno); + return sa->entno - sb->entno; } } @@ -86,7 +86,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data; ASSERT(sf != NULL); if (!sf->hdr.count) - return(0); + return 0; cursor = context->cursor; ASSERT(cursor != NULL); @@ -124,7 +124,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) sfe = XFS_ATTR_SF_NEXTENTRY(sfe); } trace_xfs_attr_list_sf_all(context); - return(0); + return 0; } /* do no more for a search callback */ @@ -150,7 +150,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) XFS_ERRLEVEL_LOW, context->dp->i_mount, sfe); kmem_free(sbuf); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } sbp->entno = i; @@ -188,7 +188,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) } if (i == nsbuf) { kmem_free(sbuf); - return(0); + return 0; } /* @@ -213,7 +213,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) } kmem_free(sbuf); - return(0); + return 0; } STATIC int @@ -243,8 +243,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) if (cursor->blkno > 0) { error = xfs_da3_node_read(NULL, dp, cursor->blkno, -1, &bp, XFS_ATTR_FORK); - if ((error != 0) && (error != EFSCORRUPTED)) - return(error); + if ((error != 0) && (error != -EFSCORRUPTED)) + return error; if (bp) { struct xfs_attr_leaf_entry *entries; @@ -295,7 +295,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) cursor->blkno, -1, &bp, XFS_ATTR_FORK); if (error) - return(error); + return error; node = bp->b_addr; magic = be16_to_cpu(node->hdr.info.magic); if (magic == XFS_ATTR_LEAF_MAGIC || @@ -308,7 +308,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) context->dp->i_mount, node); xfs_trans_brelse(NULL, bp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } dp->d_ops->node_hdr_from_disk(&nodehdr, node); @@ -496,11 +496,11 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context) context->cursor->blkno = 0; error = xfs_attr3_leaf_read(NULL, context->dp, 0, -1, &bp); if (error) - return XFS_ERROR(error); + return error; error = xfs_attr3_leaf_list_int(bp, context); xfs_trans_brelse(NULL, bp); - return XFS_ERROR(error); + return error; } int @@ -514,7 +514,7 @@ xfs_attr_list_int( XFS_STATS_INC(xs_attr_list); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) - return EIO; + return -EIO; /* * Decide on what work routines to call based on the inode size. @@ -616,16 +616,16 @@ xfs_attr_list( * Validate the cursor. */ if (cursor->pad1 || cursor->pad2) - return(XFS_ERROR(EINVAL)); + return -EINVAL; if ((cursor->initted == 0) && (cursor->hashval || cursor->blkno || cursor->offset)) - return XFS_ERROR(EINVAL); + return -EINVAL; /* * Check for a properly aligned buffer. */ if (((long)buffer) & (sizeof(int)-1)) - return XFS_ERROR(EFAULT); + return -EFAULT; if (flags & ATTR_KERNOVAL) bufsize = 0; @@ -648,6 +648,6 @@ xfs_attr_list( alist->al_offset[0] = context.bufsize; error = xfs_attr_list_int(&context); - ASSERT(error >= 0); + ASSERT(error <= 0); return error; } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 64731ef3324..1707980f9a4 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -133,7 +133,7 @@ xfs_bmap_finish( mp = ntp->t_mountp; if (!XFS_FORCED_SHUTDOWN(mp)) xfs_force_shutdown(mp, - (error == EFSCORRUPTED) ? + (error == -EFSCORRUPTED) ? SHUTDOWN_CORRUPT_INCORE : SHUTDOWN_META_IO_ERROR); return error; @@ -365,7 +365,7 @@ xfs_bmap_count_tree( xfs_trans_brelse(tp, bp); XFS_ERROR_REPORT("xfs_bmap_count_tree(1)", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } xfs_trans_brelse(tp, bp); } else { @@ -425,14 +425,14 @@ xfs_bmap_count_blocks( ASSERT(level > 0); pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); bno = be64_to_cpu(*pp); - ASSERT(bno != NULLDFSBNO); + ASSERT(bno != NULLFSBLOCK); ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) { XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } return 0; @@ -524,13 +524,13 @@ xfs_getbmap( if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS && ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE && ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) - return XFS_ERROR(EINVAL); + return -EINVAL; } else if (unlikely( ip->i_d.di_aformat != 0 && ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) { XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW, ip->i_mount); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } prealloced = 0; @@ -539,7 +539,7 @@ xfs_getbmap( if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && ip->i_d.di_format != XFS_DINODE_FMT_BTREE && ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) - return XFS_ERROR(EINVAL); + return -EINVAL; if (xfs_get_extsz_hint(ip) || ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ @@ -559,26 +559,26 @@ xfs_getbmap( bmv->bmv_entries = 0; return 0; } else if (bmv->bmv_length < 0) { - return XFS_ERROR(EINVAL); + return -EINVAL; } nex = bmv->bmv_count - 1; if (nex <= 0) - return XFS_ERROR(EINVAL); + return -EINVAL; bmvend = bmv->bmv_offset + bmv->bmv_length; if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) - return XFS_ERROR(ENOMEM); + return -ENOMEM; out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0); if (!out) - return XFS_ERROR(ENOMEM); + return -ENOMEM; xfs_ilock(ip, XFS_IOLOCK_SHARED); if (whichfork == XFS_DATA_FORK) { if (!(iflags & BMV_IF_DELALLOC) && (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) { - error = -filemap_write_and_wait(VFS_I(ip)->i_mapping); + error = filemap_write_and_wait(VFS_I(ip)->i_mapping); if (error) goto out_unlock_iolock; @@ -611,7 +611,7 @@ xfs_getbmap( /* * Allocate enough space to handle "subnex" maps at a time. */ - error = ENOMEM; + error = -ENOMEM; subnex = 16; map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS); if (!map) @@ -809,7 +809,7 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force) * have speculative prealloc/delalloc blocks to remove. */ if (VFS_I(ip)->i_size == 0 && - VN_CACHED(VFS_I(ip)) == 0 && + VFS_I(ip)->i_mapping->nrpages == 0 && ip->i_delayed_blks == 0) return false; @@ -882,7 +882,7 @@ xfs_free_eofblocks( if (need_iolock) { if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { xfs_trans_cancel(tp, 0); - return EAGAIN; + return -EAGAIN; } } @@ -955,14 +955,14 @@ xfs_alloc_file_space( trace_xfs_alloc_file_space(ip); if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; error = xfs_qm_dqattach(ip, 0); if (error) return error; if (len <= 0) - return XFS_ERROR(EINVAL); + return -EINVAL; rt = XFS_IS_REALTIME_INODE(ip); extsz = xfs_get_extsz_hint(ip); @@ -1028,7 +1028,7 @@ xfs_alloc_file_space( /* * Free the transaction structure. */ - ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); + ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); xfs_trans_cancel(tp, 0); break; } @@ -1065,7 +1065,7 @@ xfs_alloc_file_space( allocated_fsb = imapp->br_blockcount; if (nimaps == 0) { - error = XFS_ERROR(ENOSPC); + error = -ENOSPC; break; } @@ -1126,7 +1126,7 @@ xfs_zero_remaining_bytes( mp->m_rtdev_targp : mp->m_ddev_targp, BTOBB(mp->m_sb.sb_blocksize), 0); if (!bp) - return XFS_ERROR(ENOMEM); + return -ENOMEM; xfs_buf_unlock(bp); @@ -1158,7 +1158,7 @@ xfs_zero_remaining_bytes( XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); if (XFS_FORCED_SHUTDOWN(mp)) { - error = XFS_ERROR(EIO); + error = -EIO; break; } xfs_buf_iorequest(bp); @@ -1176,7 +1176,7 @@ xfs_zero_remaining_bytes( XFS_BUF_WRITE(bp); if (XFS_FORCED_SHUTDOWN(mp)) { - error = XFS_ERROR(EIO); + error = -EIO; break; } xfs_buf_iorequest(bp); @@ -1234,7 +1234,7 @@ xfs_free_file_space( rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); ioffset = offset & ~(rounding - 1); - error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, + error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset, -1); if (error) goto out; @@ -1315,7 +1315,7 @@ xfs_free_file_space( /* * Free the transaction structure. */ - ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); + ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); xfs_trans_cancel(tp, 0); break; } @@ -1470,6 +1470,26 @@ xfs_collapse_file_space( start_fsb = XFS_B_TO_FSB(mp, offset + len); shift_fsb = XFS_B_TO_FSB(mp, len); + /* + * Writeback the entire file and force remove any post-eof blocks. The + * writeback prevents changes to the extent list via concurrent + * writeback and the eofblocks trim prevents the extent shift algorithm + * from running into a post-eof delalloc extent. + * + * XXX: This is a temporary fix until the extent shift loop below is + * converted to use offsets and lookups within the ILOCK rather than + * carrying around the index into the extent list for the next + * iteration. + */ + error = filemap_write_and_wait(VFS_I(ip)->i_mapping); + if (error) + return error; + if (xfs_can_free_eofblocks(ip, true)) { + error = xfs_free_eofblocks(mp, ip, false); + if (error) + return error; + } + error = xfs_free_file_space(ip, offset, len); if (error) return error; @@ -1557,14 +1577,14 @@ xfs_swap_extents_check_format( /* Should never get a local format */ if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) - return EINVAL; + return -EINVAL; /* * if the target inode has less extents that then temporary inode then * why did userspace call us? */ if (ip->i_d.di_nextents < tip->i_d.di_nextents) - return EINVAL; + return -EINVAL; /* * if the target inode is in extent form and the temp inode is in btree @@ -1573,19 +1593,19 @@ xfs_swap_extents_check_format( */ if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && tip->i_d.di_format == XFS_DINODE_FMT_BTREE) - return EINVAL; + return -EINVAL; /* Check temp in extent form to max in target */ if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) - return EINVAL; + return -EINVAL; /* Check target in extent form to max in temp */ if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) - return EINVAL; + return -EINVAL; /* * If we are in a btree format, check that the temp root block will fit @@ -1599,26 +1619,50 @@ xfs_swap_extents_check_format( if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { if (XFS_IFORK_BOFF(ip) && XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip)) - return EINVAL; + return -EINVAL; if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) - return EINVAL; + return -EINVAL; } /* Reciprocal target->temp btree format checks */ if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { if (XFS_IFORK_BOFF(tip) && XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip)) - return EINVAL; + return -EINVAL; if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) - return EINVAL; + return -EINVAL; } return 0; } int +xfs_swap_extent_flush( + struct xfs_inode *ip) +{ + int error; + + error = filemap_write_and_wait(VFS_I(ip)->i_mapping); + if (error) + return error; + truncate_pagecache_range(VFS_I(ip), 0, -1); + + /* Verify O_DIRECT for ftmp */ + if (VFS_I(ip)->i_mapping->nrpages) + return -EINVAL; + + /* + * Don't try to swap extents on mmap()d files because we can't lock + * out races against page faults safely. + */ + if (mapping_mapped(VFS_I(ip)->i_mapping)) + return -EBUSY; + return 0; +} + +int xfs_swap_extents( xfs_inode_t *ip, /* target inode */ xfs_inode_t *tip, /* tmp inode */ @@ -1633,51 +1677,57 @@ xfs_swap_extents( int aforkblks = 0; int taforkblks = 0; __uint64_t tmp; + int lock_flags; tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); if (!tempifp) { - error = XFS_ERROR(ENOMEM); + error = -ENOMEM; goto out; } /* - * we have to do two separate lock calls here to keep lockdep - * happy. If we try to get all the locks in one call, lock will - * report false positives when we drop the ILOCK and regain them - * below. + * Lock up the inodes against other IO and truncate to begin with. + * Then we can ensure the inodes are flushed and have no page cache + * safely. Once we have done this we can take the ilocks and do the rest + * of the checks. */ + lock_flags = XFS_IOLOCK_EXCL; xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); - xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); /* Verify that both files have the same format */ if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto out_unlock; } /* Verify both files are either real-time or non-realtime */ if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto out_unlock; } - error = -filemap_write_and_wait(VFS_I(tip)->i_mapping); + error = xfs_swap_extent_flush(ip); + if (error) + goto out_unlock; + error = xfs_swap_extent_flush(tip); if (error) goto out_unlock; - truncate_pagecache_range(VFS_I(tip), 0, -1); - /* Verify O_DIRECT for ftmp */ - if (VN_CACHED(VFS_I(tip)) != 0) { - error = XFS_ERROR(EINVAL); + tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); goto out_unlock; } + xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); + lock_flags |= XFS_ILOCK_EXCL; /* Verify all data are being swapped */ if (sxp->sx_offset != 0 || sxp->sx_length != ip->i_d.di_size || sxp->sx_length != tip->i_d.di_size) { - error = XFS_ERROR(EFAULT); - goto out_unlock; + error = -EFAULT; + goto out_trans_cancel; } trace_xfs_swap_extent_before(ip, 0); @@ -1689,7 +1739,7 @@ xfs_swap_extents( xfs_notice(mp, "%s: inode 0x%llx format is incompatible for exchanging.", __func__, ip->i_ino); - goto out_unlock; + goto out_trans_cancel; } /* @@ -1703,43 +1753,9 @@ xfs_swap_extents( (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) || (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) || (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) { - error = XFS_ERROR(EBUSY); - goto out_unlock; + error = -EBUSY; + goto out_trans_cancel; } - - /* We need to fail if the file is memory mapped. Once we have tossed - * all existing pages, the page fault will have no option - * but to go to the filesystem for pages. By making the page fault call - * vop_read (or write in the case of autogrow) they block on the iolock - * until we have switched the extents. - */ - if (VN_MAPPED(VFS_I(ip))) { - error = XFS_ERROR(EBUSY); - goto out_unlock; - } - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_iunlock(tip, XFS_ILOCK_EXCL); - - /* - * There is a race condition here since we gave up the - * ilock. However, the data fork will not change since - * we have the iolock (locked for truncation too) so we - * are safe. We don't really care if non-io related - * fields change. - */ - truncate_pagecache_range(VFS_I(ip), 0, -1); - - tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); - if (error) { - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - xfs_iunlock(tip, XFS_IOLOCK_EXCL); - xfs_trans_cancel(tp, 0); - goto out; - } - xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); - /* * Count the number of extended attribute blocks */ @@ -1757,8 +1773,8 @@ xfs_swap_extents( goto out_trans_cancel; } - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + xfs_trans_ijoin(tp, ip, lock_flags); + xfs_trans_ijoin(tp, tip, lock_flags); /* * Before we've swapped the forks, lets set the owners of the forks @@ -1887,8 +1903,8 @@ out: return error; out_unlock: - xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + xfs_iunlock(ip, lock_flags); + xfs_iunlock(tip, lock_flags); goto out; out_trans_cancel: diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 7a34a1ae655..cd7b8ca9b06 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -130,7 +130,7 @@ xfs_buf_get_maps( bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map), KM_NOFS); if (!bp->b_maps) - return ENOMEM; + return -ENOMEM; return 0; } @@ -344,7 +344,7 @@ retry: if (unlikely(page == NULL)) { if (flags & XBF_READ_AHEAD) { bp->b_page_count = i; - error = ENOMEM; + error = -ENOMEM; goto out_free_pages; } @@ -465,7 +465,7 @@ _xfs_buf_find( eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks); if (blkno >= eofs) { /* - * XXX (dgc): we should really be returning EFSCORRUPTED here, + * XXX (dgc): we should really be returning -EFSCORRUPTED here, * but none of the higher level infrastructure supports * returning a specific error on buffer lookup failures. */ @@ -1052,8 +1052,8 @@ xfs_buf_ioerror( xfs_buf_t *bp, int error) { - ASSERT(error >= 0 && error <= 0xffff); - bp->b_error = (unsigned short)error; + ASSERT(error <= 0 && error >= -1000); + bp->b_error = error; trace_xfs_buf_ioerror(bp, error, _RET_IP_); } @@ -1064,7 +1064,7 @@ xfs_buf_ioerror_alert( { xfs_alert(bp->b_target->bt_mount, "metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d", - (__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length); + (__uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length); } /* @@ -1083,7 +1083,7 @@ xfs_bioerror( /* * No need to wait until the buffer is unpinned, we aren't flushing it. */ - xfs_buf_ioerror(bp, EIO); + xfs_buf_ioerror(bp, -EIO); /* * We're calling xfs_buf_ioend, so delete XBF_DONE flag. @@ -1094,7 +1094,7 @@ xfs_bioerror( xfs_buf_ioend(bp, 0); - return EIO; + return -EIO; } /* @@ -1127,13 +1127,13 @@ xfs_bioerror_relse( * There's no reason to mark error for * ASYNC buffers. */ - xfs_buf_ioerror(bp, EIO); + xfs_buf_ioerror(bp, -EIO); complete(&bp->b_iowait); } else { xfs_buf_relse(bp); } - return EIO; + return -EIO; } STATIC int @@ -1199,7 +1199,7 @@ xfs_buf_bio_end_io( * buffers that require multiple bios to complete. */ if (!bp->b_error) - xfs_buf_ioerror(bp, -error); + xfs_buf_ioerror(bp, error); if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); @@ -1286,7 +1286,7 @@ next_chunk: * because the caller (xfs_buf_iorequest) holds a count itself. */ atomic_dec(&bp->b_io_remaining); - xfs_buf_ioerror(bp, EIO); + xfs_buf_ioerror(bp, -EIO); bio_put(bio); } @@ -1330,6 +1330,20 @@ _xfs_buf_ioapply( SHUTDOWN_CORRUPT_INCORE); return; } + } else if (bp->b_bn != XFS_BUF_DADDR_NULL) { + struct xfs_mount *mp = bp->b_target->bt_mount; + + /* + * non-crc filesystems don't attach verifiers during + * log recovery, so don't warn for such filesystems. + */ + if (xfs_sb_version_hascrc(&mp->m_sb)) { + xfs_warn(mp, + "%s: no ops on block 0x%llx/0x%x", + __func__, bp->b_bn, bp->b_length); + xfs_hex_dump(bp->b_addr, 64); + dump_stack(); + } } } else if (bp->b_flags & XBF_READ_AHEAD) { rw = READA; @@ -1628,7 +1642,7 @@ xfs_setsize_buftarg( xfs_warn(btp->bt_mount, "Cannot set_blocksize to %u on device %s", sectorsize, name); - return EINVAL; + return -EINVAL; } /* Set up device logical sector size mask */ diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 3a7a5523d3d..c753183900b 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -178,7 +178,7 @@ typedef struct xfs_buf { atomic_t b_io_remaining; /* #outstanding I/O requests */ unsigned int b_page_count; /* size of page array */ unsigned int b_offset; /* page offset in first page */ - unsigned short b_error; /* error code on I/O */ + int b_error; /* error code on I/O */ const struct xfs_buf_ops *b_ops; #ifdef XFS_BUF_LOCK_TRACKING diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 4654338b03f..76007deed31 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -488,7 +488,7 @@ xfs_buf_item_unpin( xfs_buf_lock(bp); xfs_buf_hold(bp); bp->b_flags |= XBF_ASYNC; - xfs_buf_ioerror(bp, EIO); + xfs_buf_ioerror(bp, -EIO); XFS_BUF_UNDONE(bp); xfs_buf_stale(bp); xfs_buf_ioend(bp, 0); @@ -725,7 +725,7 @@ xfs_buf_item_get_format( bip->bli_formats = kmem_zalloc(count * sizeof(struct xfs_buf_log_format), KM_SLEEP); if (!bip->bli_formats) - return ENOMEM; + return -ENOMEM; return 0; } diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 48e99afb9cb..f1b69edcdf3 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -95,7 +95,7 @@ xfs_dir2_sf_getdents( */ if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return XFS_ERROR(EIO); + return -EIO; } ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); @@ -677,7 +677,7 @@ xfs_readdir( trace_xfs_readdir(dp); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) - return XFS_ERROR(EIO); + return -EIO; ASSERT(S_ISDIR(dp->i_d.di_mode)); XFS_STATS_INC(xs_dir_getdents); diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 4f11ef01113..13d08a1b390 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -124,7 +124,7 @@ xfs_trim_extents( } trace_xfs_discard_extent(mp, agno, fbno, flen); - error = -blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS, 0); + error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS, 0); if (error) goto out_del_cursor; *blocks_trimmed += flen; @@ -166,11 +166,11 @@ xfs_ioc_trim( int error, last_error = 0; if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); + return -EPERM; if (!blk_queue_discard(q)) - return -XFS_ERROR(EOPNOTSUPP); + return -EOPNOTSUPP; if (copy_from_user(&range, urange, sizeof(range))) - return -XFS_ERROR(EFAULT); + return -EFAULT; /* * Truncating down the len isn't actually quite correct, but using @@ -182,7 +182,7 @@ xfs_ioc_trim( if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) || range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)) || range.len < mp->m_sb.sb_blocksize) - return -XFS_ERROR(EINVAL); + return -EINVAL; start = BTOBB(range.start); end = start + BTOBBT(range.len) - 1; @@ -195,7 +195,7 @@ xfs_ioc_trim( end_agno = xfs_daddr_to_agno(mp, end); for (agno = start_agno; agno <= end_agno; agno++) { - error = -xfs_trim_extents(mp, agno, start, end, minlen, + error = xfs_trim_extents(mp, agno, start, end, minlen, &blocks_trimmed); if (error) last_error = error; @@ -206,7 +206,7 @@ xfs_ioc_trim( range.len = XFS_FSB_TO_B(mp, blocks_trimmed); if (copy_to_user(urange, &range, sizeof(range))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -222,11 +222,11 @@ xfs_discard_extents( trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, busyp->length); - error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, + error = blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), XFS_FSB_TO_BB(mp, busyp->length), GFP_NOFS, 0); - if (error && error != EOPNOTSUPP) { + if (error && error != -EOPNOTSUPP) { xfs_info(mp, "discard failed for extent [0x%llu,%u], error %d", (unsigned long long)busyp->bno, diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 3ee0cd43edc..63c2de49f61 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -327,7 +327,7 @@ xfs_qm_dqalloc( */ if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) { xfs_iunlock(quotip, XFS_ILOCK_EXCL); - return (ESRCH); + return -ESRCH; } xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); @@ -354,7 +354,7 @@ xfs_qm_dqalloc( mp->m_quotainfo->qi_dqchunklen, 0); if (!bp) { - error = ENOMEM; + error = -ENOMEM; goto error1; } bp->b_ops = &xfs_dquot_buf_ops; @@ -400,7 +400,7 @@ xfs_qm_dqalloc( error0: xfs_iunlock(quotip, XFS_ILOCK_EXCL); - return (error); + return error; } STATIC int @@ -426,7 +426,7 @@ xfs_qm_dqrepair( if (error) { ASSERT(*bpp == NULL); - return XFS_ERROR(error); + return error; } (*bpp)->b_ops = &xfs_dquot_buf_ops; @@ -442,7 +442,7 @@ xfs_qm_dqrepair( if (error) { /* repair failed, we're screwed */ xfs_trans_brelse(tp, *bpp); - return XFS_ERROR(EIO); + return -EIO; } } @@ -480,7 +480,7 @@ xfs_qm_dqtobp( * didn't have the quota inode lock. */ xfs_iunlock(quotip, lock_mode); - return ESRCH; + return -ESRCH; } /* @@ -508,7 +508,7 @@ xfs_qm_dqtobp( * We don't allocate unless we're asked to */ if (!(flags & XFS_QMOPT_DQALLOC)) - return ENOENT; + return -ENOENT; ASSERT(tp); error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, @@ -530,7 +530,7 @@ xfs_qm_dqtobp( mp->m_quotainfo->qi_dqchunklen, 0, &bp, &xfs_dquot_buf_ops); - if (error == EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) { + if (error == -EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) { xfs_dqid_t firstid = (xfs_dqid_t)map.br_startoff * mp->m_quotainfo->qi_dqperchunk; ASSERT(bp == NULL); @@ -539,7 +539,7 @@ xfs_qm_dqtobp( if (error) { ASSERT(bp == NULL); - return XFS_ERROR(error); + return error; } } @@ -547,7 +547,7 @@ xfs_qm_dqtobp( *O_bpp = bp; *O_ddpp = bp->b_addr + dqp->q_bufoffset; - return (0); + return 0; } @@ -715,7 +715,7 @@ xfs_qm_dqget( if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) || (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) || (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) { - return (ESRCH); + return -ESRCH; } #ifdef DEBUG @@ -723,7 +723,7 @@ xfs_qm_dqget( if ((xfs_dqerror_target == mp->m_ddev_targp) && (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) { xfs_debug(mp, "Returning error in dqget"); - return (EIO); + return -EIO; } } @@ -796,14 +796,14 @@ restart: } else { /* inode stays locked on return */ xfs_qm_dqdestroy(dqp); - return XFS_ERROR(ESRCH); + return -ESRCH; } } mutex_lock(&qi->qi_tree_lock); - error = -radix_tree_insert(tree, id, dqp); + error = radix_tree_insert(tree, id, dqp); if (unlikely(error)) { - WARN_ON(error != EEXIST); + WARN_ON(error != -EEXIST); /* * Duplicate found. Just throw away the new dquot and start @@ -829,7 +829,7 @@ restart: ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); trace_xfs_dqget_miss(dqp); *O_dqpp = dqp; - return (0); + return 0; } /* @@ -966,7 +966,7 @@ xfs_qm_dqflush( SHUTDOWN_CORRUPT_INCORE); else spin_unlock(&mp->m_ail->xa_lock); - error = XFS_ERROR(EIO); + error = -EIO; goto out_unlock; } @@ -974,7 +974,8 @@ xfs_qm_dqflush( * Get the buffer containing the on-disk dquot */ error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, 0, &bp, NULL); + mp->m_quotainfo->qi_dqchunklen, 0, &bp, + &xfs_dquot_buf_ops); if (error) goto out_unlock; @@ -992,7 +993,7 @@ xfs_qm_dqflush( xfs_buf_relse(bp); xfs_dqfunlock(dqp); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); - return XFS_ERROR(EIO); + return -EIO; } /* This is the only portion of data that needs to persist */ @@ -1045,7 +1046,7 @@ xfs_qm_dqflush( out_unlock: xfs_dqfunlock(dqp); - return XFS_ERROR(EIO); + return -EIO; } /* diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index 68a68f70483..c24c67e22a2 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h @@ -139,6 +139,21 @@ static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type) } } +/* + * Check whether a dquot is under low free space conditions. We assume the quota + * is enabled and enforced. + */ +static inline bool xfs_dquot_lowsp(struct xfs_dquot *dqp) +{ + int64_t freesp; + + freesp = be64_to_cpu(dqp->q_core.d_blk_hardlimit) - dqp->q_res_bcount; + if (freesp < dqp->q_low_space[XFS_QLOWSP_1_PCNT]) + return true; + + return false; +} + #define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index edac5b057d2..b92fd7bc49e 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -27,29 +27,6 @@ #ifdef DEBUG -int xfs_etrap[XFS_ERROR_NTRAP] = { - 0, -}; - -int -xfs_error_trap(int e) -{ - int i; - - if (!e) - return 0; - for (i = 0; i < XFS_ERROR_NTRAP; i++) { - if (xfs_etrap[i] == 0) - break; - if (e != xfs_etrap[i]) - continue; - xfs_notice(NULL, "%s: error %d", __func__, e); - BUG(); - break; - } - return e; -} - int xfs_etest[XFS_NUM_INJECT_ERROR]; int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR]; @@ -190,7 +167,7 @@ xfs_verifier_error( struct xfs_mount *mp = bp->b_target->bt_mount; xfs_alert(mp, "Metadata %s detected at %pF, block 0x%llx", - bp->b_error == EFSBADCRC ? "CRC error" : "corruption", + bp->b_error == -EFSBADCRC ? "CRC error" : "corruption", __return_address, bp->b_bn); xfs_alert(mp, "Unmount and run xfs_repair"); diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index c1c57d4a4b5..279a76e5279 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -18,15 +18,6 @@ #ifndef __XFS_ERROR_H__ #define __XFS_ERROR_H__ -#ifdef DEBUG -#define XFS_ERROR_NTRAP 10 -extern int xfs_etrap[XFS_ERROR_NTRAP]; -extern int xfs_error_trap(int); -#define XFS_ERROR(e) xfs_error_trap(e) -#else -#define XFS_ERROR(e) (e) -#endif - struct xfs_mount; extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp, @@ -56,7 +47,7 @@ extern void xfs_verifier_error(struct xfs_buf *bp); if (unlikely(!fs_is_ok)) { \ XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO", \ XFS_ERRLEVEL_LOW, NULL); \ - error = XFS_ERROR(EFSCORRUPTED); \ + error = -EFSCORRUPTED; \ goto l; \ } \ } @@ -68,7 +59,7 @@ extern void xfs_verifier_error(struct xfs_buf *bp); if (unlikely(!fs_is_ok)) { \ XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN", \ XFS_ERRLEVEL_LOW, NULL); \ - return XFS_ERROR(EFSCORRUPTED); \ + return -EFSCORRUPTED; \ } \ } diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 753e467aa1a..5a6bd5d8779 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -147,9 +147,9 @@ xfs_nfs_get_inode( * We don't use ESTALE directly down the chain to not * confuse applications using bulkstat that expect EINVAL. */ - if (error == EINVAL || error == ENOENT) - error = ESTALE; - return ERR_PTR(-error); + if (error == -EINVAL || error == -ENOENT) + error = -ESTALE; + return ERR_PTR(error); } if (ip->i_d.di_gen != generation) { @@ -217,7 +217,7 @@ xfs_fs_get_parent( error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL); if (unlikely(error)) - return ERR_PTR(-error); + return ERR_PTR(error); return d_obtain_alias(VFS_I(cip)); } @@ -237,7 +237,7 @@ xfs_fs_nfs_commit_metadata( if (!lsn) return 0; - return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); + return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); } const struct export_operations xfs_export_operations = { diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index fb7a4c1ce1c..c4327419dc5 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -298,7 +298,7 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) } return 0; } - return EFSCORRUPTED; + return -EFSCORRUPTED; } /* diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 1f66779d7a4..de5368c803f 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -38,6 +38,7 @@ #include "xfs_trace.h" #include "xfs_log.h" #include "xfs_dinode.h" +#include "xfs_icache.h" #include <linux/aio.h> #include <linux/dcache.h> @@ -155,7 +156,7 @@ xfs_dir_fsync( if (!lsn) return 0; - return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); + return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); } STATIC int @@ -179,7 +180,7 @@ xfs_file_fsync( return error; if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); + return -EIO; xfs_iflags_clear(ip, XFS_ITRUNCATED); @@ -225,7 +226,7 @@ xfs_file_fsync( !log_flushed) xfs_blkdev_issue_flush(mp->m_ddev_targp); - return -error; + return error; } STATIC ssize_t @@ -246,11 +247,11 @@ xfs_file_read_iter( XFS_STATS_INC(xs_read_calls); if (unlikely(file->f_flags & O_DIRECT)) - ioflags |= IO_ISDIRECT; + ioflags |= XFS_IO_ISDIRECT; if (file->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; + ioflags |= XFS_IO_INVIS; - if (unlikely(ioflags & IO_ISDIRECT)) { + if (unlikely(ioflags & XFS_IO_ISDIRECT)) { xfs_buftarg_t *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; @@ -258,7 +259,7 @@ xfs_file_read_iter( if ((pos | size) & target->bt_logical_sectormask) { if (pos == i_size_read(inode)) return 0; - return -XFS_ERROR(EINVAL); + return -EINVAL; } } @@ -283,19 +284,29 @@ xfs_file_read_iter( * proceeed concurrently without serialisation. */ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); - if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) { + if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) { xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); if (inode->i_mapping->nrpages) { ret = filemap_write_and_wait_range( VFS_I(ip)->i_mapping, - pos, -1); + pos, pos + size - 1); if (ret) { xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); return ret; } - truncate_pagecache_range(VFS_I(ip), pos, -1); + + /* + * Invalidate whole pages. This can return an error if + * we fail to invalidate a page, but this should never + * happen on XFS. Warn if it does fail. + */ + ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, + pos >> PAGE_CACHE_SHIFT, + (pos + size - 1) >> PAGE_CACHE_SHIFT); + WARN_ON_ONCE(ret); + ret = 0; } xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); } @@ -325,7 +336,7 @@ xfs_file_splice_read( XFS_STATS_INC(xs_read_calls); if (infilp->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; + ioflags |= XFS_IO_INVIS; if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; @@ -524,7 +535,7 @@ restart: xfs_rw_ilock(ip, *iolock); goto restart; } - error = -xfs_zero_eof(ip, *pos, i_size_read(inode)); + error = xfs_zero_eof(ip, *pos, i_size_read(inode)); if (error) return error; } @@ -594,7 +605,7 @@ xfs_file_dio_aio_write( /* DIO must be aligned to device logical sector size */ if ((pos | count) & target->bt_logical_sectormask) - return -XFS_ERROR(EINVAL); + return -EINVAL; /* "unaligned" here means not aligned to a filesystem block */ if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) @@ -631,10 +642,19 @@ xfs_file_dio_aio_write( if (mapping->nrpages) { ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, - pos, -1); + pos, pos + count - 1); if (ret) goto out; - truncate_pagecache_range(VFS_I(ip), pos, -1); + /* + * Invalidate whole pages. This can return an error if + * we fail to invalidate a page, but this should never + * happen on XFS. Warn if it does fail. + */ + ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, + pos >> PAGE_CACHE_SHIFT, + (pos + count - 1) >> PAGE_CACHE_SHIFT); + WARN_ON_ONCE(ret); + ret = 0; } /* @@ -689,14 +709,28 @@ write_retry: ret = generic_perform_write(file, from, pos); if (likely(ret >= 0)) iocb->ki_pos = pos + ret; + /* - * If we just got an ENOSPC, try to write back all dirty inodes to - * convert delalloc space to free up some of the excess reserved - * metadata space. + * If we hit a space limit, try to free up some lingering preallocated + * space before returning an error. In the case of ENOSPC, first try to + * write back all dirty inodes to free up some of the excess reserved + * metadata space. This reduces the chances that the eofblocks scan + * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this + * also behaves as a filter to prevent too many eofblocks scans from + * running at the same time. */ - if (ret == -ENOSPC && !enospc) { + if (ret == -EDQUOT && !enospc) { + enospc = xfs_inode_free_quota_eofblocks(ip); + if (enospc) + goto write_retry; + } else if (ret == -ENOSPC && !enospc) { + struct xfs_eofblocks eofb = {0}; + enospc = 1; xfs_flush_inodes(ip->i_mount); + eofb.eof_scan_owner = ip->i_ino; /* for locking */ + eofb.eof_flags = XFS_EOF_FLAGS_SYNC; + xfs_icache_free_eofblocks(ip->i_mount, &eofb); goto write_retry; } @@ -772,7 +806,7 @@ xfs_file_fallocate( unsigned blksize_mask = (1 << inode->i_blkbits) - 1; if (offset & blksize_mask || len & blksize_mask) { - error = EINVAL; + error = -EINVAL; goto out_unlock; } @@ -781,7 +815,7 @@ xfs_file_fallocate( * in which case it is effectively a truncate operation */ if (offset + len >= i_size_read(inode)) { - error = EINVAL; + error = -EINVAL; goto out_unlock; } @@ -794,7 +828,7 @@ xfs_file_fallocate( if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > i_size_read(inode)) { new_size = offset + len; - error = -inode_newsize_ok(inode, new_size); + error = inode_newsize_ok(inode, new_size); if (error) goto out_unlock; } @@ -844,7 +878,7 @@ xfs_file_fallocate( out_unlock: xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return -error; + return error; } @@ -889,7 +923,7 @@ xfs_file_release( struct inode *inode, struct file *filp) { - return -xfs_release(XFS_I(inode)); + return xfs_release(XFS_I(inode)); } STATIC int @@ -918,7 +952,7 @@ xfs_file_readdir( error = xfs_readdir(ip, ctx, bufsize); if (error) - return -error; + return error; return 0; } @@ -1184,7 +1218,7 @@ xfs_seek_data( isize = i_size_read(inode); if (start >= isize) { - error = ENXIO; + error = -ENXIO; goto out_unlock; } @@ -1206,7 +1240,7 @@ xfs_seek_data( /* No extents at given offset, must be beyond EOF */ if (nmap == 0) { - error = ENXIO; + error = -ENXIO; goto out_unlock; } @@ -1237,7 +1271,7 @@ xfs_seek_data( * we are reading after EOF if nothing in map[1]. */ if (nmap == 1) { - error = ENXIO; + error = -ENXIO; goto out_unlock; } @@ -1250,7 +1284,7 @@ xfs_seek_data( fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount; start = XFS_FSB_TO_B(mp, fsbno); if (start >= isize) { - error = ENXIO; + error = -ENXIO; goto out_unlock; } } @@ -1262,7 +1296,7 @@ out_unlock: xfs_iunlock(ip, lock); if (error) - return -error; + return error; return offset; } @@ -1282,13 +1316,13 @@ xfs_seek_hole( int error; if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); + return -EIO; lock = xfs_ilock_data_map_shared(ip); isize = i_size_read(inode); if (start >= isize) { - error = ENXIO; + error = -ENXIO; goto out_unlock; } @@ -1307,7 +1341,7 @@ xfs_seek_hole( /* No extents at given offset, must be beyond EOF */ if (nmap == 0) { - error = ENXIO; + error = -ENXIO; goto out_unlock; } @@ -1370,7 +1404,7 @@ out_unlock: xfs_iunlock(ip, lock); if (error) - return -error; + return error; return offset; } diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 8ec81bed799..e92730c1d3c 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -258,7 +258,7 @@ next_ag: if (*agp == NULLAGNUMBER) return 0; - err = ENOMEM; + err = -ENOMEM; item = kmem_alloc(sizeof(*item), KM_MAYFAIL); if (!item) goto out_put_ag; @@ -268,7 +268,7 @@ next_ag: err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru); if (err) { - if (err == EEXIST) + if (err == -EEXIST) err = 0; goto out_free_item; } diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index d34703dbcb4..18dc721ca19 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -255,8 +255,8 @@ typedef struct xfs_fsop_resblks { ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES) /* Used for sanity checks on superblock */ -#define XFS_MAX_DBLOCKS(s) ((xfs_drfsbno_t)(s)->sb_agcount * (s)->sb_agblocks) -#define XFS_MIN_DBLOCKS(s) ((xfs_drfsbno_t)((s)->sb_agcount - 1) * \ +#define XFS_MAX_DBLOCKS(s) ((xfs_rfsblock_t)(s)->sb_agcount * (s)->sb_agblocks) +#define XFS_MIN_DBLOCKS(s) ((xfs_rfsblock_t)((s)->sb_agcount - 1) * \ (s)->sb_agblocks + XFS_MIN_AG_BLOCKS) /* @@ -375,6 +375,9 @@ struct xfs_fs_eofblocks { #define XFS_EOF_FLAGS_GID (1 << 2) /* filter by gid */ #define XFS_EOF_FLAGS_PRID (1 << 3) /* filter by project id */ #define XFS_EOF_FLAGS_MINFILESIZE (1 << 4) /* filter by min file size */ +#define XFS_EOF_FLAGS_UNION (1 << 5) /* union filter algorithm; + * kernel only, not included in + * valid mask */ #define XFS_EOF_FLAGS_VALID \ (XFS_EOF_FLAGS_SYNC | \ XFS_EOF_FLAGS_UID | \ diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index d2295561570..f91de1ef05e 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -168,7 +168,7 @@ xfs_growfs_data_private( nb = in->newblocks; pct = in->imaxpct; if (nb < mp->m_sb.sb_dblocks || pct < 0 || pct > 100) - return XFS_ERROR(EINVAL); + return -EINVAL; if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) return error; dpct = pct - mp->m_sb.sb_imax_pct; @@ -176,7 +176,7 @@ xfs_growfs_data_private( XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), XFS_FSS_TO_BB(mp, 1), 0, NULL); if (!bp) - return EIO; + return -EIO; if (bp->b_error) { error = bp->b_error; xfs_buf_relse(bp); @@ -191,7 +191,7 @@ xfs_growfs_data_private( nagcount--; nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks; if (nb < mp->m_sb.sb_dblocks) - return XFS_ERROR(EINVAL); + return -EINVAL; } new = nb - mp->m_sb.sb_dblocks; oagcount = mp->m_sb.sb_agcount; @@ -229,7 +229,7 @@ xfs_growfs_data_private( XFS_FSS_TO_BB(mp, 1), 0, &xfs_agf_buf_ops); if (!bp) { - error = ENOMEM; + error = -ENOMEM; goto error0; } @@ -270,7 +270,7 @@ xfs_growfs_data_private( XFS_FSS_TO_BB(mp, 1), 0, &xfs_agfl_buf_ops); if (!bp) { - error = ENOMEM; + error = -ENOMEM; goto error0; } @@ -298,7 +298,7 @@ xfs_growfs_data_private( XFS_FSS_TO_BB(mp, 1), 0, &xfs_agi_buf_ops); if (!bp) { - error = ENOMEM; + error = -ENOMEM; goto error0; } @@ -336,7 +336,7 @@ xfs_growfs_data_private( &xfs_allocbt_buf_ops); if (!bp) { - error = ENOMEM; + error = -ENOMEM; goto error0; } @@ -365,7 +365,7 @@ xfs_growfs_data_private( BTOBB(mp->m_sb.sb_blocksize), 0, &xfs_allocbt_buf_ops); if (!bp) { - error = ENOMEM; + error = -ENOMEM; goto error0; } @@ -395,7 +395,7 @@ xfs_growfs_data_private( BTOBB(mp->m_sb.sb_blocksize), 0, &xfs_inobt_buf_ops); if (!bp) { - error = ENOMEM; + error = -ENOMEM; goto error0; } @@ -420,7 +420,7 @@ xfs_growfs_data_private( BTOBB(mp->m_sb.sb_blocksize), 0, &xfs_inobt_buf_ops); if (!bp) { - error = ENOMEM; + error = -ENOMEM; goto error0; } @@ -531,7 +531,7 @@ xfs_growfs_data_private( bp->b_ops = &xfs_sb_buf_ops; xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); } else - error = ENOMEM; + error = -ENOMEM; } /* @@ -576,17 +576,17 @@ xfs_growfs_log_private( nb = in->newblocks; if (nb < XFS_MIN_LOG_BLOCKS || nb < XFS_B_TO_FSB(mp, XFS_MIN_LOG_BYTES)) - return XFS_ERROR(EINVAL); + return -EINVAL; if (nb == mp->m_sb.sb_logblocks && in->isint == (mp->m_sb.sb_logstart != 0)) - return XFS_ERROR(EINVAL); + return -EINVAL; /* * Moving the log is hard, need new interfaces to sync * the log first, hold off all activity while moving it. * Can have shorter or longer log in the same space, * or transform internal to external log or vice versa. */ - return XFS_ERROR(ENOSYS); + return -ENOSYS; } /* @@ -604,9 +604,9 @@ xfs_growfs_data( int error; if (!capable(CAP_SYS_ADMIN)) - return XFS_ERROR(EPERM); + return -EPERM; if (!mutex_trylock(&mp->m_growlock)) - return XFS_ERROR(EWOULDBLOCK); + return -EWOULDBLOCK; error = xfs_growfs_data_private(mp, in); mutex_unlock(&mp->m_growlock); return error; @@ -620,9 +620,9 @@ xfs_growfs_log( int error; if (!capable(CAP_SYS_ADMIN)) - return XFS_ERROR(EPERM); + return -EPERM; if (!mutex_trylock(&mp->m_growlock)) - return XFS_ERROR(EWOULDBLOCK); + return -EWOULDBLOCK; error = xfs_growfs_log_private(mp, in); mutex_unlock(&mp->m_growlock); return error; @@ -674,7 +674,7 @@ xfs_reserve_blocks( /* If inval is null, report current values and return */ if (inval == (__uint64_t *)NULL) { if (!outval) - return EINVAL; + return -EINVAL; outval->resblks = mp->m_resblks; outval->resblks_avail = mp->m_resblks_avail; return 0; @@ -757,7 +757,7 @@ out: int error; error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, fdblks_delta, 0); - if (error == ENOSPC) + if (error == -ENOSPC) goto retry; } return 0; @@ -818,7 +818,7 @@ xfs_fs_goingdown( SHUTDOWN_FORCE_UMOUNT | SHUTDOWN_LOG_IO_ERROR); break; default: - return XFS_ERROR(EINVAL); + return -EINVAL; } return 0; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index c48df5f25b9..981b2cf5198 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -33,6 +33,9 @@ #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_bmap_util.h" +#include "xfs_quota.h" +#include "xfs_dquot_item.h" +#include "xfs_dquot.h" #include <linux/kthread.h> #include <linux/freezer.h> @@ -158,7 +161,7 @@ xfs_iget_cache_hit( if (ip->i_ino != ino) { trace_xfs_iget_skip(ip); XFS_STATS_INC(xs_ig_frecycle); - error = EAGAIN; + error = -EAGAIN; goto out_error; } @@ -176,7 +179,7 @@ xfs_iget_cache_hit( if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { trace_xfs_iget_skip(ip); XFS_STATS_INC(xs_ig_frecycle); - error = EAGAIN; + error = -EAGAIN; goto out_error; } @@ -184,7 +187,7 @@ xfs_iget_cache_hit( * If lookup is racing with unlink return an error immediately. */ if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { - error = ENOENT; + error = -ENOENT; goto out_error; } @@ -206,7 +209,7 @@ xfs_iget_cache_hit( spin_unlock(&ip->i_flags_lock); rcu_read_unlock(); - error = -inode_init_always(mp->m_super, inode); + error = inode_init_always(mp->m_super, inode); if (error) { /* * Re-initializing the inode failed, and we are in deep @@ -243,7 +246,7 @@ xfs_iget_cache_hit( /* If the VFS inode is being torn down, pause and try again. */ if (!igrab(inode)) { trace_xfs_iget_skip(ip); - error = EAGAIN; + error = -EAGAIN; goto out_error; } @@ -285,7 +288,7 @@ xfs_iget_cache_miss( ip = xfs_inode_alloc(mp, ino); if (!ip) - return ENOMEM; + return -ENOMEM; error = xfs_iread(mp, tp, ip, flags); if (error) @@ -294,7 +297,7 @@ xfs_iget_cache_miss( trace_xfs_iget_miss(ip); if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { - error = ENOENT; + error = -ENOENT; goto out_destroy; } @@ -305,7 +308,7 @@ xfs_iget_cache_miss( * recurse into the file system. */ if (radix_tree_preload(GFP_NOFS)) { - error = EAGAIN; + error = -EAGAIN; goto out_destroy; } @@ -341,7 +344,7 @@ xfs_iget_cache_miss( if (unlikely(error)) { WARN_ON(error != -EEXIST); XFS_STATS_INC(xs_ig_dup); - error = EAGAIN; + error = -EAGAIN; goto out_preload_end; } spin_unlock(&pag->pag_ici_lock); @@ -408,7 +411,7 @@ xfs_iget( /* reject inode numbers outside existing AGs */ if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) - return EINVAL; + return -EINVAL; /* get the perag structure and ensure that it's inode capable */ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); @@ -445,7 +448,7 @@ again: return 0; out_error_or_again: - if (error == EAGAIN) { + if (error == -EAGAIN) { delay(1); goto again; } @@ -489,18 +492,18 @@ xfs_inode_ag_walk_grab( /* nothing to sync during shutdown */ if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return EFSCORRUPTED; + return -EFSCORRUPTED; /* If we can't grab the inode, it must on it's way to reclaim. */ if (!igrab(inode)) - return ENOENT; + return -ENOENT; /* inode is valid */ return 0; out_unlock_noent: spin_unlock(&ip->i_flags_lock); - return ENOENT; + return -ENOENT; } STATIC int @@ -583,16 +586,16 @@ restart: continue; error = execute(batch[i], flags, args); IRELE(batch[i]); - if (error == EAGAIN) { + if (error == -EAGAIN) { skipped++; continue; } - if (error && last_error != EFSCORRUPTED) + if (error && last_error != -EFSCORRUPTED) last_error = error; } /* bail out if the filesystem is corrupted. */ - if (error == EFSCORRUPTED) + if (error == -EFSCORRUPTED) break; cond_resched(); @@ -652,11 +655,11 @@ xfs_inode_ag_iterator( xfs_perag_put(pag); if (error) { last_error = error; - if (error == EFSCORRUPTED) + if (error == -EFSCORRUPTED) break; } } - return XFS_ERROR(last_error); + return last_error; } int @@ -680,11 +683,11 @@ xfs_inode_ag_iterator_tag( xfs_perag_put(pag); if (error) { last_error = error; - if (error == EFSCORRUPTED) + if (error == -EFSCORRUPTED) break; } } - return XFS_ERROR(last_error); + return last_error; } /* @@ -944,7 +947,7 @@ restart: * see the stale flag set on the inode. */ error = xfs_iflush(ip, &bp); - if (error == EAGAIN) { + if (error == -EAGAIN) { xfs_iunlock(ip, XFS_ILOCK_EXCL); /* backoff longer than in xfs_ifree_cluster */ delay(2); @@ -997,7 +1000,7 @@ out: xfs_iflags_clear(ip, XFS_IRECLAIM); xfs_iunlock(ip, XFS_ILOCK_EXCL); /* - * We could return EAGAIN here to make reclaim rescan the inode tree in + * We could return -EAGAIN here to make reclaim rescan the inode tree in * a short while. However, this just burns CPU time scanning the tree * waiting for IO to complete and the reclaim work never goes back to * the idle state. Instead, return 0 to let the next scheduled @@ -1100,7 +1103,7 @@ restart: if (!batch[i]) continue; error = xfs_reclaim_inode(batch[i], pag, flags); - if (error && last_error != EFSCORRUPTED) + if (error && last_error != -EFSCORRUPTED) last_error = error; } @@ -1129,7 +1132,7 @@ restart: trylock = 0; goto restart; } - return XFS_ERROR(last_error); + return last_error; } int @@ -1203,6 +1206,30 @@ xfs_inode_match_id( return 1; } +/* + * A union-based inode filtering algorithm. Process the inode if any of the + * criteria match. This is for global/internal scans only. + */ +STATIC int +xfs_inode_match_id_union( + struct xfs_inode *ip, + struct xfs_eofblocks *eofb) +{ + if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) && + uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid)) + return 1; + + if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) && + gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid)) + return 1; + + if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) && + xfs_get_projid(ip) == eofb->eof_prid) + return 1; + + return 0; +} + STATIC int xfs_inode_free_eofblocks( struct xfs_inode *ip, @@ -1211,6 +1238,10 @@ xfs_inode_free_eofblocks( { int ret; struct xfs_eofblocks *eofb = args; + bool need_iolock = true; + int match; + + ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); if (!xfs_can_free_eofblocks(ip, false)) { /* inode could be preallocated or append-only */ @@ -1228,19 +1259,31 @@ xfs_inode_free_eofblocks( return 0; if (eofb) { - if (!xfs_inode_match_id(ip, eofb)) + if (eofb->eof_flags & XFS_EOF_FLAGS_UNION) + match = xfs_inode_match_id_union(ip, eofb); + else + match = xfs_inode_match_id(ip, eofb); + if (!match) return 0; /* skip the inode if the file size is too small */ if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && XFS_ISIZE(ip) < eofb->eof_min_file_size) return 0; + + /* + * A scan owner implies we already hold the iolock. Skip it in + * xfs_free_eofblocks() to avoid deadlock. This also eliminates + * the possibility of EAGAIN being returned. + */ + if (eofb->eof_scan_owner == ip->i_ino) + need_iolock = false; } - ret = xfs_free_eofblocks(ip->i_mount, ip, true); + ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock); /* don't revisit the inode if we're not waiting */ - if (ret == EAGAIN && !(flags & SYNC_WAIT)) + if (ret == -EAGAIN && !(flags & SYNC_WAIT)) ret = 0; return ret; @@ -1260,6 +1303,55 @@ xfs_icache_free_eofblocks( eofb, XFS_ICI_EOFBLOCKS_TAG); } +/* + * Run eofblocks scans on the quotas applicable to the inode. For inodes with + * multiple quotas, we don't know exactly which quota caused an allocation + * failure. We make a best effort by including each quota under low free space + * conditions (less than 1% free space) in the scan. + */ +int +xfs_inode_free_quota_eofblocks( + struct xfs_inode *ip) +{ + int scan = 0; + struct xfs_eofblocks eofb = {0}; + struct xfs_dquot *dq; + + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); + + /* + * Set the scan owner to avoid a potential livelock. Otherwise, the scan + * can repeatedly trylock on the inode we're currently processing. We + * run a sync scan to increase effectiveness and use the union filter to + * cover all applicable quotas in a single scan. + */ + eofb.eof_scan_owner = ip->i_ino; + eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC; + + if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) { + dq = xfs_inode_dquot(ip, XFS_DQ_USER); + if (dq && xfs_dquot_lowsp(dq)) { + eofb.eof_uid = VFS_I(ip)->i_uid; + eofb.eof_flags |= XFS_EOF_FLAGS_UID; + scan = 1; + } + } + + if (XFS_IS_GQUOTA_ENFORCED(ip->i_mount)) { + dq = xfs_inode_dquot(ip, XFS_DQ_GROUP); + if (dq && xfs_dquot_lowsp(dq)) { + eofb.eof_gid = VFS_I(ip)->i_gid; + eofb.eof_flags |= XFS_EOF_FLAGS_GID; + scan = 1; + } + } + + if (scan) + xfs_icache_free_eofblocks(ip->i_mount, &eofb); + + return scan; +} + void xfs_inode_set_eofblocks_tag( xfs_inode_t *ip) diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 9cf017b899b..46748b86b12 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -27,6 +27,7 @@ struct xfs_eofblocks { kgid_t eof_gid; prid_t eof_prid; __u64 eof_min_file_size; + xfs_ino_t eof_scan_owner; }; #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ @@ -57,6 +58,7 @@ void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip); void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip); int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *); +int xfs_inode_free_quota_eofblocks(struct xfs_inode *ip); void xfs_eofblocks_worker(struct work_struct *); int xfs_inode_ag_iterator(struct xfs_mount *mp, @@ -72,31 +74,32 @@ xfs_fs_eofblocks_from_user( struct xfs_eofblocks *dst) { if (src->eof_version != XFS_EOFBLOCKS_VERSION) - return EINVAL; + return -EINVAL; if (src->eof_flags & ~XFS_EOF_FLAGS_VALID) - return EINVAL; + return -EINVAL; if (memchr_inv(&src->pad32, 0, sizeof(src->pad32)) || memchr_inv(src->pad64, 0, sizeof(src->pad64))) - return EINVAL; + return -EINVAL; dst->eof_flags = src->eof_flags; dst->eof_prid = src->eof_prid; dst->eof_min_file_size = src->eof_min_file_size; + dst->eof_scan_owner = NULLFSINO; dst->eof_uid = INVALID_UID; if (src->eof_flags & XFS_EOF_FLAGS_UID) { dst->eof_uid = make_kuid(current_user_ns(), src->eof_uid); if (!uid_valid(dst->eof_uid)) - return EINVAL; + return -EINVAL; } dst->eof_gid = INVALID_GID; if (src->eof_flags & XFS_EOF_FLAGS_GID) { dst->eof_gid = make_kgid(current_user_ns(), src->eof_gid); if (!gid_valid(dst->eof_gid)) - return EINVAL; + return -EINVAL; } return 0; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index a6115fe1ac9..fea3c92fb3f 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -583,7 +583,7 @@ xfs_lookup( trace_xfs_lookup(dp, name); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) - return XFS_ERROR(EIO); + return -EIO; lock_mode = xfs_ilock_data_map_shared(dp); error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); @@ -893,7 +893,7 @@ xfs_dir_ialloc( } if (!ialloc_context && !ip) { *ipp = NULL; - return XFS_ERROR(ENOSPC); + return -ENOSPC; } /* @@ -1088,7 +1088,7 @@ xfs_create( trace_xfs_create(dp, name); if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; prid = xfs_get_initial_prid(dp); @@ -1125,12 +1125,12 @@ xfs_create( */ tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; error = xfs_trans_reserve(tp, &tres, resblks, 0); - if (error == ENOSPC) { + if (error == -ENOSPC) { /* flush outstanding delalloc blocks and retry */ xfs_flush_inodes(mp); error = xfs_trans_reserve(tp, &tres, resblks, 0); } - if (error == ENOSPC) { + if (error == -ENOSPC) { /* No space at all so try a "no-allocation" reservation */ resblks = 0; error = xfs_trans_reserve(tp, &tres, 0, 0); @@ -1165,7 +1165,7 @@ xfs_create( error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, resblks > 0, &ip, &committed); if (error) { - if (error == ENOSPC) + if (error == -ENOSPC) goto out_trans_cancel; goto out_trans_abort; } @@ -1184,7 +1184,7 @@ xfs_create( &first_block, &free_list, resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0); if (error) { - ASSERT(error != ENOSPC); + ASSERT(error != -ENOSPC); goto out_trans_abort; } xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -1274,7 +1274,7 @@ xfs_create_tmpfile( uint resblks; if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; prid = xfs_get_initial_prid(dp); @@ -1293,7 +1293,7 @@ xfs_create_tmpfile( tres = &M_RES(mp)->tr_create_tmpfile; error = xfs_trans_reserve(tp, tres, resblks, 0); - if (error == ENOSPC) { + if (error == -ENOSPC) { /* No space at all so try a "no-allocation" reservation */ resblks = 0; error = xfs_trans_reserve(tp, tres, 0, 0); @@ -1311,7 +1311,7 @@ xfs_create_tmpfile( error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, resblks > 0, &ip, NULL); if (error) { - if (error == ENOSPC) + if (error == -ENOSPC) goto out_trans_cancel; goto out_trans_abort; } @@ -1382,7 +1382,7 @@ xfs_link( ASSERT(!S_ISDIR(sip->i_d.di_mode)); if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; error = xfs_qm_dqattach(sip, 0); if (error) @@ -1396,7 +1396,7 @@ xfs_link( cancel_flags = XFS_TRANS_RELEASE_LOG_RES; resblks = XFS_LINK_SPACE_RES(mp, target_name->len); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0); - if (error == ENOSPC) { + if (error == -ENOSPC) { resblks = 0; error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0); } @@ -1417,7 +1417,7 @@ xfs_link( */ if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && (xfs_get_projid(tdp) != xfs_get_projid(sip)))) { - error = XFS_ERROR(EXDEV); + error = -EXDEV; goto error_return; } @@ -1635,8 +1635,8 @@ xfs_release( truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); if (truncated) { xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE); - if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) { - error = -filemap_flush(VFS_I(ip)->i_mapping); + if (ip->i_delayed_blks > 0) { + error = filemap_flush(VFS_I(ip)->i_mapping); if (error) return error; } @@ -1673,7 +1673,7 @@ xfs_release( return 0; error = xfs_free_eofblocks(mp, ip, true); - if (error && error != EAGAIN) + if (error && error != -EAGAIN) return error; /* delalloc blocks after truncation means it really is dirty */ @@ -1772,7 +1772,7 @@ xfs_inactive_ifree( error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, XFS_IFREE_SPACE_RES(mp), 0); if (error) { - if (error == ENOSPC) { + if (error == -ENOSPC) { xfs_warn_ratelimited(mp, "Failed to remove inode(s) from unlinked list. " "Please free space, unmount and run xfs_repair."); @@ -2219,7 +2219,7 @@ xfs_ifree_cluster( XBF_UNMAPPED); if (!bp) - return ENOMEM; + return -ENOMEM; /* * This buffer may not have been correctly initialised as we @@ -2491,7 +2491,7 @@ xfs_remove( trace_xfs_remove(dp, name); if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; error = xfs_qm_dqattach(dp, 0); if (error) @@ -2521,12 +2521,12 @@ xfs_remove( */ resblks = XFS_REMOVE_SPACE_RES(mp); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, resblks, 0); - if (error == ENOSPC) { + if (error == -ENOSPC) { resblks = 0; error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, 0, 0); } if (error) { - ASSERT(error != ENOSPC); + ASSERT(error != -ENOSPC); cancel_flags = 0; goto out_trans_cancel; } @@ -2543,11 +2543,11 @@ xfs_remove( if (is_dir) { ASSERT(ip->i_d.di_nlink >= 2); if (ip->i_d.di_nlink != 2) { - error = XFS_ERROR(ENOTEMPTY); + error = -ENOTEMPTY; goto out_trans_cancel; } if (!xfs_dir_isempty(ip)) { - error = XFS_ERROR(ENOTEMPTY); + error = -ENOTEMPTY; goto out_trans_cancel; } @@ -2582,7 +2582,7 @@ xfs_remove( error = xfs_dir_removename(tp, dp, name, ip->i_ino, &first_block, &free_list, resblks); if (error) { - ASSERT(error != ENOENT); + ASSERT(error != -ENOENT); goto out_bmap_cancel; } @@ -2702,7 +2702,7 @@ xfs_rename( cancel_flags = XFS_TRANS_RELEASE_LOG_RES; spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0); - if (error == ENOSPC) { + if (error == -ENOSPC) { spaceres = 0; error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0); } @@ -2747,7 +2747,7 @@ xfs_rename( */ if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) { - error = XFS_ERROR(EXDEV); + error = -EXDEV; goto error_return; } @@ -2770,7 +2770,7 @@ xfs_rename( error = xfs_dir_createname(tp, target_dp, target_name, src_ip->i_ino, &first_block, &free_list, spaceres); - if (error == ENOSPC) + if (error == -ENOSPC) goto error_return; if (error) goto abort_return; @@ -2795,7 +2795,7 @@ xfs_rename( */ if (!(xfs_dir_isempty(target_ip)) || (target_ip->i_d.di_nlink > 2)) { - error = XFS_ERROR(EEXIST); + error = -EEXIST; goto error_return; } } @@ -2847,7 +2847,7 @@ xfs_rename( error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot, target_dp->i_ino, &first_block, &free_list, spaceres); - ASSERT(error != EEXIST); + ASSERT(error != -EEXIST); if (error) goto abort_return; } @@ -3055,7 +3055,7 @@ cluster_corrupt_out: if (bp->b_iodone) { XFS_BUF_UNDONE(bp); xfs_buf_stale(bp); - xfs_buf_ioerror(bp, EIO); + xfs_buf_ioerror(bp, -EIO); xfs_buf_ioend(bp, 0); } else { xfs_buf_stale(bp); @@ -3069,7 +3069,7 @@ cluster_corrupt_out: xfs_iflush_abort(iq, false); kmem_free(ilist); xfs_perag_put(pag); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } /* @@ -3124,7 +3124,7 @@ xfs_iflush( * as we wait for an empty AIL as part of the unmount process. */ if (XFS_FORCED_SHUTDOWN(mp)) { - error = XFS_ERROR(EIO); + error = -EIO; goto abort_out; } @@ -3167,7 +3167,7 @@ corrupt_out: xfs_buf_relse(bp); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); cluster_corrupt_out: - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; abort_out: /* * Unlocks the flush lock @@ -3331,5 +3331,5 @@ xfs_iflush_int( return 0; corrupt_out: - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index f72bffa6726..c10e3fadd9a 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -398,4 +398,14 @@ do { \ extern struct kmem_zone *xfs_inode_zone; +/* + * Flags for read/write calls + */ +#define XFS_IO_ISDIRECT 0x00001 /* bypass page cache */ +#define XFS_IO_INVIS 0x00002 /* don't update inode timestamps */ + +#define XFS_IO_FLAGS \ + { XFS_IO_ISDIRECT, "DIRECT" }, \ + { XFS_IO_INVIS, "INVIS"} + #endif /* __XFS_INODE_H__ */ diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index a640137b357..de5a7be36e6 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -788,5 +788,5 @@ xfs_inode_item_format_convert( in_f->ilf_boffset = in_f64->ilf_boffset; return 0; } - return EFSCORRUPTED; + return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 8bc1bbce745..3799695b924 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -207,7 +207,7 @@ xfs_open_by_handle( struct path path; if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); + return -EPERM; dentry = xfs_handlereq_to_dentry(parfilp, hreq); if (IS_ERR(dentry)) @@ -216,7 +216,7 @@ xfs_open_by_handle( /* Restrict xfs_open_by_handle to directories & regular files. */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) { - error = -XFS_ERROR(EPERM); + error = -EPERM; goto out_dput; } @@ -228,18 +228,18 @@ xfs_open_by_handle( fmode = OPEN_FMODE(permflag); if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) && (fmode & FMODE_WRITE) && IS_APPEND(inode)) { - error = -XFS_ERROR(EPERM); + error = -EPERM; goto out_dput; } if ((fmode & FMODE_WRITE) && IS_IMMUTABLE(inode)) { - error = -XFS_ERROR(EACCES); + error = -EACCES; goto out_dput; } /* Can't write directories. */ if (S_ISDIR(inode->i_mode) && (fmode & FMODE_WRITE)) { - error = -XFS_ERROR(EISDIR); + error = -EISDIR; goto out_dput; } @@ -282,7 +282,7 @@ xfs_readlink_by_handle( int error; if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); + return -EPERM; dentry = xfs_handlereq_to_dentry(parfilp, hreq); if (IS_ERR(dentry)) @@ -290,22 +290,22 @@ xfs_readlink_by_handle( /* Restrict this handle operation to symlinks only. */ if (!S_ISLNK(dentry->d_inode->i_mode)) { - error = -XFS_ERROR(EINVAL); + error = -EINVAL; goto out_dput; } if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) { - error = -XFS_ERROR(EFAULT); + error = -EFAULT; goto out_dput; } link = kmalloc(MAXPATHLEN+1, GFP_KERNEL); if (!link) { - error = -XFS_ERROR(ENOMEM); + error = -ENOMEM; goto out_dput; } - error = -xfs_readlink(XFS_I(dentry->d_inode), link); + error = xfs_readlink(XFS_I(dentry->d_inode), link); if (error) goto out_kfree; error = readlink_copy(hreq->ohandle, olen, link); @@ -330,10 +330,10 @@ xfs_set_dmattrs( int error; if (!capable(CAP_SYS_ADMIN)) - return XFS_ERROR(EPERM); + return -EPERM; if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); @@ -364,9 +364,9 @@ xfs_fssetdm_by_handle( struct dentry *dentry; if (!capable(CAP_MKNOD)) - return -XFS_ERROR(EPERM); + return -EPERM; if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; error = mnt_want_write_file(parfilp); if (error) @@ -379,16 +379,16 @@ xfs_fssetdm_by_handle( } if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) { - error = -XFS_ERROR(EPERM); + error = -EPERM; goto out; } if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) { - error = -XFS_ERROR(EFAULT); + error = -EFAULT; goto out; } - error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask, + error = xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask, fsd.fsd_dmstate); out: @@ -409,18 +409,18 @@ xfs_attrlist_by_handle( char *kbuf; if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); + return -EPERM; if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; if (al_hreq.buflen < sizeof(struct attrlist) || al_hreq.buflen > XATTR_LIST_MAX) - return -XFS_ERROR(EINVAL); + return -EINVAL; /* * Reject flags, only allow namespaces. */ if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) - return -XFS_ERROR(EINVAL); + return -EINVAL; dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq); if (IS_ERR(dentry)) @@ -431,7 +431,7 @@ xfs_attrlist_by_handle( goto out_dput; cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; - error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen, + error = xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen, al_hreq.flags, cursor); if (error) goto out_kfree; @@ -455,20 +455,20 @@ xfs_attrmulti_attr_get( __uint32_t flags) { unsigned char *kbuf; - int error = EFAULT; + int error = -EFAULT; if (*len > XATTR_SIZE_MAX) - return EINVAL; + return -EINVAL; kbuf = kmem_zalloc_large(*len, KM_SLEEP); if (!kbuf) - return ENOMEM; + return -ENOMEM; error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags); if (error) goto out_kfree; if (copy_to_user(ubuf, kbuf, *len)) - error = EFAULT; + error = -EFAULT; out_kfree: kmem_free(kbuf); @@ -484,20 +484,17 @@ xfs_attrmulti_attr_set( __uint32_t flags) { unsigned char *kbuf; - int error = EFAULT; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return EPERM; + return -EPERM; if (len > XATTR_SIZE_MAX) - return EINVAL; + return -EINVAL; kbuf = memdup_user(ubuf, len); if (IS_ERR(kbuf)) return PTR_ERR(kbuf); - error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); - - return error; + return xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); } int @@ -507,7 +504,7 @@ xfs_attrmulti_attr_remove( __uint32_t flags) { if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return EPERM; + return -EPERM; return xfs_attr_remove(XFS_I(inode), name, flags); } @@ -524,9 +521,9 @@ xfs_attrmulti_by_handle( unsigned char *attr_name; if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); + return -EPERM; if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; /* overflow check */ if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t)) @@ -536,18 +533,18 @@ xfs_attrmulti_by_handle( if (IS_ERR(dentry)) return PTR_ERR(dentry); - error = E2BIG; + error = -E2BIG; size = am_hreq.opcount * sizeof(xfs_attr_multiop_t); if (!size || size > 16 * PAGE_SIZE) goto out_dput; ops = memdup_user(am_hreq.ops, size); if (IS_ERR(ops)) { - error = -PTR_ERR(ops); + error = PTR_ERR(ops); goto out_dput; } - error = ENOMEM; + error = -ENOMEM; attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); if (!attr_name) goto out_kfree_ops; @@ -557,7 +554,7 @@ xfs_attrmulti_by_handle( ops[i].am_error = strncpy_from_user((char *)attr_name, ops[i].am_attrname, MAXNAMELEN); if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) - error = ERANGE; + error = -ERANGE; if (ops[i].am_error < 0) break; @@ -588,19 +585,19 @@ xfs_attrmulti_by_handle( mnt_drop_write_file(parfilp); break; default: - ops[i].am_error = EINVAL; + ops[i].am_error = -EINVAL; } } if (copy_to_user(am_hreq.ops, ops, size)) - error = XFS_ERROR(EFAULT); + error = -EFAULT; kfree(attr_name); out_kfree_ops: kfree(ops); out_dput: dput(dentry); - return -error; + return error; } int @@ -625,16 +622,16 @@ xfs_ioc_space( */ if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) && !capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); + return -EPERM; if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) - return -XFS_ERROR(EPERM); + return -EPERM; if (!(filp->f_mode & FMODE_WRITE)) - return -XFS_ERROR(EBADF); + return -EBADF; if (!S_ISREG(inode->i_mode)) - return -XFS_ERROR(EINVAL); + return -EINVAL; error = mnt_want_write_file(filp); if (error) @@ -652,7 +649,7 @@ xfs_ioc_space( bf->l_start += XFS_ISIZE(ip); break; default: - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto out_unlock; } @@ -669,7 +666,7 @@ xfs_ioc_space( case XFS_IOC_UNRESVSP: case XFS_IOC_UNRESVSP64: if (bf->l_len <= 0) { - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto out_unlock; } break; @@ -682,7 +679,7 @@ xfs_ioc_space( bf->l_start > mp->m_super->s_maxbytes || bf->l_start + bf->l_len < 0 || bf->l_start + bf->l_len >= mp->m_super->s_maxbytes) { - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto out_unlock; } @@ -723,7 +720,7 @@ xfs_ioc_space( break; default: ASSERT(0); - error = XFS_ERROR(EINVAL); + error = -EINVAL; } if (error) @@ -739,7 +736,7 @@ xfs_ioc_space( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - if (!(ioflags & IO_INVIS)) { + if (!(ioflags & XFS_IO_INVIS)) { ip->i_d.di_mode &= ~S_ISUID; if (ip->i_d.di_mode & S_IXGRP) ip->i_d.di_mode &= ~S_ISGID; @@ -759,7 +756,7 @@ xfs_ioc_space( out_unlock: xfs_iunlock(ip, XFS_IOLOCK_EXCL); mnt_drop_write_file(filp); - return -error; + return error; } STATIC int @@ -781,41 +778,41 @@ xfs_ioc_bulkstat( return -EPERM; if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); + return -EIO; if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) - return -XFS_ERROR(EFAULT); + return -EFAULT; if ((count = bulkreq.icount) <= 0) - return -XFS_ERROR(EINVAL); + return -EINVAL; if (bulkreq.ubuffer == NULL) - return -XFS_ERROR(EINVAL); + return -EINVAL; if (cmd == XFS_IOC_FSINUMBERS) error = xfs_inumbers(mp, &inlast, &count, bulkreq.ubuffer, xfs_inumbers_fmt); else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) - error = xfs_bulkstat_single(mp, &inlast, - bulkreq.ubuffer, &done); + error = xfs_bulkstat_one(mp, inlast, bulkreq.ubuffer, + sizeof(xfs_bstat_t), NULL, &done); else /* XFS_IOC_FSBULKSTAT */ error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, sizeof(xfs_bstat_t), bulkreq.ubuffer, &done); if (error) - return -error; + return error; if (bulkreq.ocount != NULL) { if (copy_to_user(bulkreq.lastip, &inlast, sizeof(xfs_ino_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) - return -XFS_ERROR(EFAULT); + return -EFAULT; } return 0; @@ -831,7 +828,7 @@ xfs_ioc_fsgeometry_v1( error = xfs_fs_geometry(mp, &fsgeo, 3); if (error) - return -error; + return error; /* * Caller should have passed an argument of type @@ -839,7 +836,7 @@ xfs_ioc_fsgeometry_v1( * xfs_fsop_geom_t that xfs_fs_geometry() fills in. */ if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -853,10 +850,10 @@ xfs_ioc_fsgeometry( error = xfs_fs_geometry(mp, &fsgeo, 4); if (error) - return -error; + return error; if (copy_to_user(arg, &fsgeo, sizeof(fsgeo))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -1041,16 +1038,16 @@ xfs_ioctl_setattr( trace_xfs_ioctl_setattr(ip); if (mp->m_flags & XFS_MOUNT_RDONLY) - return XFS_ERROR(EROFS); + return -EROFS; if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; /* * Disallow 32bit project ids when projid32bit feature is not enabled. */ if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) && !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) - return XFS_ERROR(EINVAL); + return -EINVAL; /* * If disk quotas is on, we make sure that the dquots do exist on disk, @@ -1088,7 +1085,7 @@ xfs_ioctl_setattr( * CAP_FSETID capability is applicable. */ if (!inode_owner_or_capable(VFS_I(ip))) { - code = XFS_ERROR(EPERM); + code = -EPERM; goto error_return; } @@ -1099,7 +1096,7 @@ xfs_ioctl_setattr( */ if (mask & FSX_PROJID) { if (current_user_ns() != &init_user_ns) { - code = XFS_ERROR(EINVAL); + code = -EINVAL; goto error_return; } @@ -1122,7 +1119,7 @@ xfs_ioctl_setattr( if (ip->i_d.di_nextents && ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize)) { - code = XFS_ERROR(EINVAL); /* EFBIG? */ + code = -EINVAL; /* EFBIG? */ goto error_return; } @@ -1141,7 +1138,7 @@ xfs_ioctl_setattr( extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); if (extsize_fsb > MAXEXTLEN) { - code = XFS_ERROR(EINVAL); + code = -EINVAL; goto error_return; } @@ -1153,13 +1150,13 @@ xfs_ioctl_setattr( } else { size = mp->m_sb.sb_blocksize; if (extsize_fsb > mp->m_sb.sb_agblocks / 2) { - code = XFS_ERROR(EINVAL); + code = -EINVAL; goto error_return; } } if (fa->fsx_extsize % size) { - code = XFS_ERROR(EINVAL); + code = -EINVAL; goto error_return; } } @@ -1173,7 +1170,7 @@ xfs_ioctl_setattr( if ((ip->i_d.di_nextents || ip->i_delayed_blks) && (XFS_IS_REALTIME_INODE(ip)) != (fa->fsx_xflags & XFS_XFLAG_REALTIME)) { - code = XFS_ERROR(EINVAL); /* EFBIG? */ + code = -EINVAL; /* EFBIG? */ goto error_return; } @@ -1184,7 +1181,7 @@ xfs_ioctl_setattr( if ((mp->m_sb.sb_rblocks == 0) || (mp->m_sb.sb_rextsize == 0) || (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) { - code = XFS_ERROR(EINVAL); + code = -EINVAL; goto error_return; } } @@ -1198,7 +1195,7 @@ xfs_ioctl_setattr( (fa->fsx_xflags & (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && !capable(CAP_LINUX_IMMUTABLE)) { - code = XFS_ERROR(EPERM); + code = -EPERM; goto error_return; } } @@ -1301,7 +1298,7 @@ xfs_ioc_fssetxattr( return error; error = xfs_ioctl_setattr(ip, &fa, mask); mnt_drop_write_file(filp); - return -error; + return error; } STATIC int @@ -1346,7 +1343,7 @@ xfs_ioc_setxflags( return error; error = xfs_ioctl_setattr(ip, &fa, mask); mnt_drop_write_file(filp); - return -error; + return error; } STATIC int @@ -1356,7 +1353,7 @@ xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full) /* copy only getbmap portion (not getbmapx) */ if (copy_to_user(base, bmv, sizeof(struct getbmap))) - return XFS_ERROR(EFAULT); + return -EFAULT; *ap += sizeof(struct getbmap); return 0; @@ -1373,23 +1370,23 @@ xfs_ioc_getbmap( int error; if (copy_from_user(&bmx, arg, sizeof(struct getbmapx))) - return -XFS_ERROR(EFAULT); + return -EFAULT; if (bmx.bmv_count < 2) - return -XFS_ERROR(EINVAL); + return -EINVAL; bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0); - if (ioflags & IO_INVIS) + if (ioflags & XFS_IO_INVIS) bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ; error = xfs_getbmap(ip, &bmx, xfs_getbmap_format, (struct getbmap *)arg+1); if (error) - return -error; + return error; /* copy back header - only size of getbmap */ if (copy_to_user(arg, &bmx, sizeof(struct getbmap))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -1399,7 +1396,7 @@ xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full) struct getbmapx __user *base = *ap; if (copy_to_user(base, bmv, sizeof(struct getbmapx))) - return XFS_ERROR(EFAULT); + return -EFAULT; *ap += sizeof(struct getbmapx); return 0; @@ -1414,22 +1411,22 @@ xfs_ioc_getbmapx( int error; if (copy_from_user(&bmx, arg, sizeof(bmx))) - return -XFS_ERROR(EFAULT); + return -EFAULT; if (bmx.bmv_count < 2) - return -XFS_ERROR(EINVAL); + return -EINVAL; if (bmx.bmv_iflags & (~BMV_IF_VALID)) - return -XFS_ERROR(EINVAL); + return -EINVAL; error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format, (struct getbmapx *)arg+1); if (error) - return -error; + return error; /* copy back header */ if (copy_to_user(arg, &bmx, sizeof(struct getbmapx))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -1445,33 +1442,33 @@ xfs_ioc_swapext( /* Pull information for the target fd */ f = fdget((int)sxp->sx_fdtarget); if (!f.file) { - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto out; } if (!(f.file->f_mode & FMODE_WRITE) || !(f.file->f_mode & FMODE_READ) || (f.file->f_flags & O_APPEND)) { - error = XFS_ERROR(EBADF); + error = -EBADF; goto out_put_file; } tmp = fdget((int)sxp->sx_fdtmp); if (!tmp.file) { - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto out_put_file; } if (!(tmp.file->f_mode & FMODE_WRITE) || !(tmp.file->f_mode & FMODE_READ) || (tmp.file->f_flags & O_APPEND)) { - error = XFS_ERROR(EBADF); + error = -EBADF; goto out_put_tmp_file; } if (IS_SWAPFILE(file_inode(f.file)) || IS_SWAPFILE(file_inode(tmp.file))) { - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto out_put_tmp_file; } @@ -1479,17 +1476,17 @@ xfs_ioc_swapext( tip = XFS_I(file_inode(tmp.file)); if (ip->i_mount != tip->i_mount) { - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto out_put_tmp_file; } if (ip->i_ino == tip->i_ino) { - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto out_put_tmp_file; } if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { - error = XFS_ERROR(EIO); + error = -EIO; goto out_put_tmp_file; } @@ -1523,7 +1520,7 @@ xfs_file_ioctl( int error; if (filp->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; + ioflags |= XFS_IO_INVIS; trace_xfs_file_ioctl(ip); @@ -1542,7 +1539,7 @@ xfs_file_ioctl( xfs_flock64_t bf; if (copy_from_user(&bf, arg, sizeof(bf))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf); } case XFS_IOC_DIOINFO: { @@ -1555,7 +1552,7 @@ xfs_file_ioctl( da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); if (copy_to_user(arg, &da, sizeof(da))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -1588,7 +1585,7 @@ xfs_file_ioctl( struct fsdmidata dmi; if (copy_from_user(&dmi, arg, sizeof(dmi))) - return -XFS_ERROR(EFAULT); + return -EFAULT; error = mnt_want_write_file(filp); if (error) @@ -1597,7 +1594,7 @@ xfs_file_ioctl( error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, dmi.fsd_dmstate); mnt_drop_write_file(filp); - return -error; + return error; } case XFS_IOC_GETBMAP: @@ -1613,14 +1610,14 @@ xfs_file_ioctl( xfs_fsop_handlereq_t hreq; if (copy_from_user(&hreq, arg, sizeof(hreq))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return xfs_find_handle(cmd, &hreq); } case XFS_IOC_OPEN_BY_HANDLE: { xfs_fsop_handlereq_t hreq; if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return xfs_open_by_handle(filp, &hreq); } case XFS_IOC_FSSETDM_BY_HANDLE: @@ -1630,7 +1627,7 @@ xfs_file_ioctl( xfs_fsop_handlereq_t hreq; if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return xfs_readlink_by_handle(filp, &hreq); } case XFS_IOC_ATTRLIST_BY_HANDLE: @@ -1643,13 +1640,13 @@ xfs_file_ioctl( struct xfs_swapext sxp; if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; error = mnt_want_write_file(filp); if (error) return error; error = xfs_ioc_swapext(&sxp); mnt_drop_write_file(filp); - return -error; + return error; } case XFS_IOC_FSCOUNTS: { @@ -1657,10 +1654,10 @@ xfs_file_ioctl( error = xfs_fs_counts(mp, &out); if (error) - return -error; + return error; if (copy_to_user(arg, &out, sizeof(out))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -1672,10 +1669,10 @@ xfs_file_ioctl( return -EPERM; if (mp->m_flags & XFS_MOUNT_RDONLY) - return -XFS_ERROR(EROFS); + return -EROFS; if (copy_from_user(&inout, arg, sizeof(inout))) - return -XFS_ERROR(EFAULT); + return -EFAULT; error = mnt_want_write_file(filp); if (error) @@ -1686,10 +1683,10 @@ xfs_file_ioctl( error = xfs_reserve_blocks(mp, &in, &inout); mnt_drop_write_file(filp); if (error) - return -error; + return error; if (copy_to_user(arg, &inout, sizeof(inout))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -1701,10 +1698,10 @@ xfs_file_ioctl( error = xfs_reserve_blocks(mp, NULL, &out); if (error) - return -error; + return error; if (copy_to_user(arg, &out, sizeof(out))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -1713,42 +1710,42 @@ xfs_file_ioctl( xfs_growfs_data_t in; if (copy_from_user(&in, arg, sizeof(in))) - return -XFS_ERROR(EFAULT); + return -EFAULT; error = mnt_want_write_file(filp); if (error) return error; error = xfs_growfs_data(mp, &in); mnt_drop_write_file(filp); - return -error; + return error; } case XFS_IOC_FSGROWFSLOG: { xfs_growfs_log_t in; if (copy_from_user(&in, arg, sizeof(in))) - return -XFS_ERROR(EFAULT); + return -EFAULT; error = mnt_want_write_file(filp); if (error) return error; error = xfs_growfs_log(mp, &in); mnt_drop_write_file(filp); - return -error; + return error; } case XFS_IOC_FSGROWFSRT: { xfs_growfs_rt_t in; if (copy_from_user(&in, arg, sizeof(in))) - return -XFS_ERROR(EFAULT); + return -EFAULT; error = mnt_want_write_file(filp); if (error) return error; error = xfs_growfs_rt(mp, &in); mnt_drop_write_file(filp); - return -error; + return error; } case XFS_IOC_GOINGDOWN: { @@ -1758,10 +1755,9 @@ xfs_file_ioctl( return -EPERM; if (get_user(in, (__uint32_t __user *)arg)) - return -XFS_ERROR(EFAULT); + return -EFAULT; - error = xfs_fs_goingdown(mp, in); - return -error; + return xfs_fs_goingdown(mp, in); } case XFS_IOC_ERROR_INJECTION: { @@ -1771,18 +1767,16 @@ xfs_file_ioctl( return -EPERM; if (copy_from_user(&in, arg, sizeof(in))) - return -XFS_ERROR(EFAULT); + return -EFAULT; - error = xfs_errortag_add(in.errtag, mp); - return -error; + return xfs_errortag_add(in.errtag, mp); } case XFS_IOC_ERROR_CLEARALL: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - error = xfs_errortag_clearall(mp, 1); - return -error; + return xfs_errortag_clearall(mp, 1); case XFS_IOC_FREE_EOFBLOCKS: { struct xfs_fs_eofblocks eofb; @@ -1792,16 +1786,16 @@ xfs_file_ioctl( return -EPERM; if (mp->m_flags & XFS_MOUNT_RDONLY) - return -XFS_ERROR(EROFS); + return -EROFS; if (copy_from_user(&eofb, arg, sizeof(eofb))) - return -XFS_ERROR(EFAULT); + return -EFAULT; error = xfs_fs_eofblocks_from_user(&eofb, &keofb); if (error) - return -error; + return error; - return -xfs_icache_free_eofblocks(mp, &keofb); + return xfs_icache_free_eofblocks(mp, &keofb); } default: diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 944d5baa710..a554646ff14 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -28,7 +28,6 @@ #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_vnode.h" #include "xfs_inode.h" #include "xfs_itable.h" #include "xfs_error.h" @@ -56,7 +55,7 @@ xfs_compat_flock64_copyin( get_user(bf->l_sysid, &arg32->l_sysid) || get_user(bf->l_pid, &arg32->l_pid) || copy_from_user(bf->l_pad, &arg32->l_pad, 4*sizeof(u32))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -70,10 +69,10 @@ xfs_compat_ioc_fsgeometry_v1( error = xfs_fs_geometry(mp, &fsgeo, 3); if (error) - return -error; + return error; /* The 32-bit variant simply has some padding at the end */ if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1))) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -84,7 +83,7 @@ xfs_compat_growfs_data_copyin( { if (get_user(in->newblocks, &arg32->newblocks) || get_user(in->imaxpct, &arg32->imaxpct)) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -95,14 +94,14 @@ xfs_compat_growfs_rt_copyin( { if (get_user(in->newblocks, &arg32->newblocks) || get_user(in->extsize, &arg32->extsize)) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } STATIC int xfs_inumbers_fmt_compat( void __user *ubuffer, - const xfs_inogrp_t *buffer, + const struct xfs_inogrp *buffer, long count, long *written) { @@ -113,7 +112,7 @@ xfs_inumbers_fmt_compat( if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) || put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) || put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask)) - return -XFS_ERROR(EFAULT); + return -EFAULT; } *written = count * sizeof(*p32); return 0; @@ -132,7 +131,7 @@ xfs_ioctl32_bstime_copyin( if (get_user(sec32, &bstime32->tv_sec) || get_user(bstime->tv_nsec, &bstime32->tv_nsec)) - return -XFS_ERROR(EFAULT); + return -EFAULT; bstime->tv_sec = sec32; return 0; } @@ -164,7 +163,7 @@ xfs_ioctl32_bstat_copyin( get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || get_user(bstat->bs_aextents, &bstat32->bs_aextents)) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -180,7 +179,7 @@ xfs_bstime_store_compat( sec32 = p->tv_sec; if (put_user(sec32, &p32->tv_sec) || put_user(p->tv_nsec, &p32->tv_nsec)) - return -XFS_ERROR(EFAULT); + return -EFAULT; return 0; } @@ -195,7 +194,7 @@ xfs_bulkstat_one_fmt_compat( compat_xfs_bstat_t __user *p32 = ubuffer; if (ubsize < sizeof(*p32)) - return XFS_ERROR(ENOMEM); + return -ENOMEM; if (put_user(buffer->bs_ino, &p32->bs_ino) || put_user(buffer->bs_mode, &p32->bs_mode) || @@ -218,7 +217,7 @@ xfs_bulkstat_one_fmt_compat( put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || put_user(buffer->bs_dmstate, &p32->bs_dmstate) || put_user(buffer->bs_aextents, &p32->bs_aextents)) - return XFS_ERROR(EFAULT); + return -EFAULT; if (ubused) *ubused = sizeof(*p32); return 0; @@ -256,30 +255,30 @@ xfs_compat_ioc_bulkstat( /* should be called again (unused here, but used in dmapi) */ if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); + return -EPERM; if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); + return -EIO; if (get_user(addr, &p32->lastip)) - return -XFS_ERROR(EFAULT); + return -EFAULT; bulkreq.lastip = compat_ptr(addr); if (get_user(bulkreq.icount, &p32->icount) || get_user(addr, &p32->ubuffer)) - return -XFS_ERROR(EFAULT); + return -EFAULT; bulkreq.ubuffer = compat_ptr(addr); if (get_user(addr, &p32->ocount)) - return -XFS_ERROR(EFAULT); + return -EFAULT; bulkreq.ocount = compat_ptr(addr); if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) - return -XFS_ERROR(EFAULT); + return -EFAULT; if ((count = bulkreq.icount) <= 0) - return -XFS_ERROR(EINVAL); + return -EINVAL; if (bulkreq.ubuffer == NULL) - return -XFS_ERROR(EINVAL); + return -EINVAL; if (cmd == XFS_IOC_FSINUMBERS_32) { error = xfs_inumbers(mp, &inlast, &count, @@ -294,17 +293,17 @@ xfs_compat_ioc_bulkstat( xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t), bulkreq.ubuffer, &done); } else - error = XFS_ERROR(EINVAL); + error = -EINVAL; if (error) - return -error; + return error; if (bulkreq.ocount != NULL) { if (copy_to_user(bulkreq.lastip, &inlast, sizeof(xfs_ino_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) - return -XFS_ERROR(EFAULT); + return -EFAULT; } return 0; @@ -318,7 +317,7 @@ xfs_compat_handlereq_copyin( compat_xfs_fsop_handlereq_t hreq32; if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; hreq->fd = hreq32.fd; hreq->path = compat_ptr(hreq32.path); @@ -352,19 +351,19 @@ xfs_compat_attrlist_by_handle( char *kbuf; if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); + return -EPERM; if (copy_from_user(&al_hreq, arg, sizeof(compat_xfs_fsop_attrlist_handlereq_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; if (al_hreq.buflen < sizeof(struct attrlist) || al_hreq.buflen > XATTR_LIST_MAX) - return -XFS_ERROR(EINVAL); + return -EINVAL; /* * Reject flags, only allow namespaces. */ if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) - return -XFS_ERROR(EINVAL); + return -EINVAL; dentry = xfs_compat_handlereq_to_dentry(parfilp, &al_hreq.hreq); if (IS_ERR(dentry)) @@ -376,7 +375,7 @@ xfs_compat_attrlist_by_handle( goto out_dput; cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; - error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen, + error = xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen, al_hreq.flags, cursor); if (error) goto out_kfree; @@ -404,10 +403,10 @@ xfs_compat_attrmulti_by_handle( unsigned char *attr_name; if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); + return -EPERM; if (copy_from_user(&am_hreq, arg, sizeof(compat_xfs_fsop_attrmulti_handlereq_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; /* overflow check */ if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t)) @@ -417,7 +416,7 @@ xfs_compat_attrmulti_by_handle( if (IS_ERR(dentry)) return PTR_ERR(dentry); - error = E2BIG; + error = -E2BIG; size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t); if (!size || size > 16 * PAGE_SIZE) goto out_dput; @@ -428,7 +427,7 @@ xfs_compat_attrmulti_by_handle( goto out_dput; } - error = ENOMEM; + error = -ENOMEM; attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); if (!attr_name) goto out_kfree_ops; @@ -439,7 +438,7 @@ xfs_compat_attrmulti_by_handle( compat_ptr(ops[i].am_attrname), MAXNAMELEN); if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) - error = ERANGE; + error = -ERANGE; if (ops[i].am_error < 0) break; @@ -470,19 +469,19 @@ xfs_compat_attrmulti_by_handle( mnt_drop_write_file(parfilp); break; default: - ops[i].am_error = EINVAL; + ops[i].am_error = -EINVAL; } } if (copy_to_user(compat_ptr(am_hreq.ops), ops, size)) - error = XFS_ERROR(EFAULT); + error = -EFAULT; kfree(attr_name); out_kfree_ops: kfree(ops); out_dput: dput(dentry); - return -error; + return error; } STATIC int @@ -496,26 +495,26 @@ xfs_compat_fssetdm_by_handle( struct dentry *dentry; if (!capable(CAP_MKNOD)) - return -XFS_ERROR(EPERM); + return -EPERM; if (copy_from_user(&dmhreq, arg, sizeof(compat_xfs_fsop_setdm_handlereq_t))) - return -XFS_ERROR(EFAULT); + return -EFAULT; dentry = xfs_compat_handlereq_to_dentry(parfilp, &dmhreq.hreq); if (IS_ERR(dentry)) return PTR_ERR(dentry); if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) { - error = -XFS_ERROR(EPERM); + error = -EPERM; goto out; } if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) { - error = -XFS_ERROR(EFAULT); + error = -EFAULT; goto out; } - error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask, + error = xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask, fsd.fsd_dmstate); out: @@ -537,7 +536,7 @@ xfs_file_compat_ioctl( int error; if (filp->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; + ioflags |= XFS_IO_INVIS; trace_xfs_file_compat_ioctl(ip); @@ -588,7 +587,7 @@ xfs_file_compat_ioctl( struct xfs_flock64 bf; if (xfs_compat_flock64_copyin(&bf, arg)) - return -XFS_ERROR(EFAULT); + return -EFAULT; cmd = _NATIVE_IOC(cmd, struct xfs_flock64); return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf); } @@ -598,25 +597,25 @@ xfs_file_compat_ioctl( struct xfs_growfs_data in; if (xfs_compat_growfs_data_copyin(&in, arg)) - return -XFS_ERROR(EFAULT); + return -EFAULT; error = mnt_want_write_file(filp); if (error) return error; error = xfs_growfs_data(mp, &in); mnt_drop_write_file(filp); - return -error; + return error; } case XFS_IOC_FSGROWFSRT_32: { struct xfs_growfs_rt in; if (xfs_compat_growfs_rt_copyin(&in, arg)) - return -XFS_ERROR(EFAULT); + return -EFAULT; error = mnt_want_write_file(filp); if (error) return error; error = xfs_growfs_rt(mp, &in); mnt_drop_write_file(filp); - return -error; + return error; } #endif /* long changes size, but xfs only copiese out 32 bits */ @@ -633,13 +632,13 @@ xfs_file_compat_ioctl( if (copy_from_user(&sxp, sxu, offsetof(struct xfs_swapext, sx_stat)) || xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat)) - return -XFS_ERROR(EFAULT); + return -EFAULT; error = mnt_want_write_file(filp); if (error) return error; error = xfs_ioc_swapext(&sxp); mnt_drop_write_file(filp); - return -error; + return error; } case XFS_IOC_FSBULKSTAT_32: case XFS_IOC_FSBULKSTAT_SINGLE_32: @@ -651,7 +650,7 @@ xfs_file_compat_ioctl( struct xfs_fsop_handlereq hreq; if (xfs_compat_handlereq_copyin(&hreq, arg)) - return -XFS_ERROR(EFAULT); + return -EFAULT; cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq); return xfs_find_handle(cmd, &hreq); } @@ -659,14 +658,14 @@ xfs_file_compat_ioctl( struct xfs_fsop_handlereq hreq; if (xfs_compat_handlereq_copyin(&hreq, arg)) - return -XFS_ERROR(EFAULT); + return -EFAULT; return xfs_open_by_handle(filp, &hreq); } case XFS_IOC_READLINK_BY_HANDLE_32: { struct xfs_fsop_handlereq hreq; if (xfs_compat_handlereq_copyin(&hreq, arg)) - return -XFS_ERROR(EFAULT); + return -EFAULT; return xfs_readlink_by_handle(filp, &hreq); } case XFS_IOC_ATTRLIST_BY_HANDLE_32: @@ -676,6 +675,6 @@ xfs_file_compat_ioctl( case XFS_IOC_FSSETDM_BY_HANDLE_32: return xfs_compat_fssetdm_by_handle(filp, arg); default: - return -XFS_ERROR(ENOIOCTLCMD); + return -ENOIOCTLCMD; } } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 6d3ec2b6ee2..e9c47b6f5e5 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -110,7 +110,7 @@ xfs_alert_fsblock_zero( (unsigned long long)imap->br_startoff, (unsigned long long)imap->br_blockcount, imap->br_state); - return EFSCORRUPTED; + return -EFSCORRUPTED; } int @@ -138,7 +138,7 @@ xfs_iomap_write_direct( error = xfs_qm_dqattach(ip, 0); if (error) - return XFS_ERROR(error); + return error; rt = XFS_IS_REALTIME_INODE(ip); extsz = xfs_get_extsz_hint(ip); @@ -148,7 +148,7 @@ xfs_iomap_write_direct( if ((offset + count) > XFS_ISIZE(ip)) { error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); if (error) - return XFS_ERROR(error); + return error; } else { if (nmaps && (imap->br_startblock == HOLESTARTBLOCK)) last_fsb = MIN(last_fsb, (xfs_fileoff_t) @@ -188,7 +188,7 @@ xfs_iomap_write_direct( */ if (error) { xfs_trans_cancel(tp, 0); - return XFS_ERROR(error); + return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -225,7 +225,7 @@ xfs_iomap_write_direct( * Copy any maps to caller's array and return any error. */ if (nimaps == 0) { - error = XFS_ERROR(ENOSPC); + error = -ENOSPC; goto out_unlock; } @@ -397,7 +397,8 @@ xfs_quota_calc_throttle( struct xfs_inode *ip, int type, xfs_fsblock_t *qblocks, - int *qshift) + int *qshift, + int64_t *qfreesp) { int64_t freesp; int shift = 0; @@ -406,6 +407,7 @@ xfs_quota_calc_throttle( /* over hi wmark, squash the prealloc completely */ if (dq->q_res_bcount >= dq->q_prealloc_hi_wmark) { *qblocks = 0; + *qfreesp = 0; return; } @@ -418,6 +420,9 @@ xfs_quota_calc_throttle( shift += 2; } + if (freesp < *qfreesp) + *qfreesp = freesp; + /* only overwrite the throttle values if we are more aggressive */ if ((freesp >> shift) < (*qblocks >> *qshift)) { *qblocks = freesp; @@ -476,15 +481,18 @@ xfs_iomap_prealloc_size( } /* - * Check each quota to cap the prealloc size and provide a shift - * value to throttle with. + * Check each quota to cap the prealloc size, provide a shift value to + * throttle with and adjust amount of available space. */ if (xfs_quota_need_throttle(ip, XFS_DQ_USER, alloc_blocks)) - xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift); + xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift, + &freesp); if (xfs_quota_need_throttle(ip, XFS_DQ_GROUP, alloc_blocks)) - xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift); + xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift, + &freesp); if (xfs_quota_need_throttle(ip, XFS_DQ_PROJ, alloc_blocks)) - xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift); + xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift, + &freesp); /* * The final prealloc size is set to the minimum of free space available @@ -552,7 +560,7 @@ xfs_iomap_write_delay( */ error = xfs_qm_dqattach_locked(ip, 0); if (error) - return XFS_ERROR(error); + return error; extsz = xfs_get_extsz_hint(ip); offset_fsb = XFS_B_TO_FSBT(mp, offset); @@ -596,11 +604,11 @@ retry: imap, &nimaps, XFS_BMAPI_ENTIRE); switch (error) { case 0: - case ENOSPC: - case EDQUOT: + case -ENOSPC: + case -EDQUOT: break; default: - return XFS_ERROR(error); + return error; } /* @@ -614,7 +622,7 @@ retry: error = 0; goto retry; } - return XFS_ERROR(error ? error : ENOSPC); + return error ? error : -ENOSPC; } if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip))) @@ -663,7 +671,7 @@ xfs_iomap_write_allocate( */ error = xfs_qm_dqattach(ip, 0); if (error) - return XFS_ERROR(error); + return error; offset_fsb = XFS_B_TO_FSBT(mp, offset); count_fsb = imap->br_blockcount; @@ -690,7 +698,7 @@ xfs_iomap_write_allocate( nres, 0); if (error) { xfs_trans_cancel(tp, 0); - return XFS_ERROR(error); + return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); @@ -739,7 +747,7 @@ xfs_iomap_write_allocate( if ((map_start_fsb + count_fsb) > last_block) { count_fsb = last_block - map_start_fsb; if (count_fsb == 0) { - error = EAGAIN; + error = -EAGAIN; goto trans_cancel; } } @@ -793,7 +801,7 @@ trans_cancel: xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); error0: xfs_iunlock(ip, XFS_ILOCK_EXCL); - return XFS_ERROR(error); + return error; } int @@ -853,7 +861,7 @@ xfs_iomap_write_unwritten( resblks, 0); if (error) { xfs_trans_cancel(tp, 0); - return XFS_ERROR(error); + return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -892,7 +900,7 @@ xfs_iomap_write_unwritten( error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) - return XFS_ERROR(error); + return error; if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) return xfs_alert_fsblock_zero(ip, &imap); @@ -915,5 +923,5 @@ error_on_bmapi_transaction: xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); xfs_iunlock(ip, XFS_ILOCK_EXCL); - return XFS_ERROR(error); + return error; } diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 205613a0606..72129493e9d 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -72,7 +72,7 @@ xfs_initxattrs( int error = 0; for (xattr = xattr_array; xattr->name != NULL; xattr++) { - error = -xfs_attr_set(ip, xattr->name, xattr->value, + error = xfs_attr_set(ip, xattr->name, xattr->value, xattr->value_len, ATTR_SECURE); if (error < 0) break; @@ -93,7 +93,7 @@ xfs_init_security( struct inode *dir, const struct qstr *qstr) { - return -security_inode_init_security(inode, dir, qstr, + return security_inode_init_security(inode, dir, qstr, &xfs_initxattrs, NULL); } @@ -173,12 +173,12 @@ xfs_generic_create( #ifdef CONFIG_XFS_POSIX_ACL if (default_acl) { - error = -xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + error = xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); if (error) goto out_cleanup_inode; } if (acl) { - error = -xfs_set_acl(inode, acl, ACL_TYPE_ACCESS); + error = xfs_set_acl(inode, acl, ACL_TYPE_ACCESS); if (error) goto out_cleanup_inode; } @@ -194,7 +194,7 @@ xfs_generic_create( posix_acl_release(default_acl); if (acl) posix_acl_release(acl); - return -error; + return error; out_cleanup_inode: if (!tmpfile) @@ -248,8 +248,8 @@ xfs_vn_lookup( xfs_dentry_to_name(&name, dentry, 0); error = xfs_lookup(XFS_I(dir), &name, &cip, NULL); if (unlikely(error)) { - if (unlikely(error != ENOENT)) - return ERR_PTR(-error); + if (unlikely(error != -ENOENT)) + return ERR_PTR(error); d_add(dentry, NULL); return NULL; } @@ -275,8 +275,8 @@ xfs_vn_ci_lookup( xfs_dentry_to_name(&xname, dentry, 0); error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name); if (unlikely(error)) { - if (unlikely(error != ENOENT)) - return ERR_PTR(-error); + if (unlikely(error != -ENOENT)) + return ERR_PTR(error); /* * call d_add(dentry, NULL) here when d_drop_negative_children * is called in xfs_vn_mknod (ie. allow negative dentries @@ -311,7 +311,7 @@ xfs_vn_link( error = xfs_link(XFS_I(dir), XFS_I(inode), &name); if (unlikely(error)) - return -error; + return error; ihold(inode); d_instantiate(dentry, inode); @@ -328,7 +328,7 @@ xfs_vn_unlink( xfs_dentry_to_name(&name, dentry, 0); - error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode)); + error = xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode)); if (error) return error; @@ -375,7 +375,7 @@ xfs_vn_symlink( xfs_cleanup_inode(dir, inode, dentry); iput(inode); out: - return -error; + return error; } STATIC int @@ -392,8 +392,8 @@ xfs_vn_rename( xfs_dentry_to_name(&oname, odentry, 0); xfs_dentry_to_name(&nname, ndentry, odentry->d_inode->i_mode); - return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), - XFS_I(ndir), &nname, new_inode ? + return xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), + XFS_I(ndir), &nname, new_inode ? XFS_I(new_inode) : NULL); } @@ -414,7 +414,7 @@ xfs_vn_follow_link( if (!link) goto out_err; - error = -xfs_readlink(XFS_I(dentry->d_inode), link); + error = xfs_readlink(XFS_I(dentry->d_inode), link); if (unlikely(error)) goto out_kfree; @@ -441,7 +441,7 @@ xfs_vn_getattr( trace_xfs_getattr(ip); if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); + return -EIO; stat->size = XFS_ISIZE(ip); stat->dev = inode->i_sb->s_dev; @@ -546,14 +546,14 @@ xfs_setattr_nonsize( /* If acls are being inherited, we already have this checked */ if (!(flags & XFS_ATTR_NOACL)) { if (mp->m_flags & XFS_MOUNT_RDONLY) - return XFS_ERROR(EROFS); + return -EROFS; if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; - error = -inode_change_ok(inode, iattr); + error = inode_change_ok(inode, iattr); if (error) - return XFS_ERROR(error); + return error; } ASSERT((mask & ATTR_SIZE) == 0); @@ -703,7 +703,7 @@ xfs_setattr_nonsize( xfs_qm_dqrele(gdqp); if (error) - return XFS_ERROR(error); + return error; /* * XXX(hch): Updating the ACL entries is not atomic vs the i_mode @@ -713,9 +713,9 @@ xfs_setattr_nonsize( * Posix ACL code seems to care about this issue either. */ if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { - error = -posix_acl_chmod(inode, inode->i_mode); + error = posix_acl_chmod(inode, inode->i_mode); if (error) - return XFS_ERROR(error); + return error; } return 0; @@ -748,14 +748,14 @@ xfs_setattr_size( trace_xfs_setattr(ip); if (mp->m_flags & XFS_MOUNT_RDONLY) - return XFS_ERROR(EROFS); + return -EROFS; if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; - error = -inode_change_ok(inode, iattr); + error = inode_change_ok(inode, iattr); if (error) - return XFS_ERROR(error); + return error; ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); ASSERT(S_ISREG(ip->i_d.di_mode)); @@ -818,7 +818,7 @@ xfs_setattr_size( * care about here. */ if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) { - error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, + error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ip->i_d.di_size, newsize); if (error) return error; @@ -844,7 +844,7 @@ xfs_setattr_size( * much we can do about this, except to hope that the caller sees ENOMEM * and retries the truncate operation. */ - error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); + error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); if (error) return error; truncate_setsize(inode, newsize); @@ -950,7 +950,7 @@ xfs_vn_setattr( error = xfs_setattr_nonsize(ip, iattr, 0); } - return -error; + return error; } STATIC int @@ -970,7 +970,7 @@ xfs_vn_update_time( error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); if (error) { xfs_trans_cancel(tp, 0); - return -error; + return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -991,7 +991,7 @@ xfs_vn_update_time( } xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP); - return -xfs_trans_commit(tp, 0); + return xfs_trans_commit(tp, 0); } #define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) @@ -1036,7 +1036,7 @@ xfs_fiemap_format( *full = 1; /* user array now full */ } - return -error; + return error; } STATIC int @@ -1055,12 +1055,12 @@ xfs_vn_fiemap( return error; /* Set up bmap header for xfs internal routine */ - bm.bmv_offset = BTOBB(start); + bm.bmv_offset = BTOBBT(start); /* Special case for whole file */ if (length == FIEMAP_MAX_OFFSET) bm.bmv_length = -1LL; else - bm.bmv_length = BTOBB(length); + bm.bmv_length = BTOBB(start + length) - bm.bmv_offset; /* We add one because in getbmap world count includes the header */ bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM : @@ -1075,7 +1075,7 @@ xfs_vn_fiemap( error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo); if (error) - return -error; + return error; return 0; } diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index cb64f222d60..f71be9c6801 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -67,19 +67,17 @@ xfs_bulkstat_one_int( *stat = BULKSTAT_RV_NOTHING; if (!buffer || xfs_internal_inum(mp, ino)) - return XFS_ERROR(EINVAL); + return -EINVAL; buf = kmem_alloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL); if (!buf) - return XFS_ERROR(ENOMEM); + return -ENOMEM; error = xfs_iget(mp, NULL, ino, (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED), XFS_ILOCK_SHARED, &ip); - if (error) { - *stat = BULKSTAT_RV_NOTHING; + if (error) goto out_free; - } ASSERT(ip != NULL); ASSERT(ip->i_imap.im_blkno != 0); @@ -136,7 +134,6 @@ xfs_bulkstat_one_int( IRELE(ip); error = formatter(buffer, ubsize, ubused, buf); - if (!error) *stat = BULKSTAT_RV_DIDONE; @@ -154,9 +151,9 @@ xfs_bulkstat_one_fmt( const xfs_bstat_t *buffer) { if (ubsize < sizeof(*buffer)) - return XFS_ERROR(ENOMEM); + return -ENOMEM; if (copy_to_user(ubuffer, buffer, sizeof(*buffer))) - return XFS_ERROR(EFAULT); + return -EFAULT; if (ubused) *ubused = sizeof(*buffer); return 0; @@ -175,9 +172,170 @@ xfs_bulkstat_one( xfs_bulkstat_one_fmt, ubused, stat); } +/* + * Loop over all clusters in a chunk for a given incore inode allocation btree + * record. Do a readahead if there are any allocated inodes in that cluster. + */ +STATIC void +xfs_bulkstat_ichunk_ra( + struct xfs_mount *mp, + xfs_agnumber_t agno, + struct xfs_inobt_rec_incore *irec) +{ + xfs_agblock_t agbno; + struct blk_plug plug; + int blks_per_cluster; + int inodes_per_cluster; + int i; /* inode chunk index */ + + agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino); + blks_per_cluster = xfs_icluster_size_fsb(mp); + inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog; + + blk_start_plug(&plug); + for (i = 0; i < XFS_INODES_PER_CHUNK; + i += inodes_per_cluster, agbno += blks_per_cluster) { + if (xfs_inobt_maskn(i, inodes_per_cluster) & ~irec->ir_free) { + xfs_btree_reada_bufs(mp, agno, agbno, blks_per_cluster, + &xfs_inode_buf_ops); + } + } + blk_finish_plug(&plug); +} + +/* + * Lookup the inode chunk that the given inode lives in and then get the record + * if we found the chunk. If the inode was not the last in the chunk and there + * are some left allocated, update the data for the pointed-to record as well as + * return the count of grabbed inodes. + */ +STATIC int +xfs_bulkstat_grab_ichunk( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agino_t agino, /* starting inode of chunk */ + int *icount,/* return # of inodes grabbed */ + struct xfs_inobt_rec_incore *irec) /* btree record */ +{ + int idx; /* index into inode chunk */ + int stat; + int error = 0; + + /* Lookup the inode chunk that this inode lives in */ + error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &stat); + if (error) + return error; + if (!stat) { + *icount = 0; + return error; + } + + /* Get the record, should always work */ + error = xfs_inobt_get_rec(cur, irec, &stat); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(stat == 1); + + /* Check if the record contains the inode in request */ + if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) + return -EINVAL; + + idx = agino - irec->ir_startino + 1; + if (idx < XFS_INODES_PER_CHUNK && + (xfs_inobt_maskn(idx, XFS_INODES_PER_CHUNK - idx) & ~irec->ir_free)) { + int i; + + /* We got a right chunk with some left inodes allocated at it. + * Grab the chunk record. Mark all the uninteresting inodes + * free -- because they're before our start point. + */ + for (i = 0; i < idx; i++) { + if (XFS_INOBT_MASK(i) & ~irec->ir_free) + irec->ir_freecount++; + } + + irec->ir_free |= xfs_inobt_maskn(0, idx); + *icount = XFS_INODES_PER_CHUNK - irec->ir_freecount; + } + + return 0; +} + #define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) /* + * Process inodes in chunk with a pointer to a formatter function + * that will iget the inode and fill in the appropriate structure. + */ +int +xfs_bulkstat_ag_ichunk( + struct xfs_mount *mp, + xfs_agnumber_t agno, + struct xfs_inobt_rec_incore *irbp, + bulkstat_one_pf formatter, + size_t statstruct_size, + struct xfs_bulkstat_agichunk *acp) +{ + xfs_ino_t lastino = acp->ac_lastino; + char __user **ubufp = acp->ac_ubuffer; + int ubleft = acp->ac_ubleft; + int ubelem = acp->ac_ubelem; + int chunkidx, clustidx; + int error = 0; + xfs_agino_t agino; + + for (agino = irbp->ir_startino, chunkidx = clustidx = 0; + XFS_BULKSTAT_UBLEFT(ubleft) && + irbp->ir_freecount < XFS_INODES_PER_CHUNK; + chunkidx++, clustidx++, agino++) { + int fmterror; /* bulkstat formatter result */ + int ubused; + xfs_ino_t ino = XFS_AGINO_TO_INO(mp, agno, agino); + + ASSERT(chunkidx < XFS_INODES_PER_CHUNK); + + /* Skip if this inode is free */ + if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) { + lastino = ino; + continue; + } + + /* + * Count used inodes as free so we can tell when the + * chunk is used up. + */ + irbp->ir_freecount++; + + /* Get the inode and fill in a single buffer */ + ubused = statstruct_size; + error = formatter(mp, ino, *ubufp, ubleft, &ubused, &fmterror); + if (fmterror == BULKSTAT_RV_NOTHING) { + if (error && error != -ENOENT && error != -EINVAL) { + ubleft = 0; + break; + } + lastino = ino; + continue; + } + if (fmterror == BULKSTAT_RV_GIVEUP) { + ubleft = 0; + ASSERT(error); + break; + } + if (*ubufp) + *ubufp += ubused; + ubleft -= ubused; + ubelem++; + lastino = ino; + } + + acp->ac_lastino = lastino; + acp->ac_ubleft = ubleft; + acp->ac_ubelem = ubelem; + + return error; +} + +/* * Return stat information in bulk (by-inode) for the filesystem. */ int /* error status */ @@ -190,13 +348,10 @@ xfs_bulkstat( char __user *ubuffer, /* buffer with inode stats */ int *done) /* 1 if there are more stats to get */ { - xfs_agblock_t agbno=0;/* allocation group block number */ xfs_buf_t *agbp; /* agi header buffer */ xfs_agi_t *agi; /* agi header data */ xfs_agino_t agino; /* inode # in allocation group */ xfs_agnumber_t agno; /* allocation group number */ - int chunkidx; /* current index into inode chunk */ - int clustidx; /* current index into inode cluster */ xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ int end_of_ag; /* set if we've seen the ag end */ int error; /* error code */ @@ -209,8 +364,6 @@ xfs_bulkstat( xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */ xfs_ino_t lastino; /* last inode number returned */ - int blks_per_cluster; /* # of blocks per cluster */ - int inodes_per_cluster;/* # of inodes per cluster */ int nirbuf; /* size of irbuf */ int rval; /* return value error code */ int tmp; /* result value from btree calls */ @@ -218,7 +371,6 @@ xfs_bulkstat( int ubleft; /* bytes left in user's buffer */ char __user *ubufp; /* pointer into user's buffer */ int ubelem; /* spaces used in user's buffer */ - int ubused; /* bytes used by formatter */ /* * Get the last inode value, see if there's nothing to do. @@ -233,20 +385,16 @@ xfs_bulkstat( *ubcountp = 0; return 0; } - if (!ubcountp || *ubcountp <= 0) { - return EINVAL; - } + ubcount = *ubcountp; /* statstruct's */ ubleft = ubcount * statstruct_size; /* bytes */ *ubcountp = ubelem = 0; *done = 0; fmterror = 0; ubufp = ubuffer; - blks_per_cluster = xfs_icluster_size_fsb(mp); - inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog; irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); if (!irbuf) - return ENOMEM; + return -ENOMEM; nirbuf = irbsize / sizeof(*irbuf); @@ -258,14 +406,8 @@ xfs_bulkstat( while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) { cond_resched(); error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); - if (error) { - /* - * Skip this allocation group and go to the next one. - */ - agno++; - agino = 0; - continue; - } + if (error) + break; agi = XFS_BUF_TO_AGI(agbp); /* * Allocate and initialize a btree cursor for ialloc btree. @@ -275,96 +417,39 @@ xfs_bulkstat( irbp = irbuf; irbufend = irbuf + nirbuf; end_of_ag = 0; - /* - * If we're returning in the middle of an allocation group, - * we need to get the remainder of the chunk we're in. - */ + icount = 0; if (agino > 0) { - xfs_inobt_rec_incore_t r; - /* - * Lookup the inode chunk that this inode lives in. + * In the middle of an allocation group, we need to get + * the remainder of the chunk we're in. */ - error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, - &tmp); - if (!error && /* no I/O error */ - tmp && /* lookup succeeded */ - /* got the record, should always work */ - !(error = xfs_inobt_get_rec(cur, &r, &i)) && - i == 1 && - /* this is the right chunk */ - agino < r.ir_startino + XFS_INODES_PER_CHUNK && - /* lastino was not last in chunk */ - (chunkidx = agino - r.ir_startino + 1) < - XFS_INODES_PER_CHUNK && - /* there are some left allocated */ - xfs_inobt_maskn(chunkidx, - XFS_INODES_PER_CHUNK - chunkidx) & - ~r.ir_free) { - /* - * Grab the chunk record. Mark all the - * uninteresting inodes (because they're - * before our start point) free. - */ - for (i = 0; i < chunkidx; i++) { - if (XFS_INOBT_MASK(i) & ~r.ir_free) - r.ir_freecount++; - } - r.ir_free |= xfs_inobt_maskn(0, chunkidx); + struct xfs_inobt_rec_incore r; + + error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r); + if (error) + break; + if (icount) { irbp->ir_startino = r.ir_startino; irbp->ir_freecount = r.ir_freecount; irbp->ir_free = r.ir_free; irbp++; agino = r.ir_startino + XFS_INODES_PER_CHUNK; - icount = XFS_INODES_PER_CHUNK - r.ir_freecount; - } else { - /* - * If any of those tests failed, bump the - * inode number (just in case). - */ - agino++; - icount = 0; } - /* - * In any case, increment to the next record. - */ - if (!error) - error = xfs_btree_increment(cur, 0, &tmp); + /* Increment to the next record */ + error = xfs_btree_increment(cur, 0, &tmp); } else { - /* - * Start of ag. Lookup the first inode chunk. - */ + /* Start of ag. Lookup the first inode chunk */ error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp); - icount = 0; } + if (error) + break; + /* * Loop through inode btree records in this ag, * until we run out of inodes or space in the buffer. */ while (irbp < irbufend && icount < ubcount) { - xfs_inobt_rec_incore_t r; - - /* - * Loop as long as we're unable to read the - * inode btree. - */ - while (error) { - agino += XFS_INODES_PER_CHUNK; - if (XFS_AGINO_TO_AGBNO(mp, agino) >= - be32_to_cpu(agi->agi_length)) - break; - error = xfs_inobt_lookup(cur, agino, - XFS_LOOKUP_GE, &tmp); - cond_resched(); - } - /* - * If ran off the end of the ag either with an error, - * or the normal way, set end and stop collecting. - */ - if (error) { - end_of_ag = 1; - break; - } + struct xfs_inobt_rec_incore r; error = xfs_inobt_get_rec(cur, &r, &i); if (error || i == 0) { @@ -377,25 +462,7 @@ xfs_bulkstat( * Also start read-ahead now for this chunk. */ if (r.ir_freecount < XFS_INODES_PER_CHUNK) { - struct blk_plug plug; - /* - * Loop over all clusters in the next chunk. - * Do a readahead if there are any allocated - * inodes in that cluster. - */ - blk_start_plug(&plug); - agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino); - for (chunkidx = 0; - chunkidx < XFS_INODES_PER_CHUNK; - chunkidx += inodes_per_cluster, - agbno += blks_per_cluster) { - if (xfs_inobt_maskn(chunkidx, - inodes_per_cluster) & ~r.ir_free) - xfs_btree_reada_bufs(mp, agno, - agbno, blks_per_cluster, - &xfs_inode_buf_ops); - } - blk_finish_plug(&plug); + xfs_bulkstat_ichunk_ra(mp, agno, &r); irbp->ir_startino = r.ir_startino; irbp->ir_freecount = r.ir_freecount; irbp->ir_free = r.ir_free; @@ -422,57 +489,20 @@ xfs_bulkstat( irbufend = irbp; for (irbp = irbuf; irbp < irbufend && XFS_BULKSTAT_UBLEFT(ubleft); irbp++) { - /* - * Now process this chunk of inodes. - */ - for (agino = irbp->ir_startino, chunkidx = clustidx = 0; - XFS_BULKSTAT_UBLEFT(ubleft) && - irbp->ir_freecount < XFS_INODES_PER_CHUNK; - chunkidx++, clustidx++, agino++) { - ASSERT(chunkidx < XFS_INODES_PER_CHUNK); - - ino = XFS_AGINO_TO_INO(mp, agno, agino); - /* - * Skip if this inode is free. - */ - if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) { - lastino = ino; - continue; - } - /* - * Count used inodes as free so we can tell - * when the chunk is used up. - */ - irbp->ir_freecount++; - - /* - * Get the inode and fill in a single buffer. - */ - ubused = statstruct_size; - error = formatter(mp, ino, ubufp, ubleft, - &ubused, &fmterror); - if (fmterror == BULKSTAT_RV_NOTHING) { - if (error && error != ENOENT && - error != EINVAL) { - ubleft = 0; - rval = error; - break; - } - lastino = ino; - continue; - } - if (fmterror == BULKSTAT_RV_GIVEUP) { - ubleft = 0; - ASSERT(error); - rval = error; - break; - } - if (ubufp) - ubufp += ubused; - ubleft -= ubused; - ubelem++; - lastino = ino; - } + struct xfs_bulkstat_agichunk ac; + + ac.ac_lastino = lastino; + ac.ac_ubuffer = &ubuffer; + ac.ac_ubleft = ubleft; + ac.ac_ubelem = ubelem; + error = xfs_bulkstat_ag_ichunk(mp, agno, irbp, + formatter, statstruct_size, &ac); + if (error) + rval = error; + + lastino = ac.ac_lastino; + ubleft = ac.ac_ubleft; + ubelem = ac.ac_ubelem; cond_resched(); } @@ -512,58 +542,10 @@ xfs_bulkstat( return rval; } -/* - * Return stat information in bulk (by-inode) for the filesystem. - * Special case for non-sequential one inode bulkstat. - */ -int /* error status */ -xfs_bulkstat_single( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t *lastinop, /* inode to return */ - char __user *buffer, /* buffer with inode stats */ - int *done) /* 1 if there are more stats to get */ -{ - int count; /* count value for bulkstat call */ - int error; /* return value */ - xfs_ino_t ino; /* filesystem inode number */ - int res; /* result from bs1 */ - - /* - * note that requesting valid inode numbers which are not allocated - * to inodes will most likely cause xfs_imap_to_bp to generate warning - * messages about bad magic numbers. This is ok. The fact that - * the inode isn't actually an inode is handled by the - * error check below. Done this way to make the usual case faster - * at the expense of the error case. - */ - - ino = *lastinop; - error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), - NULL, &res); - if (error) { - /* - * Special case way failed, do it the "long" way - * to see if that works. - */ - (*lastinop)--; - count = 1; - if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one, - sizeof(xfs_bstat_t), buffer, done)) - return error; - if (count == 0 || (xfs_ino_t)*lastinop != ino) - return error == EFSCORRUPTED ? - XFS_ERROR(EINVAL) : error; - else - return 0; - } - *done = 0; - return 0; -} - int xfs_inumbers_fmt( void __user *ubuffer, /* buffer to write to */ - const xfs_inogrp_t *buffer, /* buffer to read from */ + const struct xfs_inogrp *buffer, /* buffer to read from */ long count, /* # of elements to read */ long *written) /* # of bytes written */ { @@ -578,127 +560,104 @@ xfs_inumbers_fmt( */ int /* error status */ xfs_inumbers( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t *lastino, /* last inode returned */ - int *count, /* size of buffer/count returned */ - void __user *ubuffer,/* buffer with inode descriptions */ - inumbers_fmt_pf formatter) + struct xfs_mount *mp,/* mount point for filesystem */ + xfs_ino_t *lastino,/* last inode returned */ + int *count,/* size of buffer/count returned */ + void __user *ubuffer,/* buffer with inode descriptions */ + inumbers_fmt_pf formatter) { - xfs_buf_t *agbp; - xfs_agino_t agino; - xfs_agnumber_t agno; - int bcount; - xfs_inogrp_t *buffer; - int bufidx; - xfs_btree_cur_t *cur; - int error; - xfs_inobt_rec_incore_t r; - int i; - xfs_ino_t ino; - int left; - int tmp; - - ino = (xfs_ino_t)*lastino; - agno = XFS_INO_TO_AGNO(mp, ino); - agino = XFS_INO_TO_AGINO(mp, ino); - left = *count; + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, *lastino); + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, *lastino); + struct xfs_btree_cur *cur = NULL; + struct xfs_buf *agbp = NULL; + struct xfs_inogrp *buffer; + int bcount; + int left = *count; + int bufidx = 0; + int error = 0; + *count = 0; + if (agno >= mp->m_sb.sb_agcount || + *lastino != XFS_AGINO_TO_INO(mp, agno, agino)) + return error; + bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer))); buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP); - error = bufidx = 0; - cur = NULL; - agbp = NULL; - while (left > 0 && agno < mp->m_sb.sb_agcount) { - if (agbp == NULL) { + do { + struct xfs_inobt_rec_incore r; + int stat; + + if (!agbp) { error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); - if (error) { - /* - * If we can't read the AGI of this ag, - * then just skip to the next one. - */ - ASSERT(cur == NULL); - agbp = NULL; - agno++; - agino = 0; - continue; - } + if (error) + break; + cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_INO); error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE, - &tmp); - if (error) { - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - cur = NULL; - xfs_buf_relse(agbp); - agbp = NULL; - /* - * Move up the last inode in the current - * chunk. The lookup_ge will always get - * us the first inode in the next chunk. - */ - agino += XFS_INODES_PER_CHUNK - 1; - continue; - } - } - error = xfs_inobt_get_rec(cur, &r, &i); - if (error || i == 0) { - xfs_buf_relse(agbp); - agbp = NULL; - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - cur = NULL; - agno++; - agino = 0; - continue; + &stat); + if (error) + break; + if (!stat) + goto next_ag; } + + error = xfs_inobt_get_rec(cur, &r, &stat); + if (error) + break; + if (!stat) + goto next_ag; + agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1; buffer[bufidx].xi_startino = XFS_AGINO_TO_INO(mp, agno, r.ir_startino); buffer[bufidx].xi_alloccount = XFS_INODES_PER_CHUNK - r.ir_freecount; buffer[bufidx].xi_allocmask = ~r.ir_free; - bufidx++; - left--; - if (bufidx == bcount) { - long written; - if (formatter(ubuffer, buffer, bufidx, &written)) { - error = XFS_ERROR(EFAULT); + if (++bufidx == bcount) { + long written; + + error = formatter(ubuffer, buffer, bufidx, &written); + if (error) break; - } ubuffer += written; *count += bufidx; bufidx = 0; } - if (left) { - error = xfs_btree_increment(cur, 0, &tmp); - if (error) { - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - cur = NULL; - xfs_buf_relse(agbp); - agbp = NULL; - /* - * The agino value has already been bumped. - * Just try to skip up to it. - */ - agino += XFS_INODES_PER_CHUNK; - continue; - } - } - } + if (!--left) + break; + + error = xfs_btree_increment(cur, 0, &stat); + if (error) + break; + if (stat) + continue; + +next_ag: + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + cur = NULL; + xfs_buf_relse(agbp); + agbp = NULL; + agino = 0; + } while (++agno < mp->m_sb.sb_agcount); + if (!error) { if (bufidx) { - long written; - if (formatter(ubuffer, buffer, bufidx, &written)) - error = XFS_ERROR(EFAULT); - else + long written; + + error = formatter(ubuffer, buffer, bufidx, &written); + if (!error) *count += bufidx; } *lastino = XFS_AGINO_TO_INO(mp, agno, agino); } + kmem_free(buffer); if (cur) xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR)); if (agbp) xfs_buf_relse(agbp); + return error; } diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index 97295d91d17..aaed08022eb 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -30,6 +30,22 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, int *ubused, int *stat); +struct xfs_bulkstat_agichunk { + xfs_ino_t ac_lastino; /* last inode returned */ + char __user **ac_ubuffer;/* pointer into user's buffer */ + int ac_ubleft; /* bytes left in user's buffer */ + int ac_ubelem; /* spaces used in user's buffer */ +}; + +int +xfs_bulkstat_ag_ichunk( + struct xfs_mount *mp, + xfs_agnumber_t agno, + struct xfs_inobt_rec_incore *irbp, + bulkstat_one_pf formatter, + size_t statstruct_size, + struct xfs_bulkstat_agichunk *acp); + /* * Values for stat return value. */ @@ -50,13 +66,6 @@ xfs_bulkstat( char __user *ubuffer,/* buffer with inode stats */ int *done); /* 1 if there are more stats to get */ -int -xfs_bulkstat_single( - xfs_mount_t *mp, - xfs_ino_t *lastinop, - char __user *buffer, - int *done); - typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */ void __user *ubuffer, /* buffer to write to */ int ubsize, /* remaining user buffer sz */ diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 825249d2dfc..d10dc8f397c 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -21,18 +21,6 @@ #include <linux/types.h> /* - * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits. - * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set. - */ -#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64) -# define XFS_BIG_BLKNOS 1 -# define XFS_BIG_INUMS 1 -#else -# define XFS_BIG_BLKNOS 0 -# define XFS_BIG_INUMS 0 -#endif - -/* * Kernel specific type declarations for XFS */ typedef signed char __int8_t; @@ -113,7 +101,7 @@ typedef __uint64_t __psunsigned_t; #include <asm/byteorder.h> #include <asm/unaligned.h> -#include "xfs_vnode.h" +#include "xfs_fs.h" #include "xfs_stats.h" #include "xfs_sysctl.h" #include "xfs_iops.h" @@ -191,6 +179,17 @@ typedef __uint64_t __psunsigned_t; #define MAX(a,b) (max(a,b)) #define howmany(x, y) (((x)+((y)-1))/(y)) +/* + * XFS wrapper structure for sysfs support. It depends on external data + * structures and is embedded in various internal data structures to implement + * the XFS sysfs object heirarchy. Define it here for broad access throughout + * the codebase. + */ +struct xfs_kobj { + struct kobject kobject; + struct completion complete; +}; + /* Kernel uid/gid conversion. These are used to convert to/from the on disk * uid_t/gid_t types to the kuid_t/kgid_t types that the kernel uses internally. * The conversion here is type only, the value will remain the same since we @@ -331,7 +330,7 @@ static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y) { x += y - 1; do_div(x, y); - return(x * y); + return x * y; } static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 292308dede6..ca4fd5bd852 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -34,6 +34,7 @@ #include "xfs_trace.h" #include "xfs_fsops.h" #include "xfs_cksum.h" +#include "xfs_sysfs.h" kmem_zone_t *xfs_log_ticket_zone; @@ -283,7 +284,7 @@ xlog_grant_head_wait( return 0; shutdown: list_del_init(&tic->t_queue); - return XFS_ERROR(EIO); + return -EIO; } /* @@ -377,7 +378,7 @@ xfs_log_regrant( int error = 0; if (XLOG_FORCED_SHUTDOWN(log)) - return XFS_ERROR(EIO); + return -EIO; XFS_STATS_INC(xs_try_logspace); @@ -446,7 +447,7 @@ xfs_log_reserve( ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); if (XLOG_FORCED_SHUTDOWN(log)) - return XFS_ERROR(EIO); + return -EIO; XFS_STATS_INC(xs_try_logspace); @@ -454,7 +455,7 @@ xfs_log_reserve( tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, KM_SLEEP | KM_MAYFAIL); if (!tic) - return XFS_ERROR(ENOMEM); + return -ENOMEM; tic->t_trans_type = t_type; *ticp = tic; @@ -590,7 +591,7 @@ xfs_log_release_iclog( { if (xlog_state_release_iclog(mp->m_log, iclog)) { xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); - return EIO; + return -EIO; } return 0; @@ -628,7 +629,7 @@ xfs_log_mount( mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); if (IS_ERR(mp->m_log)) { - error = -PTR_ERR(mp->m_log); + error = PTR_ERR(mp->m_log); goto out; } @@ -652,18 +653,18 @@ xfs_log_mount( xfs_warn(mp, "Log size %d blocks too small, minimum size is %d blocks", mp->m_sb.sb_logblocks, min_logfsbs); - error = EINVAL; + error = -EINVAL; } else if (mp->m_sb.sb_logblocks > XFS_MAX_LOG_BLOCKS) { xfs_warn(mp, "Log size %d blocks too large, maximum size is %lld blocks", mp->m_sb.sb_logblocks, XFS_MAX_LOG_BLOCKS); - error = EINVAL; + error = -EINVAL; } else if (XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks) > XFS_MAX_LOG_BYTES) { xfs_warn(mp, "log size %lld bytes too large, maximum size is %lld bytes", XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks), XFS_MAX_LOG_BYTES); - error = EINVAL; + error = -EINVAL; } if (error) { if (xfs_sb_version_hascrc(&mp->m_sb)) { @@ -707,6 +708,11 @@ xfs_log_mount( } } + error = xfs_sysfs_init(&mp->m_log->l_kobj, &xfs_log_ktype, &mp->m_kobj, + "log"); + if (error) + goto out_destroy_ail; + /* Normal transactions can now occur */ mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY; @@ -947,6 +953,9 @@ xfs_log_unmount( xfs_log_quiesce(mp); xfs_trans_ail_destroy(mp); + + xfs_sysfs_del(&mp->m_log->l_kobj); + xlog_dealloc_log(mp->m_log); } @@ -1313,7 +1322,7 @@ xlog_alloc_log( xlog_in_core_t *iclog, *prev_iclog=NULL; xfs_buf_t *bp; int i; - int error = ENOMEM; + int error = -ENOMEM; uint log2_size = 0; log = kmem_zalloc(sizeof(struct xlog), KM_MAYFAIL); @@ -1340,7 +1349,7 @@ xlog_alloc_log( xlog_grant_head_init(&log->l_reserve_head); xlog_grant_head_init(&log->l_write_head); - error = EFSCORRUPTED; + error = -EFSCORRUPTED; if (xfs_sb_version_hassector(&mp->m_sb)) { log2_size = mp->m_sb.sb_logsectlog; if (log2_size < BBSHIFT) { @@ -1369,8 +1378,14 @@ xlog_alloc_log( xlog_get_iclog_buffer_size(mp, log); - error = ENOMEM; - bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0); + /* + * Use a NULL block for the extra log buffer used during splits so that + * it will trigger errors if we ever try to do IO on it without first + * having set it up properly. + */ + error = -ENOMEM; + bp = xfs_buf_alloc(mp->m_logdev_targp, XFS_BUF_DADDR_NULL, + BTOBB(log->l_iclog_size), 0); if (!bp) goto out_free_log; @@ -1463,7 +1478,7 @@ out_free_iclog: out_free_log: kmem_free(log); out: - return ERR_PTR(-error); + return ERR_PTR(error); } /* xlog_alloc_log */ @@ -1661,7 +1676,7 @@ xlog_bdstrat( xfs_buf_lock(bp); if (iclog->ic_state & XLOG_STATE_IOERROR) { - xfs_buf_ioerror(bp, EIO); + xfs_buf_ioerror(bp, -EIO); xfs_buf_stale(bp); xfs_buf_ioend(bp, 0); /* @@ -2360,7 +2375,7 @@ xlog_write( ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags); if (!ophdr) - return XFS_ERROR(EIO); + return -EIO; xlog_write_adv_cnt(&ptr, &len, &log_offset, sizeof(struct xlog_op_header)); @@ -2859,7 +2874,7 @@ restart: spin_lock(&log->l_icloglock); if (XLOG_FORCED_SHUTDOWN(log)) { spin_unlock(&log->l_icloglock); - return XFS_ERROR(EIO); + return -EIO; } iclog = log->l_iclog; @@ -3047,7 +3062,7 @@ xlog_state_release_iclog( int sync = 0; /* do we sync? */ if (iclog->ic_state & XLOG_STATE_IOERROR) - return XFS_ERROR(EIO); + return -EIO; ASSERT(atomic_read(&iclog->ic_refcnt) > 0); if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock)) @@ -3055,7 +3070,7 @@ xlog_state_release_iclog( if (iclog->ic_state & XLOG_STATE_IOERROR) { spin_unlock(&log->l_icloglock); - return XFS_ERROR(EIO); + return -EIO; } ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE || iclog->ic_state == XLOG_STATE_WANT_SYNC); @@ -3172,7 +3187,7 @@ _xfs_log_force( iclog = log->l_iclog; if (iclog->ic_state & XLOG_STATE_IOERROR) { spin_unlock(&log->l_icloglock); - return XFS_ERROR(EIO); + return -EIO; } /* If the head iclog is not active nor dirty, we just attach @@ -3210,7 +3225,7 @@ _xfs_log_force( spin_unlock(&log->l_icloglock); if (xlog_state_release_iclog(log, iclog)) - return XFS_ERROR(EIO); + return -EIO; if (log_flushed) *log_flushed = 1; @@ -3246,7 +3261,7 @@ maybe_sleep: */ if (iclog->ic_state & XLOG_STATE_IOERROR) { spin_unlock(&log->l_icloglock); - return XFS_ERROR(EIO); + return -EIO; } XFS_STATS_INC(xs_log_force_sleep); xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); @@ -3256,7 +3271,7 @@ maybe_sleep: * and the memory read should be atomic. */ if (iclog->ic_state & XLOG_STATE_IOERROR) - return XFS_ERROR(EIO); + return -EIO; if (log_flushed) *log_flushed = 1; } else { @@ -3324,7 +3339,7 @@ try_again: iclog = log->l_iclog; if (iclog->ic_state & XLOG_STATE_IOERROR) { spin_unlock(&log->l_icloglock); - return XFS_ERROR(EIO); + return -EIO; } do { @@ -3375,7 +3390,7 @@ try_again: xlog_state_switch_iclogs(log, iclog, 0); spin_unlock(&log->l_icloglock); if (xlog_state_release_iclog(log, iclog)) - return XFS_ERROR(EIO); + return -EIO; if (log_flushed) *log_flushed = 1; spin_lock(&log->l_icloglock); @@ -3390,7 +3405,7 @@ try_again: */ if (iclog->ic_state & XLOG_STATE_IOERROR) { spin_unlock(&log->l_icloglock); - return XFS_ERROR(EIO); + return -EIO; } XFS_STATS_INC(xs_log_force_sleep); xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); @@ -3400,7 +3415,7 @@ try_again: * and the memory read should be atomic. */ if (iclog->ic_state & XLOG_STATE_IOERROR) - return XFS_ERROR(EIO); + return -EIO; if (log_flushed) *log_flushed = 1; diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index b3425b34e3d..f6b79e5325d 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -78,8 +78,6 @@ xlog_cil_init_post_recovery( { log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log); log->l_cilp->xc_ctx->sequence = 1; - log->l_cilp->xc_ctx->commit_lsn = xlog_assign_lsn(log->l_curr_cycle, - log->l_curr_block); } /* @@ -634,7 +632,7 @@ out_abort_free_ticket: xfs_log_ticket_put(tic); out_abort: xlog_cil_committed(ctx, XFS_LI_ABORTED); - return XFS_ERROR(EIO); + return -EIO; } static void @@ -928,12 +926,12 @@ xlog_cil_init( cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL); if (!cil) - return ENOMEM; + return -ENOMEM; ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL); if (!ctx) { kmem_free(cil); - return ENOMEM; + return -ENOMEM; } INIT_WORK(&cil->xc_push_work, xlog_cil_push_work); diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 9bc403a9e54..db7cbdeb2b4 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -405,6 +405,8 @@ struct xlog { struct xlog_grant_head l_reserve_head; struct xlog_grant_head l_write_head; + struct xfs_kobj l_kobj; + /* The following field are used for debugging; need to hold icloglock */ #ifdef DEBUG char *l_iclog_bak[XLOG_MAX_ICLOGS]; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 981af0f6504..1fd5787add9 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -179,7 +179,7 @@ xlog_bread_noalign( xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", nbblks); XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); - return EFSCORRUPTED; + return -EFSCORRUPTED; } blk_no = round_down(blk_no, log->l_sectBBsize); @@ -194,7 +194,7 @@ xlog_bread_noalign( bp->b_error = 0; if (XFS_FORCED_SHUTDOWN(log->l_mp)) - return XFS_ERROR(EIO); + return -EIO; xfs_buf_iorequest(bp); error = xfs_buf_iowait(bp); @@ -268,7 +268,7 @@ xlog_bwrite( xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", nbblks); XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); - return EFSCORRUPTED; + return -EFSCORRUPTED; } blk_no = round_down(blk_no, log->l_sectBBsize); @@ -330,14 +330,14 @@ xlog_header_check_recover( xlog_header_check_dump(mp, head); XFS_ERROR_REPORT("xlog_header_check_recover(1)", XFS_ERRLEVEL_HIGH, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { xfs_warn(mp, "dirty log entry has mismatched uuid - can't recover"); xlog_header_check_dump(mp, head); XFS_ERROR_REPORT("xlog_header_check_recover(2)", XFS_ERRLEVEL_HIGH, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } return 0; } @@ -364,7 +364,7 @@ xlog_header_check_mount( xlog_header_check_dump(mp, head); XFS_ERROR_REPORT("xlog_header_check_mount", XFS_ERRLEVEL_HIGH, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } return 0; } @@ -462,7 +462,7 @@ xlog_find_verify_cycle( while (!(bp = xlog_get_bp(log, bufblks))) { bufblks >>= 1; if (bufblks < log->l_sectBBsize) - return ENOMEM; + return -ENOMEM; } for (i = start_blk; i < start_blk + nbblks; i += bufblks) { @@ -524,7 +524,7 @@ xlog_find_verify_log_record( if (!(bp = xlog_get_bp(log, num_blks))) { if (!(bp = xlog_get_bp(log, 1))) - return ENOMEM; + return -ENOMEM; smallmem = 1; } else { error = xlog_bread(log, start_blk, num_blks, bp, &offset); @@ -539,7 +539,7 @@ xlog_find_verify_log_record( xfs_warn(log->l_mp, "Log inconsistent (didn't find previous header)"); ASSERT(0); - error = XFS_ERROR(EIO); + error = -EIO; goto out; } @@ -564,7 +564,7 @@ xlog_find_verify_log_record( * will be called again for the end of the physical log. */ if (i == -1) { - error = -1; + error = 1; goto out; } @@ -628,7 +628,12 @@ xlog_find_head( int error, log_bbnum = log->l_logBBsize; /* Is the end of the log device zeroed? */ - if ((error = xlog_find_zeroed(log, &first_blk)) == -1) { + error = xlog_find_zeroed(log, &first_blk); + if (error < 0) { + xfs_warn(log->l_mp, "empty log check failed"); + return error; + } + if (error == 1) { *return_head_blk = first_blk; /* Is the whole lot zeroed? */ @@ -641,15 +646,12 @@ xlog_find_head( } return 0; - } else if (error) { - xfs_warn(log->l_mp, "empty log check failed"); - return error; } first_blk = 0; /* get cycle # of 1st block */ bp = xlog_get_bp(log, 1); if (!bp) - return ENOMEM; + return -ENOMEM; error = xlog_bread(log, 0, 1, bp, &offset); if (error) @@ -818,29 +820,29 @@ validate_head: start_blk = head_blk - num_scan_bblks; /* don't read head_blk */ /* start ptr at last block ptr before head_blk */ - if ((error = xlog_find_verify_log_record(log, start_blk, - &head_blk, 0)) == -1) { - error = XFS_ERROR(EIO); - goto bp_err; - } else if (error) + error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0); + if (error == 1) + error = -EIO; + if (error) goto bp_err; } else { start_blk = 0; ASSERT(head_blk <= INT_MAX); - if ((error = xlog_find_verify_log_record(log, start_blk, - &head_blk, 0)) == -1) { + error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0); + if (error < 0) + goto bp_err; + if (error == 1) { /* We hit the beginning of the log during our search */ start_blk = log_bbnum - (num_scan_bblks - head_blk); new_blk = log_bbnum; ASSERT(start_blk <= INT_MAX && (xfs_daddr_t) log_bbnum-start_blk >= 0); ASSERT(head_blk <= INT_MAX); - if ((error = xlog_find_verify_log_record(log, - start_blk, &new_blk, - (int)head_blk)) == -1) { - error = XFS_ERROR(EIO); - goto bp_err; - } else if (error) + error = xlog_find_verify_log_record(log, start_blk, + &new_blk, (int)head_blk); + if (error == 1) + error = -EIO; + if (error) goto bp_err; if (new_blk != log_bbnum) head_blk = new_blk; @@ -911,7 +913,7 @@ xlog_find_tail( bp = xlog_get_bp(log, 1); if (!bp) - return ENOMEM; + return -ENOMEM; if (*head_blk == 0) { /* special case */ error = xlog_bread(log, 0, 1, bp, &offset); if (error) @@ -961,7 +963,7 @@ xlog_find_tail( xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); xlog_put_bp(bp); ASSERT(0); - return XFS_ERROR(EIO); + return -EIO; } /* find blk_no of tail of log */ @@ -1092,8 +1094,8 @@ done: * * Return: * 0 => the log is completely written to - * -1 => use *blk_no as the first block of the log - * >0 => error has occurred + * 1 => use *blk_no as the first block of the log + * <0 => error has occurred */ STATIC int xlog_find_zeroed( @@ -1112,7 +1114,7 @@ xlog_find_zeroed( /* check totally zeroed log */ bp = xlog_get_bp(log, 1); if (!bp) - return ENOMEM; + return -ENOMEM; error = xlog_bread(log, 0, 1, bp, &offset); if (error) goto bp_err; @@ -1121,7 +1123,7 @@ xlog_find_zeroed( if (first_cycle == 0) { /* completely zeroed log */ *blk_no = 0; xlog_put_bp(bp); - return -1; + return 1; } /* check partially zeroed log */ @@ -1141,7 +1143,7 @@ xlog_find_zeroed( */ xfs_warn(log->l_mp, "Log inconsistent or not a log (last==0, first!=1)"); - error = XFS_ERROR(EINVAL); + error = -EINVAL; goto bp_err; } @@ -1179,19 +1181,18 @@ xlog_find_zeroed( * Potentially backup over partial log record write. We don't need * to search the end of the log because we know it is zero. */ - if ((error = xlog_find_verify_log_record(log, start_blk, - &last_blk, 0)) == -1) { - error = XFS_ERROR(EIO); - goto bp_err; - } else if (error) - goto bp_err; + error = xlog_find_verify_log_record(log, start_blk, &last_blk, 0); + if (error == 1) + error = -EIO; + if (error) + goto bp_err; *blk_no = last_blk; bp_err: xlog_put_bp(bp); if (error) return error; - return -1; + return 1; } /* @@ -1251,7 +1252,7 @@ xlog_write_log_records( while (!(bp = xlog_get_bp(log, bufblks))) { bufblks >>= 1; if (bufblks < sectbb) - return ENOMEM; + return -ENOMEM; } /* We may need to do a read at the start to fill in part of @@ -1354,7 +1355,7 @@ xlog_clear_stale_blocks( if (unlikely(head_block < tail_block || head_block >= log->l_logBBsize)) { XFS_ERROR_REPORT("xlog_clear_stale_blocks(1)", XFS_ERRLEVEL_LOW, log->l_mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } tail_distance = tail_block + (log->l_logBBsize - head_block); } else { @@ -1366,7 +1367,7 @@ xlog_clear_stale_blocks( if (unlikely(head_block >= tail_block || head_cycle != (tail_cycle + 1))){ XFS_ERROR_REPORT("xlog_clear_stale_blocks(2)", XFS_ERRLEVEL_LOW, log->l_mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } tail_distance = tail_block - head_block; } @@ -1551,7 +1552,7 @@ xlog_recover_add_to_trans( xfs_warn(log->l_mp, "%s: bad header magic number", __func__); ASSERT(0); - return XFS_ERROR(EIO); + return -EIO; } if (len == sizeof(xfs_trans_header_t)) xlog_recover_add_item(&trans->r_itemq); @@ -1581,7 +1582,7 @@ xlog_recover_add_to_trans( in_f->ilf_size); ASSERT(0); kmem_free(ptr); - return XFS_ERROR(EIO); + return -EIO; } item->ri_total = in_f->ilf_size; @@ -1702,7 +1703,7 @@ xlog_recover_reorder_trans( */ if (!list_empty(&sort_list)) list_splice_init(&sort_list, &trans->r_itemq); - error = XFS_ERROR(EIO); + error = -EIO; goto out; } } @@ -1943,7 +1944,7 @@ xlog_recover_do_inode_buffer( item, bp); XFS_ERROR_REPORT("xlog_recover_do_inode_buf", XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp, @@ -2125,6 +2126,17 @@ xlog_recover_validate_buf_type( __uint16_t magic16; __uint16_t magicda; + /* + * We can only do post recovery validation on items on CRC enabled + * fielsystems as we need to know when the buffer was written to be able + * to determine if we should have replayed the item. If we replay old + * metadata over a newer buffer, then it will enter a temporarily + * inconsistent state resulting in verification failures. Hence for now + * just avoid the verification stage for non-crc filesystems + */ + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + magic32 = be32_to_cpu(*(__be32 *)bp->b_addr); magic16 = be16_to_cpu(*(__be16*)bp->b_addr); magicda = be16_to_cpu(info->magic); @@ -2162,8 +2174,6 @@ xlog_recover_validate_buf_type( bp->b_ops = &xfs_agf_buf_ops; break; case XFS_BLFT_AGFL_BUF: - if (!xfs_sb_version_hascrc(&mp->m_sb)) - break; if (magic32 != XFS_AGFL_MAGIC) { xfs_warn(mp, "Bad AGFL block magic!"); ASSERT(0); @@ -2196,10 +2206,6 @@ xlog_recover_validate_buf_type( #endif break; case XFS_BLFT_DINO_BUF: - /* - * we get here with inode allocation buffers, not buffers that - * track unlinked list changes. - */ if (magic16 != XFS_DINODE_MAGIC) { xfs_warn(mp, "Bad INODE block magic!"); ASSERT(0); @@ -2279,8 +2285,6 @@ xlog_recover_validate_buf_type( bp->b_ops = &xfs_attr3_leaf_buf_ops; break; case XFS_BLFT_ATTR_RMT_BUF: - if (!xfs_sb_version_hascrc(&mp->m_sb)) - break; if (magic32 != XFS_ATTR3_RMT_MAGIC) { xfs_warn(mp, "Bad attr remote magic!"); ASSERT(0); @@ -2387,16 +2391,7 @@ xlog_recover_do_reg_buffer( /* Shouldn't be any more regions */ ASSERT(i == item->ri_total); - /* - * We can only do post recovery validation on items on CRC enabled - * fielsystems as we need to know when the buffer was written to be able - * to determine if we should have replayed the item. If we replay old - * metadata over a newer buffer, then it will enter a temporarily - * inconsistent state resulting in verification failures. Hence for now - * just avoid the verification stage for non-crc filesystems - */ - if (xfs_sb_version_hascrc(&mp->m_sb)) - xlog_recover_validate_buf_type(mp, bp, buf_f); + xlog_recover_validate_buf_type(mp, bp, buf_f); } /* @@ -2404,8 +2399,11 @@ xlog_recover_do_reg_buffer( * Simple algorithm: if we have found a QUOTAOFF log item of the same type * (ie. USR or GRP), then just toss this buffer away; don't recover it. * Else, treat it as a regular buffer and do recovery. + * + * Return false if the buffer was tossed and true if we recovered the buffer to + * indicate to the caller if the buffer needs writing. */ -STATIC void +STATIC bool xlog_recover_do_dquot_buffer( struct xfs_mount *mp, struct xlog *log, @@ -2420,9 +2418,8 @@ xlog_recover_do_dquot_buffer( /* * Filesystems are required to send in quota flags at mount time. */ - if (mp->m_qflags == 0) { - return; - } + if (!mp->m_qflags) + return false; type = 0; if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF) @@ -2435,9 +2432,10 @@ xlog_recover_do_dquot_buffer( * This type of quotas was turned off, so ignore this buffer */ if (log->l_quotaoffs_flag & type) - return; + return false; xlog_recover_do_reg_buffer(mp, item, bp, buf_f); + return true; } /* @@ -2496,7 +2494,7 @@ xlog_recover_buffer_pass2( bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, buf_flags, NULL); if (!bp) - return XFS_ERROR(ENOMEM); + return -ENOMEM; error = bp->b_error; if (error) { xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)"); @@ -2504,23 +2502,44 @@ xlog_recover_buffer_pass2( } /* - * recover the buffer only if we get an LSN from it and it's less than + * Recover the buffer only if we get an LSN from it and it's less than * the lsn of the transaction we are replaying. + * + * Note that we have to be extremely careful of readahead here. + * Readahead does not attach verfiers to the buffers so if we don't + * actually do any replay after readahead because of the LSN we found + * in the buffer if more recent than that current transaction then we + * need to attach the verifier directly. Failure to do so can lead to + * future recovery actions (e.g. EFI and unlinked list recovery) can + * operate on the buffers and they won't get the verifier attached. This + * can lead to blocks on disk having the correct content but a stale + * CRC. + * + * It is safe to assume these clean buffers are currently up to date. + * If the buffer is dirtied by a later transaction being replayed, then + * the verifier will be reset to match whatever recover turns that + * buffer into. */ lsn = xlog_recover_get_buf_lsn(mp, bp); - if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) + if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { + xlog_recover_validate_buf_type(mp, bp, buf_f); goto out_release; + } if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); + if (error) + goto out_release; } else if (buf_f->blf_flags & (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { - xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); + bool dirty; + + dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); + if (!dirty) + goto out_release; } else { xlog_recover_do_reg_buffer(mp, item, bp, buf_f); } - if (error) - goto out_release; /* * Perform delayed write on the buffer. Asynchronous writes will be @@ -2598,7 +2617,7 @@ xfs_recover_inode_owner_change( ip = xfs_inode_alloc(mp, in_f->ilf_ino); if (!ip) - return ENOMEM; + return -ENOMEM; /* instantiate the inode */ xfs_dinode_from_disk(&ip->i_d, dip); @@ -2676,7 +2695,7 @@ xlog_recover_inode_pass2( bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0, &xfs_inode_buf_ops); if (!bp) { - error = ENOMEM; + error = -ENOMEM; goto error; } error = bp->b_error; @@ -2697,7 +2716,7 @@ xlog_recover_inode_pass2( __func__, dip, bp, in_f->ilf_ino); XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", XFS_ERRLEVEL_LOW, mp); - error = EFSCORRUPTED; + error = -EFSCORRUPTED; goto out_release; } dicp = item->ri_buf[1].i_addr; @@ -2707,7 +2726,7 @@ xlog_recover_inode_pass2( __func__, item, in_f->ilf_ino); XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", XFS_ERRLEVEL_LOW, mp); - error = EFSCORRUPTED; + error = -EFSCORRUPTED; goto out_release; } @@ -2764,7 +2783,7 @@ xlog_recover_inode_pass2( "%s: Bad regular inode log record, rec ptr 0x%p, " "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", __func__, item, dip, bp, in_f->ilf_ino); - error = EFSCORRUPTED; + error = -EFSCORRUPTED; goto out_release; } } else if (unlikely(S_ISDIR(dicp->di_mode))) { @@ -2777,7 +2796,7 @@ xlog_recover_inode_pass2( "%s: Bad dir inode log record, rec ptr 0x%p, " "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", __func__, item, dip, bp, in_f->ilf_ino); - error = EFSCORRUPTED; + error = -EFSCORRUPTED; goto out_release; } } @@ -2790,7 +2809,7 @@ xlog_recover_inode_pass2( __func__, item, dip, bp, in_f->ilf_ino, dicp->di_nextents + dicp->di_anextents, dicp->di_nblocks); - error = EFSCORRUPTED; + error = -EFSCORRUPTED; goto out_release; } if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { @@ -2800,7 +2819,7 @@ xlog_recover_inode_pass2( "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__, item, dip, bp, in_f->ilf_ino, dicp->di_forkoff); - error = EFSCORRUPTED; + error = -EFSCORRUPTED; goto out_release; } isize = xfs_icdinode_size(dicp->di_version); @@ -2810,7 +2829,7 @@ xlog_recover_inode_pass2( xfs_alert(mp, "%s: Bad inode log record length %d, rec ptr 0x%p", __func__, item->ri_buf[1].i_len, item); - error = EFSCORRUPTED; + error = -EFSCORRUPTED; goto out_release; } @@ -2898,7 +2917,7 @@ xlog_recover_inode_pass2( default: xfs_warn(log->l_mp, "%s: Invalid flag", __func__); ASSERT(0); - error = EIO; + error = -EIO; goto out_release; } } @@ -2919,7 +2938,7 @@ out_release: error: if (need_free) kmem_free(in_f); - return XFS_ERROR(error); + return error; } /* @@ -2946,7 +2965,7 @@ xlog_recover_quotaoff_pass1( if (qoff_f->qf_flags & XFS_GQUOTA_ACCT) log->l_quotaoffs_flag |= XFS_DQ_GROUP; - return (0); + return 0; } /* @@ -2971,17 +2990,17 @@ xlog_recover_dquot_pass2( * Filesystems are required to send in quota flags at mount time. */ if (mp->m_qflags == 0) - return (0); + return 0; recddq = item->ri_buf[1].i_addr; if (recddq == NULL) { xfs_alert(log->l_mp, "NULL dquot in %s.", __func__); - return XFS_ERROR(EIO); + return -EIO; } if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) { xfs_alert(log->l_mp, "dquot too small (%d) in %s.", item->ri_buf[1].i_len, __func__); - return XFS_ERROR(EIO); + return -EIO; } /* @@ -2990,7 +3009,7 @@ xlog_recover_dquot_pass2( type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); ASSERT(type); if (log->l_quotaoffs_flag & type) - return (0); + return 0; /* * At this point we know that quota was _not_ turned off. @@ -3007,12 +3026,19 @@ xlog_recover_dquot_pass2( error = xfs_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, "xlog_recover_dquot_pass2 (log copy)"); if (error) - return XFS_ERROR(EIO); + return -EIO; ASSERT(dq_f->qlf_len == 1); + /* + * At this point we are assuming that the dquots have been allocated + * and hence the buffer has valid dquots stamped in it. It should, + * therefore, pass verifier validation. If the dquot is bad, then the + * we'll return an error here, so we don't need to specifically check + * the dquot in the buffer after the verifier has run. + */ error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno, XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp, - NULL); + &xfs_dquot_buf_ops); if (error) return error; @@ -3020,18 +3046,6 @@ xlog_recover_dquot_pass2( ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset); /* - * At least the magic num portion should be on disk because this - * was among a chunk of dquots created earlier, and we did some - * minimal initialization then. - */ - error = xfs_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, - "xlog_recover_dquot_pass2"); - if (error) { - xfs_buf_relse(bp); - return XFS_ERROR(EIO); - } - - /* * If the dquot has an LSN in it, recover the dquot only if it's less * than the lsn of the transaction we are replaying. */ @@ -3178,38 +3192,38 @@ xlog_recover_do_icreate_pass2( icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr; if (icl->icl_type != XFS_LI_ICREATE) { xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type"); - return EINVAL; + return -EINVAL; } if (icl->icl_size != 1) { xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size"); - return EINVAL; + return -EINVAL; } agno = be32_to_cpu(icl->icl_ag); if (agno >= mp->m_sb.sb_agcount) { xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno"); - return EINVAL; + return -EINVAL; } agbno = be32_to_cpu(icl->icl_agbno); if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) { xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno"); - return EINVAL; + return -EINVAL; } isize = be32_to_cpu(icl->icl_isize); if (isize != mp->m_sb.sb_inodesize) { xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize"); - return EINVAL; + return -EINVAL; } count = be32_to_cpu(icl->icl_count); if (!count) { xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count"); - return EINVAL; + return -EINVAL; } length = be32_to_cpu(icl->icl_length); if (!length || length >= mp->m_sb.sb_agblocks) { xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length"); - return EINVAL; + return -EINVAL; } /* existing allocation is fixed value */ @@ -3218,7 +3232,7 @@ xlog_recover_do_icreate_pass2( if (count != mp->m_ialloc_inos || length != mp->m_ialloc_blks) { xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2"); - return EINVAL; + return -EINVAL; } /* @@ -3389,7 +3403,7 @@ xlog_recover_commit_pass1( xfs_warn(log->l_mp, "%s: invalid item type (%d)", __func__, ITEM_TYPE(item)); ASSERT(0); - return XFS_ERROR(EIO); + return -EIO; } } @@ -3425,7 +3439,7 @@ xlog_recover_commit_pass2( xfs_warn(log->l_mp, "%s: invalid item type (%d)", __func__, ITEM_TYPE(item)); ASSERT(0); - return XFS_ERROR(EIO); + return -EIO; } } @@ -3560,7 +3574,7 @@ xlog_recover_process_data( /* check the log format matches our own - else we can't recover */ if (xlog_header_check_recover(log->l_mp, rhead)) - return (XFS_ERROR(EIO)); + return -EIO; while ((dp < lp) && num_logops) { ASSERT(dp + sizeof(xlog_op_header_t) <= lp); @@ -3571,7 +3585,7 @@ xlog_recover_process_data( xfs_warn(log->l_mp, "%s: bad clientid 0x%x", __func__, ohead->oh_clientid); ASSERT(0); - return (XFS_ERROR(EIO)); + return -EIO; } tid = be32_to_cpu(ohead->oh_tid); hash = XLOG_RHASH(tid); @@ -3585,7 +3599,7 @@ xlog_recover_process_data( xfs_warn(log->l_mp, "%s: bad length 0x%x", __func__, be32_to_cpu(ohead->oh_len)); WARN_ON(1); - return (XFS_ERROR(EIO)); + return -EIO; } flags = ohead->oh_flags & ~XLOG_END_TRANS; if (flags & XLOG_WAS_CONT_TRANS) @@ -3607,7 +3621,7 @@ xlog_recover_process_data( xfs_warn(log->l_mp, "%s: bad transaction", __func__); ASSERT(0); - error = XFS_ERROR(EIO); + error = -EIO; break; case 0: case XLOG_CONTINUE_TRANS: @@ -3618,7 +3632,7 @@ xlog_recover_process_data( xfs_warn(log->l_mp, "%s: bad flag 0x%x", __func__, flags); ASSERT(0); - error = XFS_ERROR(EIO); + error = -EIO; break; } if (error) { @@ -3669,7 +3683,7 @@ xlog_recover_process_efi( */ set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); xfs_efi_release(efip, efip->efi_format.efi_nextents); - return XFS_ERROR(EIO); + return -EIO; } } @@ -3969,7 +3983,7 @@ xlog_unpack_data_crc( * CRC protection by punting an error back up the stack. */ if (xfs_sb_version_hascrc(&log->l_mp->m_sb)) - return EFSCORRUPTED; + return -EFSCORRUPTED; } return 0; @@ -4018,14 +4032,14 @@ xlog_valid_rec_header( if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) { XFS_ERROR_REPORT("xlog_valid_rec_header(1)", XFS_ERRLEVEL_LOW, log->l_mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } if (unlikely( (!rhead->h_version || (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { xfs_warn(log->l_mp, "%s: unrecognised log version (%d).", __func__, be32_to_cpu(rhead->h_version)); - return XFS_ERROR(EIO); + return -EIO; } /* LR body must have data or it wouldn't have been written */ @@ -4033,12 +4047,12 @@ xlog_valid_rec_header( if (unlikely( hlen <= 0 || hlen > INT_MAX )) { XFS_ERROR_REPORT("xlog_valid_rec_header(2)", XFS_ERRLEVEL_LOW, log->l_mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) { XFS_ERROR_REPORT("xlog_valid_rec_header(3)", XFS_ERRLEVEL_LOW, log->l_mp); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } return 0; } @@ -4081,7 +4095,7 @@ xlog_do_recovery_pass( */ hbp = xlog_get_bp(log, 1); if (!hbp) - return ENOMEM; + return -ENOMEM; error = xlog_bread(log, tail_blk, 1, hbp, &offset); if (error) @@ -4110,11 +4124,11 @@ xlog_do_recovery_pass( } if (!hbp) - return ENOMEM; + return -ENOMEM; dbp = xlog_get_bp(log, BTOBB(h_size)); if (!dbp) { xlog_put_bp(hbp); - return ENOMEM; + return -ENOMEM; } memset(rhash, 0, sizeof(rhash)); @@ -4388,7 +4402,7 @@ xlog_do_recover( * If IO errors happened during recovery, bail out. */ if (XFS_FORCED_SHUTDOWN(log->l_mp)) { - return (EIO); + return -EIO; } /* @@ -4415,7 +4429,7 @@ xlog_do_recover( if (XFS_FORCED_SHUTDOWN(log->l_mp)) { xfs_buf_relse(bp); - return XFS_ERROR(EIO); + return -EIO; } xfs_buf_iorequest(bp); @@ -4492,7 +4506,7 @@ xlog_recover( "Please recover the log on a kernel that supports the unknown features.", (log->l_mp->m_sb.sb_features_log_incompat & XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)); - return EINVAL; + return -EINVAL; } xfs_notice(log->l_mp, "Starting recovery (logdev: %s)", diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 3507cd0ec40..fbf0384a466 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -42,6 +42,7 @@ #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_dinode.h" +#include "xfs_sysfs.h" #ifdef HAVE_PERCPU_SB @@ -60,6 +61,8 @@ static DEFINE_MUTEX(xfs_uuid_table_mutex); static int xfs_uuid_table_size; static uuid_t *xfs_uuid_table; +extern struct kset *xfs_kset; + /* * See if the UUID is unique among mounted XFS filesystems. * Mount fails if UUID is nil or a FS with the same UUID is already mounted. @@ -76,7 +79,7 @@ xfs_uuid_mount( if (uuid_is_nil(uuid)) { xfs_warn(mp, "Filesystem has nil UUID - can't mount"); - return XFS_ERROR(EINVAL); + return -EINVAL; } mutex_lock(&xfs_uuid_table_mutex); @@ -104,7 +107,7 @@ xfs_uuid_mount( out_duplicate: mutex_unlock(&xfs_uuid_table_mutex); xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid); - return XFS_ERROR(EINVAL); + return -EINVAL; } STATIC void @@ -173,13 +176,9 @@ xfs_sb_validate_fsb_count( ASSERT(PAGE_SHIFT >= sbp->sb_blocklog); ASSERT(sbp->sb_blocklog >= BBSHIFT); -#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */ + /* Limited by ULONG_MAX of page cache index */ if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX) - return EFBIG; -#else /* Limited by UINT_MAX of sectors */ - if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX) - return EFBIG; -#endif + return -EFBIG; return 0; } @@ -250,9 +249,9 @@ xfs_initialize_perag( mp->m_flags &= ~XFS_MOUNT_32BITINODES; if (mp->m_flags & XFS_MOUNT_32BITINODES) - index = xfs_set_inode32(mp); + index = xfs_set_inode32(mp, agcount); else - index = xfs_set_inode64(mp); + index = xfs_set_inode64(mp, agcount); if (maxagi) *maxagi = index; @@ -308,15 +307,15 @@ reread: if (!bp) { if (loud) xfs_warn(mp, "SB buffer read failed"); - return EIO; + return -EIO; } if (bp->b_error) { error = bp->b_error; if (loud) xfs_warn(mp, "SB validate failed with error %d.", error); /* bad CRC means corrupted metadata */ - if (error == EFSBADCRC) - error = EFSCORRUPTED; + if (error == -EFSBADCRC) + error = -EFSCORRUPTED; goto release_buf; } @@ -324,7 +323,6 @@ reread: * Initialize the mount structure from the superblock. */ xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); - xfs_sb_quota_from_disk(sbp); /* * If we haven't validated the superblock, do so now before we try @@ -333,7 +331,7 @@ reread: if (sbp->sb_magicnum != XFS_SB_MAGIC) { if (loud) xfs_warn(mp, "Invalid superblock magic number"); - error = EINVAL; + error = -EINVAL; goto release_buf; } @@ -344,7 +342,7 @@ reread: if (loud) xfs_warn(mp, "device supports %u byte sectors (not %u)", sector_size, sbp->sb_sectsize); - error = ENOSYS; + error = -ENOSYS; goto release_buf; } @@ -392,7 +390,7 @@ xfs_update_alignment(xfs_mount_t *mp) xfs_warn(mp, "alignment check failed: sunit/swidth vs. blocksize(%d)", sbp->sb_blocksize); - return XFS_ERROR(EINVAL); + return -EINVAL; } else { /* * Convert the stripe unit and width to FSBs. @@ -402,14 +400,14 @@ xfs_update_alignment(xfs_mount_t *mp) xfs_warn(mp, "alignment check failed: sunit/swidth vs. agsize(%d)", sbp->sb_agblocks); - return XFS_ERROR(EINVAL); + return -EINVAL; } else if (mp->m_dalign) { mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); } else { xfs_warn(mp, "alignment check failed: sunit(%d) less than bsize(%d)", mp->m_dalign, sbp->sb_blocksize); - return XFS_ERROR(EINVAL); + return -EINVAL; } } @@ -429,7 +427,7 @@ xfs_update_alignment(xfs_mount_t *mp) } else { xfs_warn(mp, "cannot change alignment: superblock does not support data alignment"); - return XFS_ERROR(EINVAL); + return -EINVAL; } } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && xfs_sb_version_hasdalign(&mp->m_sb)) { @@ -556,14 +554,14 @@ xfs_check_sizes(xfs_mount_t *mp) d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { xfs_warn(mp, "filesystem size mismatch detected"); - return XFS_ERROR(EFBIG); + return -EFBIG; } bp = xfs_buf_read_uncached(mp->m_ddev_targp, d - XFS_FSS_TO_BB(mp, 1), XFS_FSS_TO_BB(mp, 1), 0, NULL); if (!bp) { xfs_warn(mp, "last sector read failed"); - return EIO; + return -EIO; } xfs_buf_relse(bp); @@ -571,14 +569,14 @@ xfs_check_sizes(xfs_mount_t *mp) d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { xfs_warn(mp, "log size mismatch detected"); - return XFS_ERROR(EFBIG); + return -EFBIG; } bp = xfs_buf_read_uncached(mp->m_logdev_targp, d - XFS_FSB_TO_BB(mp, 1), XFS_FSB_TO_BB(mp, 1), 0, NULL); if (!bp) { xfs_warn(mp, "log device read failed"); - return EIO; + return -EIO; } xfs_buf_relse(bp); } @@ -731,10 +729,15 @@ xfs_mountfs( xfs_set_maxicount(mp); - error = xfs_uuid_mount(mp); + mp->m_kobj.kobject.kset = xfs_kset; + error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname); if (error) goto out; + error = xfs_uuid_mount(mp); + if (error) + goto out_remove_sysfs; + /* * Set the minimum read and write sizes */ @@ -816,7 +819,7 @@ xfs_mountfs( if (!sbp->sb_logblocks) { xfs_warn(mp, "no log defined"); XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp); - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; goto out_free_perag; } @@ -855,7 +858,7 @@ xfs_mountfs( !mp->m_sb.sb_inprogress) { error = xfs_initialize_perag_data(mp, sbp->sb_agcount); if (error) - goto out_fail_wait; + goto out_log_dealloc; } /* @@ -876,7 +879,7 @@ xfs_mountfs( xfs_iunlock(rip, XFS_ILOCK_EXCL); XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, mp); - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; goto out_rele_rip; } mp->m_rootip = rip; /* save it */ @@ -927,7 +930,7 @@ xfs_mountfs( xfs_notice(mp, "resetting quota flags"); error = xfs_mount_reset_sbqflags(mp); if (error) - return error; + goto out_rtunmount; } } @@ -989,6 +992,8 @@ xfs_mountfs( xfs_da_unmount(mp); out_remove_uuid: xfs_uuid_unmount(mp); + out_remove_sysfs: + xfs_sysfs_del(&mp->m_kobj); out: return error; } @@ -1071,6 +1076,8 @@ xfs_unmountfs( xfs_errortag_clearall(mp, 0); #endif xfs_free_perag(mp); + + xfs_sysfs_del(&mp->m_kobj); } int @@ -1152,7 +1159,7 @@ xfs_mod_incore_sb_unlocked( lcounter += delta; if (lcounter < 0) { ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } mp->m_sb.sb_icount = lcounter; return 0; @@ -1161,7 +1168,7 @@ xfs_mod_incore_sb_unlocked( lcounter += delta; if (lcounter < 0) { ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } mp->m_sb.sb_ifree = lcounter; return 0; @@ -1191,7 +1198,7 @@ xfs_mod_incore_sb_unlocked( * blocks if were allowed to. */ if (!rsvd) - return XFS_ERROR(ENOSPC); + return -ENOSPC; lcounter = (long long)mp->m_resblks_avail + delta; if (lcounter >= 0) { @@ -1202,7 +1209,7 @@ xfs_mod_incore_sb_unlocked( "Filesystem \"%s\": reserve blocks depleted! " "Consider increasing reserve pool size.", mp->m_fsname); - return XFS_ERROR(ENOSPC); + return -ENOSPC; } mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); @@ -1211,7 +1218,7 @@ xfs_mod_incore_sb_unlocked( lcounter = (long long)mp->m_sb.sb_frextents; lcounter += delta; if (lcounter < 0) { - return XFS_ERROR(ENOSPC); + return -ENOSPC; } mp->m_sb.sb_frextents = lcounter; return 0; @@ -1220,7 +1227,7 @@ xfs_mod_incore_sb_unlocked( lcounter += delta; if (lcounter < 0) { ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } mp->m_sb.sb_dblocks = lcounter; return 0; @@ -1229,7 +1236,7 @@ xfs_mod_incore_sb_unlocked( scounter += delta; if (scounter < 0) { ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } mp->m_sb.sb_agcount = scounter; return 0; @@ -1238,7 +1245,7 @@ xfs_mod_incore_sb_unlocked( scounter += delta; if (scounter < 0) { ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } mp->m_sb.sb_imax_pct = scounter; return 0; @@ -1247,7 +1254,7 @@ xfs_mod_incore_sb_unlocked( scounter += delta; if (scounter < 0) { ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } mp->m_sb.sb_rextsize = scounter; return 0; @@ -1256,7 +1263,7 @@ xfs_mod_incore_sb_unlocked( scounter += delta; if (scounter < 0) { ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } mp->m_sb.sb_rbmblocks = scounter; return 0; @@ -1265,7 +1272,7 @@ xfs_mod_incore_sb_unlocked( lcounter += delta; if (lcounter < 0) { ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } mp->m_sb.sb_rblocks = lcounter; return 0; @@ -1274,7 +1281,7 @@ xfs_mod_incore_sb_unlocked( lcounter += delta; if (lcounter < 0) { ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } mp->m_sb.sb_rextents = lcounter; return 0; @@ -1283,13 +1290,13 @@ xfs_mod_incore_sb_unlocked( scounter += delta; if (scounter < 0) { ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } mp->m_sb.sb_rextslog = scounter; return 0; default: ASSERT(0); - return XFS_ERROR(EINVAL); + return -EINVAL; } } @@ -1452,7 +1459,7 @@ xfs_dev_is_read_only( (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) { xfs_notice(mp, "%s required on read-only device.", message); xfs_notice(mp, "write access unavailable, cannot proceed."); - return EROFS; + return -EROFS; } return 0; } @@ -1995,7 +2002,7 @@ slow_path: * (e.g. lots of space just got freed). After that * we are done. */ - if (ret != ENOSPC) + if (ret != -ENOSPC) xfs_icsb_balance_counter(mp, field, 0); xfs_icsb_unlock(mp); return ret; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 7295a0b7c34..b0447c86e7e 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -166,6 +166,7 @@ typedef struct xfs_mount { on the next remount,rw */ int64_t m_low_space[XFS_LOWSP_MAX]; /* low free space thresholds */ + struct xfs_kobj m_kobj; struct workqueue_struct *m_data_workqueue; struct workqueue_struct *m_unwritten_workqueue; diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index f99b4933dc2..1eb6f3df698 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -337,20 +337,20 @@ xfs_mru_cache_create( *mrup = NULL; if (!mrup || !grp_count || !lifetime_ms || !free_func) - return EINVAL; + return -EINVAL; if (!(grp_time = msecs_to_jiffies(lifetime_ms) / grp_count)) - return EINVAL; + return -EINVAL; if (!(mru = kmem_zalloc(sizeof(*mru), KM_SLEEP))) - return ENOMEM; + return -ENOMEM; /* An extra list is needed to avoid reaping up to a grp_time early. */ mru->grp_count = grp_count + 1; mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP); if (!mru->lists) { - err = ENOMEM; + err = -ENOMEM; goto exit; } @@ -434,16 +434,16 @@ xfs_mru_cache_insert( ASSERT(mru && mru->lists); if (!mru || !mru->lists) - return EINVAL; + return -EINVAL; if (radix_tree_preload(GFP_KERNEL)) - return ENOMEM; + return -ENOMEM; INIT_LIST_HEAD(&elem->list_node); elem->key = key; spin_lock(&mru->lock); - error = -radix_tree_insert(&mru->store, key, elem); + error = radix_tree_insert(&mru->store, key, elem); radix_tree_preload_end(); if (!error) _xfs_mru_cache_list_insert(mru, elem); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 6d26759c779..10232102b4a 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -98,18 +98,18 @@ restart: next_index = be32_to_cpu(dqp->q_core.d_id) + 1; error = execute(batch[i], data); - if (error == EAGAIN) { + if (error == -EAGAIN) { skipped++; continue; } - if (error && last_error != EFSCORRUPTED) + if (error && last_error != -EFSCORRUPTED) last_error = error; } mutex_unlock(&qi->qi_tree_lock); /* bail out if the filesystem is corrupted. */ - if (last_error == EFSCORRUPTED) { + if (last_error == -EFSCORRUPTED) { skipped = 0; break; } @@ -138,7 +138,7 @@ xfs_qm_dqpurge( xfs_dqlock(dqp); if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { xfs_dqunlock(dqp); - return EAGAIN; + return -EAGAIN; } dqp->dq_flags |= XFS_DQ_FREEING; @@ -221,100 +221,6 @@ xfs_qm_unmount( } } - -/* - * This is called from xfs_mountfs to start quotas and initialize all - * necessary data structures like quotainfo. This is also responsible for - * running a quotacheck as necessary. We are guaranteed that the superblock - * is consistently read in at this point. - * - * If we fail here, the mount will continue with quota turned off. We don't - * need to inidicate success or failure at all. - */ -void -xfs_qm_mount_quotas( - xfs_mount_t *mp) -{ - int error = 0; - uint sbf; - - /* - * If quotas on realtime volumes is not supported, we disable - * quotas immediately. - */ - if (mp->m_sb.sb_rextents) { - xfs_notice(mp, "Cannot turn on quotas for realtime filesystem"); - mp->m_qflags = 0; - goto write_changes; - } - - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); - - /* - * Allocate the quotainfo structure inside the mount struct, and - * create quotainode(s), and change/rev superblock if necessary. - */ - error = xfs_qm_init_quotainfo(mp); - if (error) { - /* - * We must turn off quotas. - */ - ASSERT(mp->m_quotainfo == NULL); - mp->m_qflags = 0; - goto write_changes; - } - /* - * If any of the quotas are not consistent, do a quotacheck. - */ - if (XFS_QM_NEED_QUOTACHECK(mp)) { - error = xfs_qm_quotacheck(mp); - if (error) { - /* Quotacheck failed and disabled quotas. */ - return; - } - } - /* - * If one type of quotas is off, then it will lose its - * quotachecked status, since we won't be doing accounting for - * that type anymore. - */ - if (!XFS_IS_UQUOTA_ON(mp)) - mp->m_qflags &= ~XFS_UQUOTA_CHKD; - if (!XFS_IS_GQUOTA_ON(mp)) - mp->m_qflags &= ~XFS_GQUOTA_CHKD; - if (!XFS_IS_PQUOTA_ON(mp)) - mp->m_qflags &= ~XFS_PQUOTA_CHKD; - - write_changes: - /* - * We actually don't have to acquire the m_sb_lock at all. - * This can only be called from mount, and that's single threaded. XXX - */ - spin_lock(&mp->m_sb_lock); - sbf = mp->m_sb.sb_qflags; - mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL; - spin_unlock(&mp->m_sb_lock); - - if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) { - if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) { - /* - * We could only have been turning quotas off. - * We aren't in very good shape actually because - * the incore structures are convinced that quotas are - * off, but the on disk superblock doesn't know that ! - */ - ASSERT(!(XFS_IS_QUOTA_RUNNING(mp))); - xfs_alert(mp, "%s: Superblock update failed!", - __func__); - } - } - - if (error) { - xfs_warn(mp, "Failed to initialize disk quotas."); - return; - } -} - /* * Called from the vfsops layer. */ @@ -671,7 +577,7 @@ xfs_qm_init_quotainfo( qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); - error = -list_lru_init(&qinf->qi_lru); + error = list_lru_init(&qinf->qi_lru); if (error) goto out_free_qinf; @@ -995,7 +901,7 @@ xfs_qm_dqiter_bufs( * will leave a trace in the log indicating corruption has * been detected. */ - if (error == EFSCORRUPTED) { + if (error == -EFSCORRUPTED) { error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, bno), mp->m_quotainfo->qi_dqchunklen, 0, &bp, @@ -1005,6 +911,12 @@ xfs_qm_dqiter_bufs( if (error) break; + /* + * A corrupt buffer might not have a verifier attached, so + * make sure we have the correct one attached before writeback + * occurs. + */ + bp->b_ops = &xfs_dquot_buf_ops; xfs_qm_reset_dqcounts(mp, bp, firstid, type); xfs_buf_delwri_queue(bp, buffer_list); xfs_buf_relse(bp); @@ -1090,7 +1002,7 @@ xfs_qm_dqiterate( xfs_buf_readahead(mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, rablkno), mp->m_quotainfo->qi_dqchunklen, - NULL); + &xfs_dquot_buf_ops); rablkno++; } } @@ -1138,8 +1050,8 @@ xfs_qm_quotacheck_dqadjust( /* * Shouldn't be able to turn off quotas here. */ - ASSERT(error != ESRCH); - ASSERT(error != ENOENT); + ASSERT(error != -ESRCH); + ASSERT(error != -ENOENT); return error; } @@ -1226,7 +1138,7 @@ xfs_qm_dqusage_adjust( */ if (xfs_is_quota_inode(&mp->m_sb, ino)) { *res = BULKSTAT_RV_NOTHING; - return XFS_ERROR(EINVAL); + return -EINVAL; } /* @@ -1330,7 +1242,7 @@ out_unlock: * Walk thru all the filesystem inodes and construct a consistent view * of the disk quota world. If the quotacheck fails, disable quotas. */ -int +STATIC int xfs_qm_quotacheck( xfs_mount_t *mp) { @@ -1463,7 +1375,100 @@ xfs_qm_quotacheck( } } else xfs_notice(mp, "Quotacheck: Done."); - return (error); + return error; +} + +/* + * This is called from xfs_mountfs to start quotas and initialize all + * necessary data structures like quotainfo. This is also responsible for + * running a quotacheck as necessary. We are guaranteed that the superblock + * is consistently read in at this point. + * + * If we fail here, the mount will continue with quota turned off. We don't + * need to inidicate success or failure at all. + */ +void +xfs_qm_mount_quotas( + struct xfs_mount *mp) +{ + int error = 0; + uint sbf; + + /* + * If quotas on realtime volumes is not supported, we disable + * quotas immediately. + */ + if (mp->m_sb.sb_rextents) { + xfs_notice(mp, "Cannot turn on quotas for realtime filesystem"); + mp->m_qflags = 0; + goto write_changes; + } + + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + + /* + * Allocate the quotainfo structure inside the mount struct, and + * create quotainode(s), and change/rev superblock if necessary. + */ + error = xfs_qm_init_quotainfo(mp); + if (error) { + /* + * We must turn off quotas. + */ + ASSERT(mp->m_quotainfo == NULL); + mp->m_qflags = 0; + goto write_changes; + } + /* + * If any of the quotas are not consistent, do a quotacheck. + */ + if (XFS_QM_NEED_QUOTACHECK(mp)) { + error = xfs_qm_quotacheck(mp); + if (error) { + /* Quotacheck failed and disabled quotas. */ + return; + } + } + /* + * If one type of quotas is off, then it will lose its + * quotachecked status, since we won't be doing accounting for + * that type anymore. + */ + if (!XFS_IS_UQUOTA_ON(mp)) + mp->m_qflags &= ~XFS_UQUOTA_CHKD; + if (!XFS_IS_GQUOTA_ON(mp)) + mp->m_qflags &= ~XFS_GQUOTA_CHKD; + if (!XFS_IS_PQUOTA_ON(mp)) + mp->m_qflags &= ~XFS_PQUOTA_CHKD; + + write_changes: + /* + * We actually don't have to acquire the m_sb_lock at all. + * This can only be called from mount, and that's single threaded. XXX + */ + spin_lock(&mp->m_sb_lock); + sbf = mp->m_sb.sb_qflags; + mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL; + spin_unlock(&mp->m_sb_lock); + + if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) { + if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) { + /* + * We could only have been turning quotas off. + * We aren't in very good shape actually because + * the incore structures are convinced that quotas are + * off, but the on disk superblock doesn't know that ! + */ + ASSERT(!(XFS_IS_QUOTA_RUNNING(mp))); + xfs_alert(mp, "%s: Superblock update failed!", + __func__); + } + } + + if (error) { + xfs_warn(mp, "Failed to initialize disk quotas."); + return; + } } /* @@ -1493,7 +1498,7 @@ xfs_qm_init_quotainos( error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &uip); if (error) - return XFS_ERROR(error); + return error; } if (XFS_IS_GQUOTA_ON(mp) && mp->m_sb.sb_gquotino != NULLFSINO) { @@ -1563,7 +1568,7 @@ error_rele: IRELE(gip); if (pip) IRELE(pip); - return XFS_ERROR(error); + return error; } STATIC void @@ -1679,7 +1684,7 @@ xfs_qm_vop_dqalloc( XFS_QMOPT_DOWARN, &uq); if (error) { - ASSERT(error != ENOENT); + ASSERT(error != -ENOENT); return error; } /* @@ -1706,7 +1711,7 @@ xfs_qm_vop_dqalloc( XFS_QMOPT_DOWARN, &gq); if (error) { - ASSERT(error != ENOENT); + ASSERT(error != -ENOENT); goto error_rele; } xfs_dqunlock(gq); @@ -1726,7 +1731,7 @@ xfs_qm_vop_dqalloc( XFS_QMOPT_DOWARN, &pq); if (error) { - ASSERT(error != ENOENT); + ASSERT(error != -ENOENT); goto error_rele; } xfs_dqunlock(pq); @@ -1895,7 +1900,7 @@ xfs_qm_vop_chown_reserve( -((xfs_qcnt_t)delblks), 0, blkflags); } - return (0); + return 0; } int diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 797fd463627..3a07a937e23 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -157,7 +157,6 @@ struct xfs_dquot_acct { #define XFS_QM_RTBWARNLIMIT 5 extern void xfs_qm_destroy_quotainfo(struct xfs_mount *); -extern int xfs_qm_quotacheck(struct xfs_mount *); extern int xfs_qm_write_sb_changes(struct xfs_mount *, __int64_t); /* dquot stuff */ diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index e9be63abd8d..2c61e61b020 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c @@ -117,7 +117,7 @@ xfs_qm_newmount( (uquotaondisk ? " usrquota" : ""), (gquotaondisk ? " grpquota" : ""), (pquotaondisk ? " prjquota" : "")); - return XFS_ERROR(EPERM); + return -EPERM; } if (XFS_IS_QUOTA_ON(mp) || quotaondisk) { diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index bbc813caba4..80f2d77d929 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -64,10 +64,10 @@ xfs_qm_scall_quotaoff( /* * No file system can have quotas enabled on disk but not in core. * Note that quota utilities (like quotaoff) _expect_ - * errno == EEXIST here. + * errno == -EEXIST here. */ if ((mp->m_qflags & flags) == 0) - return XFS_ERROR(EEXIST); + return -EEXIST; error = 0; flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); @@ -94,7 +94,7 @@ xfs_qm_scall_quotaoff( /* XXX what to do if error ? Revert back to old vals incore ? */ error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS); - return (error); + return error; } dqtype = 0; @@ -198,7 +198,7 @@ xfs_qm_scall_quotaoff( if (mp->m_qflags == 0) { mutex_unlock(&q->qi_quotaofflock); xfs_qm_destroy_quotainfo(mp); - return (0); + return 0; } /* @@ -278,13 +278,13 @@ xfs_qm_scall_trunc_qfiles( xfs_mount_t *mp, uint flags) { - int error = EINVAL; + int error = -EINVAL; if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0 || (flags & ~XFS_DQ_ALLTYPES)) { xfs_debug(mp, "%s: flags=%x m_qflags=%x", __func__, flags, mp->m_qflags); - return XFS_ERROR(EINVAL); + return -EINVAL; } if (flags & XFS_DQ_USER) { @@ -328,7 +328,7 @@ xfs_qm_scall_quotaon( if (flags == 0) { xfs_debug(mp, "%s: zero flags, m_qflags=%x", __func__, mp->m_qflags); - return XFS_ERROR(EINVAL); + return -EINVAL; } /* No fs can turn on quotas with a delayed effect */ @@ -351,13 +351,13 @@ xfs_qm_scall_quotaon( xfs_debug(mp, "%s: Can't enforce without acct, flags=%x sbflags=%x", __func__, flags, mp->m_sb.sb_qflags); - return XFS_ERROR(EINVAL); + return -EINVAL; } /* * If everything's up to-date incore, then don't waste time. */ if ((mp->m_qflags & flags) == flags) - return XFS_ERROR(EEXIST); + return -EEXIST; /* * Change sb_qflags on disk but not incore mp->qflags @@ -372,11 +372,11 @@ xfs_qm_scall_quotaon( * There's nothing to change if it's the same. */ if ((qf & flags) == flags && sbflags == 0) - return XFS_ERROR(EEXIST); + return -EEXIST; sbflags |= XFS_SB_QFLAGS; if ((error = xfs_qm_write_sb_changes(mp, sbflags))) - return (error); + return error; /* * If we aren't trying to switch on quota enforcement, we are done. */ @@ -387,10 +387,10 @@ xfs_qm_scall_quotaon( ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) != (mp->m_qflags & XFS_GQUOTA_ACCT)) || (flags & XFS_ALL_QUOTA_ENFD) == 0) - return (0); + return 0; if (! XFS_IS_QUOTA_RUNNING(mp)) - return XFS_ERROR(ESRCH); + return -ESRCH; /* * Switch on quota enforcement in core. @@ -399,7 +399,7 @@ xfs_qm_scall_quotaon( mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD); mutex_unlock(&mp->m_quotainfo->qi_quotaofflock); - return (0); + return 0; } @@ -426,7 +426,7 @@ xfs_qm_scall_getqstat( if (!xfs_sb_version_hasquota(&mp->m_sb)) { out->qs_uquota.qfs_ino = NULLFSINO; out->qs_gquota.qfs_ino = NULLFSINO; - return (0); + return 0; } out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags & @@ -514,7 +514,7 @@ xfs_qm_scall_getqstatv( out->qs_uquota.qfs_ino = NULLFSINO; out->qs_gquota.qfs_ino = NULLFSINO; out->qs_pquota.qfs_ino = NULLFSINO; - return (0); + return 0; } out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags & @@ -595,7 +595,7 @@ xfs_qm_scall_setqlim( xfs_qcnt_t hard, soft; if (newlim->d_fieldmask & ~XFS_DQ_MASK) - return EINVAL; + return -EINVAL; if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0) return 0; @@ -615,7 +615,7 @@ xfs_qm_scall_setqlim( */ error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp); if (error) { - ASSERT(error != ENOENT); + ASSERT(error != -ENOENT); goto out_unlock; } xfs_dqunlock(dqp); @@ -758,7 +758,7 @@ xfs_qm_log_quotaoff_end( error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_equotaoff, 0, 0); if (error) { xfs_trans_cancel(tp, 0); - return (error); + return error; } qoffi = xfs_trans_get_qoff_item(tp, startqoff, @@ -772,7 +772,7 @@ xfs_qm_log_quotaoff_end( */ xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0); - return (error); + return error; } @@ -822,7 +822,7 @@ error0: spin_unlock(&mp->m_sb_lock); } *qoffstartp = qoffi; - return (error); + return error; } @@ -850,7 +850,7 @@ xfs_qm_scall_getquota( * our utility programs are concerned. */ if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) { - error = XFS_ERROR(ENOENT); + error = -ENOENT; goto out_put; } @@ -953,7 +953,7 @@ xfs_qm_export_flags( uflags |= FS_QUOTA_GDQ_ENFD; if (flags & XFS_PQUOTA_ENFD) uflags |= FS_QUOTA_PDQ_ENFD; - return (uflags); + return uflags; } diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index 2ad1b9822e9..b238027df98 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -51,7 +51,7 @@ xfs_fs_get_xstate( if (!XFS_IS_QUOTA_RUNNING(mp)) return -ENOSYS; - return -xfs_qm_scall_getqstat(mp, fqs); + return xfs_qm_scall_getqstat(mp, fqs); } STATIC int @@ -63,7 +63,7 @@ xfs_fs_get_xstatev( if (!XFS_IS_QUOTA_RUNNING(mp)) return -ENOSYS; - return -xfs_qm_scall_getqstatv(mp, fqs); + return xfs_qm_scall_getqstatv(mp, fqs); } STATIC int @@ -95,11 +95,11 @@ xfs_fs_set_xstate( switch (op) { case Q_XQUOTAON: - return -xfs_qm_scall_quotaon(mp, flags); + return xfs_qm_scall_quotaon(mp, flags); case Q_XQUOTAOFF: if (!XFS_IS_QUOTA_ON(mp)) return -EINVAL; - return -xfs_qm_scall_quotaoff(mp, flags); + return xfs_qm_scall_quotaoff(mp, flags); } return -EINVAL; @@ -112,7 +112,7 @@ xfs_fs_rm_xquota( { struct xfs_mount *mp = XFS_M(sb); unsigned int flags = 0; - + if (sb->s_flags & MS_RDONLY) return -EROFS; @@ -123,11 +123,11 @@ xfs_fs_rm_xquota( flags |= XFS_DQ_USER; if (uflags & FS_GROUP_QUOTA) flags |= XFS_DQ_GROUP; - if (uflags & FS_USER_QUOTA) + if (uflags & FS_PROJ_QUOTA) flags |= XFS_DQ_PROJ; - return -xfs_qm_scall_trunc_qfiles(mp, flags); -} + return xfs_qm_scall_trunc_qfiles(mp, flags); +} STATIC int xfs_fs_get_dqblk( @@ -142,7 +142,7 @@ xfs_fs_get_dqblk( if (!XFS_IS_QUOTA_ON(mp)) return -ESRCH; - return -xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid), + return xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid), xfs_quota_type(qid.type), fdq); } @@ -161,7 +161,7 @@ xfs_fs_set_dqblk( if (!XFS_IS_QUOTA_ON(mp)) return -ESRCH; - return -xfs_qm_scall_setqlim(mp, from_kqid(&init_user_ns, qid), + return xfs_qm_scall_setqlim(mp, from_kqid(&init_user_ns, qid), xfs_quota_type(qid.type), fdq); } diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index ec5ca65c621..909e143b87a 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -863,7 +863,7 @@ xfs_growfs_rt_alloc( XFS_BMAPI_METADATA, &firstblock, resblks, &map, &nmap, &flist); if (!error && nmap < 1) - error = XFS_ERROR(ENOSPC); + error = -ENOSPC; if (error) goto error_cancel; /* @@ -903,7 +903,7 @@ xfs_growfs_rt_alloc( bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0); if (bp == NULL) { - error = XFS_ERROR(EIO); + error = -EIO; error_cancel: xfs_trans_cancel(tp, cancelflags); goto error; @@ -944,9 +944,9 @@ xfs_growfs_rt( xfs_buf_t *bp; /* temporary buffer */ int error; /* error return value */ xfs_mount_t *nmp; /* new (fake) mount structure */ - xfs_drfsbno_t nrblocks; /* new number of realtime blocks */ + xfs_rfsblock_t nrblocks; /* new number of realtime blocks */ xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */ - xfs_drtbno_t nrextents; /* new number of realtime extents */ + xfs_rtblock_t nrextents; /* new number of realtime extents */ uint8_t nrextslog; /* new log2 of sb_rextents */ xfs_extlen_t nrsumblocks; /* new number of summary blocks */ uint nrsumlevels; /* new rt summary levels */ @@ -962,11 +962,11 @@ xfs_growfs_rt( * Initial error checking. */ if (!capable(CAP_SYS_ADMIN)) - return XFS_ERROR(EPERM); + return -EPERM; if (mp->m_rtdev_targp == NULL || mp->m_rbmip == NULL || (nrblocks = in->newblocks) <= sbp->sb_rblocks || (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize))) - return XFS_ERROR(EINVAL); + return -EINVAL; if ((error = xfs_sb_validate_fsb_count(sbp, nrblocks))) return error; /* @@ -976,7 +976,7 @@ xfs_growfs_rt( XFS_FSB_TO_BB(mp, nrblocks - 1), XFS_FSB_TO_BB(mp, 1), 0, NULL); if (!bp) - return EIO; + return -EIO; if (bp->b_error) { error = bp->b_error; xfs_buf_relse(bp); @@ -1001,7 +1001,7 @@ xfs_growfs_rt( * since we'll log basically the whole summary file at once. */ if (nrsumblocks > (mp->m_sb.sb_logblocks >> 1)) - return XFS_ERROR(EINVAL); + return -EINVAL; /* * Get the old block counts for bitmap and summary inodes. * These can't change since other growfs callers are locked out. @@ -1208,7 +1208,7 @@ xfs_rtallocate_extent( len, &sumbp, &sb, prod, &r); break; default: - error = EIO; + error = -EIO; ASSERT(0); } if (error) @@ -1247,7 +1247,7 @@ xfs_rtmount_init( if (mp->m_rtdev_targp == NULL) { xfs_warn(mp, "Filesystem has a realtime volume, use rtdev=device option"); - return XFS_ERROR(ENODEV); + return -ENODEV; } mp->m_rsumlevels = sbp->sb_rextslog + 1; mp->m_rsumsize = @@ -1263,7 +1263,7 @@ xfs_rtmount_init( xfs_warn(mp, "realtime mount -- %llu != %llu", (unsigned long long) XFS_BB_TO_FSB(mp, d), (unsigned long long) mp->m_sb.sb_rblocks); - return XFS_ERROR(EFBIG); + return -EFBIG; } bp = xfs_buf_read_uncached(mp->m_rtdev_targp, d - XFS_FSB_TO_BB(mp, 1), @@ -1272,7 +1272,7 @@ xfs_rtmount_init( xfs_warn(mp, "realtime device size check failed"); if (bp) xfs_buf_relse(bp); - return EIO; + return -EIO; } xfs_buf_relse(bp); return 0; diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 752b63d1030..c642795324a 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -132,7 +132,7 @@ xfs_rtmount_init( return 0; xfs_warn(mp, "Not built with CONFIG_XFS_RT"); - return ENOSYS; + return -ENOSYS; } # define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) # define xfs_rtunmount_inodes(m) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 8f0333b3f7a..b194652033c 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -61,6 +61,7 @@ static const struct super_operations xfs_super_operations; static kmem_zone_t *xfs_ioend_zone; mempool_t *xfs_ioend_pool; +struct kset *xfs_kset; #define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */ #define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */ @@ -185,7 +186,7 @@ xfs_parseargs( */ mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL); if (!mp->m_fsname) - return ENOMEM; + return -ENOMEM; mp->m_fsname_len = strlen(mp->m_fsname) + 1; /* @@ -204,9 +205,6 @@ xfs_parseargs( */ mp->m_flags |= XFS_MOUNT_BARRIER; mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; -#if !XFS_BIG_INUMS - mp->m_flags |= XFS_MOUNT_SMALL_INUMS; -#endif /* * These can be overridden by the mount option parsing. @@ -227,57 +225,57 @@ xfs_parseargs( if (!value || !*value) { xfs_warn(mp, "%s option requires an argument", this_char); - return EINVAL; + return -EINVAL; } if (kstrtoint(value, 10, &mp->m_logbufs)) - return EINVAL; + return -EINVAL; } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { if (!value || !*value) { xfs_warn(mp, "%s option requires an argument", this_char); - return EINVAL; + return -EINVAL; } if (suffix_kstrtoint(value, 10, &mp->m_logbsize)) - return EINVAL; + return -EINVAL; } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { if (!value || !*value) { xfs_warn(mp, "%s option requires an argument", this_char); - return EINVAL; + return -EINVAL; } mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL); if (!mp->m_logname) - return ENOMEM; + return -ENOMEM; } else if (!strcmp(this_char, MNTOPT_MTPT)) { xfs_warn(mp, "%s option not allowed on this system", this_char); - return EINVAL; + return -EINVAL; } else if (!strcmp(this_char, MNTOPT_RTDEV)) { if (!value || !*value) { xfs_warn(mp, "%s option requires an argument", this_char); - return EINVAL; + return -EINVAL; } mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL); if (!mp->m_rtname) - return ENOMEM; + return -ENOMEM; } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { if (!value || !*value) { xfs_warn(mp, "%s option requires an argument", this_char); - return EINVAL; + return -EINVAL; } if (kstrtoint(value, 10, &iosize)) - return EINVAL; + return -EINVAL; iosizelog = ffs(iosize) - 1; } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { if (!value || !*value) { xfs_warn(mp, "%s option requires an argument", this_char); - return EINVAL; + return -EINVAL; } if (suffix_kstrtoint(value, 10, &iosize)) - return EINVAL; + return -EINVAL; iosizelog = ffs(iosize) - 1; } else if (!strcmp(this_char, MNTOPT_GRPID) || !strcmp(this_char, MNTOPT_BSDGROUPS)) { @@ -297,27 +295,22 @@ xfs_parseargs( if (!value || !*value) { xfs_warn(mp, "%s option requires an argument", this_char); - return EINVAL; + return -EINVAL; } if (kstrtoint(value, 10, &dsunit)) - return EINVAL; + return -EINVAL; } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { if (!value || !*value) { xfs_warn(mp, "%s option requires an argument", this_char); - return EINVAL; + return -EINVAL; } if (kstrtoint(value, 10, &dswidth)) - return EINVAL; + return -EINVAL; } else if (!strcmp(this_char, MNTOPT_32BITINODE)) { mp->m_flags |= XFS_MOUNT_SMALL_INUMS; } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; -#if !XFS_BIG_INUMS - xfs_warn(mp, "%s option not allowed on this system", - this_char); - return EINVAL; -#endif } else if (!strcmp(this_char, MNTOPT_NOUUID)) { mp->m_flags |= XFS_MOUNT_NOUUID; } else if (!strcmp(this_char, MNTOPT_BARRIER)) { @@ -390,7 +383,7 @@ xfs_parseargs( "irixsgid is now a sysctl(2) variable, option is deprecated."); } else { xfs_warn(mp, "unknown mount option [%s].", this_char); - return EINVAL; + return -EINVAL; } } @@ -400,32 +393,32 @@ xfs_parseargs( if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && !(mp->m_flags & XFS_MOUNT_RDONLY)) { xfs_warn(mp, "no-recovery mounts must be read-only."); - return EINVAL; + return -EINVAL; } if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { xfs_warn(mp, "sunit and swidth options incompatible with the noalign option"); - return EINVAL; + return -EINVAL; } #ifndef CONFIG_XFS_QUOTA if (XFS_IS_QUOTA_RUNNING(mp)) { xfs_warn(mp, "quota support not available in this kernel."); - return EINVAL; + return -EINVAL; } #endif if ((dsunit && !dswidth) || (!dsunit && dswidth)) { xfs_warn(mp, "sunit and swidth must be specified together"); - return EINVAL; + return -EINVAL; } if (dsunit && (dswidth % dsunit != 0)) { xfs_warn(mp, "stripe width (%d) must be a multiple of the stripe unit (%d)", dswidth, dsunit); - return EINVAL; + return -EINVAL; } done: @@ -446,7 +439,7 @@ done: mp->m_logbufs > XLOG_MAX_ICLOGS)) { xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]", mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); - return XFS_ERROR(EINVAL); + return -EINVAL; } if (mp->m_logbsize != -1 && mp->m_logbsize != 0 && @@ -456,7 +449,7 @@ done: xfs_warn(mp, "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", mp->m_logbsize); - return XFS_ERROR(EINVAL); + return -EINVAL; } if (iosizelog) { @@ -465,7 +458,7 @@ done: xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", iosizelog, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG); - return XFS_ERROR(EINVAL); + return -EINVAL; } mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE; @@ -597,15 +590,20 @@ xfs_max_file_offset( return (((__uint64_t)pagefactor) << bitshift) - 1; } +/* + * xfs_set_inode32() and xfs_set_inode64() are passed an agcount + * because in the growfs case, mp->m_sb.sb_agcount is not updated + * yet to the potentially higher ag count. + */ xfs_agnumber_t -xfs_set_inode32(struct xfs_mount *mp) +xfs_set_inode32(struct xfs_mount *mp, xfs_agnumber_t agcount) { xfs_agnumber_t index = 0; xfs_agnumber_t maxagi = 0; xfs_sb_t *sbp = &mp->m_sb; xfs_agnumber_t max_metadata; - xfs_agino_t agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks -1, 0); - xfs_ino_t ino = XFS_AGINO_TO_INO(mp, sbp->sb_agcount -1, agino); + xfs_agino_t agino; + xfs_ino_t ino; xfs_perag_t *pag; /* Calculate how much should be reserved for inodes to meet @@ -620,10 +618,12 @@ xfs_set_inode32(struct xfs_mount *mp) do_div(icount, sbp->sb_agblocks); max_metadata = icount; } else { - max_metadata = sbp->sb_agcount; + max_metadata = agcount; } - for (index = 0; index < sbp->sb_agcount; index++) { + agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0); + + for (index = 0; index < agcount; index++) { ino = XFS_AGINO_TO_INO(mp, index, agino); if (ino > XFS_MAXINUMBER_32) { @@ -648,11 +648,11 @@ xfs_set_inode32(struct xfs_mount *mp) } xfs_agnumber_t -xfs_set_inode64(struct xfs_mount *mp) +xfs_set_inode64(struct xfs_mount *mp, xfs_agnumber_t agcount) { xfs_agnumber_t index = 0; - for (index = 0; index < mp->m_sb.sb_agcount; index++) { + for (index = 0; index < agcount; index++) { struct xfs_perag *pag; pag = xfs_perag_get(mp, index); @@ -686,7 +686,7 @@ xfs_blkdev_get( xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error); } - return -error; + return error; } STATIC void @@ -756,7 +756,7 @@ xfs_open_devices( if (rtdev == ddev || rtdev == logdev) { xfs_warn(mp, "Cannot mount filesystem with identical rtdev and ddev/logdev."); - error = EINVAL; + error = -EINVAL; goto out_close_rtdev; } } @@ -764,7 +764,7 @@ xfs_open_devices( /* * Setup xfs_mount buffer target pointers */ - error = ENOMEM; + error = -ENOMEM; mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev); if (!mp->m_ddev_targp) goto out_close_rtdev; @@ -1188,6 +1188,7 @@ xfs_fs_remount( char *options) { struct xfs_mount *mp = XFS_M(sb); + xfs_sb_t *sbp = &mp->m_sb; substring_t args[MAX_OPT_ARGS]; char *p; int error; @@ -1208,10 +1209,10 @@ xfs_fs_remount( mp->m_flags &= ~XFS_MOUNT_BARRIER; break; case Opt_inode64: - mp->m_maxagi = xfs_set_inode64(mp); + mp->m_maxagi = xfs_set_inode64(mp, sbp->sb_agcount); break; case Opt_inode32: - mp->m_maxagi = xfs_set_inode32(mp); + mp->m_maxagi = xfs_set_inode32(mp, sbp->sb_agcount); break; default: /* @@ -1295,7 +1296,7 @@ xfs_fs_freeze( xfs_save_resvblks(mp); xfs_quiesce_attr(mp); - return -xfs_fs_log_dummy(mp); + return xfs_fs_log_dummy(mp); } STATIC int @@ -1314,7 +1315,7 @@ xfs_fs_show_options( struct seq_file *m, struct dentry *root) { - return -xfs_showargs(XFS_M(root->d_sb), m); + return xfs_showargs(XFS_M(root->d_sb), m); } /* @@ -1336,14 +1337,14 @@ xfs_finish_flags( mp->m_logbsize < mp->m_sb.sb_logsunit) { xfs_warn(mp, "logbuf size must be greater than or equal to log stripe size"); - return XFS_ERROR(EINVAL); + return -EINVAL; } } else { /* Fail a mount if the logbuf is larger than 32K */ if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { xfs_warn(mp, "logbuf size for version 1 logs must be 16K or 32K"); - return XFS_ERROR(EINVAL); + return -EINVAL; } } @@ -1355,7 +1356,7 @@ xfs_finish_flags( xfs_warn(mp, "Cannot mount a V5 filesystem as %s. %s is always enabled for V5 filesystems.", MNTOPT_NOATTR2, MNTOPT_ATTR2); - return XFS_ERROR(EINVAL); + return -EINVAL; } /* @@ -1372,7 +1373,7 @@ xfs_finish_flags( if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { xfs_warn(mp, "cannot mount a read-only filesystem as read-write"); - return XFS_ERROR(EROFS); + return -EROFS; } if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && @@ -1380,7 +1381,7 @@ xfs_finish_flags( !xfs_sb_version_has_pquotino(&mp->m_sb)) { xfs_warn(mp, "Super block does not support project and group quota together"); - return XFS_ERROR(EINVAL); + return -EINVAL; } return 0; @@ -1394,7 +1395,7 @@ xfs_fs_fill_super( { struct inode *root; struct xfs_mount *mp = NULL; - int flags = 0, error = ENOMEM; + int flags = 0, error = -ENOMEM; mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); if (!mp) @@ -1428,11 +1429,11 @@ xfs_fs_fill_super( if (error) goto out_free_fsname; - error = -xfs_init_mount_workqueues(mp); + error = xfs_init_mount_workqueues(mp); if (error) goto out_close_devices; - error = -xfs_icsb_init_counters(mp); + error = xfs_icsb_init_counters(mp); if (error) goto out_destroy_workqueues; @@ -1474,12 +1475,12 @@ xfs_fs_fill_super( root = igrab(VFS_I(mp->m_rootip)); if (!root) { - error = ENOENT; + error = -ENOENT; goto out_unmount; } sb->s_root = d_make_root(root); if (!sb->s_root) { - error = ENOMEM; + error = -ENOMEM; goto out_unmount; } @@ -1499,7 +1500,7 @@ out_destroy_workqueues: xfs_free_fsname(mp); kfree(mp); out: - return -error; + return error; out_unmount: xfs_filestream_unmount(mp); @@ -1761,9 +1762,15 @@ init_xfs_fs(void) if (error) goto out_cleanup_procfs; + xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj); + if (!xfs_kset) { + error = -ENOMEM; + goto out_sysctl_unregister;; + } + error = xfs_qm_init(); if (error) - goto out_sysctl_unregister; + goto out_kset_unregister; error = register_filesystem(&xfs_fs_type); if (error) @@ -1772,6 +1779,8 @@ init_xfs_fs(void) out_qm_exit: xfs_qm_exit(); + out_kset_unregister: + kset_unregister(xfs_kset); out_sysctl_unregister: xfs_sysctl_unregister(); out_cleanup_procfs: @@ -1793,6 +1802,7 @@ exit_xfs_fs(void) { xfs_qm_exit(); unregister_filesystem(&xfs_fs_type); + kset_unregister(xfs_kset); xfs_sysctl_unregister(); xfs_cleanup_procfs(); xfs_buf_terminate(); diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h index bbe3d15a790..2b830c2f322 100644 --- a/fs/xfs/xfs_super.h +++ b/fs/xfs/xfs_super.h @@ -44,16 +44,6 @@ extern void xfs_qm_exit(void); # define XFS_REALTIME_STRING #endif -#if XFS_BIG_BLKNOS -# if XFS_BIG_INUMS -# define XFS_BIGFS_STRING "large block/inode numbers, " -# else -# define XFS_BIGFS_STRING "large block numbers, " -# endif -#else -# define XFS_BIGFS_STRING -#endif - #ifdef DEBUG # define XFS_DBG_STRING "debug" #else @@ -64,7 +54,6 @@ extern void xfs_qm_exit(void); #define XFS_BUILD_OPTIONS XFS_ACL_STRING \ XFS_SECURITY_STRING \ XFS_REALTIME_STRING \ - XFS_BIGFS_STRING \ XFS_DBG_STRING /* DBG must be last */ struct xfs_inode; @@ -76,8 +65,8 @@ extern __uint64_t xfs_max_file_offset(unsigned int); extern void xfs_flush_inodes(struct xfs_mount *mp); extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); -extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *); -extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *); +extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *, xfs_agnumber_t agcount); +extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *, xfs_agnumber_t agcount); extern const struct export_operations xfs_export_operations; extern const struct xattr_handler *xfs_xattr_handlers[]; diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index d69363c833e..6a944a2cd36 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -76,15 +76,15 @@ xfs_readlink_bmap( bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0, &xfs_symlink_buf_ops); if (!bp) - return XFS_ERROR(ENOMEM); + return -ENOMEM; error = bp->b_error; if (error) { xfs_buf_ioerror_alert(bp, __func__); xfs_buf_relse(bp); /* bad CRC means corrupted metadata */ - if (error == EFSBADCRC) - error = EFSCORRUPTED; + if (error == -EFSBADCRC) + error = -EFSCORRUPTED; goto out; } byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt); @@ -95,7 +95,7 @@ xfs_readlink_bmap( if (xfs_sb_version_hascrc(&mp->m_sb)) { if (!xfs_symlink_hdr_ok(ip->i_ino, offset, byte_cnt, bp)) { - error = EFSCORRUPTED; + error = -EFSCORRUPTED; xfs_alert(mp, "symlink header does not match required off/len/owner (0x%x/Ox%x,0x%llx)", offset, byte_cnt, ip->i_ino); @@ -135,7 +135,7 @@ xfs_readlink( trace_xfs_readlink(ip); if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; xfs_ilock(ip, XFS_ILOCK_SHARED); @@ -148,7 +148,7 @@ xfs_readlink( __func__, (unsigned long long) ip->i_ino, (long long) pathlen); ASSERT(0); - error = XFS_ERROR(EFSCORRUPTED); + error = -EFSCORRUPTED; goto out; } @@ -203,14 +203,14 @@ xfs_symlink( trace_xfs_symlink(dp, link_name); if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; /* * Check component lengths of the target path name. */ pathlen = strlen(target_path); if (pathlen >= MAXPATHLEN) /* total string too long */ - return XFS_ERROR(ENAMETOOLONG); + return -ENAMETOOLONG; udqp = gdqp = NULL; prid = xfs_get_initial_prid(dp); @@ -238,7 +238,7 @@ xfs_symlink( fs_blocks = xfs_symlink_blocks(mp, pathlen); resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, resblks, 0); - if (error == ENOSPC && fs_blocks == 0) { + if (error == -ENOSPC && fs_blocks == 0) { resblks = 0; error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, 0, 0); } @@ -254,7 +254,7 @@ xfs_symlink( * Check whether the directory allows new symlinks or not. */ if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) { - error = XFS_ERROR(EPERM); + error = -EPERM; goto error_return; } @@ -284,7 +284,7 @@ xfs_symlink( error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, prid, resblks > 0, &ip, NULL); if (error) { - if (error == ENOSPC) + if (error == -ENOSPC) goto error_return; goto error1; } @@ -348,7 +348,7 @@ xfs_symlink( bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, BTOBB(byte_cnt), 0); if (!bp) { - error = ENOMEM; + error = -ENOMEM; goto error2; } bp->b_ops = &xfs_symlink_buf_ops; @@ -489,7 +489,7 @@ xfs_inactive_symlink_rmt( XFS_FSB_TO_DADDR(mp, mval[i].br_startblock), XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0); if (!bp) { - error = ENOMEM; + error = -ENOMEM; goto error_bmap_cancel; } xfs_trans_binval(tp, bp); @@ -562,7 +562,7 @@ xfs_inactive_symlink( trace_xfs_inactive_symlink(ip); if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -580,7 +580,7 @@ xfs_inactive_symlink( __func__, (unsigned long long)ip->i_ino, pathlen); xfs_iunlock(ip, XFS_ILOCK_EXCL); ASSERT(0); - return XFS_ERROR(EFSCORRUPTED); + return -EFSCORRUPTED; } if (ip->i_df.if_flags & XFS_IFINLINE) { diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c new file mode 100644 index 00000000000..9835139ce1e --- /dev/null +++ b/fs/xfs/xfs_sysfs.c @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2014 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "xfs.h" +#include "xfs_sysfs.h" +#include "xfs_log_format.h" +#include "xfs_log.h" +#include "xfs_log_priv.h" + +struct xfs_sysfs_attr { + struct attribute attr; + ssize_t (*show)(char *buf, void *data); + ssize_t (*store)(const char *buf, size_t count, void *data); +}; + +static inline struct xfs_sysfs_attr * +to_attr(struct attribute *attr) +{ + return container_of(attr, struct xfs_sysfs_attr, attr); +} + +#define XFS_SYSFS_ATTR_RW(name) \ + static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RW(name) +#define XFS_SYSFS_ATTR_RO(name) \ + static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RO(name) + +#define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr + +/* + * xfs_mount kobject. This currently has no attributes and thus no need for show + * and store helpers. The mp kobject serves as the per-mount parent object that + * is identified by the fsname under sysfs. + */ + +struct kobj_type xfs_mp_ktype = { + .release = xfs_sysfs_release, +}; + +/* xlog */ + +STATIC ssize_t +log_head_lsn_show( + char *buf, + void *data) +{ + struct xlog *log = data; + int cycle; + int block; + + spin_lock(&log->l_icloglock); + cycle = log->l_curr_cycle; + block = log->l_curr_block; + spin_unlock(&log->l_icloglock); + + return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block); +} +XFS_SYSFS_ATTR_RO(log_head_lsn); + +STATIC ssize_t +log_tail_lsn_show( + char *buf, + void *data) +{ + struct xlog *log = data; + int cycle; + int block; + + xlog_crack_atomic_lsn(&log->l_tail_lsn, &cycle, &block); + return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block); +} +XFS_SYSFS_ATTR_RO(log_tail_lsn); + +STATIC ssize_t +reserve_grant_head_show( + char *buf, + void *data) +{ + struct xlog *log = data; + int cycle; + int bytes; + + xlog_crack_grant_head(&log->l_reserve_head.grant, &cycle, &bytes); + return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes); +} +XFS_SYSFS_ATTR_RO(reserve_grant_head); + +STATIC ssize_t +write_grant_head_show( + char *buf, + void *data) +{ + struct xlog *log = data; + int cycle; + int bytes; + + xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &bytes); + return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes); +} +XFS_SYSFS_ATTR_RO(write_grant_head); + +static struct attribute *xfs_log_attrs[] = { + ATTR_LIST(log_head_lsn), + ATTR_LIST(log_tail_lsn), + ATTR_LIST(reserve_grant_head), + ATTR_LIST(write_grant_head), + NULL, +}; + +static inline struct xlog * +to_xlog(struct kobject *kobject) +{ + struct xfs_kobj *kobj = to_kobj(kobject); + return container_of(kobj, struct xlog, l_kobj); +} + +STATIC ssize_t +xfs_log_show( + struct kobject *kobject, + struct attribute *attr, + char *buf) +{ + struct xlog *log = to_xlog(kobject); + struct xfs_sysfs_attr *xfs_attr = to_attr(attr); + + return xfs_attr->show ? xfs_attr->show(buf, log) : 0; +} + +STATIC ssize_t +xfs_log_store( + struct kobject *kobject, + struct attribute *attr, + const char *buf, + size_t count) +{ + struct xlog *log = to_xlog(kobject); + struct xfs_sysfs_attr *xfs_attr = to_attr(attr); + + return xfs_attr->store ? xfs_attr->store(buf, count, log) : 0; +} + +static struct sysfs_ops xfs_log_ops = { + .show = xfs_log_show, + .store = xfs_log_store, +}; + +struct kobj_type xfs_log_ktype = { + .release = xfs_sysfs_release, + .sysfs_ops = &xfs_log_ops, + .default_attrs = xfs_log_attrs, +}; diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h new file mode 100644 index 00000000000..54a2091183c --- /dev/null +++ b/fs/xfs/xfs_sysfs.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2014 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef __XFS_SYSFS_H__ +#define __XFS_SYSFS_H__ + +extern struct kobj_type xfs_mp_ktype; /* xfs_mount */ +extern struct kobj_type xfs_log_ktype; /* xlog */ + +static inline struct xfs_kobj * +to_kobj(struct kobject *kobject) +{ + return container_of(kobject, struct xfs_kobj, kobject); +} + +static inline void +xfs_sysfs_release(struct kobject *kobject) +{ + struct xfs_kobj *kobj = to_kobj(kobject); + complete(&kobj->complete); +} + +static inline int +xfs_sysfs_init( + struct xfs_kobj *kobj, + struct kobj_type *ktype, + struct xfs_kobj *parent_kobj, + const char *name) +{ + init_completion(&kobj->complete); + return kobject_init_and_add(&kobj->kobject, ktype, + &parent_kobj->kobject, "%s", name); +} + +static inline void +xfs_sysfs_del( + struct xfs_kobj *kobj) +{ + kobject_del(&kobj->kobject); + kobject_put(&kobj->kobject); + wait_for_completion(&kobj->complete); +} + +#endif /* __XFS_SYSFS_H__ */ diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index d03932564cc..30e8e341095 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -190,7 +190,7 @@ xfs_trans_reserve( -((int64_t)blocks), rsvd); if (error != 0) { current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); - return (XFS_ERROR(ENOSPC)); + return -ENOSPC; } tp->t_blk_res += blocks; } @@ -241,7 +241,7 @@ xfs_trans_reserve( error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FREXTENTS, -((int64_t)rtextents), rsvd); if (error) { - error = XFS_ERROR(ENOSPC); + error = -ENOSPC; goto undo_log; } tp->t_rtx_res += rtextents; @@ -874,7 +874,7 @@ xfs_trans_commit( goto out_unreserve; if (XFS_FORCED_SHUTDOWN(mp)) { - error = XFS_ERROR(EIO); + error = -EIO; goto out_unreserve; } @@ -917,7 +917,7 @@ out_unreserve: if (tp->t_ticket) { commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags); if (commit_lsn == -1 && !error) - error = XFS_ERROR(EIO); + error = -EIO; } current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); xfs_trans_free_items(tp, NULLCOMMITLSN, error ? XFS_TRANS_ABORT : 0); @@ -1024,7 +1024,7 @@ xfs_trans_roll( */ error = xfs_trans_commit(trans, 0); if (error) - return (error); + return error; trans = *tpp; diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index cb0f3a84cc6..859482f53b5 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -762,7 +762,7 @@ xfs_trans_ail_init( ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL); if (!ailp) - return ENOMEM; + return -ENOMEM; ailp->xa_mount = mp; INIT_LIST_HEAD(&ailp->xa_ail); @@ -781,7 +781,7 @@ xfs_trans_ail_init( out_free_ailp: kmem_free(ailp); - return ENOMEM; + return -ENOMEM; } void diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index b8eef0549f3..96c898e7ac9 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -166,7 +166,7 @@ xfs_trans_get_buf_map( ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_recur++; trace_xfs_trans_get_buf_recur(bip); - return (bp); + return bp; } bp = xfs_buf_get_map(target, map, nmaps, flags); @@ -178,7 +178,7 @@ xfs_trans_get_buf_map( _xfs_trans_bjoin(tp, bp, 1); trace_xfs_trans_get_buf(bp->b_fspriv); - return (bp); + return bp; } /* @@ -201,9 +201,8 @@ xfs_trans_getsb(xfs_trans_t *tp, * Default to just trying to lock the superblock buffer * if tp is NULL. */ - if (tp == NULL) { - return (xfs_getsb(mp, flags)); - } + if (tp == NULL) + return xfs_getsb(mp, flags); /* * If the superblock buffer already has this transaction @@ -218,7 +217,7 @@ xfs_trans_getsb(xfs_trans_t *tp, ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_recur++; trace_xfs_trans_getsb_recur(bip); - return (bp); + return bp; } bp = xfs_getsb(mp, flags); @@ -227,7 +226,7 @@ xfs_trans_getsb(xfs_trans_t *tp, _xfs_trans_bjoin(tp, bp, 1); trace_xfs_trans_getsb(bp->b_fspriv); - return (bp); + return bp; } #ifdef DEBUG @@ -267,7 +266,7 @@ xfs_trans_read_buf_map( bp = xfs_buf_read_map(target, map, nmaps, flags, ops); if (!bp) return (flags & XBF_TRYLOCK) ? - EAGAIN : XFS_ERROR(ENOMEM); + -EAGAIN : -ENOMEM; if (bp->b_error) { error = bp->b_error; @@ -277,8 +276,8 @@ xfs_trans_read_buf_map( xfs_buf_relse(bp); /* bad CRC means corrupted metadata */ - if (error == EFSBADCRC) - error = EFSCORRUPTED; + if (error == -EFSBADCRC) + error = -EFSCORRUPTED; return error; } #ifdef DEBUG @@ -287,7 +286,7 @@ xfs_trans_read_buf_map( if (((xfs_req_num++) % xfs_error_mod) == 0) { xfs_buf_relse(bp); xfs_debug(mp, "Returning error!"); - return XFS_ERROR(EIO); + return -EIO; } } } @@ -343,8 +342,8 @@ xfs_trans_read_buf_map( xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR); /* bad CRC means corrupted metadata */ - if (error == EFSBADCRC) - error = EFSCORRUPTED; + if (error == -EFSBADCRC) + error = -EFSCORRUPTED; return error; } } @@ -355,7 +354,7 @@ xfs_trans_read_buf_map( if (XFS_FORCED_SHUTDOWN(mp)) { trace_xfs_trans_read_buf_shut(bp, _RET_IP_); *bpp = NULL; - return XFS_ERROR(EIO); + return -EIO; } @@ -372,7 +371,7 @@ xfs_trans_read_buf_map( if (bp == NULL) { *bpp = NULL; return (flags & XBF_TRYLOCK) ? - 0 : XFS_ERROR(ENOMEM); + 0 : -ENOMEM; } if (bp->b_error) { error = bp->b_error; @@ -384,8 +383,8 @@ xfs_trans_read_buf_map( xfs_buf_relse(bp); /* bad CRC means corrupted metadata */ - if (error == EFSBADCRC) - error = EFSCORRUPTED; + if (error == -EFSBADCRC) + error = -EFSCORRUPTED; return error; } #ifdef DEBUG @@ -396,7 +395,7 @@ xfs_trans_read_buf_map( SHUTDOWN_META_IO_ERROR); xfs_buf_relse(bp); xfs_debug(mp, "Returning trans error!"); - return XFS_ERROR(EIO); + return -EIO; } } } @@ -414,7 +413,7 @@ shutdown_abort: trace_xfs_trans_read_buf_shut(bp, _RET_IP_); xfs_buf_relse(bp); *bpp = NULL; - return XFS_ERROR(EIO); + return -EIO; } /* diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 41172861e85..846e061c2e9 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -722,8 +722,8 @@ xfs_trans_dqresv( error_return: xfs_dqunlock(dqp); if (flags & XFS_QMOPT_ENOSPC) - return ENOSPC; - return EDQUOT; + return -ENOSPC; + return -EDQUOT; } diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index 65c6e6650b1..b79dc66b2ec 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h @@ -38,43 +38,18 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */ typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ -/* - * These types are 64 bits on disk but are either 32 or 64 bits in memory. - * Disk based types: - */ -typedef __uint64_t xfs_dfsbno_t; /* blockno in filesystem (agno|agbno) */ -typedef __uint64_t xfs_drfsbno_t; /* blockno in filesystem (raw) */ -typedef __uint64_t xfs_drtbno_t; /* extent (block) in realtime area */ -typedef __uint64_t xfs_dfiloff_t; /* block number in a file */ -typedef __uint64_t xfs_dfilblks_t; /* number of blocks in a file */ - -/* - * Memory based types are conditional. - */ -#if XFS_BIG_BLKNOS typedef __uint64_t xfs_fsblock_t; /* blockno in filesystem (agno|agbno) */ typedef __uint64_t xfs_rfsblock_t; /* blockno in filesystem (raw) */ typedef __uint64_t xfs_rtblock_t; /* extent (block) in realtime area */ -typedef __int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */ -#else -typedef __uint32_t xfs_fsblock_t; /* blockno in filesystem (agno|agbno) */ -typedef __uint32_t xfs_rfsblock_t; /* blockno in filesystem (raw) */ -typedef __uint32_t xfs_rtblock_t; /* extent (block) in realtime area */ -typedef __int32_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */ -#endif typedef __uint64_t xfs_fileoff_t; /* block number in a file */ -typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */ typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */ +typedef __int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */ +typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */ /* * Null values for the types. */ -#define NULLDFSBNO ((xfs_dfsbno_t)-1) -#define NULLDRFSBNO ((xfs_drfsbno_t)-1) -#define NULLDRTBNO ((xfs_drtbno_t)-1) -#define NULLDFILOFF ((xfs_dfiloff_t)-1) - #define NULLFSBLOCK ((xfs_fsblock_t)-1) #define NULLRFSBLOCK ((xfs_rfsblock_t)-1) #define NULLRTBLOCK ((xfs_rtblock_t)-1) diff --git a/fs/xfs/xfs_vnode.h b/fs/xfs/xfs_vnode.h deleted file mode 100644 index e8a77383c0d..00000000000 --- a/fs/xfs/xfs_vnode.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_VNODE_H__ -#define __XFS_VNODE_H__ - -#include "xfs_fs.h" - -struct file; -struct xfs_inode; -struct attrlist_cursor_kern; - -/* - * Flags for read/write calls - same values as IRIX - */ -#define IO_ISDIRECT 0x00004 /* bypass page cache */ -#define IO_INVIS 0x00020 /* don't update inode timestamps */ - -#define XFS_IO_FLAGS \ - { IO_ISDIRECT, "DIRECT" }, \ - { IO_INVIS, "INVIS"} - -/* - * Some useful predicates. - */ -#define VN_MAPPED(vp) mapping_mapped(vp->i_mapping) -#define VN_CACHED(vp) (vp->i_mapping->nrpages) -#define VN_DIRTY(vp) mapping_tagged(vp->i_mapping, \ - PAGECACHE_TAG_DIRTY) - - -#endif /* __XFS_VNODE_H__ */ diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 78ed92a46fd..93455b99804 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -49,7 +49,7 @@ xfs_xattr_get(struct dentry *dentry, const char *name, value = NULL; } - error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags); + error = xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags); if (error) return error; return asize; @@ -71,8 +71,8 @@ xfs_xattr_set(struct dentry *dentry, const char *name, const void *value, xflags |= ATTR_REPLACE; if (!value) - return -xfs_attr_remove(ip, (unsigned char *)name, xflags); - return -xfs_attr_set(ip, (unsigned char *)name, + return xfs_attr_remove(ip, (unsigned char *)name, xflags); + return xfs_attr_set(ip, (unsigned char *)name, (void *)value, size, xflags); } |