diff options
Diffstat (limited to 'fs/xfs/xfs_iops.c')
-rw-r--r-- | fs/xfs/xfs_iops.c | 93 |
1 files changed, 76 insertions, 17 deletions
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 9ddfb8190ca..205613a0606 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -39,6 +39,7 @@ #include "xfs_da_btree.h" #include "xfs_dir2_priv.h" #include "xfs_dinode.h" +#include "xfs_trans_space.h" #include <linux/capability.h> #include <linux/xattr.h> @@ -48,6 +49,18 @@ #include <linux/fiemap.h> #include <linux/slab.h> +/* + * Directories have different lock order w.r.t. mmap_sem compared to regular + * files. This is due to readdir potentially triggering page faults on a user + * buffer inside filldir(), and this happens with the ilock on the directory + * held. For regular files, the lock order is the other way around - the + * mmap_sem is taken during the page fault, and then we lock the ilock to do + * block mapping. Hence we need a different class for the directory ilock so + * that lockdep can tell them apart. + */ +static struct lock_class_key xfs_nondir_ilock_class; +static struct lock_class_key xfs_dir_ilock_class; + static int xfs_initxattrs( struct inode *inode, @@ -59,8 +72,8 @@ xfs_initxattrs( int error = 0; for (xattr = xattr_array; xattr->name != NULL; xattr++) { - error = xfs_attr_set(ip, xattr->name, xattr->value, - xattr->value_len, ATTR_SECURE); + error = -xfs_attr_set(ip, xattr->name, xattr->value, + xattr->value_len, ATTR_SECURE); if (error < 0) break; } @@ -80,8 +93,8 @@ xfs_init_security( struct inode *dir, const struct qstr *qstr) { - return security_inode_init_security(inode, dir, qstr, - &xfs_initxattrs, NULL); + return -security_inode_init_security(inode, dir, qstr, + &xfs_initxattrs, NULL); } static void @@ -111,15 +124,15 @@ xfs_cleanup_inode( xfs_dentry_to_name(&teardown, dentry, 0); xfs_remove(XFS_I(dir), &teardown, XFS_I(inode)); - iput(inode); } STATIC int -xfs_vn_mknod( +xfs_generic_create( struct inode *dir, struct dentry *dentry, umode_t mode, - dev_t rdev) + dev_t rdev, + bool tmpfile) /* unnamed file */ { struct inode *inode; struct xfs_inode *ip = NULL; @@ -143,8 +156,12 @@ xfs_vn_mknod( if (error) return error; - xfs_dentry_to_name(&name, dentry, mode); - error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); + if (!tmpfile) { + xfs_dentry_to_name(&name, dentry, mode); + error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); + } else { + error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip); + } if (unlikely(error)) goto out_free_acl; @@ -156,18 +173,22 @@ xfs_vn_mknod( #ifdef CONFIG_XFS_POSIX_ACL if (default_acl) { - error = xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + error = -xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); if (error) goto out_cleanup_inode; } if (acl) { - error = xfs_set_acl(inode, acl, ACL_TYPE_ACCESS); + error = -xfs_set_acl(inode, acl, ACL_TYPE_ACCESS); if (error) goto out_cleanup_inode; } #endif - d_instantiate(dentry, inode); + if (tmpfile) + d_tmpfile(dentry, inode); + else + d_instantiate(dentry, inode); + out_free_acl: if (default_acl) posix_acl_release(default_acl); @@ -176,11 +197,23 @@ xfs_vn_mknod( return -error; out_cleanup_inode: - xfs_cleanup_inode(dir, inode, dentry); + if (!tmpfile) + xfs_cleanup_inode(dir, inode, dentry); + iput(inode); goto out_free_acl; } STATIC int +xfs_vn_mknod( + struct inode *dir, + struct dentry *dentry, + umode_t mode, + dev_t rdev) +{ + return xfs_generic_create(dir, dentry, mode, rdev, false); +} + +STATIC int xfs_vn_create( struct inode *dir, struct dentry *dentry, @@ -340,6 +373,7 @@ xfs_vn_symlink( out_cleanup_inode: xfs_cleanup_inode(dir, inode, dentry); + iput(inode); out: return -error; } @@ -795,22 +829,34 @@ xfs_setattr_size( */ inode_dio_wait(inode); + /* + * Do all the page cache truncate work outside the transaction context + * as the "lock" order is page lock->log space reservation. i.e. + * locking pages inside the transaction can ABBA deadlock with + * writeback. We have to do the VFS inode size update before we truncate + * the pagecache, however, to avoid racing with page faults beyond the + * new EOF they are not serialised against truncate operations except by + * page locks and size updates. + * + * Hence we are in a situation where a truncate can fail with ENOMEM + * from xfs_trans_reserve(), but having already truncated the in-memory + * version of the file (i.e. made user visible changes). There's not + * much we can do about this, except to hope that the caller sees ENOMEM + * and retries the truncate operation. + */ error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); if (error) return error; + truncate_setsize(inode, newsize); tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); if (error) goto out_trans_cancel; - truncate_setsize(inode, newsize); - commit_flags = XFS_TRANS_RELEASE_LOG_RES; lock_flags |= XFS_ILOCK_EXCL; - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, 0); /* @@ -1034,6 +1080,15 @@ xfs_vn_fiemap( return 0; } +STATIC int +xfs_vn_tmpfile( + struct inode *dir, + struct dentry *dentry, + umode_t mode) +{ + return xfs_generic_create(dir, dentry, mode, 0, true); +} + static const struct inode_operations xfs_inode_operations = { .get_acl = xfs_get_acl, .set_acl = xfs_set_acl, @@ -1072,6 +1127,7 @@ static const struct inode_operations xfs_dir_inode_operations = { .removexattr = generic_removexattr, .listxattr = xfs_vn_listxattr, .update_time = xfs_vn_update_time, + .tmpfile = xfs_vn_tmpfile, }; static const struct inode_operations xfs_dir_ci_inode_operations = { @@ -1099,6 +1155,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = { .removexattr = generic_removexattr, .listxattr = xfs_vn_listxattr, .update_time = xfs_vn_update_time, + .tmpfile = xfs_vn_tmpfile, }; static const struct inode_operations xfs_symlink_inode_operations = { @@ -1191,6 +1248,7 @@ xfs_setup_inode( xfs_diflags_to_iflags(inode, ip); ip->d_ops = ip->i_mount->m_nondir_inode_ops; + lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class); switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_op = &xfs_inode_operations; @@ -1198,6 +1256,7 @@ xfs_setup_inode( inode->i_mapping->a_ops = &xfs_address_space_operations; break; case S_IFDIR: + lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class); if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) inode->i_op = &xfs_dir_ci_inode_operations; else |