summaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_iops.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_iops.c')
-rw-r--r--fs/xfs/xfs_iops.c93
1 files changed, 76 insertions, 17 deletions
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 9ddfb8190ca..205613a0606 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -39,6 +39,7 @@
#include "xfs_da_btree.h"
#include "xfs_dir2_priv.h"
#include "xfs_dinode.h"
+#include "xfs_trans_space.h"
#include <linux/capability.h>
#include <linux/xattr.h>
@@ -48,6 +49,18 @@
#include <linux/fiemap.h>
#include <linux/slab.h>
+/*
+ * Directories have different lock order w.r.t. mmap_sem compared to regular
+ * files. This is due to readdir potentially triggering page faults on a user
+ * buffer inside filldir(), and this happens with the ilock on the directory
+ * held. For regular files, the lock order is the other way around - the
+ * mmap_sem is taken during the page fault, and then we lock the ilock to do
+ * block mapping. Hence we need a different class for the directory ilock so
+ * that lockdep can tell them apart.
+ */
+static struct lock_class_key xfs_nondir_ilock_class;
+static struct lock_class_key xfs_dir_ilock_class;
+
static int
xfs_initxattrs(
struct inode *inode,
@@ -59,8 +72,8 @@ xfs_initxattrs(
int error = 0;
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
- error = xfs_attr_set(ip, xattr->name, xattr->value,
- xattr->value_len, ATTR_SECURE);
+ error = -xfs_attr_set(ip, xattr->name, xattr->value,
+ xattr->value_len, ATTR_SECURE);
if (error < 0)
break;
}
@@ -80,8 +93,8 @@ xfs_init_security(
struct inode *dir,
const struct qstr *qstr)
{
- return security_inode_init_security(inode, dir, qstr,
- &xfs_initxattrs, NULL);
+ return -security_inode_init_security(inode, dir, qstr,
+ &xfs_initxattrs, NULL);
}
static void
@@ -111,15 +124,15 @@ xfs_cleanup_inode(
xfs_dentry_to_name(&teardown, dentry, 0);
xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
- iput(inode);
}
STATIC int
-xfs_vn_mknod(
+xfs_generic_create(
struct inode *dir,
struct dentry *dentry,
umode_t mode,
- dev_t rdev)
+ dev_t rdev,
+ bool tmpfile) /* unnamed file */
{
struct inode *inode;
struct xfs_inode *ip = NULL;
@@ -143,8 +156,12 @@ xfs_vn_mknod(
if (error)
return error;
- xfs_dentry_to_name(&name, dentry, mode);
- error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
+ if (!tmpfile) {
+ xfs_dentry_to_name(&name, dentry, mode);
+ error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
+ } else {
+ error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip);
+ }
if (unlikely(error))
goto out_free_acl;
@@ -156,18 +173,22 @@ xfs_vn_mknod(
#ifdef CONFIG_XFS_POSIX_ACL
if (default_acl) {
- error = xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
+ error = -xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
if (error)
goto out_cleanup_inode;
}
if (acl) {
- error = xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
+ error = -xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
if (error)
goto out_cleanup_inode;
}
#endif
- d_instantiate(dentry, inode);
+ if (tmpfile)
+ d_tmpfile(dentry, inode);
+ else
+ d_instantiate(dentry, inode);
+
out_free_acl:
if (default_acl)
posix_acl_release(default_acl);
@@ -176,11 +197,23 @@ xfs_vn_mknod(
return -error;
out_cleanup_inode:
- xfs_cleanup_inode(dir, inode, dentry);
+ if (!tmpfile)
+ xfs_cleanup_inode(dir, inode, dentry);
+ iput(inode);
goto out_free_acl;
}
STATIC int
+xfs_vn_mknod(
+ struct inode *dir,
+ struct dentry *dentry,
+ umode_t mode,
+ dev_t rdev)
+{
+ return xfs_generic_create(dir, dentry, mode, rdev, false);
+}
+
+STATIC int
xfs_vn_create(
struct inode *dir,
struct dentry *dentry,
@@ -340,6 +373,7 @@ xfs_vn_symlink(
out_cleanup_inode:
xfs_cleanup_inode(dir, inode, dentry);
+ iput(inode);
out:
return -error;
}
@@ -795,22 +829,34 @@ xfs_setattr_size(
*/
inode_dio_wait(inode);
+ /*
+ * Do all the page cache truncate work outside the transaction context
+ * as the "lock" order is page lock->log space reservation. i.e.
+ * locking pages inside the transaction can ABBA deadlock with
+ * writeback. We have to do the VFS inode size update before we truncate
+ * the pagecache, however, to avoid racing with page faults beyond the
+ * new EOF they are not serialised against truncate operations except by
+ * page locks and size updates.
+ *
+ * Hence we are in a situation where a truncate can fail with ENOMEM
+ * from xfs_trans_reserve(), but having already truncated the in-memory
+ * version of the file (i.e. made user visible changes). There's not
+ * much we can do about this, except to hope that the caller sees ENOMEM
+ * and retries the truncate operation.
+ */
error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
if (error)
return error;
+ truncate_setsize(inode, newsize);
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
if (error)
goto out_trans_cancel;
- truncate_setsize(inode, newsize);
-
commit_flags = XFS_TRANS_RELEASE_LOG_RES;
lock_flags |= XFS_ILOCK_EXCL;
-
xfs_ilock(ip, XFS_ILOCK_EXCL);
-
xfs_trans_ijoin(tp, ip, 0);
/*
@@ -1034,6 +1080,15 @@ xfs_vn_fiemap(
return 0;
}
+STATIC int
+xfs_vn_tmpfile(
+ struct inode *dir,
+ struct dentry *dentry,
+ umode_t mode)
+{
+ return xfs_generic_create(dir, dentry, mode, 0, true);
+}
+
static const struct inode_operations xfs_inode_operations = {
.get_acl = xfs_get_acl,
.set_acl = xfs_set_acl,
@@ -1072,6 +1127,7 @@ static const struct inode_operations xfs_dir_inode_operations = {
.removexattr = generic_removexattr,
.listxattr = xfs_vn_listxattr,
.update_time = xfs_vn_update_time,
+ .tmpfile = xfs_vn_tmpfile,
};
static const struct inode_operations xfs_dir_ci_inode_operations = {
@@ -1099,6 +1155,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
.removexattr = generic_removexattr,
.listxattr = xfs_vn_listxattr,
.update_time = xfs_vn_update_time,
+ .tmpfile = xfs_vn_tmpfile,
};
static const struct inode_operations xfs_symlink_inode_operations = {
@@ -1191,6 +1248,7 @@ xfs_setup_inode(
xfs_diflags_to_iflags(inode, ip);
ip->d_ops = ip->i_mount->m_nondir_inode_ops;
+ lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
inode->i_op = &xfs_inode_operations;
@@ -1198,6 +1256,7 @@ xfs_setup_inode(
inode->i_mapping->a_ops = &xfs_address_space_operations;
break;
case S_IFDIR:
+ lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class);
if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
inode->i_op = &xfs_dir_ci_inode_operations;
else