summary | refs | log | tree | commit | diff | stats
path: root/fs/namei.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- fs/namei.c 211
1 files changed, 166 insertions, 45 deletions
diff --git a/fs/namei.c b/fs/namei.c
index b7fad009bbf..5008f01787f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -32,6 +32,7 @@
#include <linux/fcntl.h>
#include <linux/device_cgroup.h>
#include <linux/fs_struct.h>
+#include <linux/posix_acl.h>
#include <asm/uaccess.h>
#include "internal.h"
@@ -136,7 +137,7 @@ static int do_getname(const char __user *filename, char *page)
return retval;
}
-static char *getname_flags(const char __user * filename, int flags)
+static char *getname_flags(const char __user *filename, int flags, int *empty)
{
char *tmp, *result;
@@ -147,6 +148,8 @@ static char *getname_flags(const char __user * filename, int flags)
result = tmp;
if (retval < 0) {
+ if (retval == -ENOENT && empty)
+ *empty = 1;
if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
__putname(tmp);
result = ERR_PTR(retval);
@@ -159,7 +162,7 @@ static char *getname_flags(const char __user * filename, int flags)
char *getname(const char __user * filename)
{
- return getname_flags(filename, 0);
+ return getname_flags(filename, 0, 0);
}
#ifdef CONFIG_AUDITSYSCALL
@@ -173,24 +176,66 @@ void putname(const char *name)
EXPORT_SYMBOL(putname);
#endif
+/*
+ * Check @mask against the POSIX access ACL of @inode.
+ *
+ * Returns the result of posix_acl_permission() when an ACL is available,
+ * -EAGAIN when there is no ACL to consult (or CONFIG_FS_POSIX_ACL is not
+ * set), -ECHILD when called with MAY_NOT_BLOCK and the ACL is not cached
+ * yet, or the error reported by ->get_acl().
+ */
+static int check_acl(struct inode *inode, int mask)
+{
+#ifdef CONFIG_FS_POSIX_ACL
+ struct posix_acl *acl;
+
+ if (mask & MAY_NOT_BLOCK) {
+ acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS);
+ if (!acl)
+ return -EAGAIN;
+ /* no ->get_acl() calls in RCU mode... */
+ if (acl == ACL_NOT_CACHED)
+ return -ECHILD;
+ /* MAY_NOT_BLOCK is stripped before the ACL itself is evaluated */
+ return posix_acl_permission(inode, acl, mask & ~MAY_NOT_BLOCK);
+ }
+
+ acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
+
+ /*
+ * A filesystem can force an ACL callback by just never filling the
+ * ACL cache. But normally you'd fill the cache either at inode
+ * instantiation time, or on the first ->get_acl call.
+ *
+ * If the filesystem doesn't have a get_acl() function at all, we'll
+ * just create the negative cache entry.
+ */
+ if (acl == ACL_NOT_CACHED) {
+ if (inode->i_op->get_acl) {
+ acl = inode->i_op->get_acl(inode, ACL_TYPE_ACCESS);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ } else {
+ set_cached_acl(inode, ACL_TYPE_ACCESS, NULL);
+ return -EAGAIN;
+ }
+ }
+
+ if (acl) {
+ int error = posix_acl_permission(inode, acl, mask);
+ posix_acl_release(acl);
+ return error;
+ }
+#endif
+
+ return -EAGAIN;
+}
+
/*
- * This does basic POSIX ACL permission checking
+ * This does the basic permission checking
*/
static int acl_permission_check(struct inode *inode, int mask)
{
- int (*check_acl)(struct inode *inode, int mask);
unsigned int mode = inode->i_mode;
- mask &= MAY_READ | MAY_WRITE | MAY_EXEC | MAY_NOT_BLOCK;
-
if (current_user_ns() != inode_userns(inode))
goto other_perms;
- if (current_fsuid() == inode->i_uid)
+ if (likely(current_fsuid() == inode->i_uid))
mode >>= 6;
else {
- check_acl = inode->i_op->check_acl;
- if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
+ if (IS_POSIXACL(inode) && (mode & S_IRWXG)) {
int error = check_acl(inode, mask);
if (error != -EAGAIN)
return error;
@@ -212,7 +257,7 @@ other_perms:
/**
* generic_permission - check for access rights on a Posix-like filesystem
* @inode: inode to check access rights for
- * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
+ * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...)
*
* Used to check for read/write/execute permissions on a file.
* We use "fsuid" for this, letting us set arbitrary permissions
@@ -228,7 +273,7 @@ int generic_permission(struct inode *inode, int mask)
int ret;
/*
- * Do the basic POSIX ACL permission checks.
+ * Do the basic permission checks.
*/
ret = acl_permission_check(inode, mask);
if (ret != -EACCES)
@@ -263,21 +308,43 @@ int generic_permission(struct inode *inode, int mask)
return -EACCES;
}
+/*
+ * We _really_ want to just do "generic_permission()" without
+ * even looking at the inode->i_op values. So we keep a cache
+ * flag in inode->i_opflags, that says "this has no special
+ * permission function, use the fast case".
+ *
+ * Returns the result of ->permission() if the inode provides one,
+ * otherwise the result of generic_permission().
+ */
+static inline int do_inode_permission(struct inode *inode, int mask)
+{
+ if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
+ if (likely(inode->i_op->permission))
+ return inode->i_op->permission(inode, mask);
+
+ /* This gets set once for the inode lifetime */
+ spin_lock(&inode->i_lock);
+ inode->i_opflags |= IOP_FASTPERM;
+ spin_unlock(&inode->i_lock);
+ }
+ return generic_permission(inode, mask);
+}
+
/**
* inode_permission - check for access rights to a given inode
* @inode: inode to check permission on
- * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
+ * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...)
*
* Used to check for read/write/execute permissions on an inode.
* We use "fsuid" for this, letting us set arbitrary permissions
* for filesystem access without changing the "normal" uids which
* are used for other things.
+ *
+ * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
*/
int inode_permission(struct inode *inode, int mask)
{
int retval;
- if (mask & MAY_WRITE) {
+ if (unlikely(mask & MAY_WRITE)) {
umode_t mode = inode->i_mode;
/*
@@ -294,11 +361,7 @@ int inode_permission(struct inode *inode, int mask)
return -EACCES;
}
- if (inode->i_op->permission)
- retval = inode->i_op->permission(inode, mask);
- else
- retval = generic_permission(inode, mask);
-
+ retval = do_inode_permission(inode, mask);
if (retval)
return retval;
@@ -660,23 +723,20 @@ static int follow_automount(struct path *path, unsigned flags,
if (!path->dentry->d_op || !path->dentry->d_op->d_automount)
return -EREMOTE;
- /* We don't want to mount if someone supplied AT_NO_AUTOMOUNT
- * and this is the terminal part of the path.
- */
- if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT))
- return -EISDIR; /* we actually want to stop here */
-
- /* We want to mount if someone is trying to open/create a file of any
- * type under the mountpoint, wants to traverse through the mountpoint
- * or wants to open the mounted directory.
+ /* We don't want to mount if someone's just doing a stat -
+ * unless they're stat'ing a directory and appended a '/' to
+ * the name.
*
- * We don't want to mount if someone's just doing a stat and they've
- * set AT_SYMLINK_NOFOLLOW - unless they're stat'ing a directory and
- * appended a '/' to the name.
+ * We do, however, want to mount if someone wants to open or
+ * create a file of any type under the mountpoint, wants to
+ * traverse through the mountpoint or wants to open the
+ * mounted directory. Also, autofs may mark negative dentries
+ * as being automount points. These will need the attentions
+ * of the daemon to instantiate them before they can be used.
*/
- if (!(flags & LOOKUP_FOLLOW) &&
- !(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
- LOOKUP_OPEN | LOOKUP_CREATE)))
+ if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
+ LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
+ path->dentry->d_inode)
return -EISDIR;
current->total_link_count++;
@@ -792,7 +852,7 @@ static int follow_managed(struct path *path, unsigned flags)
mntput(path->mnt);
if (ret == -EISDIR)
ret = 0;
- return ret;
+ return ret < 0 ? ret : need_mntput;
}
int follow_down_one(struct path *path)
@@ -840,6 +900,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
break;
path->mnt = mounted;
path->dentry = mounted->mnt_root;
+ nd->flags |= LOOKUP_JUMPED;
nd->seq = read_seqcount_begin(&path->dentry->d_seq);
/*
* Update the inode too. We don't need to re-check the
@@ -1153,6 +1214,8 @@ retry:
path_put_conditional(path, nd);
return err;
}
+ if (err)
+ nd->flags |= LOOKUP_JUMPED;
*inode = path->dentry->d_inode;
return 0;
}
@@ -1194,6 +1257,26 @@ static void terminate_walk(struct nameidata *nd)
}
}
+/*
+ * Do we need to follow links? We _really_ want to be able
+ * to do this check without having to look at inode->i_op,
+ * so we keep a cache of "no, this doesn't need follow_link"
+ * for the common case.
+ *
+ * Returns @follow when the inode has a ->follow_link method.
+ * Otherwise returns 0, recording IOP_NOFOLLOW in ->i_opflags so the
+ * next call can skip the ->i_op access.
+ */
+static inline int should_follow_link(struct inode *inode, int follow)
+{
+ if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
+ if (likely(inode->i_op->follow_link))
+ return follow;
+
+ /* This gets set once for the inode lifetime */
+ spin_lock(&inode->i_lock);
+ inode->i_opflags |= IOP_NOFOLLOW;
+ spin_unlock(&inode->i_lock);
+ }
+ return 0;
+}
+
static inline int walk_component(struct nameidata *nd, struct path *path,
struct qstr *name, int type, int follow)
{
@@ -1216,7 +1299,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
terminate_walk(nd);
return -ENOENT;
}
- if (unlikely(inode->i_op->follow_link) && follow) {
+ if (should_follow_link(inode, follow)) {
if (nd->flags & LOOKUP_RCU) {
if (unlikely(unlazy_walk(nd, path->dentry))) {
terminate_walk(nd);
@@ -1269,6 +1352,26 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
}
/*
+ * We really don't want to look at inode->i_op->lookup
+ * when we don't have to. So we keep a cache bit in
+ * the inode ->i_opflags field that says "yes, we can
+ * do lookup on this inode".
+ *
+ * Returns 1 when IOP_LOOKUP is already set or the inode has a
+ * ->lookup method (the bit is set in that case), 0 otherwise.
+ */
+static inline int can_lookup(struct inode *inode)
+{
+ if (likely(inode->i_opflags & IOP_LOOKUP))
+ return 1;
+ if (likely(!inode->i_op->lookup))
+ return 0;
+
+ /* We do this once for the lifetime of the inode */
+ spin_lock(&inode->i_lock);
+ inode->i_opflags |= IOP_LOOKUP;
+ spin_unlock(&inode->i_lock);
+ return 1;
+}
+
+/*
* Name resolution.
* This is the basic name resolution function, turning a pathname into
* the final dentry. We expect 'base' to be positive and a directory.
@@ -1347,10 +1450,10 @@ static int link_path_walk(const char *name, struct nameidata *nd)
if (err)
return err;
}
+ if (can_lookup(nd->inode))
+ continue;
err = -ENOTDIR;
- if (!nd->inode->i_op->lookup)
- break;
- continue;
+ break;
/* here ends the main loop */
last_component:
@@ -1700,11 +1803,11 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
return __lookup_hash(&this, base, NULL);
}
-int user_path_at(int dfd, const char __user *name, unsigned flags,
- struct path *path)
+int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
+ struct path *path, int *empty)
{
struct nameidata nd;
- char *tmp = getname_flags(name, flags);
+ char *tmp = getname_flags(name, flags, empty);
int err = PTR_ERR(tmp);
if (!IS_ERR(tmp)) {
@@ -1718,6 +1821,12 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
return err;
}
+/*
+ * Wrapper around user_path_at_empty() for callers that do not want the
+ * @empty indication: a NULL pointer is passed in its place.
+ */
+int user_path_at(int dfd, const char __user *name, unsigned flags,
+ struct path *path)
+{
+ return user_path_at_empty(dfd, name, flags, path, NULL);
+}
+
static int user_path_parent(int dfd, const char __user *path,
struct nameidata *nd, char **name)
{
@@ -1937,10 +2046,7 @@ static int may_open(struct path *path, int acc_mode, int flag)
if (flag & O_NOATIME && !inode_owner_or_capable(inode))
return -EPERM;
- /*
- * Ensure there are no outstanding leases on the file.
- */
- return break_lease(inode, flag);
+ return 0;
}
static int handle_truncate(struct file *filp)
@@ -2043,6 +2149,10 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
}
/* create side of things */
+ /*
+ * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED has been
+ * cleared when we got to the last component we are about to look up
+ */
error = complete_walk(nd);
if (error)
return ERR_PTR(error);
@@ -2111,6 +2221,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
if (error < 0)
goto exit_dput;
+ if (error)
+ nd->flags |= LOOKUP_JUMPED;
+
error = -ENOENT;
if (!path->dentry->d_inode)
goto exit_dput;
@@ -2120,6 +2233,10 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
path_to_nameidata(path, nd);
nd->inode = path->dentry->d_inode;
+ /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */
+ error = complete_walk(nd);
+ if (error)
+ goto exit;
error = -EISDIR;
if (S_ISDIR(nd->inode->i_mode))
goto exit;
@@ -2512,6 +2629,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
if (!dir->i_op->rmdir)
return -EPERM;
+ dget(dentry);
mutex_lock(&dentry->d_inode->i_mutex);
error = -EBUSY;
@@ -2532,6 +2650,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
out:
mutex_unlock(&dentry->d_inode->i_mutex);
+ dput(dentry);
if (!error)
d_delete(dentry);
return error;
@@ -2921,6 +3040,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
if (error)
return error;
+ dget(new_dentry);
if (target)
mutex_lock(&target->i_mutex);
@@ -2941,6 +3061,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
out:
if (target)
mutex_unlock(&target->i_mutex);
+ dput(new_dentry);
if (!error)
if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
d_move(old_dentry,new_dentry);