diff options
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- | fs/namei.c | 211 |
1 files changed, 166 insertions, 45 deletions
diff --git a/fs/namei.c b/fs/namei.c index b7fad009bbf..5008f01787f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -32,6 +32,7 @@ #include <linux/fcntl.h> #include <linux/device_cgroup.h> #include <linux/fs_struct.h> +#include <linux/posix_acl.h> #include <asm/uaccess.h> #include "internal.h" @@ -136,7 +137,7 @@ static int do_getname(const char __user *filename, char *page) return retval; } -static char *getname_flags(const char __user * filename, int flags) +static char *getname_flags(const char __user *filename, int flags, int *empty) { char *tmp, *result; @@ -147,6 +148,8 @@ static char *getname_flags(const char __user * filename, int flags) result = tmp; if (retval < 0) { + if (retval == -ENOENT && empty) + *empty = 1; if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) { __putname(tmp); result = ERR_PTR(retval); @@ -159,7 +162,7 @@ static char *getname_flags(const char __user * filename, int flags) char *getname(const char __user * filename) { - return getname_flags(filename, 0); + return getname_flags(filename, 0, 0); } #ifdef CONFIG_AUDITSYSCALL @@ -173,24 +176,66 @@ void putname(const char *name) EXPORT_SYMBOL(putname); #endif +static int check_acl(struct inode *inode, int mask) +{ +#ifdef CONFIG_FS_POSIX_ACL + struct posix_acl *acl; + + if (mask & MAY_NOT_BLOCK) { + acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS); + if (!acl) + return -EAGAIN; + /* no ->get_acl() calls in RCU mode... */ + if (acl == ACL_NOT_CACHED) + return -ECHILD; + return posix_acl_permission(inode, acl, mask & ~MAY_NOT_BLOCK); + } + + acl = get_cached_acl(inode, ACL_TYPE_ACCESS); + + /* + * A filesystem can force a ACL callback by just never filling the + * ACL cache. But normally you'd fill the cache either at inode + * instantiation time, or on the first ->get_acl call. + * + * If the filesystem doesn't have a get_acl() function at all, we'll + * just create the negative cache entry. + */ + if (acl == ACL_NOT_CACHED) { + if (inode->i_op->get_acl) { + acl = inode->i_op->get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } else { + set_cached_acl(inode, ACL_TYPE_ACCESS, NULL); + return -EAGAIN; + } + } + + if (acl) { + int error = posix_acl_permission(inode, acl, mask); + posix_acl_release(acl); + return error; + } +#endif + + return -EAGAIN; +} + /* - * This does basic POSIX ACL permission checking + * This does the basic permission checking */ static int acl_permission_check(struct inode *inode, int mask) { - int (*check_acl)(struct inode *inode, int mask); unsigned int mode = inode->i_mode; - mask &= MAY_READ | MAY_WRITE | MAY_EXEC | MAY_NOT_BLOCK; - if (current_user_ns() != inode_userns(inode)) goto other_perms; - if (current_fsuid() == inode->i_uid) + if (likely(current_fsuid() == inode->i_uid)) mode >>= 6; else { - check_acl = inode->i_op->check_acl; - if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { + if (IS_POSIXACL(inode) && (mode & S_IRWXG)) { int error = check_acl(inode, mask); if (error != -EAGAIN) return error; @@ -212,7 +257,7 @@ other_perms: /** * generic_permission - check for access rights on a Posix-like filesystem * @inode: inode to check access rights for - * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) + * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...) * * Used to check for read/write/execute permissions on a file. * We use "fsuid" for this, letting us set arbitrary permissions @@ -228,7 +273,7 @@ int generic_permission(struct inode *inode, int mask) int ret; /* - * Do the basic POSIX ACL permission checks. + * Do the basic permission checks. */ ret = acl_permission_check(inode, mask); if (ret != -EACCES) @@ -263,21 +308,43 @@ int generic_permission(struct inode *inode, int mask) return -EACCES; } +/* + * We _really_ want to just do "generic_permission()" without + * even looking at the inode->i_op values. So we keep a cache + * flag in inode->i_opflags, that says "this has not special + * permission function, use the fast case". + */ +static inline int do_inode_permission(struct inode *inode, int mask) +{ + if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) { + if (likely(inode->i_op->permission)) + return inode->i_op->permission(inode, mask); + + /* This gets set once for the inode lifetime */ + spin_lock(&inode->i_lock); + inode->i_opflags |= IOP_FASTPERM; + spin_unlock(&inode->i_lock); + } + return generic_permission(inode, mask); +} + /** * inode_permission - check for access rights to a given inode * @inode: inode to check permission on - * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) + * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...) * * Used to check for read/write/execute permissions on an inode. * We use "fsuid" for this, letting us set arbitrary permissions * for filesystem access without changing the "normal" uids which * are used for other things. + * + * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask. */ int inode_permission(struct inode *inode, int mask) { int retval; - if (mask & MAY_WRITE) { + if (unlikely(mask & MAY_WRITE)) { umode_t mode = inode->i_mode; /* @@ -294,11 +361,7 @@ int inode_permission(struct inode *inode, int mask) return -EACCES; } - if (inode->i_op->permission) - retval = inode->i_op->permission(inode, mask); - else - retval = generic_permission(inode, mask); - + retval = do_inode_permission(inode, mask); if (retval) return retval; @@ -660,23 +723,20 @@ static int follow_automount(struct path *path, unsigned flags, if (!path->dentry->d_op || !path->dentry->d_op->d_automount) return -EREMOTE; - /* We don't want to mount if someone supplied AT_NO_AUTOMOUNT - * and this is the terminal part of the path. - */ - if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT)) - return -EISDIR; /* we actually want to stop here */ - - /* We want to mount if someone is trying to open/create a file of any - * type under the mountpoint, wants to traverse through the mountpoint - * or wants to open the mounted directory. + /* We don't want to mount if someone's just doing a stat - + * unless they're stat'ing a directory and appended a '/' to + * the name. * - * We don't want to mount if someone's just doing a stat and they've - * set AT_SYMLINK_NOFOLLOW - unless they're stat'ing a directory and - * appended a '/' to the name. + * We do, however, want to mount if someone wants to open or + * create a file of any type under the mountpoint, wants to + * traverse through the mountpoint or wants to open the + * mounted directory. Also, autofs may mark negative dentries + * as being automount points. These will need the attentions + * of the daemon to instantiate them before they can be used. */ - if (!(flags & LOOKUP_FOLLOW) && - !(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | - LOOKUP_OPEN | LOOKUP_CREATE))) + if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | + LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) && + path->dentry->d_inode) return -EISDIR; current->total_link_count++; @@ -792,7 +852,7 @@ static int follow_managed(struct path *path, unsigned flags) mntput(path->mnt); if (ret == -EISDIR) ret = 0; - return ret; + return ret < 0 ? ret : need_mntput; } int follow_down_one(struct path *path) @@ -840,6 +900,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, break; path->mnt = mounted; path->dentry = mounted->mnt_root; + nd->flags |= LOOKUP_JUMPED; nd->seq = read_seqcount_begin(&path->dentry->d_seq); /* * Update the inode too. We don't need to re-check the @@ -1153,6 +1214,8 @@ retry: path_put_conditional(path, nd); return err; } + if (err) + nd->flags |= LOOKUP_JUMPED; *inode = path->dentry->d_inode; return 0; } @@ -1194,6 +1257,26 @@ static void terminate_walk(struct nameidata *nd) } } +/* + * Do we need to follow links? We _really_ want to be able + * to do this check without having to look at inode->i_op, + * so we keep a cache of "no, this doesn't need follow_link" + * for the common case. + */ +static inline int should_follow_link(struct inode *inode, int follow) +{ + if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) { + if (likely(inode->i_op->follow_link)) + return follow; + + /* This gets set once for the inode lifetime */ + spin_lock(&inode->i_lock); + inode->i_opflags |= IOP_NOFOLLOW; + spin_unlock(&inode->i_lock); + } + return 0; +} + static inline int walk_component(struct nameidata *nd, struct path *path, struct qstr *name, int type, int follow) { @@ -1216,7 +1299,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path, terminate_walk(nd); return -ENOENT; } - if (unlikely(inode->i_op->follow_link) && follow) { + if (should_follow_link(inode, follow)) { if (nd->flags & LOOKUP_RCU) { if (unlikely(unlazy_walk(nd, path->dentry))) { terminate_walk(nd); @@ -1269,6 +1352,26 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd) } /* + * We really don't want to look at inode->i_op->lookup + * when we don't have to. So we keep a cache bit in + * the inode ->i_opflags field that says "yes, we can + * do lookup on this inode". + */ +static inline int can_lookup(struct inode *inode) +{ + if (likely(inode->i_opflags & IOP_LOOKUP)) + return 1; + if (likely(!inode->i_op->lookup)) + return 0; + + /* We do this once for the lifetime of the inode */ + spin_lock(&inode->i_lock); + inode->i_opflags |= IOP_LOOKUP; + spin_unlock(&inode->i_lock); + return 1; +} + +/* * Name resolution. * This is the basic name resolution function, turning a pathname into * the final dentry. We expect 'base' to be positive and a directory. @@ -1347,10 +1450,10 @@ static int link_path_walk(const char *name, struct nameidata *nd) if (err) return err; } + if (can_lookup(nd->inode)) + continue; err = -ENOTDIR; - if (!nd->inode->i_op->lookup) - break; - continue; + break; /* here ends the main loop */ last_component: @@ -1700,11 +1803,11 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) return __lookup_hash(&this, base, NULL); } -int user_path_at(int dfd, const char __user *name, unsigned flags, - struct path *path) +int user_path_at_empty(int dfd, const char __user *name, unsigned flags, + struct path *path, int *empty) { struct nameidata nd; - char *tmp = getname_flags(name, flags); + char *tmp = getname_flags(name, flags, empty); int err = PTR_ERR(tmp); if (!IS_ERR(tmp)) { @@ -1718,6 +1821,12 @@ int user_path_at(int dfd, const char __user *name, unsigned flags, return err; } +int user_path_at(int dfd, const char __user *name, unsigned flags, + struct path *path) +{ + return user_path_at_empty(dfd, name, flags, path, 0); +} + static int user_path_parent(int dfd, const char __user *path, struct nameidata *nd, char **name) { @@ -1937,10 +2046,7 @@ static int may_open(struct path *path, int acc_mode, int flag) if (flag & O_NOATIME && !inode_owner_or_capable(inode)) return -EPERM; - /* - * Ensure there are no outstanding leases on the file. - */ - return break_lease(inode, flag); + return 0; } static int handle_truncate(struct file *filp) @@ -2043,6 +2149,10 @@ static struct file *do_last(struct nameidata *nd, struct path *path, } /* create side of things */ + /* + * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED has been + * cleared when we got to the last component we are about to look up + */ error = complete_walk(nd); if (error) return ERR_PTR(error); @@ -2111,6 +2221,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (error < 0) goto exit_dput; + if (error) + nd->flags |= LOOKUP_JUMPED; + error = -ENOENT; if (!path->dentry->d_inode) goto exit_dput; @@ -2120,6 +2233,10 @@ static struct file *do_last(struct nameidata *nd, struct path *path, path_to_nameidata(path, nd); nd->inode = path->dentry->d_inode; + /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ + error = complete_walk(nd); + if (error) + goto exit; error = -EISDIR; if (S_ISDIR(nd->inode->i_mode)) goto exit; @@ -2512,6 +2629,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) if (!dir->i_op->rmdir) return -EPERM; + dget(dentry); mutex_lock(&dentry->d_inode->i_mutex); error = -EBUSY; @@ -2532,6 +2650,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) out: mutex_unlock(&dentry->d_inode->i_mutex); + dput(dentry); if (!error) d_delete(dentry); return error; @@ -2921,6 +3040,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, if (error) return error; + dget(new_dentry); if (target) mutex_lock(&target->i_mutex); @@ -2941,6 +3061,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, out: if (target) mutex_unlock(&target->i_mutex); + dput(new_dentry); if (!error) if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) d_move(old_dentry,new_dentry); |