diff options
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- | fs/namei.c | 379 |
1 files changed, 280 insertions, 99 deletions
diff --git a/fs/namei.c b/fs/namei.c index 8df7a78ace5..8f7b41a1488 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -368,18 +368,6 @@ void path_get(struct path *path) EXPORT_SYMBOL(path_get); /** - * path_get_long - get a long reference to a path - * @path: path to get the reference to - * - * Given a path increment the reference count to the dentry and the vfsmount. - */ -void path_get_long(struct path *path) -{ - mntget_long(path->mnt); - dget(path->dentry); -} - -/** * path_put - put a reference to a path * @path: path to put the reference to * @@ -393,18 +381,6 @@ void path_put(struct path *path) EXPORT_SYMBOL(path_put); /** - * path_put_long - put a long reference to a path - * @path: path to put the reference to - * - * Given a path decrement the reference count to the dentry and the vfsmount. - */ -void path_put_long(struct path *path) -{ - dput(path->dentry); - mntput_long(path->mnt); -} - -/** * nameidata_drop_rcu - drop this nameidata out of rcu-walk * @nd: nameidata pathwalk data to drop * Returns: 0 on success, -ECHILD on failure @@ -800,12 +776,8 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p) touch_atime(link->mnt, dentry); nd_set_link(nd, NULL); - if (link->mnt != nd->path.mnt) { - path_to_nameidata(link, nd); - nd->inode = nd->path.dentry->d_inode; - dget(dentry); - } - mntget(link->mnt); + if (link->mnt == nd->path.mnt) + mntget(link->mnt); nd->last_type = LAST_BIND; *p = dentry->d_inode->i_op->follow_link(dentry, nd); @@ -896,54 +868,169 @@ int follow_up(struct path *path) } /* - * serialization is taken care of in namespace.c + * Perform an automount + * - return -EISDIR to tell follow_managed() to stop and return the path we + * were called with. */ -static void __follow_mount_rcu(struct nameidata *nd, struct path *path, - struct inode **inode) +static int follow_automount(struct path *path, unsigned flags, + bool *need_mntput) { - while (d_mountpoint(path->dentry)) { - struct vfsmount *mounted; - mounted = __lookup_mnt(path->mnt, path->dentry, 1); - if (!mounted) - return; - path->mnt = mounted; - path->dentry = mounted->mnt_root; - nd->seq = read_seqcount_begin(&path->dentry->d_seq); - *inode = path->dentry->d_inode; + struct vfsmount *mnt; + int err; + + if (!path->dentry->d_op || !path->dentry->d_op->d_automount) + return -EREMOTE; + + /* We don't want to mount if someone supplied AT_NO_AUTOMOUNT + * and this is the terminal part of the path. + */ + if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_CONTINUE)) + return -EISDIR; /* we actually want to stop here */ + + /* We want to mount if someone is trying to open/create a file of any + * type under the mountpoint, wants to traverse through the mountpoint + * or wants to open the mounted directory. + * + * We don't want to mount if someone's just doing a stat and they've + * set AT_SYMLINK_NOFOLLOW - unless they're stat'ing a directory and + * appended a '/' to the name. + */ + if (!(flags & LOOKUP_FOLLOW) && + !(flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY | + LOOKUP_OPEN | LOOKUP_CREATE))) + return -EISDIR; + + current->total_link_count++; + if (current->total_link_count >= 40) + return -ELOOP; + + mnt = path->dentry->d_op->d_automount(path); + if (IS_ERR(mnt)) { + /* + * The filesystem is allowed to return -EISDIR here to indicate + * it doesn't want to automount. For instance, autofs would do + * this so that its userspace daemon can mount on this dentry. + * + * However, we can only permit this if it's a terminal point in + * the path being looked up; if it wasn't then the remainder of + * the path is inaccessible and we should say so. + */ + if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_CONTINUE)) + return -EREMOTE; + return PTR_ERR(mnt); } -} -static int __follow_mount(struct path *path) -{ - int res = 0; - while (d_mountpoint(path->dentry)) { - struct vfsmount *mounted = lookup_mnt(path); - if (!mounted) - break; + if (!mnt) /* mount collision */ + return 0; + + /* The new mount record should have at least 2 refs to prevent it being + * expired before we get a chance to add it + */ + BUG_ON(mnt_get_count(mnt) < 2); + + if (mnt->mnt_sb == path->mnt->mnt_sb && + mnt->mnt_root == path->dentry) { + mnt_clear_expiry(mnt); + mntput(mnt); + mntput(mnt); + return -ELOOP; + } + + /* We need to add the mountpoint to the parent. The filesystem may + * have placed it on an expiry list, and so we need to make sure it + * won't be expired under us if do_add_mount() fails (do_add_mount() + * will eat a reference unconditionally). + */ + mntget(mnt); + err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE); + switch (err) { + case -EBUSY: + /* Someone else made a mount here whilst we were busy */ + err = 0; + default: + mnt_clear_expiry(mnt); + mntput(mnt); + mntput(mnt); + return err; + case 0: + mntput(mnt); dput(path->dentry); - if (res) + if (*need_mntput) mntput(path->mnt); - path->mnt = mounted; - path->dentry = dget(mounted->mnt_root); - res = 1; + path->mnt = mnt; + path->dentry = dget(mnt->mnt_root); + *need_mntput = true; + return 0; } - return res; } -static void follow_mount(struct path *path) +/* + * Handle a dentry that is managed in some way. + * - Flagged for transit management (autofs) + * - Flagged as mountpoint + * - Flagged as automount point + * + * This may only be called in refwalk mode. + * + * Serialization is taken care of in namespace.c + */ +static int follow_managed(struct path *path, unsigned flags) { - while (d_mountpoint(path->dentry)) { - struct vfsmount *mounted = lookup_mnt(path); - if (!mounted) - break; - dput(path->dentry); - mntput(path->mnt); - path->mnt = mounted; - path->dentry = dget(mounted->mnt_root); + unsigned managed; + bool need_mntput = false; + int ret; + + /* Given that we're not holding a lock here, we retain the value in a + * local variable for each dentry as we look at it so that we don't see + * the components of that value change under us */ + while (managed = ACCESS_ONCE(path->dentry->d_flags), + managed &= DCACHE_MANAGED_DENTRY, + unlikely(managed != 0)) { + /* Allow the filesystem to manage the transit without i_mutex + * being held. */ + if (managed & DCACHE_MANAGE_TRANSIT) { + BUG_ON(!path->dentry->d_op); + BUG_ON(!path->dentry->d_op->d_manage); + ret = path->dentry->d_op->d_manage(path->dentry, + false, false); + if (ret < 0) + return ret == -EISDIR ? 0 : ret; + } + + /* Transit to a mounted filesystem. */ + if (managed & DCACHE_MOUNTED) { + struct vfsmount *mounted = lookup_mnt(path); + if (mounted) { + dput(path->dentry); + if (need_mntput) + mntput(path->mnt); + path->mnt = mounted; + path->dentry = dget(mounted->mnt_root); + need_mntput = true; + continue; + } + + /* Something is mounted on this dentry in another + * namespace and/or whatever was mounted there in this + * namespace got unmounted before we managed to get the + * vfsmount_lock */ + } + + /* Handle an automount point */ + if (managed & DCACHE_NEED_AUTOMOUNT) { + ret = follow_automount(path, flags, &need_mntput); + if (ret < 0) + return ret == -EISDIR ? 0 : ret; + continue; + } + + /* We didn't change the current path point */ + break; } + return 0; } -int follow_down(struct path *path) +int follow_down_one(struct path *path) { struct vfsmount *mounted; @@ -958,13 +1045,41 @@ int follow_down(struct path *path) return 0; } +/* + * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we + * meet a managed dentry and we're not walking to "..". True is returned to + * continue, false to abort. + */ +static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, + struct inode **inode, bool reverse_transit) +{ + while (d_mountpoint(path->dentry)) { + struct vfsmount *mounted; + if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) && + !reverse_transit && + path->dentry->d_op->d_manage(path->dentry, false, true) < 0) + return false; + mounted = __lookup_mnt(path->mnt, path->dentry, 1); + if (!mounted) + break; + path->mnt = mounted; + path->dentry = mounted->mnt_root; + nd->seq = read_seqcount_begin(&path->dentry->d_seq); + *inode = path->dentry->d_inode; + } + + if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) + return reverse_transit; + return true; +} + static int follow_dotdot_rcu(struct nameidata *nd) { struct inode *inode = nd->inode; set_root_rcu(nd); - while(1) { + while (1) { if (nd->path.dentry == nd->root.dentry && nd->path.mnt == nd->root.mnt) { break; @@ -987,12 +1102,80 @@ static int follow_dotdot_rcu(struct nameidata *nd) nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); inode = nd->path.dentry->d_inode; } - __follow_mount_rcu(nd, &nd->path, &inode); + __follow_mount_rcu(nd, &nd->path, &inode, true); nd->inode = inode; return 0; } +/* + * Follow down to the covering mount currently visible to userspace. At each + * point, the filesystem owning that dentry may be queried as to whether the + * caller is permitted to proceed or not. + * + * Care must be taken as namespace_sem may be held (indicated by mounting_here + * being true). + */ +int follow_down(struct path *path, bool mounting_here) +{ + unsigned managed; + int ret; + + while (managed = ACCESS_ONCE(path->dentry->d_flags), + unlikely(managed & DCACHE_MANAGED_DENTRY)) { + /* Allow the filesystem to manage the transit without i_mutex + * being held. + * + * We indicate to the filesystem if someone is trying to mount + * something here. This gives autofs the chance to deny anyone + * other than its daemon the right to mount on its + * superstructure. + * + * The filesystem may sleep at this point. + */ + if (managed & DCACHE_MANAGE_TRANSIT) { + BUG_ON(!path->dentry->d_op); + BUG_ON(!path->dentry->d_op->d_manage); + ret = path->dentry->d_op->d_manage( + path->dentry, mounting_here, false); + if (ret < 0) + return ret == -EISDIR ? 0 : ret; + } + + /* Transit to a mounted filesystem. */ + if (managed & DCACHE_MOUNTED) { + struct vfsmount *mounted = lookup_mnt(path); + if (!mounted) + break; + dput(path->dentry); + mntput(path->mnt); + path->mnt = mounted; + path->dentry = dget(mounted->mnt_root); + continue; + } + + /* Don't handle automount points here */ + break; + } + return 0; +} + +/* + * Skip to top of mountpoint pile in refwalk mode for follow_dotdot() + */ +static void follow_mount(struct path *path) +{ + while (d_mountpoint(path->dentry)) { + struct vfsmount *mounted = lookup_mnt(path); + if (!mounted) + break; + dput(path->dentry); + mntput(path->mnt); + path->mnt = mounted; + path->dentry = dget(mounted->mnt_root); + } +} + static void follow_dotdot(struct nameidata *nd) { set_root(nd); @@ -1057,12 +1240,14 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, struct vfsmount *mnt = nd->path.mnt; struct dentry *dentry, *parent = nd->path.dentry; struct inode *dir; + int err; + /* * See if the low-level filesystem might want * to use its own hash.. */ if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { - int err = parent->d_op->d_hash(parent, nd->inode, name); + err = parent->d_op->d_hash(parent, nd->inode, name); if (err < 0) return err; } @@ -1089,22 +1274,28 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, nd->seq = seq; if (dentry->d_flags & DCACHE_OP_REVALIDATE) goto need_revalidate; +done2: path->mnt = mnt; path->dentry = dentry; - __follow_mount_rcu(nd, path, inode); - } else { - dentry = __d_lookup(parent, name); - if (!dentry) - goto need_lookup; + if (likely(__follow_mount_rcu(nd, path, inode, false))) + return 0; + if (nameidata_drop_rcu(nd)) + return -ECHILD; + /* fallthru */ + } + dentry = __d_lookup(parent, name); + if (!dentry) + goto need_lookup; found: - if (dentry->d_flags & DCACHE_OP_REVALIDATE) - goto need_revalidate; + if (dentry->d_flags & DCACHE_OP_REVALIDATE) + goto need_revalidate; done: - path->mnt = mnt; - path->dentry = dentry; - __follow_mount(path); - *inode = path->dentry->d_inode; - } + path->mnt = mnt; + path->dentry = dentry; + err = follow_managed(path, nd->flags); + if (unlikely(err < 0)) + return err; + *inode = path->dentry->d_inode; return 0; need_lookup: @@ -1143,6 +1334,8 @@ need_revalidate: goto need_lookup; if (IS_ERR(dentry)) goto fail; + if (nd->flags & LOOKUP_RCU) + goto done2; goto done; fail: @@ -1150,17 +1343,6 @@ fail: } /* - * This is a temporary kludge to deal with "automount" symlinks; proper - * solution is to trigger them on follow_mount(), so that do_lookup() - * would DTRT. To be killed before 2.6.34-final. - */ -static inline int follow_on_final(struct inode *inode, unsigned lookup_flags) -{ - return inode && unlikely(inode->i_op->follow_link) && - ((lookup_flags & LOOKUP_FOLLOW) || S_ISDIR(inode->i_mode)); -} - -/* * Name resolution. * This is the basic name resolution function, turning a pathname into * the final dentry. We expect 'base' to be positive and a directory. @@ -1298,7 +1480,8 @@ last_component: err = do_lookup(nd, &this, &next, &inode); if (err) break; - if (follow_on_final(inode, lookup_flags)) { + if (inode && unlikely(inode->i_op->follow_link) && + (lookup_flags & LOOKUP_FOLLOW)) { if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry)) return -ECHILD; BUG_ON(inode != next.dentry->d_inode); @@ -2200,11 +2383,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (open_flag & O_EXCL) goto exit_dput; - if (__follow_mount(path)) { - error = -ELOOP; - if (open_flag & O_NOFOLLOW) - goto exit_dput; - } + error = follow_managed(path, nd->flags); + if (error < 0) + goto exit_dput; error = -ENOENT; if (!path->dentry->d_inode) @@ -2353,8 +2534,7 @@ reval: struct inode *linki = link.dentry->d_inode; void *cookie; error = -ELOOP; - /* S_ISDIR part is a temporary automount kludge */ - if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(linki->i_mode)) + if (!(nd.flags & LOOKUP_FOLLOW)) goto exit_dput; if (count++ == 32) goto exit_dput; @@ -3413,6 +3593,7 @@ const struct inode_operations page_symlink_inode_operations = { }; EXPORT_SYMBOL(user_path_at); +EXPORT_SYMBOL(follow_down_one); EXPORT_SYMBOL(follow_down); EXPORT_SYMBOL(follow_up); EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ |