diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-17 15:44:47 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-17 15:44:47 -0800 |
commit | 6a2b60b17b3e48a418695a94bd2420f6ab32e519 (patch) | |
tree | 54b7792fa68b8890f710fa6398b6ba8626a039a8 /fs/proc/namespaces.c | |
parent | 9228ff90387e276ad67b10c0eb525c9d6a57d5e9 (diff) | |
parent | 98f842e675f96ffac96e6c50315790912b2812be (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull user namespace changes from Eric Biederman:
"While small this set of changes is very significant with respect to
containers in general and user namespaces in particular. The user
space interface is now complete.
This set of changes adds support for unprivileged users to create user
namespaces and as a user namespace root to create other namespaces.
The tyranny of supporting suid root preventing unprivileged users from
using cool new kernel features is broken.
This set of changes completes the work on setns, adding support for
the pid, user, mount namespaces.
This set of changes includes a bunch of basic pid namespace
cleanups/simplifications. Of particular significance is the rework of
the pid namespace cleanup so it no longer requires sending out
tendrils into all kinds of unexpected cleanup paths for operation. At
least one case of broken error handling is fixed by this cleanup.
The files under /proc/<pid>/ns/ have been converted from regular files
to magic symlinks which prevents incorrect caching by the VFS,
ensuring the files always refer to the namespace the process is
currently using and ensuring that the ptrace_mayaccess permission
checks are always applied.
The files under /proc/<pid>/ns/ have been given stable inode numbers
so it is now possible to see if different processes share the same
namespaces.
Through the David Miller's net tree are changes to relax many of the
permission checks in the networking stack to allowing the user
namespace root to usefully use the networking stack. Similar changes
for the mount namespace and the pid namespace are coming through my
tree.
Two small changes to add user namespace support were commited here adn
in David Miller's -net tree so that I could complete the work on the
/proc/<pid>/ns/ files in this tree.
Work remains to make it safe to build user namespaces and 9p, afs,
ceph, cifs, coda, gfs2, ncpfs, nfs, nfsd, ocfs2, and xfs so the
Kconfig guard remains in place preventing that user namespaces from
being built when any of those filesystems are enabled.
Future design work remains to allow root users outside of the initial
user namespace to mount more than just /proc and /sys."
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: (38 commits)
proc: Usable inode numbers for the namespace file descriptors.
proc: Fix the namespace inode permission checks.
proc: Generalize proc inode allocation
userns: Allow unprivilged mounts of proc and sysfs
userns: For /proc/self/{uid,gid}_map derive the lower userns from the struct file
procfs: Print task uids and gids in the userns that opened the proc file
userns: Implement unshare of the user namespace
userns: Implent proc namespace operations
userns: Kill task_user_ns
userns: Make create_new_namespaces take a user_ns parameter
userns: Allow unprivileged use of setns.
userns: Allow unprivileged users to create new namespaces
userns: Allow setting a userns mapping to your current uid.
userns: Allow chown and setgid preservation
userns: Allow unprivileged users to create user namespaces.
userns: Ignore suid and sgid on binaries if the uid or gid can not be mapped
userns: fix return value on mntns_install() failure
vfs: Allow unprivileged manipulation of the mount namespace.
vfs: Only support slave subtrees across different user namespaces
vfs: Add a user namespace reference from struct mnt_namespace
...
Diffstat (limited to 'fs/proc/namespaces.c')
-rw-r--r-- | fs/proc/namespaces.c | 185 |
1 files changed, 164 insertions, 21 deletions
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index b178ed733c3..b7a47196c8c 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -11,6 +11,7 @@ #include <net/net_namespace.h> #include <linux/ipc_namespace.h> #include <linux/pid_namespace.h> +#include <linux/user_namespace.h> #include "internal.h" @@ -24,12 +25,168 @@ static const struct proc_ns_operations *ns_entries[] = { #ifdef CONFIG_IPC_NS &ipcns_operations, #endif +#ifdef CONFIG_PID_NS + &pidns_operations, +#endif +#ifdef CONFIG_USER_NS + &userns_operations, +#endif + &mntns_operations, }; static const struct file_operations ns_file_operations = { .llseek = no_llseek, }; +static const struct inode_operations ns_inode_operations = { + .setattr = proc_setattr, +}; + +static int ns_delete_dentry(const struct dentry *dentry) +{ + /* Don't cache namespace inodes when not in use */ + return 1; +} + +static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) +{ + struct inode *inode = dentry->d_inode; + const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops; + + return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]", + ns_ops->name, inode->i_ino); +} + +const struct dentry_operations ns_dentry_operations = +{ + .d_delete = ns_delete_dentry, + .d_dname = ns_dname, +}; + +static struct dentry *proc_ns_get_dentry(struct super_block *sb, + struct task_struct *task, const struct proc_ns_operations *ns_ops) +{ + struct dentry *dentry, *result; + struct inode *inode; + struct proc_inode *ei; + struct qstr qname = { .name = "", }; + void *ns; + + ns = ns_ops->get(task); + if (!ns) + return ERR_PTR(-ENOENT); + + dentry = d_alloc_pseudo(sb, &qname); + if (!dentry) { + ns_ops->put(ns); + return ERR_PTR(-ENOMEM); + } + + inode = iget_locked(sb, ns_ops->inum(ns)); + if (!inode) { + dput(dentry); + ns_ops->put(ns); + return ERR_PTR(-ENOMEM); + } + + ei = PROC_I(inode); + if (inode->i_state & I_NEW) { + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_op = &ns_inode_operations; + inode->i_mode = S_IFREG | S_IRUGO; + inode->i_fop = &ns_file_operations; + ei->ns_ops = ns_ops; + ei->ns = ns; + unlock_new_inode(inode); + } else { + ns_ops->put(ns); + } + + d_set_d_op(dentry, &ns_dentry_operations); + result = d_instantiate_unique(dentry, inode); + if (result) { + dput(dentry); + dentry = result; + } + + return dentry; +} + +static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct inode *inode = dentry->d_inode; + struct super_block *sb = inode->i_sb; + struct proc_inode *ei = PROC_I(inode); + struct task_struct *task; + struct dentry *ns_dentry; + void *error = ERR_PTR(-EACCES); + + task = get_proc_task(inode); + if (!task) + goto out; + + if (!ptrace_may_access(task, PTRACE_MODE_READ)) + goto out_put_task; + + ns_dentry = proc_ns_get_dentry(sb, task, ei->ns_ops); + if (IS_ERR(ns_dentry)) { + error = ERR_CAST(ns_dentry); + goto out_put_task; + } + + dput(nd->path.dentry); + nd->path.dentry = ns_dentry; + error = NULL; + +out_put_task: + put_task_struct(task); +out: + return error; +} + +static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen) +{ + struct inode *inode = dentry->d_inode; + struct proc_inode *ei = PROC_I(inode); + const struct proc_ns_operations *ns_ops = ei->ns_ops; + struct task_struct *task; + void *ns; + char name[50]; + int len = -EACCES; + + task = get_proc_task(inode); + if (!task) + goto out; + + if (!ptrace_may_access(task, PTRACE_MODE_READ)) + goto out_put_task; + + len = -ENOENT; + ns = ns_ops->get(task); + if (!ns) + goto out_put_task; + + snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns)); + len = strlen(name); + + if (len > buflen) + len = buflen; + if (copy_to_user(buffer, name, len)) + len = -EFAULT; + + ns_ops->put(ns); +out_put_task: + put_task_struct(task); +out: + return len; +} + +static const struct inode_operations proc_ns_link_inode_operations = { + .readlink = proc_ns_readlink, + .follow_link = proc_ns_follow_link, + .setattr = proc_setattr, +}; + static struct dentry *proc_ns_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { @@ -37,21 +194,15 @@ static struct dentry *proc_ns_instantiate(struct inode *dir, struct inode *inode; struct proc_inode *ei; struct dentry *error = ERR_PTR(-ENOENT); - void *ns; inode = proc_pid_make_inode(dir->i_sb, task); if (!inode) goto out; - ns = ns_ops->get(task); - if (!ns) - goto out_iput; - ei = PROC_I(inode); - inode->i_mode = S_IFREG|S_IRUSR; - inode->i_fop = &ns_file_operations; - ei->ns_ops = ns_ops; - ei->ns = ns; + inode->i_mode = S_IFLNK|S_IRWXUGO; + inode->i_op = &proc_ns_link_inode_operations; + ei->ns_ops = ns_ops; d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); @@ -60,9 +211,6 @@ static struct dentry *proc_ns_instantiate(struct inode *dir, error = NULL; out: return error; -out_iput: - iput(inode); - goto out; } static int proc_ns_fill_cache(struct file *filp, void *dirent, @@ -89,10 +237,6 @@ static int proc_ns_dir_readdir(struct file *filp, void *dirent, if (!task) goto out_no_task; - ret = -EPERM; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - goto out; - ret = 0; i = filp->f_pos; switch (i) { @@ -152,10 +296,6 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir, if (!task) goto out_no_task; - error = ERR_PTR(-EPERM); - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - goto out; - last = &ns_entries[ARRAY_SIZE(ns_entries)]; for (entry = ns_entries; entry < last; entry++) { if (strlen((*entry)->name) != len) @@ -163,7 +303,6 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir, if (!memcmp(dentry->d_name.name, (*entry)->name, len)) break; } - error = ERR_PTR(-ENOENT); if (entry == last) goto out; @@ -198,3 +337,7 @@ out_invalid: return ERR_PTR(-EINVAL); } +bool proc_ns_inode(struct inode *inode) +{ + return inode->i_fop == &ns_file_operations; +} |