diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-03-28 13:43:46 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-03-28 13:43:46 -0700 |
commit | 2c3de1c2d7d68c6ba4c1ecd82c68285f34d9609e (patch) | |
tree | 6a09ce761173a966718f9009514dcc90bd9947b7 | |
parent | 9064171268d838b8f283fe111ef086b9479d059a (diff) | |
parent | 87a8ebd637dafc255070f503909a053cf0d98d3f (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull userns fixes from Eric W Biederman:
"The bulk of the changes are fixing the worst consequences of the user
namespace design oversight in not considering what happens when one
namespace starts off as a clone of another namespace, as happens with
the mount namespace.
The rest of the changes are just plain bug fixes.
Many thanks to Andy Lutomirski for pointing out many of these issues."
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
userns: Restrict when proc and sysfs can be mounted
ipc: Restrict mounting the mqueue filesystem
vfs: Carefully propogate mounts across user namespaces
vfs: Add a mount flag to lock read only bind mounts
userns: Don't allow creation if the user is chrooted
yama: Better permission check for ptraceme
pid: Handle the exit of a multi-threaded init.
scm: Require CAP_SYS_ADMIN over the current pidns to spoof pids.
-rw-r--r-- | fs/namespace.c | 54 | ||||
-rw-r--r-- | fs/pnode.c | 6 | ||||
-rw-r--r-- | fs/pnode.h | 1 | ||||
-rw-r--r-- | fs/proc/root.c | 4 | ||||
-rw-r--r-- | fs/sysfs/mount.c | 4 | ||||
-rw-r--r-- | include/linux/fs_struct.h | 2 | ||||
-rw-r--r-- | include/linux/mount.h | 2 | ||||
-rw-r--r-- | include/linux/user_namespace.h | 4 | ||||
-rw-r--r-- | ipc/mqueue.c | 12 | ||||
-rw-r--r-- | kernel/pid_namespace.c | 3 | ||||
-rw-r--r-- | kernel/user.c | 2 | ||||
-rw-r--r-- | kernel/user_namespace.c | 11 | ||||
-rw-r--r-- | net/core/scm.c | 4 | ||||
-rw-r--r-- | security/yama/yama_lsm.c | 4 |
14 files changed, 105 insertions, 8 deletions
diff --git a/fs/namespace.c b/fs/namespace.c index 50ca17d3cb4..d581e45c0a9 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -798,6 +798,10 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, } mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; + /* Don't allow unprivileged users to change mount flags */ + if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) + mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; + atomic_inc(&sb->s_active); mnt->mnt.mnt_sb = sb; mnt->mnt.mnt_root = dget(root); @@ -1713,6 +1717,9 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags) if (readonly_request == __mnt_is_readonly(mnt)) return 0; + if (mnt->mnt_flags & MNT_LOCK_READONLY) + return -EPERM; + if (readonly_request) error = mnt_make_readonly(real_mount(mnt)); else @@ -2339,7 +2346,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, /* First pass: copy the tree topology */ copy_flags = CL_COPY_ALL | CL_EXPIRE; if (user_ns != mnt_ns->user_ns) - copy_flags |= CL_SHARED_TO_SLAVE; + copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; new = copy_tree(old, old->mnt.mnt_root, copy_flags); if (IS_ERR(new)) { up_write(&namespace_sem); @@ -2732,6 +2739,51 @@ bool our_mnt(struct vfsmount *mnt) return check_mnt(real_mount(mnt)); } +bool current_chrooted(void) +{ + /* Does the current process have a non-standard root */ + struct path ns_root; + struct path fs_root; + bool chrooted; + + /* Find the namespace root */ + ns_root.mnt = ¤t->nsproxy->mnt_ns->root->mnt; + ns_root.dentry = ns_root.mnt->mnt_root; + path_get(&ns_root); + while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root)) + ; + + get_fs_root(current->fs, &fs_root); + + chrooted = !path_equal(&fs_root, &ns_root); + + path_put(&fs_root); + path_put(&ns_root); + + return chrooted; +} + +void update_mnt_policy(struct user_namespace *userns) +{ + struct mnt_namespace *ns = current->nsproxy->mnt_ns; + struct mount *mnt; + + down_read(&namespace_sem); + list_for_each_entry(mnt, &ns->list, mnt_list) { + switch (mnt->mnt.mnt_sb->s_magic) { + case SYSFS_MAGIC: + userns->may_mount_sysfs = true; + break; + case PROC_SUPER_MAGIC: + userns->may_mount_proc = true; + break; + } + if (userns->may_mount_sysfs && userns->may_mount_proc) + break; + } + up_read(&namespace_sem); +} + static void *mntns_get(struct task_struct *task) { struct mnt_namespace *ns = NULL; diff --git a/fs/pnode.c b/fs/pnode.c index 3e000a51ac0..8b29d2164da 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -9,6 +9,7 @@ #include <linux/mnt_namespace.h> #include <linux/mount.h> #include <linux/fs.h> +#include <linux/nsproxy.h> #include "internal.h" #include "pnode.h" @@ -220,6 +221,7 @@ static struct mount *get_source(struct mount *dest, int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, struct mount *source_mnt, struct list_head *tree_list) { + struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; struct mount *m, *child; int ret = 0; struct mount *prev_dest_mnt = dest_mnt; @@ -237,6 +239,10 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, source = get_source(m, prev_dest_mnt, prev_src_mnt, &type); + /* Notice when we are propagating across user namespaces */ + if (m->mnt_ns->user_ns != user_ns) + type |= CL_UNPRIVILEGED; + child = copy_tree(source, source->mnt.mnt_root, type); if (IS_ERR(child)) { ret = PTR_ERR(child); diff --git a/fs/pnode.h b/fs/pnode.h index 19b853a3445..a0493d5ebfb 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -23,6 +23,7 @@ #define CL_MAKE_SHARED 0x08 #define CL_PRIVATE 0x10 #define CL_SHARED_TO_SLAVE 0x20 +#define CL_UNPRIVILEGED 0x40 static inline void set_mnt_shared(struct mount *mnt) { diff --git a/fs/proc/root.c b/fs/proc/root.c index c6e9fac26ba..9c7fab1d23f 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -16,6 +16,7 @@ #include <linux/sched.h> #include <linux/module.h> #include <linux/bitops.h> +#include <linux/user_namespace.h> #include <linux/mount.h> #include <linux/pid_namespace.h> #include <linux/parser.h> @@ -108,6 +109,9 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, } else { ns = task_active_pid_ns(current); options = data; + + if (!current_user_ns()->may_mount_proc) + return ERR_PTR(-EPERM); } sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns); diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 8d924b5ec73..afd83273e6c 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -19,6 +19,7 @@ #include <linux/module.h> #include <linux/magic.h> #include <linux/slab.h> +#include <linux/user_namespace.h> #include "sysfs.h" @@ -111,6 +112,9 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, struct super_block *sb; int error; + if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs) + return ERR_PTR(-EPERM); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return ERR_PTR(-ENOMEM); diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index 729eded4b24..2b93a9a5a1e 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -50,4 +50,6 @@ static inline void get_fs_root_and_pwd(struct fs_struct *fs, struct path *root, spin_unlock(&fs->lock); } +extern bool current_chrooted(void); + #endif /* _LINUX_FS_STRUCT_H */ diff --git a/include/linux/mount.h b/include/linux/mount.h index d7029f4a191..73005f9957e 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -47,6 +47,8 @@ struct mnt_namespace; #define MNT_INTERNAL 0x4000 +#define MNT_LOCK_READONLY 0x400000 + struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 4ce00932493..b6b215f13b4 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -26,6 +26,8 @@ struct user_namespace { kuid_t owner; kgid_t group; unsigned int proc_inum; + bool may_mount_sysfs; + bool may_mount_proc; }; extern struct user_namespace init_user_ns; @@ -82,4 +84,6 @@ static inline void put_user_ns(struct user_namespace *ns) #endif +void update_mnt_policy(struct user_namespace *userns); + #endif /* _LINUX_USER_H */ diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 3953fda2e8b..e4e47f64744 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -330,8 +330,16 @@ static struct dentry *mqueue_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - if (!(flags & MS_KERNMOUNT)) - data = current->nsproxy->ipc_ns; + if (!(flags & MS_KERNMOUNT)) { + struct ipc_namespace *ns = current->nsproxy->ipc_ns; + /* Don't allow mounting unless the caller has CAP_SYS_ADMIN + * over the ipc namespace. + */ + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + + data = ns; + } return mount_ns(fs_type, flags, data, mqueue_fill_super); } diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index c1c3dc1c602..bea15bdf82b 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -181,6 +181,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) int nr; int rc; struct task_struct *task, *me = current; + int init_pids = thread_group_leader(me) ? 1 : 2; /* Don't allow any more processes into the pid namespace */ disable_pid_allocation(pid_ns); @@ -230,7 +231,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) */ for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); - if (pid_ns->nr_hashed == 1) + if (pid_ns->nr_hashed == init_pids) break; schedule(); } diff --git a/kernel/user.c b/kernel/user.c index e81978e8c03..8e635a18ab5 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -51,6 +51,8 @@ struct user_namespace init_user_ns = { .owner = GLOBAL_ROOT_UID, .group = GLOBAL_ROOT_GID, .proc_inum = PROC_USER_INIT_INO, + .may_mount_sysfs = true, + .may_mount_proc = true, }; EXPORT_SYMBOL_GPL(init_user_ns); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index b14f4d34204..a54f26f82eb 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -61,6 +61,15 @@ int create_user_ns(struct cred *new) kgid_t group = new->egid; int ret; + /* + * Verify that we can not violate the policy of which files + * may be accessed that is specified by the root directory, + * by verifing that the root directory is at the root of the + * mount namespace which allows all files to be accessed. + */ + if (current_chrooted()) + return -EPERM; + /* The creator needs a mapping in the parent user namespace * or else we won't be able to reasonably tell userspace who * created a user_namespace. @@ -87,6 +96,8 @@ int create_user_ns(struct cred *new) set_cred_user_ns(new, ns); + update_mnt_policy(ns); + return 0; } diff --git a/net/core/scm.c b/net/core/scm.c index 905dcc6ad1e..2dc6cdaaae8 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -24,6 +24,7 @@ #include <linux/interrupt.h> #include <linux/netdevice.h> #include <linux/security.h> +#include <linux/pid_namespace.h> #include <linux/pid.h> #include <linux/nsproxy.h> #include <linux/slab.h> @@ -52,7 +53,8 @@ static __inline__ int scm_check_creds(struct ucred *creds) if (!uid_valid(uid) || !gid_valid(gid)) return -EINVAL; - if ((creds->pid == task_tgid_vnr(current) || nsown_capable(CAP_SYS_ADMIN)) && + if ((creds->pid == task_tgid_vnr(current) || + ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) && ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index 23414b93771..13c88fbcf03 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -347,10 +347,8 @@ int yama_ptrace_traceme(struct task_struct *parent) /* Only disallow PTRACE_TRACEME on more aggressive settings. */ switch (ptrace_scope) { case YAMA_SCOPE_CAPABILITY: - rcu_read_lock(); - if (!ns_capable(__task_cred(parent)->user_ns, CAP_SYS_PTRACE)) + if (!has_ns_capability(parent, current_user_ns(), CAP_SYS_PTRACE)) rc = -EPERM; - rcu_read_unlock(); break; case YAMA_SCOPE_NO_ATTACH: rc = -EPERM; |