From 59607db367c57f515183cb203642291bb14d9c40 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Wed, 23 Mar 2011 16:43:16 -0700 Subject: userns: add a user_namespace as creator/owner of uts_namespace The expected course of development for user namespaces targeted capabilities is laid out at https://wiki.ubuntu.com/UserNamespace. Goals: - Make it safe for an unprivileged user to unshare namespaces. They will be privileged with respect to the new namespace, but this should only include resources which the unprivileged user already owns. - Provide separate limits and accounting for userids in different namespaces. Status: Currently (as of 2.6.38) you can clone with the CLONE_NEWUSER flag to get a new user namespace if you have the CAP_SYS_ADMIN, CAP_SETUID, and CAP_SETGID capabilities. What this gets you is a whole new set of userids, meaning that user 500 will have a different 'struct user' in your namespace than in other namespaces. So any accounting information stored in struct user will be unique to your namespace. However, throughout the kernel there are checks which - simply check for a capability. Since root in a child namespace has all capabilities, this means that a child namespace is not constrained. - simply compare uid1 == uid2. Since these are the integer uids, uid 500 in namespace 1 will be said to be equal to uid 500 in namespace 2. As a result, the lxc implementation at lxc.sf.net does not use user namespaces. This is actually helpful because it leaves us free to develop user namespaces in such a way that, for some time, user namespaces may be unuseful. Bugs aside, this patchset is supposed to not at all affect systems which are not actively using user namespaces, and only restrict what tasks in child user namespace can do. They begin to limit privilege to a user namespace, so that root in a container cannot kill or ptrace tasks in the parent user namespace, and can only get world access rights to files. 
Since all files currently belong to the initial user namespace, that means that child user namespaces can only get world access rights to *all* files. While this temporarily makes user namespaces bad for system containers, it starts to get useful for some sandboxing. I've run the 'runltplite.sh' with and without this patchset and found no difference. This patch: copy_process() handles CLONE_NEWUSER before the rest of the namespaces. So in the case of clone(CLONE_NEWUSER|CLONE_NEWUTS) the new uts namespace will have the new user namespace as its owner. That is what we want, since we want root in that new userns to be able to have privilege over it. Changelog: Feb 15: don't set uts_ns->user_ns if we didn't create a new uts_ns. Feb 23: Move extern init_user_ns declaration from init/version.c to utsname.h. Signed-off-by: Serge E. Hallyn Acked-by: "Eric W. Biederman" Acked-by: Daniel Lezcano Acked-by: David Howells Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/nsproxy.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/nsproxy.c') diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index f74e6c00e26..034dc2ed13a 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -74,6 +74,11 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, err = PTR_ERR(new_nsp->uts_ns); goto out_uts; } + if (new_nsp->uts_ns != tsk->nsproxy->uts_ns) { + put_user_ns(new_nsp->uts_ns->user_ns); + new_nsp->uts_ns->user_ns = task_cred_xxx(tsk, user)->user_ns; + get_user_ns(new_nsp->uts_ns->user_ns); + } new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns); if (IS_ERR(new_nsp->ipc_ns)) { -- cgit v1.2.3-70-g09d2 From bb96a6f50be27390dc959ff67d9ea0ea0cfbe177 Mon Sep 17 00:00:00 2001 From: "Serge E. 
Hallyn" Date: Wed, 23 Mar 2011 16:43:18 -0700 Subject: userns: allow sethostname in a container Changelog: Feb 23: let clone_uts_ns() handle setting uts->user_ns To do so we need to pass in the task_struct who'll get the utsname, so we can get its user_ns. Feb 23: As per Oleg's comment, just pass in tsk, instead of two of its members. Signed-off-by: Serge E. Hallyn Acked-by: "Eric W. Biederman" Acked-by: Daniel Lezcano Acked-by: David Howells Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/utsname.h | 6 +++--- kernel/nsproxy.c | 7 +------ kernel/sys.c | 2 +- kernel/utsname.c | 12 +++++++----- 4 files changed, 12 insertions(+), 15 deletions(-) (limited to 'kernel/nsproxy.c') diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 2c3c0f54370..4e5b0213fdc 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -54,7 +54,7 @@ static inline void get_uts_ns(struct uts_namespace *ns) } extern struct uts_namespace *copy_utsname(unsigned long flags, - struct uts_namespace *ns); + struct task_struct *tsk); extern void free_uts_ns(struct kref *kref); static inline void put_uts_ns(struct uts_namespace *ns) @@ -71,12 +71,12 @@ static inline void put_uts_ns(struct uts_namespace *ns) } static inline struct uts_namespace *copy_utsname(unsigned long flags, - struct uts_namespace *ns) + struct task_struct *tsk) { if (flags & CLONE_NEWUTS) return ERR_PTR(-EINVAL); - return ns; + return tsk->nsproxy->uts_ns; } #endif diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 034dc2ed13a..b97fc9d04dd 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -69,16 +69,11 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, goto out_ns; } - new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns); + new_nsp->uts_ns = copy_utsname(flags, tsk); if (IS_ERR(new_nsp->uts_ns)) { err = PTR_ERR(new_nsp->uts_ns); goto out_uts; } - if (new_nsp->uts_ns != tsk->nsproxy->uts_ns) { - 
put_user_ns(new_nsp->uts_ns->user_ns); - new_nsp->uts_ns->user_ns = task_cred_xxx(tsk, user)->user_ns; - get_user_ns(new_nsp->uts_ns->user_ns); - } new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns); if (IS_ERR(new_nsp->ipc_ns)) { diff --git a/kernel/sys.c b/kernel/sys.c index 1ad48b3b906..5761c53e19e 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1181,7 +1181,7 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) int errno; char tmp[__NEW_UTS_LEN]; - if (!capable(CAP_SYS_ADMIN)) + if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; diff --git a/kernel/utsname.c b/kernel/utsname.c index a7b3a8d1ad2..44646179eab 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -31,7 +31,8 @@ static struct uts_namespace *create_uts_ns(void) * @old_ns: namespace to clone * Return NULL on error (failure to kmalloc), new ns otherwise */ -static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) +static struct uts_namespace *clone_uts_ns(struct task_struct *tsk, + struct uts_namespace *old_ns) { struct uts_namespace *ns; @@ -41,8 +42,7 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) down_read(&uts_sem); memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); - ns->user_ns = old_ns->user_ns; - get_user_ns(ns->user_ns); + ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns); up_read(&uts_sem); return ns; } @@ -53,8 +53,10 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) * utsname of this process won't be seen by parent, and vice * versa. 
*/ -struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *old_ns) +struct uts_namespace *copy_utsname(unsigned long flags, + struct task_struct *tsk) { + struct uts_namespace *old_ns = tsk->nsproxy->uts_ns; struct uts_namespace *new_ns; BUG_ON(!old_ns); @@ -63,7 +65,7 @@ struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *ol if (!(flags & CLONE_NEWUTS)) return old_ns; - new_ns = clone_uts_ns(old_ns); + new_ns = clone_uts_ns(tsk, old_ns); put_uts_ns(old_ns); return new_ns; -- cgit v1.2.3-70-g09d2 From b515498f5bb5f38fc0e390b4ff7d00b6077de127 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Wed, 23 Mar 2011 16:43:23 -0700 Subject: userns: add a user namespace owner of ipc ns Changelog: Feb 15: Don't set new ipc->user_ns if we didn't create a new ipc_ns. Feb 23: Move extern declaration to ipc_namespace.h, and group fwd declarations at top. Signed-off-by: Serge E. Hallyn Acked-by: "Eric W. Biederman" Acked-by: Daniel Lezcano Acked-by: David Howells Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 3 +++ ipc/msgutil.c | 1 + ipc/namespace.c | 9 +++++++-- kernel/nsproxy.c | 5 +++++ 4 files changed, 16 insertions(+), 2 deletions(-) (limited to 'kernel/nsproxy.c') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 51952989ad4..d3c32dcec62 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -15,6 +15,7 @@ #define IPCNS_CALLBACK_PRI 0 +struct user_namespace; struct ipc_ids { int in_use; @@ -56,6 +57,8 @@ struct ipc_namespace { unsigned int mq_msg_max; /* initialized to DFLT_MSGMAX */ unsigned int mq_msgsize_max; /* initialized to DFLT_MSGSIZEMAX */ + /* user_ns which owns the ipc ns */ + struct user_namespace *user_ns; }; extern struct ipc_namespace init_ipc_ns; diff --git a/ipc/msgutil.c b/ipc/msgutil.c index f095ee26883..8b5ce5d3f3e 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -32,6 +32,7 @@ 
struct ipc_namespace init_ipc_ns = { .mq_msg_max = DFLT_MSGMAX, .mq_msgsize_max = DFLT_MSGSIZEMAX, #endif + .user_ns = &init_user_ns, }; atomic_t nr_ipc_ns = ATOMIC_INIT(1); diff --git a/ipc/namespace.c b/ipc/namespace.c index a1094ff0bef..aa188996269 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -11,10 +11,11 @@ #include #include #include +#include #include "util.h" -static struct ipc_namespace *create_ipc_ns(void) +static struct ipc_namespace *create_ipc_ns(struct ipc_namespace *old_ns) { struct ipc_namespace *ns; int err; @@ -43,6 +44,9 @@ static struct ipc_namespace *create_ipc_ns(void) ipcns_notify(IPCNS_CREATED); register_ipcns_notifier(ns); + ns->user_ns = old_ns->user_ns; + get_user_ns(ns->user_ns); + return ns; } @@ -50,7 +54,7 @@ struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns) { if (!(flags & CLONE_NEWIPC)) return get_ipc_ns(ns); - return create_ipc_ns(); + return create_ipc_ns(ns); } /* @@ -105,6 +109,7 @@ static void free_ipc_ns(struct ipc_namespace *ns) * order to have a correct value when recomputing msgmni. */ ipcns_notify(IPCNS_REMOVED); + put_user_ns(ns->user_ns); } /* diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index b97fc9d04dd..ac8a56e90bf 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -80,6 +80,11 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, err = PTR_ERR(new_nsp->ipc_ns); goto out_ipc; } + if (new_nsp->ipc_ns != tsk->nsproxy->ipc_ns) { + put_user_ns(new_nsp->ipc_ns->user_ns); + new_nsp->ipc_ns->user_ns = task_cred_xxx(tsk, user)->user_ns; + get_user_ns(new_nsp->ipc_ns->user_ns); + } new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk)); if (IS_ERR(new_nsp->pid_ns)) { -- cgit v1.2.3-70-g09d2 From b0e77598f87107001a00b8a4ece9c95e4254ccc4 Mon Sep 17 00:00:00 2001 From: "Serge E. 
Hallyn" Date: Wed, 23 Mar 2011 16:43:24 -0700 Subject: userns: user namespaces: convert several capable() calls CAP_IPC_OWNER and CAP_IPC_LOCK can be checked against current_user_ns(), because the resource comes from current's own ipc namespace. setuid/setgid are to uids in own namespace, so again checks can be against current_user_ns(). Changelog: Jan 11: Use task_ns_capable() in place of sched_capable(). Jan 11: Use nsown_capable() as suggested by Bastian Blank. Jan 11: Clarify (hopefully) some logic in futex and sched.c Feb 15: use ns_capable for ipc, not nsown_capable Feb 23: let copy_ipcs handle setting ipc_ns->user_ns Feb 23: pass ns down rather than taking it from current [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Serge E. Hallyn Acked-by: "Eric W. Biederman" Acked-by: Daniel Lezcano Acked-by: David Howells Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 7 ++++--- ipc/msg.c | 8 ++++---- ipc/namespace.c | 13 ++++++++----- ipc/sem.c | 10 ++++++---- ipc/shm.c | 9 +++++---- ipc/util.c | 26 ++++++++++++++++---------- ipc/util.h | 5 +++-- kernel/futex.c | 11 ++++++++++- kernel/futex_compat.c | 11 ++++++++++- kernel/groups.c | 2 +- kernel/nsproxy.c | 7 +------ kernel/sched.c | 9 ++++++--- kernel/uid16.c | 2 +- 13 files changed, 75 insertions(+), 45 deletions(-) (limited to 'kernel/nsproxy.c') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index d3c32dcec62..a6d1655f960 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -5,6 +5,7 @@ #include #include #include +#include /* * ipc namespace events @@ -93,7 +94,7 @@ static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; } #if defined(CONFIG_IPC_NS) extern struct ipc_namespace *copy_ipcs(unsigned long flags, - struct ipc_namespace *ns); + struct task_struct *tsk); static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) { if (ns) @@ -104,12 +105,12 @@ 
static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) extern void put_ipc_ns(struct ipc_namespace *ns); #else static inline struct ipc_namespace *copy_ipcs(unsigned long flags, - struct ipc_namespace *ns) + struct task_struct *tsk) { if (flags & CLONE_NEWIPC) return ERR_PTR(-EINVAL); - return ns; + return tsk->nsproxy->ipc_ns; } static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) diff --git a/ipc/msg.c b/ipc/msg.c index 747b65507a9..0e732e92e22 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -421,7 +421,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, return -EFAULT; } - ipcp = ipcctl_pre_down(&msg_ids(ns), msqid, cmd, + ipcp = ipcctl_pre_down(ns, &msg_ids(ns), msqid, cmd, &msqid64.msg_perm, msqid64.msg_qbytes); if (IS_ERR(ipcp)) return PTR_ERR(ipcp); @@ -539,7 +539,7 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) success_return = 0; } err = -EACCES; - if (ipcperms(&msq->q_perm, S_IRUGO)) + if (ipcperms(ns, &msq->q_perm, S_IRUGO)) goto out_unlock; err = security_msg_queue_msgctl(msq, cmd); @@ -664,7 +664,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, struct msg_sender s; err = -EACCES; - if (ipcperms(&msq->q_perm, S_IWUGO)) + if (ipcperms(ns, &msq->q_perm, S_IWUGO)) goto out_unlock_free; err = security_msg_queue_msgsnd(msq, msg, msgflg); @@ -774,7 +774,7 @@ long do_msgrcv(int msqid, long *pmtype, void __user *mtext, struct list_head *tmp; msg = ERR_PTR(-EACCES); - if (ipcperms(&msq->q_perm, S_IRUGO)) + if (ipcperms(ns, &msq->q_perm, S_IRUGO)) goto out_unlock; msg = ERR_PTR(-EAGAIN); diff --git a/ipc/namespace.c b/ipc/namespace.c index aa188996269..3c3e5223e7e 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -15,7 +15,8 @@ #include "util.h" -static struct ipc_namespace *create_ipc_ns(struct ipc_namespace *old_ns) +static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk, + struct ipc_namespace *old_ns) { struct ipc_namespace *ns; int err; @@ 
-44,17 +45,19 @@ static struct ipc_namespace *create_ipc_ns(struct ipc_namespace *old_ns) ipcns_notify(IPCNS_CREATED); register_ipcns_notifier(ns); - ns->user_ns = old_ns->user_ns; - get_user_ns(ns->user_ns); + ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns); return ns; } -struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns) +struct ipc_namespace *copy_ipcs(unsigned long flags, + struct task_struct *tsk) { + struct ipc_namespace *ns = tsk->nsproxy->ipc_ns; + if (!(flags & CLONE_NEWIPC)) return get_ipc_ns(ns); - return create_ipc_ns(ns); + return create_ipc_ns(tsk, ns); } /* diff --git a/ipc/sem.c b/ipc/sem.c index 0e0d49bbb86..ae040a0727c 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -817,7 +817,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, } err = -EACCES; - if (ipcperms (&sma->sem_perm, S_IRUGO)) + if (ipcperms(ns, &sma->sem_perm, S_IRUGO)) goto out_unlock; err = security_sem_semctl(sma, cmd); @@ -862,7 +862,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, nsems = sma->sem_nsems; err = -EACCES; - if (ipcperms (&sma->sem_perm, (cmd==SETVAL||cmd==SETALL)?S_IWUGO:S_IRUGO)) + if (ipcperms(ns, &sma->sem_perm, + (cmd == SETVAL || cmd == SETALL) ? S_IWUGO : S_IRUGO)) goto out_unlock; err = security_sem_semctl(sma, cmd); @@ -1047,7 +1048,8 @@ static int semctl_down(struct ipc_namespace *ns, int semid, return -EFAULT; } - ipcp = ipcctl_pre_down(&sem_ids(ns), semid, cmd, &semid64.sem_perm, 0); + ipcp = ipcctl_pre_down(ns, &sem_ids(ns), semid, cmd, + &semid64.sem_perm, 0); if (IS_ERR(ipcp)) return PTR_ERR(ipcp); @@ -1386,7 +1388,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, goto out_unlock_free; error = -EACCES; - if (ipcperms(&sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) + if (ipcperms(ns, &sma->sem_perm, alter ? 
S_IWUGO : S_IRUGO)) goto out_unlock_free; error = security_sem_semop(sma, sops, nsops, alter); diff --git a/ipc/shm.c b/ipc/shm.c index 7d3bb22a930..8644452f5c4 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -623,7 +623,8 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, return -EFAULT; } - ipcp = ipcctl_pre_down(&shm_ids(ns), shmid, cmd, &shmid64.shm_perm, 0); + ipcp = ipcctl_pre_down(ns, &shm_ids(ns), shmid, cmd, + &shmid64.shm_perm, 0); if (IS_ERR(ipcp)) return PTR_ERR(ipcp); @@ -737,7 +738,7 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) result = 0; } err = -EACCES; - if (ipcperms (&shp->shm_perm, S_IRUGO)) + if (ipcperms(ns, &shp->shm_perm, S_IRUGO)) goto out_unlock; err = security_shm_shmctl(shp, cmd); if (err) @@ -773,7 +774,7 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) audit_ipc_obj(&(shp->shm_perm)); - if (!capable(CAP_IPC_LOCK)) { + if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) { uid_t euid = current_euid(); err = -EPERM; if (euid != shp->shm_perm.uid && @@ -888,7 +889,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) } err = -EACCES; - if (ipcperms(&shp->shm_perm, acc_mode)) + if (ipcperms(ns, &shp->shm_perm, acc_mode)) goto out_unlock; err = security_shm_shmat(shp, shmaddr, shmflg); diff --git a/ipc/util.c b/ipc/util.c index 69a0cc13d96..8fd1b891ec0 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -329,12 +329,14 @@ retry: * * It is called with ipc_ids.rw_mutex and ipcp->lock held. 
*/ -static int ipc_check_perms(struct kern_ipc_perm *ipcp, struct ipc_ops *ops, - struct ipc_params *params) +static int ipc_check_perms(struct ipc_namespace *ns, + struct kern_ipc_perm *ipcp, + struct ipc_ops *ops, + struct ipc_params *params) { int err; - if (ipcperms(ipcp, params->flg)) + if (ipcperms(ns, ipcp, params->flg)) err = -EACCES; else { err = ops->associate(ipcp, params->flg); @@ -396,7 +398,7 @@ retry: * ipc_check_perms returns the IPC id on * success */ - err = ipc_check_perms(ipcp, ops, params); + err = ipc_check_perms(ns, ipcp, ops, params); } ipc_unlock(ipcp); } @@ -610,10 +612,12 @@ void ipc_rcu_putref(void *ptr) * * Check user, group, other permissions for access * to ipc resources. return 0 if allowed + * + * @flag will most probably be 0 or S_...UGO from */ -int ipcperms (struct kern_ipc_perm *ipcp, short flag) -{ /* flag will most probably be 0 or S_...UGO from */ +int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flag) +{ uid_t euid = current_euid(); int requested_mode, granted_mode; @@ -627,7 +631,7 @@ int ipcperms (struct kern_ipc_perm *ipcp, short flag) granted_mode >>= 3; /* is there some bit set in requested_mode but not in granted_mode? */ if ((requested_mode & ~granted_mode & 0007) && - !capable(CAP_IPC_OWNER)) + !ns_capable(ns->user_ns, CAP_IPC_OWNER)) return -1; return security_ipc_permission(ipcp, flag); @@ -765,6 +769,7 @@ void ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out) /** * ipcctl_pre_down - retrieve an ipc and check permissions for some IPC_XXX cmd + * @ns: the ipc namespace * @ids: the table of ids where to look for the ipc * @id: the id of the ipc to retrieve * @cmd: the cmd to check @@ -779,7 +784,8 @@ void ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out) * - returns the ipc with both ipc and rw_mutex locks held in case of success * or an err-code without any lock held otherwise. 
*/ -struct kern_ipc_perm *ipcctl_pre_down(struct ipc_ids *ids, int id, int cmd, +struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, + struct ipc_ids *ids, int id, int cmd, struct ipc64_perm *perm, int extra_perm) { struct kern_ipc_perm *ipcp; @@ -799,8 +805,8 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_ids *ids, int id, int cmd, perm->gid, perm->mode); euid = current_euid(); - if (euid == ipcp->cuid || - euid == ipcp->uid || capable(CAP_SYS_ADMIN)) + if (euid == ipcp->cuid || euid == ipcp->uid || + ns_capable(ns->user_ns, CAP_SYS_ADMIN)) return ipcp; err = -EPERM; diff --git a/ipc/util.h b/ipc/util.h index 764b51a37a6..6f5c20bedaa 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -103,7 +103,7 @@ int ipc_get_maxid(struct ipc_ids *); void ipc_rmid(struct ipc_ids *, struct kern_ipc_perm *); /* must be called with ipcp locked */ -int ipcperms(struct kern_ipc_perm *ipcp, short flg); +int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg); /* for rare, potentially huge allocations. 
* both function can sleep @@ -126,7 +126,8 @@ struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int); void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out); void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out); void ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out); -struct kern_ipc_perm *ipcctl_pre_down(struct ipc_ids *ids, int id, int cmd, +struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, + struct ipc_ids *ids, int id, int cmd, struct ipc64_perm *perm, int extra_perm); #ifndef __ARCH_WANT_IPC_PARSE_VERSION diff --git a/kernel/futex.c b/kernel/futex.c index bda41571538..6570c459f31 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2418,10 +2418,19 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, goto err_unlock; ret = -EPERM; pcred = __task_cred(p); + /* If victim is in different user_ns, then uids are not + comparable, so we must have CAP_SYS_PTRACE */ + if (cred->user->user_ns != pcred->user->user_ns) { + if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) + goto err_unlock; + goto ok; + } + /* If victim is in same user_ns, then uids are comparable */ if (cred->euid != pcred->euid && cred->euid != pcred->uid && - !capable(CAP_SYS_PTRACE)) + !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) goto err_unlock; +ok: head = p->robust_list; rcu_read_unlock(); } diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index a7934ac75e5..5f9e689dc8f 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -153,10 +153,19 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, goto err_unlock; ret = -EPERM; pcred = __task_cred(p); + /* If victim is in different user_ns, then uids are not + comparable, so we must have CAP_SYS_PTRACE */ + if (cred->user->user_ns != pcred->user->user_ns) { + if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) + goto err_unlock; + goto ok; + } + /* If victim is in same user_ns, then uids are comparable */ if (cred->euid != pcred->euid && 
cred->euid != pcred->uid && - !capable(CAP_SYS_PTRACE)) + !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) goto err_unlock; +ok: head = p->compat_robust_list; rcu_read_unlock(); } diff --git a/kernel/groups.c b/kernel/groups.c index 253dc0f35cf..1cc476d52dd 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -233,7 +233,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist) struct group_info *group_info; int retval; - if (!capable(CAP_SETGID)) + if (!nsown_capable(CAP_SETGID)) return -EPERM; if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index ac8a56e90bf..a05d191ffdd 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -75,16 +75,11 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, goto out_uts; } - new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns); + new_nsp->ipc_ns = copy_ipcs(flags, tsk); if (IS_ERR(new_nsp->ipc_ns)) { err = PTR_ERR(new_nsp->ipc_ns); goto out_ipc; } - if (new_nsp->ipc_ns != tsk->nsproxy->ipc_ns) { - put_user_ns(new_nsp->ipc_ns->user_ns); - new_nsp->ipc_ns->user_ns = task_cred_xxx(tsk, user)->user_ns; - get_user_ns(new_nsp->ipc_ns->user_ns); - } new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk)); if (IS_ERR(new_nsp->pid_ns)) { diff --git a/kernel/sched.c b/kernel/sched.c index a172494a9a6..480adeb63f8 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4892,8 +4892,11 @@ static bool check_same_owner(struct task_struct *p) rcu_read_lock(); pcred = __task_cred(p); - match = (cred->euid == pcred->euid || - cred->euid == pcred->uid); + if (cred->user->user_ns == pcred->user->user_ns) + match = (cred->euid == pcred->euid || + cred->euid == pcred->uid); + else + match = false; rcu_read_unlock(); return match; } @@ -5221,7 +5224,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) goto out_free_cpus_allowed; } retval = -EPERM; - if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) + if (!check_same_owner(p) && 
!task_ns_capable(p, CAP_SYS_NICE)) goto out_unlock; retval = security_task_setscheduler(p); diff --git a/kernel/uid16.c b/kernel/uid16.c index 419209893d8..51c6e89e861 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -189,7 +189,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist) struct group_info *group_info; int retval; - if (!capable(CAP_SETGID)) + if (!nsown_capable(CAP_SETGID)) return -EPERM; if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; -- cgit v1.2.3-70-g09d2