From b6dff3ec5e116e3af6f537d4caedcad6b9e5082a Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:16 +1100 Subject: CRED: Separate task security context from task_struct Separate the task security context from task_struct. At this point, the security data is temporarily embedded in the task_struct with two pointers pointing to it. Note that the Alpha arch is altered as it refers to (E)UID and (E)GID in entry.S via asm-offsets. With comment fixes Signed-off-by: Marc Dionne Signed-off-by: David Howells Acked-by: James Morris Acked-by: Serge Hallyn Signed-off-by: James Morris --- kernel/fork.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index f6083561dfe..81fdc773390 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -147,8 +147,8 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(tsk == current); security_task_free(tsk); - free_uid(tsk->user); - put_group_info(tsk->group_info); + free_uid(tsk->__temp_cred.user); + put_group_info(tsk->__temp_cred.group_info); delayacct_tsk_free(tsk); if (!profile_handoff_task(tsk)) @@ -969,17 +969,18 @@ static struct task_struct *copy_process(unsigned long clone_flags, DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif + p->cred = &p->__temp_cred; retval = -EAGAIN; - if (atomic_read(&p->user->processes) >= + if (atomic_read(&p->cred->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && - p->user != current->nsproxy->user_ns->root_user) + p->cred->user != current->nsproxy->user_ns->root_user) goto bad_fork_free; } - atomic_inc(&p->user->__count); - atomic_inc(&p->user->processes); - get_group_info(p->group_info); + atomic_inc(&p->cred->user->__count); + atomic_inc(&p->cred->user->processes); + get_group_info(p->cred->group_info); /* * If multiple threads are within copy_process(), then this 
check @@ -1035,9 +1036,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); #ifdef CONFIG_SECURITY - p->security = NULL; + p->cred->security = NULL; #endif - p->cap_bset = current->cap_bset; p->io_context = NULL; p->audit_context = NULL; cgroup_fork(p); @@ -1298,9 +1298,9 @@ bad_fork_cleanup_cgroup: bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: - put_group_info(p->group_info); - atomic_dec(&p->user->processes); - free_uid(p->user); + put_group_info(p->cred->group_info); + atomic_dec(&p->cred->user->processes); + free_uid(p->cred->user); bad_fork_free: free_task(p); fork_out: -- cgit v1.2.3-70-g09d2 From f1752eec6145c97163dbce62d17cf5d928e28a27 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:17 +1100 Subject: CRED: Detach the credentials from task_struct Detach the credentials from task_struct, duplicating them in copy_process() and releasing them in __put_task_struct(). 
Signed-off-by: David Howells Acked-by: James Morris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/cred.h | 29 ++++++++++++++ include/linux/init_task.h | 16 +------- include/linux/sched.h | 1 - include/linux/security.h | 26 ++++++------- kernel/Makefile | 2 +- kernel/cred.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++ kernel/fork.c | 24 +++--------- security/capability.c | 8 ++-- security/security.c | 8 ++-- security/selinux/hooks.c | 32 ++++++++-------- security/smack/smack_lsm.c | 20 +++++----- 11 files changed, 179 insertions(+), 83 deletions(-) create mode 100644 kernel/cred.c (limited to 'kernel/fork.c') diff --git a/include/linux/cred.h b/include/linux/cred.h index 3e65587a72e..a7a686074cb 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -158,4 +158,33 @@ do { \ *(_gid) = current->cred->fsgid; \ } while(0) +extern void __put_cred(struct cred *); +extern int copy_creds(struct task_struct *, unsigned long); + +/** + * get_cred - Get a reference on a set of credentials + * @cred: The credentials to reference + * + * Get a reference on the specified set of credentials. The caller must + * release the reference. + */ +static inline struct cred *get_cred(struct cred *cred) +{ + atomic_inc(&cred->usage); + return cred; +} + +/** + * put_cred - Release a reference to a set of credentials + * @cred: The credentials to release + * + * Release a reference to a set of credentials, deleting them when the last ref + * is released. 
+ */ +static inline void put_cred(struct cred *cred) +{ + if (atomic_dec_and_test(&(cred)->usage)) + __put_cred(cred); +} + #endif /* _LINUX_CRED_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 9de41ccd67b..5e24c54b6df 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -115,19 +115,6 @@ extern struct group_info init_groups; extern struct cred init_cred; -#define INIT_CRED(p) \ -{ \ - .usage = ATOMIC_INIT(3), \ - .securebits = SECUREBITS_DEFAULT, \ - .cap_inheritable = CAP_INIT_INH_SET, \ - .cap_permitted = CAP_FULL_SET, \ - .cap_effective = CAP_INIT_EFF_SET, \ - .cap_bset = CAP_INIT_BSET, \ - .user = INIT_USER, \ - .group_info = &init_groups, \ - .lock = __SPIN_LOCK_UNLOCKED(p.lock), \ -} - /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -162,8 +149,7 @@ extern struct cred init_cred; .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ - .__temp_cred = INIT_CRED(tsk.__temp_cred), \ - .cred = &tsk.__temp_cred, \ + .cred = &init_cred, \ .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index c8b92502354..740cf946c8c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1151,7 +1151,6 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - struct cred __temp_cred __deprecated; /* temporary credentials to be removed */ struct cred *cred; /* actual/objective task credentials */ char comm[TASK_COMM_LEN]; /* executable name excluding path diff --git a/include/linux/security.h b/include/linux/security.h index 9f305d4a31a..9239cc11eb9 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -593,15 +593,15 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * manual page for definitions of the @clone_flags. 
* @clone_flags contains the flags indicating what should be shared. * Return 0 if permission is granted. - * @task_alloc_security: - * @p contains the task_struct for child process. - * Allocate and attach a security structure to the p->security field. The - * security field is initialized to NULL when the task structure is + * @cred_alloc_security: + * @cred contains the cred struct for child process. + * Allocate and attach a security structure to the cred->security field. + * The security field is initialized to NULL when the task structure is * allocated. * Return 0 if operation was successful. - * @task_free_security: - * @p contains the task_struct for process. - * Deallocate and clear the p->security field. + * @cred_free: + * @cred points to the credentials. + * Deallocate and clear the cred->security field in a set of credentials. * @task_setuid: * Check permission before setting one or more of the user identity * attributes of the current process. The @flags parameter indicates @@ -1405,8 +1405,8 @@ struct security_operations { int (*dentry_open) (struct file *file); int (*task_create) (unsigned long clone_flags); - int (*task_alloc_security) (struct task_struct *p); - void (*task_free_security) (struct task_struct *p); + int (*cred_alloc_security) (struct cred *cred); + void (*cred_free) (struct cred *cred); int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags); int (*task_post_setuid) (uid_t old_ruid /* or fsuid */ , uid_t old_euid, uid_t old_suid, int flags); @@ -1660,8 +1660,8 @@ int security_file_send_sigiotask(struct task_struct *tsk, int security_file_receive(struct file *file); int security_dentry_open(struct file *file); int security_task_create(unsigned long clone_flags); -int security_task_alloc(struct task_struct *p); -void security_task_free(struct task_struct *p); +int security_cred_alloc(struct cred *cred); +void security_cred_free(struct cred *cred); int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags); int 
security_task_post_setuid(uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags); @@ -2181,12 +2181,12 @@ static inline int security_task_create(unsigned long clone_flags) return 0; } -static inline int security_task_alloc(struct task_struct *p) +static inline int security_cred_alloc(struct cred *cred) { return 0; } -static inline void security_task_free(struct task_struct *p) +static inline void security_cred_free(struct cred *cred) { } static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, diff --git a/kernel/Makefile b/kernel/Makefile index 9a3ec66a9d8..5a6a612c302 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,7 +9,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ - notifier.o ksysfs.o pm_qos_params.o sched_clock.o + notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o CFLAGS_REMOVE_sched.o = -mno-spe diff --git a/kernel/cred.c b/kernel/cred.c new file mode 100644 index 00000000000..833244a7cb0 --- /dev/null +++ b/kernel/cred.c @@ -0,0 +1,96 @@ +/* Task credentials management + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. 
+ */ +#include +#include +#include +#include +#include +#include +#include + +/* + * The initial credentials for the initial task + */ +struct cred init_cred = { + .usage = ATOMIC_INIT(3), + .securebits = SECUREBITS_DEFAULT, + .cap_inheritable = CAP_INIT_INH_SET, + .cap_permitted = CAP_FULL_SET, + .cap_effective = CAP_INIT_EFF_SET, + .cap_bset = CAP_INIT_BSET, + .user = INIT_USER, + .group_info = &init_groups, +}; + +/* + * The RCU callback to actually dispose of a set of credentials + */ +static void put_cred_rcu(struct rcu_head *rcu) +{ + struct cred *cred = container_of(rcu, struct cred, rcu); + + BUG_ON(atomic_read(&cred->usage) != 0); + + key_put(cred->thread_keyring); + key_put(cred->request_key_auth); + put_group_info(cred->group_info); + free_uid(cred->user); + security_cred_free(cred); + kfree(cred); +} + +/** + * __put_cred - Destroy a set of credentials + * @sec: The record to release + * + * Destroy a set of credentials on which no references remain. + */ +void __put_cred(struct cred *cred) +{ + call_rcu(&cred->rcu, put_cred_rcu); +} +EXPORT_SYMBOL(__put_cred); + +/* + * Copy credentials for the new process created by fork() + */ +int copy_creds(struct task_struct *p, unsigned long clone_flags) +{ + struct cred *pcred; + int ret; + + pcred = kmemdup(p->cred, sizeof(*p->cred), GFP_KERNEL); + if (!pcred) + return -ENOMEM; + +#ifdef CONFIG_SECURITY + pcred->security = NULL; +#endif + + ret = security_cred_alloc(pcred); + if (ret < 0) { + kfree(pcred); + return ret; + } + + atomic_set(&pcred->usage, 1); + get_group_info(pcred->group_info); + get_uid(pcred->user); + key_get(pcred->thread_keyring); + key_get(pcred->request_key_auth); + + atomic_inc(&pcred->user->processes); + + /* RCU assignment is unneeded here as no-one can have accessed this + * pointer yet, barring us */ + p->cred = pcred; + return 0; +} diff --git a/kernel/fork.c b/kernel/fork.c index 81fdc773390..c932e283ddf 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -146,9 +146,7 @@ void 
__put_task_struct(struct task_struct *tsk) WARN_ON(atomic_read(&tsk->usage)); WARN_ON(tsk == current); - security_task_free(tsk); - free_uid(tsk->__temp_cred.user); - put_group_info(tsk->__temp_cred.group_info); + put_cred(tsk->cred); delayacct_tsk_free(tsk); if (!profile_handoff_task(tsk)) @@ -969,7 +967,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif - p->cred = &p->__temp_cred; retval = -EAGAIN; if (atomic_read(&p->cred->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { @@ -978,9 +975,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto bad_fork_free; } - atomic_inc(&p->cred->user->__count); - atomic_inc(&p->cred->user->processes); - get_group_info(p->cred->group_info); + retval = copy_creds(p, clone_flags); + if (retval < 0) + goto bad_fork_free; /* * If multiple threads are within copy_process(), then this check @@ -1035,9 +1032,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, do_posix_clock_monotonic_gettime(&p->start_time); p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); -#ifdef CONFIG_SECURITY - p->cred->security = NULL; -#endif p->io_context = NULL; p->audit_context = NULL; cgroup_fork(p); @@ -1082,10 +1076,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, /* Perform scheduler related setup. Assign this task to a CPU. 
*/ sched_fork(p, clone_flags); - if ((retval = security_task_alloc(p))) - goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) - goto bad_fork_cleanup_security; + goto bad_fork_cleanup_policy; /* copy all the process information */ if ((retval = copy_semundo(clone_flags, p))) goto bad_fork_cleanup_audit; @@ -1284,8 +1276,6 @@ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); -bad_fork_cleanup_security: - security_task_free(p); bad_fork_cleanup_policy: #ifdef CONFIG_NUMA mpol_put(p->mempolicy); @@ -1298,9 +1288,7 @@ bad_fork_cleanup_cgroup: bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: - put_group_info(p->cred->group_info); - atomic_dec(&p->cred->user->processes); - free_uid(p->cred->user); + put_cred(p->cred); bad_fork_free: free_task(p); fork_out: diff --git a/security/capability.c b/security/capability.c index 24587481903..6c4b5137ca7 100644 --- a/security/capability.c +++ b/security/capability.c @@ -340,12 +340,12 @@ static int cap_task_create(unsigned long clone_flags) return 0; } -static int cap_task_alloc_security(struct task_struct *p) +static int cap_cred_alloc_security(struct cred *cred) { return 0; } -static void cap_task_free_security(struct task_struct *p) +static void cap_cred_free(struct cred *cred) { } @@ -890,8 +890,8 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, file_receive); set_to_cap_if_null(ops, dentry_open); set_to_cap_if_null(ops, task_create); - set_to_cap_if_null(ops, task_alloc_security); - set_to_cap_if_null(ops, task_free_security); + set_to_cap_if_null(ops, cred_alloc_security); + set_to_cap_if_null(ops, cred_free); set_to_cap_if_null(ops, task_setuid); set_to_cap_if_null(ops, task_post_setuid); set_to_cap_if_null(ops, task_setgid); diff --git a/security/security.c b/security/security.c index 81c956a1230..d058f7d5b10 100644 --- a/security/security.c +++ b/security/security.c @@ -616,14 +616,14 @@ int 
security_task_create(unsigned long clone_flags) return security_ops->task_create(clone_flags); } -int security_task_alloc(struct task_struct *p) +int security_cred_alloc(struct cred *cred) { - return security_ops->task_alloc_security(p); + return security_ops->cred_alloc_security(cred); } -void security_task_free(struct task_struct *p) +void security_cred_free(struct cred *cred) { - security_ops->task_free_security(p); + security_ops->cred_free(cred); } int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 328308f2882..658435dce37 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -158,7 +158,7 @@ static int selinux_secmark_enabled(void) /* Allocate and free functions for each kind of security blob. */ -static int task_alloc_security(struct task_struct *task) +static int cred_alloc_security(struct cred *cred) { struct task_security_struct *tsec; @@ -167,18 +167,11 @@ static int task_alloc_security(struct task_struct *task) return -ENOMEM; tsec->osid = tsec->sid = SECINITSID_UNLABELED; - task->cred->security = tsec; + cred->security = tsec; return 0; } -static void task_free_security(struct task_struct *task) -{ - struct task_security_struct *tsec = task->cred->security; - task->cred->security = NULL; - kfree(tsec); -} - static int inode_alloc_security(struct inode *inode) { struct task_security_struct *tsec = current->cred->security; @@ -3184,17 +3177,17 @@ static int selinux_task_create(unsigned long clone_flags) return task_has_perm(current, current, PROCESS__FORK); } -static int selinux_task_alloc_security(struct task_struct *tsk) +static int selinux_cred_alloc_security(struct cred *cred) { struct task_security_struct *tsec1, *tsec2; int rc; tsec1 = current->cred->security; - rc = task_alloc_security(tsk); + rc = cred_alloc_security(cred); if (rc) return rc; - tsec2 = tsk->cred->security; + tsec2 = cred->security; tsec2->osid = tsec1->osid; tsec2->sid = 
tsec1->sid; @@ -3208,9 +3201,14 @@ static int selinux_task_alloc_security(struct task_struct *tsk) return 0; } -static void selinux_task_free_security(struct task_struct *tsk) +/* + * detach and free the LSM part of a set of credentials + */ +static void selinux_cred_free(struct cred *cred) { - task_free_security(tsk); + struct task_security_struct *tsec = cred->security; + cred->security = NULL; + kfree(tsec); } static int selinux_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) @@ -5552,8 +5550,8 @@ static struct security_operations selinux_ops = { .dentry_open = selinux_dentry_open, .task_create = selinux_task_create, - .task_alloc_security = selinux_task_alloc_security, - .task_free_security = selinux_task_free_security, + .cred_alloc_security = selinux_cred_alloc_security, + .cred_free = selinux_cred_free, .task_setuid = selinux_task_setuid, .task_post_setuid = selinux_task_post_setuid, .task_setgid = selinux_task_setgid, @@ -5683,7 +5681,7 @@ static __init int selinux_init(void) printk(KERN_INFO "SELinux: Initializing.\n"); /* Set the security state for the initial task. */ - if (task_alloc_security(current)) + if (cred_alloc_security(current->cred)) panic("SELinux: Failed to initialize initial task.\n"); tsec = current->cred->security; tsec->osid = tsec->sid = SECINITSID_KERNEL; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 791da238d04..cc837314fb0 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -975,8 +975,8 @@ static int smack_file_receive(struct file *file) */ /** - * smack_task_alloc_security - "allocate" a task blob - * @tsk: the task in need of a blob + * smack_cred_alloc_security - "allocate" a task cred blob + * @cred: the task creds in need of a blob * * Smack isn't using copies of blobs. Everyone * points to an immutable list. No alloc required. 
@@ -984,24 +984,24 @@ static int smack_file_receive(struct file *file) * * Always returns 0 */ -static int smack_task_alloc_security(struct task_struct *tsk) +static int smack_cred_alloc_security(struct cred *cred) { - tsk->cred->security = current->cred->security; + cred->security = current->cred->security; return 0; } /** - * smack_task_free_security - "free" a task blob - * @task: the task with the blob + * smack_cred_free - "free" task-level security credentials + * @cred: the credentials in question * * Smack isn't using copies of blobs. Everyone * points to an immutable list. The blobs never go away. * There is no leak here. */ -static void smack_task_free_security(struct task_struct *task) +static void smack_cred_free(struct cred *cred) { - task->cred->security = NULL; + cred->security = NULL; } /** @@ -2630,8 +2630,8 @@ struct security_operations smack_ops = { .file_send_sigiotask = smack_file_send_sigiotask, .file_receive = smack_file_receive, - .task_alloc_security = smack_task_alloc_security, - .task_free_security = smack_task_free_security, + .cred_alloc_security = smack_cred_alloc_security, + .cred_free = smack_cred_free, .task_post_setuid = cap_task_post_setuid, .task_setpgid = smack_task_setpgid, .task_getpgid = smack_task_getpgid, -- cgit v1.2.3-70-g09d2 From bb952bb98a7e479262c7eb25d5592545a3af147d Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:20 +1100 Subject: CRED: Separate per-task-group keyrings from signal_struct Separate per-task-group keyrings from signal_struct and dangle their anchor from the cred struct rather than the signal_struct. 
Signed-off-by: David Howells Reviewed-by: James Morris Signed-off-by: James Morris --- include/linux/cred.h | 16 +++++++ include/linux/key.h | 8 +--- include/linux/sched.h | 6 --- kernel/cred.c | 63 +++++++++++++++++++++++++++ kernel/fork.c | 7 --- security/keys/process_keys.c | 100 +++++++++++++++++-------------------------- security/keys/request_key.c | 34 ++++++--------- 7 files changed, 135 insertions(+), 99 deletions(-) (limited to 'kernel/fork.c') diff --git a/include/linux/cred.h b/include/linux/cred.h index 166ce4ddba6..62b9e532422 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -71,6 +71,21 @@ extern int groups_search(const struct group_info *, gid_t); extern int in_group_p(gid_t); extern int in_egroup_p(gid_t); +/* + * The common credentials for a thread group + * - shared by CLONE_THREAD + */ +#ifdef CONFIG_KEYS +struct thread_group_cred { + atomic_t usage; + pid_t tgid; /* thread group process ID */ + spinlock_t lock; + struct key *session_keyring; /* keyring inherited over fork */ + struct key *process_keyring; /* keyring private to this process */ + struct rcu_head rcu; /* RCU deletion hook */ +}; +#endif + /* * The security context of a task * @@ -114,6 +129,7 @@ struct cred { * keys to */ struct key *thread_keyring; /* keyring private to this thread */ struct key *request_key_auth; /* assumed request_key authority */ + struct thread_group_cred *tgcred; /* thread-group shared credentials */ #endif #ifdef CONFIG_SECURITY void *security; /* subjective LSM security */ diff --git a/include/linux/key.h b/include/linux/key.h index df709e1af3c..0836cc838b0 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -278,9 +278,7 @@ extern ctl_table key_sysctls[]; */ extern void switch_uid_keyring(struct user_struct *new_user); extern int copy_keys(unsigned long clone_flags, struct task_struct *tsk); -extern int copy_thread_group_keys(struct task_struct *tsk); extern void exit_keys(struct task_struct *tsk); -extern void 
exit_thread_group_keys(struct signal_struct *tg); extern int suid_keys(struct task_struct *tsk); extern int exec_keys(struct task_struct *tsk); extern void key_fsuid_changed(struct task_struct *tsk); @@ -289,8 +287,8 @@ extern void key_init(void); #define __install_session_keyring(keyring) \ ({ \ - struct key *old_session = current->signal->session_keyring; \ - current->signal->session_keyring = keyring; \ + struct key *old_session = current->cred->tgcred->session_keyring; \ + current->cred->tgcred->session_keyring = keyring; \ old_session; \ }) @@ -308,9 +306,7 @@ extern void key_init(void); #define switch_uid_keyring(u) do { } while(0) #define __install_session_keyring(k) ({ NULL; }) #define copy_keys(f,t) 0 -#define copy_thread_group_keys(t) 0 #define exit_keys(t) do { } while(0) -#define exit_thread_group_keys(tg) do { } while(0) #define suid_keys(t) do { } while(0) #define exec_keys(t) do { } while(0) #define key_fsuid_changed(t) do { } while(0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 740cf946c8c..2913252989b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -571,12 +571,6 @@ struct signal_struct { */ struct rlimit rlim[RLIM_NLIMITS]; - /* keep the process-shared keyrings here so that they do the right - * thing in threads created with CLONE_THREAD */ -#ifdef CONFIG_KEYS - struct key *session_keyring; /* keyring inherited over fork */ - struct key *process_keyring; /* keyring private to this process */ -#endif #ifdef CONFIG_BSD_PROCESS_ACCT struct pacct_struct pacct; /* per-process accounting information */ #endif diff --git a/kernel/cred.c b/kernel/cred.c index 833244a7cb0..ac73e361768 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -16,6 +16,17 @@ #include #include +/* + * The common credentials for the initial task's thread group + */ +#ifdef CONFIG_KEYS +static struct thread_group_cred init_tgcred = { + .usage = ATOMIC_INIT(2), + .tgid = 0, + .lock = SPIN_LOCK_UNLOCKED, +}; +#endif + /* * The initial credentials 
for the initial task */ @@ -28,8 +39,41 @@ struct cred init_cred = { .cap_bset = CAP_INIT_BSET, .user = INIT_USER, .group_info = &init_groups, +#ifdef CONFIG_KEYS + .tgcred = &init_tgcred, +#endif }; +/* + * Dispose of the shared task group credentials + */ +#ifdef CONFIG_KEYS +static void release_tgcred_rcu(struct rcu_head *rcu) +{ + struct thread_group_cred *tgcred = + container_of(rcu, struct thread_group_cred, rcu); + + BUG_ON(atomic_read(&tgcred->usage) != 0); + + key_put(tgcred->session_keyring); + key_put(tgcred->process_keyring); + kfree(tgcred); +} +#endif + +/* + * Release a set of thread group credentials. + */ +static void release_tgcred(struct cred *cred) +{ +#ifdef CONFIG_KEYS + struct thread_group_cred *tgcred = cred->tgcred; + + if (atomic_dec_and_test(&tgcred->usage)) + call_rcu(&tgcred->rcu, release_tgcred_rcu); +#endif +} + /* * The RCU callback to actually dispose of a set of credentials */ @@ -41,6 +85,7 @@ static void put_cred_rcu(struct rcu_head *rcu) key_put(cred->thread_keyring); key_put(cred->request_key_auth); + release_tgcred(cred); put_group_info(cred->group_info); free_uid(cred->user); security_cred_free(cred); @@ -71,12 +116,30 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) if (!pcred) return -ENOMEM; +#ifdef CONFIG_KEYS + if (clone_flags & CLONE_THREAD) { + atomic_inc(&pcred->tgcred->usage); + } else { + pcred->tgcred = kmalloc(sizeof(struct cred), GFP_KERNEL); + if (!pcred->tgcred) { + kfree(pcred); + return -ENOMEM; + } + atomic_set(&pcred->tgcred->usage, 1); + spin_lock_init(&pcred->tgcred->lock); + pcred->tgcred->process_keyring = NULL; + pcred->tgcred->session_keyring = + key_get(p->cred->tgcred->session_keyring); + } +#endif + #ifdef CONFIG_SECURITY pcred->security = NULL; #endif ret = security_cred_alloc(pcred); if (ret < 0) { + release_tgcred(pcred); kfree(pcred); return ret; } diff --git a/kernel/fork.c b/kernel/fork.c index c932e283ddf..ded1972672a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ 
-802,12 +802,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) if (!sig) return -ENOMEM; - ret = copy_thread_group_keys(tsk); - if (ret < 0) { - kmem_cache_free(signal_cachep, sig); - return ret; - } - atomic_set(&sig->count, 1); atomic_set(&sig->live, 1); init_waitqueue_head(&sig->wait_chldexit); @@ -852,7 +846,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) void __cleanup_signal(struct signal_struct *sig) { thread_group_cputime_free(sig); - exit_thread_group_keys(sig); tty_kref_put(sig->tty); kmem_cache_free(signal_cachep, sig); } diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 212601ebaa4..70ee93406f3 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -189,7 +189,7 @@ int install_process_keyring(void) might_sleep(); - if (!tsk->signal->process_keyring) { + if (!tsk->cred->tgcred->process_keyring) { sprintf(buf, "_pid.%u", tsk->tgid); keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, @@ -200,12 +200,12 @@ int install_process_keyring(void) } /* attach keyring */ - spin_lock_irq(&tsk->sighand->siglock); - if (!tsk->signal->process_keyring) { - tsk->signal->process_keyring = keyring; + spin_lock_irq(&tsk->cred->tgcred->lock); + if (!tsk->cred->tgcred->process_keyring) { + tsk->cred->tgcred->process_keyring = keyring; keyring = NULL; } - spin_unlock_irq(&tsk->sighand->siglock); + spin_unlock_irq(&tsk->cred->tgcred->lock); key_put(keyring); } @@ -235,11 +235,11 @@ static int install_session_keyring(struct key *keyring) sprintf(buf, "_ses.%u", tsk->tgid); flags = KEY_ALLOC_QUOTA_OVERRUN; - if (tsk->signal->session_keyring) + if (tsk->cred->tgcred->session_keyring) flags = KEY_ALLOC_IN_QUOTA; - keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, - flags, NULL); + keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, + tsk, flags, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); } @@ -248,10 +248,10 @@ static 
int install_session_keyring(struct key *keyring) } /* install the keyring */ - spin_lock_irq(&tsk->sighand->siglock); - old = tsk->signal->session_keyring; - rcu_assign_pointer(tsk->signal->session_keyring, keyring); - spin_unlock_irq(&tsk->sighand->siglock); + spin_lock_irq(&tsk->cred->tgcred->lock); + old = tsk->cred->tgcred->session_keyring; + rcu_assign_pointer(tsk->cred->tgcred->session_keyring, keyring); + spin_unlock_irq(&tsk->cred->tgcred->lock); /* we're using RCU on the pointer, but there's no point synchronising * on it if it didn't previously point to anything */ @@ -264,28 +264,6 @@ static int install_session_keyring(struct key *keyring) } /* end install_session_keyring() */ -/*****************************************************************************/ -/* - * copy the keys in a thread group for fork without CLONE_THREAD - */ -int copy_thread_group_keys(struct task_struct *tsk) -{ - key_check(current->thread_group->session_keyring); - key_check(current->thread_group->process_keyring); - - /* no process keyring yet */ - tsk->signal->process_keyring = NULL; - - /* same session keyring */ - rcu_read_lock(); - tsk->signal->session_keyring = - key_get(rcu_dereference(current->signal->session_keyring)); - rcu_read_unlock(); - - return 0; - -} /* end copy_thread_group_keys() */ - /*****************************************************************************/ /* * copy the keys for fork @@ -305,17 +283,6 @@ int copy_keys(unsigned long clone_flags, struct task_struct *tsk) } /* end copy_keys() */ -/*****************************************************************************/ -/* - * dispose of thread group keys upon thread group destruction - */ -void exit_thread_group_keys(struct signal_struct *tg) -{ - key_put(tg->session_keyring); - key_put(tg->process_keyring); - -} /* end exit_thread_group_keys() */ - /*****************************************************************************/ /* * dispose of per-thread keys upon thread exit @@ -344,10 +311,10 @@ int 
exec_keys(struct task_struct *tsk) key_put(old); /* discard the process keyring from a newly exec'd task */ - spin_lock_irq(&tsk->sighand->siglock); - old = tsk->signal->process_keyring; - tsk->signal->process_keyring = NULL; - spin_unlock_irq(&tsk->sighand->siglock); + spin_lock_irq(&tsk->cred->tgcred->lock); + old = tsk->cred->tgcred->process_keyring; + tsk->cred->tgcred->process_keyring = NULL; + spin_unlock_irq(&tsk->cred->tgcred->lock); key_put(old); @@ -452,9 +419,9 @@ key_ref_t search_process_keyrings(struct key_type *type, } /* search the process keyring second */ - if (context->signal->process_keyring) { + if (cred->tgcred->process_keyring) { key_ref = keyring_search_aux( - make_key_ref(context->signal->process_keyring, 1), + make_key_ref(cred->tgcred->process_keyring, 1), context, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -473,11 +440,11 @@ key_ref_t search_process_keyrings(struct key_type *type, } /* search the session keyring */ - if (context->signal->session_keyring) { + if (cred->tgcred->session_keyring) { rcu_read_lock(); key_ref = keyring_search_aux( make_key_ref(rcu_dereference( - context->signal->session_keyring), + cred->tgcred->session_keyring), 1), context, type, description, match); rcu_read_unlock(); @@ -586,11 +553,13 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, { struct request_key_auth *rka; struct task_struct *t = current; - struct cred *cred = current_cred(); + struct cred *cred; struct key *key; key_ref_t key_ref, skey_ref; int ret; +try_again: + cred = get_current_cred(); key_ref = ERR_PTR(-ENOKEY); switch (id) { @@ -604,6 +573,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, key = ERR_PTR(ret); goto error; } + goto reget_creds; } key = cred->thread_keyring; @@ -612,7 +582,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, break; case KEY_SPEC_PROCESS_KEYRING: - if (!t->signal->process_keyring) { + if (!cred->tgcred->process_keyring) { if 
(!create) goto error; @@ -621,15 +591,16 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, key = ERR_PTR(ret); goto error; } + goto reget_creds; } - key = t->signal->process_keyring; + key = cred->tgcred->process_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_SESSION_KEYRING: - if (!t->signal->session_keyring) { + if (!cred->tgcred->session_keyring) { /* always install a session keyring upon access if one * doesn't exist yet */ ret = install_user_keyrings(); @@ -639,10 +610,11 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, cred->user->session_keyring); if (ret < 0) goto error; + goto reget_creds; } rcu_read_lock(); - key = rcu_dereference(t->signal->session_keyring); + key = rcu_dereference(cred->tgcred->session_keyring); atomic_inc(&key->usage); rcu_read_unlock(); key_ref = make_key_ref(key, 1); @@ -758,6 +730,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, goto invalid_key; error: + put_cred(cred); return key_ref; invalid_key: @@ -765,6 +738,12 @@ invalid_key: key_ref = ERR_PTR(ret); goto error; + /* if we attempted to install a keyring, then it may have caused new + * creds to be installed */ +reget_creds: + put_cred(cred); + goto try_again; + } /* end lookup_user_key() */ /*****************************************************************************/ @@ -777,6 +756,7 @@ invalid_key: long join_session_keyring(const char *name) { struct task_struct *tsk = current; + struct cred *cred = current->cred; struct key *keyring; long ret; @@ -787,7 +767,7 @@ long join_session_keyring(const char *name) goto error; rcu_read_lock(); - ret = rcu_dereference(tsk->signal->session_keyring)->serial; + ret = rcu_dereference(cred->tgcred->session_keyring)->serial; rcu_read_unlock(); goto error; } @@ -799,7 +779,7 @@ long join_session_keyring(const char *name) keyring = find_keyring_by_name(name, false); if (PTR_ERR(keyring) == -ENOKEY) { /* not found - try and create a new 
one */ - keyring = keyring_alloc(name, tsk->cred->uid, tsk->cred->gid, tsk, + keyring = keyring_alloc(name, cred->uid, cred->gid, tsk, KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 0488b0af5bd..3d12558362d 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -66,7 +66,6 @@ static int call_sbin_request_key(struct key_construction *cons, const char *op, void *aux) { - struct task_struct *tsk = current; const struct cred *cred = current_cred(); key_serial_t prkey, sskey; struct key *key = cons->key, *authkey = cons->authkey, *keyring; @@ -109,18 +108,13 @@ static int call_sbin_request_key(struct key_construction *cons, cred->thread_keyring->serial : 0); prkey = 0; - if (tsk->signal->process_keyring) - prkey = tsk->signal->process_keyring->serial; + if (cred->tgcred->process_keyring) + prkey = cred->tgcred->process_keyring->serial; - sprintf(keyring_str[1], "%d", prkey); - - if (tsk->signal->session_keyring) { - rcu_read_lock(); - sskey = rcu_dereference(tsk->signal->session_keyring)->serial; - rcu_read_unlock(); - } else { + if (cred->tgcred->session_keyring) + sskey = rcu_dereference(cred->tgcred->session_keyring)->serial; + else sskey = cred->user->session_keyring->serial; - } sprintf(keyring_str[2], "%d", sskey); @@ -222,7 +216,7 @@ static int construct_key(struct key *key, const void *callout_info, static void construct_get_dest_keyring(struct key **_dest_keyring) { struct request_key_auth *rka; - struct task_struct *tsk = current; + const struct cred *cred = current_cred(); struct key *dest_keyring = *_dest_keyring, *authkey; kenter("%p", dest_keyring); @@ -234,11 +228,11 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) } else { /* use a default keyring; falling through the cases until we * find one that we actually have */ - switch (tsk->cred->jit_keyring) { + switch (cred->jit_keyring) { case 
KEY_REQKEY_DEFL_DEFAULT: case KEY_REQKEY_DEFL_REQUESTOR_KEYRING: - if (tsk->cred->request_key_auth) { - authkey = tsk->cred->request_key_auth; + if (cred->request_key_auth) { + authkey = cred->request_key_auth; down_read(&authkey->sem); rka = authkey->payload.data; if (!test_bit(KEY_FLAG_REVOKED, @@ -251,19 +245,19 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) } case KEY_REQKEY_DEFL_THREAD_KEYRING: - dest_keyring = key_get(tsk->cred->thread_keyring); + dest_keyring = key_get(cred->thread_keyring); if (dest_keyring) break; case KEY_REQKEY_DEFL_PROCESS_KEYRING: - dest_keyring = key_get(tsk->signal->process_keyring); + dest_keyring = key_get(cred->tgcred->process_keyring); if (dest_keyring) break; case KEY_REQKEY_DEFL_SESSION_KEYRING: rcu_read_lock(); dest_keyring = key_get( - rcu_dereference(tsk->signal->session_keyring)); + rcu_dereference(cred->tgcred->session_keyring)); rcu_read_unlock(); if (dest_keyring) @@ -271,11 +265,11 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) case KEY_REQKEY_DEFL_USER_SESSION_KEYRING: dest_keyring = - key_get(tsk->cred->user->session_keyring); + key_get(cred->user->session_keyring); break; case KEY_REQKEY_DEFL_USER_KEYRING: - dest_keyring = key_get(tsk->cred->user->uid_keyring); + dest_keyring = key_get(cred->user->uid_keyring); break; case KEY_REQKEY_DEFL_GROUP_KEYRING: -- cgit v1.2.3-70-g09d2 From d84f4f992cbd76e8f39c488cf0c5d123843923b1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:23 +1100 Subject: CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. 
A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may be incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. 
do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. 
Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_creds() to point that way. (6) __sigqueue_alloc() and switch_uid(). __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. 
A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather than pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodehelper_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). 
This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells Acked-by: James Morris Signed-off-by: James Morris --- fs/exec.c | 31 ++- fs/nfsd/auth.c | 92 ++++---- fs/nfsd/nfs4recover.c | 68 +++--- fs/nfsd/nfsfh.c | 11 +- fs/open.c | 31 ++- include/linux/audit.h | 22 +- include/linux/capability.h | 2 - include/linux/cred.h | 44 +++- include/linux/init_task.h | 2 + include/linux/key.h | 22 +- include/linux/sched.h | 6 +- include/linux/security.h | 178 +++++++--------- init/main.c | 1 + kernel/auditsc.c | 42 ++-- kernel/capability.c | 78 +++---- kernel/cred-internals.h | 21 ++ kernel/cred.c | 321 ++++++++++++++++++++++++---- kernel/exit.c | 9 +- kernel/fork.c | 7 +- kernel/kmod.c | 30 ++- kernel/ptrace.c | 9 + kernel/signal.c | 10 +- kernel/sys.c | 450 +++++++++++++++++++++------------------ kernel/user.c | 37 +--- kernel/user_namespace.c | 12 +- lib/Makefile | 2 +- net/rxrpc/ar-key.c | 6 +- security/capability.c | 21 +- security/commoncap.c | 265 +++++++++++------------ security/keys/internal.h | 17 +- security/keys/key.c | 25 +-- security/keys/keyctl.c | 95 ++++++--- security/keys/keyring.c | 14 +- security/keys/permission.c | 24 ++- security/keys/proc.c | 8 +- security/keys/process_keys.c | 333 ++++++++++++++--------------- security/keys/request_key.c | 29 ++- security/keys/request_key_auth.c | 41 ++-- security/security.c | 58 +++-- security/selinux/hooks.c | 286 
++++++++++++------------- security/smack/smack_lsm.c | 82 ++++--- 41 files changed, 1603 insertions(+), 1239 deletions(-) create mode 100644 kernel/cred-internals.h (limited to 'kernel/fork.c') diff --git a/fs/exec.c b/fs/exec.c index a5330e1a221..9bd3559ddec 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1007,13 +1007,12 @@ int flush_old_exec(struct linux_binprm * bprm) */ current->mm->task_size = TASK_SIZE; - if (bprm->e_uid != current_euid() || bprm->e_gid != current_egid()) { - suid_keys(current); + if (bprm->e_uid != current_euid() || + bprm->e_gid != current_egid()) { set_dumpable(current->mm, suid_dumpable); current->pdeath_signal = 0; } else if (file_permission(bprm->file, MAY_READ) || (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) { - suid_keys(current); set_dumpable(current->mm, suid_dumpable); } @@ -1096,10 +1095,8 @@ void compute_creds(struct linux_binprm *bprm) { int unsafe; - if (bprm->e_uid != current_uid()) { - suid_keys(current); + if (bprm->e_uid != current_uid()) current->pdeath_signal = 0; - } exec_keys(current); task_lock(current); @@ -1709,8 +1706,9 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) struct linux_binfmt * binfmt; struct inode * inode; struct file * file; + const struct cred *old_cred; + struct cred *cred; int retval = 0; - int fsuid = current_fsuid(); int flag = 0; int ispipe = 0; unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; @@ -1723,12 +1721,20 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) binfmt = current->binfmt; if (!binfmt || !binfmt->core_dump) goto fail; + + cred = prepare_creds(); + if (!cred) { + retval = -ENOMEM; + goto fail; + } + down_write(&mm->mmap_sem); /* * If another thread got here first, or we are not dumpable, bail out. 
*/ if (mm->core_state || !get_dumpable(mm)) { up_write(&mm->mmap_sem); + put_cred(cred); goto fail; } @@ -1739,12 +1745,16 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) */ if (get_dumpable(mm) == 2) { /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ - current->cred->fsuid = 0; /* Dump root private */ + cred->fsuid = 0; /* Dump root private */ } retval = coredump_wait(exit_code, &core_state); - if (retval < 0) + if (retval < 0) { + put_cred(cred); goto fail; + } + + old_cred = override_creds(cred); /* * Clear any false indication of pending signals that might @@ -1835,7 +1845,8 @@ fail_unlock: if (helper_argv) argv_free(helper_argv); - current->cred->fsuid = fsuid; + revert_creds(old_cred); + put_cred(cred); coredump_finish(mm); fail: return retval; diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 808fc03a6fb..836ffa1047d 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -27,55 +27,67 @@ int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) { - struct cred *act_as = current->cred ; - struct svc_cred cred = rqstp->rq_cred; + struct group_info *rqgi; + struct group_info *gi; + struct cred *new; int i; int flags = nfsexp_flags(rqstp, exp); int ret; + new = prepare_creds(); + if (!new) + return -ENOMEM; + + new->fsuid = rqstp->rq_cred.cr_uid; + new->fsgid = rqstp->rq_cred.cr_gid; + + rqgi = rqstp->rq_cred.cr_group_info; + if (flags & NFSEXP_ALLSQUASH) { - cred.cr_uid = exp->ex_anon_uid; - cred.cr_gid = exp->ex_anon_gid; - cred.cr_group_info = groups_alloc(0); + new->fsuid = exp->ex_anon_uid; + new->fsgid = exp->ex_anon_gid; + gi = groups_alloc(0); } else if (flags & NFSEXP_ROOTSQUASH) { - struct group_info *gi; - if (!cred.cr_uid) - cred.cr_uid = exp->ex_anon_uid; - if (!cred.cr_gid) - cred.cr_gid = exp->ex_anon_gid; - gi = groups_alloc(cred.cr_group_info->ngroups); - if (gi) - for (i = 0; i < cred.cr_group_info->ngroups; i++) { - if 
(!GROUP_AT(cred.cr_group_info, i)) - GROUP_AT(gi, i) = exp->ex_anon_gid; - else - GROUP_AT(gi, i) = GROUP_AT(cred.cr_group_info, i); - } - cred.cr_group_info = gi; - } else - get_group_info(cred.cr_group_info); - - if (cred.cr_uid != (uid_t) -1) - act_as->fsuid = cred.cr_uid; - else - act_as->fsuid = exp->ex_anon_uid; - if (cred.cr_gid != (gid_t) -1) - act_as->fsgid = cred.cr_gid; - else - act_as->fsgid = exp->ex_anon_gid; + if (!new->fsuid) + new->fsuid = exp->ex_anon_uid; + if (!new->fsgid) + new->fsgid = exp->ex_anon_gid; - if (!cred.cr_group_info) - return -ENOMEM; - ret = set_groups(act_as, cred.cr_group_info); - put_group_info(cred.cr_group_info); - if ((cred.cr_uid)) { - act_as->cap_effective = - cap_drop_nfsd_set(act_as->cap_effective); + gi = groups_alloc(rqgi->ngroups); + if (!gi) + goto oom; + + for (i = 0; i < rqgi->ngroups; i++) { + if (!GROUP_AT(rqgi, i)) + GROUP_AT(gi, i) = exp->ex_anon_gid; + else + GROUP_AT(gi, i) = GROUP_AT(rqgi, i); + } } else { - act_as->cap_effective = - cap_raise_nfsd_set(act_as->cap_effective, - act_as->cap_permitted); + gi = get_group_info(rqgi); } + + if (new->fsuid == (uid_t) -1) + new->fsuid = exp->ex_anon_uid; + if (new->fsgid == (gid_t) -1) + new->fsgid = exp->ex_anon_gid; + + ret = set_groups(new, gi); + put_group_info(gi); + if (!ret) + goto error; + + if (new->uid) + new->cap_effective = cap_drop_nfsd_set(new->cap_effective); + else + new->cap_effective = cap_raise_nfsd_set(new->cap_effective, + new->cap_permitted); + return commit_creds(new); + +oom: + ret = -ENOMEM; +error: + abort_creds(new); return ret; } diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 632a50b4b37..9371ea12d7f 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -54,20 +54,26 @@ static struct path rec_dir; static int rec_dir_init = 0; -static void -nfs4_save_user(uid_t *saveuid, gid_t *savegid) +static int +nfs4_save_creds(const struct cred **original_creds) { - *saveuid = current->cred->fsuid; - *savegid = 
current->cred->fsgid; - current->cred->fsuid = 0; - current->cred->fsgid = 0; + struct cred *new; + + new = prepare_creds(); + if (!new) + return -ENOMEM; + + new->fsuid = 0; + new->fsgid = 0; + *original_creds = override_creds(new); + put_cred(new); + return 0; } static void -nfs4_reset_user(uid_t saveuid, gid_t savegid) +nfs4_reset_creds(const struct cred *original) { - current->cred->fsuid = saveuid; - current->cred->fsgid = savegid; + revert_creds(original); } static void @@ -129,10 +135,9 @@ nfsd4_sync_rec_dir(void) int nfsd4_create_clid_dir(struct nfs4_client *clp) { + const struct cred *original_cred; char *dname = clp->cl_recdir; struct dentry *dentry; - uid_t uid; - gid_t gid; int status; dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); @@ -140,7 +145,9 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) if (!rec_dir_init || clp->cl_firststate) return 0; - nfs4_save_user(&uid, &gid); + status = nfs4_save_creds(&original_cred); + if (status < 0) + return status; /* lock the parent */ mutex_lock(&rec_dir.dentry->d_inode->i_mutex); @@ -168,7 +175,7 @@ out_unlock: clp->cl_firststate = 1; nfsd4_sync_rec_dir(); } - nfs4_reset_user(uid, gid); + nfs4_reset_creds(original_cred); dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status); return status; } @@ -211,20 +218,21 @@ nfsd4_build_dentrylist(void *arg, const char *name, int namlen, static int nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) { + const struct cred *original_cred; struct file *filp; struct dentry_list_arg dla = { .parent = dir, }; struct list_head *dentries = &dla.dentries; struct dentry_list *child; - uid_t uid; - gid_t gid; int status; if (!rec_dir_init) return 0; - nfs4_save_user(&uid, &gid); + status = nfs4_save_creds(&original_cred); + if (status < 0) + return status; filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY, current_cred()); @@ -250,7 +258,7 @@ out: dput(child->dentry); kfree(child); } - nfs4_reset_user(uid, gid); + nfs4_reset_creds(original_cred); 
return status; } @@ -312,8 +320,7 @@ out: void nfsd4_remove_clid_dir(struct nfs4_client *clp) { - uid_t uid; - gid_t gid; + const struct cred *original_cred; int status; if (!rec_dir_init || !clp->cl_firststate) @@ -323,9 +330,13 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) if (status) goto out; clp->cl_firststate = 0; - nfs4_save_user(&uid, &gid); + + status = nfs4_save_creds(&original_cred); + if (status < 0) + goto out; + status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); - nfs4_reset_user(uid, gid); + nfs4_reset_creds(original_cred); if (status == 0) nfsd4_sync_rec_dir(); mnt_drop_write(rec_dir.mnt); @@ -402,16 +413,21 @@ nfsd4_recdir_load(void) { void nfsd4_init_recdir(char *rec_dirname) { - uid_t uid = 0; - gid_t gid = 0; - int status; + const struct cred *original_cred; + int status; printk("NFSD: Using %s as the NFSv4 state recovery directory\n", rec_dirname); BUG_ON(rec_dir_init); - nfs4_save_user(&uid, &gid); + status = nfs4_save_creds(&original_cred); + if (status < 0) { + printk("NFSD: Unable to change credentials to find recovery" + " directory: error %d\n", + status); + return; + } status = kern_path(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &rec_dir); @@ -421,7 +437,7 @@ nfsd4_init_recdir(char *rec_dirname) if (!status) rec_dir_init = 1; - nfs4_reset_user(uid, gid); + nfs4_reset_creds(original_cred); } void diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index e67cfaea086..f0da7d9c3a9 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -186,9 +186,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) * access control settings being in effect, we cannot * fix that case easily. 
*/ - current->cred->cap_effective = - cap_raise_nfsd_set(current->cred->cap_effective, - current->cred->cap_permitted); + struct cred *new = prepare_creds(); + if (!new) + return nfserrno(-ENOMEM); + new->cap_effective = + cap_raise_nfsd_set(new->cap_effective, + new->cap_permitted); + put_cred(override_creds(new)); + put_cred(new); } else { error = nfsd_setuser_and_check_port(rqstp, exp); if (error) diff --git a/fs/open.c b/fs/open.c index f96eaab280a..c0a426d5766 100644 --- a/fs/open.c +++ b/fs/open.c @@ -425,30 +425,33 @@ out: */ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) { - struct cred *cred = current->cred; + const struct cred *old_cred; + struct cred *override_cred; struct path path; struct inode *inode; - int old_fsuid, old_fsgid; - kernel_cap_t uninitialized_var(old_cap); /* !SECURE_NO_SETUID_FIXUP */ int res; if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ return -EINVAL; - old_fsuid = cred->fsuid; - old_fsgid = cred->fsgid; + override_cred = prepare_creds(); + if (!override_cred) + return -ENOMEM; - cred->fsuid = cred->uid; - cred->fsgid = cred->gid; + override_cred->fsuid = override_cred->uid; + override_cred->fsgid = override_cred->gid; if (!issecure(SECURE_NO_SETUID_FIXUP)) { /* Clear the capabilities if we switch to a non-root user */ - if (current->cred->uid) - old_cap = cap_set_effective(__cap_empty_set); + if (override_cred->uid) + cap_clear(override_cred->cap_effective); else - old_cap = cap_set_effective(cred->cap_permitted); + override_cred->cap_effective = + override_cred->cap_permitted; } + old_cred = override_creds(override_cred); + res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); if (res) goto out; @@ -485,12 +488,8 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) out_path_release: path_put(&path); out: - cred->fsuid = old_fsuid; - cred->fsgid = old_fsgid; - - if (!issecure(SECURE_NO_SETUID_FIXUP)) - cap_set_effective(old_cap); - + revert_creds(old_cred); 
+ put_cred(override_cred); return res; } diff --git a/include/linux/audit.h b/include/linux/audit.h index 6fbebac7b1b..0b2fcb698a6 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -454,8 +454,10 @@ extern int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_pr extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout); extern int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification); extern int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat); -extern void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE); -extern int __audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm); +extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm, + const struct cred *new, + const struct cred *old); +extern int __audit_log_capset(pid_t pid, const struct cred *new, const struct cred *old); static inline int audit_ipc_obj(struct kern_ipc_perm *ipcp) { @@ -522,16 +524,20 @@ static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) * * -Eric */ -static inline void audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE) +static inline int audit_log_bprm_fcaps(struct linux_binprm *bprm, + const struct cred *new, + const struct cred *old) { if (unlikely(!audit_dummy_context())) - __audit_log_bprm_fcaps(bprm, pP, pE); + return __audit_log_bprm_fcaps(bprm, new, old); + return 0; } -static inline int audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm) +static inline int audit_log_capset(pid_t pid, const struct cred *new, + const struct cred *old) { if (unlikely(!audit_dummy_context())) - return __audit_log_capset(pid, eff, inh, perm); + return __audit_log_capset(pid, new, old); return 0; } @@ -566,8 +572,8 @@ extern int audit_signals; #define audit_mq_timedreceive(d,l,p,t) ({ 0; }) #define 
audit_mq_notify(d,n) ({ 0; }) #define audit_mq_getsetattr(d,s) ({ 0; }) -#define audit_log_bprm_fcaps(b, p, e) do { ; } while (0) -#define audit_log_capset(pid, e, i, p) ({ 0; }) +#define audit_log_bprm_fcaps(b, ncr, ocr) ({ 0; }) +#define audit_log_capset(pid, ncr, ocr) ({ 0; }) #define audit_ptrace(t) ((void)0) #define audit_n_rules 0 #define audit_signals 0 diff --git a/include/linux/capability.h b/include/linux/capability.h index 7f26580a5a4..e22f48c2a46 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -519,8 +519,6 @@ extern const kernel_cap_t __cap_empty_set; extern const kernel_cap_t __cap_full_set; extern const kernel_cap_t __cap_init_eff_set; -kernel_cap_t cap_set_effective(const kernel_cap_t pE_new); - /** * has_capability - Determine if a task has a superior capability available * @t: The task in question diff --git a/include/linux/cred.h b/include/linux/cred.h index 62b9e532422..eaf6fa695a0 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -84,6 +84,8 @@ struct thread_group_cred { struct key *process_keyring; /* keyring private to this process */ struct rcu_head rcu; /* RCU deletion hook */ }; + +extern void release_tgcred(struct cred *cred); #endif /* @@ -137,11 +139,30 @@ struct cred { struct user_struct *user; /* real user ID subscription */ struct group_info *group_info; /* supplementary groups for euid/fsgid */ struct rcu_head rcu; /* RCU deletion hook */ - spinlock_t lock; /* lock for pointer changes */ }; extern void __put_cred(struct cred *); extern int copy_creds(struct task_struct *, unsigned long); +extern struct cred *prepare_creds(void); +extern struct cred *prepare_usermodehelper_creds(void); +extern int commit_creds(struct cred *); +extern void abort_creds(struct cred *); +extern const struct cred *override_creds(const struct cred *) __deprecated; +extern void revert_creds(const struct cred *) __deprecated; +extern void __init cred_init(void); + +/** + * get_new_cred - Get a reference on a new set 
of credentials + * @cred: The new credentials to reference + * + * Get a reference on the specified set of new credentials. The caller must + * release the reference. + */ +static inline struct cred *get_new_cred(struct cred *cred) +{ + atomic_inc(&cred->usage); + return cred; +} /** * get_cred - Get a reference on a set of credentials @@ -150,10 +171,9 @@ extern int copy_creds(struct task_struct *, unsigned long); * Get a reference on the specified set of credentials. The caller must * release the reference. */ -static inline struct cred *get_cred(struct cred *cred) +static inline const struct cred *get_cred(const struct cred *cred) { - atomic_inc(&cred->usage); - return cred; + return get_new_cred((struct cred *) cred); } /** @@ -166,6 +186,8 @@ static inline struct cred *get_cred(struct cred *cred) static inline void put_cred(const struct cred *_cred) { struct cred *cred = (struct cred *) _cred; + + BUG_ON(atomic_read(&(cred)->usage) <= 0); if (atomic_dec_and_test(&(cred)->usage)) __put_cred(cred); } @@ -250,13 +272,13 @@ static inline void put_cred(const struct cred *_cred) __groups; \ }) -#define task_cred_xxx(task, xxx) \ -({ \ - __typeof__(task->cred->xxx) ___val; \ - rcu_read_lock(); \ - ___val = __task_cred((task))->xxx; \ - rcu_read_unlock(); \ - ___val; \ +#define task_cred_xxx(task, xxx) \ +({ \ + __typeof__(((struct cred *)NULL)->xxx) ___val; \ + rcu_read_lock(); \ + ___val = __task_cred((task))->xxx; \ + rcu_read_unlock(); \ + ___val; \ }) #define task_uid(task) (task_cred_xxx((task), uid)) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 5e24c54b6df..08c3b24ad9a 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -150,6 +150,8 @@ extern struct cred init_cred; .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ .cred = &init_cred, \ + .cred_exec_mutex = \ + __MUTEX_INITIALIZER(tsk.cred_exec_mutex), \ .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git 
a/include/linux/key.h b/include/linux/key.h index 0836cc838b0..69ecf0934b0 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -73,6 +73,7 @@ struct key; struct seq_file; struct user_struct; struct signal_struct; +struct cred; struct key_type; struct key_owner; @@ -181,7 +182,7 @@ struct key { extern struct key *key_alloc(struct key_type *type, const char *desc, uid_t uid, gid_t gid, - struct task_struct *ctx, + const struct cred *cred, key_perm_t perm, unsigned long flags); @@ -249,7 +250,7 @@ extern int key_unlink(struct key *keyring, struct key *key); extern struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, - struct task_struct *ctx, + const struct cred *cred, unsigned long flags, struct key *dest); @@ -276,22 +277,12 @@ extern ctl_table key_sysctls[]; /* * the userspace interface */ -extern void switch_uid_keyring(struct user_struct *new_user); -extern int copy_keys(unsigned long clone_flags, struct task_struct *tsk); -extern void exit_keys(struct task_struct *tsk); -extern int suid_keys(struct task_struct *tsk); +extern int install_thread_keyring_to_cred(struct cred *cred); extern int exec_keys(struct task_struct *tsk); extern void key_fsuid_changed(struct task_struct *tsk); extern void key_fsgid_changed(struct task_struct *tsk); extern void key_init(void); -#define __install_session_keyring(keyring) \ -({ \ - struct key *old_session = current->cred->tgcred->session_keyring; \ - current->cred->tgcred->session_keyring = keyring; \ - old_session; \ -}) - #else /* CONFIG_KEYS */ #define key_validate(k) 0 @@ -303,11 +294,6 @@ extern void key_init(void); #define make_key_ref(k, p) NULL #define key_ref_to_ptr(k) NULL #define is_key_possessed(k) 0 -#define switch_uid_keyring(u) do { } while(0) -#define __install_session_keyring(k) ({ NULL; }) -#define copy_keys(f,t) 0 -#define exit_keys(t) do { } while(0) -#define suid_keys(t) do { } while(0) #define exec_keys(t) do { } while(0) #define key_fsuid_changed(t) do { } while(0) #define 
key_fsgid_changed(t) do { } while(0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 2913252989b..121d655e460 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1145,7 +1145,8 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - struct cred *cred; /* actual/objective task credentials */ + const struct cred *cred; /* actual/objective task credentials (COW) */ + struct mutex cred_exec_mutex; /* execve vs ptrace cred calculation mutex */ char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock @@ -1720,7 +1721,6 @@ static inline struct user_struct *get_uid(struct user_struct *u) return u; } extern void free_uid(struct user_struct *); -extern void switch_uid(struct user_struct *); extern void release_uids(struct user_namespace *ns); #include @@ -1870,6 +1870,8 @@ static inline unsigned long wait_task_inactive(struct task_struct *p, #define for_each_process(p) \ for (p = &init_task ; (p = next_task(p)) != &init_task ; ) +extern bool is_single_threaded(struct task_struct *); + /* * Careful: do_each_thread/while_each_thread is a double loop so * 'break' will not work as expected - use goto instead. 
diff --git a/include/linux/security.h b/include/linux/security.h index 7e9fe046a0d..68be1125144 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -53,24 +53,21 @@ extern int cap_settime(struct timespec *ts, struct timezone *tz); extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -extern int cap_capset_check(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); -extern void cap_capset_set(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); +extern int cap_capset(struct cred *new, const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); extern int cap_bprm_set_security(struct linux_binprm *bprm); -extern void cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); +extern int cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); extern int cap_bprm_secureexec(struct linux_binprm *bprm); extern int cap_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); -extern int cap_task_post_setuid(uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags); -extern void cap_task_reparent_to_init(struct task_struct *p); +extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5, long *rc_p); + unsigned long arg4, unsigned long arg5); extern int cap_task_setscheduler(struct task_struct *p, int policy, 
struct sched_param *lp); extern int cap_task_setioprio(struct task_struct *p, int ioprio); extern int cap_task_setnice(struct task_struct *p, int nice); @@ -170,8 +167,8 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Compute and set the security attributes of a process being transformed * by an execve operation based on the old attributes (current->security) * and the information saved in @bprm->security by the set_security hook. - * Since this hook function (and its caller) are void, this hook can not - * return an error. However, it can leave the security attributes of the + * This hook may return an error, in which case the process will + * be killed. However, it can leave the security attributes of the * process unchanged if an access failure occurs at this point. * bprm_apply_creds is called under task_lock. @unsafe indicates various * reasons why it may be unsafe to change security state. @@ -593,15 +590,18 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * manual page for definitions of the @clone_flags. * @clone_flags contains the flags indicating what should be shared. * Return 0 if permission is granted. - * @cred_alloc_security: - * @cred contains the cred struct for child process. - * Allocate and attach a security structure to the cred->security field. - * The security field is initialized to NULL when the task structure is - * allocated. - * Return 0 if operation was successful. * @cred_free: * @cred points to the credentials. * Deallocate and clear the cred->security field in a set of credentials. + * @cred_prepare: + * @new points to the new credentials. + * @old points to the original credentials. + * @gfp indicates the atomicity of any memory allocations. + * Prepare a new set of credentials by copying the data from the old set. + * @cred_commit: + * @new points to the new credentials. + * @old points to the original credentials. + * Install a new set of credentials.
* @task_setuid: * Check permission before setting one or more of the user identity * attributes of the current process. The @flags parameter indicates @@ -614,15 +614,13 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @id2 contains a uid. * @flags contains one of the LSM_SETID_* values. * Return 0 if permission is granted. - * @task_post_setuid: + * @task_fix_setuid: * Update the module's state after setting one or more of the user * identity attributes of the current process. The @flags parameter * indicates which of the set*uid system calls invoked this hook. If - * @flags is LSM_SETID_FS, then @old_ruid is the old fs uid and the other - * parameters are not used. - * @old_ruid contains the old real uid (or fs uid if LSM_SETID_FS). - * @old_euid contains the old effective uid (or -1 if LSM_SETID_FS). - * @old_suid contains the old saved uid (or -1 if LSM_SETID_FS). + * @new is the set of credentials that will be installed. Modifications + * should be made to this rather than to @current->cred. + * @old is the set of credentials that are being replaced. * @flags contains one of the LSM_SETID_* values. * Return 0 on success. * @task_setgid: @@ -725,13 +723,8 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @arg3 contains a argument. * @arg4 contains a argument. * @arg5 contains a argument. - * @rc_p contains a pointer to communicate back the forced return code - * Return 0 if permission is granted, and non-zero if the security module - * has taken responsibility (setting *rc_p) for the prctl call. - * @task_reparent_to_init: - * Set the security attributes in @p->security for a kernel thread that - * is being reparented to the init task. - * @p contains the task_struct for the kernel thread. + * Return -ENOSYS if no-one wanted to handle this op, any other value to + * cause prctl() to return immediately with that value.
* @task_to_inode: * Set the security attributes for an inode based on an associated task's * security attributes, e.g. for /proc/pid inodes. @@ -1008,7 +1001,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * See whether a specific operational right is granted to a process on a * key. * @key_ref refers to the key (key pointer + possession attribute bit). - * @context points to the process to provide the context against which to + * @cred points to the credentials to provide the context against which to * evaluate the security data on the key. * @perm describes the combination of permissions required of this key. * Return 1 if permission granted, 0 if permission denied and -ve it the @@ -1170,6 +1163,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @child process. * Security modules may also want to perform a process tracing check * during an execve in the set_security or apply_creds hooks of + * tracing check during an execve in the bprm_set_creds hook of * binprm_security_ops if the process is being traced and its security * attributes would be changed by the execve. * @child contains the task_struct structure for the target process. @@ -1193,19 +1187,15 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @inheritable contains the inheritable capability set. * @permitted contains the permitted capability set. * Return 0 if the capability sets were successfully obtained. - * @capset_check: - * Check permission before setting the @effective, @inheritable, and - * @permitted capability sets for the current process. - * @effective contains the effective capability set. - * @inheritable contains the inheritable capability set. - * @permitted contains the permitted capability set. - * Return 0 if permission is granted. - * @capset_set: + * @capset: * Set the @effective, @inheritable, and @permitted capability sets for * the current process. 
+ * @new contains the new credentials structure for target process. + * @old contains the current credentials structure for target process. * @effective contains the effective capability set. * @inheritable contains the inheritable capability set. * @permitted contains the permitted capability set. + * Return 0 and update @new if permission is granted. * @capable: * Check whether the @tsk process has the @cap capability. * @tsk contains the task_struct for the process. @@ -1297,12 +1287,11 @@ struct security_operations { int (*capget) (struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); - int (*capset_check) (const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); - void (*capset_set) (const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); + int (*capset) (struct cred *new, + const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); int (*capable) (struct task_struct *tsk, int cap, int audit); int (*acct) (struct file *file); int (*sysctl) (struct ctl_table *table, int op); @@ -1314,7 +1303,7 @@ struct security_operations { int (*bprm_alloc_security) (struct linux_binprm *bprm); void (*bprm_free_security) (struct linux_binprm *bprm); - void (*bprm_apply_creds) (struct linux_binprm *bprm, int unsafe); + int (*bprm_apply_creds) (struct linux_binprm *bprm, int unsafe); void (*bprm_post_apply_creds) (struct linux_binprm *bprm); int (*bprm_set_security) (struct linux_binprm *bprm); int (*bprm_check_security) (struct linux_binprm *bprm); @@ -1405,11 +1394,13 @@ struct security_operations { int (*dentry_open) (struct file *file, const struct cred *cred); int (*task_create) (unsigned long clone_flags); - int (*cred_alloc_security) (struct cred *cred); void (*cred_free) (struct cred *cred); + int (*cred_prepare)(struct cred *new, const struct cred *old, + gfp_t gfp); 
+ void (*cred_commit)(struct cred *new, const struct cred *old); int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags); - int (*task_post_setuid) (uid_t old_ruid /* or fsuid */ , - uid_t old_euid, uid_t old_suid, int flags); + int (*task_fix_setuid) (struct cred *new, const struct cred *old, + int flags); int (*task_setgid) (gid_t id0, gid_t id1, gid_t id2, int flags); int (*task_setpgid) (struct task_struct *p, pid_t pgid); int (*task_getpgid) (struct task_struct *p); @@ -1429,8 +1420,7 @@ struct security_operations { int (*task_wait) (struct task_struct *p); int (*task_prctl) (int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, - unsigned long arg5, long *rc_p); - void (*task_reparent_to_init) (struct task_struct *p); + unsigned long arg5); void (*task_to_inode) (struct task_struct *p, struct inode *inode); int (*ipc_permission) (struct kern_ipc_perm *ipcp, short flag); @@ -1535,10 +1525,10 @@ struct security_operations { /* key management security hooks */ #ifdef CONFIG_KEYS - int (*key_alloc) (struct key *key, struct task_struct *tsk, unsigned long flags); + int (*key_alloc) (struct key *key, const struct cred *cred, unsigned long flags); void (*key_free) (struct key *key); int (*key_permission) (key_ref_t key_ref, - struct task_struct *context, + const struct cred *cred, key_perm_t perm); int (*key_getsecurity)(struct key *key, char **_buffer); #endif /* CONFIG_KEYS */ @@ -1564,12 +1554,10 @@ int security_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -int security_capset_check(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); -void security_capset_set(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); +int security_capset(struct cred *new, const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); int 
security_capable(struct task_struct *tsk, int cap); int security_capable_noaudit(struct task_struct *tsk, int cap); int security_acct(struct file *file); @@ -1583,7 +1571,7 @@ int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); int security_vm_enough_memory_kern(long pages); int security_bprm_alloc(struct linux_binprm *bprm); void security_bprm_free(struct linux_binprm *bprm); -void security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); +int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); void security_bprm_post_apply_creds(struct linux_binprm *bprm); int security_bprm_set(struct linux_binprm *bprm); int security_bprm_check(struct linux_binprm *bprm); @@ -1660,11 +1648,12 @@ int security_file_send_sigiotask(struct task_struct *tsk, int security_file_receive(struct file *file); int security_dentry_open(struct file *file, const struct cred *cred); int security_task_create(unsigned long clone_flags); -int security_cred_alloc(struct cred *cred); void security_cred_free(struct cred *cred); +int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp); +void security_commit_creds(struct cred *new, const struct cred *old); int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags); -int security_task_post_setuid(uid_t old_ruid, uid_t old_euid, - uid_t old_suid, int flags); +int security_task_fix_setuid(struct cred *new, const struct cred *old, + int flags); int security_task_setgid(gid_t id0, gid_t id1, gid_t id2, int flags); int security_task_setpgid(struct task_struct *p, pid_t pgid); int security_task_getpgid(struct task_struct *p); @@ -1683,8 +1672,7 @@ int security_task_kill(struct task_struct *p, struct siginfo *info, int sig, u32 secid); int security_task_wait(struct task_struct *p); int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5, long *rc_p); -void security_task_reparent_to_init(struct task_struct *p); + unsigned long 
arg4, unsigned long arg5); void security_task_to_inode(struct task_struct *p, struct inode *inode); int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag); void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid); @@ -1759,18 +1747,13 @@ static inline int security_capget(struct task_struct *target, return cap_capget(target, effective, inheritable, permitted); } -static inline int security_capset_check(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) +static inline int security_capset(struct cred *new, + const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { - return cap_capset_check(effective, inheritable, permitted); -} - -static inline void security_capset_set(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) -{ - cap_capset_set(effective, inheritable, permitted); + return cap_capset(new, old, effective, inheritable, permitted); } static inline int security_capable(struct task_struct *tsk, int cap) @@ -1837,9 +1820,9 @@ static inline int security_bprm_alloc(struct linux_binprm *bprm) static inline void security_bprm_free(struct linux_binprm *bprm) { } -static inline void security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) +static inline int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) { - cap_bprm_apply_creds(bprm, unsafe); + return cap_bprm_apply_creds(bprm, unsafe); } static inline void security_bprm_post_apply_creds(struct linux_binprm *bprm) @@ -2182,13 +2165,20 @@ static inline int security_task_create(unsigned long clone_flags) return 0; } -static inline int security_cred_alloc(struct cred *cred) +static inline void security_cred_free(struct cred *cred) +{ } + +static inline int security_prepare_creds(struct cred *new, + const struct cred *old, + gfp_t gfp) { return 0; } -static inline void security_cred_free(struct cred *cred) -{ } 
+static inline void security_commit_creds(struct cred *new, + const struct cred *old) +{ +} static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) @@ -2196,10 +2186,11 @@ static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, return 0; } -static inline int security_task_post_setuid(uid_t old_ruid, uid_t old_euid, - uid_t old_suid, int flags) +static inline int security_task_fix_setuid(struct cred *new, + const struct cred *old, + int flags) { - return cap_task_post_setuid(old_ruid, old_euid, old_suid, flags); + return cap_task_fix_setuid(new, old, flags); } static inline int security_task_setgid(gid_t id0, gid_t id1, gid_t id2, @@ -2286,14 +2277,9 @@ static inline int security_task_wait(struct task_struct *p) static inline int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, - unsigned long arg5, long *rc_p) -{ - return cap_task_prctl(option, arg2, arg3, arg3, arg5, rc_p); -} - -static inline void security_task_reparent_to_init(struct task_struct *p) + unsigned long arg5) { - cap_task_reparent_to_init(p); + return cap_task_prctl(option, arg2, arg3, arg4, arg5); } static inline void security_task_to_inode(struct task_struct *p, struct inode *inode) @@ -2719,16 +2705,16 @@ static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi #ifdef CONFIG_KEYS #ifdef CONFIG_SECURITY -int security_key_alloc(struct key *key, struct task_struct *tsk, unsigned long flags); +int security_key_alloc(struct key *key, const struct cred *cred, unsigned long flags); void security_key_free(struct key *key); int security_key_permission(key_ref_t key_ref, - struct task_struct *context, key_perm_t perm); + const struct cred *cred, key_perm_t perm); int security_key_getsecurity(struct key *key, char **_buffer); #else static inline int security_key_alloc(struct key *key, - struct task_struct *tsk, + const struct cred *cred, unsigned long flags) { return 0; @@ -2739,7 +2725,7 @@ static
inline void security_key_free(struct key *key) } static inline int security_key_permission(key_ref_t key_ref, - struct task_struct *context, + const struct cred *cred, key_perm_t perm) { return 0; diff --git a/init/main.c b/init/main.c index 7e117a231af..db843bff573 100644 --- a/init/main.c +++ b/init/main.c @@ -669,6 +669,7 @@ asmlinkage void __init start_kernel(void) efi_enter_virtual_mode(); #endif thread_info_cache_init(); + cred_init(); fork_init(num_physpages); proc_caches_init(); buffer_init(); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index ae8ef88ade3..bc1e2d854bf 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2546,18 +2546,17 @@ int __audit_signal_info(int sig, struct task_struct *t) /** * __audit_log_bprm_fcaps - store information about a loading bprm and relevant fcaps - * @bprm pointer to the bprm being processed - * @caps the caps read from the disk + * @bprm: pointer to the bprm being processed + * @new: the proposed new credentials + * @old: the old credentials * * Simply check if the proc already has the caps given by the file and if not * store the priv escalation info for later auditing at the end of the syscall * - * this can fail and we don't care. See the note in audit.h for - * audit_log_bprm_fcaps() for my explaination.... 
- * * -Eric */ -void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE) +int __audit_log_bprm_fcaps(struct linux_binprm *bprm, + const struct cred *new, const struct cred *old) { struct audit_aux_data_bprm_fcaps *ax; struct audit_context *context = current->audit_context; @@ -2566,7 +2565,7 @@ void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_ ax = kmalloc(sizeof(*ax), GFP_KERNEL); if (!ax) - return; + return -ENOMEM; ax->d.type = AUDIT_BPRM_FCAPS; ax->d.next = context->aux; @@ -2581,26 +2580,27 @@ void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_ ax->fcap.fE = !!(vcaps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE); ax->fcap_ver = (vcaps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT; - ax->old_pcap.permitted = *pP; - ax->old_pcap.inheritable = current->cred->cap_inheritable; - ax->old_pcap.effective = *pE; + ax->old_pcap.permitted = old->cap_permitted; + ax->old_pcap.inheritable = old->cap_inheritable; + ax->old_pcap.effective = old->cap_effective; - ax->new_pcap.permitted = current->cred->cap_permitted; - ax->new_pcap.inheritable = current->cred->cap_inheritable; - ax->new_pcap.effective = current->cred->cap_effective; + ax->new_pcap.permitted = new->cap_permitted; + ax->new_pcap.inheritable = new->cap_inheritable; + ax->new_pcap.effective = new->cap_effective; + return 0; } /** * __audit_log_capset - store information about the arguments to the capset syscall - * @pid target pid of the capset call - * @eff effective cap set - * @inh inheritible cap set - * @perm permited cap set + * @pid: target pid of the capset call + * @new: the new credentials + * @old: the old (current) credentials * * Record the aguments userspace sent to sys_capset for later printing by the * audit system if applicable */ -int __audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm) +int __audit_log_capset(pid_t pid, + const struct cred *new, const struct 
cred *old) { struct audit_aux_data_capset *ax; struct audit_context *context = current->audit_context; @@ -2617,9 +2617,9 @@ int __audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_c context->aux = (void *)ax; ax->pid = pid; - ax->cap.effective = *eff; - ax->cap.inheritable = *eff; - ax->cap.permitted = *perm; + ax->cap.effective = new->cap_effective; + ax->cap.inheritable = new->cap_inheritable; + ax->cap.permitted = new->cap_permitted; return 0; } diff --git a/kernel/capability.c b/kernel/capability.c index a404b980b1b..36b4b4daebe 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -15,12 +15,7 @@ #include #include #include - -/* - * This lock protects task->cap_* for all tasks including current. - * Locking rule: acquire this prior to tasklist_lock. - */ -static DEFINE_SPINLOCK(task_capability_lock); +#include "cred-internals.h" /* * Leveraged for setting/resetting capabilities @@ -128,12 +123,11 @@ static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy) } /* - * If we have configured with filesystem capability support, then the - * only thing that can change the capabilities of the current process - * is the current process. As such, we can't be in this code at the - * same time as we are in the process of setting capabilities in this - * process. The net result is that we can limit our use of locks to - * when we are reading the caps of another process. + * The only thing that can change the capabilities of the current + * process is the current process. As such, we can't be in this code + * at the same time as we are in the process of setting capabilities + * in this process. The net result is that we can limit our use of + * locks to when we are reading the caps of another process.
*/ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, kernel_cap_t *pIp, kernel_cap_t *pPp) @@ -143,7 +137,6 @@ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, if (pid && (pid != task_pid_vnr(current))) { struct task_struct *target; - spin_lock(&task_capability_lock); read_lock(&tasklist_lock); target = find_task_by_vpid(pid); @@ -153,34 +146,12 @@ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, ret = security_capget(target, pEp, pIp, pPp); read_unlock(&tasklist_lock); - spin_unlock(&task_capability_lock); } else ret = security_capget(current, pEp, pIp, pPp); return ret; } -/* - * Atomically modify the effective capabilities returning the original - * value. No permission check is performed here - it is assumed that the - * caller is permitted to set the desired effective capabilities. - */ -kernel_cap_t cap_set_effective(const kernel_cap_t pE_new) -{ - kernel_cap_t pE_old; - - spin_lock(&task_capability_lock); - - pE_old = current->cred->cap_effective; - current->cred->cap_effective = pE_new; - - spin_unlock(&task_capability_lock); - - return pE_old; -} - -EXPORT_SYMBOL(cap_set_effective); - /** * sys_capget - get the capabilities of a given process. 
* @header: pointer to struct that contains capability version and @@ -208,7 +179,6 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) return -EINVAL; ret = cap_get_target_pid(pid, &pE, &pI, &pP); - if (!ret) { struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; unsigned i; @@ -270,6 +240,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; unsigned i, tocopy; kernel_cap_t inheritable, permitted, effective; + struct cred *new; int ret; pid_t pid; @@ -284,8 +255,8 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) if (pid != 0 && pid != task_pid_vnr(current)) return -EPERM; - if (copy_from_user(&kdata, data, tocopy - * sizeof(struct __user_cap_data_struct))) + if (copy_from_user(&kdata, data, + tocopy * sizeof(struct __user_cap_data_struct))) return -EFAULT; for (i = 0; i < tocopy; i++) { @@ -300,24 +271,23 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) i++; } - ret = audit_log_capset(pid, &effective, &inheritable, &permitted); - if (ret) + new = prepare_creds(); + if (!new) + return -ENOMEM; + + ret = security_capset(new, current_cred(), + &effective, &inheritable, &permitted); + if (ret < 0) + goto error; + + ret = audit_log_capset(pid, new, current_cred()); + if (ret < 0) - return ret; + goto error; - /* This lock is required even when filesystem capability support is - * configured - it protects the sys_capget() call from returning - * incorrect data in the case that the targeted process is not the - * current one. - */ - spin_lock(&task_capability_lock); - - ret = security_capset_check(&effective, &inheritable, &permitted); - /* Having verified that the proposed changes are legal, we now put them - * into effect.
- */ - if (!ret) - security_capset_set(&effective, &inheritable, &permitted); - spin_unlock(&task_capability_lock); + return commit_creds(new); + +error: + abort_creds(new); return ret; } diff --git a/kernel/cred-internals.h b/kernel/cred-internals.h new file mode 100644 index 00000000000..2dc4fc2d0bf --- /dev/null +++ b/kernel/cred-internals.h @@ -0,0 +1,21 @@ +/* Internal credentials stuff + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +/* + * user.c + */ +static inline void sched_switch_user(struct task_struct *p) +{ +#ifdef CONFIG_USER_SCHED + sched_move_task(p); +#endif /* CONFIG_USER_SCHED */ +} + diff --git a/kernel/cred.c b/kernel/cred.c index ac73e361768..cb6b5eda978 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -15,6 +15,10 @@ #include #include #include +#include +#include "cred-internals.h" + +static struct kmem_cache *cred_jar; /* * The common credentials for the initial task's thread group @@ -64,7 +68,7 @@ static void release_tgcred_rcu(struct rcu_head *rcu) /* * Release a set of thread group credentials. 
*/ -static void release_tgcred(struct cred *cred) +void release_tgcred(struct cred *cred) { #ifdef CONFIG_KEYS struct thread_group_cred *tgcred = cred->tgcred; @@ -81,79 +85,322 @@ static void put_cred_rcu(struct rcu_head *rcu) { struct cred *cred = container_of(rcu, struct cred, rcu); - BUG_ON(atomic_read(&cred->usage) != 0); + if (atomic_read(&cred->usage) != 0) + panic("CRED: put_cred_rcu() sees %p with usage %d\n", + cred, atomic_read(&cred->usage)); + security_cred_free(cred); key_put(cred->thread_keyring); key_put(cred->request_key_auth); release_tgcred(cred); put_group_info(cred->group_info); free_uid(cred->user); - security_cred_free(cred); - kfree(cred); + kmem_cache_free(cred_jar, cred); } /** * __put_cred - Destroy a set of credentials - * @sec: The record to release + * @cred: The record to release * * Destroy a set of credentials on which no references remain. */ void __put_cred(struct cred *cred) { + BUG_ON(atomic_read(&cred->usage) != 0); + call_rcu(&cred->rcu, put_cred_rcu); } EXPORT_SYMBOL(__put_cred); +/** + * prepare_creds - Prepare a new set of credentials for modification + * + * Prepare a new set of task credentials for modification. A task's creds + * shouldn't generally be modified directly, therefore this function is used to + * prepare a new copy, which the caller then modifies and then commits by + * calling commit_creds(). + * + * Returns a pointer to the new creds-to-be if successful, NULL otherwise. + * + * Call commit_creds() or abort_creds() to clean up. 
+ */ +struct cred *prepare_creds(void) +{ + struct task_struct *task = current; + const struct cred *old; + struct cred *new; + + BUG_ON(atomic_read(&task->cred->usage) < 1); + + new = kmem_cache_alloc(cred_jar, GFP_KERNEL); + if (!new) + return NULL; + + old = task->cred; + memcpy(new, old, sizeof(struct cred)); + + atomic_set(&new->usage, 1); + get_group_info(new->group_info); + get_uid(new->user); + +#ifdef CONFIG_KEYS + key_get(new->thread_keyring); + key_get(new->request_key_auth); + atomic_inc(&new->tgcred->usage); +#endif + +#ifdef CONFIG_SECURITY + new->security = NULL; +#endif + + if (security_prepare_creds(new, old, GFP_KERNEL) < 0) + goto error; + return new; + +error: + abort_creds(new); + return NULL; +} +EXPORT_SYMBOL(prepare_creds); + +/* + * prepare new credentials for the usermode helper dispatcher + */ +struct cred *prepare_usermodehelper_creds(void) +{ +#ifdef CONFIG_KEYS + struct thread_group_cred *tgcred = NULL; +#endif + struct cred *new; + +#ifdef CONFIG_KEYS + tgcred = kzalloc(sizeof(*new->tgcred), GFP_ATOMIC); + if (!tgcred) + return NULL; +#endif + + new = kmem_cache_alloc(cred_jar, GFP_ATOMIC); + if (!new) + return NULL; + + memcpy(new, &init_cred, sizeof(struct cred)); + + atomic_set(&new->usage, 1); + get_group_info(new->group_info); + get_uid(new->user); + +#ifdef CONFIG_KEYS + new->thread_keyring = NULL; + new->request_key_auth = NULL; + new->jit_keyring = KEY_REQKEY_DEFL_DEFAULT; + + atomic_set(&tgcred->usage, 1); + spin_lock_init(&tgcred->lock); + new->tgcred = tgcred; +#endif + +#ifdef CONFIG_SECURITY + new->security = NULL; +#endif + if (security_prepare_creds(new, &init_cred, GFP_ATOMIC) < 0) + goto error; + + BUG_ON(atomic_read(&new->usage) != 1); + return new; + +error: + put_cred(new); + return NULL; +} + /* * Copy credentials for the new process created by fork() + * + * We share if we can, but under some circumstances we have to generate a new + * set. 
*/ int copy_creds(struct task_struct *p, unsigned long clone_flags) { - struct cred *pcred; - int ret; +#ifdef CONFIG_KEYS + struct thread_group_cred *tgcred; +#endif + struct cred *new; + + mutex_init(&p->cred_exec_mutex); - pcred = kmemdup(p->cred, sizeof(*p->cred), GFP_KERNEL); - if (!pcred) + if ( +#ifdef CONFIG_KEYS + !p->cred->thread_keyring && +#endif + clone_flags & CLONE_THREAD + ) { + get_cred(p->cred); + atomic_inc(&p->cred->user->processes); + return 0; + } + + new = prepare_creds(); + if (!new) return -ENOMEM; #ifdef CONFIG_KEYS - if (clone_flags & CLONE_THREAD) { - atomic_inc(&pcred->tgcred->usage); - } else { - pcred->tgcred = kmalloc(sizeof(struct cred), GFP_KERNEL); - if (!pcred->tgcred) { - kfree(pcred); + /* new threads get their own thread keyrings if their parent already + * had one */ + if (new->thread_keyring) { + key_put(new->thread_keyring); + new->thread_keyring = NULL; + if (clone_flags & CLONE_THREAD) + install_thread_keyring_to_cred(new); + } + + /* we share the process and session keyrings between all the threads in + * a process - this is slightly icky as we violate COW credentials a + * bit */ + if (!(clone_flags & CLONE_THREAD)) { + tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); + if (!tgcred) { + put_cred(new); return -ENOMEM; } - atomic_set(&pcred->tgcred->usage, 1); - spin_lock_init(&pcred->tgcred->lock); - pcred->tgcred->process_keyring = NULL; - pcred->tgcred->session_keyring = - key_get(p->cred->tgcred->session_keyring); + atomic_set(&tgcred->usage, 1); + spin_lock_init(&tgcred->lock); + tgcred->process_keyring = NULL; + tgcred->session_keyring = key_get(new->tgcred->session_keyring); + + release_tgcred(new); + new->tgcred = tgcred; } #endif -#ifdef CONFIG_SECURITY - pcred->security = NULL; -#endif + atomic_inc(&new->user->processes); + p->cred = new; + return 0; +} - ret = security_cred_alloc(pcred); - if (ret < 0) { - release_tgcred(pcred); - kfree(pcred); - return ret; +/** + * commit_creds - Install new credentials upon 
the current task + * @new: The credentials to be assigned + * + * Install a new set of credentials to the current task, using RCU to replace + * the old set. + * + * This function eats the caller's reference to the new credentials. + * + * Always returns 0 thus allowing this function to be tail-called at the end + * of, say, sys_setgid(). + */ +int commit_creds(struct cred *new) +{ + struct task_struct *task = current; + const struct cred *old; + + BUG_ON(atomic_read(&new->usage) < 1); + BUG_ON(atomic_read(&task->cred->usage) < 1); + + old = task->cred; + security_commit_creds(new, old); + + /* dumpability changes */ + if (old->euid != new->euid || + old->egid != new->egid || + old->fsuid != new->fsuid || + old->fsgid != new->fsgid || + !cap_issubset(new->cap_permitted, old->cap_permitted)) { + set_dumpable(task->mm, suid_dumpable); + task->pdeath_signal = 0; + smp_wmb(); } - atomic_set(&pcred->usage, 1); - get_group_info(pcred->group_info); - get_uid(pcred->user); - key_get(pcred->thread_keyring); - key_get(pcred->request_key_auth); + /* alter the thread keyring */ + if (new->fsuid != old->fsuid) + key_fsuid_changed(task); + if (new->fsgid != old->fsgid) + key_fsgid_changed(task); + + /* do it + * - What if a process setreuid()'s and this brings the + * new uid over his NPROC rlimit? We can check this now + * cheaply with the new uid cache, so if it matters + * we should be checking for it. 
-DaveM + */ + if (new->user != old->user) + atomic_inc(&new->user->processes); + rcu_assign_pointer(task->cred, new); + if (new->user != old->user) + atomic_dec(&old->user->processes); + + sched_switch_user(task); + + /* send notifications */ + if (new->uid != old->uid || + new->euid != old->euid || + new->suid != old->suid || + new->fsuid != old->fsuid) + proc_id_connector(task, PROC_EVENT_UID); - atomic_inc(&pcred->user->processes); + if (new->gid != old->gid || + new->egid != old->egid || + new->sgid != old->sgid || + new->fsgid != old->fsgid) + proc_id_connector(task, PROC_EVENT_GID); - /* RCU assignment is unneeded here as no-one can have accessed this - * pointer yet, barring us */ - p->cred = pcred; + put_cred(old); return 0; } +EXPORT_SYMBOL(commit_creds); + +/** + * abort_creds - Discard a set of credentials and unlock the current task + * @new: The credentials that were going to be applied + * + * Discard a set of credentials that were under construction and unlock the + * current task. + */ +void abort_creds(struct cred *new) +{ + BUG_ON(atomic_read(&new->usage) < 1); + put_cred(new); +} +EXPORT_SYMBOL(abort_creds); + +/** + * override_creds - Temporarily override the current process's credentials + * @new: The credentials to be assigned + * + * Install a set of temporary override credentials on the current process, + * returning the old set for later reversion. + */ +const struct cred *override_creds(const struct cred *new) +{ + const struct cred *old = current->cred; + + rcu_assign_pointer(current->cred, get_cred(new)); + return old; +} +EXPORT_SYMBOL(override_creds); + +/** + * revert_creds - Revert a temporary credentials override + * @old: The credentials to be restored + * + * Revert a temporary set of override credentials to an old set, discarding the + * override set. 
+ */ +void revert_creds(const struct cred *old) +{ + const struct cred *override = current->cred; + + rcu_assign_pointer(current->cred, old); + put_cred(override); +} +EXPORT_SYMBOL(revert_creds); + +/* + * initialise the credentials stuff + */ +void __init cred_init(void) +{ + /* allocate a slab in which we can store credentials */ + cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred), + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); +} diff --git a/kernel/exit.c b/kernel/exit.c index bbc22530f2c..c0711da1548 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -47,12 +47,14 @@ #include #include #include +#include #include #include #include #include #include +#include "cred-internals.h" static void exit_mm(struct task_struct * tsk); @@ -338,12 +340,12 @@ static void reparent_to_kthreadd(void) /* cpus_allowed? */ /* rt_priority? */ /* signals? */ - security_task_reparent_to_init(current); memcpy(current->signal->rlim, init_task.signal->rlim, sizeof(current->signal->rlim)); - atomic_inc(&(INIT_USER->__count)); + + atomic_inc(&init_cred.usage); + commit_creds(&init_cred); write_unlock_irq(&tasklist_lock); - switch_uid(INIT_USER); } void __set_special_pids(struct pid *pid) @@ -1085,7 +1087,6 @@ NORET_TYPE void do_exit(long code) check_stack_usage(); exit_thread(); cgroup_exit(tsk, 1); - exit_keys(tsk); if (group_dead && tsk->signal->leader) disassociate_ctty(1); diff --git a/kernel/fork.c b/kernel/fork.c index ded1972672a..82a7948a664 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1084,10 +1084,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto bad_fork_cleanup_sighand; if ((retval = copy_mm(clone_flags, p))) goto bad_fork_cleanup_signal; - if ((retval = copy_keys(clone_flags, p))) - goto bad_fork_cleanup_mm; if ((retval = copy_namespaces(clone_flags, p))) - goto bad_fork_cleanup_keys; + goto bad_fork_cleanup_mm; if ((retval = copy_io(clone_flags, p))) goto bad_fork_cleanup_namespaces; retval = copy_thread(0, clone_flags, stack_start, 
stack_size, p, regs); @@ -1252,8 +1250,6 @@ bad_fork_cleanup_io: put_io_context(p->io_context); bad_fork_cleanup_namespaces: exit_task_namespaces(p); -bad_fork_cleanup_keys: - exit_keys(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); @@ -1281,6 +1277,7 @@ bad_fork_cleanup_cgroup: bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: + atomic_dec(&p->cred->user->processes); put_cred(p->cred); bad_fork_free: free_task(p); diff --git a/kernel/kmod.c b/kernel/kmod.c index f044f8f5770..b46dbb90866 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -118,10 +118,10 @@ EXPORT_SYMBOL(request_module); struct subprocess_info { struct work_struct work; struct completion *complete; + struct cred *cred; char *path; char **argv; char **envp; - struct key *ring; enum umh_wait wait; int retval; struct file *stdin; @@ -134,19 +134,20 @@ struct subprocess_info { static int ____call_usermodehelper(void *data) { struct subprocess_info *sub_info = data; - struct key *new_session, *old_session; int retval; - /* Unblock all signals and set the session keyring. 
*/ - new_session = key_get(sub_info->ring); + BUG_ON(atomic_read(&sub_info->cred->usage) != 1); + + /* Unblock all signals */ spin_lock_irq(&current->sighand->siglock); - old_session = __install_session_keyring(new_session); flush_signal_handlers(current, 1); sigemptyset(&current->blocked); recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); - key_put(old_session); + /* Install the credentials */ + commit_creds(sub_info->cred); + sub_info->cred = NULL; /* Install input pipe when needed */ if (sub_info->stdin) { @@ -185,6 +186,8 @@ void call_usermodehelper_freeinfo(struct subprocess_info *info) { if (info->cleanup) (*info->cleanup)(info->argv, info->envp); + if (info->cred) + put_cred(info->cred); kfree(info); } EXPORT_SYMBOL(call_usermodehelper_freeinfo); @@ -240,6 +243,8 @@ static void __call_usermodehelper(struct work_struct *work) pid_t pid; enum umh_wait wait = sub_info->wait; + BUG_ON(atomic_read(&sub_info->cred->usage) != 1); + /* CLONE_VFORK: wait until the usermode helper has execve'd * successfully We need the data structures to stay around * until that is done. 
*/ @@ -362,6 +367,9 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, sub_info->path = path; sub_info->argv = argv; sub_info->envp = envp; + sub_info->cred = prepare_usermodehelper_creds(); + if (!sub_info->cred) + return NULL; out: return sub_info; @@ -376,7 +384,13 @@ EXPORT_SYMBOL(call_usermodehelper_setup); void call_usermodehelper_setkeys(struct subprocess_info *info, struct key *session_keyring) { - info->ring = session_keyring; +#ifdef CONFIG_KEYS + struct thread_group_cred *tgcred = info->cred->tgcred; + key_put(tgcred->session_keyring); + tgcred->session_keyring = key_get(session_keyring); +#else + BUG(); +#endif } EXPORT_SYMBOL(call_usermodehelper_setkeys); @@ -444,6 +458,8 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, DECLARE_COMPLETION_ONSTACK(done); int retval = 0; + BUG_ON(atomic_read(&sub_info->cred->usage) != 1); + helper_lock(); if (sub_info->path[0] == '\0') goto out; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index b9d5f4e4f6a..f764b880695 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -171,6 +171,14 @@ int ptrace_attach(struct task_struct *task) if (same_thread_group(task, current)) goto out; + /* Protect exec's credential calculations against our interference; + * SUID, SGID and LSM creds get determined differently under ptrace. + */ + retval = mutex_lock_interruptible(&current->cred_exec_mutex); + if (retval < 0) + goto out; + + retval = -EPERM; repeat: /* * Nasty, nasty. 
@@ -210,6 +218,7 @@ repeat: bad: write_unlock_irqrestore(&tasklist_lock, flags); task_unlock(task); + mutex_unlock(&current->cred_exec_mutex); out: return retval; } diff --git a/kernel/signal.c b/kernel/signal.c index 84989124baf..2a64304ed54 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -180,7 +180,7 @@ int next_signal(struct sigpending *pending, sigset_t *mask) /* * allocate a new signal queue record * - this may be called without locks if and only if t == current, otherwise an - * appopriate lock must be held to protect t's user_struct + * appopriate lock must be held to stop the target task from exiting */ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, int override_rlimit) @@ -194,7 +194,7 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, * caller must be holding the RCU readlock (by way of a spinlock) and * we use RCU protection here */ - user = __task_cred(t)->user; + user = get_uid(__task_cred(t)->user); atomic_inc(&user->sigpending); if (override_rlimit || atomic_read(&user->sigpending) <= @@ -202,12 +202,14 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, q = kmem_cache_alloc(sigqueue_cachep, flags); if (unlikely(q == NULL)) { atomic_dec(&user->sigpending); + free_uid(user); } else { INIT_LIST_HEAD(&q->list); q->flags = 0; - q->user = get_uid(user); + q->user = user; } - return(q); + + return q; } static void __sigqueue_free(struct sigqueue *q) diff --git a/kernel/sys.c b/kernel/sys.c index ccc9eb736d3..ab735040468 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -180,7 +180,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: - user = cred->user; + user = (struct user_struct *) cred->user; if (!who) who = cred->uid; else if ((who != cred->uid) && @@ -479,47 +479,48 @@ void ctrl_alt_del(void) */ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) { - struct cred *cred = 
current->cred; - int old_rgid = cred->gid; - int old_egid = cred->egid; - int new_rgid = old_rgid; - int new_egid = old_egid; + const struct cred *old; + struct cred *new; int retval; + new = prepare_creds(); + if (!new) + return -ENOMEM; + old = current_cred(); + retval = security_task_setgid(rgid, egid, (gid_t)-1, LSM_SETID_RE); if (retval) - return retval; + goto error; + retval = -EPERM; if (rgid != (gid_t) -1) { - if ((old_rgid == rgid) || - (cred->egid == rgid) || + if (old->gid == rgid || + old->egid == rgid || capable(CAP_SETGID)) - new_rgid = rgid; + new->gid = rgid; else - return -EPERM; + goto error; } if (egid != (gid_t) -1) { - if ((old_rgid == egid) || - (cred->egid == egid) || - (cred->sgid == egid) || + if (old->gid == egid || + old->egid == egid || + old->sgid == egid || capable(CAP_SETGID)) - new_egid = egid; + new->egid = egid; else - return -EPERM; - } - if (new_egid != old_egid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); + goto error; } + if (rgid != (gid_t) -1 || - (egid != (gid_t) -1 && egid != old_rgid)) - cred->sgid = new_egid; - cred->fsgid = new_egid; - cred->egid = new_egid; - cred->gid = new_rgid; - key_fsgid_changed(current); - proc_id_connector(current, PROC_EVENT_GID); - return 0; + (egid != (gid_t) -1 && egid != old->gid)) + new->sgid = new->egid; + new->fsgid = new->egid; + + return commit_creds(new); + +error: + abort_creds(new); + return retval; } /* @@ -529,40 +530,42 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) */ asmlinkage long sys_setgid(gid_t gid) { - struct cred *cred = current->cred; - int old_egid = cred->egid; + const struct cred *old; + struct cred *new; int retval; + new = prepare_creds(); + if (!new) + return -ENOMEM; + old = current_cred(); + retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID); if (retval) - return retval; + goto error; - if (capable(CAP_SETGID)) { - if (old_egid != gid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); - } - cred->gid = 
cred->egid = cred->sgid = cred->fsgid = gid; - } else if ((gid == cred->gid) || (gid == cred->sgid)) { - if (old_egid != gid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); - } - cred->egid = cred->fsgid = gid; - } + retval = -EPERM; + if (capable(CAP_SETGID)) + new->gid = new->egid = new->sgid = new->fsgid = gid; + else if (gid == old->gid || gid == old->sgid) + new->egid = new->fsgid = gid; else - return -EPERM; + goto error; - key_fsgid_changed(current); - proc_id_connector(current, PROC_EVENT_GID); - return 0; + return commit_creds(new); + +error: + abort_creds(new); + return retval; } -static int set_user(uid_t new_ruid, int dumpclear) +/* + * change the user struct in a credentials set to match the new UID + */ +static int set_user(struct cred *new) { struct user_struct *new_user; - new_user = alloc_uid(current->nsproxy->user_ns, new_ruid); + new_user = alloc_uid(current->nsproxy->user_ns, new->uid); if (!new_user) return -EAGAIN; @@ -573,13 +576,8 @@ static int set_user(uid_t new_ruid, int dumpclear) return -EAGAIN; } - switch_uid(new_user); - - if (dumpclear) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); - } - current->cred->uid = new_ruid; + free_uid(new->user); + new->user = new_user; return 0; } @@ -600,55 +598,56 @@ static int set_user(uid_t new_ruid, int dumpclear) */ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) { - struct cred *cred = current->cred; - int old_ruid, old_euid, old_suid, new_ruid, new_euid; + const struct cred *old; + struct cred *new; int retval; + new = prepare_creds(); + if (!new) + return -ENOMEM; + old = current_cred(); + retval = security_task_setuid(ruid, euid, (uid_t)-1, LSM_SETID_RE); if (retval) - return retval; - - new_ruid = old_ruid = cred->uid; - new_euid = old_euid = cred->euid; - old_suid = cred->suid; + goto error; + retval = -EPERM; if (ruid != (uid_t) -1) { - new_ruid = ruid; - if ((old_ruid != ruid) && - (cred->euid != ruid) && + new->uid = ruid; + if (old->uid != ruid && + old->euid 
!= ruid && !capable(CAP_SETUID)) - return -EPERM; + goto error; } if (euid != (uid_t) -1) { - new_euid = euid; - if ((old_ruid != euid) && - (cred->euid != euid) && - (cred->suid != euid) && + new->euid = euid; + if (old->uid != euid && + old->euid != euid && + old->suid != euid && !capable(CAP_SETUID)) - return -EPERM; + goto error; } - if (new_ruid != old_ruid && set_user(new_ruid, new_euid != old_euid) < 0) - return -EAGAIN; + retval = -EAGAIN; + if (new->uid != old->uid && set_user(new) < 0) + goto error; - if (new_euid != old_euid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); - } - cred->fsuid = cred->euid = new_euid; if (ruid != (uid_t) -1 || - (euid != (uid_t) -1 && euid != old_ruid)) - cred->suid = cred->euid; - cred->fsuid = cred->euid; - - key_fsuid_changed(current); - proc_id_connector(current, PROC_EVENT_UID); + (euid != (uid_t) -1 && euid != old->uid)) + new->suid = new->euid; + new->fsuid = new->euid; - return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE); -} + retval = security_task_fix_setuid(new, old, LSM_SETID_RE); + if (retval < 0) + goto error; + return commit_creds(new); +error: + abort_creds(new); + return retval; +} /* * setuid() is implemented like SysV with SAVED_IDS @@ -663,37 +662,41 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) */ asmlinkage long sys_setuid(uid_t uid) { - struct cred *cred = current->cred; - int old_euid = cred->euid; - int old_ruid, old_suid, new_suid; + const struct cred *old; + struct cred *new; int retval; + new = prepare_creds(); + if (!new) + return -ENOMEM; + old = current_cred(); + retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID); if (retval) - return retval; + goto error; - old_ruid = cred->uid; - old_suid = cred->suid; - new_suid = old_suid; - + retval = -EPERM; if (capable(CAP_SETUID)) { - if (uid != old_ruid && set_user(uid, old_euid != uid) < 0) - return -EAGAIN; - new_suid = uid; - } else if ((uid != cred->uid) && (uid != new_suid)) - 
return -EPERM; - - if (old_euid != uid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); + new->suid = new->uid = uid; + if (uid != old->uid && set_user(new) < 0) { + retval = -EAGAIN; + goto error; + } + } else if (uid != old->uid && uid != new->suid) { + goto error; } - cred->fsuid = cred->euid = uid; - cred->suid = new_suid; - key_fsuid_changed(current); - proc_id_connector(current, PROC_EVENT_UID); + new->fsuid = new->euid = uid; + + retval = security_task_fix_setuid(new, old, LSM_SETID_ID); + if (retval < 0) + goto error; + + return commit_creds(new); - return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID); +error: + abort_creds(new); + return retval; } @@ -703,47 +706,53 @@ asmlinkage long sys_setuid(uid_t uid) */ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) { - struct cred *cred = current->cred; - int old_ruid = cred->uid; - int old_euid = cred->euid; - int old_suid = cred->suid; + const struct cred *old; + struct cred *new; int retval; + new = prepare_creds(); + if (!new) + return -ENOMEM; + retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES); if (retval) - return retval; + goto error; + old = current_cred(); + retval = -EPERM; if (!capable(CAP_SETUID)) { - if ((ruid != (uid_t) -1) && (ruid != cred->uid) && - (ruid != cred->euid) && (ruid != cred->suid)) - return -EPERM; - if ((euid != (uid_t) -1) && (euid != cred->uid) && - (euid != cred->euid) && (euid != cred->suid)) - return -EPERM; - if ((suid != (uid_t) -1) && (suid != cred->uid) && - (suid != cred->euid) && (suid != cred->suid)) - return -EPERM; + if (ruid != (uid_t) -1 && ruid != old->uid && + ruid != old->euid && ruid != old->suid) + goto error; + if (euid != (uid_t) -1 && euid != old->uid && + euid != old->euid && euid != old->suid) + goto error; + if (suid != (uid_t) -1 && suid != old->uid && + suid != old->euid && suid != old->suid) + goto error; } + + retval = -EAGAIN; if (ruid != (uid_t) -1) { - if (ruid != cred->uid && - 
set_user(ruid, euid != cred->euid) < 0) - return -EAGAIN; + new->uid = ruid; + if (ruid != old->uid && set_user(new) < 0) + goto error; } - if (euid != (uid_t) -1) { - if (euid != cred->euid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); - } - cred->euid = euid; - } - cred->fsuid = cred->euid; + if (euid != (uid_t) -1) + new->euid = euid; if (suid != (uid_t) -1) - cred->suid = suid; + new->suid = suid; + new->fsuid = new->euid; - key_fsuid_changed(current); - proc_id_connector(current, PROC_EVENT_UID); + retval = security_task_fix_setuid(new, old, LSM_SETID_RES); + if (retval < 0) + goto error; - return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES); + return commit_creds(new); + +error: + abort_creds(new); + return retval; } asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid) @@ -763,40 +772,45 @@ asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __us */ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) { - struct cred *cred = current->cred; + const struct cred *old; + struct cred *new; int retval; + new = prepare_creds(); + if (!new) + return -ENOMEM; + old = current_cred(); + retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES); if (retval) - return retval; + goto error; + retval = -EPERM; if (!capable(CAP_SETGID)) { - if ((rgid != (gid_t) -1) && (rgid != cred->gid) && - (rgid != cred->egid) && (rgid != cred->sgid)) - return -EPERM; - if ((egid != (gid_t) -1) && (egid != cred->gid) && - (egid != cred->egid) && (egid != cred->sgid)) - return -EPERM; - if ((sgid != (gid_t) -1) && (sgid != cred->gid) && - (sgid != cred->egid) && (sgid != cred->sgid)) - return -EPERM; + if (rgid != (gid_t) -1 && rgid != old->gid && + rgid != old->egid && rgid != old->sgid) + goto error; + if (egid != (gid_t) -1 && egid != old->gid && + egid != old->egid && egid != old->sgid) + goto error; + if (sgid != (gid_t) -1 && sgid != old->gid && + sgid != old->egid && 
sgid != old->sgid) + goto error; } - if (egid != (gid_t) -1) { - if (egid != cred->egid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); - } - cred->egid = egid; - } - cred->fsgid = cred->egid; + if (rgid != (gid_t) -1) - cred->gid = rgid; + new->gid = rgid; + if (egid != (gid_t) -1) + new->egid = egid; if (sgid != (gid_t) -1) - cred->sgid = sgid; + new->sgid = sgid; + new->fsgid = new->egid; - key_fsgid_changed(current); - proc_id_connector(current, PROC_EVENT_GID); - return 0; + return commit_creds(new); + +error: + abort_creds(new); + return retval; } asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid) @@ -820,28 +834,35 @@ asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __us */ asmlinkage long sys_setfsuid(uid_t uid) { - struct cred *cred = current->cred; - int old_fsuid; + const struct cred *old; + struct cred *new; + uid_t old_fsuid; + + new = prepare_creds(); + if (!new) + return current_fsuid(); + old = current_cred(); + old_fsuid = old->fsuid; - old_fsuid = cred->fsuid; - if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS)) - return old_fsuid; + if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS) < 0) + goto error; - if (uid == cred->uid || uid == cred->euid || - uid == cred->suid || uid == cred->fsuid || + if (uid == old->uid || uid == old->euid || + uid == old->suid || uid == old->fsuid || capable(CAP_SETUID)) { if (uid != old_fsuid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); + new->fsuid = uid; + if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) + goto change_okay; } - cred->fsuid = uid; } - key_fsuid_changed(current); - proc_id_connector(current, PROC_EVENT_UID); - - security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS); +error: + abort_creds(new); + return old_fsuid; +change_okay: + commit_creds(new); return old_fsuid; } @@ -850,24 +871,34 @@ asmlinkage long sys_setfsuid(uid_t uid) */ asmlinkage long 
sys_setfsgid(gid_t gid) { - struct cred *cred = current->cred; - int old_fsgid; + const struct cred *old; + struct cred *new; + gid_t old_fsgid; + + new = prepare_creds(); + if (!new) + return current_fsgid(); + old = current_cred(); + old_fsgid = old->fsgid; - old_fsgid = cred->fsgid; if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS)) - return old_fsgid; + goto error; - if (gid == cred->gid || gid == cred->egid || - gid == cred->sgid || gid == cred->fsgid || + if (gid == old->gid || gid == old->egid || + gid == old->sgid || gid == old->fsgid || capable(CAP_SETGID)) { if (gid != old_fsgid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); + new->fsgid = gid; + goto change_okay; } - cred->fsgid = gid; - key_fsgid_changed(current); - proc_id_connector(current, PROC_EVENT_GID); } + +error: + abort_creds(new); + return old_fsgid; + +change_okay: + commit_creds(new); return old_fsgid; } @@ -1136,7 +1167,7 @@ EXPORT_SYMBOL(groups_free); /* export the group_info to a user-space array */ static int groups_to_user(gid_t __user *grouplist, - struct group_info *group_info) + const struct group_info *group_info) { int i; unsigned int count = group_info->ngroups; @@ -1227,31 +1258,25 @@ int groups_search(const struct group_info *group_info, gid_t grp) } /** - * set_groups - Change a group subscription in a security record - * @sec: The security record to alter - * @group_info: The group list to impose + * set_groups - Change a group subscription in a set of credentials + * @new: The newly prepared set of credentials to alter + * @group_info: The group list to install * - * Validate a group subscription and, if valid, impose it upon a task security - * record. + * Validate a group subscription and, if valid, insert it into a set + * of credentials. 
*/ -int set_groups(struct cred *cred, struct group_info *group_info) +int set_groups(struct cred *new, struct group_info *group_info) { int retval; - struct group_info *old_info; retval = security_task_setgroups(group_info); if (retval) return retval; + put_group_info(new->group_info); groups_sort(group_info); get_group_info(group_info); - - spin_lock(&cred->lock); - old_info = cred->group_info; - cred->group_info = group_info; - spin_unlock(&cred->lock); - - put_group_info(old_info); + new->group_info = group_info; return 0; } @@ -1266,7 +1291,20 @@ EXPORT_SYMBOL(set_groups); */ int set_current_groups(struct group_info *group_info) { - return set_groups(current->cred, group_info); + struct cred *new; + int ret; + + new = prepare_creds(); + if (!new) + return -ENOMEM; + + ret = set_groups(new, group_info); + if (ret < 0) { + abort_creds(new); + return ret; + } + + return commit_creds(new); } EXPORT_SYMBOL(set_current_groups); @@ -1666,9 +1704,11 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned char comm[sizeof(me->comm)]; long error; - if (security_task_prctl(option, arg2, arg3, arg4, arg5, &error)) + error = security_task_prctl(option, arg2, arg3, arg4, arg5); + if (error != -ENOSYS) return error; + error = 0; switch (option) { case PR_SET_PDEATHSIG: if (!valid_signal(arg2)) { diff --git a/kernel/user.c b/kernel/user.c index 104d22ac84d..d476307dd4b 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -16,6 +16,7 @@ #include #include #include +#include "cred-internals.h" struct user_namespace init_user_ns = { .kref = { @@ -104,16 +105,10 @@ static int sched_create_user(struct user_struct *up) return rc; } -static void sched_switch_user(struct task_struct *p) -{ - sched_move_task(p); -} - #else /* CONFIG_USER_SCHED */ static void sched_destroy_user(struct user_struct *up) { } static int sched_create_user(struct user_struct *up) { return 0; } -static void sched_switch_user(struct task_struct *p) { } #endif /* CONFIG_USER_SCHED 
*/ @@ -448,36 +443,6 @@ out_unlock: return NULL; } -void switch_uid(struct user_struct *new_user) -{ - struct user_struct *old_user; - - /* What if a process setreuid()'s and this brings the - * new uid over his NPROC rlimit? We can check this now - * cheaply with the new uid cache, so if it matters - * we should be checking for it. -DaveM - */ - old_user = current->cred->user; - atomic_inc(&new_user->processes); - atomic_dec(&old_user->processes); - switch_uid_keyring(new_user); - current->cred->user = new_user; - sched_switch_user(current); - - /* - * We need to synchronize with __sigqueue_alloc() - * doing a get_uid(p->user).. If that saw the old - * user value, we need to wait until it has exited - * its critical region before we can free the old - * structure. - */ - smp_mb(); - spin_unlock_wait(&current->sighand->siglock); - - free_uid(old_user); - suid_keys(current); -} - #ifdef CONFIG_USER_NS void release_uids(struct user_namespace *ns) { diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index f82730adea0..0d9c51d6733 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -19,6 +19,7 @@ static struct user_namespace *clone_user_ns(struct user_namespace *old_ns) { struct user_namespace *ns; struct user_struct *new_user; + struct cred *new; int n; ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL); @@ -45,7 +46,16 @@ static struct user_namespace *clone_user_ns(struct user_namespace *old_ns) return ERR_PTR(-ENOMEM); } - switch_uid(new_user); + /* Install the new user */ + new = prepare_creds(); + if (!new) { + free_uid(new_user); + free_uid(ns->root_user); + kfree(ns); + } + free_uid(new->user); + new->user = new_user; + commit_creds(new); return ns; } diff --git a/lib/Makefile b/lib/Makefile index 7cb65d85aeb..80fe8a3ec12 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -11,7 +11,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o irq_regs.o
reciprocal_div.o argv_split.o \ - proportions.o prio_heap.o ratelimit.o show_mem.o + proportions.o prio_heap.o ratelimit.o show_mem.o is_single_threaded.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 9a8ff684da7..ad8c7a782da 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -287,6 +287,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn, time_t expiry, u32 kvno) { + const struct cred *cred = current_cred(); struct key *key; int ret; @@ -297,7 +298,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn, _enter(""); - key = key_alloc(&key_type_rxrpc, "x", 0, 0, current, 0, + key = key_alloc(&key_type_rxrpc, "x", 0, 0, cred, 0, KEY_ALLOC_NOT_IN_QUOTA); if (IS_ERR(key)) { _leave(" = -ENOMEM [alloc %ld]", PTR_ERR(key)); @@ -340,10 +341,11 @@ EXPORT_SYMBOL(rxrpc_get_server_data_key); */ struct key *rxrpc_get_null_key(const char *keyname) { + const struct cred *cred = current_cred(); struct key *key; int ret; - key = key_alloc(&key_type_rxrpc, keyname, 0, 0, current, + key = key_alloc(&key_type_rxrpc, keyname, 0, 0, cred, KEY_POS_SEARCH, KEY_ALLOC_NOT_IN_QUOTA); if (IS_ERR(key)) return key; diff --git a/security/capability.c b/security/capability.c index fac2f61b69a..efeb6d9e0e6 100644 --- a/security/capability.c +++ b/security/capability.c @@ -340,12 +340,16 @@ static int cap_task_create(unsigned long clone_flags) return 0; } -static int cap_cred_alloc_security(struct cred *cred) +static void cap_cred_free(struct cred *cred) +{ +} + +static int cap_cred_prepare(struct cred *new, const struct cred *old, gfp_t gfp) { return 0; } -static void cap_cred_free(struct cred *cred) +static void cap_cred_commit(struct cred *new, const struct cred *old) { } @@ -750,7 +754,7 @@ static void cap_release_secctx(char *secdata, u32 seclen) } #ifdef CONFIG_KEYS -static int cap_key_alloc(struct key *key, struct task_struct *ctx, +static int cap_key_alloc(struct key *key, const struct 
cred *cred, unsigned long flags) { return 0; @@ -760,7 +764,7 @@ static void cap_key_free(struct key *key) { } -static int cap_key_permission(key_ref_t key_ref, struct task_struct *context, +static int cap_key_permission(key_ref_t key_ref, const struct cred *cred, key_perm_t perm) { return 0; @@ -814,8 +818,7 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, ptrace_may_access); set_to_cap_if_null(ops, ptrace_traceme); set_to_cap_if_null(ops, capget); - set_to_cap_if_null(ops, capset_check); - set_to_cap_if_null(ops, capset_set); + set_to_cap_if_null(ops, capset); set_to_cap_if_null(ops, acct); set_to_cap_if_null(ops, capable); set_to_cap_if_null(ops, quotactl); @@ -890,10 +893,11 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, file_receive); set_to_cap_if_null(ops, dentry_open); set_to_cap_if_null(ops, task_create); - set_to_cap_if_null(ops, cred_alloc_security); set_to_cap_if_null(ops, cred_free); + set_to_cap_if_null(ops, cred_prepare); + set_to_cap_if_null(ops, cred_commit); set_to_cap_if_null(ops, task_setuid); - set_to_cap_if_null(ops, task_post_setuid); + set_to_cap_if_null(ops, task_fix_setuid); set_to_cap_if_null(ops, task_setgid); set_to_cap_if_null(ops, task_setpgid); set_to_cap_if_null(ops, task_getpgid); @@ -910,7 +914,6 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, task_wait); set_to_cap_if_null(ops, task_kill); set_to_cap_if_null(ops, task_prctl); - set_to_cap_if_null(ops, task_reparent_to_init); set_to_cap_if_null(ops, task_to_inode); set_to_cap_if_null(ops, ipc_permission); set_to_cap_if_null(ops, ipc_getsecid); diff --git a/security/commoncap.c b/security/commoncap.c index 0384bf95db6..b5419273f92 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -72,8 +72,8 @@ int cap_ptrace_may_access(struct task_struct *child, unsigned int mode) int ret = 0; rcu_read_lock(); - if (!cap_issubset(child->cred->cap_permitted, - 
current->cred->cap_permitted) && + if (!cap_issubset(__task_cred(child)->cap_permitted, + current_cred()->cap_permitted) && !capable(CAP_SYS_PTRACE)) ret = -EPERM; rcu_read_unlock(); @@ -85,8 +85,8 @@ int cap_ptrace_traceme(struct task_struct *parent) int ret = 0; rcu_read_lock(); - if (!cap_issubset(current->cred->cap_permitted, - parent->cred->cap_permitted) && + if (!cap_issubset(current_cred()->cap_permitted, + __task_cred(parent)->cap_permitted) && !has_capability(parent, CAP_SYS_PTRACE)) ret = -EPERM; rcu_read_unlock(); @@ -117,7 +117,7 @@ static inline int cap_inh_is_capped(void) * to the old permitted set. That is, if the current task * does *not* possess the CAP_SETPCAP capability. */ - return (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0); + return cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0; } static inline int cap_limit_ptraced_target(void) { return 1; } @@ -132,52 +132,39 @@ static inline int cap_limit_ptraced_target(void) #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ -int cap_capset_check(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) +int cap_capset(struct cred *new, + const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { - const struct cred *cred = current->cred; - - if (cap_inh_is_capped() - && !cap_issubset(*inheritable, - cap_combine(cred->cap_inheritable, - cred->cap_permitted))) { + if (cap_inh_is_capped() && + !cap_issubset(*inheritable, + cap_combine(old->cap_inheritable, + old->cap_permitted))) /* incapable of using this inheritable set */ return -EPERM; - } + if (!cap_issubset(*inheritable, - cap_combine(cred->cap_inheritable, - cred->cap_bset))) { + cap_combine(old->cap_inheritable, + old->cap_bset))) /* no new pI capabilities outside bounding set */ return -EPERM; - } /* verify restrictions on target's new Permitted set */ - if (!cap_issubset (*permitted, - cap_combine 
(cred->cap_permitted, - cred->cap_permitted))) { + if (!cap_issubset(*permitted, old->cap_permitted)) return -EPERM; - } /* verify the _new_Effective_ is a subset of the _new_Permitted_ */ - if (!cap_issubset (*effective, *permitted)) { + if (!cap_issubset(*effective, *permitted)) return -EPERM; - } + new->cap_effective = *effective; + new->cap_inheritable = *inheritable; + new->cap_permitted = *permitted; return 0; } -void cap_capset_set(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) -{ - struct cred *cred = current->cred; - - cred->cap_effective = *effective; - cred->cap_inheritable = *inheritable; - cred->cap_permitted = *permitted; -} - static inline void bprm_clear_caps(struct linux_binprm *bprm) { cap_clear(bprm->cap_post_exec_permitted); @@ -382,41 +369,46 @@ int cap_bprm_set_security (struct linux_binprm *bprm) return ret; } -void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) +int cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) { - struct cred *cred = current->cred; + const struct cred *old = current_cred(); + struct cred *new; + + new = prepare_creds(); + if (!new) + return -ENOMEM; - if (bprm->e_uid != cred->uid || bprm->e_gid != cred->gid || + if (bprm->e_uid != old->uid || bprm->e_gid != old->gid || !cap_issubset(bprm->cap_post_exec_permitted, - cred->cap_permitted)) { + old->cap_permitted)) { set_dumpable(current->mm, suid_dumpable); current->pdeath_signal = 0; if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) { if (!capable(CAP_SETUID)) { - bprm->e_uid = cred->uid; - bprm->e_gid = cred->gid; + bprm->e_uid = old->uid; + bprm->e_gid = old->gid; } if (cap_limit_ptraced_target()) { bprm->cap_post_exec_permitted = cap_intersect( bprm->cap_post_exec_permitted, - cred->cap_permitted); + new->cap_permitted); } } } - cred->suid = cred->euid = cred->fsuid = bprm->e_uid; - cred->sgid = cred->egid = cred->fsgid = bprm->e_gid; + new->suid = new->euid = new->fsuid = bprm->e_uid; + new->sgid = 
new->egid = new->fsgid = bprm->e_gid; /* For init, we want to retain the capabilities set * in the init_task struct. Thus we skip the usual * capability rules */ if (!is_global_init(current)) { - cred->cap_permitted = bprm->cap_post_exec_permitted; + new->cap_permitted = bprm->cap_post_exec_permitted; if (bprm->cap_effective) - cred->cap_effective = bprm->cap_post_exec_permitted; + new->cap_effective = bprm->cap_post_exec_permitted; else - cap_clear(cred->cap_effective); + cap_clear(new->cap_effective); } /* @@ -431,15 +423,15 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) * Number 1 above might fail if you don't have a full bset, but I think * that is interesting information to audit. */ - if (!cap_isclear(cred->cap_effective)) { - if (!cap_issubset(CAP_FULL_SET, cred->cap_effective) || - (bprm->e_uid != 0) || (cred->uid != 0) || + if (!cap_isclear(new->cap_effective)) { + if (!cap_issubset(CAP_FULL_SET, new->cap_effective) || + bprm->e_uid != 0 || new->uid != 0 || issecure(SECURE_NOROOT)) - audit_log_bprm_fcaps(bprm, &cred->cap_permitted, - &cred->cap_effective); + audit_log_bprm_fcaps(bprm, new, old); } - cred->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); + new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); + return commit_creds(new); } int cap_bprm_secureexec (struct linux_binprm *bprm) @@ -514,65 +506,49 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name) * files.. * Thanks to Olaf Kirch and Peter Benie for spotting this. 
*/ -static inline void cap_emulate_setxuid (int old_ruid, int old_euid, - int old_suid) +static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old) { - struct cred *cred = current->cred; - - if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) && - (cred->uid != 0 && cred->euid != 0 && cred->suid != 0) && + if ((old->uid == 0 || old->euid == 0 || old->suid == 0) && + (new->uid != 0 && new->euid != 0 && new->suid != 0) && !issecure(SECURE_KEEP_CAPS)) { - cap_clear(cred->cap_permitted); - cap_clear(cred->cap_effective); - } - if (old_euid == 0 && cred->euid != 0) { - cap_clear(cred->cap_effective); - } - if (old_euid != 0 && cred->euid == 0) { - cred->cap_effective = cred->cap_permitted; + cap_clear(new->cap_permitted); + cap_clear(new->cap_effective); } + if (old->euid == 0 && new->euid != 0) + cap_clear(new->cap_effective); + if (old->euid != 0 && new->euid == 0) + new->cap_effective = new->cap_permitted; } -int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, - int flags) +int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags) { - struct cred *cred = current->cred; - switch (flags) { case LSM_SETID_RE: case LSM_SETID_ID: case LSM_SETID_RES: /* Copied from kernel/sys.c:setreuid/setuid/setresuid. */ - if (!issecure (SECURE_NO_SETUID_FIXUP)) { - cap_emulate_setxuid (old_ruid, old_euid, old_suid); - } + if (!issecure(SECURE_NO_SETUID_FIXUP)) + cap_emulate_setxuid(new, old); break; case LSM_SETID_FS: - { - uid_t old_fsuid = old_ruid; - - /* Copied from kernel/sys.c:setfsuid. */ + /* Copied from kernel/sys.c:setfsuid. */ - /* - * FIXME - is fsuser used for all CAP_FS_MASK capabilities? - * if not, we might be a bit too harsh here. 
- */ - - if (!issecure (SECURE_NO_SETUID_FIXUP)) { - if (old_fsuid == 0 && cred->fsuid != 0) { - cred->cap_effective = - cap_drop_fs_set( - cred->cap_effective); - } - if (old_fsuid != 0 && cred->fsuid == 0) { - cred->cap_effective = - cap_raise_fs_set( - cred->cap_effective, - cred->cap_permitted); - } + /* + * FIXME - is fsuser used for all CAP_FS_MASK capabilities? + * if not, we might be a bit too harsh here. + */ + if (!issecure(SECURE_NO_SETUID_FIXUP)) { + if (old->fsuid == 0 && new->fsuid != 0) { + new->cap_effective = + cap_drop_fs_set(new->cap_effective); + } + if (old->fsuid != 0 && new->fsuid == 0) { + new->cap_effective = + cap_raise_fs_set(new->cap_effective, + new->cap_permitted); } - break; } + break; default: return -EINVAL; } @@ -628,13 +604,14 @@ int cap_task_setnice (struct task_struct *p, int nice) * this task could get inconsistent info. There can be no * racing writer bc a task can only change its own caps. */ -static long cap_prctl_drop(unsigned long cap) +static long cap_prctl_drop(struct cred *new, unsigned long cap) { if (!capable(CAP_SETPCAP)) return -EPERM; if (!cap_valid(cap)) return -EINVAL; - cap_lower(current->cred->cap_bset, cap); + + cap_lower(new->cap_bset, cap); return 0; } @@ -655,22 +632,29 @@ int cap_task_setnice (struct task_struct *p, int nice) #endif int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5, long *rc_p) + unsigned long arg4, unsigned long arg5) { - struct cred *cred = current_cred(); + struct cred *new; long error = 0; + new = prepare_creds(); + if (!new) + return -ENOMEM; + switch (option) { case PR_CAPBSET_READ: + error = -EINVAL; if (!cap_valid(arg2)) - error = -EINVAL; - else - error = !!cap_raised(cred->cap_bset, arg2); - break; + goto error; + error = !!cap_raised(new->cap_bset, arg2); + goto no_change; + #ifdef CONFIG_SECURITY_FILE_CAPABILITIES case PR_CAPBSET_DROP: - error = cap_prctl_drop(arg2); - break; + error = cap_prctl_drop(new, arg2); + if 
(error < 0) + goto error; + goto changed; /* * The next four prctl's remain to assist with transitioning a @@ -692,12 +676,12 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, * capability-based-privilege environment. */ case PR_SET_SECUREBITS: - if ((((cred->securebits & SECURE_ALL_LOCKS) >> 1) - & (cred->securebits ^ arg2)) /*[1]*/ - || ((cred->securebits & SECURE_ALL_LOCKS - & ~arg2)) /*[2]*/ - || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ - || (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0)) { /*[4]*/ + error = -EPERM; + if ((((new->securebits & SECURE_ALL_LOCKS) >> 1) + & (new->securebits ^ arg2)) /*[1]*/ + || ((new->securebits & SECURE_ALL_LOCKS & ~arg2)) /*[2]*/ + || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ + || (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0) /*[4]*/ /* * [1] no changing of bits that are locked * [2] no unlocking of locks @@ -705,50 +689,51 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, * [4] doing anything requires privilege (go read about * the "sendmail capabilities bug") */ - error = -EPERM; /* cannot change a locked bit */ - } else { - cred->securebits = arg2; - } - break; + ) + /* cannot change a locked bit */ + goto error; + new->securebits = arg2; + goto changed; + case PR_GET_SECUREBITS: - error = cred->securebits; - break; + error = new->securebits; + goto no_change; #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ case PR_GET_KEEPCAPS: if (issecure(SECURE_KEEP_CAPS)) error = 1; - break; + goto no_change; + case PR_SET_KEEPCAPS: + error = -EINVAL; if (arg2 > 1) /* Note, we rely on arg2 being unsigned here */ - error = -EINVAL; - else if (issecure(SECURE_KEEP_CAPS_LOCKED)) - error = -EPERM; - else if (arg2) - cred->securebits |= issecure_mask(SECURE_KEEP_CAPS); + goto error; + error = -EPERM; + if (issecure(SECURE_KEEP_CAPS_LOCKED)) + goto error; + if (arg2) + new->securebits |= issecure_mask(SECURE_KEEP_CAPS); else - 
cred->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); - break; + new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); + goto changed; default: /* No functionality available - continue with default */ - return 0; + error = -ENOSYS; + goto error; } /* Functionality provided */ - *rc_p = error; - return 1; -} - -void cap_task_reparent_to_init (struct task_struct *p) -{ - struct cred *cred = p->cred; - - cap_set_init_eff(cred->cap_effective); - cap_clear(cred->cap_inheritable); - cap_set_full(cred->cap_permitted); - p->cred->securebits = SECUREBITS_DEFAULT; +changed: + return commit_creds(new); + +no_change: + error = 0; +error: + abort_creds(new); + return error; } int cap_syslog (int type) diff --git a/security/keys/internal.h b/security/keys/internal.h index d1586c62978..81932abefe7 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -12,6 +12,7 @@ #ifndef _INTERNAL_H #define _INTERNAL_H +#include #include static inline __attribute__((format(printf, 1, 2))) @@ -25,7 +26,7 @@ void no_printk(const char *fmt, ...) #define kleave(FMT, ...) \ printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__) #define kdebug(FMT, ...) \ - printk(KERN_DEBUG "xxx" FMT"yyy\n", ##__VA_ARGS__) + printk(KERN_DEBUG " "FMT"\n", ##__VA_ARGS__) #else #define kenter(FMT, ...) 
\ no_printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__) @@ -97,7 +98,7 @@ extern struct key *keyring_search_instkey(struct key *keyring, typedef int (*key_match_func_t)(const struct key *, const void *); extern key_ref_t keyring_search_aux(key_ref_t keyring_ref, - struct task_struct *tsk, + const struct cred *cred, struct key_type *type, const void *description, key_match_func_t match); @@ -105,13 +106,13 @@ extern key_ref_t keyring_search_aux(key_ref_t keyring_ref, extern key_ref_t search_process_keyrings(struct key_type *type, const void *description, key_match_func_t match, - struct task_struct *tsk); + const struct cred *cred); extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check); extern int install_user_keyrings(void); -extern int install_thread_keyring(void); -extern int install_process_keyring(void); +extern int install_thread_keyring_to_cred(struct cred *); +extern int install_process_keyring_to_cred(struct cred *); extern struct key *request_key_and_link(struct key_type *type, const char *description, @@ -130,12 +131,12 @@ extern long join_session_keyring(const char *name); * check to see whether permission is granted to use a key in the desired way */ extern int key_task_permission(const key_ref_t key_ref, - struct task_struct *context, + const struct cred *cred, key_perm_t perm); static inline int key_permission(const key_ref_t key_ref, key_perm_t perm) { - return key_task_permission(key_ref, current, perm); + return key_task_permission(key_ref, current_cred(), perm); } /* required permissions */ @@ -153,7 +154,7 @@ static inline int key_permission(const key_ref_t key_ref, key_perm_t perm) struct request_key_auth { struct key *target_key; struct key *dest_keyring; - struct task_struct *context; + const struct cred *cred; void *callout_info; size_t callout_len; pid_t pid; diff --git a/security/keys/key.c b/security/keys/key.c index a6ca39ed3b0..f76c8a546fd 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ 
-218,7 +218,7 @@ serial_exists: * instantiate the key or discard it before returning */ struct key *key_alloc(struct key_type *type, const char *desc, - uid_t uid, gid_t gid, struct task_struct *ctx, + uid_t uid, gid_t gid, const struct cred *cred, key_perm_t perm, unsigned long flags) { struct key_user *user = NULL; @@ -294,7 +294,7 @@ struct key *key_alloc(struct key_type *type, const char *desc, #endif /* let the security module know about the key */ - ret = security_key_alloc(key, ctx, flags); + ret = security_key_alloc(key, cred, flags); if (ret < 0) goto security_error; @@ -391,7 +391,7 @@ static int __key_instantiate_and_link(struct key *key, const void *data, size_t datalen, struct key *keyring, - struct key *instkey) + struct key *authkey) { int ret, awaken; @@ -421,8 +421,8 @@ static int __key_instantiate_and_link(struct key *key, ret = __key_link(keyring, key); /* disable the authorisation key */ - if (instkey) - key_revoke(instkey); + if (authkey) + key_revoke(authkey); } } @@ -444,14 +444,14 @@ int key_instantiate_and_link(struct key *key, const void *data, size_t datalen, struct key *keyring, - struct key *instkey) + struct key *authkey) { int ret; if (keyring) down_write(&keyring->sem); - ret = __key_instantiate_and_link(key, data, datalen, keyring, instkey); + ret = __key_instantiate_and_link(key, data, datalen, keyring, authkey); if (keyring) up_write(&keyring->sem); @@ -469,7 +469,7 @@ EXPORT_SYMBOL(key_instantiate_and_link); int key_negate_and_link(struct key *key, unsigned timeout, struct key *keyring, - struct key *instkey) + struct key *authkey) { struct timespec now; int ret, awaken; @@ -504,8 +504,8 @@ int key_negate_and_link(struct key *key, ret = __key_link(keyring, key); /* disable the authorisation key */ - if (instkey) - key_revoke(instkey); + if (authkey) + key_revoke(authkey); } mutex_unlock(&key_construction_mutex); @@ -743,6 +743,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, key_perm_t perm, unsigned long flags) { + 
const struct cred *cred = current_cred(); struct key_type *ktype; struct key *keyring, *key = NULL; key_ref_t key_ref; @@ -802,8 +803,8 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, } /* allocate a new key */ - key = key_alloc(ktype, description, current_fsuid(), current_fsgid(), - current, perm, flags); + key = key_alloc(ktype, description, cred->fsuid, cred->fsgid, cred, + perm, flags); if (IS_ERR(key)) { key_ref = ERR_CAST(key); goto error_3; diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 8833b447ade..7c72baa02f2 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -866,6 +866,23 @@ static long get_instantiation_keyring(key_serial_t ringid, return -ENOKEY; } +/* + * change the request_key authorisation key on the current process + */ +static int keyctl_change_reqkey_auth(struct key *key) +{ + struct cred *new; + + new = prepare_creds(); + if (!new) + return -ENOMEM; + + key_put(new->request_key_auth); + new->request_key_auth = key_get(key); + + return commit_creds(new); +} + /*****************************************************************************/ /* * instantiate the key with the specified payload, and, if one is given, link @@ -876,12 +893,15 @@ long keyctl_instantiate_key(key_serial_t id, size_t plen, key_serial_t ringid) { + const struct cred *cred = current_cred(); struct request_key_auth *rka; struct key *instkey, *dest_keyring; void *payload; long ret; bool vm = false; + kenter("%d,,%zu,%d", id, plen, ringid); + ret = -EINVAL; if (plen > 1024 * 1024 - 1) goto error; @@ -889,7 +909,7 @@ long keyctl_instantiate_key(key_serial_t id, /* the appropriate instantiation authorisation key must have been * assumed before calling this */ ret = -EPERM; - instkey = current->cred->request_key_auth; + instkey = cred->request_key_auth; if (!instkey) goto error; @@ -931,10 +951,8 @@ long keyctl_instantiate_key(key_serial_t id, /* discard the assumed authority if it's just been disabled by * instantiation of the key */ - 
if (ret == 0) { - key_put(current->cred->request_key_auth); - current->cred->request_key_auth = NULL; - } + if (ret == 0) + keyctl_change_reqkey_auth(NULL); error2: if (!vm) @@ -953,14 +971,17 @@ error: */ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) { + const struct cred *cred = current_cred(); struct request_key_auth *rka; struct key *instkey, *dest_keyring; long ret; + kenter("%d,%u,%d", id, timeout, ringid); + /* the appropriate instantiation authorisation key must have been * assumed before calling this */ ret = -EPERM; - instkey = current->cred->request_key_auth; + instkey = cred->request_key_auth; if (!instkey) goto error; @@ -982,10 +1003,8 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) /* discard the assumed authority if it's just been disabled by * instantiation of the key */ - if (ret == 0) { - key_put(current->cred->request_key_auth); - current->cred->request_key_auth = NULL; - } + if (ret == 0) + keyctl_change_reqkey_auth(NULL); error: return ret; @@ -999,36 +1018,56 @@ error: */ long keyctl_set_reqkey_keyring(int reqkey_defl) { - struct cred *cred = current->cred; - int ret; + struct cred *new; + int ret, old_setting; + + old_setting = current_cred_xxx(jit_keyring); + + if (reqkey_defl == KEY_REQKEY_DEFL_NO_CHANGE) + return old_setting; + + new = prepare_creds(); + if (!new) + return -ENOMEM; switch (reqkey_defl) { case KEY_REQKEY_DEFL_THREAD_KEYRING: - ret = install_thread_keyring(); + ret = install_thread_keyring_to_cred(new); if (ret < 0) - return ret; + goto error; goto set; case KEY_REQKEY_DEFL_PROCESS_KEYRING: - ret = install_process_keyring(); - if (ret < 0) - return ret; + ret = install_process_keyring_to_cred(new); + if (ret < 0) { + if (ret != -EEXIST) + goto error; + ret = 0; + } + goto set; case KEY_REQKEY_DEFL_DEFAULT: case KEY_REQKEY_DEFL_SESSION_KEYRING: case KEY_REQKEY_DEFL_USER_KEYRING: case KEY_REQKEY_DEFL_USER_SESSION_KEYRING: - set: - cred->jit_keyring = 
reqkey_defl; + case KEY_REQKEY_DEFL_REQUESTOR_KEYRING: + goto set; case KEY_REQKEY_DEFL_NO_CHANGE: - return cred->jit_keyring; - case KEY_REQKEY_DEFL_GROUP_KEYRING: default: - return -EINVAL; + ret = -EINVAL; + goto error; } +set: + new->jit_keyring = reqkey_defl; + commit_creds(new); + return old_setting; +error: + abort_creds(new); + return -EINVAL; + } /* end keyctl_set_reqkey_keyring() */ /*****************************************************************************/ @@ -1087,9 +1126,7 @@ long keyctl_assume_authority(key_serial_t id) /* we divest ourselves of authority if given an ID of 0 */ if (id == 0) { - key_put(current->cred->request_key_auth); - current->cred->request_key_auth = NULL; - ret = 0; + ret = keyctl_change_reqkey_auth(NULL); goto error; } @@ -1104,10 +1141,12 @@ long keyctl_assume_authority(key_serial_t id) goto error; } - key_put(current->cred->request_key_auth); - current->cred->request_key_auth = authkey; - ret = authkey->serial; + ret = keyctl_change_reqkey_auth(authkey); + if (ret < 0) + goto error; + key_put(authkey); + ret = authkey->serial; error: return ret; diff --git a/security/keys/keyring.c b/security/keys/keyring.c index fdf75f90199..ed851574d07 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -245,14 +245,14 @@ static long keyring_read(const struct key *keyring, * allocate a keyring and link into the destination keyring */ struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, - struct task_struct *ctx, unsigned long flags, + const struct cred *cred, unsigned long flags, struct key *dest) { struct key *keyring; int ret; keyring = key_alloc(&key_type_keyring, description, - uid, gid, ctx, + uid, gid, cred, (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL, flags); @@ -281,7 +281,7 @@ struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, * - we propagate the possession attribute from the keyring ref to the key ref */ key_ref_t keyring_search_aux(key_ref_t keyring_ref, - struct 
task_struct *context, + const struct cred *cred, struct key_type *type, const void *description, key_match_func_t match) @@ -304,7 +304,7 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref, key_check(keyring); /* top keyring must have search permission to begin the search */ - err = key_task_permission(keyring_ref, context, KEY_SEARCH); + err = key_task_permission(keyring_ref, cred, KEY_SEARCH); if (err < 0) { key_ref = ERR_PTR(err); goto error; @@ -377,7 +377,7 @@ descend: /* key must have search permissions */ if (key_task_permission(make_key_ref(key, possessed), - context, KEY_SEARCH) < 0) + cred, KEY_SEARCH) < 0) continue; /* we set a different error code if we pass a negative key */ @@ -404,7 +404,7 @@ ascend: continue; if (key_task_permission(make_key_ref(key, possessed), - context, KEY_SEARCH) < 0) + cred, KEY_SEARCH) < 0) continue; /* stack the current position */ @@ -459,7 +459,7 @@ key_ref_t keyring_search(key_ref_t keyring, if (!type->match) return ERR_PTR(-ENOKEY); - return keyring_search_aux(keyring, current, + return keyring_search_aux(keyring, current->cred, type, description, type->match); } /* end keyring_search() */ diff --git a/security/keys/permission.c b/security/keys/permission.c index 13c36164f28..5d9fc7b93f2 100644 --- a/security/keys/permission.c +++ b/security/keys/permission.c @@ -14,24 +14,27 @@ #include "internal.h" /*****************************************************************************/ -/* - * check to see whether permission is granted to use a key in the desired way, - * but permit the security modules to override +/** + * key_task_permission - Check a key can be used + * @key_ref: The key to check + * @cred: The credentials to use + * @perm: The permissions to check for + * + * Check to see whether permission is granted to use a key in the desired way, + * but permit the security modules to override. + * + * The caller must hold either a ref on cred or must hold the RCU readlock or a + * spinlock. 
*/ -int key_task_permission(const key_ref_t key_ref, - struct task_struct *context, +int key_task_permission(const key_ref_t key_ref, const struct cred *cred, key_perm_t perm) { - const struct cred *cred; struct key *key; key_perm_t kperm; int ret; key = key_ref_to_ptr(key_ref); - rcu_read_lock(); - cred = __task_cred(context); - /* use the second 8-bits of permissions for keys the caller owns */ if (key->uid == cred->fsuid) { kperm = key->perm >> 16; @@ -57,7 +60,6 @@ int key_task_permission(const key_ref_t key_ref, kperm = key->perm; use_these_perms: - rcu_read_lock(); /* use the top 8-bits of permissions for keys the caller possesses * - possessor permissions are additive with other permissions @@ -71,7 +73,7 @@ use_these_perms: return -EACCES; /* let LSM be the final arbiter */ - return security_key_permission(key_ref, context, perm); + return security_key_permission(key_ref, cred, perm); } /* end key_task_permission() */ diff --git a/security/keys/proc.c b/security/keys/proc.c index f619170da76..7f508def50e 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -136,8 +136,12 @@ static int proc_keys_show(struct seq_file *m, void *v) int rc; /* check whether the current task is allowed to view the key (assuming - * non-possession) */ - rc = key_task_permission(make_key_ref(key, 0), current, KEY_VIEW); + * non-possession) + * - the caller holds a spinlock, and thus the RCU read lock, making our + * access to __current_cred() safe + */ + rc = key_task_permission(make_key_ref(key, 0), current_cred(), + KEY_VIEW); if (rc < 0) return 0; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 70ee93406f3..df329f684a6 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -42,11 +42,15 @@ struct key_user root_key_user = { */ int install_user_keyrings(void) { - struct user_struct *user = current->cred->user; + struct user_struct *user; + const struct cred *cred; struct key *uid_keyring, *session_keyring; char 
buf[20]; int ret; + cred = current_cred(); + user = cred->user; + kenter("%p{%u}", user, user->uid); if (user->uid_keyring) { @@ -67,7 +71,7 @@ int install_user_keyrings(void) uid_keyring = find_keyring_by_name(buf, true); if (IS_ERR(uid_keyring)) { uid_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, - current, KEY_ALLOC_IN_QUOTA, + cred, KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(uid_keyring)) { ret = PTR_ERR(uid_keyring); @@ -83,8 +87,7 @@ int install_user_keyrings(void) if (IS_ERR(session_keyring)) { session_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, - current, KEY_ALLOC_IN_QUOTA, - NULL); + cred, KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(session_keyring)) { ret = PTR_ERR(session_keyring); goto error_release; @@ -116,142 +119,128 @@ error: return ret; } -/*****************************************************************************/ /* - * deal with the UID changing + * install a fresh thread keyring directly to new credentials */ -void switch_uid_keyring(struct user_struct *new_user) +int install_thread_keyring_to_cred(struct cred *new) { -#if 0 /* do nothing for now */ - struct key *old; - - /* switch to the new user's session keyring if we were running under - * root's default session keyring */ - if (new_user->uid != 0 && - current->session_keyring == &root_session_keyring - ) { - atomic_inc(&new_user->session_keyring->usage); - - task_lock(current); - old = current->session_keyring; - current->session_keyring = new_user->session_keyring; - task_unlock(current); + struct key *keyring; - key_put(old); - } -#endif + keyring = keyring_alloc("_tid", new->uid, new->gid, new, + KEY_ALLOC_QUOTA_OVERRUN, NULL); + if (IS_ERR(keyring)) + return PTR_ERR(keyring); -} /* end switch_uid_keyring() */ + new->thread_keyring = keyring; + return 0; +} -/*****************************************************************************/ /* * install a fresh thread keyring, discarding the old one */ -int install_thread_keyring(void) +static int install_thread_keyring(void) { - 
struct task_struct *tsk = current; - struct key *keyring, *old; - char buf[20]; + struct cred *new; int ret; - sprintf(buf, "_tid.%u", tsk->pid); + new = prepare_creds(); + if (!new) + return -ENOMEM; - keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, - KEY_ALLOC_QUOTA_OVERRUN, NULL); - if (IS_ERR(keyring)) { - ret = PTR_ERR(keyring); - goto error; + BUG_ON(new->thread_keyring); + + ret = install_thread_keyring_to_cred(new); + if (ret < 0) { + abort_creds(new); + return ret; } - task_lock(tsk); - old = tsk->cred->thread_keyring; - tsk->cred->thread_keyring = keyring; - task_unlock(tsk); + return commit_creds(new); +} - ret = 0; +/* + * install a process keyring directly to a credentials struct + * - returns -EEXIST if there was already a process keyring, 0 if one installed, + * and other -ve on any other error + */ +int install_process_keyring_to_cred(struct cred *new) +{ + struct key *keyring; + int ret; - key_put(old); -error: + if (new->tgcred->process_keyring) + return -EEXIST; + + keyring = keyring_alloc("_pid", new->uid, new->gid, + new, KEY_ALLOC_QUOTA_OVERRUN, NULL); + if (IS_ERR(keyring)) + return PTR_ERR(keyring); + + spin_lock_irq(&new->tgcred->lock); + if (!new->tgcred->process_keyring) { + new->tgcred->process_keyring = keyring; + keyring = NULL; + ret = 0; + } else { + ret = -EEXIST; + } + spin_unlock_irq(&new->tgcred->lock); + key_put(keyring); return ret; +} -} /* end install_thread_keyring() */ - -/*****************************************************************************/ /* * make sure a process keyring is installed + * - we */ -int install_process_keyring(void) +static int install_process_keyring(void) { - struct task_struct *tsk = current; - struct key *keyring; - char buf[20]; + struct cred *new; int ret; - might_sleep(); - - if (!tsk->cred->tgcred->process_keyring) { - sprintf(buf, "_pid.%u", tsk->tgid); - - keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, - KEY_ALLOC_QUOTA_OVERRUN, NULL); - if 
(IS_ERR(keyring)) { - ret = PTR_ERR(keyring); - goto error; - } - - /* attach keyring */ - spin_lock_irq(&tsk->cred->tgcred->lock); - if (!tsk->cred->tgcred->process_keyring) { - tsk->cred->tgcred->process_keyring = keyring; - keyring = NULL; - } - spin_unlock_irq(&tsk->cred->tgcred->lock); + new = prepare_creds(); + if (!new) + return -ENOMEM; - key_put(keyring); + ret = install_process_keyring_to_cred(new); + if (ret < 0) { + abort_creds(new); + return ret != -EEXIST ?: 0; } - ret = 0; -error: - return ret; - -} /* end install_process_keyring() */ + return commit_creds(new); +} -/*****************************************************************************/ /* - * install a session keyring, discarding the old one - * - if a keyring is not supplied, an empty one is invented + * install a session keyring directly to a credentials struct */ -static int install_session_keyring(struct key *keyring) +static int install_session_keyring_to_cred(struct cred *cred, + struct key *keyring) { - struct task_struct *tsk = current; unsigned long flags; struct key *old; - char buf[20]; might_sleep(); /* create an empty session keyring */ if (!keyring) { - sprintf(buf, "_ses.%u", tsk->tgid); - flags = KEY_ALLOC_QUOTA_OVERRUN; - if (tsk->cred->tgcred->session_keyring) + if (cred->tgcred->session_keyring) flags = KEY_ALLOC_IN_QUOTA; - keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, - tsk, flags, NULL); + keyring = keyring_alloc("_ses", cred->uid, cred->gid, + cred, flags, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); - } - else { + } else { atomic_inc(&keyring->usage); } /* install the keyring */ - spin_lock_irq(&tsk->cred->tgcred->lock); - old = tsk->cred->tgcred->session_keyring; - rcu_assign_pointer(tsk->cred->tgcred->session_keyring, keyring); - spin_unlock_irq(&tsk->cred->tgcred->lock); + spin_lock_irq(&cred->tgcred->lock); + old = cred->tgcred->session_keyring; + rcu_assign_pointer(cred->tgcred->session_keyring, keyring); + 
spin_unlock_irq(&cred->tgcred->lock); /* we're using RCU on the pointer, but there's no point synchronising * on it if it didn't previously point to anything */ @@ -261,38 +250,29 @@ static int install_session_keyring(struct key *keyring) } return 0; +} -} /* end install_session_keyring() */ - -/*****************************************************************************/ /* - * copy the keys for fork + * install a session keyring, discarding the old one + * - if a keyring is not supplied, an empty one is invented */ -int copy_keys(unsigned long clone_flags, struct task_struct *tsk) +static int install_session_keyring(struct key *keyring) { - key_check(tsk->cred->thread_keyring); - key_check(tsk->cred->request_key_auth); - - /* no thread keyring yet */ - tsk->cred->thread_keyring = NULL; - - /* copy the request_key() authorisation for this thread */ - key_get(tsk->cred->request_key_auth); - - return 0; + struct cred *new; + int ret; -} /* end copy_keys() */ + new = prepare_creds(); + if (!new) + return -ENOMEM; -/*****************************************************************************/ -/* - * dispose of per-thread keys upon thread exit - */ -void exit_keys(struct task_struct *tsk) -{ - key_put(tsk->cred->thread_keyring); - key_put(tsk->cred->request_key_auth); + ret = install_session_keyring_to_cred(new, NULL); + if (ret < 0) { + abort_creds(new); + return ret; + } -} /* end exit_keys() */ + return commit_creds(new); +} /*****************************************************************************/ /* @@ -300,38 +280,41 @@ void exit_keys(struct task_struct *tsk) */ int exec_keys(struct task_struct *tsk) { - struct key *old; + struct thread_group_cred *tgcred = NULL; + struct cred *new; - /* newly exec'd tasks don't get a thread keyring */ - task_lock(tsk); - old = tsk->cred->thread_keyring; - tsk->cred->thread_keyring = NULL; - task_unlock(tsk); +#ifdef CONFIG_KEYS + tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); + if (!tgcred) + return -ENOMEM; +#endif - 
key_put(old); + new = prepare_creds(); + if (new < 0) + return -ENOMEM; - /* discard the process keyring from a newly exec'd task */ - spin_lock_irq(&tsk->cred->tgcred->lock); - old = tsk->cred->tgcred->process_keyring; - tsk->cred->tgcred->process_keyring = NULL; - spin_unlock_irq(&tsk->cred->tgcred->lock); + /* newly exec'd tasks don't get a thread keyring */ + key_put(new->thread_keyring); + new->thread_keyring = NULL; - key_put(old); + /* create a new per-thread-group creds for all this set of threads to + * share */ + memcpy(tgcred, new->tgcred, sizeof(struct thread_group_cred)); - return 0; + atomic_set(&tgcred->usage, 1); + spin_lock_init(&tgcred->lock); -} /* end exec_keys() */ + /* inherit the session keyring; new process keyring */ + key_get(tgcred->session_keyring); + tgcred->process_keyring = NULL; -/*****************************************************************************/ -/* - * deal with SUID programs - * - we might want to make this invent a new session keyring - */ -int suid_keys(struct task_struct *tsk) -{ + release_tgcred(new); + new->tgcred = tgcred; + + commit_creds(new); return 0; -} /* end suid_keys() */ +} /* end exec_keys() */ /*****************************************************************************/ /* @@ -376,16 +359,13 @@ void key_fsgid_changed(struct task_struct *tsk) key_ref_t search_process_keyrings(struct key_type *type, const void *description, key_match_func_t match, - struct task_struct *context) + const struct cred *cred) { struct request_key_auth *rka; - struct cred *cred; key_ref_t key_ref, ret, err; might_sleep(); - cred = get_task_cred(context); - /* we want to return -EAGAIN or -ENOKEY if any of the keyrings were * searchable, but we failed to find a key or we found a negative key; * otherwise we want to return a sample error (probably -EACCES) if @@ -401,7 +381,7 @@ key_ref_t search_process_keyrings(struct key_type *type, if (cred->thread_keyring) { key_ref = keyring_search_aux( make_key_ref(cred->thread_keyring, 
1), - context, type, description, match); + cred, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -422,7 +402,7 @@ key_ref_t search_process_keyrings(struct key_type *type, if (cred->tgcred->process_keyring) { key_ref = keyring_search_aux( make_key_ref(cred->tgcred->process_keyring, 1), - context, type, description, match); + cred, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -446,7 +426,7 @@ key_ref_t search_process_keyrings(struct key_type *type, make_key_ref(rcu_dereference( cred->tgcred->session_keyring), 1), - context, type, description, match); + cred, type, description, match); rcu_read_unlock(); if (!IS_ERR(key_ref)) @@ -468,7 +448,7 @@ key_ref_t search_process_keyrings(struct key_type *type, else if (cred->user->session_keyring) { key_ref = keyring_search_aux( make_key_ref(cred->user->session_keyring, 1), - context, type, description, match); + cred, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -490,7 +470,7 @@ key_ref_t search_process_keyrings(struct key_type *type, * - we don't permit access to request_key auth keys via this method */ if (cred->request_key_auth && - context == current && + cred == current_cred() && type != &key_type_request_key_auth ) { /* defend against the auth key being revoked */ @@ -500,7 +480,7 @@ key_ref_t search_process_keyrings(struct key_type *type, rka = cred->request_key_auth->payload.data; key_ref = search_process_keyrings(type, description, - match, rka->context); + match, rka->cred); up_read(&cred->request_key_auth->sem); @@ -527,7 +507,6 @@ key_ref_t search_process_keyrings(struct key_type *type, key_ref = ret ? 
ret : err; found: - put_cred(cred); return key_ref; } /* end search_process_keyrings() */ @@ -552,8 +531,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, key_perm_t perm) { struct request_key_auth *rka; - struct task_struct *t = current; - struct cred *cred; + const struct cred *cred; struct key *key; key_ref_t key_ref, skey_ref; int ret; @@ -608,6 +586,7 @@ try_again: goto error; ret = install_session_keyring( cred->user->session_keyring); + if (ret < 0) goto error; goto reget_creds; @@ -693,7 +672,7 @@ try_again: /* check to see if we possess the key */ skey_ref = search_process_keyrings(key->type, key, lookup_user_key_possessed, - current); + cred); if (!IS_ERR(skey_ref)) { key_put(key); @@ -725,7 +704,7 @@ try_again: goto invalid_key; /* check the permissions */ - ret = key_task_permission(key_ref, t, perm); + ret = key_task_permission(key_ref, cred, perm); if (ret < 0) goto invalid_key; @@ -755,21 +734,33 @@ reget_creds: */ long join_session_keyring(const char *name) { - struct task_struct *tsk = current; - struct cred *cred = current->cred; + const struct cred *old; + struct cred *new; struct key *keyring; - long ret; + long ret, serial; + + /* only permit this if there's a single thread in the thread group - + * this avoids us having to adjust the creds on all threads and risking + * ENOMEM */ + if (!is_single_threaded(current)) + return -EMLINK; + + new = prepare_creds(); + if (!new) + return -ENOMEM; + old = current_cred(); /* if no name is provided, install an anonymous keyring */ if (!name) { - ret = install_session_keyring(NULL); + ret = install_session_keyring_to_cred(new, NULL); if (ret < 0) goto error; - rcu_read_lock(); - ret = rcu_dereference(cred->tgcred->session_keyring)->serial; - rcu_read_unlock(); - goto error; + serial = new->tgcred->session_keyring->serial; + ret = commit_creds(new); + if (ret == 0) + ret = serial; + goto okay; } /* allow the user to join or create a named keyring */ @@ -779,29 +770,33 @@ long 
join_session_keyring(const char *name) keyring = find_keyring_by_name(name, false); if (PTR_ERR(keyring) == -ENOKEY) { /* not found - try and create a new one */ - keyring = keyring_alloc(name, cred->uid, cred->gid, tsk, + keyring = keyring_alloc(name, old->uid, old->gid, old, KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto error2; } - } - else if (IS_ERR(keyring)) { + } else if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto error2; } /* we've got a keyring - now to install it */ - ret = install_session_keyring(keyring); + ret = install_session_keyring_to_cred(new, keyring); if (ret < 0) goto error2; + commit_creds(new); + mutex_unlock(&key_session_mutex); + ret = keyring->serial; key_put(keyring); +okay: + return ret; error2: mutex_unlock(&key_session_mutex); error: + abort_creds(new); return ret; - -} /* end join_session_keyring() */ +} diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 3d12558362d..0e04f72ef2d 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -83,8 +83,10 @@ static int call_sbin_request_key(struct key_construction *cons, /* allocate a new session keyring */ sprintf(desc, "_req.%u", key->serial); - keyring = keyring_alloc(desc, current_fsuid(), current_fsgid(), current, + cred = get_current_cred(); + keyring = keyring_alloc(desc, cred->fsuid, cred->fsgid, cred, KEY_ALLOC_QUOTA_OVERRUN, NULL); + put_cred(cred); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto error_alloc; @@ -104,8 +106,7 @@ static int call_sbin_request_key(struct key_construction *cons, /* we specify the process's default keyrings */ sprintf(keyring_str[0], "%d", - cred->thread_keyring ? - cred->thread_keyring->serial : 0); + cred->thread_keyring ? 
cred->thread_keyring->serial : 0); prkey = 0; if (cred->tgcred->process_keyring) @@ -155,8 +156,8 @@ error_link: key_put(keyring); error_alloc: - kleave(" = %d", ret); complete_request_key(cons, ret); + kleave(" = %d", ret); return ret; } @@ -295,6 +296,7 @@ static int construct_alloc_key(struct key_type *type, struct key_user *user, struct key **_key) { + const struct cred *cred = current_cred(); struct key *key; key_ref_t key_ref; @@ -302,9 +304,8 @@ static int construct_alloc_key(struct key_type *type, mutex_lock(&user->cons_lock); - key = key_alloc(type, description, - current_fsuid(), current_fsgid(), current, KEY_POS_ALL, - flags); + key = key_alloc(type, description, cred->fsuid, cred->fsgid, cred, + KEY_POS_ALL, flags); if (IS_ERR(key)) goto alloc_failed; @@ -317,8 +318,7 @@ static int construct_alloc_key(struct key_type *type, * waited for locks */ mutex_lock(&key_construction_mutex); - key_ref = search_process_keyrings(type, description, type->match, - current); + key_ref = search_process_keyrings(type, description, type->match, cred); if (!IS_ERR(key_ref)) goto key_already_present; @@ -363,6 +363,8 @@ static struct key *construct_key_and_link(struct key_type *type, struct key *key; int ret; + kenter(""); + user = key_user_lookup(current_fsuid()); if (!user) return ERR_PTR(-ENOMEM); @@ -376,17 +378,21 @@ static struct key *construct_key_and_link(struct key_type *type, if (ret == 0) { ret = construct_key(key, callout_info, callout_len, aux, dest_keyring); - if (ret < 0) + if (ret < 0) { + kdebug("cons failed"); goto construction_failed; + } } key_put(dest_keyring); + kleave(" = key %d", key_serial(key)); return key; construction_failed: key_negate_and_link(key, key_negative_timeout, NULL, NULL); key_put(key); key_put(dest_keyring); + kleave(" = %d", ret); return ERR_PTR(ret); } @@ -405,6 +411,7 @@ struct key *request_key_and_link(struct key_type *type, struct key *dest_keyring, unsigned long flags) { + const struct cred *cred = current_cred(); struct key 
*key; key_ref_t key_ref; @@ -414,7 +421,7 @@ struct key *request_key_and_link(struct key_type *type, /* search all the process keyrings for a key */ key_ref = search_process_keyrings(type, description, type->match, - current); + cred); if (!IS_ERR(key_ref)) { key = key_ref_to_ptr(key_ref); diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 2125579d5d7..86747151ee5 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -105,9 +105,9 @@ static void request_key_auth_revoke(struct key *key) kenter("{%d}", key->serial); - if (rka->context) { - put_task_struct(rka->context); - rka->context = NULL; + if (rka->cred) { + put_cred(rka->cred); + rka->cred = NULL; } } /* end request_key_auth_revoke() */ @@ -122,9 +122,9 @@ static void request_key_auth_destroy(struct key *key) kenter("{%d}", key->serial); - if (rka->context) { - put_task_struct(rka->context); - rka->context = NULL; + if (rka->cred) { + put_cred(rka->cred); + rka->cred = NULL; } key_put(rka->target_key); @@ -143,6 +143,7 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, size_t callout_len, struct key *dest_keyring) { struct request_key_auth *rka, *irka; + const struct cred *cred = current->cred; struct key *authkey = NULL; char desc[20]; int ret; @@ -164,28 +165,25 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, /* see if the calling process is already servicing the key request of * another process */ - if (current->cred->request_key_auth) { + if (cred->request_key_auth) { /* it is - use that instantiation context here too */ - down_read(&current->cred->request_key_auth->sem); + down_read(&cred->request_key_auth->sem); /* if the auth key has been revoked, then the key we're * servicing is already instantiated */ - if (test_bit(KEY_FLAG_REVOKED, - &current->cred->request_key_auth->flags)) + if (test_bit(KEY_FLAG_REVOKED, &cred->request_key_auth->flags)) goto auth_key_revoked; - irka = 
current->cred->request_key_auth->payload.data; - rka->context = irka->context; + irka = cred->request_key_auth->payload.data; + rka->cred = get_cred(irka->cred); rka->pid = irka->pid; - get_task_struct(rka->context); - up_read(&current->cred->request_key_auth->sem); + up_read(&cred->request_key_auth->sem); } else { /* it isn't - use this process as the context */ - rka->context = current; + rka->cred = get_cred(cred); rka->pid = current->pid; - get_task_struct(rka->context); } rka->target_key = key_get(target); @@ -197,7 +195,7 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, sprintf(desc, "%x", target->serial); authkey = key_alloc(&key_type_request_key_auth, desc, - current_fsuid(), current_fsgid(), current, + cred->fsuid, cred->fsgid, cred, KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH | KEY_USR_VIEW, KEY_ALLOC_NOT_IN_QUOTA); if (IS_ERR(authkey)) { @@ -205,16 +203,16 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, goto error_alloc; } - /* construct and attach to the keyring */ + /* construct the auth key */ ret = key_instantiate_and_link(authkey, rka, 0, NULL, NULL); if (ret < 0) goto error_inst; - kleave(" = {%d}", authkey->serial); + kleave(" = {%d,%d}", authkey->serial, atomic_read(&authkey->usage)); return authkey; auth_key_revoked: - up_read(&current->cred->request_key_auth->sem); + up_read(&cred->request_key_auth->sem); kfree(rka->callout_info); kfree(rka); kleave("= -EKEYREVOKED"); @@ -257,6 +255,7 @@ static int key_get_instantiation_authkey_match(const struct key *key, */ struct key *key_get_instantiation_authkey(key_serial_t target_id) { + const struct cred *cred = current_cred(); struct key *authkey; key_ref_t authkey_ref; @@ -264,7 +263,7 @@ struct key *key_get_instantiation_authkey(key_serial_t target_id) &key_type_request_key_auth, (void *) (unsigned long) target_id, key_get_instantiation_authkey_match, - current); + cred); if (IS_ERR(authkey_ref)) { authkey = ERR_CAST(authkey_ref); diff --git 
a/security/security.c b/security/security.c index f40a0a04c3c..a55d739c686 100644 --- a/security/security.c +++ b/security/security.c @@ -145,18 +145,13 @@ int security_capget(struct task_struct *target, return security_ops->capget(target, effective, inheritable, permitted); } -int security_capset_check(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) +int security_capset(struct cred *new, const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { - return security_ops->capset_check(effective, inheritable, permitted); -} - -void security_capset_set(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) -{ - security_ops->capset_set(effective, inheritable, permitted); + return security_ops->capset(new, old, + effective, inheritable, permitted); } int security_capable(struct task_struct *tsk, int cap) @@ -228,9 +223,9 @@ void security_bprm_free(struct linux_binprm *bprm) security_ops->bprm_free_security(bprm); } -void security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) +int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) { - security_ops->bprm_apply_creds(bprm, unsafe); + return security_ops->bprm_apply_creds(bprm, unsafe); } void security_bprm_post_apply_creds(struct linux_binprm *bprm) @@ -616,14 +611,19 @@ int security_task_create(unsigned long clone_flags) return security_ops->task_create(clone_flags); } -int security_cred_alloc(struct cred *cred) +void security_cred_free(struct cred *cred) { - return security_ops->cred_alloc_security(cred); + security_ops->cred_free(cred); } -void security_cred_free(struct cred *cred) +int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp) { - security_ops->cred_free(cred); + return security_ops->cred_prepare(new, old, gfp); +} + +void security_commit_creds(struct cred *new, const struct cred *old) +{ + return 
security_ops->cred_commit(new, old); } int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) @@ -631,10 +631,10 @@ int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) return security_ops->task_setuid(id0, id1, id2, flags); } -int security_task_post_setuid(uid_t old_ruid, uid_t old_euid, - uid_t old_suid, int flags) +int security_task_fix_setuid(struct cred *new, const struct cred *old, + int flags) { - return security_ops->task_post_setuid(old_ruid, old_euid, old_suid, flags); + return security_ops->task_fix_setuid(new, old, flags); } int security_task_setgid(gid_t id0, gid_t id1, gid_t id2, int flags) @@ -716,14 +716,9 @@ int security_task_wait(struct task_struct *p) } int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5, long *rc_p) -{ - return security_ops->task_prctl(option, arg2, arg3, arg4, arg5, rc_p); -} - -void security_task_reparent_to_init(struct task_struct *p) + unsigned long arg4, unsigned long arg5) { - security_ops->task_reparent_to_init(p); + return security_ops->task_prctl(option, arg2, arg3, arg4, arg5); } void security_task_to_inode(struct task_struct *p, struct inode *inode) @@ -1123,9 +1118,10 @@ EXPORT_SYMBOL(security_skb_classify_flow); #ifdef CONFIG_KEYS -int security_key_alloc(struct key *key, struct task_struct *tsk, unsigned long flags) +int security_key_alloc(struct key *key, const struct cred *cred, + unsigned long flags) { - return security_ops->key_alloc(key, tsk, flags); + return security_ops->key_alloc(key, cred, flags); } void security_key_free(struct key *key) @@ -1134,9 +1130,9 @@ void security_key_free(struct key *key) } int security_key_permission(key_ref_t key_ref, - struct task_struct *context, key_perm_t perm) + const struct cred *cred, key_perm_t perm) { - return security_ops->key_permission(key_ref, context, perm); + return security_ops->key_permission(key_ref, cred, perm); } int security_key_getsecurity(struct key *key, char 
**_buffer) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index f20cbd681ba..c71bba78872 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -156,20 +156,20 @@ static int selinux_secmark_enabled(void) return (atomic_read(&selinux_secmark_refcount) > 0); } -/* Allocate and free functions for each kind of security blob. */ - -static int cred_alloc_security(struct cred *cred) +/* + * initialise the security for the init task + */ +static void cred_init_security(void) { + struct cred *cred = (struct cred *) current->cred; struct task_security_struct *tsec; tsec = kzalloc(sizeof(struct task_security_struct), GFP_KERNEL); if (!tsec) - return -ENOMEM; + panic("SELinux: Failed to initialize initial task.\n"); - tsec->osid = tsec->sid = SECINITSID_UNLABELED; + tsec->osid = tsec->sid = SECINITSID_KERNEL; cred->security = tsec; - - return 0; } /* @@ -1378,6 +1378,19 @@ static inline u32 signal_to_av(int sig) return perm; } +/* + * Check permission between a pair of credentials + * fork check, ptrace check, etc. + */ +static int cred_has_perm(const struct cred *actor, + const struct cred *target, + u32 perms) +{ + u32 asid = cred_sid(actor), tsid = cred_sid(target); + + return avc_has_perm(asid, tsid, SECCLASS_PROCESS, perms, NULL); +} + /* * Check permission between a pair of tasks, e.g. signal checks, * fork check, ptrace check, etc. 
@@ -1820,24 +1833,19 @@ static int selinux_capget(struct task_struct *target, kernel_cap_t *effective, return secondary_ops->capget(target, effective, inheritable, permitted); } -static int selinux_capset_check(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) +static int selinux_capset(struct cred *new, const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { int error; - error = secondary_ops->capset_check(effective, inheritable, permitted); + error = secondary_ops->capset(new, old, + effective, inheritable, permitted); if (error) return error; - return task_has_perm(current, current, PROCESS__SETCAP); -} - -static void selinux_capset_set(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) -{ - secondary_ops->capset_set(effective, inheritable, permitted); + return cred_has_perm(old, new, PROCESS__SETCAP); } static int selinux_capable(struct task_struct *tsk, int cap, int audit) @@ -2244,16 +2252,23 @@ static inline void flush_unauthorized_files(const struct cred *cred, spin_unlock(&files->file_lock); } -static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) +static int selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) { struct task_security_struct *tsec; struct bprm_security_struct *bsec; + struct cred *new; u32 sid; int rc; - secondary_ops->bprm_apply_creds(bprm, unsafe); + rc = secondary_ops->bprm_apply_creds(bprm, unsafe); + if (rc < 0) + return rc; - tsec = current_security(); + new = prepare_creds(); + if (!new) + return -ENOMEM; + + tsec = new->security; bsec = bprm->security; sid = bsec->sid; @@ -2268,7 +2283,7 @@ static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) PROCESS__SHARE, NULL); if (rc) { bsec->unsafe = 1; - return; + goto out; } } @@ -2292,12 +2307,16 @@ static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int 
unsafe) PROCESS__PTRACE, NULL); if (rc) { bsec->unsafe = 1; - return; + goto out; } } } tsec->sid = sid; } + +out: + commit_creds(new); + return 0; } /* @@ -3021,6 +3040,7 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd, static int file_map_prot_check(struct file *file, unsigned long prot, int shared) { const struct cred *cred = current_cred(); + int rc = 0; #ifndef CONFIG_PPC32 if ((prot & PROT_EXEC) && (!file || (!shared && (prot & PROT_WRITE)))) { @@ -3029,9 +3049,9 @@ static int file_map_prot_check(struct file *file, unsigned long prot, int shared * private file mapping that will also be writable. * This has an additional check. */ - int rc = task_has_perm(current, current, PROCESS__EXECMEM); + rc = cred_has_perm(cred, cred, PROCESS__EXECMEM); if (rc) - return rc; + goto error; } #endif @@ -3048,7 +3068,9 @@ static int file_map_prot_check(struct file *file, unsigned long prot, int shared return file_has_perm(cred, file, av); } - return 0; + +error: + return rc; } static int selinux_file_mmap(struct file *file, unsigned long reqprot, @@ -3090,8 +3112,7 @@ static int selinux_file_mprotect(struct vm_area_struct *vma, rc = 0; if (vma->vm_start >= vma->vm_mm->start_brk && vma->vm_end <= vma->vm_mm->brk) { - rc = task_has_perm(current, current, - PROCESS__EXECHEAP); + rc = cred_has_perm(cred, cred, PROCESS__EXECHEAP); } else if (!vma->vm_file && vma->vm_start <= vma->vm_mm->start_stack && vma->vm_end >= vma->vm_mm->start_stack) { @@ -3104,8 +3125,7 @@ static int selinux_file_mprotect(struct vm_area_struct *vma, * modified content. This typically should only * occur for text relocations. 
*/ - rc = file_has_perm(cred, vma->vm_file, - FILE__EXECMOD); + rc = file_has_perm(cred, vma->vm_file, FILE__EXECMOD); } if (rc) return rc; @@ -3211,6 +3231,7 @@ static int selinux_dentry_open(struct file *file, const struct cred *cred) struct file_security_struct *fsec; struct inode *inode; struct inode_security_struct *isec; + inode = file->f_path.dentry->d_inode; fsec = file->f_security; isec = inode->i_security; @@ -3247,38 +3268,41 @@ static int selinux_task_create(unsigned long clone_flags) return task_has_perm(current, current, PROCESS__FORK); } -static int selinux_cred_alloc_security(struct cred *cred) +/* + * detach and free the LSM part of a set of credentials + */ +static void selinux_cred_free(struct cred *cred) { - struct task_security_struct *tsec1, *tsec2; - int rc; - - tsec1 = current_security(); + struct task_security_struct *tsec = cred->security; + cred->security = NULL; + kfree(tsec); +} - rc = cred_alloc_security(cred); - if (rc) - return rc; - tsec2 = cred->security; +/* + * prepare a new set of credentials for modification + */ +static int selinux_cred_prepare(struct cred *new, const struct cred *old, + gfp_t gfp) +{ + const struct task_security_struct *old_tsec; + struct task_security_struct *tsec; - tsec2->osid = tsec1->osid; - tsec2->sid = tsec1->sid; + old_tsec = old->security; - /* Retain the exec, fs, key, and sock SIDs across fork */ - tsec2->exec_sid = tsec1->exec_sid; - tsec2->create_sid = tsec1->create_sid; - tsec2->keycreate_sid = tsec1->keycreate_sid; - tsec2->sockcreate_sid = tsec1->sockcreate_sid; + tsec = kmemdup(old_tsec, sizeof(struct task_security_struct), gfp); + if (!tsec) + return -ENOMEM; + new->security = tsec; return 0; } /* - * detach and free the LSM part of a set of credentials + * commit new credentials */ -static void selinux_cred_free(struct cred *cred) +static void selinux_cred_commit(struct cred *new, const struct cred *old) { - struct task_security_struct *tsec = cred->security; - cred->security = NULL; - 
kfree(tsec); + secondary_ops->cred_commit(new, old); } static int selinux_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) @@ -3292,9 +3316,10 @@ static int selinux_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) return 0; } -static int selinux_task_post_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) +static int selinux_task_fix_setuid(struct cred *new, const struct cred *old, + int flags) { - return secondary_ops->task_post_setuid(id0, id1, id2, flags); + return secondary_ops->task_fix_setuid(new, old, flags); } static int selinux_task_setgid(gid_t id0, gid_t id1, gid_t id2, int flags) @@ -3368,7 +3393,7 @@ static int selinux_task_setrlimit(unsigned int resource, struct rlimit *new_rlim /* Control the ability to change the hard limit (whether lowering or raising it), so that the hard limit can later be used as a safe reset point for the soft limit - upon context transitions. See selinux_bprm_apply_creds. */ + upon context transitions. See selinux_bprm_committing_creds. */ if (old_rlim->rlim_max != new_rlim->rlim_max) return task_has_perm(current, current, PROCESS__SETRLIMIT); @@ -3422,13 +3447,12 @@ static int selinux_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, - unsigned long arg5, - long *rc_p) + unsigned long arg5) { /* The current prctl operations do not appear to require any SELinux controls since they merely observe or modify the state of the current process. 
*/ - return secondary_ops->task_prctl(option, arg2, arg3, arg4, arg5, rc_p); + return secondary_ops->task_prctl(option, arg2, arg3, arg4, arg5); } static int selinux_task_wait(struct task_struct *p) @@ -3436,18 +3460,6 @@ static int selinux_task_wait(struct task_struct *p) return task_has_perm(p, current, PROCESS__SIGCHLD); } -static void selinux_task_reparent_to_init(struct task_struct *p) -{ - struct task_security_struct *tsec; - - secondary_ops->task_reparent_to_init(p); - - tsec = p->cred->security; - tsec->osid = tsec->sid; - tsec->sid = SECINITSID_KERNEL; - return; -} - static void selinux_task_to_inode(struct task_struct *p, struct inode *inode) { @@ -5325,7 +5337,8 @@ static int selinux_setprocattr(struct task_struct *p, { struct task_security_struct *tsec; struct task_struct *tracer; - u32 sid = 0; + struct cred *new; + u32 sid = 0, ptsid; int error; char *str = value; @@ -5372,86 +5385,75 @@ static int selinux_setprocattr(struct task_struct *p, return error; } + new = prepare_creds(); + if (!new) + return -ENOMEM; + /* Permission checking based on the specified context is performed during the actual operation (execve, open/mkdir/...), when we know the full context of the - operation. See selinux_bprm_set_security for the execve + operation. See selinux_bprm_set_creds for the execve checks and may_create for the file creation checks. The operation will then fail if the context is not permitted. 
*/ - tsec = p->cred->security; - if (!strcmp(name, "exec")) + tsec = new->security; + if (!strcmp(name, "exec")) { tsec->exec_sid = sid; - else if (!strcmp(name, "fscreate")) + } else if (!strcmp(name, "fscreate")) { tsec->create_sid = sid; - else if (!strcmp(name, "keycreate")) { + } else if (!strcmp(name, "keycreate")) { error = may_create_key(sid, p); if (error) - return error; + goto abort_change; tsec->keycreate_sid = sid; - } else if (!strcmp(name, "sockcreate")) + } else if (!strcmp(name, "sockcreate")) { tsec->sockcreate_sid = sid; - else if (!strcmp(name, "current")) { - struct av_decision avd; - + } else if (!strcmp(name, "current")) { + error = -EINVAL; if (sid == 0) - return -EINVAL; - /* - * SELinux allows to change context in the following case only. - * - Single threaded processes. - * - Multi threaded processes intend to change its context into - * more restricted domain (defined by TYPEBOUNDS statement). - */ - if (atomic_read(&p->mm->mm_users) != 1) { - struct task_struct *g, *t; - struct mm_struct *mm = p->mm; - read_lock(&tasklist_lock); - do_each_thread(g, t) { - if (t->mm == mm && t != p) { - read_unlock(&tasklist_lock); - error = security_bounded_transition(tsec->sid, sid); - if (!error) - goto boundary_ok; - - return error; - } - } while_each_thread(g, t); - read_unlock(&tasklist_lock); + goto abort_change; + + /* Only allow single threaded processes to change context */ + error = -EPERM; + if (!is_single_threaded(p)) { + error = security_bounded_transition(tsec->sid, sid); + if (error) + goto abort_change; } -boundary_ok: /* Check permissions for the transition. */ error = avc_has_perm(tsec->sid, sid, SECCLASS_PROCESS, PROCESS__DYNTRANSITION, NULL); if (error) - return error; + goto abort_change; /* Check for ptracing, and update the task SID if ok. Otherwise, leave SID unchanged and fail. 
*/ + ptsid = 0; task_lock(p); - rcu_read_lock(); tracer = tracehook_tracer_task(p); - if (tracer != NULL) { - u32 ptsid = task_sid(tracer); - rcu_read_unlock(); - error = avc_has_perm_noaudit(ptsid, sid, - SECCLASS_PROCESS, - PROCESS__PTRACE, 0, &avd); - if (!error) - tsec->sid = sid; - task_unlock(p); - avc_audit(ptsid, sid, SECCLASS_PROCESS, - PROCESS__PTRACE, &avd, error, NULL); + if (tracer) + ptsid = task_sid(tracer); + task_unlock(p); + + if (tracer) { + error = avc_has_perm(ptsid, sid, SECCLASS_PROCESS, + PROCESS__PTRACE, NULL); if (error) - return error; - } else { - rcu_read_unlock(); - tsec->sid = sid; - task_unlock(p); + goto abort_change; } - } else - return -EINVAL; + tsec->sid = sid; + } else { + error = -EINVAL; + goto abort_change; + } + + commit_creds(new); return size; + +abort_change: + abort_creds(new); + return error; } static int selinux_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) @@ -5471,23 +5473,21 @@ static void selinux_release_secctx(char *secdata, u32 seclen) #ifdef CONFIG_KEYS -static int selinux_key_alloc(struct key *k, struct task_struct *tsk, +static int selinux_key_alloc(struct key *k, const struct cred *cred, unsigned long flags) { - const struct task_security_struct *__tsec; + const struct task_security_struct *tsec; struct key_security_struct *ksec; ksec = kzalloc(sizeof(struct key_security_struct), GFP_KERNEL); if (!ksec) return -ENOMEM; - rcu_read_lock(); - __tsec = __task_cred(tsk)->security; - if (__tsec->keycreate_sid) - ksec->sid = __tsec->keycreate_sid; + tsec = cred->security; + if (tsec->keycreate_sid) + ksec->sid = tsec->keycreate_sid; else - ksec->sid = __tsec->sid; - rcu_read_unlock(); + ksec->sid = tsec->sid; k->security = ksec; return 0; @@ -5502,8 +5502,8 @@ static void selinux_key_free(struct key *k) } static int selinux_key_permission(key_ref_t key_ref, - struct task_struct *ctx, - key_perm_t perm) + const struct cred *cred, + key_perm_t perm) { struct key *key; struct key_security_struct *ksec; @@ 
-5515,7 +5515,7 @@ static int selinux_key_permission(key_ref_t key_ref, if (perm == 0) return 0; - sid = task_sid(ctx); + sid = cred_sid(cred); key = key_ref_to_ptr(key_ref); ksec = key->security; @@ -5545,8 +5545,7 @@ static struct security_operations selinux_ops = { .ptrace_may_access = selinux_ptrace_may_access, .ptrace_traceme = selinux_ptrace_traceme, .capget = selinux_capget, - .capset_check = selinux_capset_check, - .capset_set = selinux_capset_set, + .capset = selinux_capset, .sysctl = selinux_sysctl, .capable = selinux_capable, .quotactl = selinux_quotactl, @@ -5621,10 +5620,11 @@ static struct security_operations selinux_ops = { .dentry_open = selinux_dentry_open, .task_create = selinux_task_create, - .cred_alloc_security = selinux_cred_alloc_security, .cred_free = selinux_cred_free, + .cred_prepare = selinux_cred_prepare, + .cred_commit = selinux_cred_commit, .task_setuid = selinux_task_setuid, - .task_post_setuid = selinux_task_post_setuid, + .task_fix_setuid = selinux_task_fix_setuid, .task_setgid = selinux_task_setgid, .task_setpgid = selinux_task_setpgid, .task_getpgid = selinux_task_getpgid, @@ -5641,7 +5641,6 @@ static struct security_operations selinux_ops = { .task_kill = selinux_task_kill, .task_wait = selinux_task_wait, .task_prctl = selinux_task_prctl, - .task_reparent_to_init = selinux_task_reparent_to_init, .task_to_inode = selinux_task_to_inode, .ipc_permission = selinux_ipc_permission, @@ -5737,8 +5736,6 @@ static struct security_operations selinux_ops = { static __init int selinux_init(void) { - struct task_security_struct *tsec; - if (!security_module_enable(&selinux_ops)) { selinux_enabled = 0; return 0; @@ -5752,10 +5749,7 @@ static __init int selinux_init(void) printk(KERN_INFO "SELinux: Initializing.\n"); /* Set the security state for the initial task. 
*/ - if (cred_alloc_security(current->cred)) - panic("SELinux: Failed to initialize initial task.\n"); - tsec = current->cred->security; - tsec->osid = tsec->sid = SECINITSID_KERNEL; + cred_init_security(); sel_inode_cache = kmem_cache_create("selinux_inode_security", sizeof(struct inode_security_struct), diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 11167fd567b..e952b397153 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -104,8 +104,7 @@ static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode) if (rc != 0) return rc; - rc = smk_access(current->cred->security, ctp->cred->security, - MAY_READWRITE); + rc = smk_access(current_security(), task_security(ctp), MAY_READWRITE); if (rc != 0 && capable(CAP_MAC_OVERRIDE)) return 0; return rc; @@ -127,8 +126,7 @@ static int smack_ptrace_traceme(struct task_struct *ptp) if (rc != 0) return rc; - rc = smk_access(ptp->cred->security, current->cred->security, - MAY_READWRITE); + rc = smk_access(task_security(ptp), current_security(), MAY_READWRITE); if (rc != 0 && has_capability(ptp, CAP_MAC_OVERRIDE)) return 0; return rc; @@ -976,22 +974,6 @@ static int smack_file_receive(struct file *file) * Task hooks */ -/** - * smack_cred_alloc_security - "allocate" a task cred blob - * @cred: the task creds in need of a blob - * - * Smack isn't using copies of blobs. Everyone - * points to an immutable list. No alloc required. - * No data copy required. 
- * - * Always returns 0 - */ -static int smack_cred_alloc_security(struct cred *cred) -{ - cred->security = current_security(); - return 0; -} - /** * smack_cred_free - "free" task-level security credentials * @cred: the credentials in question @@ -1005,6 +987,30 @@ static void smack_cred_free(struct cred *cred) cred->security = NULL; } +/** + * smack_cred_prepare - prepare new set of credentials for modification + * @new: the new credentials + * @old: the original credentials + * @gfp: the atomicity of any memory allocations + * + * Prepare a new set of credentials for modification. + */ +static int smack_cred_prepare(struct cred *new, const struct cred *old, + gfp_t gfp) +{ + new->security = old->security; + return 0; +} + +/* + * commit new credentials + * @new: the new credentials + * @old: the original credentials + */ +static void smack_cred_commit(struct cred *new, const struct cred *old) +{ +} + /** * smack_task_setpgid - Smack check on setting pgid * @p: the task object @@ -2036,6 +2042,7 @@ static int smack_getprocattr(struct task_struct *p, char *name, char **value) static int smack_setprocattr(struct task_struct *p, char *name, void *value, size_t size) { + struct cred *new; char *newsmack; /* @@ -2058,7 +2065,11 @@ static int smack_setprocattr(struct task_struct *p, char *name, if (newsmack == NULL) return -EINVAL; - p->cred->security = newsmack; + new = prepare_creds(); + if (!new) + return -ENOMEM; + new->security = newsmack; + commit_creds(new); return size; } @@ -2354,17 +2365,17 @@ static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb, /** * smack_key_alloc - Set the key security blob * @key: object - * @tsk: the task associated with the key + * @cred: the credentials to use * @flags: unused * * No allocation required * * Returns 0 */ -static int smack_key_alloc(struct key *key, struct task_struct *tsk, +static int smack_key_alloc(struct key *key, const struct cred *cred, unsigned long flags) { - key->security = 
tsk->cred->security; + key->security = cred->security; return 0; } @@ -2382,14 +2393,14 @@ static void smack_key_free(struct key *key) /* * smack_key_permission - Smack access on a key * @key_ref: gets to the object - * @context: task involved + * @cred: the credentials to use * @perm: unused * * Return 0 if the task has read and write to the object, * an error code otherwise */ static int smack_key_permission(key_ref_t key_ref, - struct task_struct *context, key_perm_t perm) + const struct cred *cred, key_perm_t perm) { struct key *keyp; @@ -2405,11 +2416,10 @@ static int smack_key_permission(key_ref_t key_ref, /* * This should not occur */ - if (context->cred->security == NULL) + if (cred->security == NULL) return -EACCES; - return smk_access(context->cred->security, keyp->security, - MAY_READWRITE); + return smk_access(cred->security, keyp->security, MAY_READWRITE); } #endif /* CONFIG_KEYS */ @@ -2580,8 +2590,7 @@ struct security_operations smack_ops = { .ptrace_may_access = smack_ptrace_may_access, .ptrace_traceme = smack_ptrace_traceme, .capget = cap_capget, - .capset_check = cap_capset_check, - .capset_set = cap_capset_set, + .capset = cap_capset, .capable = cap_capable, .syslog = smack_syslog, .settime = cap_settime, @@ -2630,9 +2639,10 @@ struct security_operations smack_ops = { .file_send_sigiotask = smack_file_send_sigiotask, .file_receive = smack_file_receive, - .cred_alloc_security = smack_cred_alloc_security, .cred_free = smack_cred_free, - .task_post_setuid = cap_task_post_setuid, + .cred_prepare = smack_cred_prepare, + .cred_commit = smack_cred_commit, + .task_fix_setuid = cap_task_fix_setuid, .task_setpgid = smack_task_setpgid, .task_getpgid = smack_task_getpgid, .task_getsid = smack_task_getsid, @@ -2645,7 +2655,6 @@ struct security_operations smack_ops = { .task_movememory = smack_task_movememory, .task_kill = smack_task_kill, .task_wait = smack_task_wait, - .task_reparent_to_init = cap_task_reparent_to_init, .task_to_inode = smack_task_to_inode, 
.task_prctl = cap_task_prctl, @@ -2721,6 +2730,8 @@ struct security_operations smack_ops = { */ static __init int smack_init(void) { + struct cred *cred; + if (!security_module_enable(&smack_ops)) return 0; @@ -2729,7 +2740,8 @@ static __init int smack_init(void) /* * Set the security state for the initial task. */ - current->cred->security = &smack_known_floor.smk_known; + cred = (struct cred *) current->cred; + cred->security = &smack_known_floor.smk_known; /* * Initialize locks -- cgit v1.2.3-70-g09d2 From 3b11a1decef07c19443d24ae926982bc8ec9f4c0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:26 +1100 Subject: CRED: Differentiate objective and effective subjective credentials on a task Differentiate the objective and real subjective credentials from the effective subjective credentials on a task by introducing a second credentials pointer into the task_struct. task_struct::real_cred then refers to the objective and apparent real subjective credentials of a task, as perceived by the other tasks in the system. task_struct::cred then refers to the effective subjective credentials of a task, as used by that task when it's actually running. These are not visible to the other tasks in the system. __task_cred(task) then refers to the objective/real credentials of the task in question. current_cred() refers to the effective subjective credentials of the current task. prepare_creds() uses the objective creds as a base and commit_creds() changes both pointers in the task_struct (indeed commit_creds() requires them to be the same). override_creds() and revert_creds() change the subjective creds pointer only, and the former returns the old subjective creds. These are used by NFSD, faccessat() and do_coredump(), and will be used by CacheFiles. In SELinux, current_has_perm() is provided as an alternative to task_has_perm(). This uses the effective subjective context of current, whereas task_has_perm() uses the objective/real context of the subject. 
Signed-off-by: David Howells Signed-off-by: James Morris --- fs/nfsd/auth.c | 5 +++- include/linux/cred.h | 29 +++++++++++---------- include/linux/init_task.h | 1 + include/linux/sched.h | 5 +++- kernel/cred.c | 38 ++++++++++++++++++--------- kernel/fork.c | 6 +++-- security/selinux/hooks.c | 65 ++++++++++++++++++++++++++++++----------------- 7 files changed, 95 insertions(+), 54 deletions(-) (limited to 'kernel/fork.c') diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 836ffa1047d..0184fe9b514 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -34,6 +34,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) int flags = nfsexp_flags(rqstp, exp); int ret; + /* discard any old override before preparing the new set */ + revert_creds(get_cred(current->real_cred)); new = prepare_creds(); if (!new) return -ENOMEM; @@ -82,7 +84,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) else new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); - return commit_creds(new); + put_cred(override_creds(new)); + return 0; oom: ret = -ENOMEM; diff --git a/include/linux/cred.h b/include/linux/cred.h index 794aab5c66e..55a9c995d69 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -146,8 +146,8 @@ extern struct cred *prepare_exec_creds(void); extern struct cred *prepare_usermodehelper_creds(void); extern int commit_creds(struct cred *); extern void abort_creds(struct cred *); -extern const struct cred *override_creds(const struct cred *) __deprecated; -extern void revert_creds(const struct cred *) __deprecated; +extern const struct cred *override_creds(const struct cred *); +extern void revert_creds(const struct cred *); extern void __init cred_init(void); /** @@ -202,32 +202,32 @@ static inline void put_cred(const struct cred *_cred) } /** - * current_cred - Access the current task's credentials + * current_cred - Access the current task's subjective credentials * - * Access the credentials of the current task. 
+ * Access the subjective credentials of the current task. */ #define current_cred() \ (current->cred) /** - * __task_cred - Access another task's credentials + * __task_cred - Access a task's objective credentials * @task: The task to query * - * Access the credentials of another task. The caller must hold the - * RCU readlock. + * Access the objective credentials of a task. The caller must hold the RCU + * readlock. * * The caller must make sure task doesn't go away, either by holding a ref on * task or by holding tasklist_lock to prevent it from being unlinked. */ #define __task_cred(task) \ - ((const struct cred *)(rcu_dereference((task)->cred))) + ((const struct cred *)(rcu_dereference((task)->real_cred))) /** - * get_task_cred - Get another task's credentials + * get_task_cred - Get another task's objective credentials * @task: The task to query * - * Get the credentials of a task, pinning them so that they can't go away. - * Accessing a task's credentials directly is not permitted. + * Get the objective credentials of a task, pinning them so that they can't go + * away. Accessing a task's credentials directly is not permitted. * * The caller must make sure task doesn't go away, either by holding a ref on * task or by holding tasklist_lock to prevent it from being unlinked. @@ -243,10 +243,11 @@ static inline void put_cred(const struct cred *_cred) }) /** - * get_current_cred - Get the current task's credentials + * get_current_cred - Get the current task's subjective credentials * - * Get the credentials of the current task, pinning them so that they can't go - * away. Accessing the current task's credentials directly is not permitted. + * Get the subjective credentials of the current task, pinning them so that + * they can't go away. Accessing the current task's credentials directly is + * not permitted. 
*/ #define get_current_cred() \ (get_cred(current_cred())) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 08c3b24ad9a..2597858035c 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -149,6 +149,7 @@ extern struct cred init_cred; .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ + .real_cred = &init_cred, \ .cred = &init_cred, \ .cred_exec_mutex = \ __MUTEX_INITIALIZER(tsk.cred_exec_mutex), \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 121d655e460..3443123b070 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1145,7 +1145,10 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - const struct cred *cred; /* actual/objective task credentials (COW) */ + const struct cred *real_cred; /* objective and real subjective task + * credentials (COW) */ + const struct cred *cred; /* effective (overridable) subjective task + * credentials (COW) */ struct mutex cred_exec_mutex; /* execve vs ptrace cred calculation mutex */ char comm[TASK_COMM_LEN]; /* executable name excluding path diff --git a/kernel/cred.c b/kernel/cred.c index b8bd2f99d8c..f3ca1066061 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -35,7 +35,7 @@ static struct thread_group_cred init_tgcred = { * The initial credentials for the initial task */ struct cred init_cred = { - .usage = ATOMIC_INIT(3), + .usage = ATOMIC_INIT(4), .securebits = SECUREBITS_DEFAULT, .cap_inheritable = CAP_INIT_INH_SET, .cap_permitted = CAP_FULL_SET, @@ -120,6 +120,8 @@ EXPORT_SYMBOL(__put_cred); * prepare a new copy, which the caller then modifies and then commits by * calling commit_creds(). * + * Preparation involves making a copy of the objective creds for modification. + * * Returns a pointer to the new creds-to-be if successful, NULL otherwise. * * Call commit_creds() or abort_creds() to clean up. 
@@ -130,7 +132,7 @@ struct cred *prepare_creds(void) const struct cred *old; struct cred *new; - BUG_ON(atomic_read(&task->cred->usage) < 1); + BUG_ON(atomic_read(&task->real_cred->usage) < 1); new = kmem_cache_alloc(cred_jar, GFP_KERNEL); if (!new) @@ -262,6 +264,9 @@ error: * * We share if we can, but under some circumstances we have to generate a new * set. + * + * The new process gets the current process's subjective credentials as its + * objective and subjective credentials */ int copy_creds(struct task_struct *p, unsigned long clone_flags) { @@ -278,6 +283,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) #endif clone_flags & CLONE_THREAD ) { + p->real_cred = get_cred(p->cred); get_cred(p->cred); atomic_inc(&p->cred->user->processes); return 0; @@ -317,7 +323,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) #endif atomic_inc(&new->user->processes); - p->cred = new; + p->cred = p->real_cred = get_cred(new); return 0; } @@ -326,7 +332,9 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) * @new: The credentials to be assigned * * Install a new set of credentials to the current task, using RCU to replace - * the old set. + * the old set. Both the objective and the subjective credentials pointers are + * updated. This function may not be called if the subjective credentials are + * in an overridden state. * * This function eats the caller's reference to the new credentials. 
* @@ -338,12 +346,15 @@ int commit_creds(struct cred *new) struct task_struct *task = current; const struct cred *old; + BUG_ON(task->cred != task->real_cred); + BUG_ON(atomic_read(&task->real_cred->usage) < 2); BUG_ON(atomic_read(&new->usage) < 1); - BUG_ON(atomic_read(&task->cred->usage) < 1); - old = task->cred; + old = task->real_cred; security_commit_creds(new, old); + get_cred(new); /* we will require a ref for the subj creds too */ + /* dumpability changes */ if (old->euid != new->euid || old->egid != new->egid || @@ -369,6 +380,7 @@ int commit_creds(struct cred *new) */ if (new->user != old->user) atomic_inc(&new->user->processes); + rcu_assign_pointer(task->real_cred, new); rcu_assign_pointer(task->cred, new); if (new->user != old->user) atomic_dec(&old->user->processes); @@ -388,6 +400,8 @@ int commit_creds(struct cred *new) new->fsgid != old->fsgid) proc_id_connector(task, PROC_EVENT_GID); + /* release the old obj and subj refs both */ + put_cred(old); put_cred(old); return 0; } @@ -408,11 +422,11 @@ void abort_creds(struct cred *new) EXPORT_SYMBOL(abort_creds); /** - * override_creds - Temporarily override the current process's credentials + * override_creds - Override the current process's subjective credentials * @new: The credentials to be assigned * - * Install a set of temporary override credentials on the current process, - * returning the old set for later reversion. + * Install a set of temporary override subjective credentials on the current + * process, returning the old set for later reversion. */ const struct cred *override_creds(const struct cred *new) { @@ -424,11 +438,11 @@ const struct cred *override_creds(const struct cred *new) EXPORT_SYMBOL(override_creds); /** - * revert_creds - Revert a temporary credentials override + * revert_creds - Revert a temporary subjective credentials override * @old: The credentials to be restored * - * Revert a temporary set of override credentials to an old set, discarding the - * override set. 
+ * Revert a temporary set of override subjective credentials to an old set, + * discarding the override set. */ void revert_creds(const struct cred *old) { diff --git a/kernel/fork.c b/kernel/fork.c index 82a7948a664..af0d0f04585 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -146,6 +146,7 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(atomic_read(&tsk->usage)); WARN_ON(tsk == current); + put_cred(tsk->real_cred); put_cred(tsk->cred); delayacct_tsk_free(tsk); @@ -961,10 +962,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; - if (atomic_read(&p->cred->user->processes) >= + if (atomic_read(&p->real_cred->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && - p->cred->user != current->nsproxy->user_ns->root_user) + p->real_cred->user != current->nsproxy->user_ns->root_user) goto bad_fork_free; } @@ -1278,6 +1279,7 @@ bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); + put_cred(p->real_cred); put_cred(p->cred); bad_fork_free: free_task(p); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 21a59218463..91b06f2aa96 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -161,7 +161,7 @@ static int selinux_secmark_enabled(void) */ static void cred_init_security(void) { - struct cred *cred = (struct cred *) current->cred; + struct cred *cred = (struct cred *) current->real_cred; struct task_security_struct *tsec; tsec = kzalloc(sizeof(struct task_security_struct), GFP_KERNEL); @@ -184,7 +184,7 @@ static inline u32 cred_sid(const struct cred *cred) } /* - * get the security ID of a task + * get the objective security ID of a task */ static inline u32 task_sid(const struct task_struct *task) { @@ -197,7 +197,7 @@ static inline u32 task_sid(const struct task_struct 
*task) } /* - * get the security ID of the current task + * get the subjective security ID of the current task */ static inline u32 current_sid(void) { @@ -1395,6 +1395,7 @@ static int cred_has_perm(const struct cred *actor, * Check permission between a pair of tasks, e.g. signal checks, * fork check, ptrace check, etc. * tsk1 is the actor and tsk2 is the target + * - this uses the default subjective creds of tsk1 */ static int task_has_perm(const struct task_struct *tsk1, const struct task_struct *tsk2, @@ -1410,6 +1411,22 @@ static int task_has_perm(const struct task_struct *tsk1, return avc_has_perm(sid1, sid2, SECCLASS_PROCESS, perms, NULL); } +/* + * Check permission between current and another task, e.g. signal checks, + * fork check, ptrace check, etc. + * current is the actor and tsk2 is the target + * - this uses current's subjective creds + */ +static int current_has_perm(const struct task_struct *tsk, + u32 perms) +{ + u32 sid, tsid; + + sid = current_sid(); + tsid = task_sid(tsk); + return avc_has_perm(sid, tsid, SECCLASS_PROCESS, perms, NULL); +} + #if CAP_LAST_CAP > 63 #error Fix SELinux to handle capabilities > 63. 
#endif @@ -1807,7 +1824,7 @@ static int selinux_ptrace_may_access(struct task_struct *child, return avc_has_perm(sid, csid, SECCLASS_FILE, FILE__READ, NULL); } - return task_has_perm(current, child, PROCESS__PTRACE); + return current_has_perm(child, PROCESS__PTRACE); } static int selinux_ptrace_traceme(struct task_struct *parent) @@ -1826,7 +1843,7 @@ static int selinux_capget(struct task_struct *target, kernel_cap_t *effective, { int error; - error = task_has_perm(current, target, PROCESS__GETCAP); + error = current_has_perm(target, PROCESS__GETCAP); if (error) return error; @@ -3071,7 +3088,7 @@ static int selinux_file_mprotect(struct vm_area_struct *vma, } else if (!vma->vm_file && vma->vm_start <= vma->vm_mm->start_stack && vma->vm_end >= vma->vm_mm->start_stack) { - rc = task_has_perm(current, current, PROCESS__EXECSTACK); + rc = current_has_perm(current, PROCESS__EXECSTACK); } else if (vma->vm_file && vma->anon_vma) { /* * We are making executable a file mapping that has @@ -3220,7 +3237,7 @@ static int selinux_task_create(unsigned long clone_flags) if (rc) return rc; - return task_has_perm(current, current, PROCESS__FORK); + return current_has_perm(current, PROCESS__FORK); } /* @@ -3285,17 +3302,17 @@ static int selinux_task_setgid(gid_t id0, gid_t id1, gid_t id2, int flags) static int selinux_task_setpgid(struct task_struct *p, pid_t pgid) { - return task_has_perm(current, p, PROCESS__SETPGID); + return current_has_perm(p, PROCESS__SETPGID); } static int selinux_task_getpgid(struct task_struct *p) { - return task_has_perm(current, p, PROCESS__GETPGID); + return current_has_perm(p, PROCESS__GETPGID); } static int selinux_task_getsid(struct task_struct *p) { - return task_has_perm(current, p, PROCESS__GETSESSION); + return current_has_perm(p, PROCESS__GETSESSION); } static void selinux_task_getsecid(struct task_struct *p, u32 *secid) @@ -3317,7 +3334,7 @@ static int selinux_task_setnice(struct task_struct *p, int nice) if (rc) return rc; - return 
task_has_perm(current, p, PROCESS__SETSCHED); + return current_has_perm(p, PROCESS__SETSCHED); } static int selinux_task_setioprio(struct task_struct *p, int ioprio) @@ -3328,12 +3345,12 @@ static int selinux_task_setioprio(struct task_struct *p, int ioprio) if (rc) return rc; - return task_has_perm(current, p, PROCESS__SETSCHED); + return current_has_perm(p, PROCESS__SETSCHED); } static int selinux_task_getioprio(struct task_struct *p) { - return task_has_perm(current, p, PROCESS__GETSCHED); + return current_has_perm(p, PROCESS__GETSCHED); } static int selinux_task_setrlimit(unsigned int resource, struct rlimit *new_rlim) @@ -3350,7 +3367,7 @@ static int selinux_task_setrlimit(unsigned int resource, struct rlimit *new_rlim later be used as a safe reset point for the soft limit upon context transitions. See selinux_bprm_committing_creds. */ if (old_rlim->rlim_max != new_rlim->rlim_max) - return task_has_perm(current, current, PROCESS__SETRLIMIT); + return current_has_perm(current, PROCESS__SETRLIMIT); return 0; } @@ -3363,17 +3380,17 @@ static int selinux_task_setscheduler(struct task_struct *p, int policy, struct s if (rc) return rc; - return task_has_perm(current, p, PROCESS__SETSCHED); + return current_has_perm(p, PROCESS__SETSCHED); } static int selinux_task_getscheduler(struct task_struct *p) { - return task_has_perm(current, p, PROCESS__GETSCHED); + return current_has_perm(p, PROCESS__GETSCHED); } static int selinux_task_movememory(struct task_struct *p) { - return task_has_perm(current, p, PROCESS__SETSCHED); + return current_has_perm(p, PROCESS__SETSCHED); } static int selinux_task_kill(struct task_struct *p, struct siginfo *info, @@ -3394,7 +3411,7 @@ static int selinux_task_kill(struct task_struct *p, struct siginfo *info, rc = avc_has_perm(secid, task_sid(p), SECCLASS_PROCESS, perm, NULL); else - rc = task_has_perm(current, p, perm); + rc = current_has_perm(p, perm); return rc; } @@ -5250,7 +5267,7 @@ static int selinux_getprocattr(struct task_struct *p, 
unsigned len; if (current != p) { - error = task_has_perm(current, p, PROCESS__GETATTR); + error = current_has_perm(p, PROCESS__GETATTR); if (error) return error; } @@ -5309,15 +5326,15 @@ static int selinux_setprocattr(struct task_struct *p, * above restriction is ever removed. */ if (!strcmp(name, "exec")) - error = task_has_perm(current, p, PROCESS__SETEXEC); + error = current_has_perm(p, PROCESS__SETEXEC); else if (!strcmp(name, "fscreate")) - error = task_has_perm(current, p, PROCESS__SETFSCREATE); + error = current_has_perm(p, PROCESS__SETFSCREATE); else if (!strcmp(name, "keycreate")) - error = task_has_perm(current, p, PROCESS__SETKEYCREATE); + error = current_has_perm(p, PROCESS__SETKEYCREATE); else if (!strcmp(name, "sockcreate")) - error = task_has_perm(current, p, PROCESS__SETSOCKCREATE); + error = current_has_perm(p, PROCESS__SETSOCKCREATE); else if (!strcmp(name, "current")) - error = task_has_perm(current, p, PROCESS__SETCURRENT); + error = current_has_perm(p, PROCESS__SETCURRENT); else error = -EINVAL; if (error) -- cgit v1.2.3-70-g09d2 From 7e066fb870fcd1025ec3ba7bbde5d541094f4ce1 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:47 -0500 Subject: tracepoints: add DECLARE_TRACE() and DEFINE_TRACE() Impact: API *CHANGE*. Must update all tracepoint users. Add DEFINE_TRACE() to tracepoints to let them declare the tracepoint structure in a single spot for all the kernel. It helps reducing memory consumption, especially when declaring a lot of tracepoints, e.g. for kmalloc tracing. *API CHANGE WARNING*: now, DECLARE_TRACE() must be used in headers for tracepoint declarations rather than DEFINE_TRACE(). This is the sane way to do it. The name previously used was misleading. Updates scheduler instrumentation to follow this API change. 
Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- Documentation/tracepoints.txt | 7 ++++++- include/asm-generic/vmlinux.lds.h | 1 + include/linux/tracepoint.h | 35 +++++++++++++++++++++++---------- include/trace/sched.h | 24 +++++++++++----------- kernel/exit.c | 4 ++++ kernel/fork.c | 2 ++ kernel/kthread.c | 3 +++ kernel/sched.c | 6 ++++++ kernel/signal.c | 2 ++ samples/tracepoints/tp-samples-trace.h | 4 ++-- samples/tracepoints/tracepoint-sample.c | 3 +++ 11 files changed, 66 insertions(+), 25 deletions(-) (limited to 'kernel/fork.c') diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt index 5d354e16749..e8ad47b437f 100644 --- a/Documentation/tracepoints.txt +++ b/Documentation/tracepoints.txt @@ -42,7 +42,7 @@ In include/trace/subsys.h : #include -DEFINE_TRACE(subsys_eventname, +DECLARE_TRACE(subsys_eventname, TPPTOTO(int firstarg, struct task_struct *p), TPARGS(firstarg, p)); @@ -50,6 +50,8 @@ In subsys/file.c (where the tracing statement must be added) : #include +DEFINE_TRACE(subsys_eventname); + void somefct(void) { ... @@ -86,6 +88,9 @@ to limit collisions. Tracepoint names are global to the kernel: they are considered as being the same whether they are in the core kernel image or in modules. +If the tracepoint has to be used in kernel modules, an +EXPORT_TRACEPOINT_SYMBOL_GPL() or EXPORT_TRACEPOINT_SYMBOL() can be used to +export the defined tracepoints. * Probe / tracepoint example diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index a5e4ed9baec..3b46ae46493 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -71,6 +71,7 @@ VMLINUX_SYMBOL(__start___markers) = .; \ *(__markers) \ VMLINUX_SYMBOL(__stop___markers) = .; \ + . 
= ALIGN(32); \ VMLINUX_SYMBOL(__start___tracepoints) = .; \ *(__tracepoints) \ VMLINUX_SYMBOL(__stop___tracepoints) = .; \ diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 7e9b42aeae0..75700545836 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -24,8 +24,12 @@ struct tracepoint { const char *name; /* Tracepoint name */ int state; /* State. */ void **funcs; -} __attribute__((aligned(8))); - +} __attribute__((aligned(32))); /* + * Aligned on 32 bytes because it is + * globally visible and gcc happily + * align these on the structure size. + * Keep in sync with vmlinux.lds.h. + */ #define TPPROTO(args...) args #define TPARGS(args...) args @@ -55,15 +59,10 @@ struct tracepoint { * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. */ -#define DEFINE_TRACE(name, proto, args) \ +#define DECLARE_TRACE(name, proto, args) \ + extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ - static const char __tpstrtab_##name[] \ - __attribute__((section("__tracepoints_strings"))) \ - = #name; \ - static struct tracepoint __tracepoint_##name \ - __attribute__((section("__tracepoints"), aligned(8))) = \ - { __tpstrtab_##name, 0, NULL }; \ if (unlikely(__tracepoint_##name.state)) \ __DO_TRACE(&__tracepoint_##name, \ TPPROTO(proto), TPARGS(args)); \ @@ -77,11 +76,23 @@ struct tracepoint { return tracepoint_probe_unregister(#name, (void *)probe);\ } +#define DEFINE_TRACE(name) \ + static const char __tpstrtab_##name[] \ + __attribute__((section("__tracepoints_strings"))) = #name; \ + struct tracepoint __tracepoint_##name \ + __attribute__((section("__tracepoints"), aligned(32))) = \ + { __tpstrtab_##name, 0, NULL } + +#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \ + EXPORT_SYMBOL_GPL(__tracepoint_##name) +#define EXPORT_TRACEPOINT_SYMBOL(name) \ + EXPORT_SYMBOL(__tracepoint_##name) + extern void 
tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end); #else /* !CONFIG_TRACEPOINTS */ -#define DEFINE_TRACE(name, proto, args) \ +#define DECLARE_TRACE(name, proto, args) \ static inline void _do_trace_##name(struct tracepoint *tp, proto) \ { } \ static inline void trace_##name(proto) \ @@ -95,6 +106,10 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, return -ENOSYS; \ } +#define DEFINE_TRACE(name) +#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) +#define EXPORT_TRACEPOINT_SYMBOL(name) + static inline void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) { } diff --git a/include/trace/sched.h b/include/trace/sched.h index ad47369d01b..9b2854abf7e 100644 --- a/include/trace/sched.h +++ b/include/trace/sched.h @@ -4,52 +4,52 @@ #include #include -DEFINE_TRACE(sched_kthread_stop, +DECLARE_TRACE(sched_kthread_stop, TPPROTO(struct task_struct *t), TPARGS(t)); -DEFINE_TRACE(sched_kthread_stop_ret, +DECLARE_TRACE(sched_kthread_stop_ret, TPPROTO(int ret), TPARGS(ret)); -DEFINE_TRACE(sched_wait_task, +DECLARE_TRACE(sched_wait_task, TPPROTO(struct rq *rq, struct task_struct *p), TPARGS(rq, p)); -DEFINE_TRACE(sched_wakeup, +DECLARE_TRACE(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p), TPARGS(rq, p)); -DEFINE_TRACE(sched_wakeup_new, +DECLARE_TRACE(sched_wakeup_new, TPPROTO(struct rq *rq, struct task_struct *p), TPARGS(rq, p)); -DEFINE_TRACE(sched_switch, +DECLARE_TRACE(sched_switch, TPPROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TPARGS(rq, prev, next)); -DEFINE_TRACE(sched_migrate_task, +DECLARE_TRACE(sched_migrate_task, TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu), TPARGS(rq, p, dest_cpu)); -DEFINE_TRACE(sched_process_free, +DECLARE_TRACE(sched_process_free, TPPROTO(struct task_struct *p), TPARGS(p)); -DEFINE_TRACE(sched_process_exit, +DECLARE_TRACE(sched_process_exit, TPPROTO(struct task_struct *p), TPARGS(p)); -DEFINE_TRACE(sched_process_wait, 
+DECLARE_TRACE(sched_process_wait, TPPROTO(struct pid *pid), TPARGS(pid)); -DEFINE_TRACE(sched_process_fork, +DECLARE_TRACE(sched_process_fork, TPPROTO(struct task_struct *parent, struct task_struct *child), TPARGS(parent, child)); -DEFINE_TRACE(sched_signal_send, +DECLARE_TRACE(sched_signal_send, TPPROTO(int sig, struct task_struct *p), TPARGS(sig, p)); diff --git a/kernel/exit.c b/kernel/exit.c index ae2b92be5fa..f995d241866 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -54,6 +54,10 @@ #include #include +DEFINE_TRACE(sched_process_free); +DEFINE_TRACE(sched_process_exit); +DEFINE_TRACE(sched_process_wait); + static void exit_mm(struct task_struct * tsk); static inline int task_detached(struct task_struct *p) diff --git a/kernel/fork.c b/kernel/fork.c index f6083561dfe..0837d0deee5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -79,6 +79,8 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0; __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ +DEFINE_TRACE(sched_process_fork); + int nr_processes(void) { int cpu; diff --git a/kernel/kthread.c b/kernel/kthread.c index 8e7a7ce3ed0..4fbc456f393 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -21,6 +21,9 @@ static DEFINE_SPINLOCK(kthread_create_lock); static LIST_HEAD(kthread_create_list); struct task_struct *kthreadd_task; +DEFINE_TRACE(sched_kthread_stop); +DEFINE_TRACE(sched_kthread_stop_ret); + struct kthread_create_info { /* Information passed to kthread() from kthreadd. 
*/ diff --git a/kernel/sched.c b/kernel/sched.c index 50a21f96467..327f91c63c9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -118,6 +118,12 @@ */ #define RUNTIME_INF ((u64)~0ULL) +DEFINE_TRACE(sched_wait_task); +DEFINE_TRACE(sched_wakeup); +DEFINE_TRACE(sched_wakeup_new); +DEFINE_TRACE(sched_switch); +DEFINE_TRACE(sched_migrate_task); + #ifdef CONFIG_SMP /* * Divide a load by a sched group cpu_power : (load / sg->__cpu_power) diff --git a/kernel/signal.c b/kernel/signal.c index 4530fc65445..e9afe63da24 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -41,6 +41,8 @@ static struct kmem_cache *sigqueue_cachep; +DEFINE_TRACE(sched_signal_send); + static void __user *sig_handler(struct task_struct *t, int sig) { return t->sighand->action[sig - 1].sa.sa_handler; diff --git a/samples/tracepoints/tp-samples-trace.h b/samples/tracepoints/tp-samples-trace.h index 0216b55bd64..01724e04c55 100644 --- a/samples/tracepoints/tp-samples-trace.h +++ b/samples/tracepoints/tp-samples-trace.h @@ -4,10 +4,10 @@ #include /* for struct inode and struct file */ #include -DEFINE_TRACE(subsys_event, +DECLARE_TRACE(subsys_event, TPPROTO(struct inode *inode, struct file *file), TPARGS(inode, file)); -DEFINE_TRACE(subsys_eventb, +DECLARE_TRACE(subsys_eventb, TPPROTO(void), TPARGS()); #endif diff --git a/samples/tracepoints/tracepoint-sample.c b/samples/tracepoints/tracepoint-sample.c index 4ae4b7fcc04..00d169792a3 100644 --- a/samples/tracepoints/tracepoint-sample.c +++ b/samples/tracepoints/tracepoint-sample.c @@ -13,6 +13,9 @@ #include #include "tp-samples-trace.h" +DEFINE_TRACE(subsys_event); +DEFINE_TRACE(subsys_eventb); + struct proc_dir_entry *pentry_example; static int my_open(struct inode *inode, struct file *file) -- cgit v1.2.3-70-g09d2 From f201ae2356c74bcae130b2177b3dca903ea98071 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 23 Nov 2008 06:22:56 +0100 Subject: tracing/function-return-tracer: store return stack into task_struct and allocate it dynamically 
Impact: use deeper function tracing depth safely Some tests showed that function return tracing needed a deeper depth of function calls. But it could be unsafe to store these return addresses to the stack. So these arrays will now be allocated dynamically into task_struct of current only when the tracer is activated. Typical scheme when tracer is activated: - allocate a return stack for each task in global list. - fork: allocate the return stack for the newly created task - exit: free return stack of current - idle init: same as fork I chose a default depth of 50. I don't have overruns anymore. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ftrace.h | 1 - arch/x86/include/asm/thread_info.h | 29 ------------ arch/x86/kernel/ftrace.c | 29 ++++++------ include/linux/ftrace.h | 5 ++ include/linux/sched.h | 23 +++++---- kernel/exit.c | 5 +- kernel/fork.c | 4 ++ kernel/sched.c | 3 ++ kernel/trace/ftrace.c | 96 +++++++++++++++++++++++++++++++++++++- 9 files changed, 137 insertions(+), 58 deletions(-) (limited to 'kernel/fork.c') diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 2bb43b433e0..754a3e082f9 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -29,7 +29,6 @@ struct dyn_arch_ftrace { #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_FUNCTION_RET_TRACER -#define FTRACE_RET_STACK_SIZE 20 #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e90e81ef6ab..0921b4018c1 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -40,36 +40,8 @@ struct thread_info { */ __u8 supervisor_stack[0]; #endif - -#ifdef CONFIG_FUNCTION_RET_TRACER - /* Index of current stored adress in ret_stack */ - int curr_ret_stack; - /* Stack of return addresses for return function tracing */ - struct ftrace_ret_stack ret_stack[FTRACE_RET_STACK_SIZE]; - /* - * Number of functions that haven't been
traced - * because of depth overrun. - */ - atomic_t trace_overrun; -#endif }; -#ifdef CONFIG_FUNCTION_RET_TRACER -#define INIT_THREAD_INFO(tsk) \ -{ \ - .task = &tsk, \ - .exec_domain = &default_exec_domain, \ - .flags = 0, \ - .cpu = 0, \ - .preempt_count = 1, \ - .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ - .curr_ret_stack = -1,\ - .trace_overrun = ATOMIC_INIT(0) \ -} -#else #define INIT_THREAD_INFO(tsk) \ { \ .task = &tsk, \ @@ -82,7 +54,6 @@ struct thread_info { .fn = do_no_restart_syscall, \ }, \ } -#endif #define init_thread_info (init_thread_union.thread_info) #define init_stack (init_thread_union.stack) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 356bb1eb6e9..bb137f7297e 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -350,19 +350,21 @@ static int push_return_trace(unsigned long ret, unsigned long long time, unsigned long func) { int index; - struct thread_info *ti = current_thread_info(); + + if (!current->ret_stack) + return -EBUSY; /* The return trace stack is full */ - if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1) { - atomic_inc(&ti->trace_overrun); + if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { + atomic_inc(&current->trace_overrun); return -EBUSY; } - index = ++ti->curr_ret_stack; + index = ++current->curr_ret_stack; barrier(); - ti->ret_stack[index].ret = ret; - ti->ret_stack[index].func = func; - ti->ret_stack[index].calltime = time; + current->ret_stack[index].ret = ret; + current->ret_stack[index].func = func; + current->ret_stack[index].calltime = time; return 0; } @@ -373,13 +375,12 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time, { int index; - struct thread_info *ti = current_thread_info(); - index = ti->curr_ret_stack; - *ret = ti->ret_stack[index].ret; - *func = ti->ret_stack[index].func; - *time = ti->ret_stack[index].calltime; - *overrun = atomic_read(&ti->trace_overrun); - ti->curr_ret_stack--; +
index = current->curr_ret_stack; + *ret = current->ret_stack[index].ret; + *func = current->ret_stack[index].func; + *time = current->ret_stack[index].calltime; + *overrun = atomic_read(&current->trace_overrun); + current->curr_ret_stack--; } /* diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f7ba4ea5e12..2ba259b2def 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -323,6 +323,8 @@ struct ftrace_retfunc { }; #ifdef CONFIG_FUNCTION_RET_TRACER +#define FTRACE_RETFUNC_DEPTH 50 +#define FTRACE_RETSTACK_ALLOC_SIZE 32 /* Type of a callback handler of tracing return function */ typedef void (*trace_function_return_t)(struct ftrace_retfunc *); @@ -330,6 +332,9 @@ extern int register_ftrace_return(trace_function_return_t func); /* The current handler in use */ extern trace_function_return_t ftrace_function_return; extern void unregister_ftrace_return(void); + +extern void ftrace_retfunc_init_task(struct task_struct *t); +extern void ftrace_retfunc_exit_task(struct task_struct *t); #endif #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index c8e0db46420..bee1e93c95a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1352,6 +1352,17 @@ struct task_struct { unsigned long default_timer_slack_ns; struct list_head *scm_work_list; +#ifdef CONFIG_FUNCTION_RET_TRACER + /* Index of current stored adress in ret_stack */ + int curr_ret_stack; + /* Stack of return addresses for return function tracing */ + struct ftrace_ret_stack *ret_stack; + /* + * Number of functions that haven't been traced + * because of depth overrun. + */ + atomic_t trace_overrun; +#endif }; /* @@ -2006,18 +2017,6 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct { *task_thread_info(p) = *task_thread_info(org); task_thread_info(p)->task = p; - -#ifdef CONFIG_FUNCTION_RET_TRACER - /* - * When fork() creates a child process, this function is called.
- * But the child task may not inherit the return adresses traced - * by the return function tracer because it will directly execute - * in userspace and will not return to kernel functions its parent - * used. - */ - task_thread_info(p)->curr_ret_stack = -1; - atomic_set(&task_thread_info(p)->trace_overrun, 0); -#endif } static inline unsigned long *end_of_stack(struct task_struct *p) diff --git a/kernel/exit.c b/kernel/exit.c index 35c8ec2ba03..b9d446329da 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -1127,7 +1128,9 @@ NORET_TYPE void do_exit(long code) preempt_disable(); /* causes final put_task_struct in finish_task_switch(). */ tsk->state = TASK_DEAD; - +#ifdef CONFIG_FUNCTION_RET_TRACER + ftrace_retfunc_exit_task(tsk); +#endif schedule(); BUG(); /* Avoid "noreturn function does return". */ diff --git a/kernel/fork.c b/kernel/fork.c index ac62f43ee43..d1eb30e69cc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -1269,6 +1270,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, total_forks++; spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); +#ifdef CONFIG_FUNCTION_RET_TRACER + ftrace_retfunc_init_task(p); +#endif proc_fork_connector(p); cgroup_post_fork(p); return p; diff --git a/kernel/sched.c b/kernel/sched.c index 4de56108c86..fb17205950d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5901,6 +5901,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) * The idle tasks have their own, simple scheduling class: */ idle->sched_class = &idle_sched_class; +#ifdef CONFIG_FUNCTION_RET_TRACER + ftrace_retfunc_init_task(idle); +#endif } /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f212da48668..90d99fb02ae 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1498,10 +1498,77 @@ ftrace_enable_sysctl(struct ctl_table
*table, int write, #ifdef CONFIG_FUNCTION_RET_TRACER +static atomic_t ftrace_retfunc_active; + /* The callback that hooks the return of a function */ trace_function_return_t ftrace_function_return = (trace_function_return_t)ftrace_stub; + +/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ +static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) +{ + int i; + int ret = 0; + unsigned long flags; + int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE; + struct task_struct *g, *t; + + for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) { + ret_stack_list[i] = kmalloc(FTRACE_RETFUNC_DEPTH + * sizeof(struct ftrace_ret_stack), + GFP_KERNEL); + if (!ret_stack_list[i]) { + start = 0; + end = i; + ret = -ENOMEM; + goto free; + } + } + + read_lock_irqsave(&tasklist_lock, flags); + do_each_thread(g, t) { + if (start == end) { + ret = -EAGAIN; + goto unlock; + } + + if (t->ret_stack == NULL) { + t->ret_stack = ret_stack_list[start++]; + t->curr_ret_stack = -1; + atomic_set(&t->trace_overrun, 0); + } + } while_each_thread(g, t); + +unlock: + read_unlock_irqrestore(&tasklist_lock, flags); +free: + for (i = start; i < end; i++) + kfree(ret_stack_list[i]); + return ret; +} + +/* Allocate a return stack for each task */ +static int start_return_tracing(void) +{ + struct ftrace_ret_stack **ret_stack_list; + int ret; + + ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE * + sizeof(struct ftrace_ret_stack *), + GFP_KERNEL); + + if (!ret_stack_list) + return -ENOMEM; + + do { + ret = alloc_retstack_tasklist(ret_stack_list); + } while (ret == -EAGAIN); + + kfree(ret_stack_list); + return ret; +} + int register_ftrace_return(trace_function_return_t func) { int ret = 0; @@ -1516,7 +1583,12 @@ int register_ftrace_return(trace_function_return_t func) ret = -EBUSY; goto out; } - + atomic_inc(&ftrace_retfunc_active); + ret = start_return_tracing(); + if (ret) { + atomic_dec(&ftrace_retfunc_active); + goto out; + } ftrace_tracing_type = 
FTRACE_TYPE_RETURN; ftrace_function_return = func; ftrace_startup(); @@ -1530,6 +1602,7 @@ void unregister_ftrace_return(void) { mutex_lock(&ftrace_sysctl_lock); + atomic_dec(&ftrace_retfunc_active); ftrace_function_return = (trace_function_return_t)ftrace_stub; ftrace_shutdown(); /* Restore normal tracing type */ @@ -1537,6 +1610,27 @@ void unregister_ftrace_return(void) mutex_unlock(&ftrace_sysctl_lock); } + +/* Allocate a return stack for newly created task */ +void ftrace_retfunc_init_task(struct task_struct *t) +{ + if (atomic_read(&ftrace_retfunc_active)) { + t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH + * sizeof(struct ftrace_ret_stack), + GFP_KERNEL); + if (!t->ret_stack) + return; + t->curr_ret_stack = -1; + atomic_set(&t->trace_overrun, 0); + } else + t->ret_stack = NULL; +} + +void ftrace_retfunc_exit_task(struct task_struct *t) +{ + kfree(t->ret_stack); + t->ret_stack = NULL; +} #endif -- cgit v1.2.3-70-g09d2 From 82f60f0bc854aada696f27d863c03bef91f1509d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 23 Nov 2008 09:18:56 +0100 Subject: tracing/function-return-tracer: clean up task start/exit callbacks Impact: cleanup Eliminate #ifdefs in core code by using empty inline functions. 
Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 3 +++ kernel/exit.c | 2 -- kernel/fork.c | 2 -- kernel/sched.c | 2 -- 4 files changed, 3 insertions(+), 6 deletions(-) (limited to 'kernel/fork.c') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2ba259b2def..938ca194264 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -335,6 +335,9 @@ extern void unregister_ftrace_return(void); extern void ftrace_retfunc_init_task(struct task_struct *t); extern void ftrace_retfunc_exit_task(struct task_struct *t); +#else +static inline void ftrace_retfunc_init_task(struct task_struct *t) { } +static inline void ftrace_retfunc_exit_task(struct task_struct *t) { } #endif #endif /* _LINUX_FTRACE_H */ diff --git a/kernel/exit.c b/kernel/exit.c index b9d446329da..ef04d03b328 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1128,9 +1128,7 @@ NORET_TYPE void do_exit(long code) preempt_disable(); /* causes final put_task_struct in finish_task_switch(). */ tsk->state = TASK_DEAD; -#ifdef CONFIG_FUNCTION_RET_TRACER ftrace_retfunc_exit_task(tsk); -#endif schedule(); BUG(); /* Avoid "noreturn function does return". 
*/ diff --git a/kernel/fork.c b/kernel/fork.c index d1eb30e69cc..fbf4a4c0a62 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1270,9 +1270,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, total_forks++; spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); -#ifdef CONFIG_FUNCTION_RET_TRACER ftrace_retfunc_init_task(p); -#endif proc_fork_connector(p); cgroup_post_fork(p); return p; diff --git a/kernel/sched.c b/kernel/sched.c index fb17205950d..388d9db044a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5901,9 +5901,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) * The idle tasks have their own, simple scheduling class: */ idle->sched_class = &idle_sched_class; -#ifdef CONFIG_FUNCTION_RET_TRACER ftrace_retfunc_init_task(idle); -#endif } /* -- cgit v1.2.3-70-g09d2 From 65afa5e603d507014580ead016ec887b49e1afa6 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 23 Nov 2008 18:43:39 +0100 Subject: tracing/function-return-tracer: free the return stack on free_task() Impact: avoid losing some traces when a task is freed do_exit() is not the last function called when a task finishes. There are still some functions which are to be called such as free_task(). So we delay the freeing of the return stack to the last moment. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/exit.c | 2 -- kernel/fork.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/exit.c b/kernel/exit.c index ef04d03b328..e5ae36ebe8a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -47,7 +47,6 @@ #include #include #include -#include #include #include @@ -1128,7 +1127,6 @@ NORET_TYPE void do_exit(long code) preempt_disable(); /* causes final put_task_struct in finish_task_switch(). */ tsk->state = TASK_DEAD; - ftrace_retfunc_exit_task(tsk); schedule(); BUG(); /* Avoid "noreturn function does return".
*/ diff --git a/kernel/fork.c b/kernel/fork.c index fbf4a4c0a62..d6e1a3205f6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -140,6 +140,7 @@ void free_task(struct task_struct *tsk) prop_local_destroy_single(&tsk->dirties); free_thread_info(tsk->stack); rt_mutex_debug_task_free(tsk); + ftrace_retfunc_exit_task(tsk); free_task_struct(tsk); } EXPORT_SYMBOL(free_task); -- cgit v1.2.3-70-g09d2 From 18b6e0414e42d95183f07d8177e3ff0241abd825 Mon Sep 17 00:00:00 2001 From: Serge Hallyn Date: Wed, 15 Oct 2008 16:38:45 -0500 Subject: User namespaces: set of cleanups (v2) The user_ns is moved from nsproxy to user_struct, so that a struct cred by itself is sufficient to determine access (which it otherwise would not be). Corresponding ecryptfs fixes (by David Howells) are here as well. Fix refcounting. The following rules now apply: 1. The task pins the user struct. 2. The user struct pins its user namespace. 3. The user namespace pins the struct user which created it. User namespaces are cloned during copy_creds(). Unsharing a new user_ns is no longer possible. (We could re-add that, but it'll cause code duplication and doesn't seem useful if PAM doesn't need to clone user namespaces). When a user namespace is created, its first user (uid 0) gets empty keyrings and a clean group_info. This incorporates a previous patch by David Howells. Here is his original patch description: >I suggest adding the attached incremental patch. It makes the following >changes: > > (1) Provides a current_user_ns() macro to wrap accesses to current's user > namespace. > > (2) Fixes eCryptFS. > > (3) Renames create_new_userns() to create_user_ns() to be more consistent > with the other associated functions and because the 'new' in the name is > superfluous. > > (4) Moves the argument and permission checks made for CLONE_NEWUSER to the > beginning of do_fork() so that they're done prior to making any attempts > at allocation. 
> > (5) Calls create_user_ns() after prepare_creds(), and gives it the new creds > to fill in rather than have it return the new root user. I don't imagine > the new root user being used for anything other than filling in a cred > struct. > > This also permits me to get rid of a get_uid() and a free_uid(), as the > reference the creds were holding on the old user_struct can just be > transferred to the new namespace's creator pointer. > > (6) Makes create_user_ns() reset the UIDs and GIDs of the creds under > preparation rather than doing it in copy_creds(). > >David >Signed-off-by: David Howells Changelog: Oct 20: integrate dhowells comments 1. leave thread_keyring alone 2. use current_user_ns() in set_user() Signed-off-by: Serge Hallyn --- fs/ecryptfs/messaging.c | 13 ++++---- fs/ecryptfs/miscdev.c | 19 ++++------- include/linux/cred.h | 2 ++ include/linux/init_task.h | 1 - include/linux/nsproxy.h | 1 - include/linux/sched.h | 1 + include/linux/user_namespace.h | 13 +++----- kernel/cred.c | 15 +++++++-- kernel/fork.c | 19 +++++++++-- kernel/nsproxy.c | 15 ++------- kernel/sys.c | 4 +-- kernel/user.c | 47 ++++++++------------------ kernel/user_namespace.c | 75 +++++++++++++++++------------------------- 13 files changed, 96 insertions(+), 129 deletions(-) (limited to 'kernel/fork.c') diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index e0b0a4e28b9..6913f727624 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -360,7 +360,7 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, struct ecryptfs_msg_ctx *msg_ctx; size_t msg_size; struct nsproxy *nsproxy; - struct user_namespace *current_user_ns; + struct user_namespace *tsk_user_ns; uid_t ctx_euid; int rc; @@ -385,9 +385,9 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, mutex_unlock(&ecryptfs_daemon_hash_mux); goto wake_up; } - current_user_ns = nsproxy->user_ns; + tsk_user_ns = __task_cred(msg_ctx->task)->user->user_ns; ctx_euid = 
task_euid(msg_ctx->task); - rc = ecryptfs_find_daemon_by_euid(&daemon, ctx_euid, current_user_ns); + rc = ecryptfs_find_daemon_by_euid(&daemon, ctx_euid, tsk_user_ns); rcu_read_unlock(); mutex_unlock(&ecryptfs_daemon_hash_mux); if (rc) { @@ -405,11 +405,11 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, euid, ctx_euid); goto unlock; } - if (current_user_ns != user_ns) { + if (tsk_user_ns != user_ns) { rc = -EBADMSG; printk(KERN_WARNING "%s: Received message from user_ns " "[0x%p]; expected message from user_ns [0x%p]\n", - __func__, user_ns, nsproxy->user_ns); + __func__, user_ns, tsk_user_ns); goto unlock; } if (daemon->pid != pid) { @@ -468,8 +468,7 @@ ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type, uid_t euid = current_euid(); int rc; - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, - current->nsproxy->user_ns); + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); if (rc || !daemon) { rc = -ENOTCONN; printk(KERN_ERR "%s: User [%d] does not have a daemon " diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 047ac609695..efd95a0ed1e 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -47,8 +47,7 @@ ecryptfs_miscdev_poll(struct file *file, poll_table *pt) mutex_lock(&ecryptfs_daemon_hash_mux); /* TODO: Just use file->private_data? 
*/ - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, - current->nsproxy->user_ns); + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); BUG_ON(rc || !daemon); mutex_lock(&daemon->mux); mutex_unlock(&ecryptfs_daemon_hash_mux); @@ -95,11 +94,9 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file) "count; rc = [%d]\n", __func__, rc); goto out_unlock_daemon_list; } - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, - current->nsproxy->user_ns); + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); if (rc || !daemon) { - rc = ecryptfs_spawn_daemon(&daemon, euid, - current->nsproxy->user_ns, + rc = ecryptfs_spawn_daemon(&daemon, euid, current_user_ns(), task_pid(current)); if (rc) { printk(KERN_ERR "%s: Error attempting to spawn daemon; " @@ -153,8 +150,7 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file) int rc; mutex_lock(&ecryptfs_daemon_hash_mux); - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, - current->nsproxy->user_ns); + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); BUG_ON(rc || !daemon); mutex_lock(&daemon->mux); BUG_ON(daemon->pid != task_pid(current)); @@ -254,8 +250,7 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count, mutex_lock(&ecryptfs_daemon_hash_mux); /* TODO: Just use file->private_data? 
*/ - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, - current->nsproxy->user_ns); + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); BUG_ON(rc || !daemon); mutex_lock(&daemon->mux); if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { @@ -295,7 +290,7 @@ check_list: goto check_list; } BUG_ON(euid != daemon->euid); - BUG_ON(current->nsproxy->user_ns != daemon->user_ns); + BUG_ON(current_user_ns() != daemon->user_ns); BUG_ON(task_pid(current) != daemon->pid); msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue, struct ecryptfs_msg_ctx, daemon_out_list); @@ -468,7 +463,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, goto out_free; } rc = ecryptfs_miscdev_response(&data[i], packet_size, - euid, current->nsproxy->user_ns, + euid, current_user_ns(), task_pid(current), seq); if (rc) printk(KERN_WARNING "%s: Failed to deliver miscdev " diff --git a/include/linux/cred.h b/include/linux/cred.h index 26c1ab17994..3282ee4318e 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -60,6 +60,7 @@ do { \ } while (0) extern struct group_info *groups_alloc(int); +extern struct group_info init_groups; extern void groups_free(struct group_info *); extern int set_current_groups(struct group_info *); extern int set_groups(struct cred *, struct group_info *); @@ -315,6 +316,7 @@ static inline void put_cred(const struct cred *_cred) #define current_fsgid() (current_cred_xxx(fsgid)) #define current_cap() (current_cred_xxx(cap_effective)) #define current_user() (current_cred_xxx(user)) +#define current_user_ns() (current_cred_xxx(user)->user_ns) #define current_security() (current_cred_xxx(security)) #define current_uid_gid(_uid, _gid) \ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 2597858035c..959f5522d10 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -57,7 +57,6 @@ extern struct nsproxy init_nsproxy; .mnt_ns = NULL, \ INIT_NET_NS(net_ns) \ INIT_IPC_NS(ipc_ns) \ - .user_ns = 
&init_user_ns, \ } #define INIT_SIGHAND(sighand) { \ diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index c8a768e5964..afad7dec1b3 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -27,7 +27,6 @@ struct nsproxy { struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; struct pid_namespace *pid_ns; - struct user_namespace *user_ns; struct net *net_ns; }; extern struct nsproxy init_nsproxy; diff --git a/include/linux/sched.h b/include/linux/sched.h index 2036e9f2602..7f8015a3082 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -638,6 +638,7 @@ struct user_struct { /* Hash table maintenance information */ struct hlist_node uidhash_node; uid_t uid; + struct user_namespace *user_ns; #ifdef CONFIG_USER_SCHED struct task_group *tg; diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index b5f41d4c2ee..315bcd37522 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -12,7 +12,7 @@ struct user_namespace { struct kref kref; struct hlist_head uidhash_table[UIDHASH_SZ]; - struct user_struct *root_user; + struct user_struct *creator; }; extern struct user_namespace init_user_ns; @@ -26,8 +26,7 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns) return ns; } -extern struct user_namespace *copy_user_ns(int flags, - struct user_namespace *old_ns); +extern int create_user_ns(struct cred *new); extern void free_user_ns(struct kref *kref); static inline void put_user_ns(struct user_namespace *ns) @@ -43,13 +42,9 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns) return &init_user_ns; } -static inline struct user_namespace *copy_user_ns(int flags, - struct user_namespace *old_ns) +static inline int create_user_ns(struct cred *new) { - if (flags & CLONE_NEWUSER) - return ERR_PTR(-EINVAL); - - return old_ns; + return -EINVAL; } static inline void put_user_ns(struct user_namespace *ns) diff --git a/kernel/cred.c 
b/kernel/cred.c index 13697ca2bb3..ff7bc071991 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -274,6 +274,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) struct thread_group_cred *tgcred; #endif struct cred *new; + int ret; mutex_init(&p->cred_exec_mutex); @@ -293,6 +294,12 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) if (!new) return -ENOMEM; + if (clone_flags & CLONE_NEWUSER) { + ret = create_user_ns(new); + if (ret < 0) + goto error_put; + } + #ifdef CONFIG_KEYS /* new threads get their own thread keyrings if their parent already * had one */ @@ -309,8 +316,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) if (!(clone_flags & CLONE_THREAD)) { tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); if (!tgcred) { - put_cred(new); - return -ENOMEM; + ret = -ENOMEM; + goto error_put; } atomic_set(&tgcred->usage, 1); spin_lock_init(&tgcred->lock); @@ -325,6 +332,10 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) atomic_inc(&new->user->processes); p->cred = p->real_cred = get_cred(new); return 0; + +error_put: + put_cred(new); + return ret; } /** diff --git a/kernel/fork.c b/kernel/fork.c index 29c18c14812..1dd89451fae 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -976,7 +976,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (atomic_read(&p->real_cred->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && - p->real_cred->user != current->nsproxy->user_ns->root_user) + p->real_cred->user != INIT_USER) goto bad_fork_free; } @@ -1334,6 +1334,20 @@ long do_fork(unsigned long clone_flags, int trace = 0; long nr; + /* + * Do some preliminary argument and permissions checking before we + * actually start allocating stuff + */ + if (clone_flags & CLONE_NEWUSER) { + if (clone_flags & CLONE_THREAD) + return -EINVAL; + /* hopefully this check will go away when userns support is + * complete + */ + if 
(!capable(CAP_SYS_ADMIN)) + return -EPERM; + } + /* * We hope to recycle these flags after 2.6.26 */ @@ -1581,8 +1595,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) err = -EINVAL; if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| - CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER| - CLONE_NEWNET)) + CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) goto bad_unshare_out; /* diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 1d3ef29a258..63598dca2d0 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -80,12 +80,6 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, goto out_pid; } - new_nsp->user_ns = copy_user_ns(flags, tsk->nsproxy->user_ns); - if (IS_ERR(new_nsp->user_ns)) { - err = PTR_ERR(new_nsp->user_ns); - goto out_user; - } - new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns); if (IS_ERR(new_nsp->net_ns)) { err = PTR_ERR(new_nsp->net_ns); @@ -95,9 +89,6 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, return new_nsp; out_net: - if (new_nsp->user_ns) - put_user_ns(new_nsp->user_ns); -out_user: if (new_nsp->pid_ns) put_pid_ns(new_nsp->pid_ns); out_pid: @@ -130,7 +121,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) get_nsproxy(old_ns); if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | - CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNET))) + CLONE_NEWPID | CLONE_NEWNET))) return 0; if (!capable(CAP_SYS_ADMIN)) { @@ -173,8 +164,6 @@ void free_nsproxy(struct nsproxy *ns) put_ipc_ns(ns->ipc_ns); if (ns->pid_ns) put_pid_ns(ns->pid_ns); - if (ns->user_ns) - put_user_ns(ns->user_ns); put_net(ns->net_ns); kmem_cache_free(nsproxy_cachep, ns); } @@ -189,7 +178,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, int err = 0; if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | - CLONE_NEWUSER | CLONE_NEWNET))) + CLONE_NEWNET))) return 0; if (!capable(CAP_SYS_ADMIN)) diff --git a/kernel/sys.c b/kernel/sys.c index 
ab735040468..ebe65c2c987 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -565,13 +565,13 @@ static int set_user(struct cred *new) { struct user_struct *new_user; - new_user = alloc_uid(current->nsproxy->user_ns, new->uid); + new_user = alloc_uid(current_user_ns(), new->uid); if (!new_user) return -EAGAIN; if (atomic_read(&new_user->processes) >= current->signal->rlim[RLIMIT_NPROC].rlim_cur && - new_user != current->nsproxy->user_ns->root_user) { + new_user != INIT_USER) { free_uid(new_user); return -EAGAIN; } diff --git a/kernel/user.c b/kernel/user.c index d476307dd4b..c0ef3a46443 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -20,9 +20,9 @@ struct user_namespace init_user_ns = { .kref = { - .refcount = ATOMIC_INIT(2), + .refcount = ATOMIC_INIT(1), }, - .root_user = &root_user, + .creator = &root_user, }; EXPORT_SYMBOL_GPL(init_user_ns); @@ -48,12 +48,14 @@ static struct kmem_cache *uid_cachep; */ static DEFINE_SPINLOCK(uidhash_lock); +/* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->creator */ struct user_struct root_user = { - .__count = ATOMIC_INIT(1), + .__count = ATOMIC_INIT(2), .processes = ATOMIC_INIT(1), .files = ATOMIC_INIT(0), .sigpending = ATOMIC_INIT(0), .locked_shm = 0, + .user_ns = &init_user_ns, #ifdef CONFIG_USER_SCHED .tg = &init_task_group, #endif @@ -314,12 +316,13 @@ done: * IRQ state (as stored in flags) is restored and uidhash_lock released * upon function exit. */ -static inline void free_user(struct user_struct *up, unsigned long flags) +static void free_user(struct user_struct *up, unsigned long flags) { /* restore back the count */ atomic_inc(&up->__count); spin_unlock_irqrestore(&uidhash_lock, flags); + put_user_ns(up->user_ns); INIT_WORK(&up->work, remove_user_sysfs_dir); schedule_work(&up->work); } @@ -335,13 +338,14 @@ static inline void uids_mutex_unlock(void) { } * IRQ state (as stored in flags) is restored and uidhash_lock released * upon function exit. 
*/ -static inline void free_user(struct user_struct *up, unsigned long flags) +static void free_user(struct user_struct *up, unsigned long flags) { uid_hash_remove(up); spin_unlock_irqrestore(&uidhash_lock, flags); sched_destroy_user(up); key_put(up->uid_keyring); key_put(up->session_keyring); + put_user_ns(up->user_ns); kmem_cache_free(uid_cachep, up); } @@ -357,7 +361,7 @@ struct user_struct *find_user(uid_t uid) { struct user_struct *ret; unsigned long flags; - struct user_namespace *ns = current->nsproxy->user_ns; + struct user_namespace *ns = current_user()->user_ns; spin_lock_irqsave(&uidhash_lock, flags); ret = uid_hash_find(uid, uidhashentry(ns, uid)); @@ -404,6 +408,8 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) if (sched_create_user(new) < 0) goto out_free_user; + new->user_ns = get_user_ns(ns); + if (uids_user_create(new)) goto out_destoy_sched; @@ -427,7 +433,6 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) up = new; } spin_unlock_irq(&uidhash_lock); - } uids_mutex_unlock(); @@ -436,6 +441,7 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) out_destoy_sched: sched_destroy_user(new); + put_user_ns(new->user_ns); out_free_user: kmem_cache_free(uid_cachep, new); out_unlock: @@ -443,33 +449,6 @@ out_unlock: return NULL; } -#ifdef CONFIG_USER_NS -void release_uids(struct user_namespace *ns) -{ - int i; - unsigned long flags; - struct hlist_head *head; - struct hlist_node *nd; - - spin_lock_irqsave(&uidhash_lock, flags); - /* - * collapse the chains so that the user_struct-s will - * be still alive, but not in hashes. subsequent free_uid() - * will free them. 
- */ - for (i = 0; i < UIDHASH_SZ; i++) { - head = ns->uidhash_table + i; - while (!hlist_empty(head)) { - nd = head->first; - hlist_del_init(nd); - } - } - spin_unlock_irqrestore(&uidhash_lock, flags); - - free_uid(ns->root_user); -} -#endif - static int __init uid_cache_init(void) { int n; diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 0d9c51d6733..79084311ee5 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -9,70 +9,55 @@ #include #include #include +#include /* - * Clone a new ns copying an original user ns, setting refcount to 1 - * @old_ns: namespace to clone - * Return NULL on error (failure to kmalloc), new ns otherwise + * Create a new user namespace, deriving the creator from the user in the + * passed credentials, and replacing that user with the new root user for the + * new namespace. + * + * This is called by copy_creds(), which will finish setting the target task's + * credentials. */ -static struct user_namespace *clone_user_ns(struct user_namespace *old_ns) +int create_user_ns(struct cred *new) { struct user_namespace *ns; - struct user_struct *new_user; - struct cred *new; + struct user_struct *root_user; int n; ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL); if (!ns) - return ERR_PTR(-ENOMEM); + return -ENOMEM; kref_init(&ns->kref); for (n = 0; n < UIDHASH_SZ; ++n) INIT_HLIST_HEAD(ns->uidhash_table + n); - /* Insert new root user. */ - ns->root_user = alloc_uid(ns, 0); - if (!ns->root_user) { + /* Alloc new root user. 
*/ + root_user = alloc_uid(ns, 0); + if (!root_user) { kfree(ns); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } - /* Reset current->user with a new one */ - new_user = alloc_uid(ns, current_uid()); - if (!new_user) { - free_uid(ns->root_user); - kfree(ns); - return ERR_PTR(-ENOMEM); - } - - /* Install the new user */ - new = prepare_creds(); - if (!new) { - free_uid(new_user); - free_uid(ns->root_user); - kfree(ns); - } - free_uid(new->user); - new->user = new_user; - commit_creds(new); - return ns; -} - -struct user_namespace * copy_user_ns(int flags, struct user_namespace *old_ns) -{ - struct user_namespace *new_ns; - - BUG_ON(!old_ns); - get_user_ns(old_ns); - - if (!(flags & CLONE_NEWUSER)) - return old_ns; + /* set the new root user in the credentials under preparation */ + ns->creator = new->user; + new->user = root_user; + new->uid = new->euid = new->suid = new->fsuid = 0; + new->gid = new->egid = new->sgid = new->fsgid = 0; + put_group_info(new->group_info); + new->group_info = get_group_info(&init_groups); +#ifdef CONFIG_KEYS + key_put(new->request_key_auth); + new->request_key_auth = NULL; +#endif + /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ - new_ns = clone_user_ns(old_ns); + /* alloc_uid() incremented the userns refcount. 
Just set it to 1 */ + kref_set(&ns->kref, 1); - put_user_ns(old_ns); - return new_ns; + return 0; } void free_user_ns(struct kref *kref) @@ -80,7 +65,7 @@ void free_user_ns(struct kref *kref) struct user_namespace *ns; ns = container_of(kref, struct user_namespace, kref); - release_uids(ns); + free_uid(ns->creator); kfree(ns); } EXPORT_SYMBOL(free_user_ns); -- cgit v1.2.3-70-g09d2 From fb52607afcd0629776f1dc9e657647ceae81dd50 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 25 Nov 2008 21:07:04 +0100 Subject: tracing/function-return-tracer: change the name into function-graph-tracer Impact: cleanup This patch changes the name of the "return function tracer" into function-graph-tracer which is a more suitable name for a tracing which makes one able to retrieve the ordered call stack during the code flow. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- arch/x86/include/asm/ftrace.h | 4 +- arch/x86/kernel/Makefile | 4 +- arch/x86/kernel/entry_32.S | 12 ++--- arch/x86/kernel/ftrace.c | 12 ++--- include/linux/ftrace.h | 24 ++++----- include/linux/ftrace_irq.h | 2 +- include/linux/sched.h | 2 +- kernel/Makefile | 2 +- kernel/fork.c | 4 +- kernel/sched.c | 2 +- kernel/trace/Kconfig | 19 ++++--- kernel/trace/Makefile | 2 +- kernel/trace/ftrace.c | 26 +++++----- kernel/trace/trace.c | 18 +++---- kernel/trace/trace.h | 12 ++--- kernel/trace/trace_functions_graph.c | 98 ++++++++++++++++++++++++++++++++++++ 17 files changed, 173 insertions(+), 72 deletions(-) create mode 100644 kernel/trace/trace_functions_graph.c (limited to 'kernel/fork.c') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e49a4fd718f..0842b112768 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -29,7 +29,7 @@ config X86 select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER - select HAVE_FUNCTION_RET_TRACER if X86_32 + select HAVE_FUNCTION_GRAPH_TRACER if X86_32 select 
HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 754a3e082f9..7e61b4ceb9a 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -28,7 +28,7 @@ struct dyn_arch_ftrace { #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER #ifndef __ASSEMBLY__ @@ -51,6 +51,6 @@ struct ftrace_ret_stack { extern void return_to_handler(void); #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_FUNCTION_RET_TRACER */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ #endif /* _ASM_X86_FTRACE_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index af2bc36ca1c..64939a0c398 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -14,7 +14,7 @@ CFLAGS_REMOVE_paravirt-spinlocks.o = -pg CFLAGS_REMOVE_ftrace.o = -pg endif -ifdef CONFIG_FUNCTION_RET_TRACER +ifdef CONFIG_FUNCTION_GRAPH_TRACER # Don't trace __switch_to() but let it for function tracer CFLAGS_REMOVE_process_32.o = -pg endif @@ -70,7 +70,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_FUNCTION_RET_TRACER) += ftrace.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 74defe21ba4..2b1f0f081a6 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1188,9 +1188,9 @@ ENTRY(mcount) cmpl $ftrace_stub, ftrace_trace_function jnz trace -#ifdef CONFIG_FUNCTION_RET_TRACER - cmpl $ftrace_stub, ftrace_function_return - jnz ftrace_return_caller 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER + cmpl $ftrace_stub, ftrace_graph_function + jnz ftrace_graph_caller #endif .globl ftrace_stub ftrace_stub: @@ -1215,8 +1215,8 @@ END(mcount) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ -#ifdef CONFIG_FUNCTION_RET_TRACER -ENTRY(ftrace_return_caller) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller) cmpl $0, function_trace_stop jne ftrace_stub @@ -1230,7 +1230,7 @@ ENTRY(ftrace_return_caller) popl %ecx popl %eax ret -END(ftrace_return_caller) +END(ftrace_graph_caller) .globl return_to_handler return_to_handler: diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index bb137f7297e..3595a4c14ab 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -323,7 +323,7 @@ int __init ftrace_dyn_arch_init(void *data) } #endif -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER #ifndef CONFIG_DYNAMIC_FTRACE @@ -389,11 +389,11 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time, */ unsigned long ftrace_return_to_handler(void) { - struct ftrace_retfunc trace; + struct ftrace_graph_ret trace; pop_return_trace(&trace.ret, &trace.calltime, &trace.func, &trace.overrun); trace.rettime = cpu_clock(raw_smp_processor_id()); - ftrace_function_return(&trace); + ftrace_graph_function(&trace); return trace.ret; } @@ -440,12 +440,12 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) ); if (WARN_ON(faulted)) { - unregister_ftrace_return(); + unregister_ftrace_graph(); return; } if (WARN_ON(!__kernel_text_address(old))) { - unregister_ftrace_return(); + unregister_ftrace_graph(); *parent = old; return; } @@ -456,4 +456,4 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) *parent = old; } -#endif /* CONFIG_FUNCTION_RET_TRACER */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 7854d87b97b..b4ac734ad8d 100644 --- 
a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -115,8 +115,8 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); -#ifdef CONFIG_FUNCTION_RET_TRACER -extern void ftrace_return_caller(void); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +extern void ftrace_graph_caller(void); #endif /** @@ -315,7 +315,7 @@ ftrace_init_module(struct module *mod, /* * Structure that defines a return function trace. */ -struct ftrace_retfunc { +struct ftrace_graph_ret { unsigned long ret; /* Return address */ unsigned long func; /* Current function */ unsigned long long calltime; @@ -324,22 +324,22 @@ struct ftrace_retfunc { unsigned long overrun; }; -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER #define FTRACE_RETFUNC_DEPTH 50 #define FTRACE_RETSTACK_ALLOC_SIZE 32 /* Type of a callback handler of tracing return function */ -typedef void (*trace_function_return_t)(struct ftrace_retfunc *); +typedef void (*trace_function_graph_t)(struct ftrace_graph_ret *); -extern int register_ftrace_return(trace_function_return_t func); +extern int register_ftrace_graph(trace_function_graph_t func); /* The current handler in use */ -extern trace_function_return_t ftrace_function_return; -extern void unregister_ftrace_return(void); +extern trace_function_graph_t ftrace_graph_function; +extern void unregister_ftrace_graph(void); -extern void ftrace_retfunc_init_task(struct task_struct *t); -extern void ftrace_retfunc_exit_task(struct task_struct *t); +extern void ftrace_graph_init_task(struct task_struct *t); +extern void ftrace_graph_exit_task(struct task_struct *t); #else -static inline void ftrace_retfunc_init_task(struct task_struct *t) { } -static inline void ftrace_retfunc_exit_task(struct task_struct *t) { } +static inline void ftrace_graph_init_task(struct task_struct *t) { } +static inline void ftrace_graph_exit_task(struct task_struct *t) { } #endif #endif /* 
_LINUX_FTRACE_H */ diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h index 0b4df55d7a7..366a054d0b0 100644 --- a/include/linux/ftrace_irq.h +++ b/include/linux/ftrace_irq.h @@ -2,7 +2,7 @@ #define _LINUX_FTRACE_IRQ_H -#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_RET_TRACER) +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_GRAPH_TRACER) extern void ftrace_nmi_enter(void); extern void ftrace_nmi_exit(void); #else diff --git a/include/linux/sched.h b/include/linux/sched.h index d02a0ca70ee..7ad48f2a275 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1365,7 +1365,7 @@ struct task_struct { unsigned long default_timer_slack_ns; struct list_head *scm_work_list; -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Index of current stored adress in ret_stack */ int curr_ret_stack; /* Stack of return addresses for return function tracing */ diff --git a/kernel/Makefile b/kernel/Makefile index 03a45e7e87b..703cf3b7389 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -21,7 +21,7 @@ CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_sched_clock.o = -pg CFLAGS_REMOVE_sched.o = -pg endif -ifdef CONFIG_FUNCTION_RET_TRACER +ifdef CONFIG_FUNCTION_GRAPH_TRACER CFLAGS_REMOVE_extable.o = -pg # For __kernel_text_address() CFLAGS_REMOVE_module.o = -pg # For __module_text_address() endif diff --git a/kernel/fork.c b/kernel/fork.c index d6e1a3205f6..5f82a999c03 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -140,7 +140,7 @@ void free_task(struct task_struct *tsk) prop_local_destroy_single(&tsk->dirties); free_thread_info(tsk->stack); rt_mutex_debug_task_free(tsk); - ftrace_retfunc_exit_task(tsk); + ftrace_graph_exit_task(tsk); free_task_struct(tsk); } EXPORT_SYMBOL(free_task); @@ -1271,7 +1271,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, total_forks++; spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); - ftrace_retfunc_init_task(p); + 
ftrace_graph_init_task(p); proc_fork_connector(p); cgroup_post_fork(p); return p; diff --git a/kernel/sched.c b/kernel/sched.c index 388d9db044a..52490bf6b88 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5901,7 +5901,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) * The idle tasks have their own, simple scheduling class: */ idle->sched_class = &idle_sched_class; - ftrace_retfunc_init_task(idle); + ftrace_graph_init_task(idle); } /* diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 620feadff67..eb9b901e077 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -12,7 +12,7 @@ config NOP_TRACER config HAVE_FUNCTION_TRACER bool -config HAVE_FUNCTION_RET_TRACER +config HAVE_FUNCTION_GRAPH_TRACER bool config HAVE_FUNCTION_TRACE_MCOUNT_TEST @@ -63,15 +63,18 @@ config FUNCTION_TRACER (the bootup default), then the overhead of the instructions is very small and not measurable even in micro-benchmarks. -config FUNCTION_RET_TRACER - bool "Kernel Function return Tracer" - depends on HAVE_FUNCTION_RET_TRACER +config FUNCTION_GRAPH_TRACER + bool "Kernel Function Graph Tracer" + depends on HAVE_FUNCTION_GRAPH_TRACER depends on FUNCTION_TRACER help - Enable the kernel to trace a function at its return. - It's first purpose is to trace the duration of functions. - This is done by setting the current return address on the thread - info structure of the current task. + Enable the kernel to trace a function at both its return + and its entry. + It's first purpose is to trace the duration of functions and + draw a call graph for each thread with some informations like + the return value. + This is done by setting the current return address on the current + task structure into a stack of calls. 
config IRQSOFF_TRACER bool "Interrupts-off Latency Tracer" diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index cef4bcb4e82..08c5fe6ddc0 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -29,7 +29,7 @@ obj-$(CONFIG_NOP_TRACER) += trace_nop.o obj-$(CONFIG_STACK_TRACER) += trace_stack.o obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o obj-$(CONFIG_BOOT_TRACER) += trace_boot.o -obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o obj-$(CONFIG_BTS_TRACER) += trace_bts.o diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 53042f118f2..9e19976af72 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -395,11 +395,11 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) unsigned long ip, fl; unsigned long ftrace_addr; -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER if (ftrace_tracing_type == FTRACE_TYPE_ENTER) ftrace_addr = (unsigned long)ftrace_caller; else - ftrace_addr = (unsigned long)ftrace_return_caller; + ftrace_addr = (unsigned long)ftrace_graph_caller; #else ftrace_addr = (unsigned long)ftrace_caller; #endif @@ -1496,13 +1496,13 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, return ret; } -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER static atomic_t ftrace_retfunc_active; /* The callback that hooks the return of a function */ -trace_function_return_t ftrace_function_return = - (trace_function_return_t)ftrace_stub; +trace_function_graph_t ftrace_graph_function = + (trace_function_graph_t)ftrace_stub; /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. 
*/ @@ -1549,7 +1549,7 @@ free: } /* Allocate a return stack for each task */ -static int start_return_tracing(void) +static int start_graph_tracing(void) { struct ftrace_ret_stack **ret_stack_list; int ret; @@ -1569,7 +1569,7 @@ static int start_return_tracing(void) return ret; } -int register_ftrace_return(trace_function_return_t func) +int register_ftrace_graph(trace_function_graph_t func) { int ret = 0; @@ -1584,13 +1584,13 @@ int register_ftrace_return(trace_function_return_t func) goto out; } atomic_inc(&ftrace_retfunc_active); - ret = start_return_tracing(); + ret = start_graph_tracing(); if (ret) { atomic_dec(&ftrace_retfunc_active); goto out; } ftrace_tracing_type = FTRACE_TYPE_RETURN; - ftrace_function_return = func; + ftrace_graph_function = func; ftrace_startup(); out: @@ -1598,12 +1598,12 @@ out: return ret; } -void unregister_ftrace_return(void) +void unregister_ftrace_graph(void) { mutex_lock(&ftrace_sysctl_lock); atomic_dec(&ftrace_retfunc_active); - ftrace_function_return = (trace_function_return_t)ftrace_stub; + ftrace_graph_function = (trace_function_graph_t)ftrace_stub; ftrace_shutdown(); /* Restore normal tracing type */ ftrace_tracing_type = FTRACE_TYPE_ENTER; @@ -1612,7 +1612,7 @@ void unregister_ftrace_return(void) } /* Allocate a return stack for newly created task */ -void ftrace_retfunc_init_task(struct task_struct *t) +void ftrace_graph_init_task(struct task_struct *t) { if (atomic_read(&ftrace_retfunc_active)) { t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH @@ -1626,7 +1626,7 @@ void ftrace_retfunc_init_task(struct task_struct *t) t->ret_stack = NULL; } -void ftrace_retfunc_exit_task(struct task_struct *t) +void ftrace_graph_exit_task(struct task_struct *t) { struct ftrace_ret_stack *ret_stack = t->ret_stack; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8df8fdd69c9..f21ab2c68fd 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -878,15 +878,15 @@ trace_function(struct trace_array *tr, struct trace_array_cpu 
*data, ring_buffer_unlock_commit(tr->buffer, event, irq_flags); } -#ifdef CONFIG_FUNCTION_RET_TRACER -static void __trace_function_return(struct trace_array *tr, +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static void __trace_function_graph(struct trace_array *tr, struct trace_array_cpu *data, - struct ftrace_retfunc *trace, + struct ftrace_graph_ret *trace, unsigned long flags, int pc) { struct ring_buffer_event *event; - struct ftrace_ret_entry *entry; + struct ftrace_graph_entry *entry; unsigned long irq_flags; if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) @@ -1177,8 +1177,8 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) local_irq_restore(flags); } -#ifdef CONFIG_FUNCTION_RET_TRACER -void trace_function_return(struct ftrace_retfunc *trace) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +void trace_function_graph(struct ftrace_graph_ret *trace) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; @@ -1193,12 +1193,12 @@ void trace_function_return(struct ftrace_retfunc *trace) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); - __trace_function_return(tr, data, trace, flags, pc); + __trace_function_graph(tr, data, trace, flags, pc); } atomic_dec(&data->disabled); raw_local_irq_restore(flags); } -#endif /* CONFIG_FUNCTION_RET_TRACER */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ static struct ftrace_ops trace_ops __read_mostly = { @@ -2001,7 +2001,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) break; } case TRACE_FN_RET: { - return print_return_function(iter); + return print_graph_function(iter); break; } case TRACE_BRANCH: { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3abd645e8af..72b5ef86876 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -57,7 +57,7 @@ struct ftrace_entry { }; /* Function return entry */ -struct ftrace_ret_entry { +struct ftrace_graph_entry { struct trace_entry ent; unsigned long ip; unsigned long 
parent_ip; @@ -264,7 +264,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\ IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\ IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ - IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET);\ + IF_ASSIGN(var, ent, struct ftrace_graph_entry, TRACE_FN_RET);\ IF_ASSIGN(var, ent, struct bts_entry, TRACE_BTS);\ __ftrace_bad_type(); \ } while (0) @@ -398,7 +398,7 @@ void trace_function(struct trace_array *tr, unsigned long parent_ip, unsigned long flags, int pc); void -trace_function_return(struct ftrace_retfunc *trace); +trace_function_graph(struct ftrace_graph_ret *trace); void trace_bts(struct trace_array *tr, unsigned long from, @@ -489,11 +489,11 @@ extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args); extern unsigned long trace_flags; /* Standard output formatting function used for function return traces */ -#ifdef CONFIG_FUNCTION_RET_TRACER -extern enum print_line_t print_return_function(struct trace_iterator *iter); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +extern enum print_line_t print_graph_function(struct trace_iterator *iter); #else static inline enum print_line_t -print_return_function(struct trace_iterator *iter) +print_graph_function(struct trace_iterator *iter) { return TRACE_TYPE_UNHANDLED; } diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c new file mode 100644 index 00000000000..f5bad4624d2 --- /dev/null +++ b/kernel/trace/trace_functions_graph.c @@ -0,0 +1,98 @@ +/* + * + * Function graph tracer. 
+ * Copyright (c) 2008 Frederic Weisbecker + * Mostly borrowed from function tracer which + * is Copyright (c) Steven Rostedt + * + */ +#include +#include +#include +#include + +#include "trace.h" + + +#define TRACE_GRAPH_PRINT_OVERRUN 0x1 +static struct tracer_opt trace_opts[] = { + /* Display overruns or not */ + { TRACER_OPT(overrun, TRACE_GRAPH_PRINT_OVERRUN) }, + { } /* Empty entry */ +}; + +static struct tracer_flags tracer_flags = { + .val = 0, /* Don't display overruns by default */ + .opts = trace_opts +}; + + +static int graph_trace_init(struct trace_array *tr) +{ + int cpu; + for_each_online_cpu(cpu) + tracing_reset(tr, cpu); + + return register_ftrace_graph(&trace_function_graph); +} + +static void graph_trace_reset(struct trace_array *tr) +{ + unregister_ftrace_graph(); +} + + +enum print_line_t +print_graph_function(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry = iter->ent; + struct ftrace_graph_entry *field; + int ret; + + if (entry->type == TRACE_FN_RET) { + trace_assign_type(field, entry); + ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + ret = seq_print_ip_sym(s, field->ip, + trace_flags & TRACE_ITER_SYM_MASK); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, " (%llu ns)", + field->rettime - field->calltime); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { + ret = trace_seq_printf(s, " (Overruns: %lu)", + field->overrun); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + ret = trace_seq_printf(s, "\n"); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; + } + return TRACE_TYPE_UNHANDLED; +} + +static struct tracer graph_trace __read_mostly = { + .name = "function-graph", + .init = graph_trace_init, + .reset = graph_trace_reset, + .print_line = print_graph_function, + .flags = &tracer_flags, +}; + +static __init int 
init_graph_trace(void) +{ + return register_tracer(&graph_trace); +} + +device_initcall(init_graph_trace); -- cgit v1.2.3-70-g09d2 From e8e1abe92fd7ea9d823a3aaf81d10e2cba593b6b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 3 Dec 2008 11:04:51 -0500 Subject: ftrace: fix race in function graph during fork Impact: graph tracer race/crash fix There is a nasty race in startup of a new process running the function graph tracer. In fork.c: total_forks++; spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); ftrace_graph_init_task(p); proc_fork_connector(p); cgroup_post_fork(p); return p; The new task is free to run as soon as the tasklist_lock is released. This is before the ftrace_graph_init_task. If the task does run it will be using the same ret_stack and curr_ret_stack as the parent. This will cause crashes that are difficult to debug. This patch moves the ftrace_graph_init_task to just after the alloc_pid code. This fixes the above race. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/fork.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 5f82a999c03..7407ab31987 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1137,6 +1137,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, } } + ftrace_graph_init_task(p); + p->pid = pid_nr(pid); p->tgid = p->pid; if (clone_flags & CLONE_THREAD) @@ -1145,7 +1147,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (current->nsproxy != p->nsproxy) { retval = ns_cgroup_clone(p, pid); if (retval) - goto bad_fork_free_pid; + goto bad_fork_free_graph; } p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? 
child_tidptr : NULL; @@ -1238,7 +1240,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; - goto bad_fork_free_pid; + goto bad_fork_free_graph; } if (clone_flags & CLONE_THREAD) { @@ -1271,11 +1273,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, total_forks++; spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); - ftrace_graph_init_task(p); proc_fork_connector(p); cgroup_post_fork(p); return p; +bad_fork_free_graph: + ftrace_graph_exit_task(p); bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); -- cgit v1.2.3-70-g09d2 From 7657d90497f98426af17f0ac633a9b335bb7a8fb Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Wed, 3 Dec 2008 13:17:33 -0600 Subject: user namespaces: require cap_set{ug}id for CLONE_NEWUSER While ideally CLONE_NEWUSER will eventually require no privilege, the required permission checks are currently not there. As a result, CLONE_NEWUSER has the same effect as a setuid(0)+setgroups(1,"0"). While we already require CAP_SYS_ADMIN, requiring CAP_SETUID and CAP_SETGID seems appropriate. Signed-off-by: Serge E. Hallyn Acked-by: "Eric W. 
Biederman" Signed-off-by: James Morris --- kernel/fork.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 1dd89451fae..e3a85b33107 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1344,7 +1344,8 @@ long do_fork(unsigned long clone_flags, /* hopefully this check will go away when userns support is * complete */ - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) || + !capable(CAP_SETGID)) return -EPERM; } -- cgit v1.2.3-70-g09d2 From bf53de907dfdaac178c92d774aae7370d7b97d20 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 19 Dec 2008 15:10:24 +0100 Subject: x86, bts: add fork and exit handling Impact: introduce new ptrace facility Add arch_ptrace_untrace() function that is called when the tracer detaches (either voluntarily or when the tracing task dies); ptrace_disable() is only called on a voluntary detach. Add ptrace_fork() and arch_ptrace_fork(). They are called when a traced task is forked. Clear DS and BTS related fields on fork. Release DS resources and reclaim memory in ptrace_untrace(). This releases resources already when the tracing task dies. We used to do that when the traced task dies. 
Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ds.h | 9 ++++++++ arch/x86/include/asm/ptrace.h | 7 ++++++ arch/x86/kernel/ds.c | 11 ++++++++++ arch/x86/kernel/process_32.c | 20 ++++++++--------- arch/x86/kernel/process_64.c | 20 ++++++++--------- arch/x86/kernel/ptrace.c | 50 ++++++++++++++++++++++++++++++++++--------- include/linux/ptrace.h | 22 +++++++++++++++++++ kernel/fork.c | 2 ++ kernel/ptrace.c | 12 +++++++++++ 9 files changed, 121 insertions(+), 32 deletions(-) (limited to 'kernel/fork.c') diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index ee0ea3a96c1..a8f672ba100 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h @@ -252,12 +252,21 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); */ extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); +/* + * Task clone/init and cleanup work + */ +extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father); +extern void ds_exit_thread(struct task_struct *tsk); + #else /* CONFIG_X86_DS */ struct cpuinfo_x86; static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} static inline void ds_switch_to(struct task_struct *prev, struct task_struct *next) {} +static inline void ds_copy_thread(struct task_struct *tsk, + struct task_struct *father) {} +static inline void ds_exit_thread(struct task_struct *tsk) {} #endif /* CONFIG_X86_DS */ #endif /* _ASM_X86_DS_H */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index fbf74421591..6d34d954c22 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -235,6 +235,13 @@ extern int do_get_thread_area(struct task_struct *p, int idx, extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); +extern void x86_ptrace_untrace(struct task_struct *); +extern void x86_ptrace_fork(struct task_struct *child, + unsigned long clone_flags); + 
+#define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk) +#define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags) + #endif /* __KERNEL__ */ #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 98d271e60e0..da91701a234 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -1017,3 +1017,14 @@ void ds_switch_to(struct task_struct *prev, struct task_struct *next) update_debugctlmsr(next->thread.debugctlmsr); } + +void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) +{ + clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR); + tsk->thread.ds_ctx = NULL; +} + +void ds_exit_thread(struct task_struct *tsk) +{ + WARN_ON(tsk->thread.ds_ctx); +} diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 605eff9a8ac..3ba155d2488 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -60,6 +60,7 @@ #include #include #include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -251,17 +252,8 @@ void exit_thread(void) tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } -#ifdef CONFIG_X86_DS - /* Free any BTS tracers that have not been properly released. 
*/ - if (unlikely(current->bts)) { - ds_release_bts(current->bts); - current->bts = NULL; - - kfree(current->bts_buffer); - current->bts_buffer = NULL; - current->bts_size = 0; - } -#endif /* CONFIG_X86_DS */ + + ds_exit_thread(current); } void flush_thread(void) @@ -343,6 +335,12 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } + + ds_copy_thread(p, current); + + clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); + p->thread.debugctlmsr = 0; + return err; } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 1cfd2a4bf85..416fb9282f4 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -53,6 +53,7 @@ #include #include #include +#include asmlinkage extern void ret_from_fork(void); @@ -236,17 +237,8 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } -#ifdef CONFIG_X86_DS - /* Free any BTS tracers that have not been properly released. */ - if (unlikely(current->bts)) { - ds_release_bts(current->bts); - current->bts = NULL; - - kfree(current->bts_buffer); - current->bts_buffer = NULL; - current->bts_size = 0; - } -#endif /* CONFIG_X86_DS */ + + ds_exit_thread(current); } void flush_thread(void) @@ -376,6 +368,12 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, if (err) goto out; } + + ds_copy_thread(p, me); + + clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); + p->thread.debugctlmsr = 0; + err = 0; out: if (err && p->thread.io_bitmap_ptr) { diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 45e9855da2d..6ad2bb60765 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -769,8 +769,47 @@ static int ptrace_bts_size(struct task_struct *child) return (trace->ds.top - trace->ds.begin) / trace->ds.size; } + +static void ptrace_bts_fork(struct task_struct *tsk) +{ + tsk->bts = NULL; + tsk->bts_buffer = NULL; + tsk->bts_size = 0; + tsk->thread.bts_ovfl_signal = 0; +} + +static void 
ptrace_bts_untrace(struct task_struct *child) +{ + if (unlikely(child->bts)) { + ds_release_bts(child->bts); + child->bts = NULL; + + kfree(child->bts_buffer); + child->bts_buffer = NULL; + child->bts_size = 0; + } +} + +static void ptrace_bts_detach(struct task_struct *child) +{ + ptrace_bts_untrace(child); +} +#else +static inline void ptrace_bts_fork(struct task_struct *tsk) {} +static inline void ptrace_bts_detach(struct task_struct *child) {} +static inline void ptrace_bts_untrace(struct task_struct *child) {} #endif /* CONFIG_X86_PTRACE_BTS */ +void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags) +{ + ptrace_bts_fork(child); +} + +void x86_ptrace_untrace(struct task_struct *child) +{ + ptrace_bts_untrace(child); +} + /* * Called by kernel/ptrace.c when detaching.. * @@ -782,16 +821,7 @@ void ptrace_disable(struct task_struct *child) #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); #endif -#ifdef CONFIG_X86_PTRACE_BTS - if (child->bts) { - ds_release_bts(child->bts); - child->bts = NULL; - - kfree(child->bts_buffer); - child->bts_buffer = NULL; - child->bts_size = 0; - } -#endif /* CONFIG_X86_PTRACE_BTS */ + ptrace_bts_detach(child); } #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 22641d5d45d..98b93ca4db0 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -94,6 +94,7 @@ extern void ptrace_notify(int exit_code); extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent); extern void __ptrace_unlink(struct task_struct *child); +extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags); #define PTRACE_MODE_READ 1 #define PTRACE_MODE_ATTACH 2 /* Returns 0 on success, -errno on denial. 
*/ @@ -313,6 +314,27 @@ static inline void user_enable_block_step(struct task_struct *task) #define arch_ptrace_stop(code, info) do { } while (0) #endif +#ifndef arch_ptrace_untrace +/* + * Do machine-specific work before untracing child. + * + * This is called for a normal detach as well as from ptrace_exit() + * when the tracing task dies. + * + * Called with write_lock(&tasklist_lock) held. + */ +#define arch_ptrace_untrace(task) do { } while (0) +#endif + +#ifndef arch_ptrace_fork +/* + * Do machine-specific work to initialize a new task. + * + * This is called from copy_process(). + */ +#define arch_ptrace_fork(child, clone_flags) do { } while (0) +#endif + extern int task_current_syscall(struct task_struct *target, long *callno, unsigned long args[6], unsigned int maxargs, unsigned long *sp, unsigned long *pc); diff --git a/kernel/fork.c b/kernel/fork.c index 7b93da72d4a..65ce60adc8e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1096,6 +1096,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif + if (unlikely(ptrace_reparented(current))) + ptrace_fork(p, clone_flags); /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 4c8bcd7dd8e..100a71cfdab 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -25,6 +25,17 @@ #include #include + +/* + * Initialize a new task whose father had been ptraced. + * + * Called from copy_process(). + */ +void ptrace_fork(struct task_struct *child, unsigned long clone_flags) +{ + arch_ptrace_fork(child, clone_flags); +} + /* * ptrace a task: make the debugger its new parent and * move it to the ptrace list. 
@@ -72,6 +83,7 @@ void __ptrace_unlink(struct task_struct *child) child->parent = child->real_parent; list_del_init(&child->ptrace_entry); + arch_ptrace_untrace(child); if (task_is_traced(child)) ptrace_untrace(child); } -- cgit v1.2.3-70-g09d2