From ffa8e59df047d57e812a04f7d6baf6a25c652c0c Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:08:34 -0400 Subject: capabilities: do not drop CAP_SETPCAP from the initial task In olden' days of yore CAP_SETPCAP had special meaning for the init task. We actually have code to make sure that CAP_SETPCAP wasn't in pE of things using the init_cred. But CAP_SETPCAP isn't so special any more and we don't have a reason to special case dropping it for init or kthreads.... Signed-off-by: Eric Paris Acked-by: Andrew G. Morgan Signed-off-by: James Morris --- include/linux/capability.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux/capability.h') diff --git a/include/linux/capability.h b/include/linux/capability.h index 16ee8b49a20..11d562863e4 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -412,7 +412,6 @@ extern const kernel_cap_t __cap_init_eff_set; # define CAP_EMPTY_SET ((kernel_cap_t){{ 0, 0 }}) # define CAP_FULL_SET ((kernel_cap_t){{ ~0, ~0 }}) -# define CAP_INIT_EFF_SET ((kernel_cap_t){{ ~CAP_TO_MASK(CAP_SETPCAP), ~0 }}) # define CAP_FS_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \ | CAP_TO_MASK(CAP_LINUX_IMMUTABLE), \ CAP_FS_MASK_B1 } }) @@ -423,10 +422,10 @@ extern const kernel_cap_t __cap_init_eff_set; #endif /* _KERNEL_CAPABILITY_U32S != 2 */ #define CAP_INIT_INH_SET CAP_EMPTY_SET +#define CAP_INIT_EFF_SET CAP_FULL_SET # define cap_clear(c) do { (c) = __cap_empty_set; } while (0) # define cap_set_full(c) do { (c) = __cap_full_set; } while (0) -# define cap_set_init_eff(c) do { (c) = __cap_init_eff_set; } while (0) #define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag)) #define cap_lower(c, flag) ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag)) @@ -547,6 +546,9 @@ extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool task_ns_capable(struct task_struct *t, int cap); +extern const kernel_cap_t __cap_empty_set; +extern const kernel_cap_t __cap_full_set; + /** * nsown_capable - Check superior capability to one's own user_ns * @cap: The capability in question -- cgit v1.2.3-70-g09d2 From 5163b583a036b103c3cec7171d6731c125773ed6 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:08:39 -0400 Subject: capabilities: delete unused cap_set_full unused code. Clean it up. Signed-off-by: Eric Paris Acked-by: David Howells Acked-by: Andrew G. Morgan Signed-off-by: James Morris --- include/linux/capability.h | 2 -- kernel/capability.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'include/linux/capability.h') diff --git a/include/linux/capability.h b/include/linux/capability.h index 11d562863e4..8d0da30dad2 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -425,7 +425,6 @@ extern const kernel_cap_t __cap_init_eff_set; #define CAP_INIT_EFF_SET CAP_FULL_SET # define cap_clear(c) do { (c) = __cap_empty_set; } while (0) -# define cap_set_full(c) do { (c) = __cap_full_set; } while (0) #define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag)) #define cap_lower(c, flag) ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag)) @@ -547,7 +546,6 @@ extern bool ns_capable(struct user_namespace *ns, int cap); extern bool task_ns_capable(struct task_struct *t, int cap); extern const kernel_cap_t __cap_empty_set; -extern const kernel_cap_t __cap_full_set; /** * nsown_capable - Check superior capability to one's own user_ns diff --git a/kernel/capability.c b/kernel/capability.c index 2a374d512ea..14ea4210a53 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -22,10 +22,8 @@ */ const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET; -const kernel_cap_t __cap_full_set = CAP_FULL_SET; EXPORT_SYMBOL(__cap_empty_set); -EXPORT_SYMBOL(__cap_full_set); int file_caps_enabled = 1; -- cgit v1.2.3-70-g09d2 From a3232d2fa2e3cbab3e76d91cdae5890fee8a4034 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:08:45 -0400 Subject: capabilities: delete all CAP_INIT macros The CAP_INIT macros of INH, BSET, and EFF made sense at one point in time, but now days they aren't helping. Just open code the logic in the init_cred. Signed-off-by: Eric Paris Acked-by: David Howells Signed-off-by: James Morris --- include/linux/capability.h | 3 --- include/linux/init_task.h | 7 ------- kernel/cred.c | 6 +++--- 3 files changed, 3 insertions(+), 13 deletions(-) (limited to 'include/linux/capability.h') diff --git a/include/linux/capability.h b/include/linux/capability.h index 8d0da30dad2..04fed72809d 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -421,9 +421,6 @@ extern const kernel_cap_t __cap_init_eff_set; #endif /* _KERNEL_CAPABILITY_U32S != 2 */ -#define CAP_INIT_INH_SET CAP_EMPTY_SET -#define CAP_INIT_EFF_SET CAP_FULL_SET - # define cap_clear(c) do { (c) = __cap_empty_set; } while (0) #define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag)) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index caa151fbebb..1f277204de3 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -83,13 +83,6 @@ extern struct group_info init_groups; #define INIT_IDS #endif -/* - * Because of the reduced scope of CAP_SETPCAP when filesystem - * capabilities are in effect, it is safe to allow CAP_SETPCAP to - * be available in the default configuration. - */ -# define CAP_INIT_BSET CAP_FULL_SET - #ifdef CONFIG_RCU_BOOST #define INIT_TASK_RCU_BOOST() \ .rcu_boost_mutex = NULL, diff --git a/kernel/cred.c b/kernel/cred.c index 5557b55048d..b982f0863ae 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -49,10 +49,10 @@ struct cred init_cred = { .magic = CRED_MAGIC, #endif .securebits = SECUREBITS_DEFAULT, - .cap_inheritable = CAP_INIT_INH_SET, + .cap_inheritable = CAP_EMPTY_SET, .cap_permitted = CAP_FULL_SET, - .cap_effective = CAP_INIT_EFF_SET, - .cap_bset = CAP_INIT_BSET, + .cap_effective = CAP_FULL_SET, + .cap_bset = CAP_FULL_SET, .user = INIT_USER, .group_info = &init_groups, #ifdef CONFIG_KEYS -- cgit v1.2.3-70-g09d2 From 9a7adcf5c6dea63d2e47e6f6d2f7a6c9f48b9337 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 11 Jan 2011 09:54:33 -0800 Subject: timers: Posix interface for alarm-timers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch exposes alarm-timers to userland via the posix clock and timers interface, using two new clockids: CLOCK_REALTIME_ALARM and CLOCK_BOOTTIME_ALARM. Both clockids behave identically to CLOCK_REALTIME and CLOCK_BOOTTIME, respectively, but timers set against the _ALARM suffixed clockids will wake the system if it is suspended. Some background can be found here: https://lwn.net/Articles/429925/ The concept for Alarm-timers was inspired by the Android Alarm driver (by Arve Hjønnevåg) found in the Android kernel tree. See: http://android.git.kernel.org/?p=kernel/common.git;a=blob;f=drivers/rtc/alarm.c;h=1250edfbdf3302f5e4ea6194847c6ef4bb7beb1c;hb=android-2.6.36 While the in-kernel interface is pretty similar between alarm-timers and Android alarm driver, the user-space interface for the Android alarm driver is via ioctls to a new char device. As mentioned above, I've instead chosen to export this functionality via the posix interface, as it seemed a little simpler and avoids creating duplicate interfaces to things like CLOCK_REALTIME and CLOCK_MONOTONIC under alternate names (ie:ANDROID_ALARM_RTC and ANDROID_ALARM_SYSTEMTIME). The semantics of the Android alarm driver are different from what this posix interface provides. For instance, threads other then the thread waiting on the Android alarm driver are able to modify the alarm being waited on. Also this interface does not allow the same wakelock semantics that the Android driver provides (ie: kernel takes a wakelock on RTC alarm-interupt, and holds it through process wakeup, and while the process runs, until the process either closes the char device or calls back in to wait on a new alarm). One potential way to implement similar semantics may be via the timerfd infrastructure, but this needs more research. There may also need to be some sort of sysfs system level policy hooks that allow alarm timers to be disabled to keep them from firing at inappropriate times (ie: laptop in a well insulated bag, mid-flight). CC: Arve Hjønnevåg CC: Thomas Gleixner CC: Alessandro Zummo Acked-by: Arnd Bergmann Signed-off-by: John Stultz --- include/linux/capability.h | 7 +- include/linux/posix-timers.h | 2 + include/linux/time.h | 2 + kernel/time/alarmtimer.c | 330 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 340 insertions(+), 1 deletion(-) (limited to 'include/linux/capability.h') diff --git a/include/linux/capability.h b/include/linux/capability.h index 16ee8b49a20..7cb23eae693 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -355,7 +355,12 @@ struct cpu_vfs_cap_data { #define CAP_SYSLOG 34 -#define CAP_LAST_CAP CAP_SYSLOG +/* Allow triggering something that will wake the system */ + +#define CAP_WAKE_ALARM 35 + + +#define CAP_LAST_CAP CAP_WAKE_ALARM #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index d51243ae072..808227d40a6 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -5,6 +5,7 @@ #include #include #include +#include union cpu_time_count { cputime_t cpu; @@ -80,6 +81,7 @@ struct k_itimer { unsigned long incr; unsigned long expires; } mmtimer; + struct alarm alarmtimer; } it; }; diff --git a/include/linux/time.h b/include/linux/time.h index 4ea5a75fcac..b3061782dec 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -295,6 +295,8 @@ struct itimerval { #define CLOCK_REALTIME_COARSE 5 #define CLOCK_MONOTONIC_COARSE 6 #define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 /* * The IDs of various hardware clocks: diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 48c2ee949e6..4058ad79d55 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -215,6 +215,21 @@ static int alarmtimer_suspend(struct device *dev) } +static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type) +{ + ktime_t delta; + unsigned long flags; + struct alarm_base *base = &alarm_bases[type]; + + delta = ktime_sub(absexp, base->gettime()); + + spin_lock_irqsave(&freezer_delta_lock, flags); + if (!freezer_delta.tv64 || (delta.tv64 < freezer_delta.tv64)) + freezer_delta = delta; + spin_unlock_irqrestore(&freezer_delta_lock, flags); +} + + /************************************************************************** * alarm kernel interface code */ @@ -279,6 +294,309 @@ void alarm_cancel(struct alarm *alarm) } +/************************************************************************** + * alarm posix interface code + */ + +/* + * clock2alarm - helper that converts from clockid to alarmtypes + * @clockid: clockid. + * + * Helper function that converts from clockids to alarmtypes + */ +static enum alarmtimer_type clock2alarm(clockid_t clockid) +{ + if (clockid == CLOCK_REALTIME_ALARM) + return ALARM_REALTIME; + if (clockid == CLOCK_BOOTTIME_ALARM) + return ALARM_BOOTTIME; + return -1; +} + +/* + * alarm_handle_timer - Callback for posix timers + * @alarm: alarm that fired + * + * Posix timer callback for expired alarm timers. + */ +static void alarm_handle_timer(struct alarm *alarm) +{ + struct k_itimer *ptr = container_of(alarm, struct k_itimer, + it.alarmtimer); + if (posix_timer_event(ptr, 0) != 0) + ptr->it_overrun++; +} + +/* + * alarm_clock_getres - posix getres interface + * @which_clock: clockid + * @tp: timespec to fill + * + * Returns the granularity of underlying alarm base clock + */ +static int alarm_clock_getres(const clockid_t which_clock, struct timespec *tp) +{ + clockid_t baseid = alarm_bases[clock2alarm(which_clock)].base_clockid; + + return hrtimer_get_res(baseid, tp); +} + +/** + * alarm_clock_get - posix clock_get interface + * @which_clock: clockid + * @tp: timespec to fill. + * + * Provides the underlying alarm base time. + */ +static int alarm_clock_get(clockid_t which_clock, struct timespec *tp) +{ + struct alarm_base *base = &alarm_bases[clock2alarm(which_clock)]; + + *tp = ktime_to_timespec(base->gettime()); + return 0; +} + +/** + * alarm_timer_create - posix timer_create interface + * @new_timer: k_itimer pointer to manage + * + * Initializes the k_itimer structure. + */ +static int alarm_timer_create(struct k_itimer *new_timer) +{ + enum alarmtimer_type type; + struct alarm_base *base; + + if (!capable(CAP_WAKE_ALARM)) + return -EPERM; + + type = clock2alarm(new_timer->it_clock); + base = &alarm_bases[type]; + alarm_init(&new_timer->it.alarmtimer, type, alarm_handle_timer); + return 0; +} + +/** + * alarm_timer_get - posix timer_get interface + * @new_timer: k_itimer pointer + * @cur_setting: itimerspec data to fill + * + * Copies the itimerspec data out from the k_itimer + */ +static void alarm_timer_get(struct k_itimer *timr, + struct itimerspec *cur_setting) +{ + cur_setting->it_interval = + ktime_to_timespec(timr->it.alarmtimer.period); + cur_setting->it_value = + ktime_to_timespec(timr->it.alarmtimer.node.expires); + return; +} + +/** + * alarm_timer_del - posix timer_del interface + * @timr: k_itimer pointer to be deleted + * + * Cancels any programmed alarms for the given timer. + */ +static int alarm_timer_del(struct k_itimer *timr) +{ + alarm_cancel(&timr->it.alarmtimer); + return 0; +} + +/** + * alarm_timer_set - posix timer_set interface + * @timr: k_itimer pointer to be deleted + * @flags: timer flags + * @new_setting: itimerspec to be used + * @old_setting: itimerspec being replaced + * + * Sets the timer to new_setting, and starts the timer. + */ +static int alarm_timer_set(struct k_itimer *timr, int flags, + struct itimerspec *new_setting, + struct itimerspec *old_setting) +{ + /* Save old values */ + old_setting->it_interval = + ktime_to_timespec(timr->it.alarmtimer.period); + old_setting->it_value = + ktime_to_timespec(timr->it.alarmtimer.node.expires); + + /* If the timer was already set, cancel it */ + alarm_cancel(&timr->it.alarmtimer); + + /* start the timer */ + alarm_start(&timr->it.alarmtimer, + timespec_to_ktime(new_setting->it_value), + timespec_to_ktime(new_setting->it_interval)); + return 0; +} + +/** + * alarmtimer_nsleep_wakeup - Wakeup function for alarm_timer_nsleep + * @alarm: ptr to alarm that fired + * + * Wakes up the task that set the alarmtimer + */ +static void alarmtimer_nsleep_wakeup(struct alarm *alarm) +{ + struct task_struct *task = (struct task_struct *)alarm->data; + + alarm->data = NULL; + if (task) + wake_up_process(task); +} + +/** + * alarmtimer_do_nsleep - Internal alarmtimer nsleep implementation + * @alarm: ptr to alarmtimer + * @absexp: absolute expiration time + * + * Sets the alarm timer and sleeps until it is fired or interrupted. + */ +static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp) +{ + alarm->data = (void *)current; + do { + set_current_state(TASK_INTERRUPTIBLE); + alarm_start(alarm, absexp, ktime_set(0, 0)); + if (likely(alarm->data)) + schedule(); + + alarm_cancel(alarm); + } while (alarm->data && !signal_pending(current)); + + __set_current_state(TASK_RUNNING); + + return (alarm->data == NULL); +} + + +/** + * update_rmtp - Update remaining timespec value + * @exp: expiration time + * @type: timer type + * @rmtp: user pointer to remaining timepsec value + * + * Helper function that fills in rmtp value with time between + * now and the exp value + */ +static int update_rmtp(ktime_t exp, enum alarmtimer_type type, + struct timespec __user *rmtp) +{ + struct timespec rmt; + ktime_t rem; + + rem = ktime_sub(exp, alarm_bases[type].gettime()); + + if (rem.tv64 <= 0) + return 0; + rmt = ktime_to_timespec(rem); + + if (copy_to_user(rmtp, &rmt, sizeof(*rmtp))) + return -EFAULT; + + return 1; + +} + +/** + * alarm_timer_nsleep_restart - restartblock alarmtimer nsleep + * @restart: ptr to restart block + * + * Handles restarted clock_nanosleep calls + */ +static long __sched alarm_timer_nsleep_restart(struct restart_block *restart) +{ + enum alarmtimer_type type = restart->nanosleep.index; + ktime_t exp; + struct timespec __user *rmtp; + struct alarm alarm; + int ret = 0; + + exp.tv64 = restart->nanosleep.expires; + alarm_init(&alarm, type, alarmtimer_nsleep_wakeup); + + if (alarmtimer_do_nsleep(&alarm, exp)) + goto out; + + if (freezing(current)) + alarmtimer_freezerset(exp, type); + + rmtp = restart->nanosleep.rmtp; + if (rmtp) { + ret = update_rmtp(exp, type, rmtp); + if (ret <= 0) + goto out; + } + + + /* The other values in restart are already filled in */ + ret = -ERESTART_RESTARTBLOCK; +out: + return ret; +} + +/** + * alarm_timer_nsleep - alarmtimer nanosleep + * @which_clock: clockid + * @flags: determins abstime or relative + * @tsreq: requested sleep time (abs or rel) + * @rmtp: remaining sleep time saved + * + * Handles clock_nanosleep calls against _ALARM clockids + */ +static int alarm_timer_nsleep(const clockid_t which_clock, int flags, + struct timespec *tsreq, struct timespec __user *rmtp) +{ + enum alarmtimer_type type = clock2alarm(which_clock); + struct alarm alarm; + ktime_t exp; + int ret = 0; + struct restart_block *restart; + + if (!capable(CAP_WAKE_ALARM)) + return -EPERM; + + alarm_init(&alarm, type, alarmtimer_nsleep_wakeup); + + exp = timespec_to_ktime(*tsreq); + /* Convert (if necessary) to absolute time */ + if (flags != TIMER_ABSTIME) { + ktime_t now = alarm_bases[type].gettime(); + exp = ktime_add(now, exp); + } + + if (alarmtimer_do_nsleep(&alarm, exp)) + goto out; + + if (freezing(current)) + alarmtimer_freezerset(exp, type); + + /* abs timers don't set remaining time or restart */ + if (flags == TIMER_ABSTIME) { + ret = -ERESTARTNOHAND; + goto out; + } + + if (rmtp) { + ret = update_rmtp(exp, type, rmtp); + if (ret <= 0) + goto out; + } + + restart = ¤t_thread_info()->restart_block; + restart->fn = alarm_timer_nsleep_restart; + restart->nanosleep.index = type; + restart->nanosleep.expires = exp.tv64; + restart->nanosleep.rmtp = rmtp; + ret = -ERESTART_RESTARTBLOCK; + +out: + return ret; +} /************************************************************************** * alarmtimer initialization code @@ -306,6 +624,18 @@ static int __init alarmtimer_init(void) { int error = 0; int i; + struct k_clock alarm_clock = { + .clock_getres = alarm_clock_getres, + .clock_get = alarm_clock_get, + .timer_create = alarm_timer_create, + .timer_set = alarm_timer_set, + .timer_del = alarm_timer_del, + .timer_get = alarm_timer_get, + .nsleep = alarm_timer_nsleep, + }; + + posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock); + posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock); /* Initialize alarm bases */ alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME; -- cgit v1.2.3-70-g09d2 From 47a150edc2ae734c0f4bf50aa19499e23b9a46f8 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Fri, 13 May 2011 04:27:54 +0100 Subject: Cache user_ns in struct cred If !CONFIG_USERNS, have current_user_ns() defined to (&init_user_ns). Get rid of _current_user_ns. This requires nsown_capable() to be defined in capability.c rather than as static inline in capability.h, so do that. Request_key needs init_user_ns defined at current_user_ns if !CONFIG_USERNS, so forward-declare that in cred.h if !CONFIG_USERNS at current_user_ns() define. Compile-tested with and without CONFIG_USERNS. Signed-off-by: Serge E. Hallyn [ This makes a huge performance difference for acl_permission_check(), up to 30%. And that is one of the hottest kernel functions for loads that are pathname-lookup heavy. ] Signed-off-by: Linus Torvalds --- include/linux/capability.h | 13 +------------ include/linux/cred.h | 10 ++++++++-- kernel/capability.c | 12 ++++++++++++ kernel/cred.c | 12 ++++++------ 4 files changed, 27 insertions(+), 20 deletions(-) (limited to 'include/linux/capability.h') diff --git a/include/linux/capability.h b/include/linux/capability.h index 16ee8b49a20..d4675af963f 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -546,18 +546,7 @@ extern bool has_capability_noaudit(struct task_struct *t, int cap); extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool task_ns_capable(struct task_struct *t, int cap); - -/** - * nsown_capable - Check superior capability to one's own user_ns - * @cap: The capability in question - * - * Return true if the current task has the given superior capability - * targeted at its own user namespace. - */ -static inline bool nsown_capable(int cap) -{ - return ns_capable(current_user_ns(), cap); -} +extern bool nsown_capable(int cap); /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); diff --git a/include/linux/cred.h b/include/linux/cred.h index 9aeeb0ba200..be16b61283c 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -146,6 +146,7 @@ struct cred { void *security; /* subjective LSM security */ #endif struct user_struct *user; /* real user ID subscription */ + struct user_namespace *user_ns; /* cached user->user_ns */ struct group_info *group_info; /* supplementary groups for euid/fsgid */ struct rcu_head rcu; /* RCU deletion hook */ }; @@ -354,10 +355,15 @@ static inline void put_cred(const struct cred *_cred) #define current_fsgid() (current_cred_xxx(fsgid)) #define current_cap() (current_cred_xxx(cap_effective)) #define current_user() (current_cred_xxx(user)) -#define _current_user_ns() (current_cred_xxx(user)->user_ns) #define current_security() (current_cred_xxx(security)) -extern struct user_namespace *current_user_ns(void); +#ifdef CONFIG_USER_NS +#define current_user_ns() (current_cred_xxx(user_ns)) +#else +extern struct user_namespace init_user_ns; +#define current_user_ns() (&init_user_ns) +#endif + #define current_uid_gid(_uid, _gid) \ do { \ diff --git a/kernel/capability.c b/kernel/capability.c index bf0c734d0c1..32a80e08ff4 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -399,3 +399,15 @@ bool task_ns_capable(struct task_struct *t, int cap) return ns_capable(task_cred_xxx(t, user)->user_ns, cap); } EXPORT_SYMBOL(task_ns_capable); + +/** + * nsown_capable - Check superior capability to one's own user_ns + * @cap: The capability in question + * + * Return true if the current task has the given superior capability + * targeted at its own user namespace. + */ +bool nsown_capable(int cap) +{ + return ns_capable(current_user_ns(), cap); +} diff --git a/kernel/cred.c b/kernel/cred.c index 5557b55048d..8093c16b84b 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -54,6 +54,7 @@ struct cred init_cred = { .cap_effective = CAP_INIT_EFF_SET, .cap_bset = CAP_INIT_BSET, .user = INIT_USER, + .user_ns = &init_user_ns, .group_info = &init_groups, #ifdef CONFIG_KEYS .tgcred = &init_tgcred, @@ -410,6 +411,11 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) goto error_put; } + /* cache user_ns in cred. Doesn't need a refcount because it will + * stay pinned by cred->user + */ + new->user_ns = new->user->user_ns; + #ifdef CONFIG_KEYS /* new threads get their own thread keyrings if their parent already * had one */ @@ -741,12 +747,6 @@ int set_create_files_as(struct cred *new, struct inode *inode) } EXPORT_SYMBOL(set_create_files_as); -struct user_namespace *current_user_ns(void) -{ - return _current_user_ns(); -} -EXPORT_SYMBOL(current_user_ns); - #ifdef CONFIG_DEBUG_CREDENTIALS bool creds_are_invalid(const struct cred *cred) -- cgit v1.2.3-70-g09d2