From bfac7009180901f57f20a73c53c3e57b1ce75a1b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 11 Jun 2010 01:09:56 +0200 Subject: sched: thread_group_cputime: Simplify, document the "alive" check thread_group_cputime() looks as if it is rcu-safe, but in fact this was wrong until ea6d290c which pins task->signal to task_struct. It checks ->sighand != NULL under rcu, but this can't help if ->signal can go away. Fortunately the caller either holds ->siglock, or it is fastpath_timer_check() which uses current and checks exit_state == 0. - Since ea6d290c commit tsk->signal is stable, we can read it first and avoid the initialization from INIT_CPUTIME. - Even if tsk->signal is always valid, we still have to check it is safe to use next_thread() under rcu_read_lock(). Currently the code checks ->sighand != NULL, change it to use pid_alive() which is commonly used to ensure the task wasn't unhashed before we take rcu_read_lock(). Add the comment to explain this check. - Change the main loop to use the while_each_thread() helper. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra LKML-Reference: <20100610230956.GA25921@redhat.com> Signed-off-by: Ingo Molnar --- kernel/posix-cpu-timers.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) (limited to 'kernel/posix-cpu-timers.c') diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 9829646d399..bf2a6502860 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -232,31 +232,24 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) { - struct sighand_struct *sighand; - struct signal_struct *sig; + struct signal_struct *sig = tsk->signal; struct task_struct *t; - *times = INIT_CPUTIME; + times->utime = sig->utime; + times->stime = sig->stime; + times->sum_exec_runtime = sig->sum_sched_runtime; rcu_read_lock(); - sighand = rcu_dereference(tsk->sighand); - if (!sighand) + /* make sure we can trust tsk->thread_group list */ + if (!likely(pid_alive(tsk))) goto out; - sig = tsk->signal; - t = tsk; do { times->utime = cputime_add(times->utime, t->utime); times->stime = cputime_add(times->stime, t->stime); times->sum_exec_runtime += t->se.sum_exec_runtime; - - t = next_thread(t); - } while (t != tsk); - - times->utime = cputime_add(times->utime, sig->utime); - times->stime = cputime_add(times->stime, sig->stime); - times->sum_exec_runtime += sig->sum_sched_runtime; + } while_each_thread(tsk, t); out: rcu_read_unlock(); } -- cgit v1.2.3-70-g09d2 From 0bdd2ed4138ec04e09b4f8165981efc99e439f55 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 11 Jun 2010 01:10:18 +0200 Subject: sched: run_posix_cpu_timers: Don't check ->exit_state, use lock_task_sighand() run_posix_cpu_timers() doesn't work if current has already passed exit_notify(). This was needed to prevent the races with do_wait(). Since ea6d290c ->signal is always valid and can't go away. We can remove the "tsk->exit_state == 0" in fastpath_timer_check() and convert run_posix_cpu_timers() to use lock_task_sighand(). Note: it makes sense to take group_leader's sighand instead, the sub-thread still uses CPU after release_task(). But we need more changes to do this. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra LKML-Reference: <20100610231018.GA25942@redhat.com> Signed-off-by: Ingo Molnar --- kernel/posix-cpu-timers.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'kernel/posix-cpu-timers.c') diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index bf2a6502860..d5dbef5e89e 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1272,10 +1272,6 @@ static inline int fastpath_timer_check(struct task_struct *tsk) { struct signal_struct *sig; - /* tsk == current, ensure it is safe to use ->signal/sighand */ - if (unlikely(tsk->exit_state)) - return 0; - if (!task_cputime_zero(&tsk->cputime_expires)) { struct task_cputime task_sample = { .utime = tsk->utime, @@ -1308,6 +1304,7 @@ void run_posix_cpu_timers(struct task_struct *tsk) { LIST_HEAD(firing); struct k_itimer *timer, *next; + unsigned long flags; BUG_ON(!irqs_disabled()); @@ -1318,7 +1315,8 @@ void run_posix_cpu_timers(struct task_struct *tsk) if (!fastpath_timer_check(tsk)) return; - spin_lock(&tsk->sighand->siglock); + if (!lock_task_sighand(tsk, &flags)) + return; /* * Here we take off tsk->signal->cpu_timers[N] and * tsk->cpu_timers[N] all the timers that are firing, and @@ -1340,7 +1338,7 @@ void run_posix_cpu_timers(struct task_struct *tsk) * that gets the timer lock before we do will give it up and * spin until we've taken care of that timer below. */ - spin_unlock(&tsk->sighand->siglock); + unlock_task_sighand(tsk, &flags); /* * Now that all the timers on our list have the firing flag, -- cgit v1.2.3-70-g09d2 From 8d1f431cbec115a780cd551ab1b4955c125f8d31 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 11 Jun 2010 20:04:46 +0200 Subject: sched: Fix the racy usage of thread_group_cputimer() in fastpath_timer_check() fastpath_timer_check()->thread_group_cputimer() is racy and unneeded. It is racy because another thread can clear ->running before thread_group_cputimer() takes cputimer->lock. In this case thread_group_cputimer() will set ->running = true again and call thread_group_cputime(). But since we do not hold tasklist or siglock, we can race with fork/exit and copy the wrong results into cputimer->cputime. It is unneeded because if ->running == true we can just use the numbers in cputimer->cputime we already have. Change fastpath_timer_check() to copy cputimer->cputime into the local variable under cputimer->lock. We do not re-check ->running under cputimer->lock, run_posix_cpu_timers() does this check later. Note: we can add more optimizations on top of this change. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra LKML-Reference: <20100611180446.GA13025@redhat.com> Signed-off-by: Ingo Molnar --- kernel/posix-cpu-timers.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel/posix-cpu-timers.c') diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index d5dbef5e89e..f66bdd33a6c 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1287,7 +1287,10 @@ static inline int fastpath_timer_check(struct task_struct *tsk) if (sig->cputimer.running) { struct task_cputime group_sample; - thread_group_cputimer(tsk, &group_sample); + spin_lock(&sig->cputimer.lock); + group_sample = sig->cputimer.cputime; + spin_unlock(&sig->cputimer.lock); + if (task_cputime_expired(&group_sample, &sig->cputime_expires)) return 1; } -- cgit v1.2.3-70-g09d2 From 5ab46b345e418747b3a52f0892680c0745c4223c Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 28 Aug 2009 14:05:12 +0200 Subject: rlimits: add task_struct to update_rlimit_cpu Add task_struct as a parameter to update_rlimit_cpu to be able to set rlimit_cpu of different task than current. Signed-off-by: Jiri Slaby Acked-by: James Morris --- include/linux/posix-timers.h | 2 +- kernel/posix-cpu-timers.c | 8 ++++---- kernel/sys.c | 2 +- security/selinux/hooks.c | 3 ++- 4 files changed, 8 insertions(+), 7 deletions(-) (limited to 'kernel/posix-cpu-timers.c') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 4f71bf4e628..3e23844a699 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -117,6 +117,6 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, long clock_nanosleep_restart(struct restart_block *restart_block); -void update_rlimit_cpu(unsigned long rlim_new); +void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new); #endif diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 9829646d399..0513900995c 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -16,13 +16,13 @@ * siglock protection since other code may update expiration cache as * well. */ -void update_rlimit_cpu(unsigned long rlim_new) +void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new) { cputime_t cputime = secs_to_cputime(rlim_new); - spin_lock_irq(¤t->sighand->siglock); - set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); - spin_unlock_irq(¤t->sighand->siglock); + spin_lock_irq(&task->sighand->siglock); + set_process_cpu_timer(task, CPUCLOCK_PROF, &cputime, NULL); + spin_unlock_irq(&task->sighand->siglock); } static int check_clock(const clockid_t which_clock) diff --git a/kernel/sys.c b/kernel/sys.c index 1ba4522689d..f5183b08adf 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1320,7 +1320,7 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) if (new_rlim.rlim_cur == RLIM_INFINITY) goto out; - update_rlimit_cpu(new_rlim.rlim_cur); + update_rlimit_cpu(current, new_rlim.rlim_cur); out: return 0; } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index e3ce6b4127c..afb18a9ebba 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2338,7 +2338,8 @@ static void selinux_bprm_committing_creds(struct linux_binprm *bprm) initrlim = init_task.signal->rlim + i; rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur); } - update_rlimit_cpu(current->signal->rlim[RLIMIT_CPU].rlim_cur); + update_rlimit_cpu(current, + current->signal->rlim[RLIMIT_CPU].rlim_cur); } } -- cgit v1.2.3-70-g09d2