From bfac7009180901f57f20a73c53c3e57b1ce75a1b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 11 Jun 2010 01:09:56 +0200 Subject: sched: thread_group_cputime: Simplify, document the "alive" check thread_group_cputime() looks as if it is rcu-safe, but in fact this was wrong until ea6d290c which pins task->signal to task_struct. It checks ->sighand != NULL under rcu, but this can't help if ->signal can go away. Fortunately the caller either holds ->siglock, or it is fastpath_timer_check() which uses current and checks exit_state == 0. - Since ea6d290c commit tsk->signal is stable, we can read it first and avoid the initialization from INIT_CPUTIME. - Even if tsk->signal is always valid, we still have to check it is safe to use next_thread() under rcu_read_lock(). Currently the code checks ->sighand != NULL, change it to use pid_alive() which is commonly used to ensure the task wasn't unhashed before we take rcu_read_lock(). Add the comment to explain this check. - Change the main loop to use the while_each_thread() helper. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra LKML-Reference: <20100610230956.GA25921@redhat.com> Signed-off-by: Ingo Molnar --- kernel/posix-cpu-timers.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) (limited to 'kernel/posix-cpu-timers.c') diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 9829646d399..bf2a6502860 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -232,31 +232,24 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) { - struct sighand_struct *sighand; - struct signal_struct *sig; + struct signal_struct *sig = tsk->signal; struct task_struct *t; - *times = INIT_CPUTIME; + times->utime = sig->utime; + times->stime = sig->stime; + times->sum_exec_runtime = sig->sum_sched_runtime; rcu_read_lock(); - sighand = rcu_dereference(tsk->sighand); - if (!sighand) + /* make sure we can trust tsk->thread_group list */ + if (!likely(pid_alive(tsk))) goto out; - sig = tsk->signal; - t = tsk; do { times->utime = cputime_add(times->utime, t->utime); times->stime = cputime_add(times->stime, t->stime); times->sum_exec_runtime += t->se.sum_exec_runtime; - - t = next_thread(t); - } while (t != tsk); - - times->utime = cputime_add(times->utime, sig->utime); - times->stime = cputime_add(times->stime, sig->stime); - times->sum_exec_runtime += sig->sum_sched_runtime; + } while_each_thread(tsk, t); out: rcu_read_unlock(); } -- cgit v1.2.3-70-g09d2 From 0bdd2ed4138ec04e09b4f8165981efc99e439f55 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 11 Jun 2010 01:10:18 +0200 Subject: sched: run_posix_cpu_timers: Don't check ->exit_state, use lock_task_sighand() run_posix_cpu_timers() doesn't work if current has already passed exit_notify(). This was needed to prevent the races with do_wait(). Since ea6d290c ->signal is always valid and can't go away. We can remove the "tsk->exit_state == 0" in fastpath_timer_check() and convert run_posix_cpu_timers() to use lock_task_sighand(). Note: it makes sense to take group_leader's sighand instead, the sub-thread still uses CPU after release_task(). But we need more changes to do this. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra LKML-Reference: <20100610231018.GA25942@redhat.com> Signed-off-by: Ingo Molnar --- kernel/posix-cpu-timers.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'kernel/posix-cpu-timers.c') diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index bf2a6502860..d5dbef5e89e 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1272,10 +1272,6 @@ static inline int fastpath_timer_check(struct task_struct *tsk) { struct signal_struct *sig; - /* tsk == current, ensure it is safe to use ->signal/sighand */ - if (unlikely(tsk->exit_state)) - return 0; - if (!task_cputime_zero(&tsk->cputime_expires)) { struct task_cputime task_sample = { .utime = tsk->utime, @@ -1308,6 +1304,7 @@ void run_posix_cpu_timers(struct task_struct *tsk) { LIST_HEAD(firing); struct k_itimer *timer, *next; + unsigned long flags; BUG_ON(!irqs_disabled()); @@ -1318,7 +1315,8 @@ void run_posix_cpu_timers(struct task_struct *tsk) if (!fastpath_timer_check(tsk)) return; - spin_lock(&tsk->sighand->siglock); + if (!lock_task_sighand(tsk, &flags)) + return; /* * Here we take off tsk->signal->cpu_timers[N] and * tsk->cpu_timers[N] all the timers that are firing, and @@ -1340,7 +1338,7 @@ void run_posix_cpu_timers(struct task_struct *tsk) * that gets the timer lock before we do will give it up and * spin until we've taken care of that timer below. */ - spin_unlock(&tsk->sighand->siglock); + unlock_task_sighand(tsk, &flags); /* * Now that all the timers on our list have the firing flag, -- cgit v1.2.3-70-g09d2 From 8d1f431cbec115a780cd551ab1b4955c125f8d31 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 11 Jun 2010 20:04:46 +0200 Subject: sched: Fix the racy usage of thread_group_cputimer() in fastpath_timer_check() fastpath_timer_check()->thread_group_cputimer() is racy and unneeded. It is racy because another thread can clear ->running before thread_group_cputimer() takes cputimer->lock. In this case thread_group_cputimer() will set ->running = true again and call thread_group_cputime(). But since we do not hold tasklist or siglock, we can race with fork/exit and copy the wrong results into cputimer->cputime. It is unneeded because if ->running == true we can just use the numbers in cputimer->cputime we already have. Change fastpath_timer_check() to copy cputimer->cputime into the local variable under cputimer->lock. We do not re-check ->running under cputimer->lock, run_posix_cpu_timers() does this check later. Note: we can add more optimizations on top of this change. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra LKML-Reference: <20100611180446.GA13025@redhat.com> Signed-off-by: Ingo Molnar --- kernel/posix-cpu-timers.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel/posix-cpu-timers.c') diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index d5dbef5e89e..f66bdd33a6c 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1287,7 +1287,10 @@ static inline int fastpath_timer_check(struct task_struct *tsk) if (sig->cputimer.running) { struct task_cputime group_sample; - thread_group_cputimer(tsk, &group_sample); + spin_lock(&sig->cputimer.lock); + group_sample = sig->cputimer.cputime; + spin_unlock(&sig->cputimer.lock); + if (task_cputime_expired(&group_sample, &sig->cputime_expires)) return 1; } -- cgit v1.2.3-70-g09d2