diff options
Diffstat (limited to 'kernel/exit.c')
-rw-r--r-- | kernel/exit.c | 153 |
1 files changed, 123 insertions, 30 deletions
diff --git a/kernel/exit.c b/kernel/exit.c index e2bdf37f9fd..20a40647152 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -50,6 +50,7 @@ #include <linux/perf_event.h> #include <trace/events/sched.h> #include <linux/hw_breakpoint.h> +#include <linux/oom.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -68,7 +69,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead) list_del_rcu(&p->tasks); list_del_init(&p->sibling); - __get_cpu_var(process_counts)--; + __this_cpu_dec(process_counts); } list_del_rcu(&p->thread_group); } @@ -95,6 +96,14 @@ static void __exit_signal(struct task_struct *tsk) sig->tty = NULL; } else { /* + * This can only happen if the caller is de_thread(). + * FIXME: this is the temporary hack, we should teach + * posix-cpu-timers to handle this case correctly. + */ + if (unlikely(has_group_leader_pid(tsk))) + posix_cpu_timers_exit_group(tsk); + + /* * If there is any task waiting for the group exit * then notify it: */ @@ -687,6 +696,8 @@ static void exit_mm(struct task_struct * tsk) enter_lazy_tlb(mm, current); /* We don't want this task to be frozen prematurely */ clear_freeze_flag(tsk); + if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) + atomic_dec(&mm->oom_disable_count); task_unlock(tsk); mm_update_next_owner(mm); mmput(mm); @@ -700,6 +711,8 @@ static void exit_mm(struct task_struct * tsk) * space. */ static struct task_struct *find_new_reaper(struct task_struct *father) + __releases(&tasklist_lock) + __acquires(&tasklist_lock) { struct pid_namespace *pid_ns = task_active_pid_ns(father); struct task_struct *thread; @@ -828,7 +841,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead) /* Let father know we died * * Thread signals are configurable, but you aren't going to use - * that to send signals to arbitary processes. + * that to send signals to arbitrary processes. * That stops right now. * * If the parent exec id doesn't match the exec id we saved @@ -895,12 +908,22 @@ NORET_TYPE void do_exit(long code) profile_task_exit(tsk); WARN_ON(atomic_read(&tsk->fs_excl)); + WARN_ON(blk_needs_flush_plug(tsk)); if (unlikely(in_interrupt())) panic("Aiee, killing interrupt handler!"); if (unlikely(!tsk->pid)) panic("Attempted to kill the idle task!"); + /* + * If do_exit is called because this processes oopsed, it's possible + * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before + * continuing. Amongst other possible reasons, this is to prevent + * mm_release()->clear_child_tid() from writing to a user-controlled + * kernel address. + */ + set_fs(USER_DS); + tracehook_report_exit(&code); validate_creds_for_do_exit(tsk); @@ -972,6 +995,15 @@ NORET_TYPE void do_exit(long code) exit_fs(tsk); check_stack_usage(); exit_thread(); + + /* + * Flush inherited counters to the parent - before the parent + * gets woken up by child-exit notifications. + * + * because of cgroup mode, must be called before cgroup_exit() + */ + perf_event_exit_task(tsk); + cgroup_exit(tsk, 1); if (group_dead) @@ -984,12 +1016,7 @@ NORET_TYPE void do_exit(long code) /* * FIXME: do that only when needed, using sched_exit tracepoint */ - flush_ptrace_hw_breakpoint(tsk); - /* - * Flush inherited counters to the parent - before the parent - * gets woken up by child-exit notifications. - */ - perf_event_exit_task(tsk); + ptrace_put_breakpoints(tsk); exit_notify(tsk, group_dead); #ifdef CONFIG_NUMA @@ -1350,11 +1377,23 @@ static int *task_stopped_code(struct task_struct *p, bool ptrace) return NULL; } -/* - * Handle sys_wait4 work for one task in state TASK_STOPPED. We hold - * read_lock(&tasklist_lock) on entry. If we return zero, we still hold - * the lock and this task is uninteresting. If we return nonzero, we have - * released the lock and the system call should return. +/** + * wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED + * @wo: wait options + * @ptrace: is the wait for ptrace + * @p: task to wait for + * + * Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED. + * + * CONTEXT: + * read_lock(&tasklist_lock), which is released if return value is + * non-zero. Also, grabs and releases @p->sighand->siglock. + * + * RETURNS: + * 0 if wait condition didn't exist and search for other wait conditions + * should continue. Non-zero return, -errno on failure and @p's pid on + * success, implies that tasklist_lock is released and wait condition + * search should terminate. */ static int wait_task_stopped(struct wait_opts *wo, int ptrace, struct task_struct *p) @@ -1370,6 +1409,9 @@ static int wait_task_stopped(struct wait_opts *wo, if (!ptrace && !(wo->wo_flags & WUNTRACED)) return 0; + if (!task_stopped_code(p, ptrace)) + return 0; + exit_code = 0; spin_lock_irq(&p->sighand->siglock); @@ -1511,33 +1553,84 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, return 0; } - if (likely(!ptrace) && unlikely(task_ptrace(p))) { + /* dead body doesn't have much to contribute */ + if (p->exit_state == EXIT_DEAD) + return 0; + + /* slay zombie? */ + if (p->exit_state == EXIT_ZOMBIE) { + /* + * A zombie ptracee is only visible to its ptracer. + * Notification and reaping will be cascaded to the real + * parent when the ptracer detaches. + */ + if (likely(!ptrace) && unlikely(task_ptrace(p))) { + /* it will become visible, clear notask_error */ + wo->notask_error = 0; + return 0; + } + + /* we don't reap group leaders with subthreads */ + if (!delay_group_leader(p)) + return wait_task_zombie(wo, p); + + /* + * Allow access to stopped/continued state via zombie by + * falling through. Clearing of notask_error is complex. + * + * When !@ptrace: + * + * If WEXITED is set, notask_error should naturally be + * cleared. If not, subset of WSTOPPED|WCONTINUED is set, + * so, if there are live subthreads, there are events to + * wait for. If all subthreads are dead, it's still safe + * to clear - this function will be called again in finite + * amount time once all the subthreads are released and + * will then return without clearing. + * + * When @ptrace: + * + * Stopped state is per-task and thus can't change once the + * target task dies. Only continued and exited can happen. + * Clear notask_error if WCONTINUED | WEXITED. + */ + if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED))) + wo->notask_error = 0; + } else { + /* + * If @p is ptraced by a task in its real parent's group, + * hide group stop/continued state when looking at @p as + * the real parent; otherwise, a single stop can be + * reported twice as group and ptrace stops. + * + * If a ptracer wants to distinguish the two events for its + * own children, it should create a separate process which + * takes the role of real parent. + */ + if (likely(!ptrace) && task_ptrace(p) && + same_thread_group(p->parent, p->real_parent)) + return 0; + /* - * This child is hidden by ptrace. - * We aren't allowed to see it now, but eventually we will. + * @p is alive and it's gonna stop, continue or exit, so + * there always is something to wait for. */ wo->notask_error = 0; - return 0; } - if (p->exit_state == EXIT_DEAD) - return 0; - /* - * We don't reap group leaders with subthreads. + * Wait for stopped. Depending on @ptrace, different stopped state + * is used and the two don't interact with each other. */ - if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p)) - return wait_task_zombie(wo, p); + ret = wait_task_stopped(wo, ptrace, p); + if (ret) + return ret; /* - * It's stopped or running now, so it might - * later continue, exit, or stop again. + * Wait for continued. There's only one continued state and the + * ptracer can consume it which can confuse the real parent. Don't + * use WCONTINUED from ptracer. You don't need or want it. */ - wo->notask_error = 0; - - if (task_stopped_code(p, ptrace)) - return wait_task_stopped(wo, ptrace, p); - return wait_task_continued(wo, p); } |