From 544b2c91a9f14f9565af1972203438b7f49afd48 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 14 Jun 2011 11:20:18 +0200 Subject: ptrace: implement PTRACE_LISTEN The previous patch implemented async notification for ptrace but it only worked while the tracee is running. This patch introduces PTRACE_LISTEN which was suggested by Oleg Nesterov. It's allowed iff tracee is in STOP trap and puts tracee into quasi-running state - tracee never really runs but wait(2) and ptrace(2) consider it to be running. While ptracer is listening, tracee is allowed to re-enter STOP to notify an async event. Listening state is cleared on the first notification. Ptracer can also clear it by issuing INTERRUPT - tracee will re-trap into STOP with listening state cleared. This allows ptracer to monitor group stop state without running tracee - use INTERRUPT to put tracee into STOP trap, issue LISTEN and then wait(2) to wait for the next group stop event. When it happens, PTRACE_GETSIGINFO provides information to determine the current state. Test program follows. 
#define PTRACE_SEIZE 0x4206 #define PTRACE_INTERRUPT 0x4207 #define PTRACE_LISTEN 0x4208 #define PTRACE_SEIZE_DEVEL 0x80000000 static const struct timespec ts1s = { .tv_sec = 1 }; int main(int argc, char **argv) { pid_t tracee, tracer; int i; tracee = fork(); if (!tracee) while (1) pause(); tracer = fork(); if (!tracer) { siginfo_t si; ptrace(PTRACE_SEIZE, tracee, NULL, (void *)(unsigned long)PTRACE_SEIZE_DEVEL); ptrace(PTRACE_INTERRUPT, tracee, NULL, NULL); repeat: waitid(P_PID, tracee, NULL, WSTOPPED); ptrace(PTRACE_GETSIGINFO, tracee, NULL, &si); if (!si.si_code) { printf("tracer: SIG %d\n", si.si_signo); ptrace(PTRACE_CONT, tracee, NULL, (void *)(unsigned long)si.si_signo); goto repeat; } printf("tracer: stopped=%d signo=%d\n", si.si_signo != SIGTRAP, si.si_signo); if (si.si_signo != SIGTRAP) ptrace(PTRACE_LISTEN, tracee, NULL, NULL); else ptrace(PTRACE_CONT, tracee, NULL, NULL); goto repeat; } for (i = 0; i < 3; i++) { nanosleep(&ts1s, NULL); printf("mother: SIGSTOP\n"); kill(tracee, SIGSTOP); nanosleep(&ts1s, NULL); printf("mother: SIGCONT\n"); kill(tracee, SIGCONT); } nanosleep(&ts1s, NULL); kill(tracer, SIGKILL); kill(tracee, SIGKILL); return 0; } This is identical to the program to test TRAP_NOTIFY except that tracee is PTRACE_LISTEN'd instead of PTRACE_CONT'd when group stopped. This allows ptracer to monitor when group stop ends without running tracee. # ./test-listen tracer: stopped=0 signo=5 mother: SIGSTOP tracer: SIG 19 tracer: stopped=1 signo=19 mother: SIGCONT tracer: stopped=0 signo=5 tracer: SIG 18 mother: SIGSTOP tracer: SIG 19 tracer: stopped=1 signo=19 mother: SIGCONT tracer: stopped=0 signo=5 tracer: SIG 18 mother: SIGSTOP tracer: SIG 19 tracer: stopped=1 signo=19 mother: SIGCONT tracer: stopped=0 signo=5 tracer: SIG 18 -v2: Moved JOBCTL_LISTENING check in wait_task_stopped() into task_stopped_code() as suggested by Oleg. 
Signed-off-by: Tejun Heo Cc: Oleg Nesterov --- kernel/exit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 20a40647152..289f59d686b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1368,7 +1368,8 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) static int *task_stopped_code(struct task_struct *p, bool ptrace) { if (ptrace) { - if (task_is_stopped_or_traced(p)) + if (task_is_stopped_or_traced(p) && + !(p->jobctl & JOBCTL_LISTENING)) return &p->exit_code; } else { if (p->signal->flags & SIGNAL_STOP_STOPPED) -- cgit v1.2.3-70-g09d2 From d21142ece414ce1088cfcae760689aa60d6fee80 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 17 Jun 2011 16:50:34 +0200 Subject: ptrace: kill task_ptrace() task_ptrace(task) simply dereferences task->ptrace and isn't even used consistently only adding confusion. Kill it and directly access ->ptrace instead. This doesn't introduce any behavior change. Signed-off-by: Tejun Heo Signed-off-by: Oleg Nesterov --- include/linux/ptrace.h | 11 ----------- include/linux/tracehook.h | 16 ++++++++-------- kernel/exit.c | 8 ++++---- kernel/signal.c | 14 +++++++------- mm/oom_kill.c | 3 +-- 5 files changed, 20 insertions(+), 32 deletions(-) (limited to 'kernel/exit.c') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 4f224f16952..3ff20b32259 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -145,17 +145,6 @@ int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, unsigned long data); -/** - * task_ptrace - return %PT_* flags that apply to a task - * @task: pointer to &task_struct in question - * - * Returns the %PT_* flags that apply to @task. 
- */ -static inline int task_ptrace(struct task_struct *task) -{ - return task->ptrace; -} - /** * ptrace_event - possibly stop for a ptrace event notification * @mask: %PT_* bit to check in @current->ptrace diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 15745cdd32c..a3e838784f4 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -63,7 +63,7 @@ struct linux_binprm; */ static inline int tracehook_expect_breakpoints(struct task_struct *task) { - return (task_ptrace(task) & PT_PTRACED) != 0; + return (task->ptrace & PT_PTRACED) != 0; } /* @@ -71,7 +71,7 @@ static inline int tracehook_expect_breakpoints(struct task_struct *task) */ static inline void ptrace_report_syscall(struct pt_regs *regs) { - int ptrace = task_ptrace(current); + int ptrace = current->ptrace; if (!(ptrace & PT_PTRACED)) return; @@ -155,7 +155,7 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step) static inline int tracehook_unsafe_exec(struct task_struct *task) { int unsafe = 0; - int ptrace = task_ptrace(task); + int ptrace = task->ptrace; if (ptrace & PT_PTRACED) { if (ptrace & PT_PTRACE_CAP) unsafe |= LSM_UNSAFE_PTRACE_CAP; @@ -178,7 +178,7 @@ static inline int tracehook_unsafe_exec(struct task_struct *task) */ static inline struct task_struct *tracehook_tracer_task(struct task_struct *tsk) { - if (task_ptrace(tsk) & PT_PTRACED) + if (tsk->ptrace & PT_PTRACED) return rcu_dereference(tsk->parent); return NULL; } @@ -202,7 +202,7 @@ static inline void tracehook_report_exec(struct linux_binfmt *fmt, struct pt_regs *regs) { if (!ptrace_event(PT_TRACE_EXEC, PTRACE_EVENT_EXEC, 0) && - unlikely(task_ptrace(current) & PT_PTRACED)) + unlikely(current->ptrace & PT_PTRACED)) send_sig(SIGTRAP, current, 0); } @@ -285,7 +285,7 @@ static inline void tracehook_report_clone(struct pt_regs *regs, unsigned long clone_flags, pid_t pid, struct task_struct *child) { - if (unlikely(task_ptrace(child))) { + if (unlikely(child->ptrace)) { /* 
* It doesn't matter who attached/attaching to this * task, the pending SIGSTOP is right in any case. @@ -403,7 +403,7 @@ static inline void tracehook_signal_handler(int sig, siginfo_t *info, static inline int tracehook_consider_ignored_signal(struct task_struct *task, int sig) { - return (task_ptrace(task) & PT_PTRACED) != 0; + return (task->ptrace & PT_PTRACED) != 0; } /** @@ -422,7 +422,7 @@ static inline int tracehook_consider_ignored_signal(struct task_struct *task, static inline int tracehook_consider_fatal_signal(struct task_struct *task, int sig) { - return (task_ptrace(task) & PT_PTRACED) != 0; + return (task->ptrace & PT_PTRACED) != 0; } #define DEATH_REAP -1 diff --git a/kernel/exit.c b/kernel/exit.c index 289f59d686b..e5cc0564460 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -765,7 +765,7 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p, p->exit_signal = SIGCHLD; /* If it has exited notify the new parent about this child's death. */ - if (!task_ptrace(p) && + if (!p->ptrace && p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { do_notify_parent(p, p->exit_signal); if (task_detached(p)) { @@ -795,7 +795,7 @@ static void forget_original_parent(struct task_struct *father) do { t->real_parent = reaper; if (t->parent == father) { - BUG_ON(task_ptrace(t)); + BUG_ON(t->ptrace); t->parent = t->real_parent; } if (t->pdeath_signal) @@ -1565,7 +1565,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, * Notification and reaping will be cascaded to the real * parent when the ptracer detaches. */ - if (likely(!ptrace) && unlikely(task_ptrace(p))) { + if (likely(!ptrace) && unlikely(p->ptrace)) { /* it will become visible, clear notask_error */ wo->notask_error = 0; return 0; @@ -1608,7 +1608,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, * own children, it should create a separate process which * takes the role of real parent. 
*/ - if (likely(!ptrace) && task_ptrace(p) && + if (likely(!ptrace) && p->ptrace && same_thread_group(p->parent, p->real_parent)) return 0; diff --git a/kernel/signal.c b/kernel/signal.c index 97e575a3387..0f337087250 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1592,7 +1592,7 @@ int do_notify_parent(struct task_struct *tsk, int sig) /* do_notify_parent_cldstop should have been called instead. */ BUG_ON(task_is_stopped_or_traced(tsk)); - BUG_ON(!task_ptrace(tsk) && + BUG_ON(!tsk->ptrace && (tsk->group_leader != tsk || !thread_group_empty(tsk))); info.si_signo = sig; @@ -1631,7 +1631,7 @@ int do_notify_parent(struct task_struct *tsk, int sig) psig = tsk->parent->sighand; spin_lock_irqsave(&psig->siglock, flags); - if (!task_ptrace(tsk) && sig == SIGCHLD && + if (!tsk->ptrace && sig == SIGCHLD && (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN || (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT))) { /* @@ -1731,7 +1731,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, static inline int may_ptrace_stop(void) { - if (!likely(task_ptrace(current))) + if (!likely(current->ptrace)) return 0; /* * Are we in the middle of do_coredump? 
@@ -1989,7 +1989,7 @@ static bool do_signal_stop(int signr) if (!(sig->flags & SIGNAL_STOP_STOPPED)) sig->group_exit_code = signr; else - WARN_ON_ONCE(!task_ptrace(current)); + WARN_ON_ONCE(!current->ptrace); sig->group_stop_count = 0; @@ -2014,7 +2014,7 @@ static bool do_signal_stop(int signr) } } - if (likely(!task_ptrace(current))) { + if (likely(!current->ptrace)) { int notify = 0; /* @@ -2093,7 +2093,7 @@ static void do_jobctl_trap(void) static int ptrace_signal(int signr, siginfo_t *info, struct pt_regs *regs, void *cookie) { - if (!task_ptrace(current)) + if (!current->ptrace) return signr; ptrace_signal_deliver(regs, cookie); @@ -2179,7 +2179,7 @@ relock: do_notify_parent_cldstop(current, false, why); leader = current->group_leader; - if (task_ptrace(leader) && !real_parent_is_ptracer(leader)) + if (leader->ptrace && !real_parent_is_ptracer(leader)) do_notify_parent_cldstop(leader, true, why); read_unlock(&tasklist_lock); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index e4b0991ca35..b0be989d436 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -339,8 +339,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, * then wait for it to finish before killing * some other task unnecessarily. */ - if (!(task_ptrace(p->group_leader) & - PT_TRACE_EXIT)) + if (!(p->group_leader->ptrace & PT_TRACE_EXIT)) return ERR_PTR(-1UL); } } -- cgit v1.2.3-70-g09d2 From a288eecce5253cc1565d400a52b9b476a157e040 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 17 Jun 2011 16:50:37 +0200 Subject: ptrace: kill trivial tracehooks At this point, tracehooks aren't useful to mainline kernel and mostly just add an extra layer of obfuscation. Although they have comments, without actual in-kernel users, it is difficult to tell what are their assumptions and they're actually trying to achieve. To mainline kernel, they just aren't worth keeping around. This patch kills the following trivial tracehooks. * Ones testing whether task is ptraced. Replace with ->ptrace test. 
tracehook_expect_breakpoints() tracehook_consider_ignored_signal() tracehook_consider_fatal_signal() * ptrace_event() wrappers. Call directly. tracehook_report_exec() tracehook_report_exit() tracehook_report_vfork_done() * ptrace_release_task() wrapper. Call directly. tracehook_finish_release_task() * noop tracehook_prepare_release_task() tracehook_report_death() This doesn't introduce any behavior change. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Cc: Martin Schwidefsky Signed-off-by: Oleg Nesterov --- arch/s390/kernel/traps.c | 4 +- fs/exec.c | 2 +- include/linux/tracehook.h | 156 ---------------------------------------------- kernel/exit.c | 7 +-- kernel/fork.c | 2 +- kernel/signal.c | 8 +-- mm/nommu.c | 3 +- 7 files changed, 11 insertions(+), 171 deletions(-) (limited to 'kernel/exit.c') diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index a65d2e82f61..a63d34c3611 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -331,7 +331,7 @@ void __kprobes do_per_trap(struct pt_regs *regs) { if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) return; - if (tracehook_consider_fatal_signal(current, SIGTRAP)) + if (current->ptrace) force_sig(SIGTRAP, current); } @@ -425,7 +425,7 @@ static void __kprobes illegal_op(struct pt_regs *regs, long pgm_int_code, if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) return; if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) { - if (tracehook_consider_fatal_signal(current, SIGTRAP)) + if (current->ptrace) force_sig(SIGTRAP, current); else signal = SIGILL; diff --git a/fs/exec.c b/fs/exec.c index a9f2b3631bd..b37030d0a50 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1384,7 +1384,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) bprm->recursion_depth = depth; if (retval >= 0) { if (depth == 0) - tracehook_report_exec(fmt, bprm, regs); + ptrace_event(PTRACE_EVENT_EXEC, 0); put_binfmt(fmt); allow_write_access(bprm->file); if (bprm->file) diff 
--git a/include/linux/tracehook.h b/include/linux/tracehook.h index 3b68aa842a9..8b06d4f2b81 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -51,21 +51,6 @@ #include struct linux_binprm; -/** - * tracehook_expect_breakpoints - guess if task memory might be touched - * @task: current task, making a new mapping - * - * Return nonzero if @task is expected to want breakpoint insertion in - * its memory at some point. A zero return is no guarantee it won't - * be done, but this is a hint that it's known to be likely. - * - * May be called with @task->mm->mmap_sem held for writing. - */ -static inline int tracehook_expect_breakpoints(struct task_struct *task) -{ - return (task->ptrace & PT_PTRACED) != 0; -} - /* * ptrace report for syscall entry and exit looks identical. */ @@ -183,42 +168,6 @@ static inline struct task_struct *tracehook_tracer_task(struct task_struct *tsk) return NULL; } -/** - * tracehook_report_exec - a successful exec was completed - * @fmt: &struct linux_binfmt that performed the exec - * @bprm: &struct linux_binprm containing exec details - * @regs: user-mode register state - * - * An exec just completed, we are shortly going to return to user mode. - * The freshly initialized register state can be seen and changed in @regs. - * The name, file and other pointers in @bprm are still on hand to be - * inspected, but will be freed as soon as this returns. - * - * Called with no locks, but with some kernel resources held live - * and a reference on @fmt->module. - */ -static inline void tracehook_report_exec(struct linux_binfmt *fmt, - struct linux_binprm *bprm, - struct pt_regs *regs) -{ - ptrace_event(PTRACE_EVENT_EXEC, 0); -} - -/** - * tracehook_report_exit - task has begun to exit - * @exit_code: pointer to value destined for @current->exit_code - * - * @exit_code points to the value passed to do_exit(), which tracing - * might change here. 
This is almost the first thing in do_exit(), - * before freeing any resources or setting the %PF_EXITING flag. - * - * Called with no locks held. - */ -static inline void tracehook_report_exit(long *exit_code) -{ - ptrace_event(PTRACE_EVENT_EXIT, *exit_code); -} - /** * tracehook_prepare_clone - prepare for new child to be cloned * @clone_flags: %CLONE_* flags from clone/fork/vfork system call @@ -319,52 +268,6 @@ static inline void tracehook_report_clone_complete(int trace, ptrace_event(trace, pid); } -/** - * tracehook_report_vfork_done - vfork parent's child has exited or exec'd - * @child: child task, already running - * @pid: new child's PID in the parent's namespace - * - * Called after a %CLONE_VFORK parent has waited for the child to complete. - * The clone/vfork system call will return immediately after this. - * The @child pointer may be invalid if a self-reaping child died and - * tracehook_report_clone() took no action to prevent it from self-reaping. - * - * Called with no locks held. - */ -static inline void tracehook_report_vfork_done(struct task_struct *child, - pid_t pid) -{ - ptrace_event(PTRACE_EVENT_VFORK_DONE, pid); -} - -/** - * tracehook_prepare_release_task - task is being reaped, clean up tracing - * @task: task in %EXIT_DEAD state - * - * This is called in release_task() just before @task gets finally reaped - * and freed. This would be the ideal place to remove and clean up any - * tracing-related state for @task. - * - * Called with no locks held. - */ -static inline void tracehook_prepare_release_task(struct task_struct *task) -{ -} - -/** - * tracehook_finish_release_task - final tracing clean-up - * @task: task in %EXIT_DEAD state - * - * This is called in release_task() when @task is being in the middle of - * being reaped. After this, there must be no tracing entanglements. - * - * Called with write_lock_irq(&tasklist_lock) held. 
- */ -static inline void tracehook_finish_release_task(struct task_struct *task) -{ - ptrace_release_task(task); -} - /** * tracehook_signal_handler - signal handler setup is complete * @sig: number of signal being delivered @@ -388,41 +291,6 @@ static inline void tracehook_signal_handler(int sig, siginfo_t *info, ptrace_notify(SIGTRAP); } -/** - * tracehook_consider_ignored_signal - suppress short-circuit of ignored signal - * @task: task receiving the signal - * @sig: signal number being sent - * - * Return zero iff tracing doesn't care to examine this ignored signal, - * so it can short-circuit normal delivery and never even get queued. - * - * Called with @task->sighand->siglock held. - */ -static inline int tracehook_consider_ignored_signal(struct task_struct *task, - int sig) -{ - return (task->ptrace & PT_PTRACED) != 0; -} - -/** - * tracehook_consider_fatal_signal - suppress special handling of fatal signal - * @task: task receiving the signal - * @sig: signal number being sent - * - * Return nonzero to prevent special handling of this termination signal. - * Normally handler for signal is %SIG_DFL. It can be %SIG_IGN if @sig is - * ignored, in which case force_sig() is about to reset it to %SIG_DFL. - * When this returns zero, this signal might cause a quick termination - * that does not give the debugger a chance to intercept the signal. - * - * Called with or without @task->sighand->siglock held. - */ -static inline int tracehook_consider_fatal_signal(struct task_struct *task, - int sig) -{ - return (task->ptrace & PT_PTRACED) != 0; -} - #define DEATH_REAP -1 #define DEATH_DELAYED_GROUP_LEADER -2 @@ -457,30 +325,6 @@ static inline int tracehook_notify_death(struct task_struct *task, return task->ptrace ? 
SIGCHLD : DEATH_DELAYED_GROUP_LEADER; } -/** - * tracehook_report_death - task is dead and ready to be reaped - * @task: @current task now exiting - * @signal: return value from tracheook_notify_death() - * @death_cookie: value passed back from tracehook_notify_death() - * @group_dead: nonzero if this was the last thread in the group to die - * - * Thread has just become a zombie or is about to self-reap. If positive, - * @signal is the signal number just sent to the parent (usually %SIGCHLD). - * If @signal is %DEATH_REAP, this thread will self-reap. If @signal is - * %DEATH_DELAYED_GROUP_LEADER, this is a delayed_group_leader() zombie. - * The @death_cookie was passed back by tracehook_notify_death(). - * - * If normal reaping is not inhibited, @task->exit_state might be changing - * in parallel. - * - * Called without locks. - */ -static inline void tracehook_report_death(struct task_struct *task, - int signal, void *death_cookie, - int group_dead) -{ -} - #ifdef TIF_NOTIFY_RESUME /** * set_notify_resume - cause tracehook_notify_resume() to be called diff --git a/kernel/exit.c b/kernel/exit.c index e5cc0564460..d49134a7f25 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -169,7 +169,6 @@ void release_task(struct task_struct * p) struct task_struct *leader; int zap_leader; repeat: - tracehook_prepare_release_task(p); /* don't need to get the RCU readlock here - the process is dead and * can't be modifying its own credentials. 
But shut RCU-lockdep up */ rcu_read_lock(); @@ -179,7 +178,7 @@ repeat: proc_flush_task(p); write_lock_irq(&tasklist_lock); - tracehook_finish_release_task(p); + ptrace_release_task(p); __exit_signal(p); /* @@ -868,8 +867,6 @@ static void exit_notify(struct task_struct *tsk, int group_dead) wake_up_process(tsk->signal->group_exit_task); write_unlock_irq(&tasklist_lock); - tracehook_report_death(tsk, signal, cookie, group_dead); - /* If the process is dead, release it - nobody will wait for it */ if (signal == DEATH_REAP) release_task(tsk); @@ -924,7 +921,7 @@ NORET_TYPE void do_exit(long code) */ set_fs(USER_DS); - tracehook_report_exit(&code); + ptrace_event(PTRACE_EVENT_EXIT, code); validate_creds_for_do_exit(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index 0276c30401a..d4f0dff9d61 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1527,7 +1527,7 @@ long do_fork(unsigned long clone_flags, freezer_do_not_count(); wait_for_completion(&vfork); freezer_count(); - tracehook_report_vfork_done(p, nr); + ptrace_event(PTRACE_EVENT_VFORK_DONE, nr); } } else { nr = PTR_ERR(p); diff --git a/kernel/signal.c b/kernel/signal.c index 0f337087250..1550aee34f4 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -87,7 +87,7 @@ static int sig_ignored(struct task_struct *t, int sig, int from_ancestor_ns) /* * Tracers may want to know about even ignored signals. 
*/ - return !tracehook_consider_ignored_signal(t, sig); + return !t->ptrace; } /* @@ -493,7 +493,8 @@ int unhandled_signal(struct task_struct *tsk, int sig) return 1; if (handler != SIG_IGN && handler != SIG_DFL) return 0; - return !tracehook_consider_fatal_signal(tsk, sig); + /* if ptraced, let the tracer determine */ + return !tsk->ptrace; } /* @@ -981,8 +982,7 @@ static void complete_signal(int sig, struct task_struct *p, int group) if (sig_fatal(p, sig) && !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) && !sigismember(&t->real_blocked, sig) && - (sig == SIGKILL || - !tracehook_consider_fatal_signal(t, sig))) { + (sig == SIGKILL || !t->ptrace)) { /* * This signal will be fatal to the whole group. */ diff --git a/mm/nommu.c b/mm/nommu.c index 1fd0c51b10a..54ae707bdae 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -1087,7 +1086,7 @@ static unsigned long determine_vm_flags(struct file *file, * it's being traced - otherwise breakpoints set in it may interfere * with another untraced process */ - if ((flags & MAP_PRIVATE) && tracehook_expect_breakpoints(current)) + if ((flags & MAP_PRIVATE) && current->ptrace) vm_flags &= ~VM_MAYSHARE; return vm_flags; -- cgit v1.2.3-70-g09d2 From 53c8f9f199b239668e6b1a907735ee323a0d1ccd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 22 Jun 2011 23:08:18 +0200 Subject: make do_notify_parent() return bool - change do_notify_parent() to return a boolean, true if the task should be reaped because its parent ignores SIGCHLD. - update the only caller which checks the returned value, exit_notify(). This temporarily uglifies exit_notify() even more; it will be cleaned up by the next change. 
Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo --- include/linux/sched.h | 4 ++-- kernel/exit.c | 9 ++++++--- kernel/signal.c | 17 +++++++++-------- 3 files changed, 17 insertions(+), 13 deletions(-) (limited to 'kernel/exit.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index 87f7ca7ed6f..0df7231d9ee 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2145,7 +2145,7 @@ static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, s spin_unlock_irqrestore(&tsk->sighand->siglock, flags); return ret; -} +} extern void block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask); @@ -2160,7 +2160,7 @@ extern int kill_pid_info_as_uid(int, struct siginfo *, struct pid *, uid_t, uid_ extern int kill_pgrp(struct pid *pid, int sig, int priv); extern int kill_pid(struct pid *pid, int sig, int priv); extern int kill_proc_info(int, struct siginfo *, pid_t); -extern int do_notify_parent(struct task_struct *, int); +extern bool do_notify_parent(struct task_struct *, int); extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); extern void force_sig(int, struct task_struct *); extern int send_sig(int, struct task_struct *, int); diff --git a/kernel/exit.c b/kernel/exit.c index d49134a7f25..34d135f4fcc 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -820,6 +820,7 @@ static void forget_original_parent(struct task_struct *father) static void exit_notify(struct task_struct *tsk, int group_dead) { int signal; + bool autoreap; void *cookie; /* @@ -858,9 +859,11 @@ static void exit_notify(struct task_struct *tsk, int group_dead) signal = tracehook_notify_death(tsk, &cookie, group_dead); if (signal >= 0) - signal = do_notify_parent(tsk, signal); + autoreap = do_notify_parent(tsk, signal); + else + autoreap = (signal == DEATH_REAP); - tsk->exit_state = signal == DEATH_REAP ? EXIT_DEAD : EXIT_ZOMBIE; + tsk->exit_state = autoreap ? 
EXIT_DEAD : EXIT_ZOMBIE; /* mt-exec, de_thread() is waiting for group leader */ if (unlikely(tsk->signal->notify_count < 0)) @@ -868,7 +871,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead) write_unlock_irq(&tasklist_lock); /* If the process is dead, release it - nobody will wait for it */ - if (signal == DEATH_REAP) + if (autoreap) release_task(tsk); } diff --git a/kernel/signal.c b/kernel/signal.c index 1550aee34f4..d52e82cd62b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1577,15 +1577,15 @@ ret: * Let a parent know about the death of a child. * For a stopped/continued status change, use do_notify_parent_cldstop instead. * - * Returns -1 if our parent ignored us and so we've switched to - * self-reaping, or else @sig. + * Returns true if our parent ignored us and so we've switched to + * self-reaping. */ -int do_notify_parent(struct task_struct *tsk, int sig) +bool do_notify_parent(struct task_struct *tsk, int sig) { struct siginfo info; unsigned long flags; struct sighand_struct *psig; - int ret = sig; + bool autoreap = false; BUG_ON(sig == -1); @@ -1649,16 +1649,17 @@ int do_notify_parent(struct task_struct *tsk, int sig) * is implementation-defined: we do (if you don't want * it, just use SIG_IGN instead). */ - ret = tsk->exit_signal = -1; + autoreap = true; + tsk->exit_signal = -1; if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN) - sig = -1; + sig = 0; } - if (valid_signal(sig) && sig > 0) + if (valid_signal(sig) && sig) __group_send_sig_info(sig, &info, tsk->parent); __wake_up_parent(tsk, tsk->parent); spin_unlock_irqrestore(&psig->siglock, flags); - return ret; + return autoreap; } /** -- cgit v1.2.3-70-g09d2 From 45cdf5cc0703c537194588c63d53bad1f2539d36 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 23 Jun 2011 19:06:50 +0200 Subject: kill tracehook_notify_death() Kill tracehook_notify_death(), reimplement the logic in its caller, exit_notify(). 
Also, change the exec_id's check to use thread_group_leader() instead of task_detached(), which is clearer. This logic only applies to the exiting leader, a sub-thread must never change its exit_signal. Note: when the traced group leader exits the exit_signal-or-SIGCHLD logic looks really strange: - we notify the tracer even if !thread_group_empty() but do_wait(WEXITED) can't work until all threads exit - if the tracer is real_parent, it is not clear why we can't use ->exit_signal even if !thread_group_empty() -v2: do not try to fix the 2nd oddity to avoid the subtle behavior change mixed with reorganization, suggested by Tejun. Signed-off-by: Oleg Nesterov Reviewed-by: Tejun Heo --- include/linux/tracehook.h | 34 ---------------------------------- kernel/exit.c | 21 +++++++++++++-------- 2 files changed, 13 insertions(+), 42 deletions(-) (limited to 'kernel/exit.c') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 7a1bd12aeff..a71a2927a6a 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -152,40 +152,6 @@ static inline void tracehook_signal_handler(int sig, siginfo_t *info, ptrace_notify(SIGTRAP); } -#define DEATH_REAP -1 -#define DEATH_DELAYED_GROUP_LEADER -2 - -/** - * tracehook_notify_death - task is dead, ready to notify parent - * @task: @current task now exiting - * @death_cookie: value to pass to tracehook_report_death() - * @group_dead: nonzero if this was the last thread in the group to die - * - * A return value >= 0 means call do_notify_parent() with that signal - * number. Negative return value can be %DEATH_REAP to self-reap right - * now, or %DEATH_DELAYED_GROUP_LEADER to a zombie without notifying our - * parent. Note that a return value of 0 means a do_notify_parent() call - * that sends no signal, but still wakes up a parent blocked in wait*(). - * - * Called with write_lock_irq(&tasklist_lock) held. 
- */ -static inline int tracehook_notify_death(struct task_struct *task, - void **death_cookie, int group_dead) -{ - if (task_detached(task)) - return task->ptrace ? SIGCHLD : DEATH_REAP; - - /* - * If something other than our normal parent is ptracing us, then - * send it a SIGCHLD instead of honoring exit_signal. exit_signal - * only has special meaning to our real parent. - */ - if (thread_group_empty(task) && !ptrace_reparented(task)) - return task->exit_signal; - - return task->ptrace ? SIGCHLD : DEATH_DELAYED_GROUP_LEADER; -} - #ifdef TIF_NOTIFY_RESUME /** * set_notify_resume - cause tracehook_notify_resume() to be called diff --git a/kernel/exit.c b/kernel/exit.c index 34d135f4fcc..bb08e938ca7 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -819,9 +819,7 @@ static void forget_original_parent(struct task_struct *father) */ static void exit_notify(struct task_struct *tsk, int group_dead) { - int signal; bool autoreap; - void *cookie; /* * This does two things: @@ -852,16 +850,23 @@ static void exit_notify(struct task_struct *tsk, int group_dead) * we have changed execution domain as these two values started * the same after a fork. */ - if (tsk->exit_signal != SIGCHLD && !task_detached(tsk) && + if (thread_group_leader(tsk) && tsk->exit_signal != SIGCHLD && (tsk->parent_exec_id != tsk->real_parent->self_exec_id || tsk->self_exec_id != tsk->parent_exec_id)) tsk->exit_signal = SIGCHLD; - signal = tracehook_notify_death(tsk, &cookie, group_dead); - if (signal >= 0) - autoreap = do_notify_parent(tsk, signal); - else - autoreap = (signal == DEATH_REAP); + if (unlikely(tsk->ptrace)) { + int sig = thread_group_leader(tsk) && + thread_group_empty(tsk) && + !ptrace_reparented(tsk) ? + tsk->exit_signal : SIGCHLD; + autoreap = do_notify_parent(tsk, sig); + } else if (thread_group_leader(tsk)) { + autoreap = thread_group_empty(tsk) && + do_notify_parent(tsk, tsk->exit_signal); + } else { + autoreap = true; + } tsk->exit_state = autoreap ? 
EXIT_DEAD : EXIT_ZOMBIE; -- cgit v1.2.3-70-g09d2 From 8677347378044ab564470bced2275520efb3670d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 22 Jun 2011 23:09:09 +0200 Subject: make do_notify_parent() __must_check, update the callers Change other callers of do_notify_parent() to check the value it returns, this makes the subsequent task_detached() unnecessary. Mark do_notify_parent() as __must_check. Use thread_group_leader() instead of !task_detached() to check if we need to notify the real parent in wait_task_zombie(). Remove the stale comment in release_task(). "just for sanity" is no longer true, we have to set EXIT_DEAD to avoid the races with do_wait(). Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo --- include/linux/sched.h | 2 +- kernel/exit.c | 29 ++++++++--------------------- 2 files changed, 9 insertions(+), 22 deletions(-) (limited to 'kernel/exit.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0df7231d9ee..0cb4f097f76 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2160,7 +2160,7 @@ extern int kill_pid_info_as_uid(int, struct siginfo *, struct pid *, uid_t, uid_ extern int kill_pgrp(struct pid *pid, int sig, int priv); extern int kill_pid(struct pid *pid, int sig, int priv); extern int kill_proc_info(int, struct siginfo *, pid_t); -extern bool do_notify_parent(struct task_struct *, int); +extern __must_check bool do_notify_parent(struct task_struct *, int); extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); extern void force_sig(int, struct task_struct *); extern int send_sig(int, struct task_struct *, int); diff --git a/kernel/exit.c b/kernel/exit.c index bb08e938ca7..f68d137ffeb 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -190,21 +190,12 @@ repeat: leader = p->group_leader; if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) { BUG_ON(task_detached(leader)); - do_notify_parent(leader, leader->exit_signal); /* * If we were the last 
child thread and the leader has * exited already, and the leader's parent ignores SIGCHLD, * then we are the one who should release the leader. - * - * do_notify_parent() will have marked it self-reaping in - * that case. - */ - zap_leader = task_detached(leader); - - /* - * This maintains the invariant that release_task() - * only runs on a task in EXIT_DEAD, just for sanity. */ + zap_leader = do_notify_parent(leader, leader->exit_signal); if (zap_leader) leader->exit_state = EXIT_DEAD; } @@ -766,8 +757,7 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p, /* If it has exited notify the new parent about this child's death. */ if (!p->ptrace && p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { - do_notify_parent(p, p->exit_signal); - if (task_detached(p)) { + if (do_notify_parent(p, p->exit_signal)) { p->exit_state = EXIT_DEAD; list_move_tail(&p->sibling, dead); } @@ -1351,16 +1341,13 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) /* We dropped tasklist, ptracer could die and untrace */ ptrace_unlink(p); /* - * If this is not a detached task, notify the parent. - * If it's still not detached after that, don't release - * it now. + * If this is not a sub-thread, notify the parent. + * If parent wants a zombie, don't release it now. 
*/ - if (!task_detached(p)) { - do_notify_parent(p, p->exit_signal); - if (!task_detached(p)) { - p->exit_state = EXIT_ZOMBIE; - p = NULL; - } + if (thread_group_leader(p) && + !do_notify_parent(p, p->exit_signal)) { + p->exit_state = EXIT_ZOMBIE; + p = NULL; } write_unlock_irq(&tasklist_lock); } -- cgit v1.2.3-70-g09d2 From 0976a03e5ce8ec346e985f21046d7a75bb7fdffd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 22 Jun 2011 23:09:39 +0200 Subject: reparent_leader: check EXIT_DEAD instead of task_detached() Change reparent_leader() to check ->exit_state instead of ->exit_signal, this matches the similar EXIT_DEAD check in wait_consider_task() and allows us to clean up the do_notify_parent/task_detached logic. task_detached() was really needed during reparenting before 9cd80bbb "do_wait() optimization: do not place sub-threads on ->children list" to filter out the sub-threads. After this change task_detached(p) can only be true if p is the dead group_leader and its parent ignores SIGCHLD, in this case the caller of do_notify_parent() is going to reap this task and it should set EXIT_DEAD. Signed-off-by: Oleg Nesterov Reviewed-by: Tejun Heo --- kernel/exit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index f68d137ffeb..2b1ba8048a1 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -742,7 +742,7 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p, { list_move_tail(&p->sibling, &p->real_parent->children); - if (task_detached(p)) + if (p->exit_state == EXIT_DEAD) return; /* * If this is a threaded reparent there is no need to -- cgit v1.2.3-70-g09d2 From e550f14dc6322e794d4e70825f63c9c99177ae8b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 22 Jun 2011 23:09:54 +0200 Subject: kill task_detached() Update the last user of task_detached(), wait_task_zombie(), to use thread_group_leader() and kill task_detached(). 
Signed-off-by: Oleg Nesterov Reviewed-by: Tejun Heo --- include/linux/sched.h | 5 ----- kernel/exit.c | 5 ++--- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'kernel/exit.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0cb4f097f76..39acee2c892 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2318,11 +2318,6 @@ static inline int thread_group_empty(struct task_struct *p) #define delay_group_leader(p) \ (thread_group_leader(p) && !thread_group_empty(p)) -static inline int task_detached(struct task_struct *p) -{ - return p->exit_signal == -1; -} - /* * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. Also diff --git a/kernel/exit.c b/kernel/exit.c index 2b1ba8048a1..9fa99702645 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -189,7 +189,6 @@ repeat: zap_leader = 0; leader = p->group_leader; if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) { - BUG_ON(task_detached(leader)); /* * If we were the last child thread and the leader has * exited already, and the leader's parent ignores SIGCHLD, @@ -1231,9 +1230,9 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) traced = ptrace_reparented(p); /* * It can be ptraced but not reparented, check - * !task_detached() to filter out sub-threads. + * thread_group_leader() to filter out sub-threads. */ - if (likely(!traced) && likely(!task_detached(p))) { + if (likely(!traced) && thread_group_leader(p)) { struct signal_struct *psig; struct signal_struct *sig; unsigned long maxrss; -- cgit v1.2.3-70-g09d2 From 479bf98c1c29b40d86e40a4e6e4944c2f03d9493 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 24 Jun 2011 17:34:39 +0200 Subject: ptrace: wait_consider_task: s/same_thread_group/ptrace_reparented/ wait_consider_task() checks same_thread_group(parent, real_parent), this is the open-coded ptrace_reparented(). 
__ptrace_detach() remains the only function which has to check this by hand, although we could reorganize the code to delay __ptrace_unlink. Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo --- kernel/exit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 9fa99702645..b8d3b47bb88 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1599,8 +1599,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, * own children, it should create a separate process which * takes the role of real parent. */ - if (likely(!ptrace) && p->ptrace && - same_thread_group(p->parent, p->real_parent)) + if (likely(!ptrace) && p->ptrace && !ptrace_reparented(p)) return 0; /* -- cgit v1.2.3-70-g09d2 From 961c4675c75112717705fa5c0c53cb9664051479 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 7 Jul 2011 21:33:54 +0200 Subject: has_stopped_jobs: s/task_is_stopped/SIGNAL_STOP_STOPPED/ has_stopped_jobs() naively checks task_is_stopped(group_leader). This was always wrong even without ptrace, group_leader can be dead. And given that ptrace can change the state to TRACED this is wrong even in the single-threaded case. Change the code to check SIGNAL_STOP_STOPPED and simplify the code, retval + break/continue doesn't make this trivial code more readable. We could probably add the usual "|| signal->group_stop_count" check but I don't think this makes sense, the task can start the group-stop right after the check anyway. 
Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo --- kernel/exit.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index b8d3b47bb88..6c7fbbe7d86 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -266,18 +266,16 @@ int is_current_pgrp_orphaned(void) return retval; } -static int has_stopped_jobs(struct pid *pgrp) +static bool has_stopped_jobs(struct pid *pgrp) { - int retval = 0; struct task_struct *p; do_each_pid_task(pgrp, PIDTYPE_PGID, p) { - if (!task_is_stopped(p)) - continue; - retval = 1; - break; + if (p->signal->flags & SIGNAL_STOP_STOPPED) + return true; } while_each_pid_task(pgrp, PIDTYPE_PGID, p); - return retval; + + return false; } /* -- cgit v1.2.3-70-g09d2