From 228bdaa95fb830e08b6acd1afd4d2c55093cabfa Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 9 Dec 2011 03:02:19 -0500 Subject: x86: Keep current stack in NMI breakpoints We want to allow NMI handlers to have breakpoints to be able to remove stop_machine from ftrace, kprobes and jump_labels. But if an NMI interrupts a current breakpoint, and then it triggers a breakpoint itself, it will switch to the breakpoint stack and corrupt the data on it for the breakpoint processing that it interrupted. Instead, have the NMI check if it interrupted breakpoint processing by checking if the stack that is currently used is a breakpoint stack. If it is, then load a special IDT that changes the IST for the debug exception to keep the same stack in kernel context. When the NMI is done, it puts it back. This way, if the NMI does trigger a breakpoint, it will keep using the same stack and not stomp on the breakpoint data for the breakpoint it interrupted. Suggested-by: Peter Zijlstra Signed-off-by: Steven Rostedt --- arch/x86/kernel/cpu/common.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index aa003b13a83..caa404556b9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1026,6 +1026,8 @@ __setup("clearcpuid=", setup_disablecpuid); #ifdef CONFIG_X86_64 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; +struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1, + (unsigned long) nmi_idt_table }; DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE); @@ -1090,6 +1092,24 @@ unsigned long kernel_eflags; */ DEFINE_PER_CPU(struct orig_ist, orig_ist); +static DEFINE_PER_CPU(unsigned long, debug_stack_addr); + +int is_debug_stack(unsigned long addr) +{ + return addr <= __get_cpu_var(debug_stack_addr) && + addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ); +} + +void debug_stack_set_zero(void) +{ + load_idt((const struct desc_ptr *)&nmi_idt_descr); +} + +void debug_stack_reset(void) +{ + load_idt((const struct desc_ptr *)&idt_descr); +} + #else /* CONFIG_X86_64 */ DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; @@ -1208,6 +1228,8 @@ void __cpuinit cpu_init(void) estacks += exception_stack_sizes[v]; oist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; + if (v == DEBUG_STACK-1) + per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks; } } -- cgit v1.2.3-70-g09d2 From 42181186ad4db986fcaa40ca95c6e407e9e79372 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 16 Dec 2011 11:43:02 -0500 Subject: x86: Add counter when debug stack is used with interrupts enabled Mathieu Desnoyers pointed out a case that can cause issues with NMIs running on the debug stack: int3 -> interrupt -> NMI -> int3 Because the interrupt changes the stack, the NMI will not see that it preempted the debug stack. Looking deeper at this case, interrupts only happen when the int3 is from userspace or in an a location in the exception table (fixup). userspace -> int3 -> interurpt -> NMI -> int3 All other int3s that happen in the kernel should be processed without ever enabling interrupts, as the do_trap() call will panic the kernel if it is called to process any other location within the kernel. Adding a counter around the sections that enable interrupts while using the debug stack allows the NMI to also check that case. If the NMI sees that it either interrupted a task using the debug stack or the debug counter is non-zero, then it will have to change the IDT table to make the int3 not change stacks (which will corrupt the stack if it does). Note, I had to move the debug_usage functions out of processor.h and into debugreg.h because of the static inlined functions to inc and dec the debug_usage counter. __get_cpu_var() requires smp.h which includes processor.h, and would fail to build. Link: http://lkml.kernel.org/r/1323976535.23971.112.camel@gandalf.stny.rr.com Reported-by: Mathieu Desnoyers Cc: Linus Torvalds Cc: Peter Zijlstra Cc: H. Peter Anvin Cc: Thomas Gleixner Cc: Paul Turner Cc: Frederic Weisbecker Signed-off-by: Steven Rostedt --- arch/x86/include/asm/debugreg.h | 22 ++++++++++++++++++++++ arch/x86/include/asm/processor.h | 6 ------ arch/x86/kernel/cpu/common.c | 6 ++++-- arch/x86/kernel/traps.c | 14 ++++++++++++++ 4 files changed, 40 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 078ad0caefc..b903d5ea394 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -101,6 +101,28 @@ extern void aout_dump_debugregs(struct user *dump); extern void hw_breakpoint_restore(void); +#ifdef CONFIG_X86_64 +DECLARE_PER_CPU(int, debug_stack_usage); +static inline void debug_stack_usage_inc(void) +{ + __get_cpu_var(debug_stack_usage)++; +} +static inline void debug_stack_usage_dec(void) +{ + __get_cpu_var(debug_stack_usage)--; +} +int is_debug_stack(unsigned long addr); +void debug_stack_set_zero(void); +void debug_stack_reset(void); +#else /* !X86_64 */ +static inline int is_debug_stack(unsigned long addr) { return 0; } +static inline void debug_stack_set_zero(void) { } +static inline void debug_stack_reset(void) { } +static inline void debug_stack_usage_inc(void) { } +static inline void debug_stack_usage_dec(void) { } +#endif /* X86_64 */ + + #endif /* __KERNEL__ */ #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 4b39d6d7e3a..b650435ffb5 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -402,9 +402,6 @@ DECLARE_PER_CPU(char *, irq_stack_ptr); DECLARE_PER_CPU(unsigned int, irq_count); extern unsigned long kernel_eflags; extern asmlinkage void ignore_sysret(void); -int is_debug_stack(unsigned long addr); -void debug_stack_set_zero(void); -void debug_stack_reset(void); #else /* X86_64 */ #ifdef CONFIG_CC_STACKPROTECTOR /* @@ -419,9 +416,6 @@ struct stack_canary { }; DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); #endif -static inline int is_debug_stack(unsigned long addr) { return 0; } -static inline void debug_stack_set_zero(void) { } -static inline void debug_stack_reset(void) { } #endif /* X86_64 */ extern unsigned int xstate_size; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index caa404556b9..266e4649b1d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1093,11 +1093,13 @@ unsigned long kernel_eflags; DEFINE_PER_CPU(struct orig_ist, orig_ist); static DEFINE_PER_CPU(unsigned long, debug_stack_addr); +DEFINE_PER_CPU(int, debug_stack_usage); int is_debug_stack(unsigned long addr) { - return addr <= __get_cpu_var(debug_stack_addr) && - addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ); + return __get_cpu_var(debug_stack_usage) || + (addr <= __get_cpu_var(debug_stack_addr) && + addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); } void debug_stack_set_zero(void) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a93c5cabc36..0072b38e3ea 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -316,9 +316,15 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) return; #endif + /* + * Let others (NMI) know that the debug stack is in use + * as we may switch to the interrupt stack. + */ + debug_stack_usage_inc(); preempt_conditional_sti(regs); do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); preempt_conditional_cli(regs); + debug_stack_usage_dec(); } #ifdef CONFIG_X86_64 @@ -411,6 +417,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) SIGTRAP) == NOTIFY_STOP) return; + /* + * Let others (NMI) know that the debug stack is in use + * as we may switch to the interrupt stack. + */ + debug_stack_usage_inc(); + /* It's safe to allow irq's after DR6 has been saved */ preempt_conditional_sti(regs); @@ -418,6 +430,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); preempt_conditional_cli(regs); + debug_stack_usage_dec(); return; } @@ -437,6 +450,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) send_sigtrap(tsk, regs, error_code, si_code); preempt_conditional_cli(regs); + debug_stack_usage_dec(); return; } -- cgit v1.2.3-70-g09d2