From b615ebdac97c648a2ae7d23c5a0bbb3972adf928 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:00 +0100 Subject: [PATCH] x86: shorten lines in unwinder to be <= 80 characters Andrew complained about > 80 character lines in the new unwinder. Fix that. Signed-off-by: Andi Kleen --- arch/i386/kernel/traps.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/i386/kernel/traps.c') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index fe9c5e8e7e6..fe81d89c50d 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -173,6 +173,8 @@ dump_trace_unwind(struct unwind_frame_info *info, void *data) return n; } +#define MSG(msg) ops->warning(data, msg) + void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, struct stacktrace_ops *ops, void *data) @@ -191,29 +193,31 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (unwind_init_frame_info(&info, task, regs) == 0) unw_ret = dump_trace_unwind(&info, &oad); } else if (task == current) - unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); + unw_ret = unwind_init_running(&info, dump_trace_unwind, + &oad); else { if (unwind_init_blocked(&info, task) == 0) unw_ret = dump_trace_unwind(&info, &oad); } if (unw_ret > 0) { if (call_trace == 1 && !arch_unw_user_mode(&info)) { - ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", + ops->warning_symbol(data, + "DWARF2 unwinder stuck at %s\n", UNW_PC(&info)); if (UNW_SP(&info) >= PAGE_OFFSET) { - ops->warning(data, "Leftover inexact backtrace:\n"); + MSG("Leftover inexact backtrace:\n"); stack = (void *)UNW_SP(&info); if (!stack) return; ebp = UNW_FP(&info); } else - ops->warning(data, "Full inexact backtrace again:\n"); + MSG("Full inexact backtrace again:\n"); } else if (call_trace >= 1) return; else - ops->warning(data, "Full inexact backtrace again:\n"); + MSG("Full inexact backtrace again:\n"); } else - ops->warning(data, "Inexact backtrace:\n"); + MSG("Inexact backtrace:\n"); } if (!stack) { unsigned long dummy; -- cgit v1.2.3-70-g09d2 From dd315df1767cf56bd4fb8d730fdff4a3d7e15d84 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:00 +0100 Subject: [PATCH] x86: Compress stack unwinder output The unwinder has some extra newlines, which eat up loads of screen space when it spews. (See https://bugzilla.redhat.com/bugzilla/attachment.cgi?id=137900 for a nasty example). warning_symbol-> and warning-> already printk a newline, so don't add one in the strings passed to them. 
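For reference, warning and warning_symbol (and the MSG() wrapper added in the previous patch) are just callbacks on the struct stacktrace_ops handed to dump_trace(). A minimal printk-backed set of callbacks might look roughly like the sketch below; it is inferred from the call sites visible in these patches, not copied from the real arch/i386 headers, and the names print_warning, print_warning_symbol, print_address and printk_ops are invented here.

#include <linux/kernel.h>
#include <linux/kallsyms.h>

static void print_warning(void *data, char *msg)
{
        /* the callback supplies the trailing newline */
        printk("%s%s\n", (char *)data, msg);
}

static void print_warning_symbol(void *data, char *msg, unsigned long symbol)
{
        printk("%s", (char *)data);
        print_symbol(msg, symbol);      /* msg carries the "%s", e.g. "DWARF2 unwinder stuck at %s" */
        printk("\n");
}

static void print_address(void *data, unsigned long addr)
{
        printk("%s [<%08lx>]\n", (char *)data, addr);
}

static struct stacktrace_ops printk_ops = {
        .warning        = print_warning,
        .warning_symbol = print_warning_symbol,
        .address        = print_address,
};

Because the callbacks terminate each message themselves, the strings passed to them should not carry their own "\n"; that is exactly what the hunks below strip out.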
[AK: redone for new code] Signed-off-by: Dave Jones Signed-off-by: Andi Kleen --- arch/i386/kernel/traps.c | 10 +++++----- arch/x86_64/kernel/traps.c | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/i386/kernel/traps.c') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index fe81d89c50d..396041a46e3 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -202,22 +202,22 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (unw_ret > 0) { if (call_trace == 1 && !arch_unw_user_mode(&info)) { ops->warning_symbol(data, - "DWARF2 unwinder stuck at %s\n", + "DWARF2 unwinder stuck at %s", UNW_PC(&info)); if (UNW_SP(&info) >= PAGE_OFFSET) { - MSG("Leftover inexact backtrace:\n"); + MSG("Leftover inexact backtrace:"); stack = (void *)UNW_SP(&info); if (!stack) return; ebp = UNW_FP(&info); } else - MSG("Full inexact backtrace again:\n"); + MSG("Full inexact backtrace again:"); } else if (call_trace >= 1) return; else - MSG("Full inexact backtrace again:\n"); + MSG("Full inexact backtrace again:"); } else - MSG("Inexact backtrace:\n"); + MSG("Inexact backtrace:"); } if (!stack) { unsigned long dummy; diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index d3f43c958ca..eedd4e759c3 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -280,7 +280,7 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, if (unw_ret > 0) { if (call_trace == 1 && !arch_unw_user_mode(&info)) { ops->warning_symbol(data, - "DWARF2 unwinder stuck at %s\n", + "DWARF2 unwinder stuck at %s", UNW_PC(&info)); if ((long)UNW_SP(&info) < 0) { MSG("Leftover inexact backtrace:"); @@ -288,13 +288,13 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, if (!stack) return; } else - MSG("Full inexact backtrace again:\n"); + MSG("Full inexact backtrace again:"); } else if (call_trace >= 1) return; else - MSG("Full inexact backtrace again:\n"); + MSG("Full inexact backtrace again:"); } else - MSG("Inexact backtrace:\n"); + MSG("Inexact backtrace:"); } if (!stack) { unsigned long dummy; -- cgit v1.2.3-70-g09d2 From a63954b5cad5765e52870bb649992bf636f32a6b Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 7 Dec 2006 02:14:00 +0100 Subject: [PATCH] i386: remove pointless printk from i386 oops output This just got removed on x86-64, do the same on 32bit. It always annoyed me when this ate a line of oops output pushing interesting stuff off the screen. Signed-off-by: Dave Jones Signed-off-by: Andi Kleen --- arch/i386/kernel/traps.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/i386/kernel/traps.c') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 396041a46e3..48ebfab661b 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -777,7 +777,6 @@ void __kprobes die_nmi(struct pt_regs *regs, const char *msg) printk(" on CPU%d, eip %08lx, registers:\n", smp_processor_id(), regs->eip); show_registers(regs); - printk(KERN_EMERG "console shuts up ...\n"); console_silent(); spin_unlock(&nmi_print_lock); bust_spinlocks(0); -- cgit v1.2.3-70-g09d2 From e5e3a0428968dcc1f9318ce1c941a918e99f8b84 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 7 Dec 2006 02:14:01 +0100 Subject: [PATCH] i386: remove default_ldt, and simplify ldt-setting. This patch removes the default_ldt[] array, as it has been unused since iBCS stopped being supported. This means it is now possible to actually set an empty LDT segment. 
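An empty LDT is meant literally here: lldt accepts the null selector, after which any segment load that goes through the LDT faults with #GP instead of finding stale entries. A standalone sketch of that case (the helper name is invented; the real code is the set_ldt() introduced below):

static inline void load_null_ldt(void)
{
        const unsigned short sel = 0;

        /* LDTR := null selector; later LDT references take #GP */
        __asm__ __volatile__("lldt %w0" : : "q" (sel));
}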
In order to deal with this, the set_ldt_desc/load_LDT pair has been replaced with a single set_ldt() operation which is responsible for both setting up the LDT descriptor in the GDT, and reloading the LDT register. If there are no LDT entries, the LDT register is loaded with a NULL descriptor. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Andi Kleen Acked-by: Zachary Amsden Signed-off-by: Andrew Morton --- arch/i386/kernel/ldt.c | 4 +--- arch/i386/kernel/traps.c | 3 --- include/asm-i386/desc.h | 50 ++++++++++++++++-------------------------- include/asm-i386/mmu_context.h | 4 ++-- 4 files changed, 22 insertions(+), 39 deletions(-) (limited to 'arch/i386/kernel/traps.c') diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c index 445211eb2d5..b410e5fb034 100644 --- a/arch/i386/kernel/ldt.c +++ b/arch/i386/kernel/ldt.c @@ -160,16 +160,14 @@ static int read_default_ldt(void __user * ptr, unsigned long bytecount) { int err; unsigned long size; - void *address; err = 0; - address = &default_ldt[0]; size = 5*sizeof(struct desc_struct); if (size > bytecount) size = bytecount; err = size; - if (copy_to_user(ptr, address, size)) + if (clear_user(ptr, size)) err = -EFAULT; return err; diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 48ebfab661b..56655ea8d98 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -61,9 +61,6 @@ int panic_on_unrecovered_nmi; asmlinkage int system_call(void); -struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, - { 0, 0 }, { 0, 0 } }; - /* Do we ignore FPU interrupts ? */ char ignore_fpu_irq = 0; diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h index 5874ef119ff..a0398f780ca 100644 --- a/include/asm-i386/desc.h +++ b/include/asm-i386/desc.h @@ -33,11 +33,6 @@ static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address; } -/* - * This is the ldt that every process will get unless we need - * something other than this. 
- */ -extern struct desc_struct default_ldt[]; extern struct desc_struct idt_table[]; extern void set_intr_gate(unsigned int irq, void * addr); @@ -65,7 +60,6 @@ static inline void pack_gate(__u32 *a, __u32 *b, #define DESCTYPE_S 0x10 /* !system */ #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) -#define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)) #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) #define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr)) @@ -115,13 +109,20 @@ static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const vo write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b); } -static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries) +static inline void set_ldt(void *addr, unsigned int entries) { - __u32 a, b; - pack_descriptor(&a, &b, (unsigned long)addr, - entries * sizeof(struct desc_struct) - 1, - DESCTYPE_LDT, 0); - write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b); + if (likely(entries == 0)) + __asm__ __volatile__("lldt %w0"::"q" (0)); + else { + unsigned cpu = smp_processor_id(); + __u32 a, b; + + pack_descriptor(&a, &b, (unsigned long)addr, + entries * sizeof(struct desc_struct) - 1, + DESCTYPE_LDT, 0); + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b); + __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)); + } } #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) @@ -153,35 +154,22 @@ static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entri static inline void clear_LDT(void) { - int cpu = get_cpu(); - - set_ldt_desc(cpu, &default_ldt[0], 5); - load_LDT_desc(); - put_cpu(); + set_ldt(NULL, 0); } /* * load one particular LDT into the current CPU */ -static inline void load_LDT_nolock(mm_context_t *pc, int cpu) +static inline void load_LDT_nolock(mm_context_t *pc) { - void *segments = pc->ldt; - int count = pc->size; - - if (likely(!count)) { - segments = &default_ldt[0]; - count = 5; - } - - set_ldt_desc(cpu, segments, count); - load_LDT_desc(); + set_ldt(pc->ldt, pc->size); } static inline void load_LDT(mm_context_t *pc) { - int cpu = get_cpu(); - load_LDT_nolock(pc, cpu); - put_cpu(); + preempt_disable(); + load_LDT_nolock(pc); + preempt_enable(); } static inline unsigned long get_desc_base(unsigned long *desc) diff --git a/include/asm-i386/mmu_context.h b/include/asm-i386/mmu_context.h index 62b7bf18409..1b1495372c4 100644 --- a/include/asm-i386/mmu_context.h +++ b/include/asm-i386/mmu_context.h @@ -44,7 +44,7 @@ static inline void switch_mm(struct mm_struct *prev, * load the LDT, if the LDT is different: */ if (unlikely(prev->context.ldt != next->context.ldt)) - load_LDT_nolock(&next->context, cpu); + load_LDT_nolock(&next->context); } #ifdef CONFIG_SMP else { @@ -56,7 +56,7 @@ static inline void switch_mm(struct mm_struct *prev, * tlb flush IPI delivery. We must reload %cr3. 
*/ load_cr3(next->pgd); - load_LDT_nolock(&next->context, cpu); + load_LDT_nolock(&next->context); } } #endif -- cgit v1.2.3-70-g09d2 From be44d2aabce2d62f72d5751d1871b6212bf7a1c7 Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Thu, 7 Dec 2006 02:14:01 +0100 Subject: [PATCH] i386: espfix cleanup Clean up the espfix code: - Introduced PER_CPU() macro to be used from asm - Introduced GET_DESC_BASE() macro to be used from asm - Rewrote the fixup code in asm, as calling a C code with the altered %ss appeared to be unsafe - No longer altering the stack from a .fixup section - 16bit per-cpu stack is no longer used, instead the stack segment base is patched the way so that the high word of the kernel and user %esp are the same. - Added the limit-patching for the espfix segment. (Chuck Ebbert) [jeremy@goop.org: use the x86 scaling addressing mode rather than shifting] Signed-off-by: Stas Sergeev Signed-off-by: Andi Kleen Acked-by: Zachary Amsden Acked-by: Chuck Ebbert <76306.1226@compuserve.com> Acked-by: Jan Beulich Cc: Andi Kleen Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andrew Morton --- arch/i386/kernel/asm-offsets.c | 5 +++ arch/i386/kernel/cpu/common.c | 11 ------- arch/i386/kernel/entry.S | 73 +++++++++++++++++++----------------------- arch/i386/kernel/head.S | 2 +- arch/i386/kernel/traps.c | 57 +++++++++------------------------ include/asm-i386/desc.h | 27 +++++++++++++--- include/asm-i386/percpu.h | 25 +++++++++++++++ 7 files changed, 103 insertions(+), 97 deletions(-) (limited to 'arch/i386/kernel/traps.c') diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index c80271f8f08..e94d910a28b 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c @@ -58,6 +58,11 @@ void foo(void) OFFSET(TI_sysenter_return, thread_info, sysenter_return); BLANK(); + OFFSET(GDS_size, Xgt_desc_struct, size); + OFFSET(GDS_address, Xgt_desc_struct, address); + OFFSET(GDS_pad, Xgt_desc_struct, pad); + BLANK(); + OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); BLANK(); diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index d9f3e3c31f0..5532fc4e1bf 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -24,9 +24,6 @@ DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); -DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); -EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); - static int cachesize_override __cpuinitdata = -1; static int disable_x86_fxsr __cpuinitdata; static int disable_x86_serial_nr __cpuinitdata = 1; @@ -603,7 +600,6 @@ void __cpuinit cpu_init(void) struct tss_struct * t = &per_cpu(init_tss, cpu); struct thread_struct *thread = ¤t->thread; struct desc_struct *gdt; - __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu); struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); if (cpu_test_and_set(cpu, cpu_initialized)) { @@ -651,13 +647,6 @@ old_gdt: * and set up the GDT descriptor: */ memcpy(gdt, cpu_gdt_table, GDT_SIZE); - - /* Set up GDT entry for 16bit stack */ - *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |= - ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) | - ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | - (CPU_16BIT_STACK_SIZE - 1); - cpu_gdt_descr->size = GDT_SIZE - 1; cpu_gdt_descr->address = (unsigned long)gdt; diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 5a63d6fdb70..c38d801ba0b 100644 --- a/arch/i386/kernel/entry.S +++ 
b/arch/i386/kernel/entry.S @@ -48,6 +48,7 @@ #include #include #include +#include #include #include "irq_vectors.h" @@ -418,23 +419,18 @@ ldt_ss: * This is an "official" bug of all the x86-compatible * CPUs, which we can try to work around to make * dosemu and wine happy. */ - subl $8, %esp # reserve space for switch16 pointer - CFI_ADJUST_CFA_OFFSET 8 + movl OLDESP(%esp), %eax + movl %esp, %edx + call patch_espfix_desc + pushl $__ESPFIX_SS + CFI_ADJUST_CFA_OFFSET 4 + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 DISABLE_INTERRUPTS TRACE_IRQS_OFF - movl %esp, %eax - /* Set up the 16bit stack frame with switch32 pointer on top, - * and a switch16 pointer on top of the current frame. */ - call setup_x86_bogus_stack - CFI_ADJUST_CFA_OFFSET -8 # frame has moved - TRACE_IRQS_IRET - RESTORE_REGS - lss 20+4(%esp), %esp # switch to 16bit stack -1: INTERRUPT_RETURN -.section __ex_table,"a" - .align 4 - .long 1b,iret_exc -.previous + lss (%esp), %esp + CFI_ADJUST_CFA_OFFSET -8 + jmp restore_nocheck CFI_ENDPROC # perform work that needs to be done immediately before resumption @@ -524,30 +520,30 @@ syscall_badsys: CFI_ENDPROC #define FIXUP_ESPFIX_STACK \ - movl %esp, %eax; \ - /* switch to 32bit stack using the pointer on top of 16bit stack */ \ - lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \ - /* copy data from 16bit stack to 32bit stack */ \ - call fixup_x86_bogus_stack; \ - /* put ESP to the proper location */ \ - movl %eax, %esp; -#define UNWIND_ESPFIX_STACK \ + /* since we are on a wrong stack, we cant make it a C code :( */ \ + GET_THREAD_INFO(%ebp); \ + movl TI_cpu(%ebp), %ebx; \ + PER_CPU(cpu_gdt_descr, %ebx); \ + movl GDS_address(%ebx), %ebx; \ + GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ + addl %esp, %eax; \ + pushl $__KERNEL_DS; \ + CFI_ADJUST_CFA_OFFSET 4; \ pushl %eax; \ CFI_ADJUST_CFA_OFFSET 4; \ + lss (%esp), %esp; \ + CFI_ADJUST_CFA_OFFSET -8; +#define UNWIND_ESPFIX_STACK \ movl %ss, %eax; \ - /* see if on 16bit stack */ \ + /* see if on espfix stack */ \ cmpw $__ESPFIX_SS, %ax; \ - je 28f; \ -27: popl %eax; \ - CFI_ADJUST_CFA_OFFSET -4; \ -.section .fixup,"ax"; \ -28: movl $__KERNEL_DS, %eax; \ + jne 27f; \ + movl $__KERNEL_DS, %eax; \ movl %eax, %ds; \ movl %eax, %es; \ - /* switch to 32bit stack */ \ + /* switch to normal stack */ \ FIXUP_ESPFIX_STACK; \ - jmp 27b; \ -.previous +27:; /* * Build the entry stubs and pointer table with @@ -614,7 +610,6 @@ error_code: pushl %eax CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET eax, 0 - xorl %eax, %eax pushl %ebp CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET ebp, 0 @@ -627,7 +622,6 @@ error_code: pushl %edx CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET edx, 0 - decl %eax # eax = -1 pushl %ecx CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET ecx, 0 @@ -644,7 +638,7 @@ error_code: /*CFI_REGISTER es, ecx*/ movl ES(%esp), %edi # get the function address movl ORIG_EAX(%esp), %edx # get the error code - movl %eax, ORIG_EAX(%esp) + movl $-1, ORIG_EAX(%esp) movl %ecx, ES(%esp) /*CFI_REL_OFFSET es, ES*/ movl $(__USER_DS), %ecx @@ -754,7 +748,7 @@ KPROBE_ENTRY(nmi) cmpw $__ESPFIX_SS, %ax popl %eax CFI_ADJUST_CFA_OFFSET -4 - je nmi_16bit_stack + je nmi_espfix_stack cmpl $sysenter_entry,(%esp) je nmi_stack_fixup pushl %eax @@ -797,7 +791,7 @@ nmi_debug_stack_check: FIX_STACK(24,nmi_stack_correct, 1) jmp nmi_stack_correct -nmi_16bit_stack: +nmi_espfix_stack: /* We have a RING0_INT_FRAME here. 
* * create the pointer to lss back @@ -806,7 +800,6 @@ nmi_16bit_stack: CFI_ADJUST_CFA_OFFSET 4 pushl %esp CFI_ADJUST_CFA_OFFSET 4 - movzwl %sp, %esp addw $4, (%esp) /* copy the iret frame of 12 bytes */ .rept 3 @@ -817,11 +810,11 @@ nmi_16bit_stack: CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL FIXUP_ESPFIX_STACK # %eax == %esp - CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved xorl %edx,%edx # zero error code call do_nmi RESTORE_REGS - lss 12+4(%esp), %esp # back to 16bit stack + lss 12+4(%esp), %esp # back to espfix stack + CFI_ADJUST_CFA_OFFSET -24 1: INTERRUPT_RETURN CFI_ENDPROC .section __ex_table,"a" diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index ca31f18d277..b1f1df11fcc 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -584,7 +584,7 @@ ENTRY(cpu_gdt_table) .quad 0x00009a000000ffff /* 0xc0 APM CS 16 code (16 bit) */ .quad 0x004092000000ffff /* 0xc8 APM DS data */ - .quad 0x0000920000000000 /* 0xd0 - ESPFIX 16-bit SS */ + .quad 0x00c0920000000000 /* 0xd0 - ESPFIX SS */ .quad 0x0000000000000000 /* 0xd8 - unused */ .quad 0x0000000000000000 /* 0xe0 - unused */ .quad 0x0000000000000000 /* 0xe8 - unused */ diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 56655ea8d98..f9bb1f89d68 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -1088,49 +1088,24 @@ fastcall void do_spurious_interrupt_bug(struct pt_regs * regs, #endif } -fastcall void setup_x86_bogus_stack(unsigned char * stk) +fastcall unsigned long patch_espfix_desc(unsigned long uesp, + unsigned long kesp) { - unsigned long *switch16_ptr, *switch32_ptr; - struct pt_regs *regs; - unsigned long stack_top, stack_bot; - unsigned short iret_frame16_off; int cpu = smp_processor_id(); - /* reserve the space on 32bit stack for the magic switch16 pointer */ - memmove(stk, stk + 8, sizeof(struct pt_regs)); - switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs)); - regs = (struct pt_regs *)stk; - /* now the switch32 on 16bit stack */ - stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); - stack_top = stack_bot + CPU_16BIT_STACK_SIZE; - switch32_ptr = (unsigned long *)(stack_top - 8); - iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20; - /* copy iret frame on 16bit stack */ - memcpy((void *)(stack_bot + iret_frame16_off), ®s->eip, 20); - /* fill in the switch pointers */ - switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off; - switch16_ptr[1] = __ESPFIX_SS; - switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) + - 8 - CPU_16BIT_STACK_SIZE; - switch32_ptr[1] = __KERNEL_DS; -} - -fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp) -{ - unsigned long *switch32_ptr; - unsigned char *stack16, *stack32; - unsigned long stack_top, stack_bot; - int len; - int cpu = smp_processor_id(); - stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); - stack_top = stack_bot + CPU_16BIT_STACK_SIZE; - switch32_ptr = (unsigned long *)(stack_top - 8); - /* copy the data from 16bit stack to 32bit stack */ - len = CPU_16BIT_STACK_SIZE - 8 - sp; - stack16 = (unsigned char *)(stack_bot + sp); - stack32 = (unsigned char *) - (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len); - memcpy(stack32, stack16, len); - return stack32; + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); + struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address; + unsigned long base = (kesp - uesp) & -THREAD_SIZE; + unsigned long new_kesp = kesp - base; + unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; + __u64 desc = 
*(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; + /* Set up base for espfix segment */ + desc &= 0x00f0ff0000000000ULL; + desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | + ((((__u64)base) << 32) & 0xff00000000000000ULL) | + ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) | + (lim_pages & 0xffff); + *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; + return new_kesp; } /* diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h index a0398f780ca..6cf2ac2bfde 100644 --- a/include/asm-i386/desc.h +++ b/include/asm-i386/desc.h @@ -4,8 +4,6 @@ #include #include -#define CPU_16BIT_STACK_SIZE 1024 - #ifndef __ASSEMBLY__ #include @@ -16,8 +14,6 @@ extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; -DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); - struct Xgt_desc_struct { unsigned short size; unsigned long address __attribute__((packed)); @@ -181,6 +177,29 @@ static inline unsigned long get_desc_base(unsigned long *desc) return base; } +#else /* __ASSEMBLY__ */ + +/* + * GET_DESC_BASE reads the descriptor base of the specified segment. + * + * Args: + * idx - descriptor index + * gdt - GDT pointer + * base - 32bit register to which the base will be written + * lo_w - lo word of the "base" register + * lo_b - lo byte of the "base" register + * hi_b - hi byte of the low word of the "base" register + * + * Example: + * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) + * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax. + */ +#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \ + movb idx*8+4(gdt), lo_b; \ + movb idx*8+7(gdt), hi_b; \ + shll $16, base; \ + movw idx*8+2(gdt), lo_w; + #endif /* !__ASSEMBLY__ */ #endif diff --git a/include/asm-i386/percpu.h b/include/asm-i386/percpu.h index 5764afa4b6a..510ae1d3486 100644 --- a/include/asm-i386/percpu.h +++ b/include/asm-i386/percpu.h @@ -1,6 +1,31 @@ #ifndef __ARCH_I386_PERCPU__ #define __ARCH_I386_PERCPU__ +#ifndef __ASSEMBLY__ #include +#else + +/* + * PER_CPU finds an address of a per-cpu variable. + * + * Args: + * var - variable name + * cpu - 32bit register containing the current CPU number + * + * The resulting address is stored in the "cpu" argument. + * + * Example: + * PER_CPU(cpu_gdt_descr, %ebx) + */ +#ifdef CONFIG_SMP +#define PER_CPU(var, cpu) \ + movl __per_cpu_offset(,cpu,4), cpu; \ + addl $per_cpu__/**/var, cpu; +#else /* ! SMP */ +#define PER_CPU(var, cpu) \ + movl $per_cpu__/**/var, cpu; +#endif /* SMP */ + +#endif /* !__ASSEMBLY__ */ #endif /* __ARCH_I386_PERCPU__ */ -- cgit v1.2.3-70-g09d2 From acc207616a91a413a50fdd8847a747c4a7324167 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Thu, 7 Dec 2006 02:14:01 +0100 Subject: [PATCH] i386: add sleazy FPU optimization i386 port of the sLeAZY-fpu feature. Chuck reports that this gives him a +/- 0.4% improvement on his simple benchmark x86_64 description follows: Right now the kernel on x86-64 has a 100% lazy fpu behavior: after *every* context switch a trap is taken for the first FPU use to restore the FPU context lazily. This is of course great for applications that have very sporadic or no FPU use (since then you avoid doing the expensive save/restore all the time). However for very frequent FPU users... you take an extra trap every context switch. The patch below adds a simple heuristic to this code: After 5 consecutive context switches of FPU use, the lazy behavior is disabled and the context gets restored every context switch. If the app indeed uses the FPU, the trap is avoided. 
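Condensed from the hunks below, which spread the logic across __unlazy_fpu(), __switch_to() and math_state_restore(), the per-context-switch behaviour is roughly the following sketch; it is not the literal kernel code and the function name is invented:

#include <linux/sched.h>
#include <linux/prefetch.h>
#include <asm/i387.h>

static void sleazy_fpu_sketch(struct task_struct *prev_p, struct task_struct *next_p)
{
        if (task_thread_info(prev_p)->status & TS_USEDFPU)
                save_init_fpu(prev_p);          /* lazy save, as before */
        else
                prev_p->fpu_counter = 0;        /* FPU idle this slice: the burst is over */

        if (next_p->fpu_counter > 5) {
                /* 5+ consecutive FPU timeslices: restore eagerly, no trap */
                prefetch(&next_p->thread.i387.fxsave);
                math_state_restore();           /* also increments the counter */
        }
        /*
         * Otherwise stay lazy: the first FPU use traps into
         * math_state_restore(), which bumps fpu_counter.
         */
}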
(the chance of the 6th time slice using FPU after the previous 5 having done so are quite high obviously). After 256 switches, this is reset and lazy behavior is returned (until there are 5 consecutive ones again). The reason for this is to give apps that do longer bursts of FPU use still the lazy behavior back after some time. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- arch/i386/kernel/process.c | 12 ++++++++++++ arch/i386/kernel/traps.c | 3 ++- include/asm-i386/i387.h | 5 ++++- 3 files changed, 18 insertions(+), 2 deletions(-) (limited to 'arch/i386/kernel/traps.c') diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index dd53c58f64f..ae924c416b6 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -648,6 +648,11 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas __unlazy_fpu(prev_p); + + /* we're going to use this soon, after a few expensive things */ + if (next_p->fpu_counter > 5) + prefetch(&next->i387.fxsave); + /* * Reload esp0. */ @@ -697,6 +702,13 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas disable_tsc(prev_p, next_p); + /* If the task has used fpu the last 5 timeslices, just do a full + * restore of the math state immediately to avoid the trap; the + * chances of needing FPU soon are obviously high now + */ + if (next_p->fpu_counter > 5) + math_state_restore(); + return prev_p; } diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index f9bb1f89d68..4a6fa2837df 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -1118,7 +1118,7 @@ fastcall unsigned long patch_espfix_desc(unsigned long uesp, * Must be called with kernel preemption disabled (in this case, * local interrupts are disabled at the call-site in entry.S). */ -asmlinkage void math_state_restore(struct pt_regs regs) +asmlinkage void math_state_restore(void) { struct thread_info *thread = current_thread_info(); struct task_struct *tsk = thread->task; @@ -1128,6 +1128,7 @@ asmlinkage void math_state_restore(struct pt_regs regs) init_fpu(tsk); restore_fpu(tsk); thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ + tsk->fpu_counter++; } #ifndef CONFIG_MATH_EMULATION diff --git a/include/asm-i386/i387.h b/include/asm-i386/i387.h index bc1d6edae1e..434936c732d 100644 --- a/include/asm-i386/i387.h +++ b/include/asm-i386/i387.h @@ -76,7 +76,9 @@ static inline void __save_init_fpu( struct task_struct *tsk ) #define __unlazy_fpu( tsk ) do { \ if (task_thread_info(tsk)->status & TS_USEDFPU) \ - save_init_fpu( tsk ); \ + save_init_fpu( tsk ); \ + else \ + tsk->fpu_counter = 0; \ } while (0) #define __clear_fpu( tsk ) \ @@ -118,6 +120,7 @@ static inline void save_init_fpu( struct task_struct *tsk ) extern unsigned short get_fpu_cwd( struct task_struct *tsk ); extern unsigned short get_fpu_swd( struct task_struct *tsk ); extern unsigned short get_fpu_mxcsr( struct task_struct *tsk ); +extern asmlinkage void math_state_restore(void); /* * Signal frame handlers... -- cgit v1.2.3-70-g09d2 From 9c5f8be4625e73f17e28fea89399ed871a30e064 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:03 +0100 Subject: [PATCH] x86: Mention PCI instead of RAM in NMI parity error message On modern systems RAM errors don't cause NMIs, but it's usually caused by PCI SERR. Mention PCI instead of RAM in the printk. 
Reported by r_hayashi@ctc-g.co.jp (Ryutaro Hayashi) Cc: r_hayashi@ctc-g.co.jp Signed-off-by: Andi Kleen --- arch/i386/kernel/traps.c | 3 +-- arch/x86_64/kernel/traps.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/i386/kernel/traps.c') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 4a6fa2837df..237f4884a1e 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -708,8 +708,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) { printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on " "CPU %d.\n", reason, smp_processor_id()); - printk(KERN_EMERG "You probably have a hardware problem with your RAM " - "chips\n"); + printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index e37b4d77d5a..70bfaab9822 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -793,8 +793,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) { printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", reason); - printk(KERN_EMERG "You probably have a hardware problem with your " - "RAM chips\n"); + printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); -- cgit v1.2.3-70-g09d2 From 11a4180c0b03e2ee0c948fd8430ee092dc1625b3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] i386: Use probe_kernel_address instead of __get_user in fault paths Makes the intention of the code cleaner to read and avoids a potential deadlock on mmap_sem. Also change the types of the arguments to not include __user because they're really not user addresses. Signed-off-by: Andi Kleen --- arch/i386/kernel/traps.c | 24 +++++++++++++----------- arch/i386/mm/fault.c | 12 ++++++------ 2 files changed, 19 insertions(+), 17 deletions(-) (limited to 'arch/i386/kernel/traps.c') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 237f4884a1e..7b2f9f02208 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -380,7 +380,7 @@ void show_registers(struct pt_regs *regs) * time of the fault.. 
*/ if (in_kernel) { - u8 __user *eip; + u8 *eip; int code_bytes = 64; unsigned char c; @@ -389,18 +389,20 @@ void show_registers(struct pt_regs *regs) printk(KERN_EMERG "Code: "); - eip = (u8 __user *)regs->eip - 43; - if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { + eip = (u8 *)regs->eip - 43; + if (eip < (u8 *)PAGE_OFFSET || + probe_kernel_address(eip, c)) { /* try starting at EIP */ - eip = (u8 __user *)regs->eip; + eip = (u8 *)regs->eip; code_bytes = 32; } for (i = 0; i < code_bytes; i++, eip++) { - if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { + if (eip < (u8 *)PAGE_OFFSET || + probe_kernel_address(eip, c)) { printk(" Bad EIP value."); break; } - if (eip == (u8 __user *)regs->eip) + if (eip == (u8 *)regs->eip) printk("<%02x> ", c); else printk("%02x ", c); @@ -416,7 +418,7 @@ static void handle_BUG(struct pt_regs *regs) if (eip < PAGE_OFFSET) return; - if (probe_kernel_address((unsigned short __user *)eip, ud2)) + if (probe_kernel_address((unsigned short *)eip, ud2)) return; if (ud2 != 0x0b0f) return; @@ -429,11 +431,11 @@ static void handle_BUG(struct pt_regs *regs) char *file; char c; - if (probe_kernel_address((unsigned short __user *)(eip + 2), - line)) + if (probe_kernel_address((unsigned short *)(eip + 2), line)) break; - if (__get_user(file, (char * __user *)(eip + 4)) || - (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) + if (probe_kernel_address((char **)(eip + 4), file) || + (unsigned long)file < PAGE_OFFSET || + probe_kernel_address(file, c)) file = ""; printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line); diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index 2581575786c..aaaa4d225f7 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -22,9 +22,9 @@ #include #include #include +#include #include -#include #include #include #include @@ -167,7 +167,7 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs, static int __is_prefetch(struct pt_regs *regs, unsigned long addr) { unsigned long limit; - unsigned long instr = get_segment_eip (regs, &limit); + unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit); int scan_more = 1; int prefetch = 0; int i; @@ -177,9 +177,9 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr) unsigned char instr_hi; unsigned char instr_lo; - if (instr > limit) + if (instr > (unsigned char *)limit) break; - if (__get_user(opcode, (unsigned char __user *) instr)) + if (probe_kernel_address(instr, opcode)) break; instr_hi = opcode & 0xf0; @@ -204,9 +204,9 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr) case 0x00: /* Prefetch instruction is 0x0F0D or 0x0F18 */ scan_more = 0; - if (instr > limit) + if (instr > (unsigned char *)limit) break; - if (__get_user(opcode, (unsigned char __user *) instr)) + if (probe_kernel_address(instr, opcode)) break; prefetch = (instr_lo == 0xF) && (opcode == 0x0D || opcode == 0x18); -- cgit v1.2.3-70-g09d2 From 0741f4d207a644482d7a040f05cd264c98cf7ee8 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Thu, 7 Dec 2006 02:14:11 +0100 Subject: [PATCH] x86: add sysctl for kstack_depth_to_print Add sysctl for kstack_depth_to_print. This lets users change the amount of raw stack data printed in dump_stack() without having to reboot. 
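The knob simply bounds the raw-stack dump loop. A simplified sketch of the consumer, condensed from the i386 show_stack() path (which this patch does not quote), with an invented function name:

#include <linux/kernel.h>
#include <linux/sched.h>

extern int kstack_depth_to_print;       /* the variable this patch exports */

static void show_raw_stack_sketch(unsigned long *stack)
{
        int i;

        for (i = 0; i < kstack_depth_to_print; i++) {
                if (kstack_end(stack))
                        break;
                printk("%08lx ", *stack++);
        }
        printk("\n");
}

With the sysctl entry below in place, the same limit can be adjusted at runtime through /proc/sys/kernel/kstack_depth_to_print.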
Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andi Kleen --- Documentation/sysctl/kernel.txt | 8 ++++++++ arch/i386/kernel/traps.c | 2 +- arch/x86_64/kernel/traps.c | 2 +- include/asm-x86_64/stacktrace.h | 2 ++ kernel/sysctl.c | 9 +++++++++ 5 files changed, 21 insertions(+), 2 deletions(-) (limited to 'arch/i386/kernel/traps.c') diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 0bc7f1e3c9e..5922e84d913 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -27,6 +27,7 @@ show up in /proc/sys/kernel: - hotplug - java-appletviewer [ binfmt_java, obsolete ] - java-interpreter [ binfmt_java, obsolete ] +- kstack_depth_to_print [ X86 only ] - l2cr [ PPC only ] - modprobe ==> Documentation/kmod.txt - msgmax @@ -170,6 +171,13 @@ This flag controls the L2 cache of G3 processor boards. If ============================================================== +kstack_depth_to_print: (X86 only) + +Controls the number of words to print when dumping the raw +kernel stack. + +============================================================== + osrelease, ostype & version: # cat osrelease diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 7b2f9f02208..1d48a75fa33 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -91,7 +91,7 @@ asmlinkage void alignment_check(void); asmlinkage void spurious_interrupt_bug(void); asmlinkage void machine_check(void); -static int kstack_depth_to_print = 24; +int kstack_depth_to_print = 24; #ifdef CONFIG_STACK_UNWIND static int call_trace = 1; #else diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 264db33476a..75ceccee178 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -108,7 +108,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) preempt_enable_no_resched(); } -static int kstack_depth_to_print = 12; +int kstack_depth_to_print = 12; #ifdef CONFIG_STACK_UNWIND static int call_trace = 1; #else diff --git a/include/asm-x86_64/stacktrace.h b/include/asm-x86_64/stacktrace.h index 5eb9799bef7..6f0b5459430 100644 --- a/include/asm-x86_64/stacktrace.h +++ b/include/asm-x86_64/stacktrace.h @@ -1,6 +1,8 @@ #ifndef _ASM_STACKTRACE_H #define _ASM_STACKTRACE_H 1 +extern int kstack_depth_to_print; + /* Generic stack tracer with callbacks */ struct stacktrace_ops { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 09e569f4792..6fc5e17086f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -54,6 +54,7 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp, #ifdef CONFIG_X86 #include +#include #endif #if defined(CONFIG_SYSCTL) @@ -707,6 +708,14 @@ static ctl_table kern_table[] = { .mode = 0444, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "kstack_depth_to_print", + .data = &kstack_depth_to_print, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif #if defined(CONFIG_MMU) { -- cgit v1.2.3-70-g09d2 From a36df98ab1cdd8a9e7daa4c1b5c48ffa2ad6ea09 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 7 Dec 2006 02:14:12 +0100 Subject: [PATCH] i386: touch softlockup during backtracing Sometimes the soft watchdog fires after we're done oopsing. See http://projects.info-pull.com/mokb/MOKB-25-11-2006.html for an example. 
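The underlying pattern is generic: any loop that can spend a long time printing during an oops should pet the watchdog on each pass so it does not fire mid-backtrace. Schematically, with hypothetical walker and printer helpers (the real change is the single touch_nmi_watchdog() call added to dump_trace() below):

#include <linux/nmi.h>

static void dump_frames_sketch(struct frame_walk *ctx)  /* hypothetical type */
{
        while (have_more_frames(ctx)) {         /* hypothetical walker */
                print_one_frame(ctx);           /* hypothetical printer */
                touch_nmi_watchdog();           /* pet NMI/softlockup watchdogs while printing */
        }
}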
AK: changed to touch_nmi_watchdog() Signed-off-by: Dave Jones Signed-off-by: Andi Kleen --- arch/i386/kernel/traps.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/i386/kernel/traps.c') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 1d48a75fa33..86d8476be4f 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -29,6 +29,7 @@ #include #include #include +#include #ifdef CONFIG_EISA #include @@ -248,6 +249,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, stack = (unsigned long*)context->previous_esp; if (!stack) break; + touch_nmi_watchdog(); } } EXPORT_SYMBOL(dump_trace); -- cgit v1.2.3-70-g09d2 From 359ad0d4015a9ab39243f2ebc4eb07915bd618b2 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 7 Dec 2006 02:14:13 +0100 Subject: [PATCH] unwinder: more sanity checks in Dwarf2 unwinder Tighten the requirements on both input to and output from the Dwarf2 unwinder. Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- arch/i386/kernel/traps.c | 7 +++++++ arch/x86_64/kernel/traps.c | 7 +++++++ include/asm-i386/unwind.h | 12 ++++-------- include/asm-x86_64/unwind.h | 8 ++------ kernel/unwind.c | 16 +++++++++++++++- 5 files changed, 35 insertions(+), 15 deletions(-) (limited to 'arch/i386/kernel/traps.c') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 86d8476be4f..c447807e2a4 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -161,12 +161,19 @@ dump_trace_unwind(struct unwind_frame_info *info, void *data) { struct ops_and_data *oad = (struct ops_and_data *)data; int n = 0; + unsigned long sp = UNW_SP(info); + if (arch_unw_user_mode(info)) + return -1; while (unwind(info) == 0 && UNW_PC(info)) { n++; oad->ops->address(oad->data, UNW_PC(info)); if (arch_unw_user_mode(info)) break; + if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1)) + && sp > UNW_SP(info)) + break; + sp = UNW_SP(info); } return n; } diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 9864d195c40..4fdd162f0be 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -225,12 +225,19 @@ static int dump_trace_unwind(struct unwind_frame_info *info, void *context) { struct ops_and_data *oad = (struct ops_and_data *)context; int n = 0; + unsigned long sp = UNW_SP(info); + if (arch_unw_user_mode(info)) + return -1; while (unwind(info) == 0 && UNW_PC(info)) { n++; oad->ops->address(oad->data, UNW_PC(info)); if (arch_unw_user_mode(info)) break; + if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1)) + && sp > UNW_SP(info)) + break; + sp = UNW_SP(info); } return n; } diff --git a/include/asm-i386/unwind.h b/include/asm-i386/unwind.h index 601fc67bd77..aa2c931e30d 100644 --- a/include/asm-i386/unwind.h +++ b/include/asm-i386/unwind.h @@ -79,17 +79,13 @@ extern asmlinkage int arch_unwind_init_running(struct unwind_frame_info *, void *arg), void *arg); -static inline int arch_unw_user_mode(const struct unwind_frame_info *info) +static inline int arch_unw_user_mode(/*const*/ struct unwind_frame_info *info) { -#if 0 /* This can only work when selector register and EFLAGS saves/restores - are properly annotated (and tracked in UNW_REGISTER_INFO). 
*/ - return user_mode_vm(&info->regs); -#else - return info->regs.eip < PAGE_OFFSET + return user_mode_vm(&info->regs) + || info->regs.eip < PAGE_OFFSET || (info->regs.eip >= __fix_to_virt(FIX_VDSO) - && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE) + && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE) || info->regs.esp < PAGE_OFFSET; -#endif } #else diff --git a/include/asm-x86_64/unwind.h b/include/asm-x86_64/unwind.h index 2e7ff10fd77..2f6349e4871 100644 --- a/include/asm-x86_64/unwind.h +++ b/include/asm-x86_64/unwind.h @@ -87,14 +87,10 @@ extern int arch_unwind_init_running(struct unwind_frame_info *, static inline int arch_unw_user_mode(const struct unwind_frame_info *info) { -#if 0 /* This can only work when selector register saves/restores - are properly annotated (and tracked in UNW_REGISTER_INFO). */ - return user_mode(&info->regs); -#else - return (long)info->regs.rip >= 0 + return user_mode(&info->regs) + || (long)info->regs.rip >= 0 || (info->regs.rip >= VSYSCALL_START && info->regs.rip < VSYSCALL_END) || (long)info->regs.rsp >= 0; -#endif } #else diff --git a/kernel/unwind.c b/kernel/unwind.c index af48168a3af..7e721f10410 100644 --- a/kernel/unwind.c +++ b/kernel/unwind.c @@ -95,6 +95,7 @@ static const struct { typedef unsigned long uleb128_t; typedef signed long sleb128_t; +#define sleb128abs __builtin_labs static struct unwind_table { struct { @@ -787,7 +788,7 @@ int unwind(struct unwind_frame_info *frame) #define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs]) const u32 *fde = NULL, *cie = NULL; const u8 *ptr = NULL, *end = NULL; - unsigned long pc = UNW_PC(frame) - frame->call_frame; + unsigned long pc = UNW_PC(frame) - frame->call_frame, sp; unsigned long startLoc = 0, endLoc = 0, cfa; unsigned i; signed ptrType = -1; @@ -936,6 +937,9 @@ int unwind(struct unwind_frame_info *frame) state.dataAlign = get_sleb128(&ptr, end); if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end) cie = NULL; + else if (UNW_PC(frame) % state.codeAlign + || UNW_SP(frame) % sleb128abs(state.dataAlign)) + return -EPERM; else { retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end); /* skip augmentation */ @@ -968,6 +972,8 @@ int unwind(struct unwind_frame_info *frame) #ifdef CONFIG_FRAME_POINTER unsigned long top, bottom; + if ((UNW_SP(frame) | UNW_FP(frame)) % sizeof(unsigned long)) + return -EPERM; top = STACK_TOP(frame->task); bottom = STACK_BOTTOM(frame->task); # if FRAME_RETADDR_OFFSET < 0 @@ -1018,6 +1024,7 @@ int unwind(struct unwind_frame_info *frame) || state.regs[retAddrReg].where == Nowhere || state.cfa.reg >= ARRAY_SIZE(reg_info) || reg_info[state.cfa.reg].width != sizeof(unsigned long) + || FRAME_REG(state.cfa.reg, unsigned long) % sizeof(unsigned long) || state.cfa.offs % sizeof(unsigned long)) return -EIO; /* update frame */ @@ -1038,6 +1045,8 @@ int unwind(struct unwind_frame_info *frame) #else # define CASES CASE(8); CASE(16); CASE(32); CASE(64) #endif + pc = UNW_PC(frame); + sp = UNW_SP(frame); for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { if (REG_INVALID(i)) { if (state.regs[i].where == Nowhere) @@ -1118,6 +1127,11 @@ int unwind(struct unwind_frame_info *frame) } } + if (UNW_PC(frame) % state.codeAlign + || UNW_SP(frame) % sleb128abs(state.dataAlign) + || (pc == UNW_PC(frame) && sp == UNW_SP(frame))) + return -EIO; + return 0; #undef CASES #undef FRAME_REG -- cgit v1.2.3-70-g09d2
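Taken together, the checks added to dump_trace_unwind() in both traps.c files reduce to a simple forward-progress rule for the stack pointer. Restated as a standalone helper for clarity (the helper name is invented; the condition is the one used in the hunks above):

#include <asm/page.h>

static int unwind_made_progress(unsigned long old_sp, unsigned long new_sp)
{
        /*
         * The stack grows down, so each unwind step should normally move
         * SP to a higher address.  If the new SP lands on the same page
         * as the old one but below it, treat the unwinder as stuck.
         */
        if ((old_sp & ~(PAGE_SIZE - 1)) == (new_sp & ~(PAGE_SIZE - 1)) &&
            old_sp > new_sp)
                return 0;
        return 1;
}

The unwind() changes add the complementary input checks: PC and SP must respect the CIE's code and data alignment factors, and an iteration that leaves both PC and SP unchanged is reported as an error.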