diff options
Diffstat (limited to 'arch/x86/mm/fault_64.c')
-rw-r--r-- | arch/x86/mm/fault_64.c | 124 |
1 files changed, 117 insertions, 7 deletions
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c index 7e98a769128..0d3d5979ce2 100644 --- a/arch/x86/mm/fault_64.c +++ b/arch/x86/mm/fault_64.c @@ -64,32 +64,136 @@ static inline int notify_page_fault(struct pt_regs *regs) #endif } -/* Sometimes the CPU reports invalid exceptions on prefetch. - Check that here and ignore. - Opcode checker based on code by Richard Brunner */ -static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, - unsigned long error_code) +#ifdef CONFIG_X86_32 +/* + * Return EIP plus the CS segment base. The segment limit is also + * adjusted, clamped to the kernel/user address space (whichever is + * appropriate), and returned in *eip_limit. + * + * The segment is checked, because it might have been changed by another + * task between the original faulting instruction and here. + * + * If CS is no longer a valid code segment, or if EIP is beyond the + * limit, or if it is a kernel address when CS is not a kernel segment, + * then the returned value will be greater than *eip_limit. + * + * This is slow, but is very rarely executed. + */ +static inline unsigned long get_segment_eip(struct pt_regs *regs, + unsigned long *eip_limit) +{ + unsigned long ip = regs->ip; + unsigned seg = regs->cs & 0xffff; + u32 seg_ar, seg_limit, base, *desc; + + /* Unlikely, but must come before segment checks. */ + if (unlikely(regs->flags & VM_MASK)) { + base = seg << 4; + *eip_limit = base + 0xffff; + return base + (ip & 0xffff); + } + + /* The standard kernel/user address space limit. */ + *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg; + + /* By far the most common cases. */ + if (likely(SEGMENT_IS_FLAT_CODE(seg))) + return ip; + + /* Check the segment exists, is within the current LDT/GDT size, + that kernel/user (ring 0..3) has the appropriate privilege, + that it's a code segment, and get the limit. 
*/ + __asm__("larl %3,%0; lsll %3,%1" + : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg)); + if ((~seg_ar & 0x9800) || ip > seg_limit) { + *eip_limit = 0; + return 1; /* So that returned ip > *eip_limit. */ + } + + /* Get the GDT/LDT descriptor base. + When you look for races in this code remember that + LDT and other horrors are only used in user space. */ + if (seg & (1<<2)) { + /* Must lock the LDT while reading it. */ + mutex_lock(&current->mm->context.lock); + desc = current->mm->context.ldt; + desc = (void *)desc + (seg & ~7); + } else { + /* Must disable preemption while reading the GDT. */ + desc = (u32 *)get_cpu_gdt_table(get_cpu()); + desc = (void *)desc + (seg & ~7); + } + + /* Decode the code segment base from the descriptor */ + base = get_desc_base((struct desc_struct *)desc); + + if (seg & (1<<2)) + mutex_unlock(&current->mm->context.lock); + else + put_cpu(); + + /* Adjust EIP and segment limit, and clamp at the kernel limit. + It's legitimate for segments to wrap at 0xffffffff. */ + seg_limit += base; + if (seg_limit < *eip_limit && seg_limit >= base) + *eip_limit = seg_limit; + return ip + base; +} +#endif + +/* + * X86_32 + * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. + * Check that here and ignore it. + * + * X86_64 + * Sometimes the CPU reports invalid exceptions on prefetch. + * Check that here and ignore it. + * + * Opcode checker based on code by Richard Brunner + */ +static int is_prefetch(struct pt_regs *regs, unsigned long addr, + unsigned long error_code) { unsigned char *instr; int scan_more = 1; int prefetch = 0; unsigned char *max_instr; +#ifdef CONFIG_X86_32 + unsigned long limit; + if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86 >= 6)) { + /* Catch an obscure case of prefetch inside an NX page. 
*/ + if (nx_enabled && (error_code & PF_INSTR)) + return 0; + } else { + return 0; + } + instr = (unsigned char *)get_segment_eip(regs, &limit); +#else /* If it was a exec fault ignore */ if (error_code & PF_INSTR) return 0; - instr = (unsigned char __user *)convert_rip_to_linear(current, regs); +#endif + max_instr = instr + 15; +#ifdef CONFIG_X86_64 if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) return 0; +#endif while (scan_more && instr < max_instr) { unsigned char opcode; unsigned char instr_hi; unsigned char instr_lo; +#ifdef CONFIG_X86_32 + if (instr > (unsigned char *)limit) + break; +#endif if (probe_kernel_address(instr, opcode)) break; @@ -125,12 +229,16 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, scan_more = (instr_lo & 0xC) == 0x4; break; case 0xF0: - /* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */ + /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */ scan_more = !instr_lo || (instr_lo>>1) == 1; break; case 0x00: /* Prefetch instruction is 0x0F0D or 0x0F18 */ scan_more = 0; +#ifdef CONFIG_X86_32 + if (instr > (unsigned char *)limit) + break; +#endif if (probe_kernel_address(instr, opcode)) break; prefetch = (instr_lo == 0xF) && @@ -185,6 +293,7 @@ bad: printk("BAD\n"); } +#ifdef CONFIG_X86_64 static const char errata93_warning[] = KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" @@ -218,6 +327,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address) } return 0; } +#endif static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, unsigned long error_code) |