summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile12
-rw-r--r--arch/x86/kernel/acpi/boot.c12
-rw-r--r--arch/x86/kernel/alternative.c2
-rw-r--r--arch/x86/kernel/amd_iommu_init.c4
-rw-r--r--arch/x86/kernel/apic_32.c4
-rw-r--r--arch/x86/kernel/apic_64.c4
-rw-r--r--arch/x86/kernel/cpu/common.c56
-rw-r--r--arch/x86/kernel/doublefault_32.c2
-rw-r--r--arch/x86/kernel/dumpstack_32.c447
-rw-r--r--arch/x86/kernel/dumpstack_64.c573
-rw-r--r--arch/x86/kernel/early-quirks.c48
-rw-r--r--arch/x86/kernel/early_printk.c748
-rw-r--r--arch/x86/kernel/entry_32.S22
-rw-r--r--arch/x86/kernel/entry_64.S15
-rw-r--r--arch/x86/kernel/es7000_32.c28
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c23
-rw-r--r--arch/x86/kernel/head.c1
-rw-r--r--arch/x86/kernel/hpet.c6
-rw-r--r--arch/x86/kernel/i387.c14
-rw-r--r--arch/x86/kernel/io_apic_64.c4
-rw-r--r--arch/x86/kernel/irqinit_64.c43
-rw-r--r--arch/x86/kernel/ldt.c9
-rw-r--r--arch/x86/kernel/microcode.c853
-rw-r--r--arch/x86/kernel/microcode_amd.c435
-rw-r--r--arch/x86/kernel/microcode_core.c508
-rw-r--r--arch/x86/kernel/microcode_intel.c480
-rw-r--r--arch/x86/kernel/paravirt-spinlocks.c37
-rw-r--r--arch/x86/kernel/paravirt.c27
-rw-r--r--arch/x86/kernel/process_32.c43
-rw-r--r--arch/x86/kernel/process_64.c33
-rw-r--r--arch/x86/kernel/ptrace.c44
-rw-r--r--arch/x86/kernel/quirks.c41
-rw-r--r--arch/x86/kernel/setup.c206
-rw-r--r--arch/x86/kernel/signal_32.c222
-rw-r--r--arch/x86/kernel/signal_64.c207
-rw-r--r--arch/x86/kernel/smp.c6
-rw-r--r--arch/x86/kernel/smpboot.c164
-rw-r--r--arch/x86/kernel/time_32.c7
-rw-r--r--arch/x86/kernel/time_64.c23
-rw-r--r--arch/x86/kernel/tlb_32.c8
-rw-r--r--arch/x86/kernel/traps.c (renamed from arch/x86/kernel/traps_32.c)895
-rw-r--r--arch/x86/kernel/traps_64.c1214
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S2
-rw-r--r--arch/x86/kernel/xsave.c33
44 files changed, 4496 insertions, 3069 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index c9be69fedb7..0d41f0343dc 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -10,7 +10,7 @@ ifdef CONFIG_FTRACE
# Do not profile debug and lowlevel utilities
CFLAGS_REMOVE_tsc.o = -pg
CFLAGS_REMOVE_rtc.o = -pg
-CFLAGS_REMOVE_paravirt.o = -pg
+CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
endif
#
@@ -23,7 +23,7 @@ CFLAGS_hpet.o := $(nostackp)
CFLAGS_tsc.o := $(nostackp)
obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
-obj-y += traps_$(BITS).o irq_$(BITS).o
+obj-y += traps.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time_$(BITS).o ioport.o ldt.o
obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
obj-$(CONFIG_X86_VISWS) += visws_quirks.o
@@ -51,7 +51,6 @@ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
obj-$(CONFIG_MCA) += mca_32.o
obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
-obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_PCI) += early-quirks.o
apm-y := apm_32.o
obj-$(CONFIG_APM) += apm.o
@@ -90,7 +89,7 @@ obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
obj-$(CONFIG_KVM_GUEST) += kvm.o
obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
-obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o paravirt-spinlocks.o
obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
@@ -100,6 +99,11 @@ scx200-y += scx200_32.o
obj-$(CONFIG_OLPC) += olpc.o
+microcode-y := microcode_core.o
+microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
+microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o
+obj-$(CONFIG_MICROCODE) += microcode.o
+
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index c2ac1b4515a..eb875cdc736 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1418,8 +1418,16 @@ static int __init force_acpi_ht(const struct dmi_system_id *d)
*/
static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
{
- pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", d->ident);
- acpi_skip_timer_override = 1;
+ /*
+ * The ati_ixp4x0_rev() early PCI quirk should have set
+ * the acpi_skip_timer_override flag already:
+ */
+ if (!acpi_skip_timer_override) {
+ WARN(1, KERN_ERR "ati_ixp4x0 quirk not complete.\n");
+ pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n",
+ d->ident);
+ acpi_skip_timer_override = 1;
+ }
return 0;
}
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index fb04e49776b..a84ac7b570e 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -444,7 +444,7 @@ void __init alternative_instructions(void)
_text, _etext);
/* Only switch to UP mode if we don't immediately boot others */
- if (num_possible_cpus() == 1 || setup_max_cpus <= 1)
+ if (num_present_cpus() == 1 || setup_max_cpus <= 1)
alternatives_smp_switch(0);
}
#endif
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 148fcfe22f1..4cd8083c58b 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -723,9 +723,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
init_iommu_from_acpi(iommu, h);
init_iommu_devices(iommu);
- pci_enable_device(iommu->dev);
-
- return 0;
+ return pci_enable_device(iommu->dev);
}
/*
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index a91c57cb666..21c831d96af 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -295,6 +295,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
*
* Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
* MCE interrupts are supported. Thus MCE offset must be set to 0.
+ *
+ * If mask=1, the LVT entry does not generate interrupts while mask=0
+ * enables the vector. See also the BKDGs.
*/
#define APIC_EILVT_LVTOFF_MCE 0
@@ -319,6 +322,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
return APIC_EILVT_LVTOFF_IBS;
}
+EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
/*
* Program the next event, relative to now
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 53898b65a6a..94ddb69ae15 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -307,6 +307,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
*
* Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
* MCE interrupts are supported. Thus MCE offset must be set to 0.
+ *
+ * If mask=1, the LVT entry does not generate interrupts while mask=0
+ * enables the vector. See also the BKDGs.
*/
#define APIC_EILVT_LVTOFF_MCE 0
@@ -331,6 +334,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
return APIC_EILVT_LVTOFF_IBS;
}
+EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
/*
* Program the next event, relative to now
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7581b62df18..25581dcb280 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -124,18 +124,25 @@ static inline int flag_is_changeable_p(u32 flag)
{
u32 f1, f2;
- asm("pushfl\n\t"
- "pushfl\n\t"
- "popl %0\n\t"
- "movl %0,%1\n\t"
- "xorl %2,%0\n\t"
- "pushl %0\n\t"
- "popfl\n\t"
- "pushfl\n\t"
- "popl %0\n\t"
- "popfl\n\t"
- : "=&r" (f1), "=&r" (f2)
- : "ir" (flag));
+ /*
+ * Cyrix and IDT cpus allow disabling of CPUID
+ * so the code below may return different results
+ * when it is executed before and after enabling
+ * the CPUID. Add "volatile" to not allow gcc to
+ * optimize the subsequent calls to this function.
+ */
+ asm volatile ("pushfl\n\t"
+ "pushfl\n\t"
+ "popl %0\n\t"
+ "movl %0,%1\n\t"
+ "xorl %2,%0\n\t"
+ "pushl %0\n\t"
+ "popfl\n\t"
+ "pushfl\n\t"
+ "popl %0\n\t"
+ "popfl\n\t"
+ : "=&r" (f1), "=&r" (f2)
+ : "ir" (flag));
return ((f1^f2) & flag) != 0;
}
@@ -719,12 +726,24 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
#endif
}
+#ifdef CONFIG_X86_64
+static void vgetcpu_set_mode(void)
+{
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
+ vgetcpu_mode = VGETCPU_RDTSCP;
+ else
+ vgetcpu_mode = VGETCPU_LSL;
+}
+#endif
+
void __init identify_boot_cpu(void)
{
identify_cpu(&boot_cpu_data);
#ifdef CONFIG_X86_32
sysenter_setup();
enable_sep_cpu();
+#else
+ vgetcpu_set_mode();
#endif
}
@@ -797,7 +816,7 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
else if (c->cpuid_level >= 0)
vendor = c->x86_vendor_id;
- if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor)))
+ if (vendor && !strstr(c->x86_model_id, vendor))
printk(KERN_CONT "%s ", vendor);
if (c->x86_model_id[0])
@@ -1121,16 +1140,5 @@ void __cpuinit cpu_init(void)
xsave_init();
}
-#ifdef CONFIG_HOTPLUG_CPU
-void __cpuinit cpu_uninit(void)
-{
- int cpu = raw_smp_processor_id();
- cpu_clear(cpu, cpu_initialized);
-
- /* lazy TLB state */
- per_cpu(cpu_tlbstate, cpu).state = 0;
- per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
-}
-#endif
#endif
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c
index a47798b59f0..b4f14c6c09d 100644
--- a/arch/x86/kernel/doublefault_32.c
+++ b/arch/x86/kernel/doublefault_32.c
@@ -66,6 +66,6 @@ struct tss_struct doublefault_tss __cacheline_aligned = {
.ds = __USER_DS,
.fs = __KERNEL_PERCPU,
- .__cr3 = __pa(swapper_pg_dir)
+ .__cr3 = __pa_nodebug(swapper_pg_dir),
}
};
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
new file mode 100644
index 00000000000..201ee359a1a
--- /dev/null
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -0,0 +1,447 @@
+/*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ */
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+#include <linux/utsname.h>
+#include <linux/hardirq.h>
+#include <linux/kdebug.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/kexec.h>
+#include <linux/bug.h>
+#include <linux/nmi.h>
+
+#include <asm/stacktrace.h>
+
+#define STACKSLOTS_PER_LINE 8
+#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
+
+int panic_on_unrecovered_nmi;
+int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
+static unsigned int code_bytes = 64;
+static int die_counter;
+
+void printk_address(unsigned long address, int reliable)
+{
+ printk(" [<%p>] %s%pS\n", (void *) address,
+ reliable ? "" : "? ", (void *) address);
+}
+
+static inline int valid_stack_ptr(struct thread_info *tinfo,
+ void *p, unsigned int size, void *end)
+{
+ void *t = tinfo;
+ if (end) {
+ if (p < end && p >= (end-THREAD_SIZE))
+ return 1;
+ else
+ return 0;
+ }
+ return p > t && p < t + THREAD_SIZE - size;
+}
+
+/* The form of the top of the frame on the stack */
+struct stack_frame {
+ struct stack_frame *next_frame;
+ unsigned long return_address;
+};
+
+static inline unsigned long
+print_context_stack(struct thread_info *tinfo,
+ unsigned long *stack, unsigned long bp,
+ const struct stacktrace_ops *ops, void *data,
+ unsigned long *end)
+{
+ struct stack_frame *frame = (struct stack_frame *)bp;
+
+ while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
+ unsigned long addr;
+
+ addr = *stack;
+ if (__kernel_text_address(addr)) {
+ if ((unsigned long) stack == bp + sizeof(long)) {
+ ops->address(data, addr, 1);
+ frame = frame->next_frame;
+ bp = (unsigned long) frame;
+ } else {
+ ops->address(data, addr, bp == 0);
+ }
+ }
+ stack++;
+ }
+ return bp;
+}
+
+void dump_trace(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp,
+ const struct stacktrace_ops *ops, void *data)
+{
+ if (!task)
+ task = current;
+
+ if (!stack) {
+ unsigned long dummy;
+ stack = &dummy;
+ if (task && task != current)
+ stack = (unsigned long *)task->thread.sp;
+ }
+
+#ifdef CONFIG_FRAME_POINTER
+ if (!bp) {
+ if (task == current) {
+ /* Grab bp right from our regs */
+ get_bp(bp);
+ } else {
+ /* bp is the last reg pushed by switch_to */
+ bp = *(unsigned long *) task->thread.sp;
+ }
+ }
+#endif
+
+ for (;;) {
+ struct thread_info *context;
+
+ context = (struct thread_info *)
+ ((unsigned long)stack & (~(THREAD_SIZE - 1)));
+ bp = print_context_stack(context, stack, bp, ops, data, NULL);
+
+ stack = (unsigned long *)context->previous_esp;
+ if (!stack)
+ break;
+ if (ops->stack(data, "IRQ") < 0)
+ break;
+ touch_nmi_watchdog();
+ }
+}
+EXPORT_SYMBOL(dump_trace);
+
+static void
+print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
+{
+ printk(data);
+ print_symbol(msg, symbol);
+ printk("\n");
+}
+
+static void print_trace_warning(void *data, char *msg)
+{
+ printk("%s%s\n", (char *)data, msg);
+}
+
+static int print_trace_stack(void *data, char *name)
+{
+ printk("%s <%s> ", (char *)data, name);
+ return 0;
+}
+
+/*
+ * Print one address/symbol entries per line.
+ */
+static void print_trace_address(void *data, unsigned long addr, int reliable)
+{
+ touch_nmi_watchdog();
+ printk(data);
+ printk_address(addr, reliable);
+}
+
+static const struct stacktrace_ops print_trace_ops = {
+ .warning = print_trace_warning,
+ .warning_symbol = print_trace_warning_symbol,
+ .stack = print_trace_stack,
+ .address = print_trace_address,
+};
+
+static void
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp, char *log_lvl)
+{
+ printk("%sCall Trace:\n", log_lvl);
+ dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
+}
+
+void show_trace(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp)
+{
+ show_trace_log_lvl(task, regs, stack, bp, "");
+}
+
+static void
+show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *sp, unsigned long bp, char *log_lvl)
+{
+ unsigned long *stack;
+ int i;
+
+ if (sp == NULL) {
+ if (task)
+ sp = (unsigned long *)task->thread.sp;
+ else
+ sp = (unsigned long *)&sp;
+ }
+
+ stack = sp;
+ for (i = 0; i < kstack_depth_to_print; i++) {
+ if (kstack_end(stack))
+ break;
+ if (i && ((i % STACKSLOTS_PER_LINE) == 0))
+ printk("\n%s", log_lvl);
+ printk(" %08lx", *stack++);
+ touch_nmi_watchdog();
+ }
+ printk("\n");
+ show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+}
+
+void show_stack(struct task_struct *task, unsigned long *sp)
+{
+ show_stack_log_lvl(task, NULL, sp, 0, "");
+}
+
+/*
+ * The architecture-independent dump_stack generator
+ */
+void dump_stack(void)
+{
+ unsigned long bp = 0;
+ unsigned long stack;
+
+#ifdef CONFIG_FRAME_POINTER
+ if (!bp)
+ get_bp(bp);
+#endif
+
+ printk("Pid: %d, comm: %.20s %s %s %.*s\n",
+ current->pid, current->comm, print_tainted(),
+ init_utsname()->release,
+ (int)strcspn(init_utsname()->version, " "),
+ init_utsname()->version);
+ show_trace(NULL, NULL, &stack, bp);
+}
+
+EXPORT_SYMBOL(dump_stack);
+
+void show_registers(struct pt_regs *regs)
+{
+ int i;
+
+ print_modules();
+ __show_regs(regs, 0);
+
+ printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
+ TASK_COMM_LEN, current->comm, task_pid_nr(current),
+ current_thread_info(), current, task_thread_info(current));
+ /*
+ * When in-kernel, we also print out the stack and code at the
+ * time of the fault..
+ */
+ if (!user_mode_vm(regs)) {
+ unsigned int code_prologue = code_bytes * 43 / 64;
+ unsigned int code_len = code_bytes;
+ unsigned char c;
+ u8 *ip;
+
+ printk(KERN_EMERG "Stack:\n");
+ show_stack_log_lvl(NULL, regs, &regs->sp,
+ 0, KERN_EMERG);
+
+ printk(KERN_EMERG "Code: ");
+
+ ip = (u8 *)regs->ip - code_prologue;
+ if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
+ /* try starting at IP */
+ ip = (u8 *)regs->ip;
+ code_len = code_len - code_prologue + 1;
+ }
+ for (i = 0; i < code_len; i++, ip++) {
+ if (ip < (u8 *)PAGE_OFFSET ||
+ probe_kernel_address(ip, c)) {
+ printk(" Bad EIP value.");
+ break;
+ }
+ if (ip == (u8 *)regs->ip)
+ printk("<%02x> ", c);
+ else
+ printk("%02x ", c);
+ }
+ }
+ printk("\n");
+}
+
+int is_valid_bugaddr(unsigned long ip)
+{
+ unsigned short ud2;
+
+ if (ip < PAGE_OFFSET)
+ return 0;
+ if (probe_kernel_address((unsigned short *)ip, ud2))
+ return 0;
+
+ return ud2 == 0x0b0f;
+}
+
+static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static int die_owner = -1;
+static unsigned int die_nest_count;
+
+unsigned __kprobes long oops_begin(void)
+{
+ unsigned long flags;
+
+ oops_enter();
+
+ if (die_owner != raw_smp_processor_id()) {
+ console_verbose();
+ raw_local_irq_save(flags);
+ __raw_spin_lock(&die_lock);
+ die_owner = smp_processor_id();
+ die_nest_count = 0;
+ bust_spinlocks(1);
+ } else {
+ raw_local_irq_save(flags);
+ }
+ die_nest_count++;
+ return flags;
+}
+
+void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
+{
+ bust_spinlocks(0);
+ die_owner = -1;
+ add_taint(TAINT_DIE);
+ __raw_spin_unlock(&die_lock);
+ raw_local_irq_restore(flags);
+
+ if (!regs)
+ return;
+
+ if (kexec_should_crash(current))
+ crash_kexec(regs);
+ if (in_interrupt())
+ panic("Fatal exception in interrupt");
+ if (panic_on_oops)
+ panic("Fatal exception");
+ oops_exit();
+ do_exit(signr);
+}
+
+int __kprobes __die(const char *str, struct pt_regs *regs, long err)
+{
+ unsigned short ss;
+ unsigned long sp;
+
+ printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
+#ifdef CONFIG_PREEMPT
+ printk("PREEMPT ");
+#endif
+#ifdef CONFIG_SMP
+ printk("SMP ");
+#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ printk("DEBUG_PAGEALLOC");
+#endif
+ printk("\n");
+ if (notify_die(DIE_OOPS, str, regs, err,
+ current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
+ return 1;
+
+ show_registers(regs);
+ /* Executive summary in case the oops scrolled away */
+ sp = (unsigned long) (&regs->sp);
+ savesegment(ss, ss);
+ if (user_mode(regs)) {
+ sp = regs->sp;
+ ss = regs->ss & 0xffff;
+ }
+ printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
+ print_symbol("%s", regs->ip);
+ printk(" SS:ESP %04x:%08lx\n", ss, sp);
+ return 0;
+}
+
+/*
+ * This is gone through when something in the kernel has done something bad
+ * and is about to be terminated:
+ */
+void die(const char *str, struct pt_regs *regs, long err)
+{
+ unsigned long flags = oops_begin();
+
+ if (die_nest_count < 3) {
+ report_bug(regs->ip, regs);
+
+ if (__die(str, regs, err))
+ regs = NULL;
+ } else {
+ printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
+ }
+
+ oops_end(flags, regs, SIGSEGV);
+}
+
+static DEFINE_SPINLOCK(nmi_print_lock);
+
+void notrace __kprobes
+die_nmi(char *str, struct pt_regs *regs, int do_panic)
+{
+ if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
+ return;
+
+ spin_lock(&nmi_print_lock);
+ /*
+ * We are in trouble anyway, lets at least try
+ * to get a message out:
+ */
+ bust_spinlocks(1);
+ printk(KERN_EMERG "%s", str);
+ printk(" on CPU%d, ip %08lx, registers:\n",
+ smp_processor_id(), regs->ip);
+ show_registers(regs);
+ if (do_panic)
+ panic("Non maskable interrupt");
+ console_silent();
+ spin_unlock(&nmi_print_lock);
+ bust_spinlocks(0);
+
+ /*
+ * If we are in kernel we are probably nested up pretty bad
+ * and might aswell get out now while we still can:
+ */
+ if (!user_mode_vm(regs)) {
+ current->thread.trap_no = 2;
+ crash_kexec(regs);
+ }
+
+ do_exit(SIGSEGV);
+}
+
+static int __init oops_setup(char *s)
+{
+ if (!s)
+ return -EINVAL;
+ if (!strcmp(s, "panic"))
+ panic_on_oops = 1;
+ return 0;
+}
+early_param("oops", oops_setup);
+
+static int __init kstack_setup(char *s)
+{
+ if (!s)
+ return -EINVAL;
+ kstack_depth_to_print = simple_strtoul(s, NULL, 0);
+ return 0;
+}
+early_param("kstack", kstack_setup);
+
+static int __init code_bytes_setup(char *s)
+{
+ code_bytes = simple_strtoul(s, NULL, 0);
+ if (code_bytes > 8192)
+ code_bytes = 8192;
+
+ return 1;
+}
+__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
new file mode 100644
index 00000000000..086cc8118e3
--- /dev/null
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -0,0 +1,573 @@
+/*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ */
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+#include <linux/utsname.h>
+#include <linux/hardirq.h>
+#include <linux/kdebug.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/kexec.h>
+#include <linux/bug.h>
+#include <linux/nmi.h>
+
+#include <asm/stacktrace.h>
+
+#define STACKSLOTS_PER_LINE 4
+#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
+
+int panic_on_unrecovered_nmi;
+int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
+static unsigned int code_bytes = 64;
+static int die_counter;
+
+void printk_address(unsigned long address, int reliable)
+{
+ printk(" [<%p>] %s%pS\n", (void *) address,
+ reliable ? "" : "? ", (void *) address);
+}
+
+static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
+ unsigned *usedp, char **idp)
+{
+ static char ids[][8] = {
+ [DEBUG_STACK - 1] = "#DB",
+ [NMI_STACK - 1] = "NMI",
+ [DOUBLEFAULT_STACK - 1] = "#DF",
+ [STACKFAULT_STACK - 1] = "#SS",
+ [MCE_STACK - 1] = "#MC",
+#if DEBUG_STKSZ > EXCEPTION_STKSZ
+ [N_EXCEPTION_STACKS ...
+ N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
+#endif
+ };
+ unsigned k;
+
+ /*
+ * Iterate over all exception stacks, and figure out whether
+ * 'stack' is in one of them:
+ */
+ for (k = 0; k < N_EXCEPTION_STACKS; k++) {
+ unsigned long end = per_cpu(orig_ist, cpu).ist[k];
+ /*
+ * Is 'stack' above this exception frame's end?
+ * If yes then skip to the next frame.
+ */
+ if (stack >= end)
+ continue;
+ /*
+ * Is 'stack' above this exception frame's start address?
+ * If yes then we found the right frame.
+ */
+ if (stack >= end - EXCEPTION_STKSZ) {
+ /*
+ * Make sure we only iterate through an exception
+ * stack once. If it comes up for the second time
+ * then there's something wrong going on - just
+ * break out and return NULL:
+ */
+ if (*usedp & (1U << k))
+ break;
+ *usedp |= 1U << k;
+ *idp = ids[k];
+ return (unsigned long *)end;
+ }
+ /*
+ * If this is a debug stack, and if it has a larger size than
+ * the usual exception stacks, then 'stack' might still
+ * be within the lower portion of the debug stack:
+ */
+#if DEBUG_STKSZ > EXCEPTION_STKSZ
+ if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
+ unsigned j = N_EXCEPTION_STACKS - 1;
+
+ /*
+ * Black magic. A large debug stack is composed of
+ * multiple exception stack entries, which we
+ * iterate through now. Dont look:
+ */
+ do {
+ ++j;
+ end -= EXCEPTION_STKSZ;
+ ids[j][4] = '1' + (j - N_EXCEPTION_STACKS);
+ } while (stack < end - EXCEPTION_STKSZ);
+ if (*usedp & (1U << j))
+ break;
+ *usedp |= 1U << j;
+ *idp = ids[j];
+ return (unsigned long *)end;
+ }
+#endif
+ }
+ return NULL;
+}
+
+/*
+ * x86-64 can have up to three kernel stacks:
+ * process stack
+ * interrupt stack
+ * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
+ */
+
+static inline int valid_stack_ptr(struct thread_info *tinfo,
+ void *p, unsigned int size, void *end)
+{
+ void *t = tinfo;
+ if (end) {
+ if (p < end && p >= (end-THREAD_SIZE))
+ return 1;
+ else
+ return 0;
+ }
+ return p > t && p < t + THREAD_SIZE - size;
+}
+
+/* The form of the top of the frame on the stack */
+struct stack_frame {
+ struct stack_frame *next_frame;
+ unsigned long return_address;
+};
+
+static inline unsigned long
+print_context_stack(struct thread_info *tinfo,
+ unsigned long *stack, unsigned long bp,
+ const struct stacktrace_ops *ops, void *data,
+ unsigned long *end)
+{
+ struct stack_frame *frame = (struct stack_frame *)bp;
+
+ while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
+ unsigned long addr;
+
+ addr = *stack;
+ if (__kernel_text_address(addr)) {
+ if ((unsigned long) stack == bp + sizeof(long)) {
+ ops->address(data, addr, 1);
+ frame = frame->next_frame;
+ bp = (unsigned long) frame;
+ } else {
+ ops->address(data, addr, bp == 0);
+ }
+ }
+ stack++;
+ }
+ return bp;
+}
+
+void dump_trace(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp,
+ const struct stacktrace_ops *ops, void *data)
+{
+ const unsigned cpu = get_cpu();
+ unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
+ unsigned used = 0;
+ struct thread_info *tinfo;
+
+ if (!task)
+ task = current;
+
+ if (!stack) {
+ unsigned long dummy;
+ stack = &dummy;
+ if (task && task != current)
+ stack = (unsigned long *)task->thread.sp;
+ }
+
+#ifdef CONFIG_FRAME_POINTER
+ if (!bp) {
+ if (task == current) {
+ /* Grab bp right from our regs */
+ get_bp(bp);
+ } else {
+ /* bp is the last reg pushed by switch_to */
+ bp = *(unsigned long *) task->thread.sp;
+ }
+ }
+#endif
+
+ /*
+ * Print function call entries in all stacks, starting at the
+ * current stack address. If the stacks consist of nested
+ * exceptions
+ */
+ tinfo = task_thread_info(task);
+ for (;;) {
+ char *id;
+ unsigned long *estack_end;
+ estack_end = in_exception_stack(cpu, (unsigned long)stack,
+ &used, &id);
+
+ if (estack_end) {
+ if (ops->stack(data, id) < 0)
+ break;
+
+ bp = print_context_stack(tinfo, stack, bp, ops,
+ data, estack_end);
+ ops->stack(data, "<EOE>");
+ /*
+ * We link to the next stack via the
+ * second-to-last pointer (index -2 to end) in the
+ * exception stack:
+ */
+ stack = (unsigned long *) estack_end[-2];
+ continue;
+ }
+ if (irqstack_end) {
+ unsigned long *irqstack;
+ irqstack = irqstack_end -
+ (IRQSTACKSIZE - 64) / sizeof(*irqstack);
+
+ if (stack >= irqstack && stack < irqstack_end) {
+ if (ops->stack(data, "IRQ") < 0)
+ break;
+ bp = print_context_stack(tinfo, stack, bp,
+ ops, data, irqstack_end);
+ /*
+ * We link to the next stack (which would be
+ * the process stack normally) the last
+ * pointer (index -1 to end) in the IRQ stack:
+ */
+ stack = (unsigned long *) (irqstack_end[-1]);
+ irqstack_end = NULL;
+ ops->stack(data, "EOI");
+ continue;
+ }
+ }
+ break;
+ }
+
+ /*
+ * This handles the process stack:
+ */
+ bp = print_context_stack(tinfo, stack, bp, ops, data, NULL);
+ put_cpu();
+}
+EXPORT_SYMBOL(dump_trace);
+
+static void
+print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
+{
+ printk(data);
+ print_symbol(msg, symbol);
+ printk("\n");
+}
+
+static void print_trace_warning(void *data, char *msg)
+{
+ printk("%s%s\n", (char *)data, msg);
+}
+
+static int print_trace_stack(void *data, char *name)
+{
+ printk("%s <%s> ", (char *)data, name);
+ return 0;
+}
+
+/*
+ * Print one address/symbol entries per line.
+ */
+static void print_trace_address(void *data, unsigned long addr, int reliable)
+{
+ touch_nmi_watchdog();
+ printk(data);
+ printk_address(addr, reliable);
+}
+
+static const struct stacktrace_ops print_trace_ops = {
+ .warning = print_trace_warning,
+ .warning_symbol = print_trace_warning_symbol,
+ .stack = print_trace_stack,
+ .address = print_trace_address,
+};
+
+static void
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp, char *log_lvl)
+{
+ printk("%sCall Trace:\n", log_lvl);
+ dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
+}
+
+void show_trace(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp)
+{
+ show_trace_log_lvl(task, regs, stack, bp, "");
+}
+
+static void
+show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *sp, unsigned long bp, char *log_lvl)
+{
+ unsigned long *stack;
+ int i;
+ const int cpu = smp_processor_id();
+ unsigned long *irqstack_end =
+ (unsigned long *) (cpu_pda(cpu)->irqstackptr);
+ unsigned long *irqstack =
+ (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
+
+ /*
+ * debugging aid: "show_stack(NULL, NULL);" prints the
+ * back trace for this cpu.
+ */
+
+ if (sp == NULL) {
+ if (task)
+ sp = (unsigned long *)task->thread.sp;
+ else
+ sp = (unsigned long *)&sp;
+ }
+
+ stack = sp;
+ for (i = 0; i < kstack_depth_to_print; i++) {
+ if (stack >= irqstack && stack <= irqstack_end) {
+ if (stack == irqstack_end) {
+ stack = (unsigned long *) (irqstack_end[-1]);
+ printk(" <EOI> ");
+ }
+ } else {
+ if (((long) stack & (THREAD_SIZE-1)) == 0)
+ break;
+ }
+ if (i && ((i % STACKSLOTS_PER_LINE) == 0))
+ printk("\n%s", log_lvl);
+ printk(" %016lx", *stack++);
+ touch_nmi_watchdog();
+ }
+ printk("\n");
+ show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+}
+
+void show_stack(struct task_struct *task, unsigned long *sp)
+{
+ show_stack_log_lvl(task, NULL, sp, 0, "");
+}
+
+/*
+ * The architecture-independent dump_stack generator
+ */
+void dump_stack(void)
+{
+ unsigned long bp = 0;
+ unsigned long stack;
+
+#ifdef CONFIG_FRAME_POINTER
+ if (!bp)
+ get_bp(bp);
+#endif
+
+ printk("Pid: %d, comm: %.20s %s %s %.*s\n",
+ current->pid, current->comm, print_tainted(),
+ init_utsname()->release,
+ (int)strcspn(init_utsname()->version, " "),
+ init_utsname()->version);
+ show_trace(NULL, NULL, &stack, bp);
+}
+EXPORT_SYMBOL(dump_stack);
+
+void show_registers(struct pt_regs *regs)
+{
+ int i;
+ unsigned long sp;
+ const int cpu = smp_processor_id();
+ struct task_struct *cur = cpu_pda(cpu)->pcurrent;
+
+ sp = regs->sp;
+ printk("CPU %d ", cpu);
+ __show_regs(regs, 1);
+ printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
+ cur->comm, cur->pid, task_thread_info(cur), cur);
+
+ /*
+ * When in-kernel, we also print out the stack and code at the
+ * time of the fault..
+ */
+ if (!user_mode(regs)) {
+ unsigned int code_prologue = code_bytes * 43 / 64;
+ unsigned int code_len = code_bytes;
+ unsigned char c;
+ u8 *ip;
+
+ printk(KERN_EMERG "Stack:\n");
+ show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
+ regs->bp, KERN_EMERG);
+
+ printk(KERN_EMERG "Code: ");
+
+ ip = (u8 *)regs->ip - code_prologue;
+ if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
+ /* try starting at IP */
+ ip = (u8 *)regs->ip;
+ code_len = code_len - code_prologue + 1;
+ }
+ for (i = 0; i < code_len; i++, ip++) {
+ if (ip < (u8 *)PAGE_OFFSET ||
+ probe_kernel_address(ip, c)) {
+ printk(" Bad RIP value.");
+ break;
+ }
+ if (ip == (u8 *)regs->ip)
+ printk("<%02x> ", c);
+ else
+ printk("%02x ", c);
+ }
+ }
+ printk("\n");
+}
+
+int is_valid_bugaddr(unsigned long ip)
+{
+ unsigned short ud2;
+
+ if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2)))
+ return 0;
+
+ return ud2 == 0x0b0f;
+}
+
+static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static int die_owner = -1;
+static unsigned int die_nest_count;
+
+unsigned __kprobes long oops_begin(void)
+{
+ int cpu;
+ unsigned long flags;
+
+ oops_enter();
+
+ /* racy, but better than risking deadlock. */
+ raw_local_irq_save(flags);
+ cpu = smp_processor_id();
+ if (!__raw_spin_trylock(&die_lock)) {
+ if (cpu == die_owner)
+ /* nested oops. should stop eventually */;
+ else
+ __raw_spin_lock(&die_lock);
+ }
+ die_nest_count++;
+ die_owner = cpu;
+ console_verbose();
+ bust_spinlocks(1);
+ return flags;
+}
+
+void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
+{
+ die_owner = -1;
+ bust_spinlocks(0);
+ die_nest_count--;
+ if (!die_nest_count)
+ /* Nest count reaches zero, release the lock. */
+ __raw_spin_unlock(&die_lock);
+ raw_local_irq_restore(flags);
+ if (!regs) {
+ oops_exit();
+ return;
+ }
+ if (in_interrupt())
+ panic("Fatal exception in interrupt");
+ if (panic_on_oops)
+ panic("Fatal exception");
+ oops_exit();
+ do_exit(signr);
+}
+
+int __kprobes __die(const char *str, struct pt_regs *regs, long err)
+{
+ printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
+#ifdef CONFIG_PREEMPT
+ printk("PREEMPT ");
+#endif
+#ifdef CONFIG_SMP
+ printk("SMP ");
+#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ printk("DEBUG_PAGEALLOC");
+#endif
+ printk("\n");
+ if (notify_die(DIE_OOPS, str, regs, err,
+ current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
+ return 1;
+
+ show_registers(regs);
+ add_taint(TAINT_DIE);
+ /* Executive summary in case the oops scrolled away */
+ printk(KERN_ALERT "RIP ");
+ printk_address(regs->ip, 1);
+ printk(" RSP <%016lx>\n", regs->sp);
+ if (kexec_should_crash(current))
+ crash_kexec(regs);
+ return 0;
+}
+
+void die(const char *str, struct pt_regs *regs, long err)
+{
+ unsigned long flags = oops_begin();
+
+ if (!user_mode(regs))
+ report_bug(regs->ip, regs);
+
+ if (__die(str, regs, err))
+ regs = NULL;
+ oops_end(flags, regs, SIGSEGV);
+}
+
+notrace __kprobes void
+die_nmi(char *str, struct pt_regs *regs, int do_panic)
+{
+ unsigned long flags;
+
+ if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
+ return;
+
+ flags = oops_begin();
+ /*
+ * We are in trouble anyway, lets at least try
+ * to get a message out.
+ */
+ printk(KERN_EMERG "%s", str);
+ printk(" on CPU%d, ip %08lx, registers:\n",
+ smp_processor_id(), regs->ip);
+ show_registers(regs);
+ if (kexec_should_crash(current))
+ crash_kexec(regs);
+ if (do_panic || panic_on_oops)
+ panic("Non maskable interrupt");
+ oops_end(flags, NULL, SIGBUS);
+ nmi_exit();
+ local_irq_enable();
+ do_exit(SIGBUS);
+}
+
+static int __init oops_setup(char *s)
+{
+ if (!s)
+ return -EINVAL;
+ if (!strcmp(s, "panic"))
+ panic_on_oops = 1;
+ return 0;
+}
+early_param("oops", oops_setup);
+
+static int __init kstack_setup(char *s)
+{
+ if (!s)
+ return -EINVAL;
+ kstack_depth_to_print = simple_strtoul(s, NULL, 0);
+ return 0;
+}
+early_param("kstack", kstack_setup);
+
+static int __init code_bytes_setup(char *s)
+{
+ code_bytes = simple_strtoul(s, NULL, 0);
+ if (code_bytes > 8192)
+ code_bytes = 8192;
+
+ return 1;
+}
+__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 24bb5faf5ef..733c4f8d42e 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -95,6 +95,52 @@ static void __init nvidia_bugs(int num, int slot, int func)
}
+static u32 ati_ixp4x0_rev(int num, int slot, int func)
+{
+ u32 d;
+ u8 b;
+
+ b = read_pci_config_byte(num, slot, func, 0xac);
+ b &= ~(1<<5);
+ write_pci_config_byte(num, slot, func, 0xac, b);
+
+ d = read_pci_config(num, slot, func, 0x70);
+ d |= 1<<8;
+ write_pci_config(num, slot, func, 0x70, d);
+
+ d = read_pci_config(num, slot, func, 0x8);
+ d &= 0xff;
+ return d;
+}
+
+static void __init ati_bugs(int num, int slot, int func)
+{
+#if defined(CONFIG_ACPI) && defined (CONFIG_X86_IO_APIC)
+ u32 d;
+ u8 b;
+
+ if (acpi_use_timer_override)
+ return;
+
+ d = ati_ixp4x0_rev(num, slot, func);
+ if (d < 0x82)
+ acpi_skip_timer_override = 1;
+ else {
+ /* check for IRQ0 interrupt swap */
+ outb(0x72, 0xcd6); b = inb(0xcd7);
+ if (!(b & 0x2))
+ acpi_skip_timer_override = 1;
+ }
+
+ if (acpi_skip_timer_override) {
+ printk(KERN_INFO "SB4X0 revision 0x%x\n", d);
+ printk(KERN_INFO "Ignoring ACPI timer override.\n");
+ printk(KERN_INFO "If you got timer trouble "
+ "try acpi_use_timer_override\n");
+ }
+#endif
+}
+
#ifdef CONFIG_DMAR
static void __init intel_g33_dmar(int num, int slot, int func)
{
@@ -128,6 +174,8 @@ static struct chipset early_qrk[] __initdata = {
PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs },
{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config },
+ { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS,
+ PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs },
#ifdef CONFIG_DMAR
{ PCI_VENDOR_ID_INTEL, 0x29c0,
PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar },
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index ff9e7350da5..34ad997d383 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -3,11 +3,19 @@
#include <linux/init.h>
#include <linux/string.h>
#include <linux/screen_info.h>
+#include <linux/usb/ch9.h>
+#include <linux/pci_regs.h>
+#include <linux/pci_ids.h>
+#include <linux/errno.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/fcntl.h>
#include <asm/setup.h>
#include <xen/hvc-console.h>
+#include <asm/pci-direct.h>
+#include <asm/pgtable.h>
+#include <asm/fixmap.h>
+#include <linux/usb/ehci_def.h>
/* Simple VGA output */
#define VGABASE (__ISA_IO_base + 0xb8000)
@@ -78,6 +86,7 @@ static int early_serial_base = 0x3f8; /* ttyS0 */
static int early_serial_putc(unsigned char ch)
{
unsigned timeout = 0xffff;
+
while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
cpu_relax();
outb(ch, early_serial_base + TXR);
@@ -111,7 +120,7 @@ static __init void early_serial_init(char *s)
if (!strncmp(s, "0x", 2)) {
early_serial_base = simple_strtoul(s, &e, 16);
} else {
- static int bases[] = { 0x3f8, 0x2f8 };
+ static const int __initconst bases[] = { 0x3f8, 0x2f8 };
if (!strncmp(s, "ttyS", 4))
s += 4;
@@ -151,6 +160,721 @@ static struct console early_serial_console = {
.index = -1,
};
+#ifdef CONFIG_EARLY_PRINTK_DBGP
+
+static struct ehci_caps __iomem *ehci_caps;
+static struct ehci_regs __iomem *ehci_regs;
+static struct ehci_dbg_port __iomem *ehci_debug;
+static unsigned int dbgp_endpoint_out;
+
+struct ehci_dev {
+ u32 bus;
+ u32 slot;
+ u32 func;
+};
+
+static struct ehci_dev ehci_dev;
+
+#define USB_DEBUG_DEVNUM 127
+
+#define DBGP_DATA_TOGGLE 0x8800
+
+static inline u32 dbgp_pid_update(u32 x, u32 tok)
+{
+ return ((x ^ DBGP_DATA_TOGGLE) & 0xffff00) | (tok & 0xff);
+}
+
+static inline u32 dbgp_len_update(u32 x, u32 len)
+{
+ return (x & ~0x0f) | (len & 0x0f);
+}
+
+/*
+ * USB Packet IDs (PIDs)
+ */
+
+/* token */
+#define USB_PID_OUT 0xe1
+#define USB_PID_IN 0x69
+#define USB_PID_SOF 0xa5
+#define USB_PID_SETUP 0x2d
+/* handshake */
+#define USB_PID_ACK 0xd2
+#define USB_PID_NAK 0x5a
+#define USB_PID_STALL 0x1e
+#define USB_PID_NYET 0x96
+/* data */
+#define USB_PID_DATA0 0xc3
+#define USB_PID_DATA1 0x4b
+#define USB_PID_DATA2 0x87
+#define USB_PID_MDATA 0x0f
+/* Special */
+#define USB_PID_PREAMBLE 0x3c
+#define USB_PID_ERR 0x3c
+#define USB_PID_SPLIT 0x78
+#define USB_PID_PING 0xb4
+#define USB_PID_UNDEF_0 0xf0
+
+#define USB_PID_DATA_TOGGLE 0x88
+#define DBGP_CLAIM (DBGP_OWNER | DBGP_ENABLED | DBGP_INUSE)
+
+#define PCI_CAP_ID_EHCI_DEBUG 0xa
+
+#define HUB_ROOT_RESET_TIME 50 /* times are in msec */
+#define HUB_SHORT_RESET_TIME 10
+#define HUB_LONG_RESET_TIME 200
+#define HUB_RESET_TIMEOUT 500
+
+#define DBGP_MAX_PACKET 8
+
+static int dbgp_wait_until_complete(void)
+{
+ u32 ctrl;
+ int loop = 0x100000;
+
+ do {
+ ctrl = readl(&ehci_debug->control);
+ /* Stop when the transaction is finished */
+ if (ctrl & DBGP_DONE)
+ break;
+ } while (--loop > 0);
+
+ if (!loop)
+ return -1;
+
+ /*
+ * Now that we have observed the completed transaction,
+ * clear the done bit.
+ */
+ writel(ctrl | DBGP_DONE, &ehci_debug->control);
+ return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl);
+}
+
+static void dbgp_mdelay(int ms)
+{
+ int i;
+
+ while (ms--) {
+ for (i = 0; i < 1000; i++)
+ outb(0x1, 0x80);
+ }
+}
+
+static void dbgp_breath(void)
+{
+ /* Sleep to give the debug port a chance to breathe */
+}
+
+static int dbgp_wait_until_done(unsigned ctrl)
+{
+ u32 pids, lpid;
+ int ret;
+ int loop = 3;
+
+retry:
+ writel(ctrl | DBGP_GO, &ehci_debug->control);
+ ret = dbgp_wait_until_complete();
+ pids = readl(&ehci_debug->pids);
+ lpid = DBGP_PID_GET(pids);
+
+ if (ret < 0)
+ return ret;
+
+ /*
+ * If the port is getting full or it has dropped data
+ * start pacing ourselves, not necessary but it's friendly.
+ */
+ if ((lpid == USB_PID_NAK) || (lpid == USB_PID_NYET))
+ dbgp_breath();
+
+ /* If I get a NACK reissue the transmission */
+ if (lpid == USB_PID_NAK) {
+ if (--loop > 0)
+ goto retry;
+ }
+
+ return ret;
+}
+
+static void dbgp_set_data(const void *buf, int size)
+{
+ const unsigned char *bytes = buf;
+ u32 lo, hi;
+ int i;
+
+ lo = hi = 0;
+ for (i = 0; i < 4 && i < size; i++)
+ lo |= bytes[i] << (8*i);
+ for (; i < 8 && i < size; i++)
+ hi |= bytes[i] << (8*(i - 4));
+ writel(lo, &ehci_debug->data03);
+ writel(hi, &ehci_debug->data47);
+}
+
+static void dbgp_get_data(void *buf, int size)
+{
+ unsigned char *bytes = buf;
+ u32 lo, hi;
+ int i;
+
+ lo = readl(&ehci_debug->data03);
+ hi = readl(&ehci_debug->data47);
+ for (i = 0; i < 4 && i < size; i++)
+ bytes[i] = (lo >> (8*i)) & 0xff;
+ for (; i < 8 && i < size; i++)
+ bytes[i] = (hi >> (8*(i - 4))) & 0xff;
+}
+
+static int dbgp_bulk_write(unsigned devnum, unsigned endpoint,
+ const char *bytes, int size)
+{
+ u32 pids, addr, ctrl;
+ int ret;
+
+ if (size > DBGP_MAX_PACKET)
+ return -1;
+
+ addr = DBGP_EPADDR(devnum, endpoint);
+
+ pids = readl(&ehci_debug->pids);
+ pids = dbgp_pid_update(pids, USB_PID_OUT);
+
+ ctrl = readl(&ehci_debug->control);
+ ctrl = dbgp_len_update(ctrl, size);
+ ctrl |= DBGP_OUT;
+ ctrl |= DBGP_GO;
+
+ dbgp_set_data(bytes, size);
+ writel(addr, &ehci_debug->address);
+ writel(pids, &ehci_debug->pids);
+
+ ret = dbgp_wait_until_done(ctrl);
+ if (ret < 0)
+ return ret;
+
+ return ret;
+}
+
+static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
+ int size)
+{
+ u32 pids, addr, ctrl;
+ int ret;
+
+ if (size > DBGP_MAX_PACKET)
+ return -1;
+
+ addr = DBGP_EPADDR(devnum, endpoint);
+
+ pids = readl(&ehci_debug->pids);
+ pids = dbgp_pid_update(pids, USB_PID_IN);
+
+ ctrl = readl(&ehci_debug->control);
+ ctrl = dbgp_len_update(ctrl, size);
+ ctrl &= ~DBGP_OUT;
+ ctrl |= DBGP_GO;
+
+ writel(addr, &ehci_debug->address);
+ writel(pids, &ehci_debug->pids);
+ ret = dbgp_wait_until_done(ctrl);
+ if (ret < 0)
+ return ret;
+
+ if (size > ret)
+ size = ret;
+ dbgp_get_data(data, size);
+ return ret;
+}
+
+static int dbgp_control_msg(unsigned devnum, int requesttype, int request,
+ int value, int index, void *data, int size)
+{
+ u32 pids, addr, ctrl;
+ struct usb_ctrlrequest req;
+ int read;
+ int ret;
+
+ read = (requesttype & USB_DIR_IN) != 0;
+ if (size > (read ? DBGP_MAX_PACKET:0))
+ return -1;
+
+ /* Compute the control message */
+ req.bRequestType = requesttype;
+ req.bRequest = request;
+ req.wValue = cpu_to_le16(value);
+ req.wIndex = cpu_to_le16(index);
+ req.wLength = cpu_to_le16(size);
+
+ pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP);
+ addr = DBGP_EPADDR(devnum, 0);
+
+ ctrl = readl(&ehci_debug->control);
+ ctrl = dbgp_len_update(ctrl, sizeof(req));
+ ctrl |= DBGP_OUT;
+ ctrl |= DBGP_GO;
+
+ /* Send the setup message */
+ dbgp_set_data(&req, sizeof(req));
+ writel(addr, &ehci_debug->address);
+ writel(pids, &ehci_debug->pids);
+ ret = dbgp_wait_until_done(ctrl);
+ if (ret < 0)
+ return ret;
+
+ /* Read the result */
+ return dbgp_bulk_read(devnum, 0, data, size);
+}
+
+
+/* Find a PCI capability */
+static u32 __init find_cap(u32 num, u32 slot, u32 func, int cap)
+{
+ u8 pos;
+ int bytes;
+
+ if (!(read_pci_config_16(num, slot, func, PCI_STATUS) &
+ PCI_STATUS_CAP_LIST))
+ return 0;
+
+ pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST);
+ for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
+ u8 id;
+
+ pos &= ~3;
+ id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID);
+ if (id == 0xff)
+ break;
+ if (id == cap)
+ return pos;
+
+ pos = read_pci_config_byte(num, slot, func,
+ pos+PCI_CAP_LIST_NEXT);
+ }
+ return 0;
+}
+
+static u32 __init __find_dbgp(u32 bus, u32 slot, u32 func)
+{
+ u32 class;
+
+ class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION);
+ if ((class >> 8) != PCI_CLASS_SERIAL_USB_EHCI)
+ return 0;
+
+ return find_cap(bus, slot, func, PCI_CAP_ID_EHCI_DEBUG);
+}
+
+static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc)
+{
+ u32 bus, slot, func;
+
+ for (bus = 0; bus < 256; bus++) {
+ for (slot = 0; slot < 32; slot++) {
+ for (func = 0; func < 8; func++) {
+ unsigned cap;
+
+ cap = __find_dbgp(bus, slot, func);
+
+ if (!cap)
+ continue;
+ if (ehci_num-- != 0)
+ continue;
+ *rbus = bus;
+ *rslot = slot;
+ *rfunc = func;
+ return cap;
+ }
+ }
+ }
+ return 0;
+}
+
+static int ehci_reset_port(int port)
+{
+ u32 portsc;
+ u32 delay_time, delay;
+ int loop;
+
+ /* Reset the usb debug port */
+ portsc = readl(&ehci_regs->port_status[port - 1]);
+ portsc &= ~PORT_PE;
+ portsc |= PORT_RESET;
+ writel(portsc, &ehci_regs->port_status[port - 1]);
+
+ delay = HUB_ROOT_RESET_TIME;
+ for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT;
+ delay_time += delay) {
+ dbgp_mdelay(delay);
+
+ portsc = readl(&ehci_regs->port_status[port - 1]);
+ if (portsc & PORT_RESET) {
+ /* force reset to complete */
+ loop = 2;
+ writel(portsc & ~(PORT_RWC_BITS | PORT_RESET),
+ &ehci_regs->port_status[port - 1]);
+ do {
+ portsc = readl(&ehci_regs->port_status[port-1]);
+ } while ((portsc & PORT_RESET) && (--loop > 0));
+ }
+
+ /* Device went away? */
+ if (!(portsc & PORT_CONNECT))
+ return -ENOTCONN;
+
+ /* bomb out completely if something weird happend */
+ if ((portsc & PORT_CSC))
+ return -EINVAL;
+
+ /* If we've finished resetting, then break out of the loop */
+ if (!(portsc & PORT_RESET) && (portsc & PORT_PE))
+ return 0;
+ }
+ return -EBUSY;
+}
+
+static int ehci_wait_for_port(int port)
+{
+ u32 status;
+ int ret, reps;
+
+ for (reps = 0; reps < 3; reps++) {
+ dbgp_mdelay(100);
+ status = readl(&ehci_regs->status);
+ if (status & STS_PCD) {
+ ret = ehci_reset_port(port);
+ if (ret == 0)
+ return 0;
+ }
+ }
+ return -ENOTCONN;
+}
+
+#ifdef DBGP_DEBUG
+# define dbgp_printk early_printk
+#else
+static inline void dbgp_printk(const char *fmt, ...) { }
+#endif
+
+typedef void (*set_debug_port_t)(int port);
+
+static void default_set_debug_port(int port)
+{
+}
+
+static set_debug_port_t set_debug_port = default_set_debug_port;
+
+static void nvidia_set_debug_port(int port)
+{
+ u32 dword;
+ dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
+ 0x74);
+ dword &= ~(0x0f<<12);
+ dword |= ((port & 0x0f)<<12);
+ write_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, 0x74,
+ dword);
+ dbgp_printk("set debug port to %d\n", port);
+}
+
+static void __init detect_set_debug_port(void)
+{
+ u32 vendorid;
+
+ vendorid = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
+ 0x00);
+
+ if ((vendorid & 0xffff) == 0x10de) {
+ dbgp_printk("using nvidia set_debug_port\n");
+ set_debug_port = nvidia_set_debug_port;
+ }
+}
+
+static int __init ehci_setup(void)
+{
+ struct usb_debug_descriptor dbgp_desc;
+ u32 cmd, ctrl, status, portsc, hcs_params;
+ u32 debug_port, new_debug_port = 0, n_ports;
+ u32 devnum;
+ int ret, i;
+ int loop;
+ int port_map_tried;
+ int playtimes = 3;
+
+try_next_time:
+ port_map_tried = 0;
+
+try_next_port:
+
+ hcs_params = readl(&ehci_caps->hcs_params);
+ debug_port = HCS_DEBUG_PORT(hcs_params);
+ n_ports = HCS_N_PORTS(hcs_params);
+
+ dbgp_printk("debug_port: %d\n", debug_port);
+ dbgp_printk("n_ports: %d\n", n_ports);
+
+ for (i = 1; i <= n_ports; i++) {
+ portsc = readl(&ehci_regs->port_status[i-1]);
+ dbgp_printk("portstatus%d: %08x\n", i, portsc);
+ }
+
+ if (port_map_tried && (new_debug_port != debug_port)) {
+ if (--playtimes) {
+ set_debug_port(new_debug_port);
+ goto try_next_time;
+ }
+ return -1;
+ }
+
+ loop = 10;
+ /* Reset the EHCI controller */
+ cmd = readl(&ehci_regs->command);
+ cmd |= CMD_RESET;
+ writel(cmd, &ehci_regs->command);
+ do {
+ cmd = readl(&ehci_regs->command);
+ } while ((cmd & CMD_RESET) && (--loop > 0));
+
+ if (!loop) {
+ dbgp_printk("can not reset ehci\n");
+ return -1;
+ }
+ dbgp_printk("ehci reset done\n");
+
+ /* Claim ownership, but do not enable yet */
+ ctrl = readl(&ehci_debug->control);
+ ctrl |= DBGP_OWNER;
+ ctrl &= ~(DBGP_ENABLED | DBGP_INUSE);
+ writel(ctrl, &ehci_debug->control);
+
+ /* Start the ehci running */
+ cmd = readl(&ehci_regs->command);
+ cmd &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE | CMD_ASE | CMD_RESET);
+ cmd |= CMD_RUN;
+ writel(cmd, &ehci_regs->command);
+
+ /* Ensure everything is routed to the EHCI */
+ writel(FLAG_CF, &ehci_regs->configured_flag);
+
+ /* Wait until the controller is no longer halted */
+ loop = 10;
+ do {
+ status = readl(&ehci_regs->status);
+ } while ((status & STS_HALT) && (--loop > 0));
+
+ if (!loop) {
+ dbgp_printk("ehci can be started\n");
+ return -1;
+ }
+ dbgp_printk("ehci started\n");
+
+ /* Wait for a device to show up in the debug port */
+ ret = ehci_wait_for_port(debug_port);
+ if (ret < 0) {
+ dbgp_printk("No device found in debug port\n");
+ goto next_debug_port;
+ }
+ dbgp_printk("ehci wait for port done\n");
+
+ /* Enable the debug port */
+ ctrl = readl(&ehci_debug->control);
+ ctrl |= DBGP_CLAIM;
+ writel(ctrl, &ehci_debug->control);
+ ctrl = readl(&ehci_debug->control);
+ if ((ctrl & DBGP_CLAIM) != DBGP_CLAIM) {
+ dbgp_printk("No device in debug port\n");
+ writel(ctrl & ~DBGP_CLAIM, &ehci_debug->control);
+ goto err;
+ }
+ dbgp_printk("debug ported enabled\n");
+
+ /* Completely transfer the debug device to the debug controller */
+ portsc = readl(&ehci_regs->port_status[debug_port - 1]);
+ portsc &= ~PORT_PE;
+ writel(portsc, &ehci_regs->port_status[debug_port - 1]);
+
+ dbgp_mdelay(100);
+
+ /* Find the debug device and make it device number 127 */
+ for (devnum = 0; devnum <= 127; devnum++) {
+ ret = dbgp_control_msg(devnum,
+ USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
+ USB_REQ_GET_DESCRIPTOR, (USB_DT_DEBUG << 8), 0,
+ &dbgp_desc, sizeof(dbgp_desc));
+ if (ret > 0)
+ break;
+ }
+ if (devnum > 127) {
+ dbgp_printk("Could not find attached debug device\n");
+ goto err;
+ }
+ if (ret < 0) {
+ dbgp_printk("Attached device is not a debug device\n");
+ goto err;
+ }
+ dbgp_endpoint_out = dbgp_desc.bDebugOutEndpoint;
+
+ /* Move the device to 127 if it isn't already there */
+ if (devnum != USB_DEBUG_DEVNUM) {
+ ret = dbgp_control_msg(devnum,
+ USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
+ USB_REQ_SET_ADDRESS, USB_DEBUG_DEVNUM, 0, NULL, 0);
+ if (ret < 0) {
+ dbgp_printk("Could not move attached device to %d\n",
+ USB_DEBUG_DEVNUM);
+ goto err;
+ }
+ devnum = USB_DEBUG_DEVNUM;
+ dbgp_printk("debug device renamed to 127\n");
+ }
+
+ /* Enable the debug interface */
+ ret = dbgp_control_msg(USB_DEBUG_DEVNUM,
+ USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
+ USB_REQ_SET_FEATURE, USB_DEVICE_DEBUG_MODE, 0, NULL, 0);
+ if (ret < 0) {
+ dbgp_printk(" Could not enable the debug device\n");
+ goto err;
+ }
+ dbgp_printk("debug interface enabled\n");
+
+ /* Perform a small write to get the even/odd data state in sync
+ */
+ ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, dbgp_endpoint_out, " ", 1);
+ if (ret < 0) {
+ dbgp_printk("dbgp_bulk_write failed: %d\n", ret);
+ goto err;
+ }
+ dbgp_printk("small write doned\n");
+
+ return 0;
+err:
+ /* Things didn't work so remove my claim */
+ ctrl = readl(&ehci_debug->control);
+ ctrl &= ~(DBGP_CLAIM | DBGP_OUT);
+ writel(ctrl, &ehci_debug->control);
+ return -1;
+
+next_debug_port:
+ port_map_tried |= (1<<(debug_port - 1));
+ new_debug_port = ((debug_port-1+1)%n_ports) + 1;
+ if (port_map_tried != ((1<<n_ports) - 1)) {
+ set_debug_port(new_debug_port);
+ goto try_next_port;
+ }
+ if (--playtimes) {
+ set_debug_port(new_debug_port);
+ goto try_next_time;
+ }
+
+ return -1;
+}
+
+static int __init early_dbgp_init(char *s)
+{
+ u32 debug_port, bar, offset;
+ u32 bus, slot, func, cap;
+ void __iomem *ehci_bar;
+ u32 dbgp_num;
+ u32 bar_val;
+ char *e;
+ int ret;
+ u8 byte;
+
+ if (!early_pci_allowed())
+ return -1;
+
+ dbgp_num = 0;
+ if (*s)
+ dbgp_num = simple_strtoul(s, &e, 10);
+ dbgp_printk("dbgp_num: %d\n", dbgp_num);
+
+ cap = find_dbgp(dbgp_num, &bus, &slot, &func);
+ if (!cap)
+ return -1;
+
+ dbgp_printk("Found EHCI debug port on %02x:%02x.%1x\n", bus, slot,
+ func);
+
+ debug_port = read_pci_config(bus, slot, func, cap);
+ bar = (debug_port >> 29) & 0x7;
+ bar = (bar * 4) + 0xc;
+ offset = (debug_port >> 16) & 0xfff;
+ dbgp_printk("bar: %02x offset: %03x\n", bar, offset);
+ if (bar != PCI_BASE_ADDRESS_0) {
+ dbgp_printk("only debug ports on bar 1 handled.\n");
+
+ return -1;
+ }
+
+ bar_val = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0);
+ dbgp_printk("bar_val: %02x offset: %03x\n", bar_val, offset);
+ if (bar_val & ~PCI_BASE_ADDRESS_MEM_MASK) {
+ dbgp_printk("only simple 32bit mmio bars supported\n");
+
+ return -1;
+ }
+
+ /* double check if the mem space is enabled */
+ byte = read_pci_config_byte(bus, slot, func, 0x04);
+ if (!(byte & 0x2)) {
+ byte |= 0x02;
+ write_pci_config_byte(bus, slot, func, 0x04, byte);
+ dbgp_printk("mmio for ehci enabled\n");
+ }
+
+ /*
+ * FIXME I don't have the bar size so just guess PAGE_SIZE is more
+ * than enough. 1K is the biggest I have seen.
+ */
+ set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK);
+ ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE);
+ ehci_bar += bar_val & ~PAGE_MASK;
+ dbgp_printk("ehci_bar: %p\n", ehci_bar);
+
+ ehci_caps = ehci_bar;
+ ehci_regs = ehci_bar + HC_LENGTH(readl(&ehci_caps->hc_capbase));
+ ehci_debug = ehci_bar + offset;
+ ehci_dev.bus = bus;
+ ehci_dev.slot = slot;
+ ehci_dev.func = func;
+
+ detect_set_debug_port();
+
+ ret = ehci_setup();
+ if (ret < 0) {
+ dbgp_printk("ehci_setup failed\n");
+ ehci_debug = NULL;
+
+ return -1;
+ }
+
+ return 0;
+}
+
+static void early_dbgp_write(struct console *con, const char *str, u32 n)
+{
+ int chunk, ret;
+
+ if (!ehci_debug)
+ return;
+ while (n > 0) {
+ chunk = n;
+ if (chunk > DBGP_MAX_PACKET)
+ chunk = DBGP_MAX_PACKET;
+ ret = dbgp_bulk_write(USB_DEBUG_DEVNUM,
+ dbgp_endpoint_out, str, chunk);
+ str += chunk;
+ n -= chunk;
+ }
+}
+
+static struct console early_dbgp_console = {
+ .name = "earlydbg",
+ .write = early_dbgp_write,
+ .flags = CON_PRINTBUFFER,
+ .index = -1,
+};
+#endif
+
/* Console interface to a host file on AMD's SimNow! */
static int simnow_fd;
@@ -165,6 +889,7 @@ enum {
static noinline long simnow(long cmd, long a, long b, long c)
{
long ret;
+
asm volatile("cpuid" :
"=a" (ret) :
"b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2));
@@ -174,6 +899,7 @@ static noinline long simnow(long cmd, long a, long b, long c)
static void __init simnow_init(char *str)
{
char *fn = "klog";
+
if (*str == '=')
fn = ++str;
/* error ignored */
@@ -194,7 +920,7 @@ static struct console simnow_console = {
/* Direct interface for emergencies */
static struct console *early_console = &early_vga_console;
-static int early_console_initialized;
+static int __initdata early_console_initialized;
asmlinkage void early_printk(const char *fmt, ...)
{
@@ -208,10 +934,11 @@ asmlinkage void early_printk(const char *fmt, ...)
va_end(ap);
}
-static int __initdata keep_early;
static int __init setup_early_printk(char *buf)
{
+ int keep_early;
+
if (!buf)
return 0;
@@ -219,8 +946,7 @@ static int __init setup_early_printk(char *buf)
return 0;
early_console_initialized = 1;
- if (strstr(buf, "keep"))
- keep_early = 1;
+ keep_early = (strstr(buf, "keep") != NULL);
if (!strncmp(buf, "serial", 6)) {
early_serial_init(buf + 6);
@@ -238,6 +964,17 @@ static int __init setup_early_printk(char *buf)
simnow_init(buf + 6);
early_console = &simnow_console;
keep_early = 1;
+#ifdef CONFIG_EARLY_PRINTK_DBGP
+ } else if (!strncmp(buf, "dbgp", 4)) {
+ if (early_dbgp_init(buf+4) < 0)
+ return 0;
+ early_console = &early_dbgp_console;
+ /*
+ * usb subsys will reset ehci controller, so don't keep
+ * that early console
+ */
+ keep_early = 0;
+#endif
#ifdef CONFIG_HVC_XEN
} else if (!strncmp(buf, "xen", 3)) {
early_console = &xenboot_console;
@@ -251,4 +988,5 @@ static int __init setup_early_printk(char *buf)
register_console(early_console);
return 0;
}
+
early_param("earlyprintk", setup_early_printk);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 109792bc7cf..b21fbfaffe3 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -730,6 +730,7 @@ error_code:
movl $(__USER_DS), %ecx
movl %ecx, %ds
movl %ecx, %es
+ TRACE_IRQS_OFF
movl %esp,%eax # pt_regs pointer
call *%edi
jmp ret_from_exception
@@ -760,20 +761,9 @@ ENTRY(device_not_available)
RING0_INT_FRAME
pushl $-1 # mark this as an int
CFI_ADJUST_CFA_OFFSET 4
- SAVE_ALL
- GET_CR0_INTO_EAX
- testl $0x4, %eax # EM (math emulation bit)
- jne device_not_available_emulate
- preempt_stop(CLBR_ANY)
- call math_state_restore
- jmp ret_from_exception
-device_not_available_emulate:
- pushl $0 # temporary storage for ORIG_EIP
+ pushl $do_device_not_available
CFI_ADJUST_CFA_OFFSET 4
- call math_emulate
- addl $4, %esp
- CFI_ADJUST_CFA_OFFSET -4
- jmp ret_from_exception
+ jmp error_code
CFI_ENDPROC
END(device_not_available)
@@ -814,6 +804,7 @@ debug_stack_correct:
pushl $-1 # mark this as an int
CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
+ TRACE_IRQS_OFF
xorl %edx,%edx # error code 0
movl %esp,%eax # pt_regs pointer
call do_debug
@@ -858,6 +849,7 @@ nmi_stack_correct:
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
+ TRACE_IRQS_OFF
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
call do_nmi
@@ -898,6 +890,7 @@ nmi_espfix_stack:
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
+ TRACE_IRQS_OFF
FIXUP_ESPFIX_STACK # %eax == %esp
xorl %edx,%edx # zero error code
call do_nmi
@@ -928,6 +921,7 @@ KPROBE_ENTRY(int3)
pushl $-1 # mark this as an int
CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
+ TRACE_IRQS_OFF
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
call do_int3
@@ -1030,7 +1024,7 @@ ENTRY(machine_check)
RING0_INT_FRAME
pushl $0
CFI_ADJUST_CFA_OFFSET 4
- pushl machine_check_vector
+ pushl $do_machine_check
CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index cf3a0b2d005..1db6ce4314e 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -667,6 +667,13 @@ END(stub_rt_sigreturn)
SAVE_ARGS
leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
pushq %rbp
+ /*
+ * Save rbp twice: One is for marking the stack frame, as usual, and the
+ * other, to fill pt_regs properly. This is because bx comes right
+ * before the last saved register in that structure, and not bp. If the
+ * base pointer were in the place bx is today, this would not be needed.
+ */
+ movq %rbp, -8(%rsp)
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rbp, 0
movq %rsp,%rbp
@@ -932,6 +939,9 @@ END(spurious_interrupt)
.if \ist
movq %gs:pda_data_offset, %rbp
.endif
+ .if \irqtrace
+ TRACE_IRQS_OFF
+ .endif
movq %rsp,%rdi
movq ORIG_RAX(%rsp),%rsi
movq $-1,ORIG_RAX(%rsp)
@@ -1058,7 +1068,8 @@ KPROBE_ENTRY(error_entry)
je error_kernelspace
error_swapgs:
SWAPGS
-error_sti:
+error_sti:
+ TRACE_IRQS_OFF
movq %rdi,RDI(%rsp)
CFI_REL_OFFSET rdi,RDI
movq %rsp,%rdi
@@ -1232,7 +1243,7 @@ ENTRY(simd_coprocessor_error)
END(simd_coprocessor_error)
ENTRY(device_not_available)
- zeroentry math_state_restore
+ zeroentry do_device_not_available
END(device_not_available)
/* runs on exception stack */
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c
index 849e5cd485b..f454c78fcef 100644
--- a/arch/x86/kernel/es7000_32.c
+++ b/arch/x86/kernel/es7000_32.c
@@ -109,6 +109,7 @@ struct oem_table {
};
extern int find_unisys_acpi_oem_table(unsigned long *oem_addr);
+extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr);
#endif
struct mip_reg {
@@ -243,21 +244,38 @@ parse_unisys_oem (char *oemptr)
}
#ifdef CONFIG_ACPI
-int __init
-find_unisys_acpi_oem_table(unsigned long *oem_addr)
+static unsigned long oem_addrX;
+static unsigned long oem_size;
+int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
{
struct acpi_table_header *header = NULL;
int i = 0;
- while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) {
+ acpi_size tbl_size;
+
+ while (ACPI_SUCCESS(acpi_get_table_with_size("OEM1", i++, &header, &tbl_size))) {
if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) {
struct oem_table *t = (struct oem_table *)header;
- *oem_addr = (unsigned long)__acpi_map_table(t->OEMTableAddr,
- t->OEMTableSize);
+
+ oem_addrX = t->OEMTableAddr;
+ oem_size = t->OEMTableSize;
+ early_acpi_os_unmap_memory(header, tbl_size);
+
+ *oem_addr = (unsigned long)__acpi_map_table(oem_addrX,
+ oem_size);
return 0;
}
+ early_acpi_os_unmap_memory(header, tbl_size);
}
return -1;
}
+
+void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
+{
+ if (!oem_addr)
+ return;
+
+ __acpi_unmap_table((char *)oem_addr, oem_size);
+}
#endif
static void
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index ae2ffc8a400..33581d94a90 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -114,7 +114,7 @@ static void uv_send_IPI_one(int cpu, int vector)
unsigned long val, apicid, lapicid;
int pnode;
- apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */
+ apicid = per_cpu(x86_cpu_to_apicid, cpu);
lapicid = apicid & 0x3f; /* ZZZ macro needed */
pnode = uv_apicid_to_pnode(apicid);
val =
@@ -202,12 +202,10 @@ static unsigned int phys_pkg_id(int index_msb)
return uv_read_apic_id() >> index_msb;
}
-#ifdef ZZZ /* Needs x2apic patch */
static void uv_send_IPI_self(int vector)
{
apic_write(APIC_SELF_IPI, vector);
}
-#endif
struct genapic apic_x2apic_uv_x = {
.name = "UV large system",
@@ -215,15 +213,15 @@ struct genapic apic_x2apic_uv_x = {
.int_delivery_mode = dest_Fixed,
.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
.target_cpus = uv_target_cpus,
- .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */
+ .vector_allocation_domain = uv_vector_allocation_domain,
.apic_id_registered = uv_apic_id_registered,
.init_apic_ldr = uv_init_apic_ldr,
.send_IPI_all = uv_send_IPI_all,
.send_IPI_allbutself = uv_send_IPI_allbutself,
.send_IPI_mask = uv_send_IPI_mask,
- /* ZZZ.send_IPI_self = uv_send_IPI_self, */
+ .send_IPI_self = uv_send_IPI_self,
.cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
- .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */
+ .phys_pkg_id = phys_pkg_id,
.get_apic_id = get_apic_id,
.set_apic_id = set_apic_id,
.apic_id_mask = (0xFFFFFFFFu),
@@ -286,12 +284,13 @@ static __init void map_low_mmrs(void)
enum map_type {map_wb, map_uc};
-static __init void map_high(char *id, unsigned long base, int shift, enum map_type map_type)
+static __init void map_high(char *id, unsigned long base, int shift,
+ int max_pnode, enum map_type map_type)
{
unsigned long bytes, paddr;
paddr = base << shift;
- bytes = (1UL << shift);
+ bytes = (1UL << shift) * (max_pnode + 1);
printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr,
paddr + bytes);
if (map_type == map_uc)
@@ -307,7 +306,7 @@ static __init void map_gru_high(int max_pnode)
gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
if (gru.s.enable)
- map_high("GRU", gru.s.base, shift, map_wb);
+ map_high("GRU", gru.s.base, shift, max_pnode, map_wb);
}
static __init void map_config_high(int max_pnode)
@@ -317,7 +316,7 @@ static __init void map_config_high(int max_pnode)
cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR);
if (cfg.s.enable)
- map_high("CONFIG", cfg.s.base, shift, map_uc);
+ map_high("CONFIG", cfg.s.base, shift, max_pnode, map_uc);
}
static __init void map_mmr_high(int max_pnode)
@@ -327,7 +326,7 @@ static __init void map_mmr_high(int max_pnode)
mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
if (mmr.s.enable)
- map_high("MMR", mmr.s.base, shift, map_uc);
+ map_high("MMR", mmr.s.base, shift, max_pnode, map_uc);
}
static __init void map_mmioh_high(int max_pnode)
@@ -337,7 +336,7 @@ static __init void map_mmioh_high(int max_pnode)
mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
if (mmioh.s.enable)
- map_high("MMIOH", mmioh.s.base, shift, map_uc);
+ map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc);
}
static __init void uv_rtc_init(void)
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index 3e66bd364a9..1dcb0f13897 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -35,6 +35,7 @@ void __init reserve_ebda_region(void)
/* start of EBDA area */
ebda_addr = get_bios_ebda();
+ printk(KERN_INFO "BIOS EBDA/lowmem at: %08x/%08x\n", ebda_addr, lowmem);
/* Fixup: bios puts an EBDA in the top 64K segment */
/* of conventional memory, but does not adjust lowmem. */
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 73deaffadd0..acf62fc233d 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -115,13 +115,17 @@ static void hpet_reserve_platform_timers(unsigned long id)
hd.hd_phys_address = hpet_address;
hd.hd_address = hpet;
hd.hd_nirqs = nrtimers;
- hd.hd_flags = HPET_DATA_PLATFORM;
hpet_reserve_timer(&hd, 0);
#ifdef CONFIG_HPET_EMULATE_RTC
hpet_reserve_timer(&hd, 1);
#endif
+ /*
+ * NOTE that hd_irq[] reflects IOAPIC input pins (LEGACY_8254
+ * is wrong for i8259!) not the output IRQ. Many BIOS writers
+ * don't bother configuring *any* comparator interrupts.
+ */
hd.hd_irq[0] = HPET_LEGACY_8254;
hd.hd_irq[1] = HPET_LEGACY_RTC;
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 45723f1fe19..1f20608d4ca 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -468,9 +468,23 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
static int save_i387_xsave(void __user *buf)
{
+ struct task_struct *tsk = current;
struct _fpstate_ia32 __user *fx = buf;
int err = 0;
+ /*
+ * For legacy compatible, we always set FP/SSE bits in the bit
+ * vector while saving the state to the user context.
+ * This will enable us capturing any changes(during sigreturn) to
+ * the FP/SSE bits by the legacy applications which don't touch
+ * xstate_bv in the xsave header.
+ *
+ * xsave aware applications can change the xstate_bv in the xsave
+ * header as well as change any contents in the memory layout.
+ * xrestore as part of sigreturn will capture all the changes.
+ */
+ tsk->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
+
if (save_i387_fxsave(fx) < 0)
return -1;
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index a1bec2969c6..02063ae042f 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1281,8 +1281,8 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
icr = apic_icr_read();
- printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
- printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
+ printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
+ printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
v = apic_read(APIC_LVTT);
printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 1f26fd9ec4f..5b5be9d43c2 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -135,7 +135,7 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
[IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
};
-static void __init init_ISA_irqs (void)
+void __init init_ISA_irqs(void)
{
int i;
@@ -164,22 +164,8 @@ static void __init init_ISA_irqs (void)
void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
-void __init native_init_IRQ(void)
+static void __init smp_intr_init(void)
{
- int i;
-
- init_ISA_irqs();
- /*
- * Cover the whole vector space, no vector can escape
- * us. (some of these will be overridden and become
- * 'special' SMP interrupts)
- */
- for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
- int vector = FIRST_EXTERNAL_VECTOR + i;
- if (vector != IA32_SYSCALL_VECTOR)
- set_intr_gate(vector, interrupt[i]);
- }
-
#ifdef CONFIG_SMP
/*
* The reschedule interrupt is a CPU-to-CPU reschedule-helper
@@ -207,6 +193,12 @@ void __init native_init_IRQ(void)
/* Low priority IPI to cleanup after moving an irq */
set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
#endif
+}
+
+static void __init apic_intr_init(void)
+{
+ smp_intr_init();
+
alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
@@ -216,6 +208,25 @@ void __init native_init_IRQ(void)
/* IPI vectors for APIC spurious and error interrupts */
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+}
+
+void __init native_init_IRQ(void)
+{
+ int i;
+
+ init_ISA_irqs();
+ /*
+ * Cover the whole vector space, no vector can escape
+ * us. (some of these will be overridden and become
+ * 'special' SMP interrupts)
+ */
+ for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
+ int vector = FIRST_EXTERNAL_VECTOR + i;
+ if (vector != IA32_SYSCALL_VECTOR)
+ set_intr_gate(vector, interrupt[i]);
+ }
+
+ apic_intr_init();
if (!acpi_ioapic)
setup_irq(2, &irq2);
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 0ed5f939b90..eee32b43fee 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -52,6 +52,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
(mincount - oldsize) * LDT_ENTRY_SIZE);
+ paravirt_alloc_ldt(newldt, mincount);
+
#ifdef CONFIG_X86_64
/* CHECKME: Do we really need this ? */
wmb();
@@ -74,6 +76,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
#endif
}
if (oldsize) {
+ paravirt_free_ldt(oldldt, oldsize);
if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(oldldt);
else
@@ -85,10 +88,13 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
{
int err = alloc_ldt(new, old->size, 0);
+ int i;
if (err < 0)
return err;
- memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE);
+
+ for(i = 0; i < old->size; i++)
+ write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
return 0;
}
@@ -125,6 +131,7 @@ void destroy_context(struct mm_struct *mm)
if (mm == current->active_mm)
clear_LDT();
#endif
+ paravirt_free_ldt(mm->context.ldt, mm->context.size);
if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(mm->context.ldt);
else
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
deleted file mode 100644
index 652fa5c38eb..00000000000
--- a/arch/x86/kernel/microcode.c
+++ /dev/null
@@ -1,853 +0,0 @@
-/*
- * Intel CPU Microcode Update Driver for Linux
- *
- * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
- * 2006 Shaohua Li <shaohua.li@intel.com>
- *
- * This driver allows to upgrade microcode on Intel processors
- * belonging to IA-32 family - PentiumPro, Pentium II,
- * Pentium III, Xeon, Pentium 4, etc.
- *
- * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture
- * Software Developer's Manual
- * Order Number 253668 or free download from:
- *
- * http://developer.intel.com/design/pentium4/manuals/253668.htm
- *
- * For more information, go to http://www.urbanmyth.org/microcode
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * 1.0 16 Feb 2000, Tigran Aivazian <tigran@sco.com>
- * Initial release.
- * 1.01 18 Feb 2000, Tigran Aivazian <tigran@sco.com>
- * Added read() support + cleanups.
- * 1.02 21 Feb 2000, Tigran Aivazian <tigran@sco.com>
- * Added 'device trimming' support. open(O_WRONLY) zeroes
- * and frees the saved copy of applied microcode.
- * 1.03 29 Feb 2000, Tigran Aivazian <tigran@sco.com>
- * Made to use devfs (/dev/cpu/microcode) + cleanups.
- * 1.04 06 Jun 2000, Simon Trimmer <simon@veritas.com>
- * Added misc device support (now uses both devfs and misc).
- * Added MICROCODE_IOCFREE ioctl to clear memory.
- * 1.05 09 Jun 2000, Simon Trimmer <simon@veritas.com>
- * Messages for error cases (non Intel & no suitable microcode).
- * 1.06 03 Aug 2000, Tigran Aivazian <tigran@veritas.com>
- * Removed ->release(). Removed exclusive open and status bitmap.
- * Added microcode_rwsem to serialize read()/write()/ioctl().
- * Removed global kernel lock usage.
- * 1.07 07 Sep 2000, Tigran Aivazian <tigran@veritas.com>
- * Write 0 to 0x8B msr and then cpuid before reading revision,
- * so that it works even if there were no update done by the
- * BIOS. Otherwise, reading from 0x8B gives junk (which happened
- * to be 0 on my machine which is why it worked even when I
- * disabled update by the BIOS)
- * Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix.
- * 1.08 11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and
- * Tigran Aivazian <tigran@veritas.com>
- * Intel Pentium 4 processor support and bugfixes.
- * 1.09 30 Oct 2001, Tigran Aivazian <tigran@veritas.com>
- * Bugfix for HT (Hyper-Threading) enabled processors
- * whereby processor resources are shared by all logical processors
- * in a single CPU package.
- * 1.10 28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and
- * Tigran Aivazian <tigran@veritas.com>,
- * Serialize updates as required on HT processors due to speculative
- * nature of implementation.
- * 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
- * Fix the panic when writing zero-length microcode chunk.
- * 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
- * Jun Nakajima <jun.nakajima@intel.com>
- * Support for the microcode updates in the new format.
- * 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com>
- * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
- * because we no longer hold a copy of applied microcode
- * in kernel memory.
- * 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com>
- * Fix sigmatch() macro to handle old CPUs with pf == 0.
- * Thanks to Stuart Swales for pointing out this bug.
- */
-
-//#define DEBUG /* pr_debug */
-#include <linux/capability.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/smp_lock.h>
-#include <linux/cpumask.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/miscdevice.h>
-#include <linux/spinlock.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/mutex.h>
-#include <linux/cpu.h>
-#include <linux/firmware.h>
-#include <linux/platform_device.h>
-
-#include <asm/msr.h>
-#include <asm/uaccess.h>
-#include <asm/processor.h>
-
-MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
-MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
-MODULE_LICENSE("GPL");
-
-#define MICROCODE_VERSION "1.14a"
-
-#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */
-#define MC_HEADER_SIZE (sizeof (microcode_header_t)) /* 48 bytes */
-#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */
-#define EXT_HEADER_SIZE (sizeof (struct extended_sigtable)) /* 20 bytes */
-#define EXT_SIGNATURE_SIZE (sizeof (struct extended_signature)) /* 12 bytes */
-#define DWSIZE (sizeof (u32))
-#define get_totalsize(mc) \
- (((microcode_t *)mc)->hdr.totalsize ? \
- ((microcode_t *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE)
-#define get_datasize(mc) \
- (((microcode_t *)mc)->hdr.datasize ? \
- ((microcode_t *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
-
-#define sigmatch(s1, s2, p1, p2) \
- (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))
-
-#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
-
-/* serialize access to the physical write to MSR 0x79 */
-static DEFINE_SPINLOCK(microcode_update_lock);
-
-/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
-static DEFINE_MUTEX(microcode_mutex);
-
-static struct ucode_cpu_info {
- int valid;
- unsigned int sig;
- unsigned int pf;
- unsigned int rev;
- microcode_t *mc;
-} ucode_cpu_info[NR_CPUS];
-
-static void collect_cpu_info(int cpu_num)
-{
- struct cpuinfo_x86 *c = &cpu_data(cpu_num);
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
- unsigned int val[2];
-
- /* We should bind the task to the CPU */
- BUG_ON(raw_smp_processor_id() != cpu_num);
- uci->pf = uci->rev = 0;
- uci->mc = NULL;
- uci->valid = 1;
-
- if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
- cpu_has(c, X86_FEATURE_IA64)) {
- printk(KERN_ERR "microcode: CPU%d not a capable Intel "
- "processor\n", cpu_num);
- uci->valid = 0;
- return;
- }
-
- uci->sig = cpuid_eax(0x00000001);
-
- if ((c->x86_model >= 5) || (c->x86 > 6)) {
- /* get processor flags from MSR 0x17 */
- rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
- uci->pf = 1 << ((val[1] >> 18) & 7);
- }
-
- wrmsr(MSR_IA32_UCODE_REV, 0, 0);
- /* see notes above for revision 1.07. Apparent chip bug */
- sync_core();
- /* get the current revision from MSR 0x8B */
- rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev);
- pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
- uci->sig, uci->pf, uci->rev);
-}
-
-static inline int microcode_update_match(int cpu_num,
- microcode_header_t *mc_header, int sig, int pf)
-{
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
-
- if (!sigmatch(sig, uci->sig, pf, uci->pf)
- || mc_header->rev <= uci->rev)
- return 0;
- return 1;
-}
-
-static int microcode_sanity_check(void *mc)
-{
- microcode_header_t *mc_header = mc;
- struct extended_sigtable *ext_header = NULL;
- struct extended_signature *ext_sig;
- unsigned long total_size, data_size, ext_table_size;
- int sum, orig_sum, ext_sigcount = 0, i;
-
- total_size = get_totalsize(mc_header);
- data_size = get_datasize(mc_header);
- if (data_size + MC_HEADER_SIZE > total_size) {
- printk(KERN_ERR "microcode: error! "
- "Bad data size in microcode data file\n");
- return -EINVAL;
- }
-
- if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
- printk(KERN_ERR "microcode: error! "
- "Unknown microcode update format\n");
- return -EINVAL;
- }
- ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
- if (ext_table_size) {
- if ((ext_table_size < EXT_HEADER_SIZE)
- || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
- printk(KERN_ERR "microcode: error! "
- "Small exttable size in microcode data file\n");
- return -EINVAL;
- }
- ext_header = mc + MC_HEADER_SIZE + data_size;
- if (ext_table_size != exttable_size(ext_header)) {
- printk(KERN_ERR "microcode: error! "
- "Bad exttable size in microcode data file\n");
- return -EFAULT;
- }
- ext_sigcount = ext_header->count;
- }
-
- /* check extended table checksum */
- if (ext_table_size) {
- int ext_table_sum = 0;
- int *ext_tablep = (int *)ext_header;
-
- i = ext_table_size / DWSIZE;
- while (i--)
- ext_table_sum += ext_tablep[i];
- if (ext_table_sum) {
- printk(KERN_WARNING "microcode: aborting, "
- "bad extended signature table checksum\n");
- return -EINVAL;
- }
- }
-
- /* calculate the checksum */
- orig_sum = 0;
- i = (MC_HEADER_SIZE + data_size) / DWSIZE;
- while (i--)
- orig_sum += ((int *)mc)[i];
- if (orig_sum) {
- printk(KERN_ERR "microcode: aborting, bad checksum\n");
- return -EINVAL;
- }
- if (!ext_table_size)
- return 0;
- /* check extended signature checksum */
- for (i = 0; i < ext_sigcount; i++) {
- ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
- EXT_SIGNATURE_SIZE * i;
- sum = orig_sum
- - (mc_header->sig + mc_header->pf + mc_header->cksum)
- + (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
- if (sum) {
- printk(KERN_ERR "microcode: aborting, bad checksum\n");
- return -EINVAL;
- }
- }
- return 0;
-}
-
-/*
- * return 0 - no update found
- * return 1 - found update
- * return < 0 - error
- */
-static int get_maching_microcode(void *mc, int cpu)
-{
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- microcode_header_t *mc_header = mc;
- struct extended_sigtable *ext_header;
- unsigned long total_size = get_totalsize(mc_header);
- int ext_sigcount, i;
- struct extended_signature *ext_sig;
- void *new_mc;
-
- if (microcode_update_match(cpu, mc_header,
- mc_header->sig, mc_header->pf))
- goto find;
-
- if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
- return 0;
-
- ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE;
- ext_sigcount = ext_header->count;
- ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
- for (i = 0; i < ext_sigcount; i++) {
- if (microcode_update_match(cpu, mc_header,
- ext_sig->sig, ext_sig->pf))
- goto find;
- ext_sig++;
- }
- return 0;
-find:
- pr_debug("microcode: CPU%d found a matching microcode update with"
- " version 0x%x (current=0x%x)\n", cpu, mc_header->rev,uci->rev);
- new_mc = vmalloc(total_size);
- if (!new_mc) {
- printk(KERN_ERR "microcode: error! Can not allocate memory\n");
- return -ENOMEM;
- }
-
- /* free previous update file */
- vfree(uci->mc);
-
- memcpy(new_mc, mc, total_size);
- uci->mc = new_mc;
- return 1;
-}
-
-static void apply_microcode(int cpu)
-{
- unsigned long flags;
- unsigned int val[2];
- int cpu_num = raw_smp_processor_id();
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
-
- /* We should bind the task to the CPU */
- BUG_ON(cpu_num != cpu);
-
- if (uci->mc == NULL)
- return;
-
- /* serialize access to the physical write to MSR 0x79 */
- spin_lock_irqsave(&microcode_update_lock, flags);
-
- /* write microcode via MSR 0x79 */
- wrmsr(MSR_IA32_UCODE_WRITE,
- (unsigned long) uci->mc->bits,
- (unsigned long) uci->mc->bits >> 16 >> 16);
- wrmsr(MSR_IA32_UCODE_REV, 0, 0);
-
- /* see notes above for revision 1.07. Apparent chip bug */
- sync_core();
-
- /* get the current revision from MSR 0x8B */
- rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
-
- spin_unlock_irqrestore(&microcode_update_lock, flags);
- if (val[1] != uci->mc->hdr.rev) {
- printk(KERN_ERR "microcode: CPU%d update from revision "
- "0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]);
- return;
- }
- printk(KERN_INFO "microcode: CPU%d updated from revision "
- "0x%x to 0x%x, date = %08x \n",
- cpu_num, uci->rev, val[1], uci->mc->hdr.date);
- uci->rev = val[1];
-}
-
-#ifdef CONFIG_MICROCODE_OLD_INTERFACE
-static void __user *user_buffer; /* user area microcode data buffer */
-static unsigned int user_buffer_size; /* it's size */
-
-static long get_next_ucode(void **mc, long offset)
-{
- microcode_header_t mc_header;
- unsigned long total_size;
-
- /* No more data */
- if (offset >= user_buffer_size)
- return 0;
- if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) {
- printk(KERN_ERR "microcode: error! Can not read user data\n");
- return -EFAULT;
- }
- total_size = get_totalsize(&mc_header);
- if (offset + total_size > user_buffer_size) {
- printk(KERN_ERR "microcode: error! Bad total size in microcode "
- "data file\n");
- return -EINVAL;
- }
- *mc = vmalloc(total_size);
- if (!*mc)
- return -ENOMEM;
- if (copy_from_user(*mc, user_buffer + offset, total_size)) {
- printk(KERN_ERR "microcode: error! Can not read user data\n");
- vfree(*mc);
- return -EFAULT;
- }
- return offset + total_size;
-}
-
-static int do_microcode_update (void)
-{
- long cursor = 0;
- int error = 0;
- void *new_mc = NULL;
- int cpu;
- cpumask_t old;
-
- old = current->cpus_allowed;
-
- while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) {
- error = microcode_sanity_check(new_mc);
- if (error)
- goto out;
- /*
- * It's possible the data file has multiple matching ucode,
- * lets keep searching till the latest version
- */
- for_each_online_cpu(cpu) {
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
- if (!uci->valid)
- continue;
- set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
- error = get_maching_microcode(new_mc, cpu);
- if (error < 0)
- goto out;
- if (error == 1)
- apply_microcode(cpu);
- }
- vfree(new_mc);
- }
-out:
- if (cursor > 0)
- vfree(new_mc);
- if (cursor < 0)
- error = cursor;
- set_cpus_allowed_ptr(current, &old);
- return error;
-}
-
-static int microcode_open (struct inode *unused1, struct file *unused2)
-{
- cycle_kernel_lock();
- return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
-}
-
-static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
-{
- ssize_t ret;
-
- if ((len >> PAGE_SHIFT) > num_physpages) {
- printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages);
- return -EINVAL;
- }
-
- get_online_cpus();
- mutex_lock(&microcode_mutex);
-
- user_buffer = (void __user *) buf;
- user_buffer_size = (int) len;
-
- ret = do_microcode_update();
- if (!ret)
- ret = (ssize_t)len;
-
- mutex_unlock(&microcode_mutex);
- put_online_cpus();
-
- return ret;
-}
-
-static const struct file_operations microcode_fops = {
- .owner = THIS_MODULE,
- .write = microcode_write,
- .open = microcode_open,
-};
-
-static struct miscdevice microcode_dev = {
- .minor = MICROCODE_MINOR,
- .name = "microcode",
- .fops = &microcode_fops,
-};
-
-static int __init microcode_dev_init (void)
-{
- int error;
-
- error = misc_register(&microcode_dev);
- if (error) {
- printk(KERN_ERR
- "microcode: can't misc_register on minor=%d\n",
- MICROCODE_MINOR);
- return error;
- }
-
- return 0;
-}
-
-static void microcode_dev_exit (void)
-{
- misc_deregister(&microcode_dev);
-}
-
-MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
-#else
-#define microcode_dev_init() 0
-#define microcode_dev_exit() do { } while(0)
-#endif
-
-static long get_next_ucode_from_buffer(void **mc, const u8 *buf,
- unsigned long size, long offset)
-{
- microcode_header_t *mc_header;
- unsigned long total_size;
-
- /* No more data */
- if (offset >= size)
- return 0;
- mc_header = (microcode_header_t *)(buf + offset);
- total_size = get_totalsize(mc_header);
-
- if (offset + total_size > size) {
- printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
- return -EINVAL;
- }
-
- *mc = vmalloc(total_size);
- if (!*mc) {
- printk(KERN_ERR "microcode: error! Can not allocate memory\n");
- return -ENOMEM;
- }
- memcpy(*mc, buf + offset, total_size);
- return offset + total_size;
-}
-
-/* fake device for request_firmware */
-static struct platform_device *microcode_pdev;
-
-static int cpu_request_microcode(int cpu)
-{
- char name[30];
- struct cpuinfo_x86 *c = &cpu_data(cpu);
- const struct firmware *firmware;
- const u8 *buf;
- unsigned long size;
- long offset = 0;
- int error;
- void *mc;
-
- /* We should bind the task to the CPU */
- BUG_ON(cpu != raw_smp_processor_id());
- sprintf(name,"intel-ucode/%02x-%02x-%02x",
- c->x86, c->x86_model, c->x86_mask);
- error = request_firmware(&firmware, name, &microcode_pdev->dev);
- if (error) {
- pr_debug("microcode: data file %s load failed\n", name);
- return error;
- }
- buf = firmware->data;
- size = firmware->size;
- while ((offset = get_next_ucode_from_buffer(&mc, buf, size, offset))
- > 0) {
- error = microcode_sanity_check(mc);
- if (error)
- break;
- error = get_maching_microcode(mc, cpu);
- if (error < 0)
- break;
- /*
- * It's possible the data file has multiple matching ucode,
- * lets keep searching till the latest version
- */
- if (error == 1) {
- apply_microcode(cpu);
- error = 0;
- }
- vfree(mc);
- }
- if (offset > 0)
- vfree(mc);
- if (offset < 0)
- error = offset;
- release_firmware(firmware);
-
- return error;
-}
-
-static int apply_microcode_check_cpu(int cpu)
-{
- struct cpuinfo_x86 *c = &cpu_data(cpu);
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- cpumask_t old;
- unsigned int val[2];
- int err = 0;
-
- /* Check if the microcode is available */
- if (!uci->mc)
- return 0;
-
- old = current->cpus_allowed;
- set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-
- /* Check if the microcode we have in memory matches the CPU */
- if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
- cpu_has(c, X86_FEATURE_IA64) || uci->sig != cpuid_eax(0x00000001))
- err = -EINVAL;
-
- if (!err && ((c->x86_model >= 5) || (c->x86 > 6))) {
- /* get processor flags from MSR 0x17 */
- rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
- if (uci->pf != (1 << ((val[1] >> 18) & 7)))
- err = -EINVAL;
- }
-
- if (!err) {
- wrmsr(MSR_IA32_UCODE_REV, 0, 0);
- /* see notes above for revision 1.07. Apparent chip bug */
- sync_core();
- /* get the current revision from MSR 0x8B */
- rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
- if (uci->rev != val[1])
- err = -EINVAL;
- }
-
- if (!err)
- apply_microcode(cpu);
- else
- printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:"
- " sig=0x%x, pf=0x%x, rev=0x%x\n",
- cpu, uci->sig, uci->pf, uci->rev);
-
- set_cpus_allowed_ptr(current, &old);
- return err;
-}
-
-static void microcode_init_cpu(int cpu, int resume)
-{
- cpumask_t old;
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
- old = current->cpus_allowed;
-
- set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
- mutex_lock(&microcode_mutex);
- collect_cpu_info(cpu);
- if (uci->valid && system_state == SYSTEM_RUNNING && !resume)
- cpu_request_microcode(cpu);
- mutex_unlock(&microcode_mutex);
- set_cpus_allowed_ptr(current, &old);
-}
-
-static void microcode_fini_cpu(int cpu)
-{
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
- mutex_lock(&microcode_mutex);
- uci->valid = 0;
- vfree(uci->mc);
- uci->mc = NULL;
- mutex_unlock(&microcode_mutex);
-}
-
-static ssize_t reload_store(struct sys_device *dev,
- struct sysdev_attribute *attr,
- const char *buf, size_t sz)
-{
- struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
- char *end;
- unsigned long val = simple_strtoul(buf, &end, 0);
- int err = 0;
- int cpu = dev->id;
-
- if (end == buf)
- return -EINVAL;
- if (val == 1) {
- cpumask_t old = current->cpus_allowed;
-
- get_online_cpus();
- set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-
- mutex_lock(&microcode_mutex);
- if (uci->valid)
- err = cpu_request_microcode(cpu);
- mutex_unlock(&microcode_mutex);
- put_online_cpus();
- set_cpus_allowed_ptr(current, &old);
- }
- if (err)
- return err;
- return sz;
-}
-
-static ssize_t version_show(struct sys_device *dev,
- struct sysdev_attribute *attr, char *buf)
-{
- struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
-
- return sprintf(buf, "0x%x\n", uci->rev);
-}
-
-static ssize_t pf_show(struct sys_device *dev,
- struct sysdev_attribute *attr, char *buf)
-{
- struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
-
- return sprintf(buf, "0x%x\n", uci->pf);
-}
-
-static SYSDEV_ATTR(reload, 0200, NULL, reload_store);
-static SYSDEV_ATTR(version, 0400, version_show, NULL);
-static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL);
-
-static struct attribute *mc_default_attrs[] = {
- &attr_reload.attr,
- &attr_version.attr,
- &attr_processor_flags.attr,
- NULL
-};
-
-static struct attribute_group mc_attr_group = {
- .attrs = mc_default_attrs,
- .name = "microcode",
-};
-
-static int __mc_sysdev_add(struct sys_device *sys_dev, int resume)
-{
- int err, cpu = sys_dev->id;
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
- if (!cpu_online(cpu))
- return 0;
-
- pr_debug("microcode: CPU%d added\n", cpu);
- memset(uci, 0, sizeof(*uci));
-
- err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
- if (err)
- return err;
-
- microcode_init_cpu(cpu, resume);
-
- return 0;
-}
-
-static int mc_sysdev_add(struct sys_device *sys_dev)
-{
- return __mc_sysdev_add(sys_dev, 0);
-}
-
-static int mc_sysdev_remove(struct sys_device *sys_dev)
-{
- int cpu = sys_dev->id;
-
- if (!cpu_online(cpu))
- return 0;
-
- pr_debug("microcode: CPU%d removed\n", cpu);
- microcode_fini_cpu(cpu);
- sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
- return 0;
-}
-
-static int mc_sysdev_resume(struct sys_device *dev)
-{
- int cpu = dev->id;
-
- if (!cpu_online(cpu))
- return 0;
- pr_debug("microcode: CPU%d resumed\n", cpu);
- /* only CPU 0 will apply ucode here */
- apply_microcode(0);
- return 0;
-}
-
-static struct sysdev_driver mc_sysdev_driver = {
- .add = mc_sysdev_add,
- .remove = mc_sysdev_remove,
- .resume = mc_sysdev_resume,
-};
-
-static __cpuinit int
-mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
-{
- unsigned int cpu = (unsigned long)hcpu;
- struct sys_device *sys_dev;
-
- sys_dev = get_cpu_sysdev(cpu);
- switch (action) {
- case CPU_UP_CANCELED_FROZEN:
- /* The CPU refused to come up during a system resume */
- microcode_fini_cpu(cpu);
- break;
- case CPU_ONLINE:
- case CPU_DOWN_FAILED:
- mc_sysdev_add(sys_dev);
- break;
- case CPU_ONLINE_FROZEN:
- /* System-wide resume is in progress, try to apply microcode */
- if (apply_microcode_check_cpu(cpu)) {
- /* The application of microcode failed */
- microcode_fini_cpu(cpu);
- __mc_sysdev_add(sys_dev, 1);
- break;
- }
- case CPU_DOWN_FAILED_FROZEN:
- if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group))
- printk(KERN_ERR "microcode: Failed to create the sysfs "
- "group for CPU%d\n", cpu);
- break;
- case CPU_DOWN_PREPARE:
- mc_sysdev_remove(sys_dev);
- break;
- case CPU_DOWN_PREPARE_FROZEN:
- /* Suspend is in progress, only remove the interface */
- sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block __refdata mc_cpu_notifier = {
- .notifier_call = mc_cpu_callback,
-};
-
-static int __init microcode_init (void)
-{
- int error;
-
- printk(KERN_INFO
- "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n");
-
- error = microcode_dev_init();
- if (error)
- return error;
- microcode_pdev = platform_device_register_simple("microcode", -1,
- NULL, 0);
- if (IS_ERR(microcode_pdev)) {
- microcode_dev_exit();
- return PTR_ERR(microcode_pdev);
- }
-
- get_online_cpus();
- error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver);
- put_online_cpus();
- if (error) {
- microcode_dev_exit();
- platform_device_unregister(microcode_pdev);
- return error;
- }
-
- register_hotcpu_notifier(&mc_cpu_notifier);
- return 0;
-}
-
-static void __exit microcode_exit (void)
-{
- microcode_dev_exit();
-
- unregister_hotcpu_notifier(&mc_cpu_notifier);
-
- get_online_cpus();
- sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver);
- put_online_cpus();
-
- platform_device_unregister(microcode_pdev);
-}
-
-module_init(microcode_init)
-module_exit(microcode_exit)
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
new file mode 100644
index 00000000000..7a1f8eeac2c
--- /dev/null
+++ b/arch/x86/kernel/microcode_amd.c
@@ -0,0 +1,435 @@
+/*
+ * AMD CPU Microcode Update Driver for Linux
+ * Copyright (C) 2008 Advanced Micro Devices Inc.
+ *
+ * Author: Peter Oruba <peter.oruba@amd.com>
+ *
+ * Based on work by:
+ * Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ *
+ * This driver allows to upgrade microcode on AMD
+ * family 0x10 and 0x11 processors.
+ *
+ * Licensed unter the terms of the GNU General Public
+ * License version 2. See file COPYING for details.
+*/
+
+#include <linux/capability.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/miscdevice.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/mutex.h>
+#include <linux/cpu.h>
+#include <linux/firmware.h>
+#include <linux/platform_device.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+#include <asm/microcode.h>
+
+MODULE_DESCRIPTION("AMD Microcode Update Driver");
+MODULE_AUTHOR("Peter Oruba <peter.oruba@amd.com>");
+MODULE_LICENSE("GPL v2");
+
+#define UCODE_MAGIC 0x00414d44
+#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000
+#define UCODE_UCODE_TYPE 0x00000001
+
+struct equiv_cpu_entry {
+ unsigned int installed_cpu;
+ unsigned int fixed_errata_mask;
+ unsigned int fixed_errata_compare;
+ unsigned int equiv_cpu;
+};
+
+struct microcode_header_amd {
+ unsigned int data_code;
+ unsigned int patch_id;
+ unsigned char mc_patch_data_id[2];
+ unsigned char mc_patch_data_len;
+ unsigned char init_flag;
+ unsigned int mc_patch_data_checksum;
+ unsigned int nb_dev_id;
+ unsigned int sb_dev_id;
+ unsigned char processor_rev_id[2];
+ unsigned char nb_rev_id;
+ unsigned char sb_rev_id;
+ unsigned char bios_api_rev;
+ unsigned char reserved1[3];
+ unsigned int match_reg[8];
+};
+
+struct microcode_amd {
+ struct microcode_header_amd hdr;
+ unsigned int mpb[0];
+};
+
+#define UCODE_MAX_SIZE (2048)
+#define DEFAULT_UCODE_DATASIZE (896)
+#define MC_HEADER_SIZE (sizeof(struct microcode_header_amd))
+#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
+#define DWSIZE (sizeof(u32))
+/* For now we support a fixed ucode total size only */
+#define get_totalsize(mc) \
+ ((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \
+ + MC_HEADER_SIZE)
+
+/* serialize access to the physical write */
+static DEFINE_SPINLOCK(microcode_update_lock);
+
+static struct equiv_cpu_entry *equiv_cpu_table;
+
+static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
+{
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ memset(csig, 0, sizeof(*csig));
+
+ if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
+ printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n",
+ cpu);
+ return -1;
+ }
+
+ asm volatile("movl %1, %%ecx; rdmsr"
+ : "=a" (csig->rev)
+ : "i" (0x0000008B) : "ecx");
+
+ printk(KERN_INFO "microcode: collect_cpu_info_amd : patch_id=0x%x\n",
+ csig->rev);
+
+ return 0;
+}
+
+static int get_matching_microcode(int cpu, void *mc, int rev)
+{
+ struct microcode_header_amd *mc_header = mc;
+ struct pci_dev *nb_pci_dev, *sb_pci_dev;
+ unsigned int current_cpu_id;
+ unsigned int equiv_cpu_id = 0x00;
+ unsigned int i = 0;
+
+ BUG_ON(equiv_cpu_table == NULL);
+ current_cpu_id = cpuid_eax(0x00000001);
+
+ while (equiv_cpu_table[i].installed_cpu != 0) {
+ if (current_cpu_id == equiv_cpu_table[i].installed_cpu) {
+ equiv_cpu_id = equiv_cpu_table[i].equiv_cpu;
+ break;
+ }
+ i++;
+ }
+
+ if (!equiv_cpu_id) {
+ printk(KERN_ERR "microcode: CPU%d cpu_id "
+ "not found in equivalent cpu table \n", cpu);
+ return 0;
+ }
+
+ if ((mc_header->processor_rev_id[0]) != (equiv_cpu_id & 0xff)) {
+ printk(KERN_ERR
+ "microcode: CPU%d patch does not match "
+ "(patch is %x, cpu extended is %x) \n",
+ cpu, mc_header->processor_rev_id[0],
+ (equiv_cpu_id & 0xff));
+ return 0;
+ }
+
+ if ((mc_header->processor_rev_id[1]) != ((equiv_cpu_id >> 16) & 0xff)) {
+ printk(KERN_ERR "microcode: CPU%d patch does not match "
+ "(patch is %x, cpu base id is %x) \n",
+ cpu, mc_header->processor_rev_id[1],
+ ((equiv_cpu_id >> 16) & 0xff));
+
+ return 0;
+ }
+
+ /* ucode may be northbridge specific */
+ if (mc_header->nb_dev_id) {
+ nb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD,
+ (mc_header->nb_dev_id & 0xff),
+ NULL);
+ if ((!nb_pci_dev) ||
+ (mc_header->nb_rev_id != nb_pci_dev->revision)) {
+ printk(KERN_ERR "microcode: CPU%d NB mismatch \n", cpu);
+ pci_dev_put(nb_pci_dev);
+ return 0;
+ }
+ pci_dev_put(nb_pci_dev);
+ }
+
+ /* ucode may be southbridge specific */
+ if (mc_header->sb_dev_id) {
+ sb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD,
+ (mc_header->sb_dev_id & 0xff),
+ NULL);
+ if ((!sb_pci_dev) ||
+ (mc_header->sb_rev_id != sb_pci_dev->revision)) {
+ printk(KERN_ERR "microcode: CPU%d SB mismatch \n", cpu);
+ pci_dev_put(sb_pci_dev);
+ return 0;
+ }
+ pci_dev_put(sb_pci_dev);
+ }
+
+ if (mc_header->patch_id <= rev)
+ return 0;
+
+ return 1;
+}
+
+static void apply_microcode_amd(int cpu)
+{
+ unsigned long flags;
+ unsigned int eax, edx;
+ unsigned int rev;
+ int cpu_num = raw_smp_processor_id();
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
+ struct microcode_amd *mc_amd = uci->mc;
+ unsigned long addr;
+
+ /* We should bind the task to the CPU */
+ BUG_ON(cpu_num != cpu);
+
+ if (mc_amd == NULL)
+ return;
+
+ spin_lock_irqsave(&microcode_update_lock, flags);
+
+ addr = (unsigned long)&mc_amd->hdr.data_code;
+ edx = (unsigned int)(((unsigned long)upper_32_bits(addr)));
+ eax = (unsigned int)(((unsigned long)lower_32_bits(addr)));
+
+ asm volatile("movl %0, %%ecx; wrmsr" :
+ : "i" (0xc0010020), "a" (eax), "d" (edx) : "ecx");
+
+ /* get patch id after patching */
+ asm volatile("movl %1, %%ecx; rdmsr"
+ : "=a" (rev)
+ : "i" (0x0000008B) : "ecx");
+
+ spin_unlock_irqrestore(&microcode_update_lock, flags);
+
+ /* check current patch id and patch's id for match */
+ if (rev != mc_amd->hdr.patch_id) {
+ printk(KERN_ERR "microcode: CPU%d update from revision "
+ "0x%x to 0x%x failed\n", cpu_num,
+ mc_amd->hdr.patch_id, rev);
+ return;
+ }
+
+ printk(KERN_INFO "microcode: CPU%d updated from revision "
+ "0x%x to 0x%x \n",
+ cpu_num, uci->cpu_sig.rev, mc_amd->hdr.patch_id);
+
+ uci->cpu_sig.rev = rev;
+}
+
+static void * get_next_ucode(u8 *buf, unsigned int size,
+ int (*get_ucode_data)(void *, const void *, size_t),
+ unsigned int *mc_size)
+{
+ unsigned int total_size;
+#define UCODE_CONTAINER_SECTION_HDR 8
+ u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
+ void *mc;
+
+ if (get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR))
+ return NULL;
+
+ if (section_hdr[0] != UCODE_UCODE_TYPE) {
+ printk(KERN_ERR "microcode: error! "
+ "Wrong microcode payload type field\n");
+ return NULL;
+ }
+
+ total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8));
+
+ printk(KERN_INFO "microcode: size %u, total_size %u\n",
+ size, total_size);
+
+ if (total_size > size || total_size > UCODE_MAX_SIZE) {
+ printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
+ return NULL;
+ }
+
+ mc = vmalloc(UCODE_MAX_SIZE);
+ if (mc) {
+ memset(mc, 0, UCODE_MAX_SIZE);
+ if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size)) {
+ vfree(mc);
+ mc = NULL;
+ } else
+ *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
+ }
+#undef UCODE_CONTAINER_SECTION_HDR
+ return mc;
+}
+
+
+static int install_equiv_cpu_table(u8 *buf,
+ int (*get_ucode_data)(void *, const void *, size_t))
+{
+#define UCODE_CONTAINER_HEADER_SIZE 12
+ u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE];
+ unsigned int *buf_pos = (unsigned int *)container_hdr;
+ unsigned long size;
+
+ if (get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE))
+ return 0;
+
+ size = buf_pos[2];
+
+ if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
+ printk(KERN_ERR "microcode: error! "
+ "Wrong microcode equivalnet cpu table\n");
+ return 0;
+ }
+
+ equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size);
+ if (!equiv_cpu_table) {
+ printk(KERN_ERR "microcode: error, can't allocate memory for equiv CPU table\n");
+ return 0;
+ }
+
+ buf += UCODE_CONTAINER_HEADER_SIZE;
+ if (get_ucode_data(equiv_cpu_table, buf, size)) {
+ vfree(equiv_cpu_table);
+ return 0;
+ }
+
+ return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */
+#undef UCODE_CONTAINER_HEADER_SIZE
+}
+
+static void free_equiv_cpu_table(void)
+{
+ if (equiv_cpu_table) {
+ vfree(equiv_cpu_table);
+ equiv_cpu_table = NULL;
+ }
+}
+
+static int generic_load_microcode(int cpu, void *data, size_t size,
+ int (*get_ucode_data)(void *, const void *, size_t))
+{
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ u8 *ucode_ptr = data, *new_mc = NULL, *mc;
+ int new_rev = uci->cpu_sig.rev;
+ unsigned int leftover;
+ unsigned long offset;
+
+ offset = install_equiv_cpu_table(ucode_ptr, get_ucode_data);
+ if (!offset) {
+ printk(KERN_ERR "microcode: installing equivalent cpu table failed\n");
+ return -EINVAL;
+ }
+
+ ucode_ptr += offset;
+ leftover = size - offset;
+
+ while (leftover) {
+ unsigned int uninitialized_var(mc_size);
+ struct microcode_header_amd *mc_header;
+
+ mc = get_next_ucode(ucode_ptr, leftover, get_ucode_data, &mc_size);
+ if (!mc)
+ break;
+
+ mc_header = (struct microcode_header_amd *)mc;
+ if (get_matching_microcode(cpu, mc, new_rev)) {
+ if (new_mc)
+ vfree(new_mc);
+ new_rev = mc_header->patch_id;
+ new_mc = mc;
+ } else
+ vfree(mc);
+
+ ucode_ptr += mc_size;
+ leftover -= mc_size;
+ }
+
+ if (new_mc) {
+ if (!leftover) {
+ if (uci->mc)
+ vfree(uci->mc);
+ uci->mc = new_mc;
+ pr_debug("microcode: CPU%d found a matching microcode update with"
+ " version 0x%x (current=0x%x)\n",
+ cpu, new_rev, uci->cpu_sig.rev);
+ } else
+ vfree(new_mc);
+ }
+
+ free_equiv_cpu_table();
+
+ return (int)leftover;
+}
+
+static int get_ucode_fw(void *to, const void *from, size_t n)
+{
+ memcpy(to, from, n);
+ return 0;
+}
+
+static int request_microcode_fw(int cpu, struct device *device)
+{
+ const char *fw_name = "amd-ucode/microcode_amd.bin";
+ const struct firmware *firmware;
+ int ret;
+
+ /* We should bind the task to the CPU */
+ BUG_ON(cpu != raw_smp_processor_id());
+
+ ret = request_firmware(&firmware, fw_name, device);
+ if (ret) {
+ printk(KERN_ERR "microcode: ucode data file %s load failed\n", fw_name);
+ return ret;
+ }
+
+ ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size,
+ &get_ucode_fw);
+
+ release_firmware(firmware);
+
+ return ret;
+}
+
+static int request_microcode_user(int cpu, const void __user *buf, size_t size)
+{
+ printk(KERN_WARNING "microcode: AMD microcode update via /dev/cpu/microcode"
+ "is not supported\n");
+ return -1;
+}
+
+static void microcode_fini_cpu_amd(int cpu)
+{
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+ vfree(uci->mc);
+ uci->mc = NULL;
+}
+
+static struct microcode_ops microcode_amd_ops = {
+ .request_microcode_user = request_microcode_user,
+ .request_microcode_fw = request_microcode_fw,
+ .collect_cpu_info = collect_cpu_info_amd,
+ .apply_microcode = apply_microcode_amd,
+ .microcode_fini_cpu = microcode_fini_cpu_amd,
+};
+
+struct microcode_ops * __init init_amd_microcode(void)
+{
+ return &microcode_amd_ops;
+}
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
new file mode 100644
index 00000000000..936d8d55f23
--- /dev/null
+++ b/arch/x86/kernel/microcode_core.c
@@ -0,0 +1,508 @@
+/*
+ * Intel CPU Microcode Update Driver for Linux
+ *
+ * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ * 2006 Shaohua Li <shaohua.li@intel.com>
+ *
+ * This driver allows to upgrade microcode on Intel processors
+ * belonging to IA-32 family - PentiumPro, Pentium II,
+ * Pentium III, Xeon, Pentium 4, etc.
+ *
+ * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture
+ * Software Developer's Manual
+ * Order Number 253668 or free download from:
+ *
+ * http://developer.intel.com/design/pentium4/manuals/253668.htm
+ *
+ * For more information, go to http://www.urbanmyth.org/microcode
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * 1.0 16 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ * Initial release.
+ * 1.01 18 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ * Added read() support + cleanups.
+ * 1.02 21 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ * Added 'device trimming' support. open(O_WRONLY) zeroes
+ * and frees the saved copy of applied microcode.
+ * 1.03 29 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ * Made to use devfs (/dev/cpu/microcode) + cleanups.
+ * 1.04 06 Jun 2000, Simon Trimmer <simon@veritas.com>
+ * Added misc device support (now uses both devfs and misc).
+ * Added MICROCODE_IOCFREE ioctl to clear memory.
+ * 1.05 09 Jun 2000, Simon Trimmer <simon@veritas.com>
+ * Messages for error cases (non Intel & no suitable microcode).
+ * 1.06 03 Aug 2000, Tigran Aivazian <tigran@veritas.com>
+ * Removed ->release(). Removed exclusive open and status bitmap.
+ * Added microcode_rwsem to serialize read()/write()/ioctl().
+ * Removed global kernel lock usage.
+ * 1.07 07 Sep 2000, Tigran Aivazian <tigran@veritas.com>
+ * Write 0 to 0x8B msr and then cpuid before reading revision,
+ * so that it works even if there were no update done by the
+ * BIOS. Otherwise, reading from 0x8B gives junk (which happened
+ * to be 0 on my machine which is why it worked even when I
+ * disabled update by the BIOS)
+ * Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix.
+ * 1.08 11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and
+ * Tigran Aivazian <tigran@veritas.com>
+ * Intel Pentium 4 processor support and bugfixes.
+ * 1.09 30 Oct 2001, Tigran Aivazian <tigran@veritas.com>
+ * Bugfix for HT (Hyper-Threading) enabled processors
+ * whereby processor resources are shared by all logical processors
+ * in a single CPU package.
+ * 1.10 28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and
+ * Tigran Aivazian <tigran@veritas.com>,
+ * Serialize updates as required on HT processors due to
+ * speculative nature of implementation.
+ * 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
+ * Fix the panic when writing zero-length microcode chunk.
+ * 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
+ * Jun Nakajima <jun.nakajima@intel.com>
+ * Support for the microcode updates in the new format.
+ * 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com>
+ * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
+ * because we no longer hold a copy of applied microcode
+ * in kernel memory.
+ * 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com>
+ * Fix sigmatch() macro to handle old CPUs with pf == 0.
+ * Thanks to Stuart Swales for pointing out this bug.
+ */
+#include <linux/capability.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/miscdevice.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/mutex.h>
+#include <linux/cpu.h>
+#include <linux/firmware.h>
+#include <linux/platform_device.h>
+
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+#include <asm/microcode.h>
+
+MODULE_DESCRIPTION("Microcode Update Driver");
+MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
+MODULE_LICENSE("GPL");
+
+#define MICROCODE_VERSION "2.00"
+
+struct microcode_ops *microcode_ops;
+
+/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
+static DEFINE_MUTEX(microcode_mutex);
+
+struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
+EXPORT_SYMBOL_GPL(ucode_cpu_info);
+
+#ifdef CONFIG_MICROCODE_OLD_INTERFACE
+static int do_microcode_update(const void __user *buf, size_t size)
+{
+ cpumask_t old;
+ int error = 0;
+ int cpu;
+
+ old = current->cpus_allowed;
+
+ for_each_online_cpu(cpu) {
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+ if (!uci->valid)
+ continue;
+
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+ error = microcode_ops->request_microcode_user(cpu, buf, size);
+ if (error < 0)
+ goto out;
+ if (!error)
+ microcode_ops->apply_microcode(cpu);
+ }
+out:
+ set_cpus_allowed_ptr(current, &old);
+ return error;
+}
+
+static int microcode_open(struct inode *unused1, struct file *unused2)
+{
+ cycle_kernel_lock();
+ return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
+}
+
+static ssize_t microcode_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ ssize_t ret;
+
+ if ((len >> PAGE_SHIFT) > num_physpages) {
+ printk(KERN_ERR "microcode: too much data (max %ld pages)\n",
+ num_physpages);
+ return -EINVAL;
+ }
+
+ get_online_cpus();
+ mutex_lock(&microcode_mutex);
+
+ ret = do_microcode_update(buf, len);
+ if (!ret)
+ ret = (ssize_t)len;
+
+ mutex_unlock(&microcode_mutex);
+ put_online_cpus();
+
+ return ret;
+}
+
+static const struct file_operations microcode_fops = {
+ .owner = THIS_MODULE,
+ .write = microcode_write,
+ .open = microcode_open,
+};
+
+static struct miscdevice microcode_dev = {
+ .minor = MICROCODE_MINOR,
+ .name = "microcode",
+ .fops = &microcode_fops,
+};
+
+static int __init microcode_dev_init(void)
+{
+ int error;
+
+ error = misc_register(&microcode_dev);
+ if (error) {
+ printk(KERN_ERR
+ "microcode: can't misc_register on minor=%d\n",
+ MICROCODE_MINOR);
+ return error;
+ }
+
+ return 0;
+}
+
+static void microcode_dev_exit(void)
+{
+ misc_deregister(&microcode_dev);
+}
+
+MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
+#else
+#define microcode_dev_init() 0
+#define microcode_dev_exit() do { } while (0)
+#endif
+
+/* fake device for request_firmware */
+struct platform_device *microcode_pdev;
+
+static ssize_t reload_store(struct sys_device *dev,
+ struct sysdev_attribute *attr,
+ const char *buf, size_t sz)
+{
+ struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
+ char *end;
+ unsigned long val = simple_strtoul(buf, &end, 0);
+ int err = 0;
+ int cpu = dev->id;
+
+ if (end == buf)
+ return -EINVAL;
+ if (val == 1) {
+ cpumask_t old = current->cpus_allowed;
+
+ get_online_cpus();
+ if (cpu_online(cpu)) {
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+ mutex_lock(&microcode_mutex);
+ if (uci->valid) {
+ err = microcode_ops->request_microcode_fw(cpu,
+ &microcode_pdev->dev);
+ if (!err)
+ microcode_ops->apply_microcode(cpu);
+ }
+ mutex_unlock(&microcode_mutex);
+ set_cpus_allowed_ptr(current, &old);
+ }
+ put_online_cpus();
+ }
+ if (err)
+ return err;
+ return sz;
+}
+
+static ssize_t version_show(struct sys_device *dev,
+ struct sysdev_attribute *attr, char *buf)
+{
+ struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
+
+ return sprintf(buf, "0x%x\n", uci->cpu_sig.rev);
+}
+
+static ssize_t pf_show(struct sys_device *dev,
+ struct sysdev_attribute *attr, char *buf)
+{
+ struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
+
+ return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
+}
+
+static SYSDEV_ATTR(reload, 0200, NULL, reload_store);
+static SYSDEV_ATTR(version, 0400, version_show, NULL);
+static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL);
+
+static struct attribute *mc_default_attrs[] = {
+ &attr_reload.attr,
+ &attr_version.attr,
+ &attr_processor_flags.attr,
+ NULL
+};
+
+static struct attribute_group mc_attr_group = {
+ .attrs = mc_default_attrs,
+ .name = "microcode",
+};
+
+static void microcode_fini_cpu(int cpu)
+{
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+ mutex_lock(&microcode_mutex);
+ microcode_ops->microcode_fini_cpu(cpu);
+ uci->valid = 0;
+ mutex_unlock(&microcode_mutex);
+}
+
+static void collect_cpu_info(int cpu)
+{
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+ memset(uci, 0, sizeof(*uci));
+ if (!microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig))
+ uci->valid = 1;
+}
+
+static int microcode_resume_cpu(int cpu)
+{
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ struct cpu_signature nsig;
+
+ pr_debug("microcode: CPU%d resumed\n", cpu);
+
+ if (!uci->mc)
+ return 1;
+
+ /*
+ * Let's verify that the 'cached' ucode does belong
+ * to this cpu (a bit of paranoia):
+ */
+ if (microcode_ops->collect_cpu_info(cpu, &nsig)) {
+ microcode_fini_cpu(cpu);
+ return -1;
+ }
+
+ if (memcmp(&nsig, &uci->cpu_sig, sizeof(nsig))) {
+ microcode_fini_cpu(cpu);
+ /* Should we look for a new ucode here? */
+ return 1;
+ }
+
+ return 0;
+}
+
+void microcode_update_cpu(int cpu)
+{
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ int err = 0;
+
+ /*
+ * Check if the system resume is in progress (uci->valid != NULL),
+ * otherwise just request a firmware:
+ */
+ if (uci->valid) {
+ err = microcode_resume_cpu(cpu);
+ } else {
+ collect_cpu_info(cpu);
+ if (uci->valid && system_state == SYSTEM_RUNNING)
+ err = microcode_ops->request_microcode_fw(cpu,
+ &microcode_pdev->dev);
+ }
+ if (!err)
+ microcode_ops->apply_microcode(cpu);
+}
+
+static void microcode_init_cpu(int cpu)
+{
+ cpumask_t old = current->cpus_allowed;
+
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+ /* We should bind the task to the CPU */
+ BUG_ON(raw_smp_processor_id() != cpu);
+
+ mutex_lock(&microcode_mutex);
+ microcode_update_cpu(cpu);
+ mutex_unlock(&microcode_mutex);
+
+ set_cpus_allowed_ptr(current, &old);
+}
+
+static int mc_sysdev_add(struct sys_device *sys_dev)
+{
+ int err, cpu = sys_dev->id;
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+ if (!cpu_online(cpu))
+ return 0;
+
+ pr_debug("microcode: CPU%d added\n", cpu);
+ memset(uci, 0, sizeof(*uci));
+
+ err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
+ if (err)
+ return err;
+
+ microcode_init_cpu(cpu);
+ return 0;
+}
+
+static int mc_sysdev_remove(struct sys_device *sys_dev)
+{
+ int cpu = sys_dev->id;
+
+ if (!cpu_online(cpu))
+ return 0;
+
+ pr_debug("microcode: CPU%d removed\n", cpu);
+ microcode_fini_cpu(cpu);
+ sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
+ return 0;
+}
+
+static int mc_sysdev_resume(struct sys_device *dev)
+{
+ int cpu = dev->id;
+
+ if (!cpu_online(cpu))
+ return 0;
+
+ /* only CPU 0 will apply ucode here */
+ microcode_update_cpu(0);
+ return 0;
+}
+
+static struct sysdev_driver mc_sysdev_driver = {
+ .add = mc_sysdev_add,
+ .remove = mc_sysdev_remove,
+ .resume = mc_sysdev_resume,
+};
+
+static __cpuinit int
+mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+ struct sys_device *sys_dev;
+
+ sys_dev = get_cpu_sysdev(cpu);
+ switch (action) {
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ microcode_init_cpu(cpu);
+ case CPU_DOWN_FAILED:
+ case CPU_DOWN_FAILED_FROZEN:
+ pr_debug("microcode: CPU%d added\n", cpu);
+ if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group))
+ printk(KERN_ERR "microcode: Failed to create the sysfs "
+ "group for CPU%d\n", cpu);
+ break;
+ case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
+ /* Suspend is in progress, only remove the interface */
+ sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
+ pr_debug("microcode: CPU%d removed\n", cpu);
+ break;
+ case CPU_DEAD:
+ case CPU_UP_CANCELED_FROZEN:
+ /* The CPU refused to come up during a system resume */
+ microcode_fini_cpu(cpu);
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block __refdata mc_cpu_notifier = {
+ .notifier_call = mc_cpu_callback,
+};
+
+static int __init microcode_init(void)
+{
+ struct cpuinfo_x86 *c = &cpu_data(0);
+ int error;
+
+ if (c->x86_vendor == X86_VENDOR_INTEL)
+ microcode_ops = init_intel_microcode();
+ else if (c->x86_vendor == X86_VENDOR_AMD)
+ microcode_ops = init_amd_microcode();
+
+ if (!microcode_ops) {
+ printk(KERN_ERR "microcode: no support for this CPU vendor\n");
+ return -ENODEV;
+ }
+
+ error = microcode_dev_init();
+ if (error)
+ return error;
+ microcode_pdev = platform_device_register_simple("microcode", -1,
+ NULL, 0);
+ if (IS_ERR(microcode_pdev)) {
+ microcode_dev_exit();
+ return PTR_ERR(microcode_pdev);
+ }
+
+ get_online_cpus();
+ error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver);
+ put_online_cpus();
+ if (error) {
+ microcode_dev_exit();
+ platform_device_unregister(microcode_pdev);
+ return error;
+ }
+
+ register_hotcpu_notifier(&mc_cpu_notifier);
+
+ printk(KERN_INFO
+ "Microcode Update Driver: v" MICROCODE_VERSION
+ " <tigran@aivazian.fsnet.co.uk>"
+ " <peter.oruba@amd.com>\n");
+
+ return 0;
+}
+
+static void __exit microcode_exit(void)
+{
+ microcode_dev_exit();
+
+ unregister_hotcpu_notifier(&mc_cpu_notifier);
+
+ get_online_cpus();
+ sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver);
+ put_online_cpus();
+
+ platform_device_unregister(microcode_pdev);
+
+ microcode_ops = NULL;
+
+ printk(KERN_INFO
+ "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
+}
+
+module_init(microcode_init);
+module_exit(microcode_exit);
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
new file mode 100644
index 00000000000..622dc4a2178
--- /dev/null
+++ b/arch/x86/kernel/microcode_intel.c
@@ -0,0 +1,480 @@
+/*
+ * Intel CPU Microcode Update Driver for Linux
+ *
+ * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ * 2006 Shaohua Li <shaohua.li@intel.com>
+ *
+ * This driver allows to upgrade microcode on Intel processors
+ * belonging to IA-32 family - PentiumPro, Pentium II,
+ * Pentium III, Xeon, Pentium 4, etc.
+ *
+ * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture
+ * Software Developer's Manual
+ * Order Number 253668 or free download from:
+ *
+ * http://developer.intel.com/design/pentium4/manuals/253668.htm
+ *
+ * For more information, go to http://www.urbanmyth.org/microcode
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * 1.0 16 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ * Initial release.
+ * 1.01 18 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ * Added read() support + cleanups.
+ * 1.02 21 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ * Added 'device trimming' support. open(O_WRONLY) zeroes
+ * and frees the saved copy of applied microcode.
+ * 1.03 29 Feb 2000, Tigran Aivazian <tigran@sco.com>
+ * Made to use devfs (/dev/cpu/microcode) + cleanups.
+ * 1.04 06 Jun 2000, Simon Trimmer <simon@veritas.com>
+ * Added misc device support (now uses both devfs and misc).
+ * Added MICROCODE_IOCFREE ioctl to clear memory.
+ * 1.05 09 Jun 2000, Simon Trimmer <simon@veritas.com>
+ * Messages for error cases (non Intel & no suitable microcode).
+ * 1.06 03 Aug 2000, Tigran Aivazian <tigran@veritas.com>
+ * Removed ->release(). Removed exclusive open and status bitmap.
+ * Added microcode_rwsem to serialize read()/write()/ioctl().
+ * Removed global kernel lock usage.
+ * 1.07 07 Sep 2000, Tigran Aivazian <tigran@veritas.com>
+ * Write 0 to 0x8B msr and then cpuid before reading revision,
+ * so that it works even if there were no update done by the
+ * BIOS. Otherwise, reading from 0x8B gives junk (which happened
+ * to be 0 on my machine which is why it worked even when I
+ * disabled update by the BIOS)
+ * Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix.
+ * 1.08 11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and
+ * Tigran Aivazian <tigran@veritas.com>
+ * Intel Pentium 4 processor support and bugfixes.
+ * 1.09 30 Oct 2001, Tigran Aivazian <tigran@veritas.com>
+ * Bugfix for HT (Hyper-Threading) enabled processors
+ * whereby processor resources are shared by all logical processors
+ * in a single CPU package.
+ * 1.10 28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and
+ * Tigran Aivazian <tigran@veritas.com>,
+ * Serialize updates as required on HT processors due to
+ * speculative nature of implementation.
+ * 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
+ * Fix the panic when writing zero-length microcode chunk.
+ * 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
+ * Jun Nakajima <jun.nakajima@intel.com>
+ * Support for the microcode updates in the new format.
+ * 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com>
+ * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
+ * because we no longer hold a copy of applied microcode
+ * in kernel memory.
+ * 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com>
+ * Fix sigmatch() macro to handle old CPUs with pf == 0.
+ * Thanks to Stuart Swales for pointing out this bug.
+ */
+#include <linux/capability.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/miscdevice.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/mutex.h>
+#include <linux/cpu.h>
+#include <linux/firmware.h>
+#include <linux/platform_device.h>
+
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+#include <asm/microcode.h>
+
+MODULE_DESCRIPTION("Microcode Update Driver");
+MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
+MODULE_LICENSE("GPL");
+
+struct microcode_header_intel {
+ unsigned int hdrver;
+ unsigned int rev;
+ unsigned int date;
+ unsigned int sig;
+ unsigned int cksum;
+ unsigned int ldrver;
+ unsigned int pf;
+ unsigned int datasize;
+ unsigned int totalsize;
+ unsigned int reserved[3];
+};
+
+struct microcode_intel {
+ struct microcode_header_intel hdr;
+ unsigned int bits[0];
+};
+
+/* microcode format is extended from prescott processors */
+struct extended_signature {
+ unsigned int sig;
+ unsigned int pf;
+ unsigned int cksum;
+};
+
+struct extended_sigtable {
+ unsigned int count;
+ unsigned int cksum;
+ unsigned int reserved[3];
+ struct extended_signature sigs[0];
+};
+
+#define DEFAULT_UCODE_DATASIZE (2000)
+#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel))
+#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
+#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable))
+#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature))
+#define DWSIZE (sizeof(u32))
+#define get_totalsize(mc) \
+ (((struct microcode_intel *)mc)->hdr.totalsize ? \
+ ((struct microcode_intel *)mc)->hdr.totalsize : \
+ DEFAULT_UCODE_TOTALSIZE)
+
+#define get_datasize(mc) \
+ (((struct microcode_intel *)mc)->hdr.datasize ? \
+ ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
+
+#define sigmatch(s1, s2, p1, p2) \
+ (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))
+
+#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
+
+/* serialize access to the physical write to MSR 0x79 */
+static DEFINE_SPINLOCK(microcode_update_lock);
+
+static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
+{
+ struct cpuinfo_x86 *c = &cpu_data(cpu_num);
+ unsigned int val[2];
+
+ memset(csig, 0, sizeof(*csig));
+
+ if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
+ cpu_has(c, X86_FEATURE_IA64)) {
+ printk(KERN_ERR "microcode: CPU%d not a capable Intel "
+ "processor\n", cpu_num);
+ return -1;
+ }
+
+ csig->sig = cpuid_eax(0x00000001);
+
+ if ((c->x86_model >= 5) || (c->x86 > 6)) {
+ /* get processor flags from MSR 0x17 */
+ rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
+ csig->pf = 1 << ((val[1] >> 18) & 7);
+ }
+
+ wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+ /* see notes above for revision 1.07. Apparent chip bug */
+ sync_core();
+ /* get the current revision from MSR 0x8B */
+ rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev);
+ pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
+ csig->sig, csig->pf, csig->rev);
+
+ return 0;
+}
+
+static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf)
+{
+ return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1;
+}
+
+static inline int
+update_match_revision(struct microcode_header_intel *mc_header, int rev)
+{
+ return (mc_header->rev <= rev) ? 0 : 1;
+}
+
+static int microcode_sanity_check(void *mc)
+{
+ struct microcode_header_intel *mc_header = mc;
+ struct extended_sigtable *ext_header = NULL;
+ struct extended_signature *ext_sig;
+ unsigned long total_size, data_size, ext_table_size;
+ int sum, orig_sum, ext_sigcount = 0, i;
+
+ total_size = get_totalsize(mc_header);
+ data_size = get_datasize(mc_header);
+ if (data_size + MC_HEADER_SIZE > total_size) {
+ printk(KERN_ERR "microcode: error! "
+ "Bad data size in microcode data file\n");
+ return -EINVAL;
+ }
+
+ if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
+ printk(KERN_ERR "microcode: error! "
+ "Unknown microcode update format\n");
+ return -EINVAL;
+ }
+ ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
+ if (ext_table_size) {
+ if ((ext_table_size < EXT_HEADER_SIZE)
+ || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
+ printk(KERN_ERR "microcode: error! "
+ "Small exttable size in microcode data file\n");
+ return -EINVAL;
+ }
+ ext_header = mc + MC_HEADER_SIZE + data_size;
+ if (ext_table_size != exttable_size(ext_header)) {
+ printk(KERN_ERR "microcode: error! "
+ "Bad exttable size in microcode data file\n");
+ return -EFAULT;
+ }
+ ext_sigcount = ext_header->count;
+ }
+
+ /* check extended table checksum */
+ if (ext_table_size) {
+ int ext_table_sum = 0;
+ int *ext_tablep = (int *)ext_header;
+
+ i = ext_table_size / DWSIZE;
+ while (i--)
+ ext_table_sum += ext_tablep[i];
+ if (ext_table_sum) {
+ printk(KERN_WARNING "microcode: aborting, "
+ "bad extended signature table checksum\n");
+ return -EINVAL;
+ }
+ }
+
+ /* calculate the checksum */
+ orig_sum = 0;
+ i = (MC_HEADER_SIZE + data_size) / DWSIZE;
+ while (i--)
+ orig_sum += ((int *)mc)[i];
+ if (orig_sum) {
+ printk(KERN_ERR "microcode: aborting, bad checksum\n");
+ return -EINVAL;
+ }
+ if (!ext_table_size)
+ return 0;
+ /* check extended signature checksum */
+ for (i = 0; i < ext_sigcount; i++) {
+ ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
+ EXT_SIGNATURE_SIZE * i;
+ sum = orig_sum
+ - (mc_header->sig + mc_header->pf + mc_header->cksum)
+ + (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
+ if (sum) {
+ printk(KERN_ERR "microcode: aborting, bad checksum\n");
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+/*
+ * return 0 - no update found
+ * return 1 - found update
+ */
+static int
+get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev)
+{
+ struct microcode_header_intel *mc_header = mc;
+ struct extended_sigtable *ext_header;
+ unsigned long total_size = get_totalsize(mc_header);
+ int ext_sigcount, i;
+ struct extended_signature *ext_sig;
+
+ if (!update_match_revision(mc_header, rev))
+ return 0;
+
+ if (update_match_cpu(cpu_sig, mc_header->sig, mc_header->pf))
+ return 1;
+
+ /* Look for ext. headers: */
+ if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
+ return 0;
+
+ ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE;
+ ext_sigcount = ext_header->count;
+ ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
+
+ for (i = 0; i < ext_sigcount; i++) {
+ if (update_match_cpu(cpu_sig, ext_sig->sig, ext_sig->pf))
+ return 1;
+ ext_sig++;
+ }
+ return 0;
+}
+
+static void apply_microcode(int cpu)
+{
+ unsigned long flags;
+ unsigned int val[2];
+ int cpu_num = raw_smp_processor_id();
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ struct microcode_intel *mc_intel = uci->mc;
+
+ /* We should bind the task to the CPU */
+ BUG_ON(cpu_num != cpu);
+
+ if (mc_intel == NULL)
+ return;
+
+ /* serialize access to the physical write to MSR 0x79 */
+ spin_lock_irqsave(&microcode_update_lock, flags);
+
+ /* write microcode via MSR 0x79 */
+ wrmsr(MSR_IA32_UCODE_WRITE,
+ (unsigned long) mc_intel->bits,
+ (unsigned long) mc_intel->bits >> 16 >> 16);
+ wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+
+ /* see notes above for revision 1.07. Apparent chip bug */
+ sync_core();
+
+ /* get the current revision from MSR 0x8B */
+ rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
+
+ spin_unlock_irqrestore(&microcode_update_lock, flags);
+ if (val[1] != mc_intel->hdr.rev) {
+ printk(KERN_ERR "microcode: CPU%d update from revision "
+ "0x%x to 0x%x failed\n", cpu_num, uci->cpu_sig.rev, val[1]);
+ return;
+ }
+ printk(KERN_INFO "microcode: CPU%d updated from revision "
+ "0x%x to 0x%x, date = %04x-%02x-%02x \n",
+ cpu_num, uci->cpu_sig.rev, val[1],
+ mc_intel->hdr.date & 0xffff,
+ mc_intel->hdr.date >> 24,
+ (mc_intel->hdr.date >> 16) & 0xff);
+ uci->cpu_sig.rev = val[1];
+}
+
+static int generic_load_microcode(int cpu, void *data, size_t size,
+ int (*get_ucode_data)(void *, const void *, size_t))
+{
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ u8 *ucode_ptr = data, *new_mc = NULL, *mc;
+ int new_rev = uci->cpu_sig.rev;
+ unsigned int leftover = size;
+
+ while (leftover) {
+ struct microcode_header_intel mc_header;
+ unsigned int mc_size;
+
+ if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header)))
+ break;
+
+ mc_size = get_totalsize(&mc_header);
+ if (!mc_size || mc_size > leftover) {
+ printk(KERN_ERR "microcode: error!"
+ "Bad data in microcode data file\n");
+ break;
+ }
+
+ mc = vmalloc(mc_size);
+ if (!mc)
+ break;
+
+ if (get_ucode_data(mc, ucode_ptr, mc_size) ||
+ microcode_sanity_check(mc) < 0) {
+ vfree(mc);
+ break;
+ }
+
+ if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) {
+ if (new_mc)
+ vfree(new_mc);
+ new_rev = mc_header.rev;
+ new_mc = mc;
+ } else
+ vfree(mc);
+
+ ucode_ptr += mc_size;
+ leftover -= mc_size;
+ }
+
+ if (new_mc) {
+ if (!leftover) {
+ if (uci->mc)
+ vfree(uci->mc);
+ uci->mc = (struct microcode_intel *)new_mc;
+ pr_debug("microcode: CPU%d found a matching microcode update with"
+ " version 0x%x (current=0x%x)\n",
+ cpu, new_rev, uci->cpu_sig.rev);
+ } else
+ vfree(new_mc);
+ }
+
+ return (int)leftover;
+}
+
+static int get_ucode_fw(void *to, const void *from, size_t n)
+{
+ memcpy(to, from, n);
+ return 0;
+}
+
+static int request_microcode_fw(int cpu, struct device *device)
+{
+ char name[30];
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+ const struct firmware *firmware;
+ int ret;
+
+ /* We should bind the task to the CPU */
+ BUG_ON(cpu != raw_smp_processor_id());
+ sprintf(name, "intel-ucode/%02x-%02x-%02x",
+ c->x86, c->x86_model, c->x86_mask);
+ ret = request_firmware(&firmware, name, device);
+ if (ret) {
+ pr_debug("microcode: data file %s load failed\n", name);
+ return ret;
+ }
+
+ ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size,
+ &get_ucode_fw);
+
+ release_firmware(firmware);
+
+ return ret;
+}
+
+static int get_ucode_user(void *to, const void *from, size_t n)
+{
+ return copy_from_user(to, from, n);
+}
+
+static int request_microcode_user(int cpu, const void __user *buf, size_t size)
+{
+ /* We should bind the task to the CPU */
+ BUG_ON(cpu != raw_smp_processor_id());
+
+ return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user);
+}
+
+static void microcode_fini_cpu(int cpu)
+{
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+ vfree(uci->mc);
+ uci->mc = NULL;
+}
+
+struct microcode_ops microcode_intel_ops = {
+ .request_microcode_user = request_microcode_user,
+ .request_microcode_fw = request_microcode_fw,
+ .collect_cpu_info = collect_cpu_info,
+ .apply_microcode = apply_microcode,
+ .microcode_fini_cpu = microcode_fini_cpu,
+};
+
+struct microcode_ops * __init init_intel_microcode(void)
+{
+ return &microcode_intel_ops;
+}
+
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
new file mode 100644
index 00000000000..0e9f1982b1d
--- /dev/null
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -0,0 +1,37 @@
+/*
+ * Split spinlock implementation out into its own file, so it can be
+ * compiled in a FTRACE-compatible way.
+ */
+#include <linux/spinlock.h>
+#include <linux/module.h>
+
+#include <asm/paravirt.h>
+
+static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags)
+{
+ __raw_spin_lock(lock);
+}
+
+struct pv_lock_ops pv_lock_ops = {
+#ifdef CONFIG_SMP
+ .spin_is_locked = __ticket_spin_is_locked,
+ .spin_is_contended = __ticket_spin_is_contended,
+
+ .spin_lock = __ticket_spin_lock,
+ .spin_lock_flags = default_spin_lock_flags,
+ .spin_trylock = __ticket_spin_trylock,
+ .spin_unlock = __ticket_spin_unlock,
+#endif
+};
+EXPORT_SYMBOL(pv_lock_ops);
+
+void __init paravirt_use_bytelocks(void)
+{
+#ifdef CONFIG_SMP
+ pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
+ pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
+ pv_lock_ops.spin_lock = __byte_spin_lock;
+ pv_lock_ops.spin_trylock = __byte_spin_trylock;
+ pv_lock_ops.spin_unlock = __byte_spin_unlock;
+#endif
+}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 6b0bb73998d..e4c8fb60887 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -268,17 +268,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
return __get_cpu_var(paravirt_lazy_mode);
}
-void __init paravirt_use_bytelocks(void)
-{
-#ifdef CONFIG_SMP
- pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
- pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
- pv_lock_ops.spin_lock = __byte_spin_lock;
- pv_lock_ops.spin_trylock = __byte_spin_trylock;
- pv_lock_ops.spin_unlock = __byte_spin_unlock;
-#endif
-}
-
struct pv_info pv_info = {
.name = "bare hardware",
.paravirt_enabled = 0,
@@ -349,6 +338,10 @@ struct pv_cpu_ops pv_cpu_ops = {
.write_ldt_entry = native_write_ldt_entry,
.write_gdt_entry = native_write_gdt_entry,
.write_idt_entry = native_write_idt_entry,
+
+ .alloc_ldt = paravirt_nop,
+ .free_ldt = paravirt_nop,
+
.load_sp0 = native_load_sp0,
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
@@ -460,18 +453,6 @@ struct pv_mmu_ops pv_mmu_ops = {
.set_fixmap = native_set_fixmap,
};
-struct pv_lock_ops pv_lock_ops = {
-#ifdef CONFIG_SMP
- .spin_is_locked = __ticket_spin_is_locked,
- .spin_is_contended = __ticket_spin_is_contended,
-
- .spin_lock = __ticket_spin_lock,
- .spin_trylock = __ticket_spin_trylock,
- .spin_unlock = __ticket_spin_unlock,
-#endif
-};
-EXPORT_SYMBOL(pv_lock_ops);
-
EXPORT_SYMBOL_GPL(pv_time_ops);
EXPORT_SYMBOL (pv_cpu_ops);
EXPORT_SYMBOL (pv_mmu_ops);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 205188db962..0a1302fe6d4 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -76,47 +76,12 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
return ((unsigned long *)tsk->thread.sp)[3];
}
-#ifdef CONFIG_HOTPLUG_CPU
-#include <asm/nmi.h>
-
-static void cpu_exit_clear(void)
-{
- int cpu = raw_smp_processor_id();
-
- idle_task_exit();
-
- cpu_uninit();
- irq_ctx_exit(cpu);
-
- cpu_clear(cpu, cpu_callout_map);
- cpu_clear(cpu, cpu_callin_map);
-
- numa_remove_cpu(cpu);
- c1e_remove_cpu(cpu);
-}
-
-/* We don't actually take CPU down, just spin without interrupts. */
-static inline void play_dead(void)
-{
- /* This must be done before dead CPU ack */
- cpu_exit_clear();
- mb();
- /* Ack it */
- __get_cpu_var(cpu_state) = CPU_DEAD;
-
- /*
- * With physical CPU hotplug, we should halt the cpu
- */
- local_irq_disable();
- /* mask all interrupts, flush any and all caches, and halt */
- wbinvd_halt();
-}
-#else
+#ifndef CONFIG_SMP
static inline void play_dead(void)
{
BUG();
}
-#endif /* CONFIG_HOTPLUG_CPU */
+#endif
/*
* The idle thread. There's no useful work to be
@@ -158,7 +123,7 @@ void cpu_idle(void)
}
}
-void __show_registers(struct pt_regs *regs, int all)
+void __show_regs(struct pt_regs *regs, int all)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
unsigned long d0, d1, d2, d3, d6, d7;
@@ -224,7 +189,7 @@ void __show_registers(struct pt_regs *regs, int all)
void show_regs(struct pt_regs *regs)
{
- __show_registers(regs, 1);
+ __show_regs(regs, 1);
show_trace(NULL, regs, &regs->sp, regs->bp);
}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 2a8ccb9238b..cd8c0ed02b7 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -86,30 +86,12 @@ void exit_idle(void)
__exit_idle();
}
-#ifdef CONFIG_HOTPLUG_CPU
-DECLARE_PER_CPU(int, cpu_state);
-
-#include <linux/nmi.h>
-/* We halt the CPU with physical CPU hotplug */
-static inline void play_dead(void)
-{
- idle_task_exit();
- c1e_remove_cpu(raw_smp_processor_id());
-
- mb();
- /* Ack it */
- __get_cpu_var(cpu_state) = CPU_DEAD;
-
- local_irq_disable();
- /* mask all interrupts, flush any and all caches, and halt */
- wbinvd_halt();
-}
-#else
+#ifndef CONFIG_SMP
static inline void play_dead(void)
{
BUG();
}
-#endif /* CONFIG_HOTPLUG_CPU */
+#endif
/*
* The idle thread. There's no useful work to be
@@ -154,7 +136,7 @@ void cpu_idle(void)
}
/* Prints also some state that isn't saved in the pt_regs */
-void __show_regs(struct pt_regs *regs)
+void __show_regs(struct pt_regs *regs, int all)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
unsigned long d0, d1, d2, d3, d6, d7;
@@ -193,6 +175,9 @@ void __show_regs(struct pt_regs *regs)
rdmsrl(MSR_GS_BASE, gs);
rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
+ if (!all)
+ return;
+
cr0 = read_cr0();
cr2 = read_cr2();
cr3 = read_cr3();
@@ -218,7 +203,7 @@ void __show_regs(struct pt_regs *regs)
void show_regs(struct pt_regs *regs)
{
printk(KERN_INFO "CPU %d:", smp_processor_id());
- __show_regs(regs);
+ __show_regs(regs, 1);
show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}
@@ -754,12 +739,12 @@ unsigned long get_wchan(struct task_struct *p)
if (!p || p == current || p->state == TASK_RUNNING)
return 0;
stack = (unsigned long)task_stack_page(p);
- if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
+ if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
return 0;
fp = *(u64 *)(p->thread.sp);
do {
if (fp < (unsigned long)stack ||
- fp > (unsigned long)stack+THREAD_SIZE)
+ fp >= (unsigned long)stack+THREAD_SIZE)
return 0;
ip = *(u64 *)(fp+8);
if (!in_sched_functions(ip))
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index e375b658efc..0a6d8c12e10 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -40,7 +40,9 @@ enum x86_regset {
REGSET_GENERAL,
REGSET_FP,
REGSET_XFP,
+ REGSET_IOPERM64 = REGSET_XFP,
REGSET_TLS,
+ REGSET_IOPERM32,
};
/*
@@ -555,6 +557,29 @@ static int ptrace_set_debugreg(struct task_struct *child,
return 0;
}
+/*
+ * These access the current or another (stopped) task's io permission
+ * bitmap for debugging or core dump.
+ */
+static int ioperm_active(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ return target->thread.io_bitmap_max / regset->size;
+}
+
+static int ioperm_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ if (!target->thread.io_bitmap_ptr)
+ return -ENXIO;
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ target->thread.io_bitmap_ptr,
+ 0, IO_BITMAP_BYTES);
+}
+
#ifdef CONFIG_X86_PTRACE_BTS
/*
* The configuration for a particular BTS hardware implementation.
@@ -1385,6 +1410,12 @@ static const struct user_regset x86_64_regsets[] = {
.size = sizeof(long), .align = sizeof(long),
.active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
},
+ [REGSET_IOPERM64] = {
+ .core_note_type = NT_386_IOPERM,
+ .n = IO_BITMAP_LONGS,
+ .size = sizeof(long), .align = sizeof(long),
+ .active = ioperm_active, .get = ioperm_get
+ },
};
static const struct user_regset_view user_x86_64_view = {
@@ -1431,6 +1462,12 @@ static const struct user_regset x86_32_regsets[] = {
.active = regset_tls_active,
.get = regset_tls_get, .set = regset_tls_set
},
+ [REGSET_IOPERM32] = {
+ .core_note_type = NT_386_IOPERM,
+ .n = IO_BITMAP_BYTES / sizeof(u32),
+ .size = sizeof(u32), .align = sizeof(u32),
+ .active = ioperm_active, .get = ioperm_get
+ },
};
static const struct user_regset_view user_x86_32_view = {
@@ -1452,7 +1489,8 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
#endif
}
-void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
+void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
+ int error_code, int si_code)
{
struct siginfo info;
@@ -1461,7 +1499,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
memset(&info, 0, sizeof(info));
info.si_signo = SIGTRAP;
- info.si_code = TRAP_BRKPT;
+ info.si_code = si_code;
/* User-mode ip? */
info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL;
@@ -1548,5 +1586,5 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
*/
if (test_thread_flag(TIF_SINGLESTEP) &&
tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL))
- send_sigtrap(current, regs, 0);
+ send_sigtrap(current, regs, 0, TRAP_BRKPT);
}
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index d1385881810..f6a11b9b1f9 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -354,9 +354,27 @@ static void ati_force_hpet_resume(void)
printk(KERN_DEBUG "Force enabled HPET at resume\n");
}
+static u32 ati_ixp4x0_rev(struct pci_dev *dev)
+{
+ u32 d;
+ u8 b;
+
+ pci_read_config_byte(dev, 0xac, &b);
+ b &= ~(1<<5);
+ pci_write_config_byte(dev, 0xac, b);
+ pci_read_config_dword(dev, 0x70, &d);
+ d |= 1<<8;
+ pci_write_config_dword(dev, 0x70, d);
+ pci_read_config_dword(dev, 0x8, &d);
+ d &= 0xff;
+ dev_printk(KERN_DEBUG, &dev->dev, "SB4X0 revision 0x%x\n", d);
+ return d;
+}
+
static void ati_force_enable_hpet(struct pci_dev *dev)
{
- u32 uninitialized_var(val);
+ u32 d, val;
+ u8 b;
if (hpet_address || force_hpet_address)
return;
@@ -366,14 +384,33 @@ static void ati_force_enable_hpet(struct pci_dev *dev)
return;
}
+ d = ati_ixp4x0_rev(dev);
+ if (d < 0x82)
+ return;
+
+ /* base address */
pci_write_config_dword(dev, 0x14, 0xfed00000);
pci_read_config_dword(dev, 0x14, &val);
+
+ /* enable interrupt */
+ outb(0x72, 0xcd6); b = inb(0xcd7);
+ b |= 0x1;
+ outb(0x72, 0xcd6); outb(b, 0xcd7);
+ outb(0x72, 0xcd6); b = inb(0xcd7);
+ if (!(b & 0x1))
+ return;
+ pci_read_config_dword(dev, 0x64, &d);
+ d |= (1<<10);
+ pci_write_config_dword(dev, 0x64, d);
+ pci_read_config_dword(dev, 0x64, &d);
+ if (!(d & (1<<10)))
+ return;
+
force_hpet_address = val;
force_hpet_resume_type = ATI_FORCE_HPET_RESUME;
dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n",
force_hpet_address);
cached_dev = dev;
- return;
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS,
ati_force_enable_hpet);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 46c98efbbf8..2255782e8d4 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -302,7 +302,7 @@ static void __init relocate_initrd(void)
if (clen > MAX_MAP_CHUNK-slop)
clen = MAX_MAP_CHUNK-slop;
mapaddr = ramdisk_image & PAGE_MASK;
- p = early_ioremap(mapaddr, clen+slop);
+ p = early_memremap(mapaddr, clen+slop);
memcpy(q, p+slop, clen);
early_iounmap(p, clen+slop);
q += clen;
@@ -379,7 +379,7 @@ static void __init parse_setup_data(void)
return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
- data = early_ioremap(pa_data, PAGE_SIZE);
+ data = early_memremap(pa_data, PAGE_SIZE);
switch (data->type) {
case SETUP_E820_EXT:
parse_e820_ext(data, pa_data);
@@ -402,7 +402,7 @@ static void __init e820_reserve_setup_data(void)
return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
- data = early_ioremap(pa_data, sizeof(*data));
+ data = early_memremap(pa_data, sizeof(*data));
e820_update_range(pa_data, sizeof(*data)+data->len,
E820_RAM, E820_RESERVED_KERN);
found = 1;
@@ -428,7 +428,7 @@ static void __init reserve_early_setup_data(void)
return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
- data = early_ioremap(pa_data, sizeof(*data));
+ data = early_memremap(pa_data, sizeof(*data));
sprintf(buf, "setup data %x", data->type);
reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
pa_data = data->next;
@@ -582,6 +582,190 @@ static struct x86_quirks default_x86_quirks __initdata;
struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
/*
+ * Some BIOSes seem to corrupt the low 64k of memory during events
+ * like suspend/resume and unplugging an HDMI cable. Reserve all
+ * remaining free memory in that area and fill it with a distinct
+ * pattern.
+ */
+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
+#define MAX_SCAN_AREAS 8
+
+static int __read_mostly memory_corruption_check = -1;
+
+static unsigned __read_mostly corruption_check_size = 64*1024;
+static unsigned __read_mostly corruption_check_period = 60; /* seconds */
+
+static struct e820entry scan_areas[MAX_SCAN_AREAS];
+static int num_scan_areas;
+
+
+static int set_corruption_check(char *arg)
+{
+ char *end;
+
+ memory_corruption_check = simple_strtol(arg, &end, 10);
+
+ return (*end == 0) ? 0 : -EINVAL;
+}
+early_param("memory_corruption_check", set_corruption_check);
+
+static int set_corruption_check_period(char *arg)
+{
+ char *end;
+
+ corruption_check_period = simple_strtoul(arg, &end, 10);
+
+ return (*end == 0) ? 0 : -EINVAL;
+}
+early_param("memory_corruption_check_period", set_corruption_check_period);
+
+static int set_corruption_check_size(char *arg)
+{
+ char *end;
+ unsigned size;
+
+ size = memparse(arg, &end);
+
+ if (*end == '\0')
+ corruption_check_size = size;
+
+ return (size == corruption_check_size) ? 0 : -EINVAL;
+}
+early_param("memory_corruption_check_size", set_corruption_check_size);
+
+
+static void __init setup_bios_corruption_check(void)
+{
+ u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */
+
+ if (memory_corruption_check == -1) {
+ memory_corruption_check =
+#ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
+ 1
+#else
+ 0
+#endif
+ ;
+ }
+
+ if (corruption_check_size == 0)
+ memory_corruption_check = 0;
+
+ if (!memory_corruption_check)
+ return;
+
+ corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
+
+ while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) {
+ u64 size;
+ addr = find_e820_area_size(addr, &size, PAGE_SIZE);
+
+ if (addr == 0)
+ break;
+
+ if ((addr + size) > corruption_check_size)
+ size = corruption_check_size - addr;
+
+ if (size == 0)
+ break;
+
+ e820_update_range(addr, size, E820_RAM, E820_RESERVED);
+ scan_areas[num_scan_areas].addr = addr;
+ scan_areas[num_scan_areas].size = size;
+ num_scan_areas++;
+
+ /* Assume we've already mapped this early memory */
+ memset(__va(addr), 0, size);
+
+ addr += size;
+ }
+
+ printk(KERN_INFO "Scanning %d areas for low memory corruption\n",
+ num_scan_areas);
+ update_e820();
+}
+
+static struct timer_list periodic_check_timer;
+
+void check_for_bios_corruption(void)
+{
+ int i;
+ int corruption = 0;
+
+ if (!memory_corruption_check)
+ return;
+
+ for(i = 0; i < num_scan_areas; i++) {
+ unsigned long *addr = __va(scan_areas[i].addr);
+ unsigned long size = scan_areas[i].size;
+
+ for(; size; addr++, size -= sizeof(unsigned long)) {
+ if (!*addr)
+ continue;
+ printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
+ addr, __pa(addr), *addr);
+ corruption = 1;
+ *addr = 0;
+ }
+ }
+
+ WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n");
+}
+
+static void periodic_check_for_corruption(unsigned long data)
+{
+ check_for_bios_corruption();
+ mod_timer(&periodic_check_timer, round_jiffies(jiffies + corruption_check_period*HZ));
+}
+
+void start_periodic_check_for_corruption(void)
+{
+ if (!memory_corruption_check || corruption_check_period == 0)
+ return;
+
+ printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
+ corruption_check_period);
+
+ init_timer(&periodic_check_timer);
+ periodic_check_timer.function = &periodic_check_for_corruption;
+ periodic_check_for_corruption(0);
+}
+#endif
+
+static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
+{
+ printk(KERN_NOTICE
+ "%s detected: BIOS may corrupt low RAM, working it around.\n",
+ d->ident);
+
+ e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED);
+ sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+
+ return 0;
+}
+
+/* List of systems that have known low memory corruption BIOS problems */
+static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
+#ifdef CONFIG_X86_RESERVE_LOW_64K
+ {
+ .callback = dmi_low_memory_corruption,
+ .ident = "AMI BIOS",
+ .matches = {
+ DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
+ },
+ },
+ {
+ .callback = dmi_low_memory_corruption,
+ .ident = "Phoenix BIOS",
+ .matches = {
+ DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
+ },
+ },
+#endif
+ {}
+};
+
+/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
* for initialization. Note, the efi init code path is determined by the
@@ -715,6 +899,10 @@ void __init setup_arch(char **cmdline_p)
finish_e820_parsing();
+ dmi_scan_machine();
+
+ dmi_check_system(bad_bios_dmi_table);
+
#ifdef CONFIG_X86_32
probe_roms();
#endif
@@ -771,6 +959,10 @@ void __init setup_arch(char **cmdline_p)
high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
#endif
+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
+ setup_bios_corruption_check();
+#endif
+
/* max_pfn_mapped is updated here */
max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
max_pfn_mapped = max_low_pfn_mapped;
@@ -799,8 +991,6 @@ void __init setup_arch(char **cmdline_p)
vsmp_init();
#endif
- dmi_scan_machine();
-
io_delay_init();
/*
@@ -808,6 +998,8 @@ void __init setup_arch(char **cmdline_p)
*/
acpi_boot_table_init();
+ early_acpi_boot_init();
+
#ifdef CONFIG_ACPI_NUMA
/*
* Parse SRAT to discover nodes.
@@ -903,3 +1095,5 @@ void __init setup_arch(char **cmdline_p)
#endif
#endif
}
+
+
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index b21070ea33a..d6dd057d0f2 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -27,6 +27,7 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/vdso.h>
+#include <asm/syscall.h>
#include <asm/syscalls.h>
#include "sigframe.h"
@@ -112,6 +113,27 @@ asmlinkage int sys_sigaltstack(unsigned long bx)
return do_sigaltstack(uss, uoss, regs->sp);
}
+#define COPY(x) { \
+ err |= __get_user(regs->x, &sc->x); \
+}
+
+#define COPY_SEG(seg) { \
+ unsigned short tmp; \
+ err |= __get_user(tmp, &sc->seg); \
+ regs->seg = tmp; \
+}
+
+#define COPY_SEG_STRICT(seg) { \
+ unsigned short tmp; \
+ err |= __get_user(tmp, &sc->seg); \
+ regs->seg = tmp | 3; \
+}
+
+#define GET_SEG(seg) { \
+ unsigned short tmp; \
+ err |= __get_user(tmp, &sc->seg); \
+ loadsegment(seg, tmp); \
+}
/*
* Do a signal return; undo the signal stack.
@@ -120,28 +142,13 @@ static int
restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
unsigned long *pax)
{
+ void __user *buf;
+ unsigned int tmpflags;
unsigned int err = 0;
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
-#define COPY(x) err |= __get_user(regs->x, &sc->x)
-
-#define COPY_SEG(seg) \
- { unsigned short tmp; \
- err |= __get_user(tmp, &sc->seg); \
- regs->seg = tmp; }
-
-#define COPY_SEG_STRICT(seg) \
- { unsigned short tmp; \
- err |= __get_user(tmp, &sc->seg); \
- regs->seg = tmp|3; }
-
-#define GET_SEG(seg) \
- { unsigned short tmp; \
- err |= __get_user(tmp, &sc->seg); \
- loadsegment(seg, tmp); }
-
GET_SEG(gs);
COPY_SEG(fs);
COPY_SEG(es);
@@ -151,21 +158,12 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
COPY_SEG_STRICT(cs);
COPY_SEG_STRICT(ss);
- {
- unsigned int tmpflags;
-
- err |= __get_user(tmpflags, &sc->flags);
- regs->flags = (regs->flags & ~FIX_EFLAGS) |
- (tmpflags & FIX_EFLAGS);
- regs->orig_ax = -1; /* disable syscall checks */
- }
-
- {
- void __user *buf;
+ err |= __get_user(tmpflags, &sc->flags);
+ regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
+ regs->orig_ax = -1; /* disable syscall checks */
- err |= __get_user(buf, &sc->fpstate);
- err |= restore_i387_xstate(buf);
- }
+ err |= __get_user(buf, &sc->fpstate);
+ err |= restore_i387_xstate(buf);
err |= __get_user(*pax, &sc->ax);
return err;
@@ -214,9 +212,8 @@ badframe:
return 0;
}
-asmlinkage int sys_rt_sigreturn(unsigned long __unused)
+static long do_rt_sigreturn(struct pt_regs *regs)
{
- struct pt_regs *regs = (struct pt_regs *)&__unused;
struct rt_sigframe __user *frame;
unsigned long ax;
sigset_t set;
@@ -242,10 +239,17 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused)
return ax;
badframe:
- force_sig(SIGSEGV, current);
+ signal_fault(regs, frame, "rt_sigreturn");
return 0;
}
+asmlinkage int sys_rt_sigreturn(unsigned long __unused)
+{
+ struct pt_regs *regs = (struct pt_regs *)&__unused;
+
+ return do_rt_sigreturn(regs);
+}
+
/*
* Set up a signal frame.
*/
@@ -337,39 +341,29 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
}
static int
-setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
- struct pt_regs *regs)
+__setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
+ struct pt_regs *regs)
{
struct sigframe __user *frame;
void __user *restorer;
int err = 0;
- int usig;
void __user *fpstate = NULL;
frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
- goto give_sigsegv;
+ return -EFAULT;
- usig = current_thread_info()->exec_domain
- && current_thread_info()->exec_domain->signal_invmap
- && sig < 32
- ? current_thread_info()->exec_domain->signal_invmap[sig]
- : sig;
+ if (__put_user(sig, &frame->sig))
+ return -EFAULT;
- err = __put_user(usig, &frame->sig);
- if (err)
- goto give_sigsegv;
-
- err = setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]);
- if (err)
- goto give_sigsegv;
+ if (setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]))
+ return -EFAULT;
if (_NSIG_WORDS > 1) {
- err = __copy_to_user(&frame->extramask, &set->sig[1],
- sizeof(frame->extramask));
- if (err)
- goto give_sigsegv;
+ if (__copy_to_user(&frame->extramask, &set->sig[1],
+ sizeof(frame->extramask)))
+ return -EFAULT;
}
if (current->mm->context.vdso)
@@ -394,7 +388,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
if (err)
- goto give_sigsegv;
+ return -EFAULT;
/* Set up registers for signal handler */
regs->sp = (unsigned long)frame;
@@ -409,38 +403,27 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
regs->cs = __USER_CS;
return 0;
-
-give_sigsegv:
- force_sigsegv(sig, current);
- return -EFAULT;
}
-static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs *regs)
+static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
void __user *restorer;
int err = 0;
- int usig;
void __user *fpstate = NULL;
frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
- goto give_sigsegv;
-
- usig = current_thread_info()->exec_domain
- && current_thread_info()->exec_domain->signal_invmap
- && sig < 32
- ? current_thread_info()->exec_domain->signal_invmap[sig]
- : sig;
+ return -EFAULT;
- err |= __put_user(usig, &frame->sig);
+ err |= __put_user(sig, &frame->sig);
err |= __put_user(&frame->info, &frame->pinfo);
err |= __put_user(&frame->uc, &frame->puc);
err |= copy_siginfo_to_user(&frame->info, info);
if (err)
- goto give_sigsegv;
+ return -EFAULT;
/* Create the ucontext. */
if (cpu_has_xsave)
@@ -456,7 +439,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
- goto give_sigsegv;
+ return -EFAULT;
/* Set up to return from userspace. */
restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
@@ -476,12 +459,12 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
if (err)
- goto give_sigsegv;
+ return -EFAULT;
/* Set up registers for signal handler */
regs->sp = (unsigned long)frame;
regs->ip = (unsigned long)ka->sa.sa_handler;
- regs->ax = (unsigned long)usig;
+ regs->ax = (unsigned long)sig;
regs->dx = (unsigned long)&frame->info;
regs->cx = (unsigned long)&frame->uc;
@@ -491,15 +474,48 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->cs = __USER_CS;
return 0;
-
-give_sigsegv:
- force_sigsegv(sig, current);
- return -EFAULT;
}
/*
* OK, we're invoking a handler:
*/
+static int signr_convert(int sig)
+{
+ struct thread_info *info = current_thread_info();
+
+ if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32)
+ return info->exec_domain->signal_invmap[sig];
+ return sig;
+}
+
+#define is_ia32 1
+#define ia32_setup_frame __setup_frame
+#define ia32_setup_rt_frame __setup_rt_frame
+
+static int
+setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs)
+{
+ int usig = signr_convert(sig);
+ int ret;
+
+ /* Set up the stack frame */
+ if (is_ia32) {
+ if (ka->sa.sa_flags & SA_SIGINFO)
+ ret = ia32_setup_rt_frame(usig, ka, info, set, regs);
+ else
+ ret = ia32_setup_frame(usig, ka, set, regs);
+ } else
+ ret = __setup_rt_frame(sig, ka, info, set, regs);
+
+ if (ret) {
+ force_sigsegv(sig, current);
+ return -EFAULT;
+ }
+
+ return ret;
+}
+
static int
handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
sigset_t *oldset, struct pt_regs *regs)
@@ -507,9 +523,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
int ret;
/* Are we from a system call? */
- if ((long)regs->orig_ax >= 0) {
+ if (syscall_get_nr(current, regs) >= 0) {
/* If so, check system call restarting.. */
- switch (regs->ax) {
+ switch (syscall_get_error(current, regs)) {
case -ERESTART_RESTARTBLOCK:
case -ERESTARTNOHAND:
regs->ax = -EINTR;
@@ -536,15 +552,20 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
regs->flags &= ~X86_EFLAGS_TF;
- /* Set up the stack frame */
- if (ka->sa.sa_flags & SA_SIGINFO)
- ret = setup_rt_frame(sig, ka, info, oldset, regs);
- else
- ret = setup_frame(sig, ka, oldset, regs);
+ ret = setup_rt_frame(sig, ka, info, oldset, regs);
if (ret)
return ret;
+#ifdef CONFIG_X86_64
+ /*
+ * This has nothing to do with segment registers,
+ * despite the name. This magic affects uaccess.h
+ * macros' behavior. Reset it to the normal setting.
+ */
+ set_fs(USER_DS);
+#endif
+
/*
* Clear the direction flag as per the ABI for function entry.
*/
@@ -571,6 +592,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
return 0;
}
+#define NR_restart_syscall __NR_restart_syscall
/*
* Note that 'init' is a special process: it doesn't get signals it doesn't
* want to handle. Thus you cannot kill init even with a SIGKILL even by
@@ -623,9 +645,9 @@ static void do_signal(struct pt_regs *regs)
}
/* Did we come from a system call? */
- if ((long)regs->orig_ax >= 0) {
+ if (syscall_get_nr(current, regs) >= 0) {
/* Restart the system call - no handlers present */
- switch (regs->ax) {
+ switch (syscall_get_error(current, regs)) {
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
@@ -634,7 +656,7 @@ static void do_signal(struct pt_regs *regs)
break;
case -ERESTART_RESTARTBLOCK:
- regs->ax = __NR_restart_syscall;
+ regs->ax = NR_restart_syscall;
regs->ip -= 2;
break;
}
@@ -657,6 +679,12 @@ static void do_signal(struct pt_regs *regs)
void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
+#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
+ /* notify userspace of pending MCEs */
+ if (thread_info_flags & _TIF_MCE_NOTIFY)
+ mce_notify_user();
+#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
+
/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs);
@@ -666,5 +694,23 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
tracehook_notify_resume(regs);
}
+#ifdef CONFIG_X86_32
clear_thread_flag(TIF_IRET);
+#endif /* CONFIG_X86_32 */
+}
+
+void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
+{
+ struct task_struct *me = current;
+
+ if (show_unhandled_signals && printk_ratelimit()) {
+ printk(KERN_INFO
+ "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
+ me->comm, me->pid, where, frame,
+ regs->ip, regs->sp, regs->orig_ax);
+ print_vma_addr(" in ", regs->ip);
+ printk(KERN_CONT "\n");
+ }
+
+ force_sig(SIGSEGV, me);
}
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 823a55bf8c3..a5c9627f4db 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -52,6 +52,16 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
return do_sigaltstack(uss, uoss, regs->sp);
}
+#define COPY(x) { \
+ err |= __get_user(regs->x, &sc->x); \
+}
+
+#define COPY_SEG_STRICT(seg) { \
+ unsigned short tmp; \
+ err |= __get_user(tmp, &sc->seg); \
+ regs->seg = tmp | 3; \
+}
+
/*
* Do a signal return; undo the signal stack.
*/
@@ -59,13 +69,13 @@ static int
restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
unsigned long *pax)
{
+ void __user *buf;
+ unsigned int tmpflags;
unsigned int err = 0;
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
-#define COPY(x) (err |= __get_user(regs->x, &sc->x))
-
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip);
COPY(r8);
@@ -80,34 +90,24 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
/* Kernel saves and restores only the CS segment register on signals,
* which is the bare minimum needed to allow mixed 32/64-bit code.
* App's signal handler can save/restore other segments if needed. */
- {
- unsigned cs;
- err |= __get_user(cs, &sc->cs);
- regs->cs = cs | 3; /* Force into user mode */
- }
+ COPY_SEG_STRICT(cs);
- {
- unsigned int tmpflags;
- err |= __get_user(tmpflags, &sc->flags);
- regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
- regs->orig_ax = -1; /* disable syscall checks */
- }
+ err |= __get_user(tmpflags, &sc->flags);
+ regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
+ regs->orig_ax = -1; /* disable syscall checks */
- {
- struct _fpstate __user *buf;
- err |= __get_user(buf, &sc->fpstate);
- err |= restore_i387_xstate(buf);
- }
+ err |= __get_user(buf, &sc->fpstate);
+ err |= restore_i387_xstate(buf);
err |= __get_user(*pax, &sc->ax);
return err;
}
-asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
+static long do_rt_sigreturn(struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
- sigset_t set;
unsigned long ax;
+ sigset_t set;
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
@@ -130,10 +130,15 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
return ax;
badframe:
- signal_fault(regs, frame, "sigreturn");
+ signal_fault(regs, frame, "rt_sigreturn");
return 0;
}
+asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
+{
+ return do_rt_sigreturn(regs);
+}
+
/*
* Set up a signal frame.
*/
@@ -195,8 +200,8 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
return (void __user *)round_down(sp - size, 64);
}
-static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs *regs)
+static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
void __user *fp = NULL;
@@ -209,17 +214,16 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
if (save_i387_xstate(fp) < 0)
- err |= -1;
+ return -EFAULT;
} else
frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
- goto give_sigsegv;
+ return -EFAULT;
if (ka->sa.sa_flags & SA_SIGINFO) {
- err |= copy_siginfo_to_user(&frame->info, info);
- if (err)
- goto give_sigsegv;
+ if (copy_siginfo_to_user(&frame->info, info))
+ return -EFAULT;
}
/* Create the ucontext. */
@@ -247,11 +251,11 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
} else {
/* could use a vstub here */
- goto give_sigsegv;
+ return -EFAULT;
}
if (err)
- goto give_sigsegv;
+ return -EFAULT;
/* Set up registers for signal handler */
regs->di = sig;
@@ -271,15 +275,45 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->cs = __USER_CS;
return 0;
-
-give_sigsegv:
- force_sigsegv(sig, current);
- return -EFAULT;
}
/*
* OK, we're invoking a handler
*/
+static int signr_convert(int sig)
+{
+ return sig;
+}
+
+#ifdef CONFIG_IA32_EMULATION
+#define is_ia32 test_thread_flag(TIF_IA32)
+#else
+#define is_ia32 0
+#endif
+
+static int
+setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs)
+{
+ int usig = signr_convert(sig);
+ int ret;
+
+ /* Set up the stack frame */
+ if (is_ia32) {
+ if (ka->sa.sa_flags & SA_SIGINFO)
+ ret = ia32_setup_rt_frame(usig, ka, info, set, regs);
+ else
+ ret = ia32_setup_frame(usig, ka, set, regs);
+ } else
+ ret = __setup_rt_frame(sig, ka, info, set, regs);
+
+ if (ret) {
+ force_sigsegv(sig, current);
+ return -EFAULT;
+ }
+
+ return ret;
+}
static int
handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
@@ -317,51 +351,48 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
regs->flags &= ~X86_EFLAGS_TF;
-#ifdef CONFIG_IA32_EMULATION
- if (test_thread_flag(TIF_IA32)) {
- if (ka->sa.sa_flags & SA_SIGINFO)
- ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs);
- else
- ret = ia32_setup_frame(sig, ka, oldset, regs);
- } else
-#endif
ret = setup_rt_frame(sig, ka, info, oldset, regs);
- if (ret == 0) {
- /*
- * This has nothing to do with segment registers,
- * despite the name. This magic affects uaccess.h
- * macros' behavior. Reset it to the normal setting.
- */
- set_fs(USER_DS);
+ if (ret)
+ return ret;
- /*
- * Clear the direction flag as per the ABI for function entry.
- */
- regs->flags &= ~X86_EFLAGS_DF;
+#ifdef CONFIG_X86_64
+ /*
+ * This has nothing to do with segment registers,
+ * despite the name. This magic affects uaccess.h
+ * macros' behavior. Reset it to the normal setting.
+ */
+ set_fs(USER_DS);
+#endif
- /*
- * Clear TF when entering the signal handler, but
- * notify any tracer that was single-stepping it.
- * The tracer may want to single-step inside the
- * handler too.
- */
- regs->flags &= ~X86_EFLAGS_TF;
+ /*
+ * Clear the direction flag as per the ABI for function entry.
+ */
+ regs->flags &= ~X86_EFLAGS_DF;
- spin_lock_irq(&current->sighand->siglock);
- sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
- if (!(ka->sa.sa_flags & SA_NODEFER))
- sigaddset(&current->blocked, sig);
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
+ /*
+ * Clear TF when entering the signal handler, but
+ * notify any tracer that was single-stepping it.
+ * The tracer may want to single-step inside the
+ * handler too.
+ */
+ regs->flags &= ~X86_EFLAGS_TF;
- tracehook_signal_handler(sig, info, ka, regs,
- test_thread_flag(TIF_SINGLESTEP));
- }
+ spin_lock_irq(&current->sighand->siglock);
+ sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+ if (!(ka->sa.sa_flags & SA_NODEFER))
+ sigaddset(&current->blocked, sig);
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
- return ret;
+ tracehook_signal_handler(sig, info, ka, regs,
+ test_thread_flag(TIF_SINGLESTEP));
+
+ return 0;
}
+#define NR_restart_syscall \
+ test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall
/*
* Note that 'init' is a special process: it doesn't get signals it doesn't
* want to handle. Thus you cannot kill init even with a SIGKILL even by
@@ -391,7 +422,8 @@ static void do_signal(struct pt_regs *regs)
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
if (signr > 0) {
- /* Re-enable any watchpoints before delivering the
+ /*
+ * Re-enable any watchpoints before delivering the
* signal to user space. The processor register will
* have been cleared if the watchpoint triggered
* inside the kernel.
@@ -399,7 +431,7 @@ static void do_signal(struct pt_regs *regs)
if (current->thread.debugreg7)
set_debugreg(current->thread.debugreg7, 7);
- /* Whee! Actually deliver the signal. */
+ /* Whee! Actually deliver the signal. */
if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
/*
* A signal was successfully delivered; the saved
@@ -422,10 +454,9 @@ static void do_signal(struct pt_regs *regs)
regs->ax = regs->orig_ax;
regs->ip -= 2;
break;
+
case -ERESTART_RESTARTBLOCK:
- regs->ax = test_thread_flag(TIF_IA32) ?
- __NR_ia32_restart_syscall :
- __NR_restart_syscall;
+ regs->ax = NR_restart_syscall;
regs->ip -= 2;
break;
}
@@ -441,14 +472,18 @@ static void do_signal(struct pt_regs *regs)
}
}
-void do_notify_resume(struct pt_regs *regs, void *unused,
- __u32 thread_info_flags)
+/*
+ * notification of userspace execution resumption
+ * - triggered by the TIF_WORK_MASK flags
+ */
+void
+do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
-#ifdef CONFIG_X86_MCE
+#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
/* notify userspace of pending MCEs */
if (thread_info_flags & _TIF_MCE_NOTIFY)
mce_notify_user();
-#endif /* CONFIG_X86_MCE */
+#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
@@ -458,17 +493,23 @@ void do_notify_resume(struct pt_regs *regs, void *unused,
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
}
+
+#ifdef CONFIG_X86_32
+ clear_thread_flag(TIF_IRET);
+#endif /* CONFIG_X86_32 */
}
void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
{
struct task_struct *me = current;
+
if (show_unhandled_signals && printk_ratelimit()) {
- printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
- me->comm, me->pid, where, frame, regs->ip,
- regs->sp, regs->orig_ax);
+ printk(KERN_INFO
+ "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
+ me->comm, me->pid, where, frame,
+ regs->ip, regs->sp, regs->orig_ax);
print_vma_addr(" in ", regs->ip);
- printk("\n");
+ printk(KERN_CONT "\n");
}
force_sig(SIGSEGV, me);
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 361b7a4c640..18f9b19f5f8 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -214,12 +214,16 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
struct smp_ops smp_ops = {
.smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
.smp_prepare_cpus = native_smp_prepare_cpus,
- .cpu_up = native_cpu_up,
.smp_cpus_done = native_smp_cpus_done,
.smp_send_stop = native_smp_send_stop,
.smp_send_reschedule = native_smp_send_reschedule,
+ .cpu_up = native_cpu_up,
+ .cpu_die = native_cpu_die,
+ .cpu_disable = native_cpu_disable,
+ .play_dead = native_play_dead,
+
.send_call_func_ipi = native_send_call_func_ipi,
.send_call_func_single_ipi = native_send_call_func_single_ipi,
};
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9056f7e272c..8c3aca7cb34 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -52,6 +52,7 @@
#include <asm/desc.h>
#include <asm/nmi.h>
#include <asm/irq.h>
+#include <asm/idle.h>
#include <asm/smp.h>
#include <asm/trampoline.h>
#include <asm/cpu.h>
@@ -333,14 +334,17 @@ static void __cpuinit start_secondary(void *unused)
* does not change while we are assigning vectors to cpus. Holding
* this lock ensures we don't half assign or remove an irq from a cpu.
*/
- ipi_call_lock_irq();
+ ipi_call_lock();
lock_vector_lock();
__setup_vector_irq(smp_processor_id());
cpu_set(smp_processor_id(), cpu_online_map);
unlock_vector_lock();
- ipi_call_unlock_irq();
+ ipi_call_unlock();
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
+ /* enable local interrupts */
+ local_irq_enable();
+
setup_secondary_clock();
wmb();
@@ -595,10 +599,12 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
* Give the other CPU some time to accept the IPI.
*/
udelay(200);
- maxlvt = lapic_get_maxlvt();
- if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
- apic_write(APIC_ESR, 0);
- accept_status = (apic_read(APIC_ESR) & 0xEF);
+ if (APIC_INTEGRATED(apic_version[phys_apicid])) {
+ maxlvt = lapic_get_maxlvt();
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
+ accept_status = (apic_read(APIC_ESR) & 0xEF);
+ }
pr_debug("NMI sent.\n");
if (send_status)
@@ -1255,39 +1261,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
check_nmi_watchdog();
}
-#ifdef CONFIG_HOTPLUG_CPU
-
-static void remove_siblinginfo(int cpu)
-{
- int sibling;
- struct cpuinfo_x86 *c = &cpu_data(cpu);
-
- for_each_cpu_mask_nr(sibling, per_cpu(cpu_core_map, cpu)) {
- cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
- /*/
- * last thread sibling in this cpu core going down
- */
- if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
- cpu_data(sibling).booted_cores--;
- }
-
- for_each_cpu_mask_nr(sibling, per_cpu(cpu_sibling_map, cpu))
- cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
- cpus_clear(per_cpu(cpu_sibling_map, cpu));
- cpus_clear(per_cpu(cpu_core_map, cpu));
- c->phys_proc_id = 0;
- c->cpu_core_id = 0;
- cpu_clear(cpu, cpu_sibling_setup_map);
-}
-
-static int additional_cpus __initdata = -1;
-
-static __init int setup_additional_cpus(char *s)
-{
- return s && get_option(&s, &additional_cpus) ? 0 : -EINVAL;
-}
-early_param("additional_cpus", setup_additional_cpus);
-
/*
* cpu_possible_map should be static, it cannot change as cpu's
* are onlined, or offlined. The reason is per-cpu data-structures
@@ -1307,21 +1280,13 @@ early_param("additional_cpus", setup_additional_cpus);
*/
__init void prefill_possible_map(void)
{
- int i;
- int possible;
+ int i, possible;
/* no processor from mptable or madt */
if (!num_processors)
num_processors = 1;
- if (additional_cpus == -1) {
- if (disabled_cpus > 0)
- additional_cpus = disabled_cpus;
- else
- additional_cpus = 0;
- }
-
- possible = num_processors + additional_cpus;
+ possible = num_processors + disabled_cpus;
if (possible > NR_CPUS)
possible = NR_CPUS;
@@ -1334,6 +1299,31 @@ __init void prefill_possible_map(void)
nr_cpu_ids = possible;
}
+#ifdef CONFIG_HOTPLUG_CPU
+
+static void remove_siblinginfo(int cpu)
+{
+ int sibling;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ for_each_cpu_mask_nr(sibling, per_cpu(cpu_core_map, cpu)) {
+ cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
+ /*/
+ * last thread sibling in this cpu core going down
+ */
+ if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
+ cpu_data(sibling).booted_cores--;
+ }
+
+ for_each_cpu_mask_nr(sibling, per_cpu(cpu_sibling_map, cpu))
+ cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
+ cpus_clear(per_cpu(cpu_sibling_map, cpu));
+ cpus_clear(per_cpu(cpu_core_map, cpu));
+ c->phys_proc_id = 0;
+ c->cpu_core_id = 0;
+ cpu_clear(cpu, cpu_sibling_setup_map);
+}
+
static void __ref remove_cpu_from_maps(int cpu)
{
cpu_clear(cpu, cpu_online_map);
@@ -1344,25 +1334,9 @@ static void __ref remove_cpu_from_maps(int cpu)
numa_remove_cpu(cpu);
}
-int __cpu_disable(void)
+void cpu_disable_common(void)
{
int cpu = smp_processor_id();
-
- /*
- * Perhaps use cpufreq to drop frequency, but that could go
- * into generic code.
- *
- * We won't take down the boot processor on i386 due to some
- * interrupts only being able to be serviced by the BSP.
- * Especially so if we're not using an IOAPIC -zwane
- */
- if (cpu == 0)
- return -EBUSY;
-
- if (nmi_watchdog == NMI_LOCAL_APIC)
- stop_apic_nmi_watchdog(NULL);
- clear_local_APIC();
-
/*
* HACK:
* Allow any queued timer interrupts to get serviced
@@ -1380,10 +1354,32 @@ int __cpu_disable(void)
remove_cpu_from_maps(cpu);
unlock_vector_lock();
fixup_irqs(cpu_online_map);
+}
+
+int native_cpu_disable(void)
+{
+ int cpu = smp_processor_id();
+
+ /*
+ * Perhaps use cpufreq to drop frequency, but that could go
+ * into generic code.
+ *
+ * We won't take down the boot processor on i386 due to some
+ * interrupts only being able to be serviced by the BSP.
+ * Especially so if we're not using an IOAPIC -zwane
+ */
+ if (cpu == 0)
+ return -EBUSY;
+
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ stop_apic_nmi_watchdog(NULL);
+ clear_local_APIC();
+
+ cpu_disable_common();
return 0;
}
-void __cpu_die(unsigned int cpu)
+void native_cpu_die(unsigned int cpu)
{
/* We don't do anything here: idle task is faking death itself. */
unsigned int i;
@@ -1400,15 +1396,45 @@ void __cpu_die(unsigned int cpu)
}
printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}
+
+void play_dead_common(void)
+{
+ idle_task_exit();
+ reset_lazy_tlbstate();
+ irq_ctx_exit(raw_smp_processor_id());
+ c1e_remove_cpu(raw_smp_processor_id());
+
+ mb();
+ /* Ack it */
+ __get_cpu_var(cpu_state) = CPU_DEAD;
+
+ /*
+ * With physical CPU hotplug, we should halt the cpu
+ */
+ local_irq_disable();
+}
+
+void native_play_dead(void)
+{
+ play_dead_common();
+ wbinvd_halt();
+}
+
#else /* ... !CONFIG_HOTPLUG_CPU */
-int __cpu_disable(void)
+int native_cpu_disable(void)
{
return -ENOSYS;
}
-void __cpu_die(unsigned int cpu)
+void native_cpu_die(unsigned int cpu)
{
/* We said "no" in __cpu_disable */
BUG();
}
+
+void native_play_dead(void)
+{
+ BUG();
+}
+
#endif
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index bbecf8b6bf9..77b400f06ea 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -47,10 +47,9 @@ unsigned long profile_pc(struct pt_regs *regs)
unsigned long pc = instruction_pointer(regs);
#ifdef CONFIG_SMP
- if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->cs) &&
- in_lock_functions(pc)) {
+ if (!user_mode_vm(regs) && in_lock_functions(pc)) {
#ifdef CONFIG_FRAME_POINTER
- return *(unsigned long *)(regs->bp + 4);
+ return *(unsigned long *)(regs->bp + sizeof(long));
#else
unsigned long *sp = (unsigned long *)&regs->sp;
@@ -95,6 +94,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
do_timer_interrupt_hook();
+#ifdef CONFIG_MCA
if (MCA_bus) {
/* The PS/2 uses level-triggered interrupts. You can't
turn them off, nor would you want to (any attempt to
@@ -108,6 +108,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
u8 irq_v = inb_p( 0x61 ); /* read the current state */
outb_p( irq_v|0x80, 0x61 ); /* reset the IRQ */
}
+#endif
return IRQ_HANDLED;
}
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index e3d49c553af..cb19d650c21 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -16,6 +16,7 @@
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/time.h>
+#include <linux/mca.h>
#include <asm/i8253.h>
#include <asm/hpet.h>
@@ -33,23 +34,34 @@ unsigned long profile_pc(struct pt_regs *regs)
/* Assume the lock function has either no stack frame or a copy
of flags from PUSHF
Eflags always has bits 22 and up cleared unlike kernel addresses. */
- if (!user_mode(regs) && in_lock_functions(pc)) {
+ if (!user_mode_vm(regs) && in_lock_functions(pc)) {
+#ifdef CONFIG_FRAME_POINTER
+ return *(unsigned long *)(regs->bp + sizeof(long));
+#else
unsigned long *sp = (unsigned long *)regs->sp;
if (sp[0] >> 22)
return sp[0];
if (sp[1] >> 22)
return sp[1];
+#endif
}
return pc;
}
EXPORT_SYMBOL(profile_pc);
-static irqreturn_t timer_event_interrupt(int irq, void *dev_id)
+irqreturn_t timer_interrupt(int irq, void *dev_id)
{
add_pda(irq0_irqs, 1);
global_clock_event->event_handler(global_clock_event);
+#ifdef CONFIG_MCA
+ if (MCA_bus) {
+ u8 irq_v = inb_p(0x61); /* read the current state */
+ outb_p(irq_v|0x80, 0x61); /* reset the IRQ */
+ }
+#endif
+
return IRQ_HANDLED;
}
@@ -100,7 +112,7 @@ unsigned long __init calibrate_cpu(void)
}
static struct irqaction irq0 = {
- .handler = timer_event_interrupt,
+ .handler = timer_interrupt,
.flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING,
.mask = CPU_MASK_NONE,
.name = "timer"
@@ -111,16 +123,13 @@ void __init hpet_time_init(void)
if (!hpet_enable())
setup_pit_timer();
+ irq0.mask = cpumask_of_cpu(0);
setup_irq(0, &irq0);
}
void __init time_init(void)
{
tsc_init();
- if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
- vgetcpu_mode = VGETCPU_RDTSCP;
- else
- vgetcpu_mode = VGETCPU_LSL;
late_time_init = choose_time_init();
}
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index fec1ecedc9b..e00534b3353 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -241,3 +241,11 @@ void flush_tlb_all(void)
on_each_cpu(do_flush_tlb_all, NULL, 1);
}
+void reset_lazy_tlbstate(void)
+{
+ int cpu = raw_smp_processor_id();
+
+ per_cpu(cpu_tlbstate, cpu).state = 0;
+ per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
+}
+
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps.c
index da5a5964fcc..e062974cce3 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps.c
@@ -7,13 +7,11 @@
*/
/*
- * 'Traps.c' handles hardware traps and faults after we have saved some
- * state in 'asm.s'.
+ * Handle hardware traps and faults.
*/
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/spinlock.h>
-#include <linux/highmem.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/utsname.h>
@@ -32,6 +30,8 @@
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/io.h>
#ifdef CONFIG_EISA
#include <linux/ioport.h>
@@ -46,21 +46,31 @@
#include <linux/edac.h>
#endif
-#include <asm/arch_hooks.h>
#include <asm/stacktrace.h>
#include <asm/processor.h>
#include <asm/debugreg.h>
#include <asm/atomic.h>
#include <asm/system.h>
#include <asm/unwind.h>
+#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/i387.h>
+
+#include <mach_traps.h>
+
+#ifdef CONFIG_X86_64
+#include <asm/pgalloc.h>
+#include <asm/proto.h>
+#include <asm/pda.h>
+#else
+#include <asm/processor-flags.h>
+#include <asm/arch_hooks.h>
#include <asm/nmi.h>
#include <asm/smp.h>
#include <asm/io.h>
#include <asm/traps.h>
-#include "mach_traps.h"
+#include "cpu/mcheck/mce.h"
DECLARE_BITMAP(used_vectors, NR_VECTORS);
EXPORT_SYMBOL_GPL(used_vectors);
@@ -77,418 +87,104 @@ char ignore_fpu_irq;
*/
gate_desc idt_table[256]
__attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
-
-int panic_on_unrecovered_nmi;
-int kstack_depth_to_print = 24;
-static unsigned int code_bytes = 64;
-static int ignore_nmis;
-static int die_counter;
-
-void printk_address(unsigned long address, int reliable)
-{
-#ifdef CONFIG_KALLSYMS
- unsigned long offset = 0;
- unsigned long symsize;
- const char *symname;
- char *modname;
- char *delim = ":";
- char namebuf[KSYM_NAME_LEN];
- char reliab[4] = "";
-
- symname = kallsyms_lookup(address, &symsize, &offset,
- &modname, namebuf);
- if (!symname) {
- printk(" [<%08lx>]\n", address);
- return;
- }
- if (!reliable)
- strcpy(reliab, "? ");
-
- if (!modname)
- modname = delim = "";
- printk(" [<%08lx>] %s%s%s%s%s+0x%lx/0x%lx\n",
- address, reliab, delim, modname, delim, symname, offset, symsize);
-#else
- printk(" [<%08lx>]\n", address);
#endif
-}
-
-static inline int valid_stack_ptr(struct thread_info *tinfo,
- void *p, unsigned int size)
-{
- void *t = tinfo;
- return p > t && p <= t + THREAD_SIZE - size;
-}
-
-/* The form of the top of the frame on the stack */
-struct stack_frame {
- struct stack_frame *next_frame;
- unsigned long return_address;
-};
-
-static inline unsigned long
-print_context_stack(struct thread_info *tinfo,
- unsigned long *stack, unsigned long bp,
- const struct stacktrace_ops *ops, void *data)
-{
- struct stack_frame *frame = (struct stack_frame *)bp;
-
- while (valid_stack_ptr(tinfo, stack, sizeof(*stack))) {
- unsigned long addr;
-
- addr = *stack;
- if (__kernel_text_address(addr)) {
- if ((unsigned long) stack == bp + 4) {
- ops->address(data, addr, 1);
- frame = frame->next_frame;
- bp = (unsigned long) frame;
- } else {
- ops->address(data, addr, bp == 0);
- }
- }
- stack++;
- }
- return bp;
-}
-
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp,
- const struct stacktrace_ops *ops, void *data)
-{
- if (!task)
- task = current;
-
- if (!stack) {
- unsigned long dummy;
- stack = &dummy;
- if (task != current)
- stack = (unsigned long *)task->thread.sp;
- }
-
-#ifdef CONFIG_FRAME_POINTER
- if (!bp) {
- if (task == current) {
- /* Grab bp right from our regs */
- asm("movl %%ebp, %0" : "=r" (bp) :);
- } else {
- /* bp is the last reg pushed by switch_to */
- bp = *(unsigned long *) task->thread.sp;
- }
- }
-#endif
-
- for (;;) {
- struct thread_info *context;
-
- context = (struct thread_info *)
- ((unsigned long)stack & (~(THREAD_SIZE - 1)));
- bp = print_context_stack(context, stack, bp, ops, data);
- /*
- * Should be after the line below, but somewhere
- * in early boot context comes out corrupted and we
- * can't reference it:
- */
- if (ops->stack(data, "IRQ") < 0)
- break;
- stack = (unsigned long *)context->previous_esp;
- if (!stack)
- break;
- touch_nmi_watchdog();
- }
-}
-EXPORT_SYMBOL(dump_trace);
-
-static void
-print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
- printk(data);
- print_symbol(msg, symbol);
- printk("\n");
-}
-
-static void print_trace_warning(void *data, char *msg)
-{
- printk("%s%s\n", (char *)data, msg);
-}
-static int print_trace_stack(void *data, char *name)
-{
- return 0;
-}
-
-/*
- * Print one address/symbol entries per line.
- */
-static void print_trace_address(void *data, unsigned long addr, int reliable)
-{
- printk("%s [<%08lx>] ", (char *)data, addr);
- if (!reliable)
- printk("? ");
- print_symbol("%s\n", addr);
- touch_nmi_watchdog();
-}
-
-static const struct stacktrace_ops print_trace_ops = {
- .warning = print_trace_warning,
- .warning_symbol = print_trace_warning_symbol,
- .stack = print_trace_stack,
- .address = print_trace_address,
-};
+static int ignore_nmis;
-static void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp, char *log_lvl)
+static inline void conditional_sti(struct pt_regs *regs)
{
- dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
- printk("%s =======================\n", log_lvl);
+ if (regs->flags & X86_EFLAGS_IF)
+ local_irq_enable();
}
-void show_trace(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp)
+static inline void preempt_conditional_sti(struct pt_regs *regs)
{
- show_trace_log_lvl(task, regs, stack, bp, "");
+ inc_preempt_count();
+ if (regs->flags & X86_EFLAGS_IF)
+ local_irq_enable();
}
-static void
-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *sp, unsigned long bp, char *log_lvl)
+static inline void preempt_conditional_cli(struct pt_regs *regs)
{
- unsigned long *stack;
- int i;
-
- if (sp == NULL) {
- if (task)
- sp = (unsigned long *)task->thread.sp;
- else
- sp = (unsigned long *)&sp;
- }
-
- stack = sp;
- for (i = 0; i < kstack_depth_to_print; i++) {
- if (kstack_end(stack))
- break;
- if (i && ((i % 8) == 0))
- printk("\n%s ", log_lvl);
- printk("%08lx ", *stack++);
- }
- printk("\n%sCall Trace:\n", log_lvl);
-
- show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+ if (regs->flags & X86_EFLAGS_IF)
+ local_irq_disable();
+ dec_preempt_count();
}
-void show_stack(struct task_struct *task, unsigned long *sp)
+#ifdef CONFIG_X86_32
+static inline void
+die_if_kernel(const char *str, struct pt_regs *regs, long err)
{
- printk(" ");
- show_stack_log_lvl(task, NULL, sp, 0, "");
+ if (!user_mode_vm(regs))
+ die(str, regs, err);
}
/*
- * The architecture-independent dump_stack generator
+ * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
+ * invalid offset set (the LAZY one) and the faulting thread has
+ * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS,
+ * we set the offset field correctly and return 1.
*/
-void dump_stack(void)
+static int lazy_iobitmap_copy(void)
{
- unsigned long bp = 0;
- unsigned long stack;
-
-#ifdef CONFIG_FRAME_POINTER
- if (!bp)
- asm("movl %%ebp, %0" : "=r" (bp):);
-#endif
-
- printk("Pid: %d, comm: %.20s %s %s %.*s\n",
- current->pid, current->comm, print_tainted(),
- init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version);
-
- show_trace(current, NULL, &stack, bp);
-}
-
-EXPORT_SYMBOL(dump_stack);
-
-void show_registers(struct pt_regs *regs)
-{
- int i;
+ struct thread_struct *thread;
+ struct tss_struct *tss;
+ int cpu;
- print_modules();
- __show_registers(regs, 0);
+ cpu = get_cpu();
+ tss = &per_cpu(init_tss, cpu);
+ thread = &current->thread;
- printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
- TASK_COMM_LEN, current->comm, task_pid_nr(current),
- current_thread_info(), current, task_thread_info(current));
- /*
- * When in-kernel, we also print out the stack and code at the
- * time of the fault..
- */
- if (!user_mode_vm(regs)) {
- unsigned int code_prologue = code_bytes * 43 / 64;
- unsigned int code_len = code_bytes;
- unsigned char c;
- u8 *ip;
-
- printk("\n" KERN_EMERG "Stack: ");
- show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG);
-
- printk(KERN_EMERG "Code: ");
-
- ip = (u8 *)regs->ip - code_prologue;
- if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
- /* try starting at EIP */
- ip = (u8 *)regs->ip;
- code_len = code_len - code_prologue + 1;
- }
- for (i = 0; i < code_len; i++, ip++) {
- if (ip < (u8 *)PAGE_OFFSET ||
- probe_kernel_address(ip, c)) {
- printk(" Bad EIP value.");
- break;
- }
- if (ip == (u8 *)regs->ip)
- printk("<%02x> ", c);
- else
- printk("%02x ", c);
+ if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
+ thread->io_bitmap_ptr) {
+ memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
+ thread->io_bitmap_max);
+ /*
+ * If the previously set map was extending to higher ports
+ * than the current one, pad extra space with 0xff (no access).
+ */
+ if (thread->io_bitmap_max < tss->io_bitmap_max) {
+ memset((char *) tss->io_bitmap +
+ thread->io_bitmap_max, 0xff,
+ tss->io_bitmap_max - thread->io_bitmap_max);
}
- }
- printk("\n");
-}
-
-int is_valid_bugaddr(unsigned long ip)
-{
- unsigned short ud2;
-
- if (ip < PAGE_OFFSET)
- return 0;
- if (probe_kernel_address((unsigned short *)ip, ud2))
- return 0;
-
- return ud2 == 0x0b0f;
-}
-
-static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
-static int die_owner = -1;
-static unsigned int die_nest_count;
-
-unsigned __kprobes long oops_begin(void)
-{
- unsigned long flags;
-
- oops_enter();
-
- if (die_owner != raw_smp_processor_id()) {
- console_verbose();
- raw_local_irq_save(flags);
- __raw_spin_lock(&die_lock);
- die_owner = smp_processor_id();
- die_nest_count = 0;
- bust_spinlocks(1);
- } else {
- raw_local_irq_save(flags);
- }
- die_nest_count++;
- return flags;
-}
-
-void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
-{
- bust_spinlocks(0);
- die_owner = -1;
- add_taint(TAINT_DIE);
- __raw_spin_unlock(&die_lock);
- raw_local_irq_restore(flags);
-
- if (!regs)
- return;
-
- if (kexec_should_crash(current))
- crash_kexec(regs);
-
- if (in_interrupt())
- panic("Fatal exception in interrupt");
-
- if (panic_on_oops)
- panic("Fatal exception");
-
- oops_exit();
- do_exit(signr);
-}
-
-int __kprobes __die(const char *str, struct pt_regs *regs, long err)
-{
- unsigned short ss;
- unsigned long sp;
+ tss->io_bitmap_max = thread->io_bitmap_max;
+ tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
+ tss->io_bitmap_owner = thread;
+ put_cpu();
- printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
-#ifdef CONFIG_PREEMPT
- printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
- printk("SMP ");
-#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC");
-#endif
- printk("\n");
- if (notify_die(DIE_OOPS, str, regs, err,
- current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
return 1;
-
- show_registers(regs);
- /* Executive summary in case the oops scrolled away */
- sp = (unsigned long) (&regs->sp);
- savesegment(ss, ss);
- if (user_mode(regs)) {
- sp = regs->sp;
- ss = regs->ss & 0xffff;
}
- printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
- print_symbol("%s", regs->ip);
- printk(" SS:ESP %04x:%08lx\n", ss, sp);
- return 0;
-}
-
-/*
- * This is gone through when something in the kernel has done something bad
- * and is about to be terminated:
- */
-void die(const char *str, struct pt_regs *regs, long err)
-{
- unsigned long flags = oops_begin();
-
- if (die_nest_count < 3) {
- report_bug(regs->ip, regs);
-
- if (__die(str, regs, err))
- regs = NULL;
- } else {
- printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
- }
-
- oops_end(flags, regs, SIGSEGV);
-}
+ put_cpu();
-static inline void
-die_if_kernel(const char *str, struct pt_regs *regs, long err)
-{
- if (!user_mode_vm(regs))
- die(str, regs, err);
+ return 0;
}
+#endif
static void __kprobes
-do_trap(int trapnr, int signr, char *str, int vm86, struct pt_regs *regs,
+do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
long error_code, siginfo_t *info)
{
struct task_struct *tsk = current;
+#ifdef CONFIG_X86_32
if (regs->flags & X86_VM_MASK) {
- if (vm86)
+ /*
+ * traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
+ * On nmi (interrupt 2), do_trap should not be called.
+ */
+ if (trapnr < 6)
goto vm86_trap;
goto trap_signal;
}
+#endif
if (!user_mode(regs))
goto kernel_trap;
+#ifdef CONFIG_X86_32
trap_signal:
+#endif
/*
* We want error_code and trap_no set for userspace faults and
* kernelspace faults which result in die(), but not
@@ -501,6 +197,18 @@ trap_signal:
tsk->thread.error_code = error_code;
tsk->thread.trap_no = trapnr;
+#ifdef CONFIG_X86_64
+ if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
+ printk_ratelimit()) {
+ printk(KERN_INFO
+ "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
+ tsk->comm, tsk->pid, str,
+ regs->ip, regs->sp, error_code);
+ print_vma_addr(" in ", regs->ip);
+ printk("\n");
+ }
+#endif
+
if (info)
force_sig_info(signr, info, tsk);
else
@@ -515,29 +223,29 @@ kernel_trap:
}
return;
+#ifdef CONFIG_X86_32
vm86_trap:
if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
error_code, trapnr))
goto trap_signal;
return;
+#endif
}
#define DO_ERROR(trapnr, signr, str, name) \
-void do_##name(struct pt_regs *regs, long error_code) \
+dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
{ \
- trace_hardirqs_fixup(); \
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
== NOTIFY_STOP) \
return; \
- do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
+ conditional_sti(regs); \
+ do_trap(trapnr, signr, str, regs, error_code, NULL); \
}
-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \
-void do_##name(struct pt_regs *regs, long error_code) \
+#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
+dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
{ \
siginfo_t info; \
- if (irq) \
- local_irq_enable(); \
info.si_signo = signr; \
info.si_errno = 0; \
info.si_code = sicode; \
@@ -545,90 +253,68 @@ void do_##name(struct pt_regs *regs, long error_code) \
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
== NOTIFY_STOP) \
return; \
- do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
+ conditional_sti(regs); \
+ do_trap(trapnr, signr, str, regs, error_code, &info); \
}
-#define DO_VM86_ERROR(trapnr, signr, str, name) \
-void do_##name(struct pt_regs *regs, long error_code) \
-{ \
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
- return; \
- do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
-}
-
-#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
-void do_##name(struct pt_regs *regs, long error_code) \
-{ \
- siginfo_t info; \
- info.si_signo = signr; \
- info.si_errno = 0; \
- info.si_code = sicode; \
- info.si_addr = (void __user *)siaddr; \
- trace_hardirqs_fixup(); \
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
- return; \
- do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
-}
-
-DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
-#ifndef CONFIG_KPROBES
-DO_VM86_ERROR(3, SIGTRAP, "int3", int3)
-#endif
-DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow)
-DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds)
-DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
+DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
+DO_ERROR(4, SIGSEGV, "overflow", overflow)
+DO_ERROR(5, SIGSEGV, "bounds", bounds)
+DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
+#ifdef CONFIG_X86_32
DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
-DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
-DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1)
+#endif
+DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
+
+#ifdef CONFIG_X86_64
+/* Runs on IST stack */
+dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
+{
+ if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
+ 12, SIGBUS) == NOTIFY_STOP)
+ return;
+ preempt_conditional_sti(regs);
+ do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL);
+ preempt_conditional_cli(regs);
+}
+
+dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
+{
+ static const char str[] = "double fault";
+ struct task_struct *tsk = current;
+
+ /* Return not checked because double check cannot be ignored */
+ notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV);
-void __kprobes
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = 8;
+
+ /* This is always a kernel trap and never fixable (and thus must
+ never return). */
+ for (;;)
+ die(str, regs, error_code);
+}
+#endif
+
+dotraplinkage void __kprobes
do_general_protection(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk;
- struct thread_struct *thread;
- struct tss_struct *tss;
- int cpu;
- cpu = get_cpu();
- tss = &per_cpu(init_tss, cpu);
- thread = &current->thread;
-
- /*
- * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
- * invalid offset set (the LAZY one) and the faulting thread has
- * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS
- * and we set the offset field correctly. Then we let the CPU to
- * restart the faulting instruction.
- */
- if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
- thread->io_bitmap_ptr) {
- memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
- thread->io_bitmap_max);
- /*
- * If the previously set map was extending to higher ports
- * than the current one, pad extra space with 0xff (no access).
- */
- if (thread->io_bitmap_max < tss->io_bitmap_max) {
- memset((char *) tss->io_bitmap +
- thread->io_bitmap_max, 0xff,
- tss->io_bitmap_max - thread->io_bitmap_max);
- }
- tss->io_bitmap_max = thread->io_bitmap_max;
- tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
- tss->io_bitmap_owner = thread;
- put_cpu();
+ conditional_sti(regs);
+#ifdef CONFIG_X86_32
+ if (lazy_iobitmap_copy()) {
+ /* restart the faulting instruction */
return;
}
- put_cpu();
if (regs->flags & X86_VM_MASK)
goto gp_in_vm86;
+#endif
tsk = current;
if (!user_mode(regs))
@@ -650,10 +336,12 @@ do_general_protection(struct pt_regs *regs, long error_code)
force_sig(SIGSEGV, tsk);
return;
+#ifdef CONFIG_X86_32
gp_in_vm86:
local_irq_enable();
handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
return;
+#endif
gp_in_kernel:
if (fixup_exception(regs))
@@ -690,7 +378,8 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs)
printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
/* Clear and disable the memory parity error line. */
- clear_mem_error(reason);
+ reason = (reason & 0xf) | 4;
+ outb(reason, 0x61);
}
static notrace __kprobes void
@@ -716,7 +405,8 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
static notrace __kprobes void
unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
{
- if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
+ if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
+ NOTIFY_STOP)
return;
#ifdef CONFIG_MCA
/*
@@ -739,41 +429,6 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
}
-static DEFINE_SPINLOCK(nmi_print_lock);
-
-void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
-{
- if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
- return;
-
- spin_lock(&nmi_print_lock);
- /*
- * We are in trouble anyway, lets at least try
- * to get a message out:
- */
- bust_spinlocks(1);
- printk(KERN_EMERG "%s", str);
- printk(" on CPU%d, ip %08lx, registers:\n",
- smp_processor_id(), regs->ip);
- show_registers(regs);
- if (do_panic)
- panic("Non maskable interrupt");
- console_silent();
- spin_unlock(&nmi_print_lock);
- bust_spinlocks(0);
-
- /*
- * If we are in kernel we are probably nested up pretty bad
- * and might aswell get out now while we still can:
- */
- if (!user_mode_vm(regs)) {
- current->thread.trap_no = 2;
- crash_kexec(regs);
- }
-
- do_exit(SIGSEGV);
-}
-
static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
{
unsigned char reason = 0;
@@ -812,22 +467,25 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
mem_parity_error(reason, regs);
if (reason & 0x40)
io_check_error(reason, regs);
+#ifdef CONFIG_X86_32
/*
* Reassert NMI in case it became active meanwhile
* as it's edge-triggered:
*/
reassert_nmi();
+#endif
}
-notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code)
+dotraplinkage notrace __kprobes void
+do_nmi(struct pt_regs *regs, long error_code)
{
- int cpu;
-
nmi_enter();
- cpu = smp_processor_id();
-
- ++nmi_count(cpu);
+#ifdef CONFIG_X86_32
+ { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); }
+#else
+ add_pda(__nmi_count, 1);
+#endif
if (!ignore_nmis)
default_do_nmi(regs);
@@ -847,21 +505,44 @@ void restart_nmi(void)
acpi_nmi_enable();
}
-#ifdef CONFIG_KPROBES
-void __kprobes do_int3(struct pt_regs *regs, long error_code)
+/* May run on IST stack. */
+dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
{
- trace_hardirqs_fixup();
-
+#ifdef CONFIG_KPROBES
if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
== NOTIFY_STOP)
return;
- /*
- * This is an interrupt gate, because kprobes wants interrupts
- * disabled. Normal trap handlers don't.
- */
- restore_interrupts(regs);
+#else
+ if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
+ == NOTIFY_STOP)
+ return;
+#endif
+
+ preempt_conditional_sti(regs);
+ do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
+ preempt_conditional_cli(regs);
+}
- do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
+#ifdef CONFIG_X86_64
+/* Help handler running on IST stack to switch back to user stack
+ for scheduling or signal handling. The actual stack switch is done in
+ entry.S */
+asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
+{
+ struct pt_regs *regs = eregs;
+ /* Did already sync */
+ if (eregs == (struct pt_regs *)eregs->sp)
+ ;
+ /* Exception from user space */
+ else if (user_mode(eregs))
+ regs = task_pt_regs(current);
+ /* Exception from kernel and interrupts are enabled. Move to
+ kernel process stack. */
+ else if (eregs->flags & X86_EFLAGS_IF)
+ regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
+ if (eregs != regs)
+ *regs = *eregs;
+ return regs;
}
#endif
@@ -886,13 +567,14 @@ void __kprobes do_int3(struct pt_regs *regs, long error_code)
* about restoring all the debug state, and ptrace doesn't have to
* find every occurrence of the TF bit that could be saved away even
* by user code)
+ *
+ * May run on IST stack.
*/
-void __kprobes do_debug(struct pt_regs *regs, long error_code)
+dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk = current;
- unsigned int condition;
-
- trace_hardirqs_fixup();
+ unsigned long condition;
+ int si_code;
get_debugreg(condition, 6);
@@ -905,9 +587,9 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
SIGTRAP) == NOTIFY_STOP)
return;
+
/* It's safe to allow irq's after DR6 has been saved */
- if (regs->flags & X86_EFLAGS_IF)
- local_irq_enable();
+ preempt_conditional_sti(regs);
/* Mask out spurious debug traps due to lazy DR7 setting */
if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
@@ -915,8 +597,10 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
goto clear_dr7;
}
+#ifdef CONFIG_X86_32
if (regs->flags & X86_VM_MASK)
goto debug_vm86;
+#endif
/* Save debug status register where ptrace can see it */
tsk->thread.debugreg6 = condition;
@@ -926,17 +610,13 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
* kernel space (but re-enable TF when returning to user mode).
*/
if (condition & DR_STEP) {
- /*
- * We already checked v86 mode above, so we can
- * check for kernel mode by just checking the CPL
- * of CS.
- */
if (!user_mode(regs))
goto clear_TF_reenable;
}
+ si_code = get_si_code(condition);
/* Ok, finally something we can handle */
- send_sigtrap(tsk, regs, error_code);
+ send_sigtrap(tsk, regs, error_code, si_code);
/*
* Disable additional traps. They'll be re-enabled when
@@ -944,18 +624,37 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
*/
clear_dr7:
set_debugreg(0, 7);
+ preempt_conditional_cli(regs);
return;
+#ifdef CONFIG_X86_32
debug_vm86:
handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
+ preempt_conditional_cli(regs);
return;
+#endif
clear_TF_reenable:
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
regs->flags &= ~X86_EFLAGS_TF;
+ preempt_conditional_cli(regs);
return;
}
+#ifdef CONFIG_X86_64
+static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
+{
+ if (fixup_exception(regs))
+ return 1;
+
+ notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE);
+ /* Illegal floating point operation in the kernel */
+ current->thread.trap_no = trapnr;
+ die(str, regs, 0);
+ return 0;
+}
+#endif
+
/*
* Note that we play around with the 'TS' bit in an attempt to get
* the correct behaviour even in the presence of the asynchronous
@@ -992,7 +691,9 @@ void math_error(void __user *ip)
swd = get_fpu_swd(task);
switch (swd & ~cwd & 0x3f) {
case 0x000: /* No unmasked exception */
+#ifdef CONFIG_X86_32
return;
+#endif
default: /* Multiple exceptions */
break;
case 0x001: /* Invalid Op */
@@ -1020,9 +721,18 @@ void math_error(void __user *ip)
force_sig_info(SIGFPE, &info, task);
}
-void do_coprocessor_error(struct pt_regs *regs, long error_code)
+dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
{
+ conditional_sti(regs);
+
+#ifdef CONFIG_X86_32
ignore_fpu_irq = 1;
+#else
+ if (!user_mode(regs) &&
+ kernel_math_error(regs, "kernel x87 math error", 16))
+ return;
+#endif
+
math_error((void __user *)regs->ip);
}
@@ -1074,8 +784,12 @@ static void simd_math_error(void __user *ip)
force_sig_info(SIGFPE, &info, task);
}
-void do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
+dotraplinkage void
+do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
{
+ conditional_sti(regs);
+
+#ifdef CONFIG_X86_32
if (cpu_has_xmm) {
/* Handle SIMD FPU exceptions on PIII+ processors. */
ignore_fpu_irq = 1;
@@ -1094,16 +808,25 @@ void do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
current->thread.error_code = error_code;
die_if_kernel("cache flush denied", regs, error_code);
force_sig(SIGSEGV, current);
+#else
+ if (!user_mode(regs) &&
+ kernel_math_error(regs, "kernel simd math error", 19))
+ return;
+ simd_math_error((void __user *)regs->ip);
+#endif
}
-void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
+dotraplinkage void
+do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
{
+ conditional_sti(regs);
#if 0
/* No need to warn about this any longer. */
printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
#endif
}
+#ifdef CONFIG_X86_32
unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
{
struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id());
@@ -1122,6 +845,15 @@ unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
return new_kesp;
}
+#else
+asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
+{
+}
+
+asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
+{
+}
+#endif
/*
* 'math_state_restore()' saves the current math information in the
@@ -1154,14 +886,24 @@ asmlinkage void math_state_restore(void)
}
clts(); /* Allow maths ops (or we recurse) */
+#ifdef CONFIG_X86_32
restore_fpu(tsk);
+#else
+ /*
+ * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+ */
+ if (unlikely(restore_fpu_checking(tsk))) {
+ stts();
+ force_sig(SIGSEGV, tsk);
+ return;
+ }
+#endif
thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
tsk->fpu_counter++;
}
EXPORT_SYMBOL_GPL(math_state_restore);
#ifndef CONFIG_MATH_EMULATION
-
asmlinkage void math_emulate(long arg)
{
printk(KERN_EMERG
@@ -1170,12 +912,54 @@ asmlinkage void math_emulate(long arg)
force_sig(SIGFPE, current);
schedule();
}
-
#endif /* CONFIG_MATH_EMULATION */
+dotraplinkage void __kprobes
+do_device_not_available(struct pt_regs *regs, long error)
+{
+#ifdef CONFIG_X86_32
+ if (read_cr0() & X86_CR0_EM) {
+ conditional_sti(regs);
+ math_emulate(0);
+ } else {
+ math_state_restore(); /* interrupts still off */
+ conditional_sti(regs);
+ }
+#else
+ math_state_restore();
+#endif
+}
+
+#ifdef CONFIG_X86_32
+#ifdef CONFIG_X86_MCE
+dotraplinkage void __kprobes do_machine_check(struct pt_regs *regs, long error)
+{
+ conditional_sti(regs);
+ machine_check_vector(regs, error);
+}
+#endif
+
+dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
+{
+ siginfo_t info;
+ local_irq_enable();
+
+ info.si_signo = SIGILL;
+ info.si_errno = 0;
+ info.si_code = ILL_BADSTK;
+ info.si_addr = 0;
+ if (notify_die(DIE_TRAP, "iret exception",
+ regs, error_code, 32, SIGILL) == NOTIFY_STOP)
+ return;
+ do_trap(32, SIGILL, "iret exception", regs, error_code, &info);
+}
+#endif
+
void __init trap_init(void)
{
+#ifdef CONFIG_X86_32
int i;
+#endif
#ifdef CONFIG_EISA
void __iomem *p = early_ioremap(0x0FFFD9, 4);
@@ -1185,29 +969,40 @@ void __init trap_init(void)
early_iounmap(p, 4);
#endif
- set_trap_gate(0, &divide_error);
- set_intr_gate(1, &debug);
- set_intr_gate(2, &nmi);
- set_system_intr_gate(3, &int3); /* int3 can be called from all */
- set_system_gate(4, &overflow); /* int4 can be called from all */
- set_trap_gate(5, &bounds);
- set_trap_gate(6, &invalid_op);
- set_trap_gate(7, &device_not_available);
+ set_intr_gate(0, &divide_error);
+ set_intr_gate_ist(1, &debug, DEBUG_STACK);
+ set_intr_gate_ist(2, &nmi, NMI_STACK);
+ /* int3 can be called from all */
+ set_system_intr_gate_ist(3, &int3, DEBUG_STACK);
+ /* int4 can be called from all */
+ set_system_intr_gate(4, &overflow);
+ set_intr_gate(5, &bounds);
+ set_intr_gate(6, &invalid_op);
+ set_intr_gate(7, &device_not_available);
+#ifdef CONFIG_X86_32
set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS);
- set_trap_gate(9, &coprocessor_segment_overrun);
- set_trap_gate(10, &invalid_TSS);
- set_trap_gate(11, &segment_not_present);
- set_trap_gate(12, &stack_segment);
- set_trap_gate(13, &general_protection);
+#else
+ set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK);
+#endif
+ set_intr_gate(9, &coprocessor_segment_overrun);
+ set_intr_gate(10, &invalid_TSS);
+ set_intr_gate(11, &segment_not_present);
+ set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK);
+ set_intr_gate(13, &general_protection);
set_intr_gate(14, &page_fault);
- set_trap_gate(15, &spurious_interrupt_bug);
- set_trap_gate(16, &coprocessor_error);
- set_trap_gate(17, &alignment_check);
+ set_intr_gate(15, &spurious_interrupt_bug);
+ set_intr_gate(16, &coprocessor_error);
+ set_intr_gate(17, &alignment_check);
#ifdef CONFIG_X86_MCE
- set_trap_gate(18, &machine_check);
+ set_intr_gate_ist(18, &machine_check, MCE_STACK);
#endif
- set_trap_gate(19, &simd_coprocessor_error);
+ set_intr_gate(19, &simd_coprocessor_error);
+#ifdef CONFIG_IA32_EMULATION
+ set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
+#endif
+
+#ifdef CONFIG_X86_32
if (cpu_has_fxsr) {
printk(KERN_INFO "Enabling fast FPU save and restore... ");
set_in_cr4(X86_CR4_OSFXSR);
@@ -1220,36 +1015,20 @@ void __init trap_init(void)
printk("done.\n");
}
- set_system_gate(SYSCALL_VECTOR, &system_call);
+ set_system_trap_gate(SYSCALL_VECTOR, &system_call);
/* Reserve all the builtin and the syscall vector: */
for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
set_bit(i, used_vectors);
set_bit(SYSCALL_VECTOR, used_vectors);
-
+#endif
/*
* Should be a barrier for any external CPU state:
*/
cpu_init();
+#ifdef CONFIG_X86_32
trap_init_hook();
+#endif
}
-
-static int __init kstack_setup(char *s)
-{
- kstack_depth_to_print = simple_strtoul(s, NULL, 0);
-
- return 1;
-}
-__setup("kstack=", kstack_setup);
-
-static int __init code_bytes_setup(char *s)
-{
- code_bytes = simple_strtoul(s, NULL, 0);
- if (code_bytes > 8192)
- code_bytes = 8192;
-
- return 1;
-}
-__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
deleted file mode 100644
index 2887a789e38..00000000000
--- a/arch/x86/kernel/traps_64.c
+++ /dev/null
@@ -1,1214 +0,0 @@
-/*
- * Copyright (C) 1991, 1992 Linus Torvalds
- * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
- *
- * Pentium III FXSR, SSE support
- * Gareth Hughes <gareth@valinux.com>, May 2000
- */
-
-/*
- * 'Traps.c' handles hardware traps and faults after we have saved some
- * state in 'entry.S'.
- */
-#include <linux/moduleparam.h>
-#include <linux/interrupt.h>
-#include <linux/kallsyms.h>
-#include <linux/spinlock.h>
-#include <linux/kprobes.h>
-#include <linux/uaccess.h>
-#include <linux/utsname.h>
-#include <linux/kdebug.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/ptrace.h>
-#include <linux/string.h>
-#include <linux/unwind.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/kexec.h>
-#include <linux/sched.h>
-#include <linux/timer.h>
-#include <linux/init.h>
-#include <linux/bug.h>
-#include <linux/nmi.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/io.h>
-
-#if defined(CONFIG_EDAC)
-#include <linux/edac.h>
-#endif
-
-#include <asm/stacktrace.h>
-#include <asm/processor.h>
-#include <asm/debugreg.h>
-#include <asm/atomic.h>
-#include <asm/system.h>
-#include <asm/unwind.h>
-#include <asm/desc.h>
-#include <asm/i387.h>
-#include <asm/pgalloc.h>
-#include <asm/proto.h>
-#include <asm/pda.h>
-#include <asm/traps.h>
-
-#include <mach_traps.h>
-
-int panic_on_unrecovered_nmi;
-int kstack_depth_to_print = 12;
-static unsigned int code_bytes = 64;
-static int ignore_nmis;
-static int die_counter;
-
-static inline void conditional_sti(struct pt_regs *regs)
-{
- if (regs->flags & X86_EFLAGS_IF)
- local_irq_enable();
-}
-
-static inline void preempt_conditional_sti(struct pt_regs *regs)
-{
- inc_preempt_count();
- if (regs->flags & X86_EFLAGS_IF)
- local_irq_enable();
-}
-
-static inline void preempt_conditional_cli(struct pt_regs *regs)
-{
- if (regs->flags & X86_EFLAGS_IF)
- local_irq_disable();
- /* Make sure to not schedule here because we could be running
- on an exception stack. */
- dec_preempt_count();
-}
-
-void printk_address(unsigned long address, int reliable)
-{
- printk(" [<%016lx>] %s%pS\n",
- address, reliable ? "" : "? ", (void *) address);
-}
-
-static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
- unsigned *usedp, char **idp)
-{
- static char ids[][8] = {
- [DEBUG_STACK - 1] = "#DB",
- [NMI_STACK - 1] = "NMI",
- [DOUBLEFAULT_STACK - 1] = "#DF",
- [STACKFAULT_STACK - 1] = "#SS",
- [MCE_STACK - 1] = "#MC",
-#if DEBUG_STKSZ > EXCEPTION_STKSZ
- [N_EXCEPTION_STACKS ...
- N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
-#endif
- };
- unsigned k;
-
- /*
- * Iterate over all exception stacks, and figure out whether
- * 'stack' is in one of them:
- */
- for (k = 0; k < N_EXCEPTION_STACKS; k++) {
- unsigned long end = per_cpu(orig_ist, cpu).ist[k];
- /*
- * Is 'stack' above this exception frame's end?
- * If yes then skip to the next frame.
- */
- if (stack >= end)
- continue;
- /*
- * Is 'stack' above this exception frame's start address?
- * If yes then we found the right frame.
- */
- if (stack >= end - EXCEPTION_STKSZ) {
- /*
- * Make sure we only iterate through an exception
- * stack once. If it comes up for the second time
- * then there's something wrong going on - just
- * break out and return NULL:
- */
- if (*usedp & (1U << k))
- break;
- *usedp |= 1U << k;
- *idp = ids[k];
- return (unsigned long *)end;
- }
- /*
- * If this is a debug stack, and if it has a larger size than
- * the usual exception stacks, then 'stack' might still
- * be within the lower portion of the debug stack:
- */
-#if DEBUG_STKSZ > EXCEPTION_STKSZ
- if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
- unsigned j = N_EXCEPTION_STACKS - 1;
-
- /*
- * Black magic. A large debug stack is composed of
- * multiple exception stack entries, which we
- * iterate through now. Dont look:
- */
- do {
- ++j;
- end -= EXCEPTION_STKSZ;
- ids[j][4] = '1' + (j - N_EXCEPTION_STACKS);
- } while (stack < end - EXCEPTION_STKSZ);
- if (*usedp & (1U << j))
- break;
- *usedp |= 1U << j;
- *idp = ids[j];
- return (unsigned long *)end;
- }
-#endif
- }
- return NULL;
-}
-
-/*
- * x86-64 can have up to three kernel stacks:
- * process stack
- * interrupt stack
- * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
- */
-
-static inline int valid_stack_ptr(struct thread_info *tinfo,
- void *p, unsigned int size, void *end)
-{
- void *t = tinfo;
- if (end) {
- if (p < end && p >= (end-THREAD_SIZE))
- return 1;
- else
- return 0;
- }
- return p > t && p < t + THREAD_SIZE - size;
-}
-
-/* The form of the top of the frame on the stack */
-struct stack_frame {
- struct stack_frame *next_frame;
- unsigned long return_address;
-};
-
-static inline unsigned long
-print_context_stack(struct thread_info *tinfo,
- unsigned long *stack, unsigned long bp,
- const struct stacktrace_ops *ops, void *data,
- unsigned long *end)
-{
- struct stack_frame *frame = (struct stack_frame *)bp;
-
- while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
- unsigned long addr;
-
- addr = *stack;
- if (__kernel_text_address(addr)) {
- if ((unsigned long) stack == bp + 8) {
- ops->address(data, addr, 1);
- frame = frame->next_frame;
- bp = (unsigned long) frame;
- } else {
- ops->address(data, addr, bp == 0);
- }
- }
- stack++;
- }
- return bp;
-}
-
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp,
- const struct stacktrace_ops *ops, void *data)
-{
- const unsigned cpu = get_cpu();
- unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
- unsigned used = 0;
- struct thread_info *tinfo;
-
- if (!task)
- task = current;
-
- if (!stack) {
- unsigned long dummy;
- stack = &dummy;
- if (task && task != current)
- stack = (unsigned long *)task->thread.sp;
- }
-
-#ifdef CONFIG_FRAME_POINTER
- if (!bp) {
- if (task == current) {
- /* Grab bp right from our regs */
- asm("movq %%rbp, %0" : "=r" (bp) : );
- } else {
- /* bp is the last reg pushed by switch_to */
- bp = *(unsigned long *) task->thread.sp;
- }
- }
-#endif
-
- /*
- * Print function call entries in all stacks, starting at the
- * current stack address. If the stacks consist of nested
- * exceptions
- */
- tinfo = task_thread_info(task);
- for (;;) {
- char *id;
- unsigned long *estack_end;
- estack_end = in_exception_stack(cpu, (unsigned long)stack,
- &used, &id);
-
- if (estack_end) {
- if (ops->stack(data, id) < 0)
- break;
-
- bp = print_context_stack(tinfo, stack, bp, ops,
- data, estack_end);
- ops->stack(data, "<EOE>");
- /*
- * We link to the next stack via the
- * second-to-last pointer (index -2 to end) in the
- * exception stack:
- */
- stack = (unsigned long *) estack_end[-2];
- continue;
- }
- if (irqstack_end) {
- unsigned long *irqstack;
- irqstack = irqstack_end -
- (IRQSTACKSIZE - 64) / sizeof(*irqstack);
-
- if (stack >= irqstack && stack < irqstack_end) {
- if (ops->stack(data, "IRQ") < 0)
- break;
- bp = print_context_stack(tinfo, stack, bp,
- ops, data, irqstack_end);
- /*
- * We link to the next stack (which would be
- * the process stack normally) the last
- * pointer (index -1 to end) in the IRQ stack:
- */
- stack = (unsigned long *) (irqstack_end[-1]);
- irqstack_end = NULL;
- ops->stack(data, "EOI");
- continue;
- }
- }
- break;
- }
-
- /*
- * This handles the process stack:
- */
- bp = print_context_stack(tinfo, stack, bp, ops, data, NULL);
- put_cpu();
-}
-EXPORT_SYMBOL(dump_trace);
-
-static void
-print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
- print_symbol(msg, symbol);
- printk("\n");
-}
-
-static void print_trace_warning(void *data, char *msg)
-{
- printk("%s\n", msg);
-}
-
-static int print_trace_stack(void *data, char *name)
-{
- printk(" <%s> ", name);
- return 0;
-}
-
-static void print_trace_address(void *data, unsigned long addr, int reliable)
-{
- touch_nmi_watchdog();
- printk_address(addr, reliable);
-}
-
-static const struct stacktrace_ops print_trace_ops = {
- .warning = print_trace_warning,
- .warning_symbol = print_trace_warning_symbol,
- .stack = print_trace_stack,
- .address = print_trace_address,
-};
-
-static void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp, char *log_lvl)
-{
- printk("Call Trace:\n");
- dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
-}
-
-void show_trace(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp)
-{
- show_trace_log_lvl(task, regs, stack, bp, "");
-}
-
-static void
-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *sp, unsigned long bp, char *log_lvl)
-{
- unsigned long *stack;
- int i;
- const int cpu = smp_processor_id();
- unsigned long *irqstack_end =
- (unsigned long *) (cpu_pda(cpu)->irqstackptr);
- unsigned long *irqstack =
- (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
-
- /*
- * debugging aid: "show_stack(NULL, NULL);" prints the
- * back trace for this cpu.
- */
-
- if (sp == NULL) {
- if (task)
- sp = (unsigned long *)task->thread.sp;
- else
- sp = (unsigned long *)&sp;
- }
-
- stack = sp;
- for (i = 0; i < kstack_depth_to_print; i++) {
- if (stack >= irqstack && stack <= irqstack_end) {
- if (stack == irqstack_end) {
- stack = (unsigned long *) (irqstack_end[-1]);
- printk(" <EOI> ");
- }
- } else {
- if (((long) stack & (THREAD_SIZE-1)) == 0)
- break;
- }
- if (i && ((i % 4) == 0))
- printk("\n");
- printk(" %016lx", *stack++);
- touch_nmi_watchdog();
- }
- printk("\n");
- show_trace_log_lvl(task, regs, sp, bp, log_lvl);
-}
-
-void show_stack(struct task_struct *task, unsigned long *sp)
-{
- show_stack_log_lvl(task, NULL, sp, 0, "");
-}
-
-/*
- * The architecture-independent dump_stack generator
- */
-void dump_stack(void)
-{
- unsigned long bp = 0;
- unsigned long stack;
-
-#ifdef CONFIG_FRAME_POINTER
- if (!bp)
- asm("movq %%rbp, %0" : "=r" (bp) : );
-#endif
-
- printk("Pid: %d, comm: %.20s %s %s %.*s\n",
- current->pid, current->comm, print_tainted(),
- init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version);
- show_trace(NULL, NULL, &stack, bp);
-}
-EXPORT_SYMBOL(dump_stack);
-
-void show_registers(struct pt_regs *regs)
-{
- int i;
- unsigned long sp;
- const int cpu = smp_processor_id();
- struct task_struct *cur = cpu_pda(cpu)->pcurrent;
-
- sp = regs->sp;
- printk("CPU %d ", cpu);
- __show_regs(regs);
- printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
- cur->comm, cur->pid, task_thread_info(cur), cur);
-
- /*
- * When in-kernel, we also print out the stack and code at the
- * time of the fault..
- */
- if (!user_mode(regs)) {
- unsigned int code_prologue = code_bytes * 43 / 64;
- unsigned int code_len = code_bytes;
- unsigned char c;
- u8 *ip;
-
- printk("Stack: ");
- show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
- regs->bp, "");
-
- printk(KERN_EMERG "Code: ");
-
- ip = (u8 *)regs->ip - code_prologue;
- if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
- /* try starting at RIP */
- ip = (u8 *)regs->ip;
- code_len = code_len - code_prologue + 1;
- }
- for (i = 0; i < code_len; i++, ip++) {
- if (ip < (u8 *)PAGE_OFFSET ||
- probe_kernel_address(ip, c)) {
- printk(" Bad RIP value.");
- break;
- }
- if (ip == (u8 *)regs->ip)
- printk("<%02x> ", c);
- else
- printk("%02x ", c);
- }
- }
- printk("\n");
-}
-
-int is_valid_bugaddr(unsigned long ip)
-{
- unsigned short ud2;
-
- if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2)))
- return 0;
-
- return ud2 == 0x0b0f;
-}
-
-static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
-static int die_owner = -1;
-static unsigned int die_nest_count;
-
-unsigned __kprobes long oops_begin(void)
-{
- int cpu;
- unsigned long flags;
-
- oops_enter();
-
- /* racy, but better than risking deadlock. */
- raw_local_irq_save(flags);
- cpu = smp_processor_id();
- if (!__raw_spin_trylock(&die_lock)) {
- if (cpu == die_owner)
- /* nested oops. should stop eventually */;
- else
- __raw_spin_lock(&die_lock);
- }
- die_nest_count++;
- die_owner = cpu;
- console_verbose();
- bust_spinlocks(1);
- return flags;
-}
-
-void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
-{
- die_owner = -1;
- bust_spinlocks(0);
- die_nest_count--;
- if (!die_nest_count)
- /* Nest count reaches zero, release the lock. */
- __raw_spin_unlock(&die_lock);
- raw_local_irq_restore(flags);
- if (!regs) {
- oops_exit();
- return;
- }
- if (panic_on_oops)
- panic("Fatal exception");
- oops_exit();
- do_exit(signr);
-}
-
-int __kprobes __die(const char *str, struct pt_regs *regs, long err)
-{
- printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff, ++die_counter);
-#ifdef CONFIG_PREEMPT
- printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
- printk("SMP ");
-#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC");
-#endif
- printk("\n");
- if (notify_die(DIE_OOPS, str, regs, err,
- current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
- return 1;
-
- show_registers(regs);
- add_taint(TAINT_DIE);
- /* Executive summary in case the oops scrolled away */
- printk(KERN_ALERT "RIP ");
- printk_address(regs->ip, 1);
- printk(" RSP <%016lx>\n", regs->sp);
- if (kexec_should_crash(current))
- crash_kexec(regs);
- return 0;
-}
-
-void die(const char *str, struct pt_regs *regs, long err)
-{
- unsigned long flags = oops_begin();
-
- if (!user_mode(regs))
- report_bug(regs->ip, regs);
-
- if (__die(str, regs, err))
- regs = NULL;
- oops_end(flags, regs, SIGSEGV);
-}
-
-notrace __kprobes void
-die_nmi(char *str, struct pt_regs *regs, int do_panic)
-{
- unsigned long flags;
-
- if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
- return;
-
- flags = oops_begin();
- /*
- * We are in trouble anyway, lets at least try
- * to get a message out.
- */
- printk(KERN_EMERG "%s", str);
- printk(" on CPU%d, ip %08lx, registers:\n",
- smp_processor_id(), regs->ip);
- show_registers(regs);
- if (kexec_should_crash(current))
- crash_kexec(regs);
- if (do_panic || panic_on_oops)
- panic("Non maskable interrupt");
- oops_end(flags, NULL, SIGBUS);
- nmi_exit();
- local_irq_enable();
- do_exit(SIGBUS);
-}
-
-static void __kprobes
-do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
- long error_code, siginfo_t *info)
-{
- struct task_struct *tsk = current;
-
- if (!user_mode(regs))
- goto kernel_trap;
-
- /*
- * We want error_code and trap_no set for userspace faults and
- * kernelspace faults which result in die(), but not
- * kernelspace faults which are fixed up. die() gives the
- * process no chance to handle the signal and notice the
- * kernel fault information, so that won't result in polluting
- * the information about previously queued, but not yet
- * delivered, faults. See also do_general_protection below.
- */
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = trapnr;
-
- if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
- printk_ratelimit()) {
- printk(KERN_INFO
- "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
- tsk->comm, tsk->pid, str,
- regs->ip, regs->sp, error_code);
- print_vma_addr(" in ", regs->ip);
- printk("\n");
- }
-
- if (info)
- force_sig_info(signr, info, tsk);
- else
- force_sig(signr, tsk);
- return;
-
-kernel_trap:
- if (!fixup_exception(regs)) {
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = trapnr;
- die(str, regs, error_code);
- }
- return;
-}
-
-#define DO_ERROR(trapnr, signr, str, name) \
-asmlinkage void do_##name(struct pt_regs *regs, long error_code) \
-{ \
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
- return; \
- conditional_sti(regs); \
- do_trap(trapnr, signr, str, regs, error_code, NULL); \
-}
-
-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
-asmlinkage void do_##name(struct pt_regs *regs, long error_code) \
-{ \
- siginfo_t info; \
- info.si_signo = signr; \
- info.si_errno = 0; \
- info.si_code = sicode; \
- info.si_addr = (void __user *)siaddr; \
- trace_hardirqs_fixup(); \
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
- return; \
- conditional_sti(regs); \
- do_trap(trapnr, signr, str, regs, error_code, &info); \
-}
-
-DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
-DO_ERROR(4, SIGSEGV, "overflow", overflow)
-DO_ERROR(5, SIGSEGV, "bounds", bounds)
-DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
-DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
-DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
-DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
-DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
-
-/* Runs on IST stack */
-asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code)
-{
- if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
- 12, SIGBUS) == NOTIFY_STOP)
- return;
- preempt_conditional_sti(regs);
- do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL);
- preempt_conditional_cli(regs);
-}
-
-asmlinkage void do_double_fault(struct pt_regs *regs, long error_code)
-{
- static const char str[] = "double fault";
- struct task_struct *tsk = current;
-
- /* Return not checked because double check cannot be ignored */
- notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV);
-
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = 8;
-
- /* This is always a kernel trap and never fixable (and thus must
- never return). */
- for (;;)
- die(str, regs, error_code);
-}
-
-asmlinkage void __kprobes
-do_general_protection(struct pt_regs *regs, long error_code)
-{
- struct task_struct *tsk;
-
- conditional_sti(regs);
-
- tsk = current;
- if (!user_mode(regs))
- goto gp_in_kernel;
-
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = 13;
-
- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
- printk_ratelimit()) {
- printk(KERN_INFO
- "%s[%d] general protection ip:%lx sp:%lx error:%lx",
- tsk->comm, tsk->pid,
- regs->ip, regs->sp, error_code);
- print_vma_addr(" in ", regs->ip);
- printk("\n");
- }
-
- force_sig(SIGSEGV, tsk);
- return;
-
-gp_in_kernel:
- if (fixup_exception(regs))
- return;
-
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = 13;
- if (notify_die(DIE_GPF, "general protection fault", regs,
- error_code, 13, SIGSEGV) == NOTIFY_STOP)
- return;
- die("general protection fault", regs, error_code);
-}
-
-static notrace __kprobes void
-mem_parity_error(unsigned char reason, struct pt_regs *regs)
-{
- printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
- reason);
- printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
-
-#if defined(CONFIG_EDAC)
- if (edac_handler_set()) {
- edac_atomic_assert_error();
- return;
- }
-#endif
-
- if (panic_on_unrecovered_nmi)
- panic("NMI: Not continuing");
-
- printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
-
- /* Clear and disable the memory parity error line. */
- reason = (reason & 0xf) | 4;
- outb(reason, 0x61);
-}
-
-static notrace __kprobes void
-io_check_error(unsigned char reason, struct pt_regs *regs)
-{
- printk("NMI: IOCK error (debug interrupt?)\n");
- show_registers(regs);
-
- /* Re-enable the IOCK line, wait for a few seconds */
- reason = (reason & 0xf) | 8;
- outb(reason, 0x61);
- mdelay(2000);
- reason &= ~8;
- outb(reason, 0x61);
-}
-
-static notrace __kprobes void
-unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
-{
- if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
- NOTIFY_STOP)
- return;
- printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
- reason);
- printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
-
- if (panic_on_unrecovered_nmi)
- panic("NMI: Not continuing");
-
- printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
-}
-
-/* Runs on IST stack. This code must keep interrupts off all the time.
- Nested NMIs are prevented by the CPU. */
-asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs)
-{
- unsigned char reason = 0;
- int cpu;
-
- cpu = smp_processor_id();
-
- /* Only the BSP gets external NMIs from the system. */
- if (!cpu)
- reason = get_nmi_reason();
-
- if (!(reason & 0xc0)) {
- if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
- == NOTIFY_STOP)
- return;
- /*
- * Ok, so this is none of the documented NMI sources,
- * so it must be the NMI watchdog.
- */
- if (nmi_watchdog_tick(regs, reason))
- return;
- if (!do_nmi_callback(regs, cpu))
- unknown_nmi_error(reason, regs);
-
- return;
- }
- if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
- return;
-
- /* AK: following checks seem to be broken on modern chipsets. FIXME */
- if (reason & 0x80)
- mem_parity_error(reason, regs);
- if (reason & 0x40)
- io_check_error(reason, regs);
-}
-
-asmlinkage notrace __kprobes void
-do_nmi(struct pt_regs *regs, long error_code)
-{
- nmi_enter();
-
- add_pda(__nmi_count, 1);
-
- if (!ignore_nmis)
- default_do_nmi(regs);
-
- nmi_exit();
-}
-
-void stop_nmi(void)
-{
- acpi_nmi_disable();
- ignore_nmis++;
-}
-
-void restart_nmi(void)
-{
- ignore_nmis--;
- acpi_nmi_enable();
-}
-
-/* runs on IST stack. */
-asmlinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
-{
- trace_hardirqs_fixup();
-
- if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
- == NOTIFY_STOP)
- return;
-
- preempt_conditional_sti(regs);
- do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
- preempt_conditional_cli(regs);
-}
-
-/* Help handler running on IST stack to switch back to user stack
- for scheduling or signal handling. The actual stack switch is done in
- entry.S */
-asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
-{
- struct pt_regs *regs = eregs;
- /* Did already sync */
- if (eregs == (struct pt_regs *)eregs->sp)
- ;
- /* Exception from user space */
- else if (user_mode(eregs))
- regs = task_pt_regs(current);
- /* Exception from kernel and interrupts are enabled. Move to
- kernel process stack. */
- else if (eregs->flags & X86_EFLAGS_IF)
- regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
- if (eregs != regs)
- *regs = *eregs;
- return regs;
-}
-
-/* runs on IST stack. */
-asmlinkage void __kprobes do_debug(struct pt_regs *regs,
- unsigned long error_code)
-{
- struct task_struct *tsk = current;
- unsigned long condition;
- siginfo_t info;
-
- trace_hardirqs_fixup();
-
- get_debugreg(condition, 6);
-
- /*
- * The processor cleared BTF, so don't mark that we need it set.
- */
- clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
- tsk->thread.debugctlmsr = 0;
-
- if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
- SIGTRAP) == NOTIFY_STOP)
- return;
-
- preempt_conditional_sti(regs);
-
- /* Mask out spurious debug traps due to lazy DR7 setting */
- if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
- if (!tsk->thread.debugreg7)
- goto clear_dr7;
- }
-
- tsk->thread.debugreg6 = condition;
-
- /*
- * Single-stepping through TF: make sure we ignore any events in
- * kernel space (but re-enable TF when returning to user mode).
- */
- if (condition & DR_STEP) {
- if (!user_mode(regs))
- goto clear_TF_reenable;
- }
-
- /* Ok, finally something we can handle */
- tsk->thread.trap_no = 1;
- tsk->thread.error_code = error_code;
- info.si_signo = SIGTRAP;
- info.si_errno = 0;
- info.si_code = TRAP_BRKPT;
- info.si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
- force_sig_info(SIGTRAP, &info, tsk);
-
-clear_dr7:
- set_debugreg(0, 7);
- preempt_conditional_cli(regs);
- return;
-
-clear_TF_reenable:
- set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
- regs->flags &= ~X86_EFLAGS_TF;
- preempt_conditional_cli(regs);
- return;
-}
-
-static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
-{
- if (fixup_exception(regs))
- return 1;
-
- notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE);
- /* Illegal floating point operation in the kernel */
- current->thread.trap_no = trapnr;
- die(str, regs, 0);
- return 0;
-}
-
-/*
- * Note that we play around with the 'TS' bit in an attempt to get
- * the correct behaviour even in the presence of the asynchronous
- * IRQ13 behaviour
- */
-asmlinkage void do_coprocessor_error(struct pt_regs *regs)
-{
- void __user *ip = (void __user *)(regs->ip);
- struct task_struct *task;
- siginfo_t info;
- unsigned short cwd, swd;
-
- conditional_sti(regs);
- if (!user_mode(regs) &&
- kernel_math_error(regs, "kernel x87 math error", 16))
- return;
-
- /*
- * Save the info for the exception handler and clear the error.
- */
- task = current;
- save_init_fpu(task);
- task->thread.trap_no = 16;
- task->thread.error_code = 0;
- info.si_signo = SIGFPE;
- info.si_errno = 0;
- info.si_code = __SI_FAULT;
- info.si_addr = ip;
- /*
- * (~cwd & swd) will mask out exceptions that are not set to unmasked
- * status. 0x3f is the exception bits in these regs, 0x200 is the
- * C1 reg you need in case of a stack fault, 0x040 is the stack
- * fault bit. We should only be taking one exception at a time,
- * so if this combination doesn't produce any single exception,
- * then we have a bad program that isn't synchronizing its FPU usage
- * and it will suffer the consequences since we won't be able to
- * fully reproduce the context of the exception
- */
- cwd = get_fpu_cwd(task);
- swd = get_fpu_swd(task);
- switch (swd & ~cwd & 0x3f) {
- case 0x000: /* No unmasked exception */
- default: /* Multiple exceptions */
- break;
- case 0x001: /* Invalid Op */
- /*
- * swd & 0x240 == 0x040: Stack Underflow
- * swd & 0x240 == 0x240: Stack Overflow
- * User must clear the SF bit (0x40) if set
- */
- info.si_code = FPE_FLTINV;
- break;
- case 0x002: /* Denormalize */
- case 0x010: /* Underflow */
- info.si_code = FPE_FLTUND;
- break;
- case 0x004: /* Zero Divide */
- info.si_code = FPE_FLTDIV;
- break;
- case 0x008: /* Overflow */
- info.si_code = FPE_FLTOVF;
- break;
- case 0x020: /* Precision */
- info.si_code = FPE_FLTRES;
- break;
- }
- force_sig_info(SIGFPE, &info, task);
-}
-
-asmlinkage void bad_intr(void)
-{
- printk("bad interrupt");
-}
-
-asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
-{
- void __user *ip = (void __user *)(regs->ip);
- struct task_struct *task;
- siginfo_t info;
- unsigned short mxcsr;
-
- conditional_sti(regs);
- if (!user_mode(regs) &&
- kernel_math_error(regs, "kernel simd math error", 19))
- return;
-
- /*
- * Save the info for the exception handler and clear the error.
- */
- task = current;
- save_init_fpu(task);
- task->thread.trap_no = 19;
- task->thread.error_code = 0;
- info.si_signo = SIGFPE;
- info.si_errno = 0;
- info.si_code = __SI_FAULT;
- info.si_addr = ip;
- /*
- * The SIMD FPU exceptions are handled a little differently, as there
- * is only a single status/control register. Thus, to determine which
- * unmasked exception was caught we must mask the exception mask bits
- * at 0x1f80, and then use these to mask the exception bits at 0x3f.
- */
- mxcsr = get_fpu_mxcsr(task);
- switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
- case 0x000:
- default:
- break;
- case 0x001: /* Invalid Op */
- info.si_code = FPE_FLTINV;
- break;
- case 0x002: /* Denormalize */
- case 0x010: /* Underflow */
- info.si_code = FPE_FLTUND;
- break;
- case 0x004: /* Zero Divide */
- info.si_code = FPE_FLTDIV;
- break;
- case 0x008: /* Overflow */
- info.si_code = FPE_FLTOVF;
- break;
- case 0x020: /* Precision */
- info.si_code = FPE_FLTRES;
- break;
- }
- force_sig_info(SIGFPE, &info, task);
-}
-
-asmlinkage void do_spurious_interrupt_bug(struct pt_regs *regs)
-{
-}
-
-asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
-{
-}
-
-asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
-{
-}
-
-/*
- * 'math_state_restore()' saves the current math information in the
- * old math state array, and gets the new ones from the current task
- *
- * Careful.. There are problems with IBM-designed IRQ13 behaviour.
- * Don't touch unless you *really* know how it works.
- */
-asmlinkage void math_state_restore(void)
-{
- struct task_struct *me = current;
-
- if (!used_math()) {
- local_irq_enable();
- /*
- * does a slab alloc which can sleep
- */
- if (init_fpu(me)) {
- /*
- * ran out of memory!
- */
- do_group_exit(SIGKILL);
- return;
- }
- local_irq_disable();
- }
-
- clts(); /* Allow maths ops (or we recurse) */
- /*
- * Paranoid restore. send a SIGSEGV if we fail to restore the state.
- */
- if (unlikely(restore_fpu_checking(me))) {
- stts();
- force_sig(SIGSEGV, me);
- return;
- }
- task_thread_info(me)->status |= TS_USEDFPU;
- me->fpu_counter++;
-}
-EXPORT_SYMBOL_GPL(math_state_restore);
-
-void __init trap_init(void)
-{
- set_intr_gate(0, &divide_error);
- set_intr_gate_ist(1, &debug, DEBUG_STACK);
- set_intr_gate_ist(2, &nmi, NMI_STACK);
- /* int3 can be called from all */
- set_system_gate_ist(3, &int3, DEBUG_STACK);
- /* int4 can be called from all */
- set_system_gate(4, &overflow);
- set_intr_gate(5, &bounds);
- set_intr_gate(6, &invalid_op);
- set_intr_gate(7, &device_not_available);
- set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK);
- set_intr_gate(9, &coprocessor_segment_overrun);
- set_intr_gate(10, &invalid_TSS);
- set_intr_gate(11, &segment_not_present);
- set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK);
- set_intr_gate(13, &general_protection);
- set_intr_gate(14, &page_fault);
- set_intr_gate(15, &spurious_interrupt_bug);
- set_intr_gate(16, &coprocessor_error);
- set_intr_gate(17, &alignment_check);
-#ifdef CONFIG_X86_MCE
- set_intr_gate_ist(18, &machine_check, MCE_STACK);
-#endif
- set_intr_gate(19, &simd_coprocessor_error);
-
-#ifdef CONFIG_IA32_EMULATION
- set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
-#endif
- /*
- * Should be a barrier for any external CPU state:
- */
- cpu_init();
-}
-
-static int __init oops_setup(char *s)
-{
- if (!s)
- return -EINVAL;
- if (!strcmp(s, "panic"))
- panic_on_oops = 1;
- return 0;
-}
-early_param("oops", oops_setup);
-
-static int __init kstack_setup(char *s)
-{
- if (!s)
- return -EINVAL;
- kstack_depth_to_print = simple_strtoul(s, NULL, 0);
- return 0;
-}
-early_param("kstack", kstack_setup);
-
-static int __init code_bytes_setup(char *s)
-{
- code_bytes = simple_strtoul(s, NULL, 0);
- if (code_bytes > 8192)
- code_bytes = 8192;
-
- return 1;
-}
-__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 201e81a91a9..46e05447405 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -172,8 +172,8 @@ SECTIONS
.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
*(.x86_cpu_dev.init)
}
- SECURITY_INIT
__x86_cpu_dev_end = .;
+ SECURITY_INIT
. = ALIGN(8);
.parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 07713d64deb..9abac8a9d82 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -95,7 +95,9 @@ int save_i387_xstate(void __user *buf)
* Start with clearing the user buffer. This will present a
* clean context for the bytes not touched by the fxsave/xsave.
*/
- __clear_user(buf, sig_xstate_size);
+ err = __clear_user(buf, sig_xstate_size);
+ if (err)
+ return err;
if (task_thread_info(tsk)->status & TS_XSAVE)
err = xsave_user(buf);
@@ -114,6 +116,8 @@ int save_i387_xstate(void __user *buf)
if (task_thread_info(tsk)->status & TS_XSAVE) {
struct _fpstate __user *fx = buf;
+ struct _xstate __user *x = buf;
+ u64 xstate_bv;
err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved,
sizeof(struct _fpx_sw_bytes));
@@ -121,6 +125,31 @@ int save_i387_xstate(void __user *buf)
err |= __put_user(FP_XSTATE_MAGIC2,
(__u32 __user *) (buf + sig_xstate_size
- FP_XSTATE_MAGIC2_SIZE));
+
+ /*
+ * Read the xstate_bv which we copied (directly from the cpu or
+ * from the state in task struct) to the user buffers and
+ * set the FP/SSE bits.
+ */
+ err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv);
+
+ /*
+ * For legacy compatible, we always set FP/SSE bits in the bit
+ * vector while saving the state to the user context. This will
+ * enable us capturing any changes(during sigreturn) to
+ * the FP/SSE bits by the legacy applications which don't touch
+ * xstate_bv in the xsave header.
+ *
+ * xsave aware apps can change the xstate_bv in the xsave
+ * header as well as change any contents in the memory layout.
+ * xrestore as part of sigreturn will capture all the changes.
+ */
+ xstate_bv |= XSTATE_FPSSE;
+
+ err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv);
+
+ if (err)
+ return err;
}
return 1;
@@ -272,7 +301,7 @@ void __cpuinit xsave_init(void)
/*
* setup the xstate image representing the init state
*/
-void setup_xstate_init(void)
+static void __init setup_xstate_init(void)
{
init_xstate_buf = alloc_bootmem(xstate_size);
init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;