summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu/mcheck/mce.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck/mce.c')
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c87
1 files changed, 59 insertions, 28 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 2f5aab26320..721a77ca811 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -85,6 +85,18 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;
+static void default_decode_mce(struct mce *m)
+{
+ pr_emerg("No human readable MCE decoding support on this CPU type.\n");
+ pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
+}
+
+/*
+ * CPU/chipset specific EDAC code can register a callback here to print
+ * MCE errors in a human-readable form:
+ */
+void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
+EXPORT_SYMBOL(x86_mce_decode_callback);
/* MCA banks polled by the period polling timer for corrected events */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
@@ -165,49 +177,46 @@ void mce_log(struct mce *mce)
set_bit(0, &mce_need_notify);
}
-void __weak decode_mce(struct mce *m)
-{
- return;
-}
-
static void print_mce(struct mce *m)
{
- printk(KERN_EMERG
- "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
+ pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
m->extcpu, m->mcgstatus, m->bank, m->status);
+
if (m->ip) {
- printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
- !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
- m->cs, m->ip);
+ pr_emerg("RIP%s %02x:<%016Lx> ",
+ !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
+ m->cs, m->ip);
+
if (m->cs == __KERNEL_CS)
print_symbol("{%s}", m->ip);
- printk(KERN_CONT "\n");
+ pr_cont("\n");
}
- printk(KERN_EMERG "TSC %llx ", m->tsc);
+
+ pr_emerg("TSC %llx ", m->tsc);
if (m->addr)
- printk(KERN_CONT "ADDR %llx ", m->addr);
+ pr_cont("ADDR %llx ", m->addr);
if (m->misc)
- printk(KERN_CONT "MISC %llx ", m->misc);
- printk(KERN_CONT "\n");
- printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
- m->cpuvendor, m->cpuid, m->time, m->socketid,
- m->apicid);
+ pr_cont("MISC %llx ", m->misc);
- decode_mce(m);
+ pr_cont("\n");
+ pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
+ m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid);
+
+ /*
+ * Print out human-readable details about the MCE error,
+ * (if the CPU has an implementation for that):
+ */
+ x86_mce_decode_callback(m);
}
static void print_mce_head(void)
{
- printk(KERN_EMERG "\nHARDWARE ERROR\n");
+ pr_emerg("\nHARDWARE ERROR\n");
}
static void print_mce_tail(void)
{
- printk(KERN_EMERG "This is not a software problem!\n"
-#if (!defined(CONFIG_EDAC) || !defined(CONFIG_CPU_SUP_AMD))
- "Run through mcelog --ascii to decode and contact your hardware vendor\n"
-#endif
- );
+ pr_emerg("This is not a software problem!\n");
}
#define PANIC_TIMEOUT 5 /* 5 seconds */
@@ -221,6 +230,7 @@ static atomic_t mce_fake_paniced;
static void wait_for_panic(void)
{
long timeout = PANIC_TIMEOUT*USEC_PER_SEC;
+
preempt_disable();
local_irq_enable();
while (timeout-- > 0)
@@ -288,6 +298,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
static int msr_to_offset(u32 msr)
{
unsigned bank = __get_cpu_var(injectm.bank);
+
if (msr == rip_msr)
return offsetof(struct mce, ip);
if (msr == MSR_IA32_MCx_STATUS(bank))
@@ -305,13 +316,25 @@ static int msr_to_offset(u32 msr)
static u64 mce_rdmsrl(u32 msr)
{
u64 v;
+
if (__get_cpu_var(injectm).finished) {
int offset = msr_to_offset(msr);
+
if (offset < 0)
return 0;
return *(u64 *)((char *)&__get_cpu_var(injectm) + offset);
}
- rdmsrl(msr, v);
+
+ if (rdmsrl_safe(msr, &v)) {
+ WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr);
+ /*
+ * Return zero in case the access faulted. This should
+ * not happen normally but can happen if the CPU does
+ * something weird, or if the code is buggy.
+ */
+ v = 0;
+ }
+
return v;
}
@@ -319,6 +342,7 @@ static void mce_wrmsrl(u32 msr, u64 v)
{
if (__get_cpu_var(injectm).finished) {
int offset = msr_to_offset(msr);
+
if (offset >= 0)
*(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v;
return;
@@ -415,7 +439,7 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
m->ip = mce_rdmsrl(rip_msr);
}
-#ifdef CONFIG_X86_LOCAL_APIC
+#ifdef CONFIG_X86_LOCAL_APIC
/*
* Called after interrupts have been reenabled again
* when a MCE happened during an interrupts off region
@@ -1172,6 +1196,7 @@ static int mce_banks_init(void)
return -ENOMEM;
for (i = 0; i < banks; i++) {
struct mce_bank *b = &mce_banks[i];
+
b->ctl = -1ULL;
b->init = 1;
}
@@ -1189,7 +1214,8 @@ static int __cpuinit mce_cap_init(void)
rdmsrl(MSR_IA32_MCG_CAP, cap);
b = cap & MCG_BANKCNT_MASK;
- printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b);
+ if (!banks)
+ printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b);
if (b > MAX_NR_BANKS) {
printk(KERN_WARNING
@@ -1203,6 +1229,7 @@ static int __cpuinit mce_cap_init(void)
banks = b;
if (!mce_banks) {
int err = mce_banks_init();
+
if (err)
return err;
}
@@ -1237,6 +1264,7 @@ static void mce_init(void)
for (i = 0; i < banks; i++) {
struct mce_bank *b = &mce_banks[i];
+
if (!b->init)
continue;
wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
@@ -1626,6 +1654,7 @@ static int mce_disable(void)
for (i = 0; i < banks; i++) {
struct mce_bank *b = &mce_banks[i];
+
if (b->init)
wrmsrl(MSR_IA32_MCx_CTL(i), 0);
}
@@ -1911,6 +1940,7 @@ static void mce_disable_cpu(void *h)
cmci_clear();
for (i = 0; i < banks; i++) {
struct mce_bank *b = &mce_banks[i];
+
if (b->init)
wrmsrl(MSR_IA32_MCx_CTL(i), 0);
}
@@ -1928,6 +1958,7 @@ static void mce_reenable_cpu(void *h)
cmci_reenable();
for (i = 0; i < banks; i++) {
struct mce_bank *b = &mce_banks[i];
+
if (b->init)
wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
}