diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/mce.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-inject.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-severity.c | 15 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_intel.c | 12 |
5 files changed, 24 insertions, 14 deletions
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index fa5f71e021d..6b52980c29c 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -61,7 +61,7 @@ #define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */ #define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */ #define MCJ_EXCEPTION 0x8 /* raise as exception */ -#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */ +#define MCJ_IRQ_BROADCAST 0x10 /* do IRQ broadcasting */ #define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index ddc72f83933..5ac2d1fb28b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -153,7 +153,7 @@ static void raise_mce(struct mce *m) return; #ifdef CONFIG_X86_LOCAL_APIC - if (m->inject_flags & (MCJ_IRQ_BRAODCAST | MCJ_NMI_BROADCAST)) { + if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) { unsigned long start; int cpu; @@ -167,7 +167,7 @@ static void raise_mce(struct mce *m) cpumask_clear_cpu(cpu, mce_inject_cpumask); } if (!cpumask_empty(mce_inject_cpumask)) { - if (m->inject_flags & MCJ_IRQ_BRAODCAST) { + if (m->inject_flags & MCJ_IRQ_BROADCAST) { /* * don't wait because mce_irq_ipi is necessary * to be sync with following raise_local diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index beb1f1689e5..e2703520d12 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -110,22 +110,17 @@ static struct severity { /* known AR MCACODs: */ #ifdef CONFIG_MEMORY_FAILURE MCESEV( - KEEP, "HT thread notices Action required: data load error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), - MCGMASK(MCG_STATUS_EIPV, 0) + KEEP, "Action required but unaffected thread is continuable", + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR), + MCGMASK(MCG_STATUS_RIPV, MCG_STATUS_RIPV) ), MCESEV( - AR, "Action required: data load error", + AR, "Action required: data load error in a user process", SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), USER ), MCESEV( - KEEP, "HT thread notices Action required: instruction fetch error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR), - MCGMASK(MCG_STATUS_EIPV, 0) - ), - MCESEV( - AR, "Action required: instruction fetch error", + AR, "Action required: instruction fetch error in a user process", SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR), USER ), diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 9239504b41c..bf49cdbb010 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -89,7 +89,10 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait); static DEFINE_PER_CPU(struct mce, mces_seen); static int cpu_missing; -/* MCA banks polled by the period polling timer for corrected events */ +/* + * MCA banks polled by the period polling timer for corrected events. + * With Intel CMCI, this only has MCA banks which do not support CMCI (if any). + */ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL }; diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index ae1697c2afe..d56405309dc 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -24,6 +24,18 @@ * Also supports reliable discovery of shared banks. */ +/* + * CMCI can be delivered to multiple cpus that share a machine check bank + * so we need to designate a single cpu to process errors logged in each bank + * in the interrupt handler (otherwise we would have many races and potential + * double reporting of the same error). + * Note that this can change when a cpu is offlined or brought online since + * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear() + * disables CMCI on all banks owned by the cpu and clears this bitfield. At + * this point, cmci_rediscover() kicks in and a different cpu may end up + * taking ownership of some of the shared MCA banks that were previously + * owned by the offlined cpu. + */ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); /* |