diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-04 11:07:04 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-04 11:07:04 -0700 |
commit | b20c99eb668f10b855a9fd87e0a2f5db3fb3637d (patch) | |
tree | 87cb380f2006a1c5ee2c612fead142d261c64c4e /arch/x86/kernel/cpu | |
parent | bb8c4701704d81ef98657dc51adb99aa5a0c5ac9 (diff) | |
parent | ead6fa95b7e9d38b4526503403ba1c029b03dd72 (diff) |
Merge branch 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 RAS changes from Ingo Molnar:
"[ The reason for drivers/ updates is that Boris asked for the
drivers/edac/ changes to go via x86/ras in this cycle ]
Main changes:
- AMD CPUs:
. Add ECC event decoding support for new F15h models
. Various erratum fixes
. Fix single-channel on dual-channel-controllers bug.
- Intel CPUs:
. UC uncorrectable memory error parsing fix
. Add support for CMC (Corrected Machine Check) 'FF' (Firmware
First) flag in the APEI HEST
- Various cleanups and fixes"
* 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
amd64_edac: Fix incorrect wraparounds
amd64_edac: Correct erratum 505 range
cpc925_edac: Use proper array termination
x86/mce, acpi/apei: Only disable banks listed in HEST if mce is configured
amd64_edac: Get rid of boot_cpu_data accesses
amd64_edac: Add ECC decoding support for newer F15h models
x86, amd_nb: Clarify F15h, model 30h GART and L3 support
pci_ids: Add PCI device ID functions 3 and 4 for newer F15h models.
x38_edac: Make a local function static
i3200_edac: Make a local function static
x86/mce: Pay no attention to 'F' bit in MCACOD when parsing 'UC' errors
APEI/ERST: Fix error message formatting
amd64_edac: Fix single-channel setups
EDAC: Replace strict_strtol() with kstrtol()
mce: acpi/apei: Soft-offline a page on firmware GHES notification
mce: acpi/apei: Add a boot option to disable ff mode for corrected errors
mce: acpi/apei: Honour Firmware First for MCA banks listed in APEI HEST CMC
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-internal.h | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 28 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_intel.c | 42 |
3 files changed, 63 insertions, 10 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 5b7d4fa5d3b..09edd0b65fe 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -25,15 +25,18 @@ int mce_severity(struct mce *a, int tolerant, char **msg); struct dentry *mce_get_debugfs_dir(void); extern struct mce_bank *mce_banks; +extern mce_banks_t mce_banks_ce_disabled; #ifdef CONFIG_X86_MCE_INTEL unsigned long mce_intel_adjust_timer(unsigned long interval); void mce_intel_cmci_poll(void); void mce_intel_hcpu_update(unsigned long cpu); +void cmci_disable_bank(int bank); #else # define mce_intel_adjust_timer mce_adjust_timer_default static inline void mce_intel_cmci_poll(void) { } static inline void mce_intel_hcpu_update(unsigned long cpu) { } +static inline void cmci_disable_bank(int bank) { } #endif void mce_timer_kick(unsigned long interval); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 87a65c939bc..b3218cdee95 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -97,6 +97,15 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL }; +/* + * MCA banks controlled through firmware first for corrected errors. + * This is a global list of banks for which we won't enable CMCI and we + * won't poll. Firmware controls these banks and is responsible for + * reporting corrected errors through GHES. Uncorrected/recoverable + * errors are still notified through a machine check. + */ +mce_banks_t mce_banks_ce_disabled; + static DEFINE_PER_CPU(struct work_struct, mce_work); static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); @@ -1935,6 +1944,25 @@ static struct miscdevice mce_chrdev_device = { &mce_chrdev_ops, }; +static void __mce_disable_bank(void *arg) +{ + int bank = *((int *)arg); + __clear_bit(bank, __get_cpu_var(mce_poll_banks)); + cmci_disable_bank(bank); +} + +void mce_disable_bank(int bank) +{ + if (bank >= mca_cfg.banks) { + pr_warn(FW_BUG + "Ignoring request to disable invalid MCA bank %d.\n", + bank); + return; + } + set_bit(bank, mce_banks_ce_disabled); + on_each_cpu(__mce_disable_bank, &bank, 1); +} + /* * mce=off Disables machine check * mce=no_cmci Disables CMCI diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index d56405309dc..4cfe0458ca6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -203,6 +203,10 @@ static void cmci_discover(int banks) if (test_bit(i, owned)) continue; + /* Skip banks in firmware first mode */ + if (test_bit(i, mce_banks_ce_disabled)) + continue; + rdmsrl(MSR_IA32_MCx_CTL2(i), val); /* Already owned by someone else? */ @@ -271,6 +275,19 @@ void cmci_recheck(void) local_irq_restore(flags); } +/* Caller must hold the lock on cmci_discover_lock */ +static void __cmci_disable_bank(int bank) +{ + u64 val; + + if (!test_bit(bank, __get_cpu_var(mce_banks_owned))) + return; + rdmsrl(MSR_IA32_MCx_CTL2(bank), val); + val &= ~MCI_CTL2_CMCI_EN; + wrmsrl(MSR_IA32_MCx_CTL2(bank), val); + __clear_bit(bank, __get_cpu_var(mce_banks_owned)); +} + /* * Disable CMCI on this CPU for all banks it owns when it goes down. * This allows other CPUs to claim the banks on rediscovery. @@ -280,20 +297,12 @@ void cmci_clear(void) unsigned long flags; int i; int banks; - u64 val; if (!cmci_supported(&banks)) return; raw_spin_lock_irqsave(&cmci_discover_lock, flags); - for (i = 0; i < banks; i++) { - if (!test_bit(i, __get_cpu_var(mce_banks_owned))) - continue; - /* Disable CMCI */ - rdmsrl(MSR_IA32_MCx_CTL2(i), val); - val &= ~MCI_CTL2_CMCI_EN; - wrmsrl(MSR_IA32_MCx_CTL2(i), val); - __clear_bit(i, __get_cpu_var(mce_banks_owned)); - } + for (i = 0; i < banks; i++) + __cmci_disable_bank(i); raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); } @@ -327,6 +336,19 @@ void cmci_reenable(void) cmci_discover(banks); } +void cmci_disable_bank(int bank) +{ + int banks; + unsigned long flags; + + if (!cmci_supported(&banks)) + return; + + raw_spin_lock_irqsave(&cmci_discover_lock, flags); + __cmci_disable_bank(bank); + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); +} + static void intel_init_cmci(void) { int banks; |