diff options
author | Tejun Heo <tj@kernel.org> | 2011-11-28 09:46:22 -0800 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2011-11-28 09:46:22 -0800 |
commit | d4bbf7e7759afc172e2bfbc5c416324590049cdd (patch) | |
tree | 7eab5ee5481cd3dcf1162329fec827177640018a /arch/x86/kernel/cpu/mcheck | |
parent | a150439c4a97db379f0ed6faa46fbbb6e7bf3cb2 (diff) | |
parent | 401d0069cb344f401bc9d264c31db55876ff78c0 (diff) |
Merge branch 'master' into x86/memblock
Conflicts & resolutions:
* arch/x86/xen/setup.c
dc91c728fd "xen: allow extra memory to be in multiple regions"
24aa07882b "memblock, x86: Replace memblock_x86_reserve/free..."
conflicted on xen_add_extra_mem() updates. The resolution is
trivial as the latter just want to replace
memblock_x86_reserve_range() with memblock_reserve().
* drivers/pci/intel-iommu.c
166e9278a3f "x86/ia64: intel-iommu: move to drivers/iommu/"
5dfe8660a3d "bootmem: Replace work_with_active_regions() with..."
conflicted as the former moved the file under drivers/iommu/.
Resolved by applying the chnages from the latter on the moved
file.
* mm/Kconfig
6661672053a "memblock: add NO_BOOTMEM config symbol"
c378ddd53f9 "memblock, x86: Make ARCH_DISCARD_MEMBLOCK a config option"
conflicted trivially. Both added config options. Just
letting both add their own options resolves the conflict.
* mm/memblock.c
d1f0ece6cdc "mm/memblock.c: small function definition fixes"
ed7b56a799c "memblock: Remove memblock_memory_can_coalesce()"
confliected. The former updates function removed by the
latter. Resolution is trivial.
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck')
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-apei.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-inject.c | 22 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-severity.c | 152 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 364 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_amd.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_intel.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/therm_throt.c | 1 |
7 files changed, 304 insertions, 256 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index 83930deec3c..507ea58688e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c @@ -28,6 +28,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/export.h> #include <linux/kernel.h> #include <linux/acpi.h> #include <linux/cper.h> diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 0ed633c5048..319882ef848 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -78,27 +78,20 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs) static cpumask_var_t mce_inject_cpumask; -static int mce_raise_notify(struct notifier_block *self, - unsigned long val, void *data) +static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) { - struct die_args *args = (struct die_args *)data; int cpu = smp_processor_id(); struct mce *m = &__get_cpu_var(injectm); - if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) - return NOTIFY_DONE; + if (!cpumask_test_cpu(cpu, mce_inject_cpumask)) + return NMI_DONE; cpumask_clear_cpu(cpu, mce_inject_cpumask); if (m->inject_flags & MCJ_EXCEPTION) - raise_exception(m, args->regs); + raise_exception(m, regs); else if (m->status) raise_poll(m); - return NOTIFY_STOP; + return NMI_HANDLED; } -static struct notifier_block mce_raise_nb = { - .notifier_call = mce_raise_notify, - .priority = NMI_LOCAL_NORMAL_PRIOR, -}; - /* Inject mce on current CPU */ static int raise_local(void) { @@ -215,8 +208,9 @@ static int inject_init(void) if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL)) return -ENOMEM; printk(KERN_INFO "Machine check injector initialized\n"); - mce_chrdev_ops.write = mce_write; - register_die_notifier(&mce_raise_nb); + register_mce_write_callback(mce_write); + register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, + "mce_notify"); return 0; } diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 1e8d66c1336..7395d5f4272 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -43,61 +43,105 @@ static struct severity { unsigned char covered; char *msg; } severities[] = { -#define KERNEL .context = IN_KERNEL -#define USER .context = IN_USER -#define SER .ser = SER_REQUIRED -#define NOSER .ser = NO_SER -#define SEV(s) .sev = MCE_ ## s ## _SEVERITY -#define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r } -#define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r } -#define MCGMASK(x, res, s, m, r...) \ - { .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r } -#define MASK(x, y, s, m, r...) \ - { .mask = x, .result = y, SEV(s), .msg = m, ## r } +#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c } +#define KERNEL .context = IN_KERNEL +#define USER .context = IN_USER +#define SER .ser = SER_REQUIRED +#define NOSER .ser = NO_SER +#define BITCLR(x) .mask = x, .result = 0 +#define BITSET(x) .mask = x, .result = x +#define MCGMASK(x, y) .mcgmask = x, .mcgres = y +#define MASK(x, y) .mask = x, .result = y #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) #define MCACOD 0xffff - BITCLR(MCI_STATUS_VAL, NO, "Invalid"), - BITCLR(MCI_STATUS_EN, NO, "Not enabled"), - BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"), + MCESEV( + NO, "Invalid", + BITCLR(MCI_STATUS_VAL) + ), + MCESEV( + NO, "Not enabled", + BITCLR(MCI_STATUS_EN) + ), + MCESEV( + PANIC, "Processor context corrupt", + BITSET(MCI_STATUS_PCC) + ), /* When MCIP is not set something is very confused */ - MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"), + MCESEV( + PANIC, "MCIP not set in MCA handler", + MCGMASK(MCG_STATUS_MCIP, 0) + ), /* Neither return not error IP -- no chance to recover -> PANIC */ - MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC, - "Neither restart nor error IP"), - MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP", - KERNEL), - BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER), - MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME, - "Spurious not enabled", SER), + MCESEV( + PANIC, "Neither restart nor error IP", + MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0) + ), + MCESEV( + PANIC, "In kernel and no restart IP", + KERNEL, MCGMASK(MCG_STATUS_RIPV, 0) + ), + MCESEV( + KEEP, "Corrected error", + NOSER, BITCLR(MCI_STATUS_UC) + ), /* ignore OVER for UCNA */ - MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP, - "Uncorrected no action required", SER), - MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC, - "Illegal combination (UCNA with AR=1)", SER), - MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER), + MCESEV( + KEEP, "Uncorrected no action required", + SER, MASK(MCI_UC_SAR, MCI_STATUS_UC) + ), + MCESEV( + PANIC, "Illegal combination (UCNA with AR=1)", + SER, + MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR) + ), + MCESEV( + KEEP, "Non signalled machine check", + SER, BITCLR(MCI_STATUS_S) + ), /* AR add known MCACODs here */ - MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC, - "Action required with lost events", SER), - MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC, - "Action required; unknown MCACOD", SER), + MCESEV( + PANIC, "Action required with lost events", + SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR) + ), + MCESEV( + PANIC, "Action required: unknown MCACOD", + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR) + ), /* known AO MCACODs: */ - MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO, - "Action optional: memory scrubbing error", SER), - MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO, - "Action optional: last level cache writeback error", SER), - - MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME, - "Action optional unknown MCACOD", SER), - MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME, - "Action optional with lost events", SER), - BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"), - BITSET(MCI_STATUS_UC, UC, "Uncorrected"), - BITSET(0, SOME, "No match") /* always matches. keep at end */ + MCESEV( + AO, "Action optional: memory scrubbing error", + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|0xfff0, MCI_UC_S|0x00c0) + ), + MCESEV( + AO, "Action optional: last level cache writeback error", + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|0x017a) + ), + MCESEV( + SOME, "Action optional: unknown MCACOD", + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S) + ), + MCESEV( + SOME, "Action optional with lost events", + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_S) + ), + + MCESEV( + PANIC, "Overflowed uncorrected", + BITSET(MCI_STATUS_OVER|MCI_STATUS_UC) + ), + MCESEV( + UC, "Uncorrected", + BITSET(MCI_STATUS_UC) + ), + MCESEV( + SOME, "No match", + BITSET(0) + ) /* always matches. keep at end */ }; /* @@ -112,15 +156,15 @@ static int error_context(struct mce *m) return IN_KERNEL; } -int mce_severity(struct mce *a, int tolerant, char **msg) +int mce_severity(struct mce *m, int tolerant, char **msg) { - enum context ctx = error_context(a); + enum context ctx = error_context(m); struct severity *s; for (s = severities;; s++) { - if ((a->status & s->mask) != s->result) + if ((m->status & s->mask) != s->result) continue; - if ((a->mcgstatus & s->mcgmask) != s->mcgres) + if ((m->mcgstatus & s->mcgmask) != s->mcgres) continue; if (s->ser == SER_REQUIRED && !mce_ser) continue; @@ -197,15 +241,15 @@ static const struct file_operations severities_coverage_fops = { static int __init severities_debugfs_init(void) { - struct dentry *dmce = NULL, *fseverities_coverage = NULL; + struct dentry *dmce, *fsev; dmce = mce_get_debugfs_dir(); - if (dmce == NULL) + if (!dmce) goto err_out; - fseverities_coverage = debugfs_create_file("severities-coverage", - 0444, dmce, NULL, - &severities_coverage_fops); - if (fseverities_coverage == NULL) + + fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL, + &severities_coverage_fops); + if (!fsev) goto err_out; return 0; @@ -214,4 +258,4 @@ err_out: return -ENOMEM; } late_initcall(severities_debugfs_init); -#endif +#endif /* CONFIG_DEBUG_FS */ diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index ff1ae9b6464..2af127d4c3d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -10,7 +10,6 @@ #include <linux/thread_info.h> #include <linux/capability.h> #include <linux/miscdevice.h> -#include <linux/interrupt.h> #include <linux/ratelimit.h> #include <linux/kallsyms.h> #include <linux/rcupdate.h> @@ -37,24 +36,21 @@ #include <linux/fs.h> #include <linux/mm.h> #include <linux/debugfs.h> -#include <linux/edac_mce.h> +#include <linux/irq_work.h> +#include <linux/export.h> #include <asm/processor.h> -#include <asm/hw_irq.h> -#include <asm/apic.h> -#include <asm/idle.h> -#include <asm/ipi.h> #include <asm/mce.h> #include <asm/msr.h> #include "mce-internal.h" -static DEFINE_MUTEX(mce_read_mutex); +static DEFINE_MUTEX(mce_chrdev_read_mutex); #define rcu_dereference_check_mce(p) \ rcu_dereference_index_check((p), \ rcu_read_lock_sched_held() || \ - lockdep_is_held(&mce_read_mutex)) + lockdep_is_held(&mce_chrdev_read_mutex)) #define CREATE_TRACE_POINTS #include <trace/events/mce.h> @@ -94,7 +90,8 @@ static unsigned long mce_need_notify; static char mce_helper[128]; static char *mce_helper_argv[2] = { mce_helper, NULL }; -static DECLARE_WAIT_QUEUE_HEAD(mce_wait); +static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait); + static DEFINE_PER_CPU(struct mce, mces_seen); static int cpu_missing; @@ -147,23 +144,20 @@ static struct mce_log mcelog = { void mce_log(struct mce *mce) { unsigned next, entry; + int ret = 0; /* Emit the trace record: */ trace_mce_record(mce); + ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); + if (ret == NOTIFY_STOP) + return; + mce->finished = 0; wmb(); for (;;) { entry = rcu_dereference_check_mce(mcelog.next); for (;;) { - /* - * If edac_mce is enabled, it will check the error type - * and will process it, if it is a known error. - * Otherwise, the error will be sent through mcelog - * interface - */ - if (edac_mce_parse(mce)) - return; /* * When the buffer fills up discard new entries. @@ -220,8 +214,13 @@ static void print_mce(struct mce *m) pr_cont("MISC %llx ", m->misc); pr_cont("\n"); - pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", - m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); + /* + * Note this output is parsed by external tools and old fields + * should not be changed. + */ + pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n", + m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, + cpu_data(m->extcpu).microcode); /* * Print out human-readable details about the MCE error, @@ -373,6 +372,31 @@ static void mce_wrmsrl(u32 msr, u64 v) } /* + * Collect all global (w.r.t. this processor) status about this machine + * check into our "mce" struct so that we can use it later to assess + * the severity of the problem as we read per-bank specific details. + */ +static inline void mce_gather_info(struct mce *m, struct pt_regs *regs) +{ + mce_setup(m); + + m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); + if (regs) { + /* + * Get the address of the instruction at the time of + * the machine check error. + */ + if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) { + m->ip = regs->ip; + m->cs = regs->cs; + } + /* Use accurate RIP reporting if available. */ + if (rip_msr) + m->ip = mce_rdmsrl(rip_msr); + } +} + +/* * Simple lockless ring to communicate PFNs from the exception handler with the * process context work function. This is vastly simplified because there's * only a single reader and a single writer. @@ -443,40 +467,13 @@ static void mce_schedule_work(void) } } -/* - * Get the address of the instruction at the time of the machine check - * error. - */ -static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) -{ - - if (regs && (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV))) { - m->ip = regs->ip; - m->cs = regs->cs; - } else { - m->ip = 0; - m->cs = 0; - } - if (rip_msr) - m->ip = mce_rdmsrl(rip_msr); -} +DEFINE_PER_CPU(struct irq_work, mce_irq_work); -#ifdef CONFIG_X86_LOCAL_APIC -/* - * Called after interrupts have been reenabled again - * when a MCE happened during an interrupts off region - * in the kernel. - */ -asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs) +static void mce_irq_work_cb(struct irq_work *entry) { - ack_APIC_irq(); - exit_idle(); - irq_enter(); mce_notify_irq(); mce_schedule_work(); - irq_exit(); } -#endif static void mce_report_event(struct pt_regs *regs) { @@ -492,29 +489,7 @@ static void mce_report_event(struct pt_regs *regs) return; } -#ifdef CONFIG_X86_LOCAL_APIC - /* - * Without APIC do not notify. The event will be picked - * up eventually. - */ - if (!cpu_has_apic) - return; - - /* - * When interrupts are disabled we cannot use - * kernel services safely. Trigger an self interrupt - * through the APIC to instead do the notification - * after interrupts are reenabled again. - */ - apic->send_IPI_self(MCE_SELF_VECTOR); - - /* - * Wait for idle afterwards again so that we don't leave the - * APIC in a non idle state because the normal APIC writes - * cannot exclude us. - */ - apic_wait_icr_idle(); -#endif + irq_work_queue(&__get_cpu_var(mce_irq_work)); } DEFINE_PER_CPU(unsigned, mce_poll_count); @@ -541,9 +516,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) percpu_inc(mce_poll_count); - mce_setup(&m); + mce_gather_info(&m, NULL); - m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); for (i = 0; i < banks; i++) { if (!mce_banks[i].ctl || !test_bit(i, *b)) continue; @@ -579,10 +553,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) * Don't get the IP here because it's unlikely to * have anything to do with the actual error location. */ - if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { + if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) mce_log(&m); - atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m); - } /* * Clear state for this bank. @@ -879,9 +851,9 @@ static int mce_usable_address(struct mce *m) { if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV)) return 0; - if ((m->misc & 0x3f) > PAGE_SHIFT) + if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT) return 0; - if (((m->misc >> 6) & 7) != MCM_ADDR_PHYS) + if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS) return 0; return 1; } @@ -936,15 +908,11 @@ void do_machine_check(struct pt_regs *regs, long error_code) percpu_inc(mce_exception_count); - if (notify_die(DIE_NMI, "machine check", regs, error_code, - 18, SIGKILL) == NOTIFY_STOP) - goto out; if (!banks) goto out; - mce_setup(&m); + mce_gather_info(&m, regs); - m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); final = &__get_cpu_var(mces_seen); *final = m; @@ -1028,7 +996,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) mce_ring_add(m.addr >> PAGE_SHIFT); - mce_get_rip(&m, regs); mce_log(&m); if (severity > worst) { @@ -1170,6 +1137,15 @@ static void mce_start_timer(unsigned long data) add_timer_on(t, smp_processor_id()); } +/* Must not be called in IRQ context where del_timer_sync() can deadlock */ +static void mce_timer_delete_all(void) +{ + int cpu; + + for_each_online_cpu(cpu) + del_timer_sync(&per_cpu(mce_timer, cpu)); +} + static void mce_do_trigger(struct work_struct *work) { call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT); @@ -1190,7 +1166,8 @@ int mce_notify_irq(void) clear_thread_flag(TIF_MCE_NOTIFY); if (test_and_clear_bit(0, &mce_need_notify)) { - wake_up_interruptible(&mce_wait); + /* wake processes polling /dev/mcelog */ + wake_up_interruptible(&mce_chrdev_wait); /* * There is no risk of missing notifications because @@ -1363,18 +1340,23 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) return 0; } -static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) +static int __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) { if (c->x86 != 5) - return; + return 0; + switch (c->x86_vendor) { case X86_VENDOR_INTEL: intel_p5_mcheck_init(c); + return 1; break; case X86_VENDOR_CENTAUR: winchip_mcheck_init(c); + return 1; break; } + + return 0; } static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) @@ -1428,7 +1410,8 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) if (mce_disabled) return; - __mcheck_cpu_ancient_init(c); + if (__mcheck_cpu_ancient_init(c)) + return; if (!mce_available(c)) return; @@ -1444,44 +1427,45 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) __mcheck_cpu_init_vendor(c); __mcheck_cpu_init_timer(); INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); - + init_irq_work(&__get_cpu_var(mce_irq_work), &mce_irq_work_cb); } /* - * Character device to read and clear the MCE log. + * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log. */ -static DEFINE_SPINLOCK(mce_state_lock); -static int open_count; /* #times opened */ -static int open_exclu; /* already open exclusive? */ +static DEFINE_SPINLOCK(mce_chrdev_state_lock); +static int mce_chrdev_open_count; /* #times opened */ +static int mce_chrdev_open_exclu; /* already open exclusive? */ -static int mce_open(struct inode *inode, struct file *file) +static int mce_chrdev_open(struct inode *inode, struct file *file) { - spin_lock(&mce_state_lock); + spin_lock(&mce_chrdev_state_lock); - if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { - spin_unlock(&mce_state_lock); + if (mce_chrdev_open_exclu || + (mce_chrdev_open_count && (file->f_flags & O_EXCL))) { + spin_unlock(&mce_chrdev_state_lock); return -EBUSY; } if (file->f_flags & O_EXCL) - open_exclu = 1; - open_count++; + mce_chrdev_open_exclu = 1; + mce_chrdev_open_count++; - spin_unlock(&mce_state_lock); + spin_unlock(&mce_chrdev_state_lock); return nonseekable_open(inode, file); } -static int mce_release(struct inode *inode, struct file *file) +static int mce_chrdev_release(struct inode *inode, struct file *file) { - spin_lock(&mce_state_lock); + spin_lock(&mce_chrdev_state_lock); - open_count--; - open_exclu = 0; + mce_chrdev_open_count--; + mce_chrdev_open_exclu = 0; - spin_unlock(&mce_state_lock); + spin_unlock(&mce_chrdev_state_lock); return 0; } @@ -1530,8 +1514,8 @@ static int __mce_read_apei(char __user **ubuf, size_t usize) return 0; } -static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, - loff_t *off) +static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf, + size_t usize, loff_t *off) { char __user *buf = ubuf; unsigned long *cpu_tsc; @@ -1542,7 +1526,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, if (!cpu_tsc) return -ENOMEM; - mutex_lock(&mce_read_mutex); + mutex_lock(&mce_chrdev_read_mutex); if (!mce_apei_read_done) { err = __mce_read_apei(&buf, usize); @@ -1562,19 +1546,18 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, do { for (i = prev; i < next; i++) { unsigned long start = jiffies; + struct mce *m = &mcelog.entry[i]; - while (!mcelog.entry[i].finished) { + while (!m->finished) { if (time_after_eq(jiffies, start + 2)) { - memset(mcelog.entry + i, 0, - sizeof(struct mce)); + memset(m, 0, sizeof(*m)); goto timeout; } cpu_relax(); } smp_rmb(); - err |= copy_to_user(buf, mcelog.entry + i, - sizeof(struct mce)); - buf += sizeof(struct mce); + err |= copy_to_user(buf, m, sizeof(*m)); + buf += sizeof(*m); timeout: ; } @@ -1594,13 +1577,13 @@ timeout: on_each_cpu(collect_tscs, cpu_tsc, 1); for (i = next; i < MCE_LOG_LEN; i++) { - if (mcelog.entry[i].finished && - mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { - err |= copy_to_user(buf, mcelog.entry+i, - sizeof(struct mce)); + struct mce *m = &mcelog.entry[i]; + + if (m->finished && m->tsc < cpu_tsc[m->cpu]) { + err |= copy_to_user(buf, m, sizeof(*m)); smp_rmb(); - buf += sizeof(struct mce); - memset(&mcelog.entry[i], 0, sizeof(struct mce)); + buf += sizeof(*m); + memset(m, 0, sizeof(*m)); } } @@ -1608,15 +1591,15 @@ timeout: err = -EFAULT; out: - mutex_unlock(&mce_read_mutex); + mutex_unlock(&mce_chrdev_read_mutex); kfree(cpu_tsc); return err ? err : buf - ubuf; } -static unsigned int mce_poll(struct file *file, poll_table *wait) +static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait) { - poll_wait(file, &mce_wait, wait); + poll_wait(file, &mce_chrdev_wait, wait); if (rcu_access_index(mcelog.next)) return POLLIN | POLLRDNORM; if (!mce_apei_read_done && apei_check_mce()) @@ -1624,7 +1607,8 @@ static unsigned int mce_poll(struct file *file, poll_table *wait) return 0; } -static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) +static long mce_chrdev_ioctl(struct file *f, unsigned int cmd, + unsigned long arg) { int __user *p = (int __user *)arg; @@ -1650,18 +1634,37 @@ static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) } } -/* Modified in mce-inject.c, so not static or const */ -struct file_operations mce_chrdev_ops = { - .open = mce_open, - .release = mce_release, - .read = mce_read, - .poll = mce_poll, - .unlocked_ioctl = mce_ioctl, - .llseek = no_llseek, +static ssize_t (*mce_write)(struct file *filp, const char __user *ubuf, + size_t usize, loff_t *off); + +void register_mce_write_callback(ssize_t (*fn)(struct file *filp, + const char __user *ubuf, + size_t usize, loff_t *off)) +{ + mce_write = fn; +} +EXPORT_SYMBOL_GPL(register_mce_write_callback); + +ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf, + size_t usize, loff_t *off) +{ + if (mce_write) + return mce_write(filp, ubuf, usize, off); + else + return -EINVAL; +} + +static const struct file_operations mce_chrdev_ops = { + .open = mce_chrdev_open, + .release = mce_chrdev_release, + .read = mce_chrdev_read, + .write = mce_chrdev_write, + .poll = mce_chrdev_poll, + .unlocked_ioctl = mce_chrdev_ioctl, + .llseek = no_llseek, }; -EXPORT_SYMBOL_GPL(mce_chrdev_ops); -static struct miscdevice mce_log_device = { +static struct miscdevice mce_chrdev_device = { MISC_MCELOG_MINOR, "mcelog", &mce_chrdev_ops, @@ -1719,7 +1722,7 @@ int __init mcheck_init(void) } /* - * Sysfs support + * mce_syscore: PM support */ /* @@ -1739,12 +1742,12 @@ static int mce_disable_error_reporting(void) return 0; } -static int mce_suspend(void) +static int mce_syscore_suspend(void) { return mce_disable_error_reporting(); } -static void mce_shutdown(void) +static void mce_syscore_shutdown(void) { mce_disable_error_reporting(); } @@ -1754,21 +1757,24 @@ static void mce_shutdown(void) * Only one CPU is active at this time, the others get re-added later using * CPU hotplug: */ -static void mce_resume(void) +static void mce_syscore_resume(void) { __mcheck_cpu_init_generic(); __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info)); } static struct syscore_ops mce_syscore_ops = { - .suspend = mce_suspend, - .shutdown = mce_shutdown, - .resume = mce_resume, + .suspend = mce_syscore_suspend, + .shutdown = mce_syscore_shutdown, + .resume = mce_syscore_resume, }; +/* + * mce_sysdev: Sysfs support + */ + static void mce_cpu_restart(void *data) { - del_timer_sync(&__get_cpu_var(mce_timer)); if (!mce_available(__this_cpu_ptr(&cpu_info))) return; __mcheck_cpu_init_generic(); @@ -1778,16 +1784,15 @@ static void mce_cpu_restart(void *data) /* Reinit MCEs after user configuration changes */ static void mce_restart(void) { + mce_timer_delete_all(); on_each_cpu(mce_cpu_restart, NULL, 1); } /* Toggle features for corrected errors */ -static void mce_disable_ce(void *all) +static void mce_disable_cmci(void *data) { if (!mce_available(__this_cpu_ptr(&cpu_info))) return; - if (all) - del_timer_sync(&__get_cpu_var(mce_timer)); cmci_clear(); } @@ -1801,11 +1806,11 @@ static void mce_enable_ce(void *all) __mcheck_cpu_init_timer(); } -static struct sysdev_class mce_sysclass = { +static struct sysdev_class mce_sysdev_class = { .name = "machinecheck", }; -DEFINE_PER_CPU(struct sys_device, mce_dev); +DEFINE_PER_CPU(struct sys_device, mce_sysdev); __cpuinitdata void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); @@ -1870,7 +1875,8 @@ static ssize_t set_ignore_ce(struct sys_device *s, if (mce_ignore_ce ^ !!new) { if (new) { /* disable ce features */ - on_each_cpu(mce_disable_ce, (void *)1, 1); + mce_timer_delete_all(); + on_each_cpu(mce_disable_cmci, NULL, 1); mce_ignore_ce = 1; } else { /* enable ce features */ @@ -1893,7 +1899,7 @@ static ssize_t set_cmci_disabled(struct sys_device *s, if (mce_cmci_disabled ^ !!new) { if (new) { /* disable cmci */ - on_each_cpu(mce_disable_ce, NULL, 1); + on_each_cpu(mce_disable_cmci, NULL, 1); mce_cmci_disabled = 1; } else { /* enable cmci */ @@ -1934,7 +1940,7 @@ static struct sysdev_ext_attribute attr_cmci_disabled = { &mce_cmci_disabled }; -static struct sysdev_attribute *mce_attrs[] = { +static struct sysdev_attribute *mce_sysdev_attrs[] = { &attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger, @@ -1945,66 +1951,67 @@ static struct sysdev_attribute *mce_attrs[] = { NULL }; -static cpumask_var_t mce_dev_initialized; +static cpumask_var_t mce_sysdev_initialized; /* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */ -static __cpuinit int mce_create_device(unsigned int cpu) +static __cpuinit int mce_sysdev_create(unsigned int cpu) { + struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu); int err; int i, j; if (!mce_available(&boot_cpu_data)) return -EIO; - memset(&per_cpu(mce_dev, cpu).kobj, 0, sizeof(struct kobject)); - per_cpu(mce_dev, cpu).id = cpu; - per_cpu(mce_dev, cpu).cls = &mce_sysclass; + memset(&sysdev->kobj, 0, sizeof(struct kobject)); + sysdev->id = cpu; + sysdev->cls = &mce_sysdev_class; - err = sysdev_register(&per_cpu(mce_dev, cpu)); + err = sysdev_register(sysdev); if (err) return err; - for (i = 0; mce_attrs[i]; i++) { - err = sysdev_create_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); + for (i = 0; mce_sysdev_attrs[i]; i++) { + err = sysdev_create_file(sysdev, mce_sysdev_attrs[i]); if (err) goto error; } for (j = 0; j < banks; j++) { - err = sysdev_create_file(&per_cpu(mce_dev, cpu), - &mce_banks[j].attr); + err = sysdev_create_file(sysdev, &mce_banks[j].attr); if (err) goto error2; } - cpumask_set_cpu(cpu, mce_dev_initialized); + cpumask_set_cpu(cpu, mce_sysdev_initialized); return 0; error2: while (--j >= 0) - sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr); + sysdev_remove_file(sysdev, &mce_banks[j].attr); error: while (--i >= 0) - sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); + sysdev_remove_file(sysdev, mce_sysdev_attrs[i]); - sysdev_unregister(&per_cpu(mce_dev, cpu)); + sysdev_unregister(sysdev); return err; } -static __cpuinit void mce_remove_device(unsigned int cpu) +static __cpuinit void mce_sysdev_remove(unsigned int cpu) { + struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu); int i; - if (!cpumask_test_cpu(cpu, mce_dev_initialized)) + if (!cpumask_test_cpu(cpu, mce_sysdev_initialized)) return; - for (i = 0; mce_attrs[i]; i++) - sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); + for (i = 0; mce_sysdev_attrs[i]; i++) + sysdev_remove_file(sysdev, mce_sysdev_attrs[i]); for (i = 0; i < banks; i++) - sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr); + sysdev_remove_file(sysdev, &mce_banks[i].attr); - sysdev_unregister(&per_cpu(mce_dev, cpu)); - cpumask_clear_cpu(cpu, mce_dev_initialized); + sysdev_unregister(sysdev); + cpumask_clear_cpu(cpu, mce_sysdev_initialized); } /* Make sure there are no machine checks on offlined CPUs. */ @@ -2054,7 +2061,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) switch (action) { case CPU_ONLINE: case CPU_ONLINE_FROZEN: - mce_create_device(cpu); + mce_sysdev_create(cpu); if (threshold_cpu_callback) threshold_cpu_callback(action, cpu); break; @@ -2062,7 +2069,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) case CPU_DEAD_FROZEN: if (threshold_cpu_callback) threshold_cpu_callback(action, cpu); - mce_remove_device(cpu); + mce_sysdev_remove(cpu); break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: @@ -2116,27 +2123,28 @@ static __init int mcheck_init_device(void) if (!mce_available(&boot_cpu_data)) return -EIO; - zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); + zalloc_cpumask_var(&mce_sysdev_initialized, GFP_KERNEL); mce_init_banks(); - err = sysdev_class_register(&mce_sysclass); + err = sysdev_class_register(&mce_sysdev_class); if (err) return err; for_each_online_cpu(i) { - err = mce_create_device(i); + err = mce_sysdev_create(i); if (err) return err; } register_syscore_ops(&mce_syscore_ops); register_hotcpu_notifier(&mce_cpu_notifier); - misc_register(&mce_log_device); + + /* register character device /dev/mcelog */ + misc_register(&mce_chrdev_device); return err; } - device_initcall(mcheck_init_device); /* diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index bb0adad3514..f5474218cff 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -548,7 +548,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (!b) goto out; - err = sysfs_create_link(&per_cpu(mce_dev, cpu).kobj, + err = sysfs_create_link(&per_cpu(mce_sysdev, cpu).kobj, b->kobj, name); if (err) goto out; @@ -571,7 +571,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) goto out; } - b->kobj = kobject_create_and_add(name, &per_cpu(mce_dev, cpu).kobj); + b->kobj = kobject_create_and_add(name, &per_cpu(mce_sysdev, cpu).kobj); if (!b->kobj) goto out_free; @@ -591,7 +591,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (i == cpu) continue; - err = sysfs_create_link(&per_cpu(mce_dev, i).kobj, + err = sysfs_create_link(&per_cpu(mce_sysdev, i).kobj, b->kobj, name); if (err) goto out; @@ -669,7 +669,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) #ifdef CONFIG_SMP /* sibling symlink */ if (shared_bank[bank] && b->blocks->cpu != cpu) { - sysfs_remove_link(&per_cpu(mce_dev, cpu).kobj, name); + sysfs_remove_link(&per_cpu(mce_sysdev, cpu).kobj, name); per_cpu(threshold_banks, cpu)[bank] = NULL; return; @@ -681,7 +681,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) if (i == cpu) continue; - sysfs_remove_link(&per_cpu(mce_dev, i).kobj, name); + sysfs_remove_link(&per_cpu(mce_sysdev, i).kobj, name); per_cpu(threshold_banks, i)[bank] = NULL; } diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 8694ef56459..38e49bc95ff 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -28,7 +28,7 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); * cmci_discover_lock protects against parallel discovery attempts * which could race against each other. */ -static DEFINE_SPINLOCK(cmci_discover_lock); +static DEFINE_RAW_SPINLOCK(cmci_discover_lock); #define CMCI_THRESHOLD 1 @@ -85,7 +85,7 @@ static void cmci_discover(int banks, int boot) int hdr = 0; int i; - spin_lock_irqsave(&cmci_discover_lock, flags); + raw_spin_lock_irqsave(&cmci_discover_lock, flags); for (i = 0; i < banks; i++) { u64 val; @@ -116,7 +116,7 @@ static void cmci_discover(int banks, int boot) WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); } } - spin_unlock_irqrestore(&cmci_discover_lock, flags); + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); if (hdr) printk(KERN_CONT "\n"); } @@ -150,7 +150,7 @@ void cmci_clear(void) if (!cmci_supported(&banks)) return; - spin_lock_irqsave(&cmci_discover_lock, flags); + raw_spin_lock_irqsave(&cmci_discover_lock, flags); for (i = 0; i < banks; i++) { if (!test_bit(i, __get_cpu_var(mce_banks_owned))) continue; @@ -160,7 +160,7 @@ void cmci_clear(void) wrmsrl(MSR_IA32_MCx_CTL2(i), val); __clear_bit(i, __get_cpu_var(mce_banks_owned)); } - spin_unlock_irqrestore(&cmci_discover_lock, flags); + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); } /* diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 27c625178bf..787e06c84ea 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -18,6 +18,7 @@ #include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/percpu.h> +#include <linux/export.h> #include <linux/sysdev.h> #include <linux/types.h> #include <linux/init.h> |