diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2007-10-23 22:37:23 +0200 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2007-10-23 22:37:23 +0200 |
commit | 01e11182e73eb36af1cc7f3b023d25aa62fd3a8d (patch) | |
tree | 0d678bb97475c3ce9a98458a662ddc6d2f0da640 /arch/x86/kernel/cpu/mcheck | |
parent | 3bc258ad87e5b0bbbca247b24ce8fac380c7d86b (diff) |
x86: consolidate the cpu/ related code usage
The x86_64 arch/x86/kernel/Makefile uses references into
arch/x86/kernel/cpu/... to use code from there.
Unifiy it with the nicely structured i386 way and reuse the existing
subdirectory make rules.
Also move the machine check related source into ...kernel/cpu/mcheck,
where the other machine check related code is.
No code change.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck')
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/Makefile | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_64.c | 890 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 690 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 90 |
4 files changed, 1673 insertions, 0 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 403e720497e..d7d2323bbb6 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile @@ -1,3 +1,6 @@ obj-y = mce_$(BITS).o therm_throt.o + obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o +obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o +obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c new file mode 100644 index 00000000000..07bbfe7aa7f --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -0,0 +1,890 @@ +/* + * Machine check handler. + * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. + * Rest from unknown author(s). + * 2004 Andi Kleen. Rewrote most of it. + */ + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/rcupdate.h> +#include <linux/kallsyms.h> +#include <linux/sysdev.h> +#include <linux/miscdevice.h> +#include <linux/fs.h> +#include <linux/capability.h> +#include <linux/cpu.h> +#include <linux/percpu.h> +#include <linux/poll.h> +#include <linux/thread_info.h> +#include <linux/ctype.h> +#include <linux/kmod.h> +#include <linux/kdebug.h> +#include <asm/processor.h> +#include <asm/msr.h> +#include <asm/mce.h> +#include <asm/uaccess.h> +#include <asm/smp.h> +#include <asm/idle.h> + +#define MISC_MCELOG_MINOR 227 +#define NR_BANKS 6 + +atomic_t mce_entry; + +static int mce_dont_init; + +/* + * Tolerant levels: + * 0: always panic on uncorrected errors, log corrected errors + * 1: panic or SIGBUS on uncorrected errors, log corrected errors + * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors + * 3: never panic or SIGBUS, log all errors (for testing only) + */ +static int tolerant = 1; +static int banks; +static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL }; +static unsigned long notify_user; +static int rip_msr; +static int mce_bootlog = 1; +static atomic_t mce_events; + +static char trigger[128]; +static char *trigger_argv[2] = { trigger, NULL }; + +static DECLARE_WAIT_QUEUE_HEAD(mce_wait); + +/* + * Lockless MCE logging infrastructure. + * This avoids deadlocks on printk locks without having to break locks. Also + * separate MCEs from kernel messages to avoid bogus bug reports. + */ + +struct mce_log mcelog = { + MCE_LOG_SIGNATURE, + MCE_LOG_LEN, +}; + +void mce_log(struct mce *mce) +{ + unsigned next, entry; + atomic_inc(&mce_events); + mce->finished = 0; + wmb(); + for (;;) { + entry = rcu_dereference(mcelog.next); + for (;;) { + /* When the buffer fills up discard new entries. Assume + that the earlier errors are the more interesting. */ + if (entry >= MCE_LOG_LEN) { + set_bit(MCE_OVERFLOW, &mcelog.flags); + return; + } + /* Old left over entry. Skip. */ + if (mcelog.entry[entry].finished) { + entry++; + continue; + } + break; + } + smp_rmb(); + next = entry + 1; + if (cmpxchg(&mcelog.next, entry, next) == entry) + break; + } + memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); + wmb(); + mcelog.entry[entry].finished = 1; + wmb(); + + set_bit(0, ¬ify_user); +} + +static void print_mce(struct mce *m) +{ + printk(KERN_EMERG "\n" + KERN_EMERG "HARDWARE ERROR\n" + KERN_EMERG + "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", + m->cpu, m->mcgstatus, m->bank, m->status); + if (m->rip) { + printk(KERN_EMERG + "RIP%s %02x:<%016Lx> ", + !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", + m->cs, m->rip); + if (m->cs == __KERNEL_CS) + print_symbol("{%s}", m->rip); + printk("\n"); + } + printk(KERN_EMERG "TSC %Lx ", m->tsc); + if (m->addr) + printk("ADDR %Lx ", m->addr); + if (m->misc) + printk("MISC %Lx ", m->misc); + printk("\n"); + printk(KERN_EMERG "This is not a software problem!\n"); + printk(KERN_EMERG + "Run through mcelog --ascii to decode and contact your hardware vendor\n"); +} + +static void mce_panic(char *msg, struct mce *backup, unsigned long start) +{ + int i; + + oops_begin(); + for (i = 0; i < MCE_LOG_LEN; i++) { + unsigned long tsc = mcelog.entry[i].tsc; + if (time_before(tsc, start)) + continue; + print_mce(&mcelog.entry[i]); + if (backup && mcelog.entry[i].tsc == backup->tsc) + backup = NULL; + } + if (backup) + print_mce(backup); + panic(msg); +} + +static int mce_available(struct cpuinfo_x86 *c) +{ + return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); +} + +static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) +{ + if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) { + m->rip = regs->rip; + m->cs = regs->cs; + } else { + m->rip = 0; + m->cs = 0; + } + if (rip_msr) { + /* Assume the RIP in the MSR is exact. Is this true? */ + m->mcgstatus |= MCG_STATUS_EIPV; + rdmsrl(rip_msr, m->rip); + m->cs = 0; + } +} + +/* + * The actual machine check handler + */ + +void do_machine_check(struct pt_regs * regs, long error_code) +{ + struct mce m, panicm; + u64 mcestart = 0; + int i; + int panicm_found = 0; + /* + * If no_way_out gets set, there is no safe way to recover from this + * MCE. If tolerant is cranked up, we'll try anyway. + */ + int no_way_out = 0; + /* + * If kill_it gets set, there might be a way to recover from this + * error. + */ + int kill_it = 0; + + atomic_inc(&mce_entry); + + if (regs) + notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL); + if (!banks) + goto out2; + + memset(&m, 0, sizeof(struct mce)); + m.cpu = smp_processor_id(); + rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); + /* if the restart IP is not valid, we're done for */ + if (!(m.mcgstatus & MCG_STATUS_RIPV)) + no_way_out = 1; + + rdtscll(mcestart); + barrier(); + + for (i = 0; i < banks; i++) { + if (!bank[i]) + continue; + + m.misc = 0; + m.addr = 0; + m.bank = i; + m.tsc = 0; + + rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); + if ((m.status & MCI_STATUS_VAL) == 0) + continue; + + if (m.status & MCI_STATUS_EN) { + /* if PCC was set, there's no way out */ + no_way_out |= !!(m.status & MCI_STATUS_PCC); + /* + * If this error was uncorrectable and there was + * an overflow, we're in trouble. If no overflow, + * we might get away with just killing a task. + */ + if (m.status & MCI_STATUS_UC) { + if (tolerant < 1 || m.status & MCI_STATUS_OVER) + no_way_out = 1; + kill_it = 1; + } + } + + if (m.status & MCI_STATUS_MISCV) + rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); + if (m.status & MCI_STATUS_ADDRV) + rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); + + mce_get_rip(&m, regs); + if (error_code >= 0) + rdtscll(m.tsc); + if (error_code != -2) + mce_log(&m); + + /* Did this bank cause the exception? */ + /* Assume that the bank with uncorrectable errors did it, + and that there is only a single one. */ + if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) { + panicm = m; + panicm_found = 1; + } + + add_taint(TAINT_MACHINE_CHECK); + } + + /* Never do anything final in the polling timer */ + if (!regs) + goto out; + + /* If we didn't find an uncorrectable error, pick + the last one (shouldn't happen, just being safe). */ + if (!panicm_found) + panicm = m; + + /* + * If we have decided that we just CAN'T continue, and the user + * has not set tolerant to an insane level, give up and die. + */ + if (no_way_out && tolerant < 3) + mce_panic("Machine check", &panicm, mcestart); + + /* + * If the error seems to be unrecoverable, something should be + * done. Try to kill as little as possible. If we can kill just + * one task, do that. If the user has set the tolerance very + * high, don't try to do anything at all. + */ + if (kill_it && tolerant < 3) { + int user_space = 0; + + /* + * If the EIPV bit is set, it means the saved IP is the + * instruction which caused the MCE. + */ + if (m.mcgstatus & MCG_STATUS_EIPV) + user_space = panicm.rip && (panicm.cs & 3); + + /* + * If we know that the error was in user space, send a + * SIGBUS. Otherwise, panic if tolerance is low. + * + * do_exit() takes an awful lot of locks and has a slight + * risk of deadlocking. + */ + if (user_space) { + do_exit(SIGBUS); + } else if (panic_on_oops || tolerant < 2) { + mce_panic("Uncorrected machine check", + &panicm, mcestart); + } + } + + /* notify userspace ASAP */ + set_thread_flag(TIF_MCE_NOTIFY); + + out: + /* the last thing we do is clear state */ + for (i = 0; i < banks; i++) + wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); + wrmsrl(MSR_IA32_MCG_STATUS, 0); + out2: + atomic_dec(&mce_entry); +} + +#ifdef CONFIG_X86_MCE_INTEL +/*** + * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog + * @cpu: The CPU on which the event occurred. + * @status: Event status information + * + * This function should be called by the thermal interrupt after the + * event has been processed and the decision was made to log the event + * further. + * + * The status parameter will be saved to the 'status' field of 'struct mce' + * and historically has been the register value of the + * MSR_IA32_THERMAL_STATUS (Intel) msr. + */ +void mce_log_therm_throt_event(unsigned int cpu, __u64 status) +{ + struct mce m; + + memset(&m, 0, sizeof(m)); + m.cpu = cpu; + m.bank = MCE_THERMAL_BANK; + m.status = status; + rdtscll(m.tsc); + mce_log(&m); +} +#endif /* CONFIG_X86_MCE_INTEL */ + +/* + * Periodic polling timer for "silent" machine check errors. If the + * poller finds an MCE, poll 2x faster. When the poller finds no more + * errors, poll 2x slower (up to check_interval seconds). + */ + +static int check_interval = 5 * 60; /* 5 minutes */ +static int next_interval; /* in jiffies */ +static void mcheck_timer(struct work_struct *work); +static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer); + +static void mcheck_check_cpu(void *info) +{ + if (mce_available(¤t_cpu_data)) + do_machine_check(NULL, 0); +} + +static void mcheck_timer(struct work_struct *work) +{ + on_each_cpu(mcheck_check_cpu, NULL, 1, 1); + + /* + * Alert userspace if needed. If we logged an MCE, reduce the + * polling interval, otherwise increase the polling interval. + */ + if (mce_notify_user()) { + next_interval = max(next_interval/2, HZ/100); + } else { + next_interval = min(next_interval*2, + (int)round_jiffies_relative(check_interval*HZ)); + } + + schedule_delayed_work(&mcheck_work, next_interval); +} + +/* + * This is only called from process context. This is where we do + * anything we need to alert userspace about new MCEs. This is called + * directly from the poller and also from entry.S and idle, thanks to + * TIF_MCE_NOTIFY. + */ +int mce_notify_user(void) +{ + clear_thread_flag(TIF_MCE_NOTIFY); + if (test_and_clear_bit(0, ¬ify_user)) { + static unsigned long last_print; + unsigned long now = jiffies; + + wake_up_interruptible(&mce_wait); + if (trigger[0]) + call_usermodehelper(trigger, trigger_argv, NULL, + UMH_NO_WAIT); + + if (time_after_eq(now, last_print + (check_interval*HZ))) { + last_print = now; + printk(KERN_INFO "Machine check events logged\n"); + } + + return 1; + } + return 0; +} + +/* see if the idle task needs to notify userspace */ +static int +mce_idle_callback(struct notifier_block *nfb, unsigned long action, void *junk) +{ + /* IDLE_END should be safe - interrupts are back on */ + if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY)) + mce_notify_user(); + + return NOTIFY_OK; +} + +static struct notifier_block mce_idle_notifier = { + .notifier_call = mce_idle_callback, +}; + +static __init int periodic_mcheck_init(void) +{ + next_interval = check_interval * HZ; + if (next_interval) + schedule_delayed_work(&mcheck_work, + round_jiffies_relative(next_interval)); + idle_notifier_register(&mce_idle_notifier); + return 0; +} +__initcall(periodic_mcheck_init); + + +/* + * Initialize Machine Checks for a CPU. + */ +static void mce_init(void *dummy) +{ + u64 cap; + int i; + + rdmsrl(MSR_IA32_MCG_CAP, cap); + banks = cap & 0xff; + if (banks > NR_BANKS) { + printk(KERN_INFO "MCE: warning: using only %d banks\n", banks); + banks = NR_BANKS; + } + /* Use accurate RIP reporting if available. */ + if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) + rip_msr = MSR_IA32_MCG_EIP; + + /* Log the machine checks left over from the previous reset. + This also clears all registers */ + do_machine_check(NULL, mce_bootlog ? -1 : -2); + + set_in_cr4(X86_CR4_MCE); + + if (cap & MCG_CTL_P) + wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); + + for (i = 0; i < banks; i++) { + wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); + wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); + } +} + +/* Add per CPU specific workarounds here */ +static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) +{ + /* This should be disabled by the BIOS, but isn't always */ + if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) { + /* disable GART TBL walk error reporting, which trips off + incorrectly with the IOMMU & 3ware & Cerberus. */ + clear_bit(10, &bank[4]); + /* Lots of broken BIOS around that don't clear them + by default and leave crap in there. Don't log. */ + mce_bootlog = 0; + } + +} + +static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) +{ + switch (c->x86_vendor) { + case X86_VENDOR_INTEL: + mce_intel_feature_init(c); + break; + case X86_VENDOR_AMD: + mce_amd_feature_init(c); + break; + default: + break; + } +} + +/* + * Called for each booted CPU to set up machine checks. + * Must be called with preempt off. + */ +void __cpuinit mcheck_init(struct cpuinfo_x86 *c) +{ + static cpumask_t mce_cpus = CPU_MASK_NONE; + + mce_cpu_quirks(c); + + if (mce_dont_init || + cpu_test_and_set(smp_processor_id(), mce_cpus) || + !mce_available(c)) + return; + + mce_init(NULL); + mce_cpu_features(c); +} + +/* + * Character device to read and clear the MCE log. + */ + +static DEFINE_SPINLOCK(mce_state_lock); +static int open_count; /* #times opened */ +static int open_exclu; /* already open exclusive? */ + +static int mce_open(struct inode *inode, struct file *file) +{ + spin_lock(&mce_state_lock); + + if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { + spin_unlock(&mce_state_lock); + return -EBUSY; + } + + if (file->f_flags & O_EXCL) + open_exclu = 1; + open_count++; + + spin_unlock(&mce_state_lock); + + return nonseekable_open(inode, file); +} + +static int mce_release(struct inode *inode, struct file *file) +{ + spin_lock(&mce_state_lock); + + open_count--; + open_exclu = 0; + + spin_unlock(&mce_state_lock); + + return 0; +} + +static void collect_tscs(void *data) +{ + unsigned long *cpu_tsc = (unsigned long *)data; + rdtscll(cpu_tsc[smp_processor_id()]); +} + +static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff_t *off) +{ + unsigned long *cpu_tsc; + static DECLARE_MUTEX(mce_read_sem); + unsigned next; + char __user *buf = ubuf; + int i, err; + + cpu_tsc = kmalloc(NR_CPUS * sizeof(long), GFP_KERNEL); + if (!cpu_tsc) + return -ENOMEM; + + down(&mce_read_sem); + next = rcu_dereference(mcelog.next); + + /* Only supports full reads right now */ + if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { + up(&mce_read_sem); + kfree(cpu_tsc); + return -EINVAL; + } + + err = 0; + for (i = 0; i < next; i++) { + unsigned long start = jiffies; + while (!mcelog.entry[i].finished) { + if (time_after_eq(jiffies, start + 2)) { + memset(mcelog.entry + i,0, sizeof(struct mce)); + goto timeout; + } + cpu_relax(); + } + smp_rmb(); + err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce)); + buf += sizeof(struct mce); + timeout: + ; + } + + memset(mcelog.entry, 0, next * sizeof(struct mce)); + mcelog.next = 0; + + synchronize_sched(); + + /* Collect entries that were still getting written before the synchronize. */ + + on_each_cpu(collect_tscs, cpu_tsc, 1, 1); + for (i = next; i < MCE_LOG_LEN; i++) { + if (mcelog.entry[i].finished && + mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { + err |= copy_to_user(buf, mcelog.entry+i, sizeof(struct mce)); + smp_rmb(); + buf += sizeof(struct mce); + memset(&mcelog.entry[i], 0, sizeof(struct mce)); + } + } + up(&mce_read_sem); + kfree(cpu_tsc); + return err ? -EFAULT : buf - ubuf; +} + +static unsigned int mce_poll(struct file *file, poll_table *wait) +{ + poll_wait(file, &mce_wait, wait); + if (rcu_dereference(mcelog.next)) + return POLLIN | POLLRDNORM; + return 0; +} + +static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd, unsigned long arg) +{ + int __user *p = (int __user *)arg; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + switch (cmd) { + case MCE_GET_RECORD_LEN: + return put_user(sizeof(struct mce), p); + case MCE_GET_LOG_LEN: + return put_user(MCE_LOG_LEN, p); + case MCE_GETCLEAR_FLAGS: { + unsigned flags; + do { + flags = mcelog.flags; + } while (cmpxchg(&mcelog.flags, flags, 0) != flags); + return put_user(flags, p); + } + default: + return -ENOTTY; + } +} + +static const struct file_operations mce_chrdev_ops = { + .open = mce_open, + .release = mce_release, + .read = mce_read, + .poll = mce_poll, + .ioctl = mce_ioctl, +}; + +static struct miscdevice mce_log_device = { + MISC_MCELOG_MINOR, + "mcelog", + &mce_chrdev_ops, +}; + +static unsigned long old_cr4 __initdata; + +void __init stop_mce(void) +{ + old_cr4 = read_cr4(); + clear_in_cr4(X86_CR4_MCE); +} + +void __init restart_mce(void) +{ + if (old_cr4 & X86_CR4_MCE) + set_in_cr4(X86_CR4_MCE); +} + +/* + * Old style boot options parsing. Only for compatibility. + */ + +static int __init mcheck_disable(char *str) +{ + mce_dont_init = 1; + return 1; +} + +/* mce=off disables machine check. Note you can re-enable it later + using sysfs. + mce=TOLERANCELEVEL (number, see above) + mce=bootlog Log MCEs from before booting. Disabled by default on AMD. + mce=nobootlog Don't log MCEs from before booting. */ +static int __init mcheck_enable(char *str) +{ + if (!strcmp(str, "off")) + mce_dont_init = 1; + else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog")) + mce_bootlog = str[0] == 'b'; + else if (isdigit(str[0])) + get_option(&str, &tolerant); + else + printk("mce= argument %s ignored. Please use /sys", str); + return 1; +} + +__setup("nomce", mcheck_disable); +__setup("mce=", mcheck_enable); + +/* + * Sysfs support + */ + +/* On resume clear all MCE state. Don't want to see leftovers from the BIOS. + Only one CPU is active at this time, the others get readded later using + CPU hotplug. */ +static int mce_resume(struct sys_device *dev) +{ + mce_init(NULL); + return 0; +} + +/* Reinit MCEs after user configuration changes */ +static void mce_restart(void) +{ + if (next_interval) + cancel_delayed_work(&mcheck_work); + /* Timer race is harmless here */ + on_each_cpu(mce_init, NULL, 1, 1); + next_interval = check_interval * HZ; + if (next_interval) + schedule_delayed_work(&mcheck_work, + round_jiffies_relative(next_interval)); +} + +static struct sysdev_class mce_sysclass = { + .resume = mce_resume, + set_kset_name("machinecheck"), +}; + +DEFINE_PER_CPU(struct sys_device, device_mce); + +/* Why are there no generic functions for this? */ +#define ACCESSOR(name, var, start) \ + static ssize_t show_ ## name(struct sys_device *s, char *buf) { \ + return sprintf(buf, "%lx\n", (unsigned long)var); \ + } \ + static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) { \ + char *end; \ + unsigned long new = simple_strtoul(buf, &end, 0); \ + if (end == buf) return -EINVAL; \ + var = new; \ + start; \ + return end-buf; \ + } \ + static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); + +/* TBD should generate these dynamically based on number of available banks */ +ACCESSOR(bank0ctl,bank[0],mce_restart()) +ACCESSOR(bank1ctl,bank[1],mce_restart()) +ACCESSOR(bank2ctl,bank[2],mce_restart()) +ACCESSOR(bank3ctl,bank[3],mce_restart()) +ACCESSOR(bank4ctl,bank[4],mce_restart()) +ACCESSOR(bank5ctl,bank[5],mce_restart()) + +static ssize_t show_trigger(struct sys_device *s, char *buf) +{ + strcpy(buf, trigger); + strcat(buf, "\n"); + return strlen(trigger) + 1; +} + +static ssize_t set_trigger(struct sys_device *s,const char *buf,size_t siz) +{ + char *p; + int len; + strncpy(trigger, buf, sizeof(trigger)); + trigger[sizeof(trigger)-1] = 0; + len = strlen(trigger); + p = strchr(trigger, '\n'); + if (*p) *p = 0; + return len; +} + +static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); +ACCESSOR(tolerant,tolerant,) +ACCESSOR(check_interval,check_interval,mce_restart()) +static struct sysdev_attribute *mce_attributes[] = { + &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl, + &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl, + &attr_tolerant, &attr_check_interval, &attr_trigger, + NULL +}; + +/* Per cpu sysdev init. All of the cpus still share the same ctl bank */ +static __cpuinit int mce_create_device(unsigned int cpu) +{ + int err; + int i; + + if (!mce_available(&cpu_data(cpu))) + return -EIO; + + memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); + per_cpu(device_mce,cpu).id = cpu; + per_cpu(device_mce,cpu).cls = &mce_sysclass; + + err = sysdev_register(&per_cpu(device_mce,cpu)); + if (err) + return err; + + for (i = 0; mce_attributes[i]; i++) { + err = sysdev_create_file(&per_cpu(device_mce,cpu), + mce_attributes[i]); + if (err) + goto error; + } + + return 0; +error: + while (i--) { + sysdev_remove_file(&per_cpu(device_mce,cpu), + mce_attributes[i]); + } + sysdev_unregister(&per_cpu(device_mce,cpu)); + + return err; +} + +static void mce_remove_device(unsigned int cpu) +{ + int i; + + for (i = 0; mce_attributes[i]; i++) + sysdev_remove_file(&per_cpu(device_mce,cpu), + mce_attributes[i]); + sysdev_unregister(&per_cpu(device_mce,cpu)); +} + +/* Get notified when a cpu comes on/off. Be hotplug friendly. */ +static int +mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + int err = 0; + + switch (action) { + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + err = mce_create_device(cpu); + break; + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: + case CPU_DEAD: + case CPU_DEAD_FROZEN: + mce_remove_device(cpu); + break; + } + return err ? NOTIFY_BAD : NOTIFY_OK; +} + +static struct notifier_block mce_cpu_notifier = { + .notifier_call = mce_cpu_callback, +}; + +static __init int mce_init_device(void) +{ + int err; + int i = 0; + + if (!mce_available(&boot_cpu_data)) + return -EIO; + err = sysdev_class_register(&mce_sysclass); + if (err) + return err; + + for_each_online_cpu(i) { + err = mce_create_device(i); + if (err) + return err; + } + + register_hotcpu_notifier(&mce_cpu_notifier); + misc_register(&mce_log_device); + return err; +} + +device_initcall(mce_init_device); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c new file mode 100644 index 00000000000..752fb16a817 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -0,0 +1,690 @@ +/* + * (c) 2005, 2006 Advanced Micro Devices, Inc. + * Your use of this code is subject to the terms and conditions of the + * GNU general public license version 2. See "COPYING" or + * http://www.gnu.org/licenses/gpl.html + * + * Written by Jacob Shin - AMD, Inc. + * + * Support : jacob.shin@amd.com + * + * April 2006 + * - added support for AMD Family 0x10 processors + * + * All MC4_MISCi registers are shared between multi-cores + */ + +#include <linux/cpu.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/kobject.h> +#include <linux/notifier.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/sysdev.h> +#include <linux/sysfs.h> +#include <asm/apic.h> +#include <asm/mce.h> +#include <asm/msr.h> +#include <asm/percpu.h> +#include <asm/idle.h> + +#define PFX "mce_threshold: " +#define VERSION "version 1.1.1" +#define NR_BANKS 6 +#define NR_BLOCKS 9 +#define THRESHOLD_MAX 0xFFF +#define INT_TYPE_APIC 0x00020000 +#define MASK_VALID_HI 0x80000000 +#define MASK_CNTP_HI 0x40000000 +#define MASK_LOCKED_HI 0x20000000 +#define MASK_LVTOFF_HI 0x00F00000 +#define MASK_COUNT_EN_HI 0x00080000 +#define MASK_INT_TYPE_HI 0x00060000 +#define MASK_OVERFLOW_HI 0x00010000 +#define MASK_ERR_COUNT_HI 0x00000FFF +#define MASK_BLKPTR_LO 0xFF000000 +#define MCG_XBLK_ADDR 0xC0000400 + +struct threshold_block { + unsigned int block; + unsigned int bank; + unsigned int cpu; + u32 address; + u16 interrupt_enable; + u16 threshold_limit; + struct kobject kobj; + struct list_head miscj; +}; + +/* defaults used early on boot */ +static struct threshold_block threshold_defaults = { + .interrupt_enable = 0, + .threshold_limit = THRESHOLD_MAX, +}; + +struct threshold_bank { + struct kobject kobj; + struct threshold_block *blocks; + cpumask_t cpus; +}; +static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); + +#ifdef CONFIG_SMP +static unsigned char shared_bank[NR_BANKS] = { + 0, 0, 0, 0, 1 +}; +#endif + +static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ + +/* + * CPU Initialization + */ + +/* must be called with correct cpu affinity */ +static void threshold_restart_bank(struct threshold_block *b, + int reset, u16 old_limit) +{ + u32 mci_misc_hi, mci_misc_lo; + + rdmsr(b->address, mci_misc_lo, mci_misc_hi); + + if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) + reset = 1; /* limit cannot be lower than err count */ + + if (reset) { /* reset err count and overflow bit */ + mci_misc_hi = + (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | + (THRESHOLD_MAX - b->threshold_limit); + } else if (old_limit) { /* change limit w/o reset */ + int new_count = (mci_misc_hi & THRESHOLD_MAX) + + (old_limit - b->threshold_limit); + mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | + (new_count & THRESHOLD_MAX); + } + + b->interrupt_enable ? + (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : + (mci_misc_hi &= ~MASK_INT_TYPE_HI); + + mci_misc_hi |= MASK_COUNT_EN_HI; + wrmsr(b->address, mci_misc_lo, mci_misc_hi); +} + +/* cpu init entry point, called from mce.c with preempt off */ +void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) +{ + unsigned int bank, block; + unsigned int cpu = smp_processor_id(); + u32 low = 0, high = 0, address = 0; + + for (bank = 0; bank < NR_BANKS; ++bank) { + for (block = 0; block < NR_BLOCKS; ++block) { + if (block == 0) + address = MSR_IA32_MC0_MISC + bank * 4; + else if (block == 1) { + address = (low & MASK_BLKPTR_LO) >> 21; + if (!address) + break; + address += MCG_XBLK_ADDR; + } + else + ++address; + + if (rdmsr_safe(address, &low, &high)) + break; + + if (!(high & MASK_VALID_HI)) { + if (block) + continue; + else + break; + } + + if (!(high & MASK_CNTP_HI) || + (high & MASK_LOCKED_HI)) + continue; + + if (!block) + per_cpu(bank_map, cpu) |= (1 << bank); +#ifdef CONFIG_SMP + if (shared_bank[bank] && c->cpu_core_id) + break; +#endif + high &= ~MASK_LVTOFF_HI; + high |= K8_APIC_EXT_LVT_ENTRY_THRESHOLD << 20; + wrmsr(address, low, high); + + setup_APIC_extended_lvt(K8_APIC_EXT_LVT_ENTRY_THRESHOLD, + THRESHOLD_APIC_VECTOR, + K8_APIC_EXT_INT_MSG_FIX, 0); + + threshold_defaults.address = address; + threshold_restart_bank(&threshold_defaults, 0, 0); + } + } +} + +/* + * APIC Interrupt Handler + */ + +/* + * threshold interrupt handler will service THRESHOLD_APIC_VECTOR. + * the interrupt goes off when error_count reaches threshold_limit. + * the handler will simply log mcelog w/ software defined bank number. + */ +asmlinkage void mce_threshold_interrupt(void) +{ + unsigned int bank, block; + struct mce m; + u32 low = 0, high = 0, address = 0; + + ack_APIC_irq(); + exit_idle(); + irq_enter(); + + memset(&m, 0, sizeof(m)); + rdtscll(m.tsc); + m.cpu = smp_processor_id(); + + /* assume first bank caused it */ + for (bank = 0; bank < NR_BANKS; ++bank) { + if (!(per_cpu(bank_map, m.cpu) & (1 << bank))) + continue; + for (block = 0; block < NR_BLOCKS; ++block) { + if (block == 0) + address = MSR_IA32_MC0_MISC + bank * 4; + else if (block == 1) { + address = (low & MASK_BLKPTR_LO) >> 21; + if (!address) + break; + address += MCG_XBLK_ADDR; + } + else + ++address; + + if (rdmsr_safe(address, &low, &high)) + break; + + if (!(high & MASK_VALID_HI)) { + if (block) + continue; + else + break; + } + + if (!(high & MASK_CNTP_HI) || + (high & MASK_LOCKED_HI)) + continue; + + /* Log the machine check that caused the threshold + event. */ + do_machine_check(NULL, 0); + + if (high & MASK_OVERFLOW_HI) { + rdmsrl(address, m.misc); + rdmsrl(MSR_IA32_MC0_STATUS + bank * 4, + m.status); + m.bank = K8_MCE_THRESHOLD_BASE + + bank * NR_BLOCKS + + block; + mce_log(&m); + goto out; + } + } + } +out: + add_pda(irq_threshold_count, 1); + irq_exit(); +} + +/* + * Sysfs Interface + */ + +struct threshold_attr { + struct attribute attr; + ssize_t(*show) (struct threshold_block *, char *); + ssize_t(*store) (struct threshold_block *, const char *, size_t count); +}; + +static cpumask_t affinity_set(unsigned int cpu) +{ + cpumask_t oldmask = current->cpus_allowed; + cpumask_t newmask = CPU_MASK_NONE; + cpu_set(cpu, newmask); + set_cpus_allowed(current, newmask); + return oldmask; +} + +static void affinity_restore(cpumask_t oldmask) +{ + set_cpus_allowed(current, oldmask); +} + +#define SHOW_FIELDS(name) \ +static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ +{ \ + return sprintf(buf, "%lx\n", (unsigned long) b->name); \ +} +SHOW_FIELDS(interrupt_enable) +SHOW_FIELDS(threshold_limit) + +static ssize_t store_interrupt_enable(struct threshold_block *b, + const char *buf, size_t count) +{ + char *end; + cpumask_t oldmask; + unsigned long new = simple_strtoul(buf, &end, 0); + if (end == buf) + return -EINVAL; + b->interrupt_enable = !!new; + + oldmask = affinity_set(b->cpu); + threshold_restart_bank(b, 0, 0); + affinity_restore(oldmask); + + return end - buf; +} + +static ssize_t store_threshold_limit(struct threshold_block *b, + const char *buf, size_t count) +{ + char *end; + cpumask_t oldmask; + u16 old; + unsigned long new = simple_strtoul(buf, &end, 0); + if (end == buf) + return -EINVAL; + if (new > THRESHOLD_MAX) + new = THRESHOLD_MAX; + if (new < 1) + new = 1; + old = b->threshold_limit; + b->threshold_limit = new; + + oldmask = affinity_set(b->cpu); + threshold_restart_bank(b, 0, old); + affinity_restore(oldmask); + + return end - buf; +} + +static ssize_t show_error_count(struct threshold_block *b, char *buf) +{ + u32 high, low; + cpumask_t oldmask; + oldmask = affinity_set(b->cpu); + rdmsr(b->address, low, high); + affinity_restore(oldmask); + return sprintf(buf, "%x\n", + (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); +} + +static ssize_t store_error_count(struct threshold_block *b, + const char *buf, size_t count) +{ + cpumask_t oldmask; + oldmask = affinity_set(b->cpu); + threshold_restart_bank(b, 1, 0); + affinity_restore(oldmask); + return 1; +} + +#define THRESHOLD_ATTR(_name,_mode,_show,_store) { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +}; + +#define RW_ATTR(name) \ +static struct threshold_attr name = \ + THRESHOLD_ATTR(name, 0644, show_## name, store_## name) + +RW_ATTR(interrupt_enable); +RW_ATTR(threshold_limit); +RW_ATTR(error_count); + +static struct attribute *default_attrs[] = { + &interrupt_enable.attr, + &threshold_limit.attr, + &error_count.attr, + NULL +}; + +#define to_block(k) container_of(k, struct threshold_block, kobj) +#define to_attr(a) container_of(a, struct threshold_attr, attr) + +static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + struct threshold_block *b = to_block(kobj); + struct threshold_attr *a = to_attr(attr); + ssize_t ret; + ret = a->show ? a->show(b, buf) : -EIO; + return ret; +} + +static ssize_t store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct threshold_block *b = to_block(kobj); + struct threshold_attr *a = to_attr(attr); + ssize_t ret; + ret = a->store ? a->store(b, buf, count) : -EIO; + return ret; +} + +static struct sysfs_ops threshold_ops = { + .show = show, + .store = store, +}; + +static struct kobj_type threshold_ktype = { + .sysfs_ops = &threshold_ops, + .default_attrs = default_attrs, +}; + +static __cpuinit int allocate_threshold_blocks(unsigned int cpu, + unsigned int bank, + unsigned int block, + u32 address) +{ + int err; + u32 low, high; + struct threshold_block *b = NULL; + + if ((bank >= NR_BANKS) || (block >= NR_BLOCKS)) + return 0; + + if (rdmsr_safe(address, &low, &high)) + return 0; + + if (!(high & MASK_VALID_HI)) { + if (block) + goto recurse; + else + return 0; + } + + if (!(high & MASK_CNTP_HI) || + (high & MASK_LOCKED_HI)) + goto recurse; + + b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL); + if (!b) + return -ENOMEM; + + b->block = block; + b->bank = bank; + b->cpu = cpu; + b->address = address; + b->interrupt_enable = 0; + b->threshold_limit = THRESHOLD_MAX; + + INIT_LIST_HEAD(&b->miscj); + + if (per_cpu(threshold_banks, cpu)[bank]->blocks) + list_add(&b->miscj, + &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj); + else + per_cpu(threshold_banks, cpu)[bank]->blocks = b; + + kobject_set_name(&b->kobj, "misc%i", block); + b->kobj.parent = &per_cpu(threshold_banks, cpu)[bank]->kobj; + b->kobj.ktype = &threshold_ktype; + err = kobject_register(&b->kobj); + if (err) + goto out_free; +recurse: + if (!block) { + address = (low & MASK_BLKPTR_LO) >> 21; + if (!address) + return 0; + address += MCG_XBLK_ADDR; + } else + ++address; + + err = allocate_threshold_blocks(cpu, bank, ++block, address); + if (err) + goto out_free; + + return err; + +out_free: + if (b) { + kobject_unregister(&b->kobj); + kfree(b); + } + return err; +} + +/* symlinks sibling shared banks to first core. first core owns dir/files. */ +static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) +{ + int i, err = 0; + struct threshold_bank *b = NULL; + cpumask_t oldmask = CPU_MASK_NONE; + char name[32]; + + sprintf(name, "threshold_bank%i", bank); + +#ifdef CONFIG_SMP + if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ + i = first_cpu(per_cpu(cpu_core_map, cpu)); + + /* first core not up yet */ + if (cpu_data(i).cpu_core_id) + goto out; + + /* already linked */ + if (per_cpu(threshold_banks, cpu)[bank]) + goto out; + + b = per_cpu(threshold_banks, i)[bank]; + + if (!b) + goto out; + + err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj, + &b->kobj, name); + if (err) + goto out; + + b->cpus = per_cpu(cpu_core_map, cpu); + per_cpu(threshold_banks, cpu)[bank] = b; + goto out; + } +#endif + + b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); + if (!b) { + err = -ENOMEM; + goto out; + } + + kobject_set_name(&b->kobj, "threshold_bank%i", bank); + b->kobj.parent = &per_cpu(device_mce, cpu).kobj; +#ifndef CONFIG_SMP + b->cpus = CPU_MASK_ALL; +#else + b->cpus = per_cpu(cpu_core_map, cpu); +#endif + err = kobject_register(&b->kobj); + if (err) + goto out_free; + + per_cpu(threshold_banks, cpu)[bank] = b; + + oldmask = affinity_set(cpu); + err = allocate_threshold_blocks(cpu, bank, 0, + MSR_IA32_MC0_MISC + bank * 4); + affinity_restore(oldmask); + + if (err) + goto out_free; + + for_each_cpu_mask(i, b->cpus) { + if (i == cpu) + continue; + + err = sysfs_create_link(&per_cpu(device_mce, i).kobj, + &b->kobj, name); + if (err) + goto out; + + per_cpu(threshold_banks, i)[bank] = b; + } + + goto out; + +out_free: + per_cpu(threshold_banks, cpu)[bank] = NULL; + kfree(b); +out: + return err; +} + +/* create dir/files for all valid threshold banks */ +static __cpuinit int threshold_create_device(unsigned int cpu) +{ + unsigned int bank; + int err = 0; + + for (bank = 0; bank < NR_BANKS; ++bank) { + if (!(per_cpu(bank_map, cpu) & 1 << bank)) + continue; + err = threshold_create_bank(cpu, bank); + if (err) + goto out; + } +out: + return err; +} + +/* + * let's be hotplug friendly. + * in case of multiple core processors, the first core always takes ownership + * of shared sysfs dir/files, and rest of the cores will be symlinked to it. + */ + +static void deallocate_threshold_block(unsigned int cpu, + unsigned int bank) +{ + struct threshold_block *pos = NULL; + struct threshold_block *tmp = NULL; + struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank]; + + if (!head) + return; + + list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) { + kobject_unregister(&pos->kobj); + list_del(&pos->miscj); + kfree(pos); + } + + kfree(per_cpu(threshold_banks, cpu)[bank]->blocks); + per_cpu(threshold_banks, cpu)[bank]->blocks = NULL; +} + +static void threshold_remove_bank(unsigned int cpu, int bank) +{ + int i = 0; + struct threshold_bank *b; + char name[32]; + + b = per_cpu(threshold_banks, cpu)[bank]; + + if (!b) + return; + + if (!b->blocks) + goto free_out; + + sprintf(name, "threshold_bank%i", bank); + +#ifdef CONFIG_SMP + /* sibling symlink */ + if (shared_bank[bank] && b->blocks->cpu != cpu) { + sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name); + per_cpu(threshold_banks, cpu)[bank] = NULL; + return; + } +#endif + + /* remove all sibling symlinks before unregistering */ + for_each_cpu_mask(i, b->cpus) { + if (i == cpu) + continue; + + sysfs_remove_link(&per_cpu(device_mce, i).kobj, name); + per_cpu(threshold_banks, i)[bank] = NULL; + } + + deallocate_threshold_block(cpu, bank); + +free_out: + kobject_unregister(&b->kobj); + kfree(b); + per_cpu(threshold_banks, cpu)[bank] = NULL; +} + +static void threshold_remove_device(unsigned int cpu) +{ + unsigned int bank; + + for (bank = 0; bank < NR_BANKS; ++bank) { + if (!(per_cpu(bank_map, cpu) & 1 << bank)) + continue; + threshold_remove_bank(cpu, bank); + } +} + +/* get notified when a cpu comes on/off */ +static int threshold_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + /* cpu was unsigned int to begin with */ + unsigned int cpu = (unsigned long)hcpu; + + if (cpu >= NR_CPUS) + goto out; + + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + threshold_create_device(cpu); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + threshold_remove_device(cpu); + break; + default: + break; + } + out: + return NOTIFY_OK; +} + +static struct notifier_block threshold_cpu_notifier = { + .notifier_call = threshold_cpu_callback, +}; + +static __init int threshold_init_device(void) +{ + unsigned lcpu = 0; + + /* to hit CPUs online before the notifier is up */ + for_each_online_cpu(lcpu) { + int err = threshold_create_device(lcpu); + if (err) + return err; + } + register_hotcpu_notifier(&threshold_cpu_notifier); + return 0; +} + +device_initcall(threshold_init_device); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c new file mode 100644 index 00000000000..c17eaf5dd6d --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -0,0 +1,90 @@ +/* + * Intel specific MCE features. + * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca> + */ + +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/percpu.h> +#include <asm/processor.h> +#include <asm/msr.h> +#include <asm/mce.h> +#include <asm/hw_irq.h> +#include <asm/idle.h> +#include <asm/therm_throt.h> + +asmlinkage void smp_thermal_interrupt(void) +{ + __u64 msr_val; + + ack_APIC_irq(); + + exit_idle(); + irq_enter(); + + rdmsrl(MSR_IA32_THERM_STATUS, msr_val); + if (therm_throt_process(msr_val & 1)) + mce_log_therm_throt_event(smp_processor_id(), msr_val); + + add_pda(irq_thermal_count, 1); + irq_exit(); +} + +static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c) +{ + u32 l, h; + int tm2 = 0; + unsigned int cpu = smp_processor_id(); + + if (!cpu_has(c, X86_FEATURE_ACPI)) + return; + + if (!cpu_has(c, X86_FEATURE_ACC)) + return; + + /* first check if TM1 is already enabled by the BIOS, in which + * case there might be some SMM goo which handles it, so we can't even + * put a handler since it might be delivered via SMI already. + */ + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + h = apic_read(APIC_LVTTHMR); + if ((l & (1 << 3)) && (h & APIC_DM_SMI)) { + printk(KERN_DEBUG + "CPU%d: Thermal monitoring handled by SMI\n", cpu); + return; + } + + if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13))) + tm2 = 1; + + if (h & APIC_VECTOR_MASK) { + printk(KERN_DEBUG + "CPU%d: Thermal LVT vector (%#x) already " + "installed\n", cpu, (h & APIC_VECTOR_MASK)); + return; + } + + h = THERMAL_APIC_VECTOR; + h |= (APIC_DM_FIXED | APIC_LVT_MASKED); + apic_write(APIC_LVTTHMR, h); + + rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); + wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); + + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h); + + l = apic_read(APIC_LVTTHMR); + apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); + printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", + cpu, tm2 ? "TM2" : "TM1"); + + /* enable thermal throttle processing */ + atomic_set(&therm_throt_en, 1); + return; +} + +void __cpuinit mce_intel_feature_init(struct cpuinfo_x86 *c) +{ + intel_init_thermal(c); +} |