diff options
author | Len Brown <len.brown@intel.com> | 2010-08-15 01:06:31 -0400 |
---|---|---|
committer | Len Brown <len.brown@intel.com> | 2010-08-15 01:06:31 -0400 |
commit | 95ee46aa8698f2000647dfb362400fadbb5807cf (patch) | |
tree | e5a05c7297f997e191c73091934e42e3195c0e40 /arch/x86/kernel/cpu/mcheck | |
parent | cfa806f059801dbe7e435745eb2e187c8bfe1e7f (diff) | |
parent | 92fa5bd9a946b6e7aab6764e7312e4e3d9bed295 (diff) |
Merge branch 'linus' into release
Conflicts:
drivers/acpi/debug.c
Signed-off-by: Len Brown <len.brown@intel.com>
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck')
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 35 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_intel.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/therm_throt.c | 206 |
3 files changed, 184 insertions, 66 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 18cc4256225..ed41562909f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -51,7 +51,7 @@ static DEFINE_MUTEX(mce_read_mutex); #define rcu_dereference_check_mce(p) \ - rcu_dereference_check((p), \ + rcu_dereference_index_check((p), \ rcu_read_lock_sched_held() || \ lockdep_is_held(&mce_read_mutex)) @@ -107,8 +107,8 @@ EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); static int default_decode_mce(struct notifier_block *nb, unsigned long val, void *data) { - pr_emerg("No human readable MCE decoding support on this CPU type.\n"); - pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); + pr_emerg(HW_ERR "No human readable MCE decoding support on this CPU type.\n"); + pr_emerg(HW_ERR "Run the message through 'mcelog --ascii' to decode.\n"); return NOTIFY_STOP; } @@ -211,11 +211,11 @@ void mce_log(struct mce *mce) static void print_mce(struct mce *m) { - pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", + pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", m->extcpu, m->mcgstatus, m->bank, m->status); if (m->ip) { - pr_emerg("RIP%s %02x:<%016Lx> ", + pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ", !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", m->cs, m->ip); @@ -224,14 +224,14 @@ static void print_mce(struct mce *m) pr_cont("\n"); } - pr_emerg("TSC %llx ", m->tsc); + pr_emerg(HW_ERR "TSC %llx ", m->tsc); if (m->addr) pr_cont("ADDR %llx ", m->addr); if (m->misc) pr_cont("MISC %llx ", m->misc); pr_cont("\n"); - pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", + pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); /* @@ -241,16 +241,6 @@ static void print_mce(struct mce *m) atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); } -static void print_mce_head(void) -{ - pr_emerg("\nHARDWARE ERROR\n"); -} - -static void print_mce_tail(void) -{ - pr_emerg("This is not a software problem!\n"); -} - #define PANIC_TIMEOUT 5 /* 5 seconds */ static atomic_t mce_paniced; @@ -291,7 +281,6 @@ static void mce_panic(char *msg, struct mce *final, char *exp) if (atomic_inc_return(&mce_fake_paniced) > 1) return; } - print_mce_head(); /* First print corrected ones that are still unlogged */ for (i = 0; i < MCE_LOG_LEN; i++) { struct mce *m = &mcelog.entry[i]; @@ -322,16 +311,15 @@ static void mce_panic(char *msg, struct mce *final, char *exp) apei_err = apei_write_mce(final); } if (cpu_missing) - printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); - print_mce_tail(); + pr_emerg(HW_ERR "Some CPUs didn't answer in synchronization\n"); if (exp) - printk(KERN_EMERG "Machine check: %s\n", exp); + pr_emerg(HW_ERR "Machine check: %s\n", exp); if (!fake_panic) { if (panic_timeout == 0) panic_timeout = mce_panic_timeout; panic(msg); } else - printk(KERN_EMERG "Fake kernel panic: %s\n", msg); + pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg); } /* Support code for software error injection */ @@ -600,6 +588,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) */ if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { mce_log(&m); + atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m); add_taint(TAINT_MACHINE_CHECK); } @@ -1220,7 +1209,7 @@ int mce_notify_irq(void) schedule_work(&mce_trigger_work); if (__ratelimit(&ratelimit)) - printk(KERN_INFO "Machine check events logged\n"); + pr_info(HW_ERR "Machine check events logged\n"); return 1; } diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 62b48e40920..6fcd0936194 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -95,19 +95,20 @@ static void cmci_discover(int banks, int boot) rdmsrl(MSR_IA32_MCx_CTL2(i), val); /* Already owned by someone else? */ - if (val & CMCI_EN) { + if (val & MCI_CTL2_CMCI_EN) { if (test_and_clear_bit(i, owned) && !boot) print_update("SHD", &hdr, i); __clear_bit(i, __get_cpu_var(mce_poll_banks)); continue; } - val |= CMCI_EN | CMCI_THRESHOLD; + val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; + val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD; wrmsrl(MSR_IA32_MCx_CTL2(i), val); rdmsrl(MSR_IA32_MCx_CTL2(i), val); /* Did the enable bit stick? -- the bank supports CMCI */ - if (val & CMCI_EN) { + if (val & MCI_CTL2_CMCI_EN) { if (!test_and_set_bit(i, owned) && !boot) print_update("CMCI", &hdr, i); __clear_bit(i, __get_cpu_var(mce_poll_banks)); @@ -155,7 +156,7 @@ void cmci_clear(void) continue; /* Disable CMCI */ rdmsrl(MSR_IA32_MCx_CTL2(i), val); - val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); + val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK); wrmsrl(MSR_IA32_MCx_CTL2(i), val); __clear_bit(i, __get_cpu_var(mce_banks_owned)); } diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index e1a0a3bf971..c2a8b26d4fe 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -34,15 +34,25 @@ /* How long to wait between reporting thermal events */ #define CHECK_INTERVAL (300 * HZ) +#define THERMAL_THROTTLING_EVENT 0 +#define POWER_LIMIT_EVENT 1 + /* - * Current thermal throttling state: + * Current thermal event state: */ -struct thermal_state { - bool is_throttled; - +struct _thermal_state { + bool new_event; + int event; u64 next_check; - unsigned long throttle_count; - unsigned long last_throttle_count; + unsigned long count; + unsigned long last_count; +}; + +struct thermal_state { + struct _thermal_state core_throttle; + struct _thermal_state core_power_limit; + struct _thermal_state package_throttle; + struct _thermal_state package_power_limit; }; static DEFINE_PER_CPU(struct thermal_state, thermal_state); @@ -53,11 +63,13 @@ static u32 lvtthmr_init __read_mostly; #ifdef CONFIG_SYSFS #define define_therm_throt_sysdev_one_ro(_name) \ - static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) + static SYSDEV_ATTR(_name, 0444, \ + therm_throt_sysdev_show_##_name, \ + NULL) \ -#define define_therm_throt_sysdev_show_func(name) \ +#define define_therm_throt_sysdev_show_func(event, name) \ \ -static ssize_t therm_throt_sysdev_show_##name( \ +static ssize_t therm_throt_sysdev_show_##event##_##name( \ struct sys_device *dev, \ struct sysdev_attribute *attr, \ char *buf) \ @@ -66,30 +78,42 @@ static ssize_t therm_throt_sysdev_show_##name( \ ssize_t ret; \ \ preempt_disable(); /* CPU hotplug */ \ - if (cpu_online(cpu)) \ + if (cpu_online(cpu)) { \ ret = sprintf(buf, "%lu\n", \ - per_cpu(thermal_state, cpu).name); \ - else \ + per_cpu(thermal_state, cpu).event.name); \ + } else \ ret = 0; \ preempt_enable(); \ \ return ret; \ } -define_therm_throt_sysdev_show_func(throttle_count); -define_therm_throt_sysdev_one_ro(throttle_count); +define_therm_throt_sysdev_show_func(core_throttle, count); +define_therm_throt_sysdev_one_ro(core_throttle_count); + +define_therm_throt_sysdev_show_func(core_power_limit, count); +define_therm_throt_sysdev_one_ro(core_power_limit_count); + +define_therm_throt_sysdev_show_func(package_throttle, count); +define_therm_throt_sysdev_one_ro(package_throttle_count); + +define_therm_throt_sysdev_show_func(package_power_limit, count); +define_therm_throt_sysdev_one_ro(package_power_limit_count); static struct attribute *thermal_throttle_attrs[] = { - &attr_throttle_count.attr, + &attr_core_throttle_count.attr, NULL }; -static struct attribute_group thermal_throttle_attr_group = { +static struct attribute_group thermal_attr_group = { .attrs = thermal_throttle_attrs, .name = "thermal_throttle" }; #endif /* CONFIG_SYSFS */ +#define CORE_LEVEL 0 +#define PACKAGE_LEVEL 1 + /*** * therm_throt_process - Process thermal throttling event from interrupt * @curr: Whether the condition is current or not (boolean), since the @@ -106,39 +130,70 @@ static struct attribute_group thermal_throttle_attr_group = { * 1 : Event should be logged further, and a message has been * printed to the syslog. */ -static int therm_throt_process(bool is_throttled) +static int therm_throt_process(bool new_event, int event, int level) { - struct thermal_state *state; - unsigned int this_cpu; - bool was_throttled; + struct _thermal_state *state; + unsigned int this_cpu = smp_processor_id(); + bool old_event; u64 now; + struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu); - this_cpu = smp_processor_id(); now = get_jiffies_64(); - state = &per_cpu(thermal_state, this_cpu); + if (level == CORE_LEVEL) { + if (event == THERMAL_THROTTLING_EVENT) + state = &pstate->core_throttle; + else if (event == POWER_LIMIT_EVENT) + state = &pstate->core_power_limit; + else + return 0; + } else if (level == PACKAGE_LEVEL) { + if (event == THERMAL_THROTTLING_EVENT) + state = &pstate->package_throttle; + else if (event == POWER_LIMIT_EVENT) + state = &pstate->package_power_limit; + else + return 0; + } else + return 0; - was_throttled = state->is_throttled; - state->is_throttled = is_throttled; + old_event = state->new_event; + state->new_event = new_event; - if (is_throttled) - state->throttle_count++; + if (new_event) + state->count++; if (time_before64(now, state->next_check) && - state->throttle_count != state->last_throttle_count) + state->count != state->last_count) return 0; state->next_check = now + CHECK_INTERVAL; - state->last_throttle_count = state->throttle_count; + state->last_count = state->count; /* if we just entered the thermal event */ - if (is_throttled) { - printk(KERN_CRIT "CPU%d: Temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, state->throttle_count); + if (new_event) { + if (event == THERMAL_THROTTLING_EVENT) + printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", + this_cpu, + level == CORE_LEVEL ? "Core" : "Package", + state->count); + else + printk(KERN_CRIT "CPU%d: %s power limit notification (total events = %lu)\n", + this_cpu, + level == CORE_LEVEL ? "Core" : "Package", + state->count); add_taint(TAINT_MACHINE_CHECK); return 1; } - if (was_throttled) { - printk(KERN_INFO "CPU%d: Temperature/speed normal\n", this_cpu); + if (old_event) { + if (event == THERMAL_THROTTLING_EVENT) + printk(KERN_INFO "CPU%d: %s temperature/speed normal\n", + this_cpu, + level == CORE_LEVEL ? "Core" : "Package"); + else + printk(KERN_INFO "CPU%d: %s power limit normal\n", + this_cpu, + level == CORE_LEVEL ? "Core" : "Package"); return 1; } @@ -149,13 +204,32 @@ static int therm_throt_process(bool is_throttled) /* Add/Remove thermal_throttle interface for CPU device: */ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) { - return sysfs_create_group(&sys_dev->kobj, - &thermal_throttle_attr_group); + int err; + struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); + + err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group); + if (err) + return err; + + if (cpu_has(c, X86_FEATURE_PLN)) + err = sysfs_add_file_to_group(&sys_dev->kobj, + &attr_core_power_limit_count.attr, + thermal_attr_group.name); + if (cpu_has(c, X86_FEATURE_PTS)) + err = sysfs_add_file_to_group(&sys_dev->kobj, + &attr_package_throttle_count.attr, + thermal_attr_group.name); + if (cpu_has(c, X86_FEATURE_PLN)) + err = sysfs_add_file_to_group(&sys_dev->kobj, + &attr_package_power_limit_count.attr, + thermal_attr_group.name); + + return err; } static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) { - sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); + sysfs_remove_group(&sys_dev->kobj, &thermal_attr_group); } /* Mutex protecting device creation against CPU hotplug: */ @@ -226,14 +300,50 @@ device_initcall(thermal_throttle_init_device); #endif /* CONFIG_SYSFS */ +/* + * Set up the most two significant bit to notify mce log that this thermal + * event type. + * This is a temp solution. May be changed in the future with mce log + * infrasture. + */ +#define CORE_THROTTLED (0) +#define CORE_POWER_LIMIT ((__u64)1 << 62) +#define PACKAGE_THROTTLED ((__u64)2 << 62) +#define PACKAGE_POWER_LIMIT ((__u64)3 << 62) + /* Thermal transition interrupt handler */ static void intel_thermal_interrupt(void) { __u64 msr_val; + struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); rdmsrl(MSR_IA32_THERM_STATUS, msr_val); - if (therm_throt_process((msr_val & THERM_STATUS_PROCHOT) != 0)) - mce_log_therm_throt_event(msr_val); + + if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, + THERMAL_THROTTLING_EVENT, + CORE_LEVEL) != 0) + mce_log_therm_throt_event(CORE_THROTTLED | msr_val); + + if (cpu_has(c, X86_FEATURE_PLN)) + if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, + POWER_LIMIT_EVENT, + CORE_LEVEL) != 0) + mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val); + + if (cpu_has(c, X86_FEATURE_PTS)) { + rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); + if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, + THERMAL_THROTTLING_EVENT, + PACKAGE_LEVEL) != 0) + mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val); + if (cpu_has(c, X86_FEATURE_PLN)) + if (therm_throt_process(msr_val & + PACKAGE_THERM_STATUS_POWER_LIMIT, + POWER_LIMIT_EVENT, + PACKAGE_LEVEL) != 0) + mce_log_therm_throt_event(PACKAGE_POWER_LIMIT + | msr_val); + } } static void unexpected_thermal_interrupt(void) @@ -335,8 +445,26 @@ void intel_init_thermal(struct cpuinfo_x86 *c) apic_write(APIC_LVTTHMR, h); rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); - wrmsr(MSR_IA32_THERM_INTERRUPT, - l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); + if (cpu_has(c, X86_FEATURE_PLN)) + wrmsr(MSR_IA32_THERM_INTERRUPT, + l | (THERM_INT_LOW_ENABLE + | THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h); + else + wrmsr(MSR_IA32_THERM_INTERRUPT, + l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); + + if (cpu_has(c, X86_FEATURE_PTS)) { + rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); + if (cpu_has(c, X86_FEATURE_PLN)) + wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, + l | (PACKAGE_THERM_INT_LOW_ENABLE + | PACKAGE_THERM_INT_HIGH_ENABLE + | PACKAGE_THERM_INT_PLN_ENABLE), h); + else + wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, + l | (PACKAGE_THERM_INT_LOW_ENABLE + | PACKAGE_THERM_INT_HIGH_ENABLE), h); + } smp_thermal_vector = intel_thermal_interrupt; |