diff options
Diffstat (limited to 'arch/x86/kernel/cpu')
25 files changed, 501 insertions, 220 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 68537e957a9..1d2cb383410 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -5,6 +5,7 @@ # Don't trace early stages of a secondary CPU boot ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_common.o = -pg +CFLAGS_REMOVE_perf_event.o = -pg endif # Make sure load_percpu_segment has no stackprotector diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c910a716a71..7128b3799ce 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -535,7 +535,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } } - display_cacheinfo(c); + cpu_detect_cache_sizes(c); /* Multi core CPU? */ if (c->extended_cpuid_level >= 0x80000008) { diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index c95e831bb09..e58d978e075 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -294,7 +294,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_REP_GOOD); } - display_cacheinfo(c); + cpu_detect_cache_sizes(c); } enum { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3192f22f2fd..20399b7b0c3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -61,7 +61,7 @@ void __init setup_cpu_local_masks(void) static void __cpuinit default_init(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_64 - display_cacheinfo(c); + cpu_detect_cache_sizes(c); #else /* Not much we can do here... */ /* Check if at least it has cpuid */ @@ -383,7 +383,7 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c) } } -void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) +void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c) { unsigned int n, dummy, ebx, ecx, edx, l2size; @@ -391,8 +391,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) if (n >= 0x80000005) { cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); c->x86_cache_size = (ecx>>24) + (edx>>24); #ifdef CONFIG_X86_64 /* On K8 L1 TLB is inclusive, so don't count it */ @@ -422,9 +420,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) #endif c->x86_cache_size = l2size; - - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); } void __cpuinit detect_ht(struct cpuinfo_x86 *c) @@ -659,24 +654,31 @@ void __init early_cpu_init(void) const struct cpu_dev *const *cdev; int count = 0; +#ifdef PROCESSOR_SELECT printk(KERN_INFO "KERNEL supported cpus:\n"); +#endif + for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { const struct cpu_dev *cpudev = *cdev; - unsigned int j; if (count >= X86_VENDOR_NUM) break; cpu_devs[count] = cpudev; count++; - for (j = 0; j < 2; j++) { - if (!cpudev->c_ident[j]) - continue; - printk(KERN_INFO " %s %s\n", cpudev->c_vendor, - cpudev->c_ident[j]); +#ifdef PROCESSOR_SELECT + { + unsigned int j; + + for (j = 0; j < 2; j++) { + if (!cpudev->c_ident[j]) + continue; + printk(KERN_INFO " %s %s\n", cpudev->c_vendor, + cpudev->c_ident[j]); + } } +#endif } - early_identify_cpu(&boot_cpu_data); } @@ -837,10 +839,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; } -#ifdef CONFIG_X86_MCE /* Init Machine Check Exception if available. */ - mcheck_init(c); -#endif + mcheck_cpu_init(c); select_idle_routine(c); @@ -1136,7 +1136,7 @@ void __cpuinit cpu_init(void) wrmsrl(MSR_KERNEL_GS_BASE, 0); barrier(); - check_efer(); + x86_configure_nx(); if (cpu != 0) enable_x2apic(); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 6de9a908e40..3624e8a0f71 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -32,6 +32,6 @@ struct cpu_dev { extern const struct cpu_dev *const __x86_cpu_dev_start[], *const __x86_cpu_dev_end[]; -extern void display_cacheinfo(struct cpuinfo_x86 *c); +extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); #endif diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 43eb3465dda..f28decf8dde 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -526,15 +526,21 @@ static const struct dmi_system_id sw_any_bug_dmi_table[] = { static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) { - /* http://www.intel.com/Assets/PDF/specupdate/314554.pdf + /* Intel Xeon Processor 7100 Series Specification Update + * http://www.intel.com/Assets/PDF/specupdate/314554.pdf * AL30: A Machine Check Exception (MCE) Occurring during an * Enhanced Intel SpeedStep Technology Ratio Change May Cause - * Both Processor Cores to Lock Up when HT is enabled*/ + * Both Processor Cores to Lock Up. */ if (c->x86_vendor == X86_VENDOR_INTEL) { if ((c->x86 == 15) && (c->x86_model == 6) && - (c->x86_mask == 8) && smt_capable()) + (c->x86_mask == 8)) { + printk(KERN_INFO "acpi-cpufreq: Intel(R) " + "Xeon(R) 7100 Errata AL30, processors may " + "lock up on frequency changes: disabling " + "acpi-cpufreq.\n"); return -ENODEV; + } } return 0; } @@ -549,13 +555,18 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) unsigned int result = 0; struct cpuinfo_x86 *c = &cpu_data(policy->cpu); struct acpi_processor_performance *perf; +#ifdef CONFIG_SMP + static int blacklisted; +#endif dprintk("acpi_cpufreq_cpu_init\n"); #ifdef CONFIG_SMP - result = acpi_cpufreq_blacklist(c); - if (result) - return result; + if (blacklisted) + return blacklisted; + blacklisted = acpi_cpufreq_blacklist(c); + if (blacklisted) + return blacklisted; #endif data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); @@ -753,14 +764,15 @@ static struct freq_attr *acpi_cpufreq_attr[] = { }; static struct cpufreq_driver acpi_cpufreq_driver = { - .verify = acpi_cpufreq_verify, - .target = acpi_cpufreq_target, - .init = acpi_cpufreq_cpu_init, - .exit = acpi_cpufreq_cpu_exit, - .resume = acpi_cpufreq_resume, - .name = "acpi-cpufreq", - .owner = THIS_MODULE, - .attr = acpi_cpufreq_attr, + .verify = acpi_cpufreq_verify, + .target = acpi_cpufreq_target, + .bios_limit = acpi_processor_get_bios_limit, + .init = acpi_cpufreq_cpu_init, + .exit = acpi_cpufreq_cpu_exit, + .resume = acpi_cpufreq_resume, + .name = "acpi-cpufreq", + .owner = THIS_MODULE, + .attr = acpi_cpufreq_attr, }; static int __init acpi_cpufreq_init(void) diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index ce2ed3e4aad..7e7eea4f826 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -813,7 +813,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr)); break; case 1 ... 15: - longhaul_version = TYPE_LONGHAUL_V1; + longhaul_version = TYPE_LONGHAUL_V2; if (c->x86_mask < 8) { cpu_model = CPU_SAMUEL2; cpuname = "C3 'Samuel 2' [C5B]"; @@ -885,7 +885,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) /* Find ACPI data for processor */ acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, &longhaul_walk_callback, + ACPI_UINT32_MAX, &longhaul_walk_callback, NULL, NULL, (void *)&pr); /* Check ACPI support for C3 state */ diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index f10dea409f4..cb01dac267d 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -164,7 +164,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) } /* cpuinfo and default policy values */ - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cpuinfo.transition_latency = 200000; policy->cur = busfreq * max_multiplier; result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index d47c775eb0a..9a97116f89e 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -714,14 +714,17 @@ static struct freq_attr *powernow_table_attr[] = { }; static struct cpufreq_driver powernow_driver = { - .verify = powernow_verify, - .target = powernow_target, - .get = powernow_get, - .init = powernow_cpu_init, - .exit = powernow_cpu_exit, - .name = "powernow-k7", - .owner = THIS_MODULE, - .attr = powernow_table_attr, + .verify = powernow_verify, + .target = powernow_target, + .get = powernow_get, +#ifdef CONFIG_X86_POWERNOW_K7_ACPI + .bios_limit = acpi_processor_get_bios_limit, +#endif + .init = powernow_cpu_init, + .exit = powernow_cpu_exit, + .name = "powernow-k7", + .owner = THIS_MODULE, + .attr = powernow_table_attr, }; static int __init powernow_init(void) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 6394aa5c798..a9df9441a9a 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1022,7 +1022,7 @@ static int get_transition_latency(struct powernow_k8_data *data) * set it to 1 to avoid problems in the future. * For all others it's a BIOS bug. */ - if (!boot_cpu_data.x86 == 0x11) + if (boot_cpu_data.x86 != 0x11) printk(KERN_ERR FW_WARN PFX "Invalid zero transition " "latency\n"); max_latency = 1; @@ -1118,7 +1118,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) { - cpumask_t oldmask; + cpumask_var_t oldmask; struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); u32 checkfid; u32 checkvid; @@ -1131,9 +1131,13 @@ static int powernowk8_target(struct cpufreq_policy *pol, checkfid = data->currfid; checkvid = data->currvid; - /* only run on specific CPU from here on */ - oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); + /* only run on specific CPU from here on. */ + /* This is poor form: use a workqueue or smp_call_function_single */ + if (!alloc_cpumask_var(&oldmask, GFP_KERNEL)) + return -ENOMEM; + + cpumask_copy(oldmask, tsk_cpumask(current)); + set_cpus_allowed_ptr(current, cpumask_of(pol->cpu)); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); @@ -1193,7 +1197,8 @@ static int powernowk8_target(struct cpufreq_policy *pol, ret = 0; err_out: - set_cpus_allowed_ptr(current, &oldmask); + set_cpus_allowed_ptr(current, oldmask); + free_cpumask_var(oldmask); return ret; } @@ -1393,14 +1398,15 @@ static struct freq_attr *powernow_k8_attr[] = { }; static struct cpufreq_driver cpufreq_amd64_driver = { - .verify = powernowk8_verify, - .target = powernowk8_target, - .init = powernowk8_cpu_init, - .exit = __devexit_p(powernowk8_cpu_exit), - .get = powernowk8_get, - .name = "powernow-k8", - .owner = THIS_MODULE, - .attr = powernow_k8_attr, + .verify = powernowk8_verify, + .target = powernowk8_target, + .bios_limit = acpi_processor_get_bios_limit, + .init = powernowk8_cpu_init, + .exit = __devexit_p(powernowk8_cpu_exit), + .get = powernowk8_get, + .name = "powernow-k8", + .owner = THIS_MODULE, + .attr = powernow_k8_attr, }; /* driver entry point for init */ diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 6911e91fb4f..2ce8e0b5cc5 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -39,7 +39,7 @@ static struct pci_dev *speedstep_chipset_dev; /* speedstep_processor */ -static unsigned int speedstep_processor; +static enum speedstep_processor speedstep_processor; static u32 pmbase; @@ -232,28 +232,23 @@ static unsigned int speedstep_detect_chipset(void) return 0; } -struct get_freq_data { - unsigned int speed; - unsigned int processor; -}; - -static void get_freq_data(void *_data) +static void get_freq_data(void *_speed) { - struct get_freq_data *data = _data; + unsigned int *speed = _speed; - data->speed = speedstep_get_frequency(data->processor); + *speed = speedstep_get_frequency(speedstep_processor); } static unsigned int speedstep_get(unsigned int cpu) { - struct get_freq_data data = { .processor = cpu }; + unsigned int speed; /* You're supposed to ensure CPU is online. */ - if (smp_call_function_single(cpu, get_freq_data, &data, 1) != 0) + if (smp_call_function_single(cpu, get_freq_data, &speed, 1) != 0) BUG(); - dprintk("detected %u kHz as current frequency\n", data.speed); - return data.speed; + dprintk("detected %u kHz as current frequency\n", speed); + return speed; } /** diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index f4c290b8482..ad0083abfa2 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -34,7 +34,7 @@ static int relaxed_check; * GET PROCESSOR CORE SPEED IN KHZ * *********************************************************************/ -static unsigned int pentium3_get_frequency(unsigned int processor) +static unsigned int pentium3_get_frequency(enum speedstep_processor processor) { /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ struct { @@ -227,7 +227,7 @@ static unsigned int pentium4_get_frequency(void) /* Warning: may get called from smp_call_function_single. */ -unsigned int speedstep_get_frequency(unsigned int processor) +unsigned int speedstep_get_frequency(enum speedstep_processor processor) { switch (processor) { case SPEEDSTEP_CPU_PCORE: @@ -380,7 +380,7 @@ EXPORT_SYMBOL_GPL(speedstep_detect_processor); * DETECT SPEEDSTEP SPEEDS * *********************************************************************/ -unsigned int speedstep_get_freqs(unsigned int processor, +unsigned int speedstep_get_freqs(enum speedstep_processor processor, unsigned int *low_speed, unsigned int *high_speed, unsigned int *transition_latency, diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h index 2b6c04e5a30..70d9cea1219 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h @@ -11,18 +11,18 @@ /* processors */ - -#define SPEEDSTEP_CPU_PIII_C_EARLY 0x00000001 /* Coppermine core */ -#define SPEEDSTEP_CPU_PIII_C 0x00000002 /* Coppermine core */ -#define SPEEDSTEP_CPU_PIII_T 0x00000003 /* Tualatin core */ -#define SPEEDSTEP_CPU_P4M 0x00000004 /* P4-M */ - +enum speedstep_processor { + SPEEDSTEP_CPU_PIII_C_EARLY = 0x00000001, /* Coppermine core */ + SPEEDSTEP_CPU_PIII_C = 0x00000002, /* Coppermine core */ + SPEEDSTEP_CPU_PIII_T = 0x00000003, /* Tualatin core */ + SPEEDSTEP_CPU_P4M = 0x00000004, /* P4-M */ /* the following processors are not speedstep-capable and are not auto-detected * in speedstep_detect_processor(). However, their speed can be detected using * the speedstep_get_frequency() call. */ -#define SPEEDSTEP_CPU_PM 0xFFFFFF03 /* Pentium M */ -#define SPEEDSTEP_CPU_P4D 0xFFFFFF04 /* desktop P4 */ -#define SPEEDSTEP_CPU_PCORE 0xFFFFFF05 /* Core */ + SPEEDSTEP_CPU_PM = 0xFFFFFF03, /* Pentium M */ + SPEEDSTEP_CPU_P4D = 0xFFFFFF04, /* desktop P4 */ + SPEEDSTEP_CPU_PCORE = 0xFFFFFF05, /* Core */ +}; /* speedstep states -- only two of them */ @@ -31,10 +31,10 @@ /* detect a speedstep-capable processor */ -extern unsigned int speedstep_detect_processor (void); +extern enum speedstep_processor speedstep_detect_processor(void); /* detect the current speed (in khz) of the processor */ -extern unsigned int speedstep_get_frequency(unsigned int processor); +extern unsigned int speedstep_get_frequency(enum speedstep_processor processor); /* detect the low and high speeds of the processor. The callback @@ -42,7 +42,7 @@ extern unsigned int speedstep_get_frequency(unsigned int processor); * SPEEDSTEP_LOW; the second argument is zero so that no * cpufreq_notify_transition calls are initiated. */ -extern unsigned int speedstep_get_freqs(unsigned int processor, +extern unsigned int speedstep_get_freqs(enum speedstep_processor processor, unsigned int *low_speed, unsigned int *high_speed, unsigned int *transition_latency, diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c index befea088e4f..04d73c114e4 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c @@ -35,7 +35,7 @@ static int smi_cmd; static unsigned int smi_sig; /* info about the processor */ -static unsigned int speedstep_processor; +static enum speedstep_processor speedstep_processor; /* * There are only two frequency states for each processor. Values diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 19807b89f05..4fbd384fb64 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -373,7 +373,7 @@ static void __cpuinit init_nsc(struct cpuinfo_x86 *c) /* Handle the GX (Formally known as the GX2) */ if (c->x86 == 5 && c->x86_model == 5) - display_cacheinfo(c); + cpu_detect_cache_sizes(c); else init_cyrix(c); } diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 40e1835b35e..c900b73f922 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -263,8 +263,12 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) /* Don't do the funky fallback heuristics the AMD version employs for now. */ node = apicid_to_node[apicid]; - if (node == NUMA_NO_NODE || !node_online(node)) + if (node == NUMA_NO_NODE) node = first_node(node_online_map); + else if (!node_online(node)) { + /* reuse the value from init_cpu_to_node() */ + node = cpu_to_node(cpu); + } numa_set_node(cpu, node); printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index f5ccb4fa5a5..0c06bca2a1d 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -94,7 +94,7 @@ static const struct _cache_table __cpuinitconst cache_table[] = { 0xd1, LVL_3, 1024 }, /* 4-way set assoc, 64 byte line size */ { 0xd2, LVL_3, 2048 }, /* 4-way set assoc, 64 byte line size */ { 0xd6, LVL_3, 1024 }, /* 8-way set assoc, 64 byte line size */ - { 0xd7, LVL_3, 2038 }, /* 8-way set assoc, 64 byte line size */ + { 0xd7, LVL_3, 2048 }, /* 8-way set assoc, 64 byte line size */ { 0xd8, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ { 0xdc, LVL_3, 2048 }, /* 12-way set assoc, 64 byte line size */ { 0xdd, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ @@ -102,6 +102,9 @@ static const struct _cache_table __cpuinitconst cache_table[] = { 0xe2, LVL_3, 2048 }, /* 16-way set assoc, 64 byte line size */ { 0xe3, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ { 0xe4, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ + { 0xea, LVL_3, 12288 }, /* 24-way set assoc, 64 byte line size */ + { 0xeb, LVL_3, 18432 }, /* 24-way set assoc, 64 byte line size */ + { 0xec, LVL_3, 24576 }, /* 24-way set assoc, 64 byte line size */ { 0x00, 0, 0} }; @@ -488,22 +491,6 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) #endif } - if (trace) - printk(KERN_INFO "CPU: Trace cache: %dK uops", trace); - else if (l1i) - printk(KERN_INFO "CPU: L1 I cache: %dK", l1i); - - if (l1d) - printk(KERN_CONT ", L1 D cache: %dK\n", l1d); - else - printk(KERN_CONT "\n"); - - if (l2) - printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); - - if (l3) - printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); - c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); return l2; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 183c3457d2f..a8aacd4b513 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -46,6 +46,9 @@ #include "mce-internal.h" +#define CREATE_TRACE_POINTS +#include <trace/events/mce.h> + int mce_disabled __read_mostly; #define MISC_MCELOG_MINOR 227 @@ -85,6 +88,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait); static DEFINE_PER_CPU(struct mce, mces_seen); static int cpu_missing; +/* + * CPU/chipset specific EDAC code can register a notifier call here to print + * MCE errors in a human-readable form. + */ +ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); +EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); + +static int default_decode_mce(struct notifier_block *nb, unsigned long val, + void *data) +{ + pr_emerg("No human readable MCE decoding support on this CPU type.\n"); + pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); + + return NOTIFY_STOP; +} + +static struct notifier_block mce_dec_nb = { + .notifier_call = default_decode_mce, + .priority = -1, +}; /* MCA banks polled by the period polling timer for corrected events */ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { @@ -129,6 +152,9 @@ void mce_log(struct mce *mce) { unsigned next, entry; + /* Emit the trace record: */ + trace_mce_record(mce); + mce->finished = 0; wmb(); for (;;) { @@ -165,46 +191,46 @@ void mce_log(struct mce *mce) set_bit(0, &mce_need_notify); } -void __weak decode_mce(struct mce *m) -{ - return; -} - static void print_mce(struct mce *m) { - printk(KERN_EMERG - "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", + pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", m->extcpu, m->mcgstatus, m->bank, m->status); + if (m->ip) { - printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", - !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", - m->cs, m->ip); + pr_emerg("RIP%s %02x:<%016Lx> ", + !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", + m->cs, m->ip); + if (m->cs == __KERNEL_CS) print_symbol("{%s}", m->ip); - printk(KERN_CONT "\n"); + pr_cont("\n"); } - printk(KERN_EMERG "TSC %llx ", m->tsc); + + pr_emerg("TSC %llx ", m->tsc); if (m->addr) - printk(KERN_CONT "ADDR %llx ", m->addr); + pr_cont("ADDR %llx ", m->addr); if (m->misc) - printk(KERN_CONT "MISC %llx ", m->misc); - printk(KERN_CONT "\n"); - printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", - m->cpuvendor, m->cpuid, m->time, m->socketid, - m->apicid); + pr_cont("MISC %llx ", m->misc); + + pr_cont("\n"); + pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", + m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); - decode_mce(m); + /* + * Print out human-readable details about the MCE error, + * (if the CPU has an implementation for that) + */ + atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); } static void print_mce_head(void) { - printk(KERN_EMERG "\nHARDWARE ERROR\n"); + pr_emerg("\nHARDWARE ERROR\n"); } static void print_mce_tail(void) { - printk(KERN_EMERG "This is not a software problem!\n" - "Run through mcelog --ascii to decode and contact your hardware vendor\n"); + pr_emerg("This is not a software problem!\n"); } #define PANIC_TIMEOUT 5 /* 5 seconds */ @@ -218,6 +244,7 @@ static atomic_t mce_fake_paniced; static void wait_for_panic(void) { long timeout = PANIC_TIMEOUT*USEC_PER_SEC; + preempt_disable(); local_irq_enable(); while (timeout-- > 0) @@ -285,6 +312,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp) static int msr_to_offset(u32 msr) { unsigned bank = __get_cpu_var(injectm.bank); + if (msr == rip_msr) return offsetof(struct mce, ip); if (msr == MSR_IA32_MCx_STATUS(bank)) @@ -1108,7 +1136,7 @@ static int check_interval = 5 * 60; /* 5 minutes */ static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ static DEFINE_PER_CPU(struct timer_list, mce_timer); -static void mcheck_timer(unsigned long data) +static void mce_start_timer(unsigned long data) { struct timer_list *t = &per_cpu(mce_timer, data); int *n; @@ -1173,7 +1201,7 @@ int mce_notify_irq(void) } EXPORT_SYMBOL_GPL(mce_notify_irq); -static int mce_banks_init(void) +static int __cpuinit __mcheck_cpu_mce_banks_init(void) { int i; @@ -1192,7 +1220,7 @@ static int mce_banks_init(void) /* * Initialize Machine Checks for a CPU. */ -static int __cpuinit mce_cap_init(void) +static int __cpuinit __mcheck_cpu_cap_init(void) { unsigned b; u64 cap; @@ -1200,7 +1228,8 @@ static int __cpuinit mce_cap_init(void) rdmsrl(MSR_IA32_MCG_CAP, cap); b = cap & MCG_BANKCNT_MASK; - printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); + if (!banks) + printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); if (b > MAX_NR_BANKS) { printk(KERN_WARNING @@ -1213,7 +1242,7 @@ static int __cpuinit mce_cap_init(void) WARN_ON(banks != 0 && b != banks); banks = b; if (!mce_banks) { - int err = mce_banks_init(); + int err = __mcheck_cpu_mce_banks_init(); if (err) return err; @@ -1229,7 +1258,7 @@ static int __cpuinit mce_cap_init(void) return 0; } -static void mce_init(void) +static void __mcheck_cpu_init_generic(void) { mce_banks_t all_banks; u64 cap; @@ -1258,7 +1287,7 @@ static void mce_init(void) } /* Add per CPU specific workarounds here */ -static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) +static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) { if (c->x86_vendor == X86_VENDOR_UNKNOWN) { pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); @@ -1326,7 +1355,7 @@ static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) return 0; } -static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) +static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) { if (c->x86 != 5) return; @@ -1340,7 +1369,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) } } -static void mce_cpu_features(struct cpuinfo_x86 *c) +static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) { switch (c->x86_vendor) { case X86_VENDOR_INTEL: @@ -1354,18 +1383,19 @@ static void mce_cpu_features(struct cpuinfo_x86 *c) } } -static void mce_init_timer(void) +static void __mcheck_cpu_init_timer(void) { struct timer_list *t = &__get_cpu_var(mce_timer); int *n = &__get_cpu_var(mce_next_interval); + setup_timer(t, mce_start_timer, smp_processor_id()); + if (mce_ignore_ce) return; *n = check_interval * HZ; if (!*n) return; - setup_timer(t, mcheck_timer, smp_processor_id()); t->expires = round_jiffies(jiffies + *n); add_timer_on(t, smp_processor_id()); } @@ -1385,27 +1415,28 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) = * Called for each booted CPU to set up machine checks. * Must be called with preempt off: */ -void __cpuinit mcheck_init(struct cpuinfo_x86 *c) +void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) { if (mce_disabled) return; - mce_ancient_init(c); + __mcheck_cpu_ancient_init(c); if (!mce_available(c)) return; - if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) { + if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { mce_disabled = 1; return; } machine_check_vector = do_machine_check; - mce_init(); - mce_cpu_features(c); - mce_init_timer(); + __mcheck_cpu_init_generic(); + __mcheck_cpu_init_vendor(c); + __mcheck_cpu_init_timer(); INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); + } /* @@ -1625,6 +1656,15 @@ static int __init mcheck_enable(char *str) } __setup("mce", mcheck_enable); +int __init mcheck_init(void) +{ + atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb); + + mcheck_intel_therm_init(); + + return 0; +} + /* * Sysfs support */ @@ -1633,7 +1673,7 @@ __setup("mce", mcheck_enable); * Disable machine checks on suspend and shutdown. We can't really handle * them later. */ -static int mce_disable(void) +static int mce_disable_error_reporting(void) { int i; @@ -1648,12 +1688,12 @@ static int mce_disable(void) static int mce_suspend(struct sys_device *dev, pm_message_t state) { - return mce_disable(); + return mce_disable_error_reporting(); } static int mce_shutdown(struct sys_device *dev) { - return mce_disable(); + return mce_disable_error_reporting(); } /* @@ -1663,8 +1703,8 @@ static int mce_shutdown(struct sys_device *dev) */ static int mce_resume(struct sys_device *dev) { - mce_init(); - mce_cpu_features(¤t_cpu_data); + __mcheck_cpu_init_generic(); + __mcheck_cpu_init_vendor(¤t_cpu_data); return 0; } @@ -1674,8 +1714,8 @@ static void mce_cpu_restart(void *data) del_timer_sync(&__get_cpu_var(mce_timer)); if (!mce_available(¤t_cpu_data)) return; - mce_init(); - mce_init_timer(); + __mcheck_cpu_init_generic(); + __mcheck_cpu_init_timer(); } /* Reinit MCEs after user configuration changes */ @@ -1701,7 +1741,7 @@ static void mce_enable_ce(void *all) cmci_reenable(); cmci_recheck(); if (all) - mce_init_timer(); + __mcheck_cpu_init_timer(); } static struct sysdev_class mce_sysclass = { @@ -1889,7 +1929,7 @@ error2: sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr); error: while (--i >= 0) - sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr); + sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); sysdev_unregister(&per_cpu(mce_dev, cpu)); @@ -1914,13 +1954,14 @@ static __cpuinit void mce_remove_device(unsigned int cpu) } /* Make sure there are no machine checks on offlined CPUs. */ -static void mce_disable_cpu(void *h) +static void __cpuinit mce_disable_cpu(void *h) { unsigned long action = *(unsigned long *)h; int i; if (!mce_available(¤t_cpu_data)) return; + if (!(action & CPU_TASKS_FROZEN)) cmci_clear(); for (i = 0; i < banks; i++) { @@ -1931,7 +1972,7 @@ static void mce_disable_cpu(void *h) } } -static void mce_reenable_cpu(void *h) +static void __cpuinit mce_reenable_cpu(void *h) { unsigned long action = *(unsigned long *)h; int i; @@ -1976,9 +2017,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) break; case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: - t->expires = round_jiffies(jiffies + + if (!mce_ignore_ce && check_interval) { + t->expires = round_jiffies(jiffies + __get_cpu_var(mce_next_interval)); - add_timer_on(t, cpu); + add_timer_on(t, cpu); + } smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); break; case CPU_POST_DEAD: @@ -2010,7 +2053,7 @@ static __init void mce_init_banks(void) } } -static __init int mce_init_device(void) +static __init int mcheck_init_device(void) { int err; int i = 0; @@ -2038,7 +2081,7 @@ static __init int mce_init_device(void) return err; } -device_initcall(mce_init_device); +device_initcall(mcheck_init_device); /* * Old style boot options parsing. Only for compatibility. @@ -2086,7 +2129,7 @@ static int fake_panic_set(void *data, u64 val) DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set, "%llu\n"); -static int __init mce_debugfs_init(void) +static int __init mcheck_debugfs_init(void) { struct dentry *dmce, *ffake_panic; @@ -2100,5 +2143,5 @@ static int __init mce_debugfs_init(void) return 0; } -late_initcall(mce_debugfs_init); +late_initcall(mcheck_debugfs_init); #endif diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 889f665fe93..7c785634af2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -8,6 +8,7 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <linux/percpu.h> +#include <linux/sched.h> #include <asm/apic.h> #include <asm/processor.h> #include <asm/msr.h> diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index b3a1dba7533..4fef985fc22 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -49,6 +49,8 @@ static DEFINE_PER_CPU(struct thermal_state, thermal_state); static atomic_t therm_throt_en = ATOMIC_INIT(0); +static u32 lvtthmr_init __read_mostly; + #ifdef CONFIG_SYSFS #define define_therm_throt_sysdev_one_ro(_name) \ static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) @@ -254,6 +256,18 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) ack_APIC_irq(); } +void __init mcheck_intel_therm_init(void) +{ + /* + * This function is only called on boot CPU. Save the init thermal + * LVT value on BSP and use that value to restore APs' thermal LVT + * entry BIOS programmed later + */ + if (cpu_has(&boot_cpu_data, X86_FEATURE_ACPI) && + cpu_has(&boot_cpu_data, X86_FEATURE_ACC)) + lvtthmr_init = apic_read(APIC_LVTTHMR); +} + void intel_init_thermal(struct cpuinfo_x86 *c) { unsigned int cpu = smp_processor_id(); @@ -270,7 +284,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c) * since it might be delivered via SMI already: */ rdmsr(MSR_IA32_MISC_ENABLE, l, h); - h = apic_read(APIC_LVTTHMR); + + /* + * The initial value of thermal LVT entries on all APs always reads + * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI + * sequence to them and LVT registers are reset to 0s except for + * the mask bits which are set to 1s when APs receive INIT IPI. + * Always restore the value that BIOS has programmed on AP based on + * BSP's info we saved since BIOS is always setting the same value + * for all threads/cores + */ + apic_write(APIC_LVTTHMR, lvtthmr_init); + + h = lvtthmr_init; + if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu); diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 315738c74aa..09b1698e046 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -170,6 +170,41 @@ static int __init cmp_range(const void *x1, const void *x2) return start1 - start2; } +static int __init clean_sort_range(struct res_range *range, int az) +{ + int i, j, k = az - 1, nr_range = 0; + + for (i = 0; i < k; i++) { + if (range[i].end) + continue; + for (j = k; j > i; j--) { + if (range[j].end) { + k = j; + break; + } + } + if (j == i) + break; + range[i].start = range[k].start; + range[i].end = range[k].end; + range[k].start = 0; + range[k].end = 0; + k--; + } + /* count it */ + for (i = 0; i < az; i++) { + if (!range[i].end) { + nr_range = i; + break; + } + } + + /* sort them */ + sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); + + return nr_range; +} + #define BIOS_BUG_MSG KERN_WARNING \ "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" @@ -223,22 +258,18 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, subtract_range(range, extra_remove_base, extra_remove_base + extra_remove_size - 1); - /* get new range num */ - nr_range = 0; - for (i = 0; i < RANGE_NUM; i++) { - if (!range[i].end) - continue; - nr_range++; - } if (debug_print) { printk(KERN_DEBUG "After UC checking\n"); - for (i = 0; i < nr_range; i++) + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); + } } /* sort the ranges */ - sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); + nr_range = clean_sort_range(range, RANGE_NUM); if (debug_print) { printk(KERN_DEBUG "After sorting\n"); for (i = 0; i < nr_range; i++) @@ -689,8 +720,6 @@ static int __init mtrr_need_cleanup(void) continue; if (!size) type = MTRR_NUM_TYPES; - if (type == MTRR_TYPE_WRPROT) - type = MTRR_TYPE_UNCACHABLE; num[type]++; } @@ -846,7 +875,7 @@ int __init mtrr_cleanup(unsigned address_bits) sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); range_sums = sum_ranges(range, nr_range); - printk(KERN_INFO "total RAM coverred: %ldM\n", + printk(KERN_INFO "total RAM covered: %ldM\n", range_sums >> (20 - PAGE_SHIFT)); if (mtrr_chunk_size && mtrr_gran_size) { diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index f04e7252760..3c1b12d461d 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -96,17 +96,24 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) unsigned long long base, size; char *ptr; char line[LINE_SIZE]; + int length; size_t linelen; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!len) - return -EINVAL; memset(line, 0, LINE_SIZE); - if (len > LINE_SIZE) - len = LINE_SIZE; - if (copy_from_user(line, buf, len - 1)) + + length = len; + length--; + + if (length > LINE_SIZE - 1) + length = LINE_SIZE - 1; + + if (length < 0) + return -EINVAL; + + if (copy_from_user(line, buf, length)) return -EFAULT; linelen = strlen(line); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index b5801c31184..45506d5dd8d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -77,6 +77,18 @@ struct cpu_hw_events { struct debug_store *ds; }; +struct event_constraint { + unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + int code; +}; + +#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) } +#define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 } + +#define for_each_event_constraint(e, c) \ + for ((e) = (c); (e)->idxmsk[0]; (e)++) + + /* * struct x86_pmu - generic x86 pmu */ @@ -102,6 +114,8 @@ struct x86_pmu { u64 intel_ctrl; void (*enable_bts)(u64 config); void (*disable_bts)(void); + int (*get_event_idx)(struct cpu_hw_events *cpuc, + struct hw_perf_event *hwc); }; static struct x86_pmu x86_pmu __read_mostly; @@ -110,6 +124,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; +static const struct event_constraint *event_constraints; + /* * Not sure about some of these */ @@ -155,6 +171,16 @@ static u64 p6_pmu_raw_event(u64 hw_event) return hw_event & P6_EVNTSEL_MASK; } +static const struct event_constraint intel_p6_event_constraints[] = +{ + EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ + EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ + EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ + EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + EVENT_CONSTRAINT_END +}; /* * Intel PerfMon v3. Used on Core2 and later. @@ -170,6 +196,35 @@ static const u64 intel_perfmon_event_map[] = [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, }; +static const struct event_constraint intel_core_event_constraints[] = +{ + EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ + EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ + EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ + EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ + EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ + EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ + EVENT_CONSTRAINT_END +}; + +static const struct event_constraint intel_nehalem_event_constraints[] = +{ + EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ + EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ + EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ + EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ + EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ + EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */ + EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ + EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ + EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */ + EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */ + EVENT_CONSTRAINT_END +}; + static u64 intel_pmu_event_map(int hw_event) { return intel_perfmon_event_map[hw_event]; @@ -190,7 +245,7 @@ static u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; -static const u64 nehalem_hw_cache_event_ids +static __initconst u64 nehalem_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -281,7 +336,7 @@ static const u64 nehalem_hw_cache_event_ids }, }; -static const u64 core2_hw_cache_event_ids +static __initconst u64 core2_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -372,7 +427,7 @@ static const u64 core2_hw_cache_event_ids }, }; -static const u64 atom_hw_cache_event_ids +static __initconst u64 atom_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -469,7 +524,7 @@ static u64 intel_pmu_raw_event(u64 hw_event) #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL #define CORE_EVNTSEL_INV_MASK 0x00800000ULL -#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL +#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL #define CORE_EVNTSEL_MASK \ (CORE_EVNTSEL_EVENT_MASK | \ @@ -481,7 +536,7 @@ static u64 intel_pmu_raw_event(u64 hw_event) return hw_event & CORE_EVNTSEL_MASK; } -static const u64 amd_hw_cache_event_ids +static __initconst u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -932,6 +987,8 @@ static int __hw_perf_event_init(struct perf_event *event) */ hwc->config = ARCH_PERFMON_EVENTSEL_INT; + hwc->idx = -1; + /* * Count user and OS events unless requested not to. */ @@ -1229,7 +1286,7 @@ x86_perf_event_set_period(struct perf_event *event, return 0; /* - * If we are way outside a reasoable range then just skip forward: + * If we are way outside a reasonable range then just skip forward: */ if (unlikely(left <= -period)) { left = period; @@ -1334,8 +1391,7 @@ static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) x86_pmu_enable_event(hwc, idx); } -static int -fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) +static int fixed_mode_idx(struct hw_perf_event *hwc) { unsigned int hw_event; @@ -1349,6 +1405,12 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) if (!x86_pmu.num_events_fixed) return -1; + /* + * fixed counters do not take all possible filters + */ + if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK) + return -1; + if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) return X86_PMC_IDX_FIXED_INSTRUCTIONS; if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) @@ -1360,22 +1422,57 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) } /* - * Find a PMC slot for the freshly enabled / scheduled in event: + * generic counter allocator: get next free counter */ -static int x86_pmu_enable(struct perf_event *event) +static int +gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) +{ + int idx; + + idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events); + return idx == x86_pmu.num_events ? -1 : idx; +} + +/* + * intel-specific counter allocator: check event constraints + */ +static int +intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) +{ + const struct event_constraint *event_constraint; + int i, code; + + if (!event_constraints) + goto skip; + + code = hwc->config & CORE_EVNTSEL_EVENT_MASK; + + for_each_event_constraint(event_constraint, event_constraints) { + if (code == event_constraint->code) { + for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) { + if (!test_and_set_bit(i, cpuc->used_mask)) + return i; + } + return -1; + } + } +skip: + return gen_get_event_idx(cpuc, hwc); +} + +static int +x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; int idx; - idx = fixed_mode_idx(event, hwc); + idx = fixed_mode_idx(hwc); if (idx == X86_PMC_IDX_FIXED_BTS) { /* BTS is already occupied. */ if (test_and_set_bit(idx, cpuc->used_mask)) return -EAGAIN; hwc->config_base = 0; - hwc->event_base = 0; + hwc->event_base = 0; hwc->idx = idx; } else if (idx >= 0) { /* @@ -1396,20 +1493,35 @@ static int x86_pmu_enable(struct perf_event *event) } else { idx = hwc->idx; /* Try to get the previous generic event again */ - if (test_and_set_bit(idx, cpuc->used_mask)) { + if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) { try_generic: - idx = find_first_zero_bit(cpuc->used_mask, - x86_pmu.num_events); - if (idx == x86_pmu.num_events) + idx = x86_pmu.get_event_idx(cpuc, hwc); + if (idx == -1) return -EAGAIN; set_bit(idx, cpuc->used_mask); hwc->idx = idx; } - hwc->config_base = x86_pmu.eventsel; - hwc->event_base = x86_pmu.perfctr; + hwc->config_base = x86_pmu.eventsel; + hwc->event_base = x86_pmu.perfctr; } + return idx; +} + +/* + * Find a PMC slot for the freshly enabled / scheduled in event: + */ +static int x86_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx; + + idx = x86_schedule_event(cpuc, hwc); + if (idx < 0) + return idx; + perf_events_lapic_init(); x86_pmu.disable(hwc, idx); @@ -1520,6 +1632,7 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) data.period = event->hw.last_period; data.addr = 0; + data.raw = NULL; regs.ip = 0; /* @@ -1637,6 +1750,7 @@ static int p6_pmu_handle_irq(struct pt_regs *regs) u64 val; data.addr = 0; + data.raw = NULL; cpuc = &__get_cpu_var(cpu_hw_events); @@ -1682,6 +1796,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) u64 ack, status; data.addr = 0; + data.raw = NULL; cpuc = &__get_cpu_var(cpu_hw_events); @@ -1745,6 +1860,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) u64 val; data.addr = 0; + data.raw = NULL; cpuc = &__get_cpu_var(cpu_hw_events); @@ -1852,7 +1968,7 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = { .priority = 1 }; -static struct x86_pmu p6_pmu = { +static __initconst struct x86_pmu p6_pmu = { .name = "p6", .handle_irq = p6_pmu_handle_irq, .disable_all = p6_pmu_disable_all, @@ -1877,9 +1993,10 @@ static struct x86_pmu p6_pmu = { */ .event_bits = 32, .event_mask = (1ULL << 32) - 1, + .get_event_idx = intel_get_event_idx, }; -static struct x86_pmu intel_pmu = { +static __initconst struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, .disable_all = intel_pmu_disable_all, @@ -1900,9 +2017,10 @@ static struct x86_pmu intel_pmu = { .max_period = (1ULL << 31) - 1, .enable_bts = intel_pmu_enable_bts, .disable_bts = intel_pmu_disable_bts, + .get_event_idx = intel_get_event_idx, }; -static struct x86_pmu amd_pmu = { +static __initconst struct x86_pmu amd_pmu = { .name = "AMD", .handle_irq = amd_pmu_handle_irq, .disable_all = amd_pmu_disable_all, @@ -1920,9 +2038,10 @@ static struct x86_pmu amd_pmu = { .apic = 1, /* use highest bit to detect overflow */ .max_period = (1ULL << 47) - 1, + .get_event_idx = gen_get_event_idx, }; -static int p6_pmu_init(void) +static __init int p6_pmu_init(void) { switch (boot_cpu_data.x86_model) { case 1: @@ -1932,10 +2051,12 @@ static int p6_pmu_init(void) case 7: case 8: case 11: /* Pentium III */ + event_constraints = intel_p6_event_constraints; break; case 9: case 13: /* Pentium M */ + event_constraints = intel_p6_event_constraints; break; default: pr_cont("unsupported p6 CPU model %d ", @@ -1945,16 +2066,10 @@ static int p6_pmu_init(void) x86_pmu = p6_pmu; - if (!cpu_has_apic) { - pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); - pr_info("no hardware sampling interrupt available.\n"); - x86_pmu.apic = 0; - } - return 0; } -static int intel_pmu_init(void) +static __init int intel_pmu_init(void) { union cpuid10_edx edx; union cpuid10_eax eax; @@ -2007,12 +2122,14 @@ static int intel_pmu_init(void) sizeof(hw_cache_event_ids)); pr_cont("Core2 events, "); + event_constraints = intel_core_event_constraints; break; default: case 26: memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + event_constraints = intel_nehalem_event_constraints; pr_cont("Nehalem/Corei7 events, "); break; case 28: @@ -2025,7 +2142,7 @@ static int intel_pmu_init(void) return 0; } -static int amd_pmu_init(void) +static __init int amd_pmu_init(void) { /* Performance-monitoring supported from K7 and later: */ if (boot_cpu_data.x86 < 6) @@ -2040,6 +2157,16 @@ static int amd_pmu_init(void) return 0; } +static void __init pmu_check_apic(void) +{ + if (cpu_has_apic) + return; + + x86_pmu.apic = 0; + pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); + pr_info("no hardware sampling interrupt available.\n"); +} + void __init init_hw_perf_events(void) { int err; @@ -2061,6 +2188,8 @@ void __init init_hw_perf_events(void) return; } + pmu_check_apic(); + pr_cont("%s PMU driver.\n", x86_pmu.name); if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { @@ -2105,11 +2234,47 @@ static const struct pmu pmu = { .unthrottle = x86_pmu_unthrottle, }; +static int +validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) +{ + struct hw_perf_event fake_event = event->hw; + + if (event->pmu && event->pmu != &pmu) + return 0; + + return x86_schedule_event(cpuc, &fake_event) >= 0; +} + +static int validate_group(struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + struct cpu_hw_events fake_pmu; + + memset(&fake_pmu, 0, sizeof(fake_pmu)); + + if (!validate_event(&fake_pmu, leader)) + return -ENOSPC; + + list_for_each_entry(sibling, &leader->sibling_list, group_entry) { + if (!validate_event(&fake_pmu, sibling)) + return -ENOSPC; + } + + if (!validate_event(&fake_pmu, event)) + return -ENOSPC; + + return 0; +} + const struct pmu *hw_perf_event_init(struct perf_event *event) { int err; err = __hw_perf_event_init(event); + if (!err) { + if (event->group_leader != event) + err = validate_group(event); + } if (err) { if (event->destroy) event->destroy(event); @@ -2132,7 +2297,7 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip) static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); -static DEFINE_PER_CPU(int, in_nmi_frame); +static DEFINE_PER_CPU(int, in_ignored_frame); static void @@ -2148,8 +2313,9 @@ static void backtrace_warning(void *data, char *msg) static int backtrace_stack(void *data, char *name) { - per_cpu(in_nmi_frame, smp_processor_id()) = - x86_is_stack_id(NMI_STACK, name); + per_cpu(in_ignored_frame, smp_processor_id()) = + x86_is_stack_id(NMI_STACK, name) || + x86_is_stack_id(DEBUG_STACK, name); return 0; } @@ -2158,7 +2324,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry *entry = data; - if (per_cpu(in_nmi_frame, smp_processor_id())) + if (per_cpu(in_ignored_frame, smp_processor_id())) return; if (reliable) diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index fab786f60ed..898df9719af 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -712,7 +712,7 @@ static void probe_nmi_watchdog(void) switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && - boot_cpu_data.x86 != 16) + boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17) return; wd_ops = &k7_wd_ops; break; diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index bb62b3e5caa..28000743bbb 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -26,7 +26,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) early_init_transmeta(c); - display_cacheinfo(c); + cpu_detect_cache_sizes(c); /* Print CMS and CPU revision */ max = cpuid_eax(0x80860000); |