diff options
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r-- | arch/x86/kernel/cpu/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 28 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 44 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/hypervisor.c | 14 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-inject.c | 7 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 84 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_amd.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_intel.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/therm_throt.c | 67 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mtrr/if.c | 29 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c (renamed from arch/x86/kernel/cpu/perf_counter.c) | 604 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perfctr-watchdog.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/vmware.c | 27 |
15 files changed, 501 insertions, 418 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 8dd30638fe4..68537e957a9 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -27,7 +27,7 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f32fa71ccf9..c910a716a71 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -184,7 +184,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) * approved Athlon */ WARN_ONCE(1, "WARNING: This combination of AMD" - "processors is not suitable for SMP.\n"); + " processors is not suitable for SMP.\n"); if (!test_taint(TAINT_UNSAFE_SMP)) add_taint(TAINT_UNSAFE_SMP); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2055fc2b2e6..cc25c2b4a56 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -13,7 +13,7 @@ #include <linux/io.h> #include <asm/stackprotector.h> -#include <asm/perf_counter.h> +#include <asm/perf_event.h> #include <asm/mmu_context.h> #include <asm/hypervisor.h> #include <asm/processor.h> @@ -34,7 +34,6 @@ #include <asm/mce.h> #include <asm/msr.h> #include <asm/pat.h> -#include <linux/smp.h> #ifdef CONFIG_X86_LOCAL_APIC #include <asm/uv/uv.h> @@ -870,7 +869,7 @@ void __init identify_boot_cpu(void) #else vgetcpu_set_mode(); #endif - init_hw_perf_counters(); + init_hw_perf_events(); } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 4109679863c..7d5c3b0ea8d 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -33,7 +33,7 @@ #include <linux/cpufreq.h> #include <linux/compiler.h> #include <linux/dmi.h> -#include <trace/power.h> +#include <trace/events/power.h> #include <linux/acpi.h> #include <linux/io.h> @@ -72,8 +72,6 @@ static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); static DEFINE_PER_CPU(struct aperfmperf, old_perf); -DEFINE_TRACE(power_mark); - /* acpi_perf_data is a pointer to percpu data. */ static struct acpi_processor_performance *acpi_perf_data; @@ -332,7 +330,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, unsigned int next_perf_state = 0; /* Index into perf table */ unsigned int i; int result = 0; - struct power_trace it; dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); @@ -364,7 +361,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, } } - trace_power_mark(&it, POWER_PSTATE, next_perf_state); + trace_power_frequency(POWER_PSTATE, data->freq_table[next_state].frequency); switch (data->cpu_feature) { case SYSTEM_INTEL_MSR_CAPABLE: @@ -526,6 +523,21 @@ static const struct dmi_system_id sw_any_bug_dmi_table[] = { }, { } }; + +static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) +{ + /* http://www.intel.com/Assets/PDF/specupdate/314554.pdf + * AL30: A Machine Check Exception (MCE) Occurring during an + * Enhanced Intel SpeedStep Technology Ratio Change May Cause + * Both Processor Cores to Lock Up when HT is enabled*/ + if (c->x86_vendor == X86_VENDOR_INTEL) { + if ((c->x86 == 15) && + (c->x86_model == 6) && + (c->x86_mask == 8) && smt_capable()) + return -ENODEV; + } + return 0; +} #endif static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) @@ -540,6 +552,12 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) dprintk("acpi_cpufreq_cpu_init\n"); +#ifdef CONFIG_SMP + result = acpi_cpufreq_blacklist(c); + if (result) + return result; +#endif + data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); if (!data) return -ENOMEM; diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 2a50ef89100..6394aa5c798 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -605,9 +605,10 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, return 0; } -static void invalidate_entry(struct powernow_k8_data *data, unsigned int entry) +static void invalidate_entry(struct cpufreq_frequency_table *powernow_table, + unsigned int entry) { - data->powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; + powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; } static void print_basics(struct powernow_k8_data *data) @@ -854,6 +855,10 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) goto err_out; } + /* fill in data */ + data->numps = data->acpi_data.state_count; + powernow_k8_acpi_pst_values(data, 0); + if (cpu_family == CPU_HW_PSTATE) ret_val = fill_powernow_table_pstate(data, powernow_table); else @@ -866,11 +871,8 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) powernow_table[data->acpi_data.state_count].index = 0; data->powernow_table = powernow_table; - /* fill in data */ - data->numps = data->acpi_data.state_count; if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) print_basics(data); - powernow_k8_acpi_pst_values(data, 0); /* notify BIOS that we exist */ acpi_processor_notify_smm(THIS_MODULE); @@ -914,13 +916,13 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, "bad value %d.\n", i, index); printk(KERN_ERR PFX "Please report to BIOS " "manufacturer\n"); - invalidate_entry(data, i); + invalidate_entry(powernow_table, i); continue; } rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); if (!(hi & HW_PSTATE_VALID_MASK)) { dprintk("invalid pstate %d, ignoring\n", index); - invalidate_entry(data, i); + invalidate_entry(powernow_table, i); continue; } @@ -941,7 +943,6 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) { int i; - int cntlofreq = 0; for (i = 0; i < data->acpi_data.state_count; i++) { u32 fid; @@ -970,7 +971,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, /* verify frequency is OK */ if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) { dprintk("invalid freq %u kHz, ignoring\n", freq); - invalidate_entry(data, i); + invalidate_entry(powernow_table, i); continue; } @@ -978,38 +979,17 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, * BIOSs are using "off" to indicate invalid */ if (vid == VID_OFF) { dprintk("invalid vid %u, ignoring\n", vid); - invalidate_entry(data, i); + invalidate_entry(powernow_table, i); continue; } - /* verify only 1 entry from the lo frequency table */ - if (fid < HI_FID_TABLE_BOTTOM) { - if (cntlofreq) { - /* if both entries are the same, - * ignore this one ... */ - if ((freq != powernow_table[cntlofreq].frequency) || - (index != powernow_table[cntlofreq].index)) { - printk(KERN_ERR PFX - "Too many lo freq table " - "entries\n"); - return 1; - } - - dprintk("double low frequency table entry, " - "ignoring it.\n"); - invalidate_entry(data, i); - continue; - } else - cntlofreq = i; - } - if (freq != (data->acpi_data.states[i].core_frequency * 1000)) { printk(KERN_INFO PFX "invalid freq entries " "%u kHz vs. %u kHz\n", freq, (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); - invalidate_entry(data, i); + invalidate_entry(powernow_table, i); continue; } } diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 93ba8eeb100..08be922de33 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -34,13 +34,6 @@ detect_hypervisor_vendor(struct cpuinfo_x86 *c) c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE; } -unsigned long get_hypervisor_tsc_freq(void) -{ - if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) - return vmware_get_tsc_khz(); - return 0; -} - static inline void __cpuinit hypervisor_set_feature_bits(struct cpuinfo_x86 *c) { @@ -55,3 +48,10 @@ void __cpuinit init_hypervisor(struct cpuinfo_x86 *c) detect_hypervisor_vendor(c); hypervisor_set_feature_bits(c); } + +void __init init_hypervisor_platform(void) +{ + init_hypervisor(&boot_cpu_data); + if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) + vmware_platform_setup(); +} diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 7029f0e2aca..472763d9209 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -98,8 +98,9 @@ static struct notifier_block mce_raise_nb = { }; /* Inject mce on current CPU */ -static int raise_local(struct mce *m) +static int raise_local(void) { + struct mce *m = &__get_cpu_var(injectm); int context = MCJ_CTX(m->inject_flags); int ret = 0; int cpu = m->extcpu; @@ -167,12 +168,12 @@ static void raise_mce(struct mce *m) } cpu_relax(); } - raise_local(m); + raise_local(); put_cpu(); put_online_cpus(); } else #endif - raise_local(m); + raise_local(); } /* Error injection interface */ diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 2f5aab26320..b1598a9436d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -85,6 +85,18 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait); static DEFINE_PER_CPU(struct mce, mces_seen); static int cpu_missing; +static void default_decode_mce(struct mce *m) +{ + pr_emerg("No human readable MCE decoding support on this CPU type.\n"); + pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); +} + +/* + * CPU/chipset specific EDAC code can register a callback here to print + * MCE errors in a human-readable form: + */ +void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce; +EXPORT_SYMBOL(x86_mce_decode_callback); /* MCA banks polled by the period polling timer for corrected events */ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { @@ -165,49 +177,46 @@ void mce_log(struct mce *mce) set_bit(0, &mce_need_notify); } -void __weak decode_mce(struct mce *m) -{ - return; -} - static void print_mce(struct mce *m) { - printk(KERN_EMERG - "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", + pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", m->extcpu, m->mcgstatus, m->bank, m->status); + if (m->ip) { - printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", - !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", - m->cs, m->ip); + pr_emerg("RIP%s %02x:<%016Lx> ", + !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", + m->cs, m->ip); + if (m->cs == __KERNEL_CS) print_symbol("{%s}", m->ip); - printk(KERN_CONT "\n"); + pr_cont("\n"); } - printk(KERN_EMERG "TSC %llx ", m->tsc); + + pr_emerg("TSC %llx ", m->tsc); if (m->addr) - printk(KERN_CONT "ADDR %llx ", m->addr); + pr_cont("ADDR %llx ", m->addr); if (m->misc) - printk(KERN_CONT "MISC %llx ", m->misc); - printk(KERN_CONT "\n"); - printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", - m->cpuvendor, m->cpuid, m->time, m->socketid, - m->apicid); + pr_cont("MISC %llx ", m->misc); + + pr_cont("\n"); + pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", + m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); - decode_mce(m); + /* + * Print out human-readable details about the MCE error, + * (if the CPU has an implementation for that): + */ + x86_mce_decode_callback(m); } static void print_mce_head(void) { - printk(KERN_EMERG "\nHARDWARE ERROR\n"); + pr_emerg("\nHARDWARE ERROR\n"); } static void print_mce_tail(void) { - printk(KERN_EMERG "This is not a software problem!\n" -#if (!defined(CONFIG_EDAC) || !defined(CONFIG_CPU_SUP_AMD)) - "Run through mcelog --ascii to decode and contact your hardware vendor\n" -#endif - ); + pr_emerg("This is not a software problem!\n"); } #define PANIC_TIMEOUT 5 /* 5 seconds */ @@ -221,6 +230,7 @@ static atomic_t mce_fake_paniced; static void wait_for_panic(void) { long timeout = PANIC_TIMEOUT*USEC_PER_SEC; + preempt_disable(); local_irq_enable(); while (timeout-- > 0) @@ -288,6 +298,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp) static int msr_to_offset(u32 msr) { unsigned bank = __get_cpu_var(injectm.bank); + if (msr == rip_msr) return offsetof(struct mce, ip); if (msr == MSR_IA32_MCx_STATUS(bank)) @@ -305,13 +316,25 @@ static int msr_to_offset(u32 msr) static u64 mce_rdmsrl(u32 msr) { u64 v; + if (__get_cpu_var(injectm).finished) { int offset = msr_to_offset(msr); + if (offset < 0) return 0; return *(u64 *)((char *)&__get_cpu_var(injectm) + offset); } - rdmsrl(msr, v); + + if (rdmsrl_safe(msr, &v)) { + WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr); + /* + * Return zero in case the access faulted. This should + * not happen normally but can happen if the CPU does + * something weird, or if the code is buggy. + */ + v = 0; + } + return v; } @@ -319,6 +342,7 @@ static void mce_wrmsrl(u32 msr, u64 v) { if (__get_cpu_var(injectm).finished) { int offset = msr_to_offset(msr); + if (offset >= 0) *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v; return; @@ -415,7 +439,7 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) m->ip = mce_rdmsrl(rip_msr); } -#ifdef CONFIG_X86_LOCAL_APIC +#ifdef CONFIG_X86_LOCAL_APIC /* * Called after interrupts have been reenabled again * when a MCE happened during an interrupts off region @@ -1172,6 +1196,7 @@ static int mce_banks_init(void) return -ENOMEM; for (i = 0; i < banks; i++) { struct mce_bank *b = &mce_banks[i]; + b->ctl = -1ULL; b->init = 1; } @@ -1203,6 +1228,7 @@ static int __cpuinit mce_cap_init(void) banks = b; if (!mce_banks) { int err = mce_banks_init(); + if (err) return err; } @@ -1237,6 +1263,7 @@ static void mce_init(void) for (i = 0; i < banks; i++) { struct mce_bank *b = &mce_banks[i]; + if (!b->init) continue; wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); @@ -1626,6 +1653,7 @@ static int mce_disable(void) for (i = 0; i < banks; i++) { struct mce_bank *b = &mce_banks[i]; + if (b->init) wrmsrl(MSR_IA32_MCx_CTL(i), 0); } @@ -1911,6 +1939,7 @@ static void mce_disable_cpu(void *h) cmci_clear(); for (i = 0; i < banks; i++) { struct mce_bank *b = &mce_banks[i]; + if (b->init) wrmsrl(MSR_IA32_MCx_CTL(i), 0); } @@ -1928,6 +1957,7 @@ static void mce_reenable_cpu(void *h) cmci_reenable(); for (i = 0; i < banks; i++) { struct mce_bank *b = &mce_banks[i]; + if (b->init) wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); } diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 8cd5224943b..83a3d1f4efc 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -489,8 +489,9 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) int i, err = 0; struct threshold_bank *b = NULL; char name[32]; +#ifdef CONFIG_SMP struct cpuinfo_x86 *c = &cpu_data(cpu); - +#endif sprintf(name, "threshold_bank%i", bank); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 889f665fe93..7c785634af2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -8,6 +8,7 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <linux/percpu.h> +#include <linux/sched.h> #include <asm/apic.h> #include <asm/processor.h> #include <asm/msr.h> diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 63a56d147e4..b3a1dba7533 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -34,20 +34,31 @@ /* How long to wait between reporting thermal events */ #define CHECK_INTERVAL (300 * HZ) -static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; -static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); -static DEFINE_PER_CPU(bool, thermal_throttle_active); +/* + * Current thermal throttling state: + */ +struct thermal_state { + bool is_throttled; + + u64 next_check; + unsigned long throttle_count; + unsigned long last_throttle_count; +}; + +static DEFINE_PER_CPU(struct thermal_state, thermal_state); -static atomic_t therm_throt_en = ATOMIC_INIT(0); +static atomic_t therm_throt_en = ATOMIC_INIT(0); #ifdef CONFIG_SYSFS #define define_therm_throt_sysdev_one_ro(_name) \ static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) #define define_therm_throt_sysdev_show_func(name) \ -static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ - struct sysdev_attribute *attr, \ - char *buf) \ + \ +static ssize_t therm_throt_sysdev_show_##name( \ + struct sys_device *dev, \ + struct sysdev_attribute *attr, \ + char *buf) \ { \ unsigned int cpu = dev->id; \ ssize_t ret; \ @@ -55,7 +66,7 @@ static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ preempt_disable(); /* CPU hotplug */ \ if (cpu_online(cpu)) \ ret = sprintf(buf, "%lu\n", \ - per_cpu(thermal_throttle_##name, cpu)); \ + per_cpu(thermal_state, cpu).name); \ else \ ret = 0; \ preempt_enable(); \ @@ -63,11 +74,11 @@ static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ return ret; \ } -define_therm_throt_sysdev_show_func(count); -define_therm_throt_sysdev_one_ro(count); +define_therm_throt_sysdev_show_func(throttle_count); +define_therm_throt_sysdev_one_ro(throttle_count); static struct attribute *thermal_throttle_attrs[] = { - &attr_count.attr, + &attr_throttle_count.attr, NULL }; @@ -93,33 +104,39 @@ static struct attribute_group thermal_throttle_attr_group = { * 1 : Event should be logged further, and a message has been * printed to the syslog. */ -static int therm_throt_process(int curr) +static int therm_throt_process(bool is_throttled) { - unsigned int cpu = smp_processor_id(); - __u64 tmp_jiffs = get_jiffies_64(); - bool was_throttled = __get_cpu_var(thermal_throttle_active); - bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr; + struct thermal_state *state; + unsigned int this_cpu; + bool was_throttled; + u64 now; + + this_cpu = smp_processor_id(); + now = get_jiffies_64(); + state = &per_cpu(thermal_state, this_cpu); + + was_throttled = state->is_throttled; + state->is_throttled = is_throttled; if (is_throttled) - __get_cpu_var(thermal_throttle_count)++; + state->throttle_count++; - if (!(was_throttled ^ is_throttled) && - time_before64(tmp_jiffs, __get_cpu_var(next_check))) + if (time_before64(now, state->next_check) && + state->throttle_count != state->last_throttle_count) return 0; - __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; + state->next_check = now + CHECK_INTERVAL; + state->last_throttle_count = state->throttle_count; /* if we just entered the thermal event */ if (is_throttled) { - printk(KERN_CRIT "CPU%d: Temperature above threshold, " - "cpu clock throttled (total events = %lu)\n", - cpu, __get_cpu_var(thermal_throttle_count)); + printk(KERN_CRIT "CPU%d: Temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, state->throttle_count); add_taint(TAINT_MACHINE_CHECK); return 1; } if (was_throttled) { - printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); + printk(KERN_INFO "CPU%d: Temperature/speed normal\n", this_cpu); return 1; } @@ -213,7 +230,7 @@ static void intel_thermal_interrupt(void) __u64 msr_val; rdmsrl(MSR_IA32_THERM_STATUS, msr_val); - if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) + if (therm_throt_process((msr_val & THERM_STATUS_PROCHOT) != 0)) mce_log_therm_throt_event(msr_val); } diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 08b6ea4c62b..3c1b12d461d 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -96,17 +96,24 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) unsigned long long base, size; char *ptr; char line[LINE_SIZE]; + int length; size_t linelen; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!len) - return -EINVAL; memset(line, 0, LINE_SIZE); - if (len > LINE_SIZE) - len = LINE_SIZE; - if (copy_from_user(line, buf, len - 1)) + + length = len; + length--; + + if (length > LINE_SIZE - 1) + length = LINE_SIZE - 1; + + if (length < 0) + return -EINVAL; + + if (copy_from_user(line, buf, length)) return -EFAULT; linelen = strlen(line); @@ -126,8 +133,8 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) return -EINVAL; base = simple_strtoull(line + 5, &ptr, 0); - for (; isspace(*ptr); ++ptr) - ; + while (isspace(*ptr)) + ptr++; if (strncmp(ptr, "size=", 5)) return -EINVAL; @@ -135,14 +142,14 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) size = simple_strtoull(ptr + 5, &ptr, 0); if ((base & 0xfff) || (size & 0xfff)) return -EINVAL; - for (; isspace(*ptr); ++ptr) - ; + while (isspace(*ptr)) + ptr++; if (strncmp(ptr, "type=", 5)) return -EINVAL; ptr += 5; - for (; isspace(*ptr); ++ptr) - ; + while (isspace(*ptr)) + ptr++; for (i = 0; i < MTRR_NUM_TYPES; ++i) { if (strcmp(ptr, mtrr_strings[i])) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_event.c index 2732e2c1e4d..b5801c31184 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1,5 +1,5 @@ /* - * Performance counter x86 architecture code + * Performance events x86 architecture code * * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar @@ -11,7 +11,7 @@ * For licencing details see kernel-base/COPYING */ -#include <linux/perf_counter.h> +#include <linux/perf_event.h> #include <linux/capability.h> #include <linux/notifier.h> #include <linux/hardirq.h> @@ -27,19 +27,19 @@ #include <asm/stacktrace.h> #include <asm/nmi.h> -static u64 perf_counter_mask __read_mostly; +static u64 perf_event_mask __read_mostly; -/* The maximal number of PEBS counters: */ -#define MAX_PEBS_COUNTERS 4 +/* The maximal number of PEBS events: */ +#define MAX_PEBS_EVENTS 4 /* The size of a BTS record in bytes: */ #define BTS_RECORD_SIZE 24 /* The size of a per-cpu BTS buffer in bytes: */ -#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 1024) +#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) /* The BTS overflow threshold in bytes from the end of the buffer: */ -#define BTS_OVFL_TH (BTS_RECORD_SIZE * 64) +#define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) /* @@ -65,11 +65,11 @@ struct debug_store { u64 pebs_index; u64 pebs_absolute_maximum; u64 pebs_interrupt_threshold; - u64 pebs_counter_reset[MAX_PEBS_COUNTERS]; + u64 pebs_event_reset[MAX_PEBS_EVENTS]; }; -struct cpu_hw_counters { - struct perf_counter *counters[X86_PMC_IDX_MAX]; +struct cpu_hw_events { + struct perf_event *events[X86_PMC_IDX_MAX]; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long interrupts; @@ -86,17 +86,17 @@ struct x86_pmu { int (*handle_irq)(struct pt_regs *); void (*disable_all)(void); void (*enable_all)(void); - void (*enable)(struct hw_perf_counter *, int); - void (*disable)(struct hw_perf_counter *, int); + void (*enable)(struct hw_perf_event *, int); + void (*disable)(struct hw_perf_event *, int); unsigned eventsel; unsigned perfctr; u64 (*event_map)(int); u64 (*raw_event)(u64); int max_events; - int num_counters; - int num_counters_fixed; - int counter_bits; - u64 counter_mask; + int num_events; + int num_events_fixed; + int event_bits; + u64 event_mask; int apic; u64 max_period; u64 intel_ctrl; @@ -106,7 +106,7 @@ struct x86_pmu { static struct x86_pmu x86_pmu __read_mostly; -static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; @@ -124,35 +124,35 @@ static const u64 p6_perfmon_event_map[] = [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, }; -static u64 p6_pmu_event_map(int event) +static u64 p6_pmu_event_map(int hw_event) { - return p6_perfmon_event_map[event]; + return p6_perfmon_event_map[hw_event]; } /* - * Counter setting that is specified not to count anything. + * Event setting that is specified not to count anything. * We use this to effectively disable a counter. * * L2_RQSTS with 0 MESI unit mask. */ -#define P6_NOP_COUNTER 0x0000002EULL +#define P6_NOP_EVENT 0x0000002EULL -static u64 p6_pmu_raw_event(u64 event) +static u64 p6_pmu_raw_event(u64 hw_event) { #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL #define P6_EVNTSEL_INV_MASK 0x00800000ULL -#define P6_EVNTSEL_COUNTER_MASK 0xFF000000ULL +#define P6_EVNTSEL_REG_MASK 0xFF000000ULL #define P6_EVNTSEL_MASK \ (P6_EVNTSEL_EVENT_MASK | \ P6_EVNTSEL_UNIT_MASK | \ P6_EVNTSEL_EDGE_MASK | \ P6_EVNTSEL_INV_MASK | \ - P6_EVNTSEL_COUNTER_MASK) + P6_EVNTSEL_REG_MASK) - return event & P6_EVNTSEL_MASK; + return hw_event & P6_EVNTSEL_MASK; } @@ -170,16 +170,16 @@ static const u64 intel_perfmon_event_map[] = [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, }; -static u64 intel_pmu_event_map(int event) +static u64 intel_pmu_event_map(int hw_event) { - return intel_perfmon_event_map[event]; + return intel_perfmon_event_map[hw_event]; } /* - * Generalized hw caching related event table, filled + * Generalized hw caching related hw_event table, filled * in on a per model basis. A value of 0 means - * 'not supported', -1 means 'event makes no sense on - * this CPU', any other value means the raw event + * 'not supported', -1 means 'hw_event makes no sense on + * this CPU', any other value means the raw hw_event * ID. */ @@ -463,22 +463,22 @@ static const u64 atom_hw_cache_event_ids }, }; -static u64 intel_pmu_raw_event(u64 event) +static u64 intel_pmu_raw_event(u64 hw_event) { #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL #define CORE_EVNTSEL_INV_MASK 0x00800000ULL -#define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL +#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL #define CORE_EVNTSEL_MASK \ (CORE_EVNTSEL_EVENT_MASK | \ CORE_EVNTSEL_UNIT_MASK | \ CORE_EVNTSEL_EDGE_MASK | \ CORE_EVNTSEL_INV_MASK | \ - CORE_EVNTSEL_COUNTER_MASK) + CORE_EVNTSEL_REG_MASK) - return event & CORE_EVNTSEL_MASK; + return hw_event & CORE_EVNTSEL_MASK; } static const u64 amd_hw_cache_event_ids @@ -585,39 +585,39 @@ static const u64 amd_perfmon_event_map[] = [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, }; -static u64 amd_pmu_event_map(int event) +static u64 amd_pmu_event_map(int hw_event) { - return amd_perfmon_event_map[event]; + return amd_perfmon_event_map[hw_event]; } -static u64 amd_pmu_raw_event(u64 event) +static u64 amd_pmu_raw_event(u64 hw_event) { #define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL #define K7_EVNTSEL_INV_MASK 0x000800000ULL -#define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL +#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL #define K7_EVNTSEL_MASK \ (K7_EVNTSEL_EVENT_MASK | \ K7_EVNTSEL_UNIT_MASK | \ K7_EVNTSEL_EDGE_MASK | \ K7_EVNTSEL_INV_MASK | \ - K7_EVNTSEL_COUNTER_MASK) + K7_EVNTSEL_REG_MASK) - return event & K7_EVNTSEL_MASK; + return hw_event & K7_EVNTSEL_MASK; } /* - * Propagate counter elapsed time into the generic counter. - * Can only be executed on the CPU where the counter is active. + * Propagate event elapsed time into the generic event. + * Can only be executed on the CPU where the event is active. * Returns the delta events processed. */ static u64 -x86_perf_counter_update(struct perf_counter *counter, - struct hw_perf_counter *hwc, int idx) +x86_perf_event_update(struct perf_event *event, + struct hw_perf_event *hwc, int idx) { - int shift = 64 - x86_pmu.counter_bits; + int shift = 64 - x86_pmu.event_bits; u64 prev_raw_count, new_raw_count; s64 delta; @@ -625,15 +625,15 @@ x86_perf_counter_update(struct perf_counter *counter, return 0; /* - * Careful: an NMI might modify the previous counter value. + * Careful: an NMI might modify the previous event value. * * Our tactic to handle this is to first atomically read and * exchange a new raw count - then add that new-prev delta - * count to the generic counter atomically: + * count to the generic event atomically: */ again: prev_raw_count = atomic64_read(&hwc->prev_count); - rdmsrl(hwc->counter_base + idx, new_raw_count); + rdmsrl(hwc->event_base + idx, new_raw_count); if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) @@ -642,7 +642,7 @@ again: /* * Now we have the new raw value and have updated the prev * timestamp already. We can now calculate the elapsed delta - * (counter-)time and add that to the generic counter. + * (event-)time and add that to the generic event. * * Careful, not all hw sign-extends above the physical width * of the count. @@ -650,13 +650,13 @@ again: delta = (new_raw_count << shift) - (prev_raw_count << shift); delta >>= shift; - atomic64_add(delta, &counter->count); + atomic64_add(delta, &event->count); atomic64_sub(delta, &hwc->period_left); return new_raw_count; } -static atomic_t active_counters; +static atomic_t active_events; static DEFINE_MUTEX(pmc_reserve_mutex); static bool reserve_pmc_hardware(void) @@ -667,12 +667,12 @@ static bool reserve_pmc_hardware(void) if (nmi_watchdog == NMI_LOCAL_APIC) disable_lapic_nmi_watchdog(); - for (i = 0; i < x86_pmu.num_counters; i++) { + for (i = 0; i < x86_pmu.num_events; i++) { if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) goto perfctr_fail; } - for (i = 0; i < x86_pmu.num_counters; i++) { + for (i = 0; i < x86_pmu.num_events; i++) { if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) goto eventsel_fail; } @@ -685,7 +685,7 @@ eventsel_fail: for (i--; i >= 0; i--) release_evntsel_nmi(x86_pmu.eventsel + i); - i = x86_pmu.num_counters; + i = x86_pmu.num_events; perfctr_fail: for (i--; i >= 0; i--) @@ -703,7 +703,7 @@ static void release_pmc_hardware(void) #ifdef CONFIG_X86_LOCAL_APIC int i; - for (i = 0; i < x86_pmu.num_counters; i++) { + for (i = 0; i < x86_pmu.num_events; i++) { release_perfctr_nmi(x86_pmu.perfctr + i); release_evntsel_nmi(x86_pmu.eventsel + i); } @@ -720,7 +720,7 @@ static inline bool bts_available(void) static inline void init_debug_store_on_cpu(int cpu) { - struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; if (!ds) return; @@ -732,7 +732,7 @@ static inline void init_debug_store_on_cpu(int cpu) static inline void fini_debug_store_on_cpu(int cpu) { - if (!per_cpu(cpu_hw_counters, cpu).ds) + if (!per_cpu(cpu_hw_events, cpu).ds) return; wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); @@ -751,12 +751,12 @@ static void release_bts_hardware(void) fini_debug_store_on_cpu(cpu); for_each_possible_cpu(cpu) { - struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; if (!ds) continue; - per_cpu(cpu_hw_counters, cpu).ds = NULL; + per_cpu(cpu_hw_events, cpu).ds = NULL; kfree((void *)(unsigned long)ds->bts_buffer_base); kfree(ds); @@ -796,7 +796,7 @@ static int reserve_bts_hardware(void) ds->bts_interrupt_threshold = ds->bts_absolute_maximum - BTS_OVFL_TH; - per_cpu(cpu_hw_counters, cpu).ds = ds; + per_cpu(cpu_hw_events, cpu).ds = ds; err = 0; } @@ -812,9 +812,9 @@ static int reserve_bts_hardware(void) return err; } -static void hw_perf_counter_destroy(struct perf_counter *counter) +static void hw_perf_event_destroy(struct perf_event *event) { - if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { + if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { release_pmc_hardware(); release_bts_hardware(); mutex_unlock(&pmc_reserve_mutex); @@ -827,7 +827,7 @@ static inline int x86_pmu_initialized(void) } static inline int -set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr) +set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) { unsigned int cache_type, cache_op, cache_result; u64 config, val; @@ -880,7 +880,7 @@ static void intel_pmu_enable_bts(u64 config) static void intel_pmu_disable_bts(void) { - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); unsigned long debugctlmsr; if (!cpuc->ds) @@ -898,10 +898,10 @@ static void intel_pmu_disable_bts(void) /* * Setup the hardware configuration for a given attr_type */ -static int __hw_perf_counter_init(struct perf_counter *counter) +static int __hw_perf_event_init(struct perf_event *event) { - struct perf_counter_attr *attr = &counter->attr; - struct hw_perf_counter *hwc = &counter->hw; + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; u64 config; int err; @@ -909,21 +909,23 @@ static int __hw_perf_counter_init(struct perf_counter *counter) return -ENODEV; err = 0; - if (!atomic_inc_not_zero(&active_counters)) { + if (!atomic_inc_not_zero(&active_events)) { mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&active_counters) == 0) { + if (atomic_read(&active_events) == 0) { if (!reserve_pmc_hardware()) err = -EBUSY; else err = reserve_bts_hardware(); } if (!err) - atomic_inc(&active_counters); + atomic_inc(&active_events); mutex_unlock(&pmc_reserve_mutex); } if (err) return err; + event->destroy = hw_perf_event_destroy; + /* * Generate PMC IRQs: * (keep 'enabled' bit clear for now) @@ -946,17 +948,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter) /* * If we have a PMU initialized but no APIC * interrupts, we cannot sample hardware - * counters (user-space has to fall back and - * sample via a hrtimer based software counter): + * events (user-space has to fall back and + * sample via a hrtimer based software event): */ if (!x86_pmu.apic) return -EOPNOTSUPP; } - counter->destroy = hw_perf_counter_destroy; - /* - * Raw event type provide the config in the event structure + * Raw hw_event type provide the config in the hw_event structure */ if (attr->type == PERF_TYPE_RAW) { hwc->config |= x86_pmu.raw_event(attr->config); @@ -1001,7 +1001,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter) static void p6_pmu_disable_all(void) { - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 val; if (!cpuc->enabled) @@ -1018,7 +1018,7 @@ static void p6_pmu_disable_all(void) static void intel_pmu_disable_all(void) { - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); if (!cpuc->enabled) return; @@ -1034,7 +1034,7 @@ static void intel_pmu_disable_all(void) static void amd_pmu_disable_all(void) { - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; if (!cpuc->enabled) @@ -1043,12 +1043,12 @@ static void amd_pmu_disable_all(void) cpuc->enabled = 0; /* * ensure we write the disable before we start disabling the - * counters proper, so that amd_pmu_enable_counter() does the + * events proper, so that amd_pmu_enable_event() does the * right thing. */ barrier(); - for (idx = 0; idx < x86_pmu.num_counters; idx++) { + for (idx = 0; idx < x86_pmu.num_events; idx++) { u64 val; if (!test_bit(idx, cpuc->active_mask)) @@ -1070,7 +1070,7 @@ void hw_perf_disable(void) static void p6_pmu_enable_all(void) { - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); unsigned long val; if (cpuc->enabled) @@ -1087,7 +1087,7 @@ static void p6_pmu_enable_all(void) static void intel_pmu_enable_all(void) { - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); if (cpuc->enabled) return; @@ -1098,19 +1098,19 @@ static void intel_pmu_enable_all(void) wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { - struct perf_counter *counter = - cpuc->counters[X86_PMC_IDX_FIXED_BTS]; + struct perf_event *event = + cpuc->events[X86_PMC_IDX_FIXED_BTS]; - if (WARN_ON_ONCE(!counter)) + if (WARN_ON_ONCE(!event)) return; - intel_pmu_enable_bts(counter->hw.config); + intel_pmu_enable_bts(event->hw.config); } } static void amd_pmu_enable_all(void) { - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; if (cpuc->enabled) @@ -1119,14 +1119,14 @@ static void amd_pmu_enable_all(void) cpuc->enabled = 1; barrier(); - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - struct perf_counter *counter = cpuc->counters[idx]; + for (idx = 0; idx < x86_pmu.num_events; idx++) { + struct perf_event *event = cpuc->events[idx]; u64 val; if (!test_bit(idx, cpuc->active_mask)) continue; - val = counter->hw.config; + val = event->hw.config; val |= ARCH_PERFMON_EVENTSEL0_ENABLE; wrmsrl(MSR_K7_EVNTSEL0 + idx, val); } @@ -1153,19 +1153,19 @@ static inline void intel_pmu_ack_status(u64 ack) wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); } -static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) +static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) { (void)checking_wrmsrl(hwc->config_base + idx, hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); } -static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) +static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx) { (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); } static inline void -intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx) +intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) { int idx = __idx - X86_PMC_IDX_FIXED; u64 ctrl_val, mask; @@ -1178,10 +1178,10 @@ intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx) } static inline void -p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) +p6_pmu_disable_event(struct hw_perf_event *hwc, int idx) { - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - u64 val = P6_NOP_COUNTER; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + u64 val = P6_NOP_EVENT; if (cpuc->enabled) val |= ARCH_PERFMON_EVENTSEL0_ENABLE; @@ -1190,7 +1190,7 @@ p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) } static inline void -intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) +intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) { if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { intel_pmu_disable_bts(); @@ -1202,24 +1202,24 @@ intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) return; } - x86_pmu_disable_counter(hwc, idx); + x86_pmu_disable_event(hwc, idx); } static inline void -amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) +amd_pmu_disable_event(struct hw_perf_event *hwc, int idx) { - x86_pmu_disable_counter(hwc, idx); + x86_pmu_disable_event(hwc, idx); } static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); /* * Set the next IRQ period, based on the hwc->period_left value. - * To be called with the counter disabled in hw: + * To be called with the event disabled in hw: */ static int -x86_perf_counter_set_period(struct perf_counter *counter, - struct hw_perf_counter *hwc, int idx) +x86_perf_event_set_period(struct perf_event *event, + struct hw_perf_event *hwc, int idx) { s64 left = atomic64_read(&hwc->period_left); s64 period = hwc->sample_period; @@ -1245,7 +1245,7 @@ x86_perf_counter_set_period(struct perf_counter *counter, ret = 1; } /* - * Quirk: certain CPUs dont like it if just 1 event is left: + * Quirk: certain CPUs dont like it if just 1 hw_event is left: */ if (unlikely(left < 2)) left = 2; @@ -1256,21 +1256,21 @@ x86_perf_counter_set_period(struct perf_counter *counter, per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; /* - * The hw counter starts counting from this counter offset, + * The hw event starts counting from this event offset, * mark it to be able to extra future deltas: */ atomic64_set(&hwc->prev_count, (u64)-left); - err = checking_wrmsrl(hwc->counter_base + idx, - (u64)(-left) & x86_pmu.counter_mask); + err = checking_wrmsrl(hwc->event_base + idx, + (u64)(-left) & x86_pmu.event_mask); - perf_counter_update_userpage(counter); + perf_event_update_userpage(event); return ret; } static inline void -intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx) +intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) { int idx = __idx - X86_PMC_IDX_FIXED; u64 ctrl_val, bits, mask; @@ -1295,9 +1295,9 @@ intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx) err = checking_wrmsrl(hwc->config_base, ctrl_val); } -static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) +static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx) { - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 val; val = hwc->config; @@ -1308,10 +1308,10 @@ static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) } -static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) +static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) { if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { - if (!__get_cpu_var(cpu_hw_counters).enabled) + if (!__get_cpu_var(cpu_hw_events).enabled) return; intel_pmu_enable_bts(hwc->config); @@ -1323,134 +1323,134 @@ static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) return; } - x86_pmu_enable_counter(hwc, idx); + x86_pmu_enable_event(hwc, idx); } -static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) +static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) { - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); if (cpuc->enabled) - x86_pmu_enable_counter(hwc, idx); + x86_pmu_enable_event(hwc, idx); } static int -fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) +fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) { - unsigned int event; + unsigned int hw_event; - event = hwc->config & ARCH_PERFMON_EVENT_MASK; + hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK; - if (unlikely((event == + if (unlikely((hw_event == x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && (hwc->sample_period == 1))) return X86_PMC_IDX_FIXED_BTS; - if (!x86_pmu.num_counters_fixed) + if (!x86_pmu.num_events_fixed) return -1; - if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) + if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) return X86_PMC_IDX_FIXED_INSTRUCTIONS; - if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) + if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) return X86_PMC_IDX_FIXED_CPU_CYCLES; - if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES))) + if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES))) return X86_PMC_IDX_FIXED_BUS_CYCLES; return -1; } /* - * Find a PMC slot for the freshly enabled / scheduled in counter: + * Find a PMC slot for the freshly enabled / scheduled in event: */ -static int x86_pmu_enable(struct perf_counter *counter) +static int x86_pmu_enable(struct perf_event *event) { - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - struct hw_perf_counter *hwc = &counter->hw; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; int idx; - idx = fixed_mode_idx(counter, hwc); + idx = fixed_mode_idx(event, hwc); if (idx == X86_PMC_IDX_FIXED_BTS) { /* BTS is already occupied. */ if (test_and_set_bit(idx, cpuc->used_mask)) return -EAGAIN; hwc->config_base = 0; - hwc->counter_base = 0; + hwc->event_base = 0; hwc->idx = idx; } else if (idx >= 0) { /* - * Try to get the fixed counter, if that is already taken - * then try to get a generic counter: + * Try to get the fixed event, if that is already taken + * then try to get a generic event: */ if (test_and_set_bit(idx, cpuc->used_mask)) goto try_generic; hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; /* - * We set it so that counter_base + idx in wrmsr/rdmsr maps to + * We set it so that event_base + idx in wrmsr/rdmsr maps to * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: */ - hwc->counter_base = + hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; hwc->idx = idx; } else { idx = hwc->idx; - /* Try to get the previous generic counter again */ + /* Try to get the previous generic event again */ if (test_and_set_bit(idx, cpuc->used_mask)) { try_generic: idx = find_first_zero_bit(cpuc->used_mask, - x86_pmu.num_counters); - if (idx == x86_pmu.num_counters) + x86_pmu.num_events); + if (idx == x86_pmu.num_events) return -EAGAIN; set_bit(idx, cpuc->used_mask); hwc->idx = idx; } hwc->config_base = x86_pmu.eventsel; - hwc->counter_base = x86_pmu.perfctr; + hwc->event_base = x86_pmu.perfctr; } - perf_counters_lapic_init(); + perf_events_lapic_init(); x86_pmu.disable(hwc, idx); - cpuc->counters[idx] = counter; + cpuc->events[idx] = event; set_bit(idx, cpuc->active_mask); - x86_perf_counter_set_period(counter, hwc, idx); + x86_perf_event_set_period(event, hwc, idx); x86_pmu.enable(hwc, idx); - perf_counter_update_userpage(counter); + perf_event_update_userpage(event); return 0; } -static void x86_pmu_unthrottle(struct perf_counter *counter) +static void x86_pmu_unthrottle(struct perf_event *event) { - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - struct hw_perf_counter *hwc = &counter->hw; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX || - cpuc->counters[hwc->idx] != counter)) + cpuc->events[hwc->idx] != event)) return; x86_pmu.enable(hwc, hwc->idx); } -void perf_counter_print_debug(void) +void perf_event_print_debug(void) { u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; - struct cpu_hw_counters *cpuc; + struct cpu_hw_events *cpuc; unsigned long flags; int cpu, idx; - if (!x86_pmu.num_counters) + if (!x86_pmu.num_events) return; local_irq_save(flags); cpu = smp_processor_id(); - cpuc = &per_cpu(cpu_hw_counters, cpu); + cpuc = &per_cpu(cpu_hw_events, cpu); if (x86_pmu.version >= 2) { rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); @@ -1466,7 +1466,7 @@ void perf_counter_print_debug(void) } pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); - for (idx = 0; idx < x86_pmu.num_counters; idx++) { + for (idx = 0; idx < x86_pmu.num_events; idx++) { rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); rdmsrl(x86_pmu.perfctr + idx, pmc_count); @@ -1479,7 +1479,7 @@ void perf_counter_print_debug(void) pr_info("CPU#%d: gen-PMC%d left: %016llx\n", cpu, idx, prev_left); } - for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { + for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", @@ -1488,8 +1488,7 @@ void perf_counter_print_debug(void) local_irq_restore(flags); } -static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc, - struct perf_sample_data *data) +static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) { struct debug_store *ds = cpuc->ds; struct bts_record { @@ -1497,11 +1496,14 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc, u64 to; u64 flags; }; - struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS]; - unsigned long orig_ip = data->regs->ip; + struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; struct bts_record *at, *top; + struct perf_output_handle handle; + struct perf_event_header header; + struct perf_sample_data data; + struct pt_regs regs; - if (!counter) + if (!event) return; if (!ds) @@ -1510,26 +1512,45 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc, at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; top = (struct bts_record *)(unsigned long)ds->bts_index; + if (top <= at) + return; + ds->bts_index = ds->bts_buffer_base; + + data.period = event->hw.last_period; + data.addr = 0; + regs.ip = 0; + + /* + * Prepare a generic sample, i.e. fill in the invariant fields. + * We will overwrite the from and to address before we output + * the sample. + */ + perf_prepare_sample(&header, &data, event, ®s); + + if (perf_output_begin(&handle, event, + header.size * (top - at), 1, 1)) + return; + for (; at < top; at++) { - data->regs->ip = at->from; - data->addr = at->to; + data.ip = at->from; + data.addr = at->to; - perf_counter_output(counter, 1, data); + perf_output_sample(&handle, &header, &data, event); } - data->regs->ip = orig_ip; - data->addr = 0; + perf_output_end(&handle); /* There's new data available. */ - counter->pending_kill = POLL_IN; + event->hw.interrupts++; + event->pending_kill = POLL_IN; } -static void x86_pmu_disable(struct perf_counter *counter) +static void x86_pmu_disable(struct perf_event *event) { - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - struct hw_perf_counter *hwc = &counter->hw; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; /* @@ -1541,67 +1562,63 @@ static void x86_pmu_disable(struct perf_counter *counter) /* * Make sure the cleared pointer becomes visible before we - * (potentially) free the counter: + * (potentially) free the event: */ barrier(); /* - * Drain the remaining delta count out of a counter + * Drain the remaining delta count out of a event * that we are disabling: */ - x86_perf_counter_update(counter, hwc, idx); + x86_perf_event_update(event, hwc, idx); /* Drain the remaining BTS records. */ - if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { - struct perf_sample_data data; - struct pt_regs regs; + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) + intel_pmu_drain_bts_buffer(cpuc); - data.regs = ®s; - intel_pmu_drain_bts_buffer(cpuc, &data); - } - cpuc->counters[idx] = NULL; + cpuc->events[idx] = NULL; clear_bit(idx, cpuc->used_mask); - perf_counter_update_userpage(counter); + perf_event_update_userpage(event); } /* - * Save and restart an expired counter. Called by NMI contexts, - * so it has to be careful about preempting normal counter ops: + * Save and restart an expired event. Called by NMI contexts, + * so it has to be careful about preempting normal event ops: */ -static int intel_pmu_save_and_restart(struct perf_counter *counter) +static int intel_pmu_save_and_restart(struct perf_event *event) { - struct hw_perf_counter *hwc = &counter->hw; + struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; int ret; - x86_perf_counter_update(counter, hwc, idx); - ret = x86_perf_counter_set_period(counter, hwc, idx); + x86_perf_event_update(event, hwc, idx); + ret = x86_perf_event_set_period(event, hwc, idx); - if (counter->state == PERF_COUNTER_STATE_ACTIVE) - intel_pmu_enable_counter(hwc, idx); + if (event->state == PERF_EVENT_STATE_ACTIVE) + intel_pmu_enable_event(hwc, idx); return ret; } static void intel_pmu_reset(void) { - struct debug_store *ds = __get_cpu_var(cpu_hw_counters).ds; + struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; unsigned long flags; int idx; - if (!x86_pmu.num_counters) + if (!x86_pmu.num_events) return; local_irq_save(flags); printk("clearing PMU state on CPU#%d\n", smp_processor_id()); - for (idx = 0; idx < x86_pmu.num_counters; idx++) { + for (idx = 0; idx < x86_pmu.num_events; idx++) { checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); } - for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { + for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); } if (ds) @@ -1613,39 +1630,38 @@ static void intel_pmu_reset(void) static int p6_pmu_handle_irq(struct pt_regs *regs) { struct perf_sample_data data; - struct cpu_hw_counters *cpuc; - struct perf_counter *counter; - struct hw_perf_counter *hwc; + struct cpu_hw_events *cpuc; + struct perf_event *event; + struct hw_perf_event *hwc; int idx, handled = 0; u64 val; - data.regs = regs; data.addr = 0; - cpuc = &__get_cpu_var(cpu_hw_counters); + cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx < x86_pmu.num_counters; idx++) { + for (idx = 0; idx < x86_pmu.num_events; idx++) { if (!test_bit(idx, cpuc->active_mask)) continue; - counter = cpuc->counters[idx]; - hwc = &counter->hw; + event = cpuc->events[idx]; + hwc = &event->hw; - val = x86_perf_counter_update(counter, hwc, idx); - if (val & (1ULL << (x86_pmu.counter_bits - 1))) + val = x86_perf_event_update(event, hwc, idx); + if (val & (1ULL << (x86_pmu.event_bits - 1))) continue; /* - * counter overflow + * event overflow */ handled = 1; - data.period = counter->hw.last_period; + data.period = event->hw.last_period; - if (!x86_perf_counter_set_period(counter, hwc, idx)) + if (!x86_perf_event_set_period(event, hwc, idx)) continue; - if (perf_counter_overflow(counter, 1, &data)) - p6_pmu_disable_counter(hwc, idx); + if (perf_event_overflow(event, 1, &data, regs)) + p6_pmu_disable_event(hwc, idx); } if (handled) @@ -1661,17 +1677,16 @@ static int p6_pmu_handle_irq(struct pt_regs *regs) static int intel_pmu_handle_irq(struct pt_regs *regs) { struct perf_sample_data data; - struct cpu_hw_counters *cpuc; + struct cpu_hw_events *cpuc; int bit, loops; u64 ack, status; - data.regs = regs; data.addr = 0; - cpuc = &__get_cpu_var(cpu_hw_counters); + cpuc = &__get_cpu_var(cpu_hw_events); perf_disable(); - intel_pmu_drain_bts_buffer(cpuc, &data); + intel_pmu_drain_bts_buffer(cpuc); status = intel_pmu_get_status(); if (!status) { perf_enable(); @@ -1681,8 +1696,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) loops = 0; again: if (++loops > 100) { - WARN_ONCE(1, "perfcounters: irq loop stuck!\n"); - perf_counter_print_debug(); + WARN_ONCE(1, "perfevents: irq loop stuck!\n"); + perf_event_print_debug(); intel_pmu_reset(); perf_enable(); return 1; @@ -1691,19 +1706,19 @@ again: inc_irq_stat(apic_perf_irqs); ack = status; for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { - struct perf_counter *counter = cpuc->counters[bit]; + struct perf_event *event = cpuc->events[bit]; clear_bit(bit, (unsigned long *) &status); if (!test_bit(bit, cpuc->active_mask)) continue; - if (!intel_pmu_save_and_restart(counter)) + if (!intel_pmu_save_and_restart(event)) continue; - data.period = counter->hw.last_period; + data.period = event->hw.last_period; - if (perf_counter_overflow(counter, 1, &data)) - intel_pmu_disable_counter(&counter->hw, bit); + if (perf_event_overflow(event, 1, &data, regs)) + intel_pmu_disable_event(&event->hw, bit); } intel_pmu_ack_status(ack); @@ -1723,39 +1738,38 @@ again: static int amd_pmu_handle_irq(struct pt_regs *regs) { struct perf_sample_data data; - struct cpu_hw_counters *cpuc; - struct perf_counter *counter; - struct hw_perf_counter *hwc; + struct cpu_hw_events *cpuc; + struct perf_event *event; + struct hw_perf_event *hwc; int idx, handled = 0; u64 val; - data.regs = regs; data.addr = 0; - cpuc = &__get_cpu_var(cpu_hw_counters); + cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx < x86_pmu.num_counters; idx++) { + for (idx = 0; idx < x86_pmu.num_events; idx++) { if (!test_bit(idx, cpuc->active_mask)) continue; - counter = cpuc->counters[idx]; - hwc = &counter->hw; + event = cpuc->events[idx]; + hwc = &event->hw; - val = x86_perf_counter_update(counter, hwc, idx); - if (val & (1ULL << (x86_pmu.counter_bits - 1))) + val = x86_perf_event_update(event, hwc, idx); + if (val & (1ULL << (x86_pmu.event_bits - 1))) continue; /* - * counter overflow + * event overflow */ handled = 1; - data.period = counter->hw.last_period; + data.period = event->hw.last_period; - if (!x86_perf_counter_set_period(counter, hwc, idx)) + if (!x86_perf_event_set_period(event, hwc, idx)) continue; - if (perf_counter_overflow(counter, 1, &data)) - amd_pmu_disable_counter(hwc, idx); + if (perf_event_overflow(event, 1, &data, regs)) + amd_pmu_disable_event(hwc, idx); } if (handled) @@ -1769,18 +1783,21 @@ void smp_perf_pending_interrupt(struct pt_regs *regs) irq_enter(); ack_APIC_irq(); inc_irq_stat(apic_pending_irqs); - perf_counter_do_pending(); + perf_event_do_pending(); irq_exit(); } -void set_perf_counter_pending(void) +void set_perf_event_pending(void) { #ifdef CONFIG_X86_LOCAL_APIC + if (!x86_pmu.apic || !x86_pmu_initialized()) + return; + apic->send_IPI_self(LOCAL_PENDING_VECTOR); #endif } -void perf_counters_lapic_init(void) +void perf_events_lapic_init(void) { #ifdef CONFIG_X86_LOCAL_APIC if (!x86_pmu.apic || !x86_pmu_initialized()) @@ -1794,13 +1811,13 @@ void perf_counters_lapic_init(void) } static int __kprobes -perf_counter_nmi_handler(struct notifier_block *self, +perf_event_nmi_handler(struct notifier_block *self, unsigned long cmd, void *__args) { struct die_args *args = __args; struct pt_regs *regs; - if (!atomic_read(&active_counters)) + if (!atomic_read(&active_events)) return NOTIFY_DONE; switch (cmd) { @@ -1819,7 +1836,7 @@ perf_counter_nmi_handler(struct notifier_block *self, #endif /* * Can't rely on the handled return value to say it was our NMI, two - * counters could trigger 'simultaneously' raising two back-to-back NMIs. + * events could trigger 'simultaneously' raising two back-to-back NMIs. * * If the first NMI handles both, the latter will be empty and daze * the CPU. @@ -1829,8 +1846,8 @@ perf_counter_nmi_handler(struct notifier_block *self, return NOTIFY_STOP; } -static __read_mostly struct notifier_block perf_counter_nmi_notifier = { - .notifier_call = perf_counter_nmi_handler, +static __read_mostly struct notifier_block perf_event_nmi_notifier = { + .notifier_call = perf_event_nmi_handler, .next = NULL, .priority = 1 }; @@ -1840,8 +1857,8 @@ static struct x86_pmu p6_pmu = { .handle_irq = p6_pmu_handle_irq, .disable_all = p6_pmu_disable_all, .enable_all = p6_pmu_enable_all, - .enable = p6_pmu_enable_counter, - .disable = p6_pmu_disable_counter, + .enable = p6_pmu_enable_event, + .disable = p6_pmu_disable_event, .eventsel = MSR_P6_EVNTSEL0, .perfctr = MSR_P6_PERFCTR0, .event_map = p6_pmu_event_map, @@ -1850,16 +1867,16 @@ static struct x86_pmu p6_pmu = { .apic = 1, .max_period = (1ULL << 31) - 1, .version = 0, - .num_counters = 2, + .num_events = 2, /* - * Counters have 40 bits implemented. However they are designed such + * Events have 40 bits implemented. However they are designed such * that bits [32-39] are sign extensions of bit 31. As such the - * effective width of a counter for P6-like PMU is 32 bits only. + * effective width of a event for P6-like PMU is 32 bits only. * * See IA-32 Intel Architecture Software developer manual Vol 3B */ - .counter_bits = 32, - .counter_mask = (1ULL << 32) - 1, + .event_bits = 32, + .event_mask = (1ULL << 32) - 1, }; static struct x86_pmu intel_pmu = { @@ -1867,8 +1884,8 @@ static struct x86_pmu intel_pmu = { .handle_irq = intel_pmu_handle_irq, .disable_all = intel_pmu_disable_all, .enable_all = intel_pmu_enable_all, - .enable = intel_pmu_enable_counter, - .disable = intel_pmu_disable_counter, + .enable = intel_pmu_enable_event, + .disable = intel_pmu_disable_event, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, .perfctr = MSR_ARCH_PERFMON_PERFCTR0, .event_map = intel_pmu_event_map, @@ -1878,7 +1895,7 @@ static struct x86_pmu intel_pmu = { /* * Intel PMCs cannot be accessed sanely above 32 bit width, * so we install an artificial 1<<31 period regardless of - * the generic counter period: + * the generic event period: */ .max_period = (1ULL << 31) - 1, .enable_bts = intel_pmu_enable_bts, @@ -1890,16 +1907,16 @@ static struct x86_pmu amd_pmu = { .handle_irq = amd_pmu_handle_irq, .disable_all = amd_pmu_disable_all, .enable_all = amd_pmu_enable_all, - .enable = amd_pmu_enable_counter, - .disable = amd_pmu_disable_counter, + .enable = amd_pmu_enable_event, + .disable = amd_pmu_disable_event, .eventsel = MSR_K7_EVNTSEL0, .perfctr = MSR_K7_PERFCTR0, .event_map = amd_pmu_event_map, .raw_event = amd_pmu_raw_event, .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_counters = 4, - .counter_bits = 48, - .counter_mask = (1ULL << 48) - 1, + .num_events = 4, + .event_bits = 48, + .event_mask = (1ULL << 48) - 1, .apic = 1, /* use highest bit to detect overflow */ .max_period = (1ULL << 47) - 1, @@ -1956,7 +1973,7 @@ static int intel_pmu_init(void) /* * Check whether the Architectural PerfMon supports - * Branch Misses Retired Event or not. + * Branch Misses Retired hw_event or not. */ cpuid(10, &eax.full, &ebx, &unused, &edx.full); if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) @@ -1968,15 +1985,15 @@ static int intel_pmu_init(void) x86_pmu = intel_pmu; x86_pmu.version = version; - x86_pmu.num_counters = eax.split.num_counters; - x86_pmu.counter_bits = eax.split.bit_width; - x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1; + x86_pmu.num_events = eax.split.num_events; + x86_pmu.event_bits = eax.split.bit_width; + x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; /* - * Quirk: v2 perfmon does not report fixed-purpose counters, so - * assume at least 3 counters: + * Quirk: v2 perfmon does not report fixed-purpose events, so + * assume at least 3 events: */ - x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); + x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); /* * Install the hw-cache-events table: @@ -2023,11 +2040,11 @@ static int amd_pmu_init(void) return 0; } -void __init init_hw_perf_counters(void) +void __init init_hw_perf_events(void) { int err; - pr_info("Performance Counters: "); + pr_info("Performance Events: "); switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: @@ -2040,45 +2057,45 @@ void __init init_hw_perf_counters(void) return; } if (err != 0) { - pr_cont("no PMU driver, software counters only.\n"); + pr_cont("no PMU driver, software events only.\n"); return; } pr_cont("%s PMU driver.\n", x86_pmu.name); - if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { - WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", - x86_pmu.num_counters, X86_PMC_MAX_GENERIC); - x86_pmu.num_counters = X86_PMC_MAX_GENERIC; + if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { + WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", + x86_pmu.num_events, X86_PMC_MAX_GENERIC); + x86_pmu.num_events = X86_PMC_MAX_GENERIC; } - perf_counter_mask = (1 << x86_pmu.num_counters) - 1; - perf_max_counters = x86_pmu.num_counters; + perf_event_mask = (1 << x86_pmu.num_events) - 1; + perf_max_events = x86_pmu.num_events; - if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { - WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", - x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); - x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; + if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) { + WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", + x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED); + x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED; } - perf_counter_mask |= - ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; - x86_pmu.intel_ctrl = perf_counter_mask; + perf_event_mask |= + ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED; + x86_pmu.intel_ctrl = perf_event_mask; - perf_counters_lapic_init(); - register_die_notifier(&perf_counter_nmi_notifier); + perf_events_lapic_init(); + register_die_notifier(&perf_event_nmi_notifier); - pr_info("... version: %d\n", x86_pmu.version); - pr_info("... bit width: %d\n", x86_pmu.counter_bits); - pr_info("... generic counters: %d\n", x86_pmu.num_counters); - pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask); - pr_info("... max period: %016Lx\n", x86_pmu.max_period); - pr_info("... fixed-purpose counters: %d\n", x86_pmu.num_counters_fixed); - pr_info("... counter mask: %016Lx\n", perf_counter_mask); + pr_info("... version: %d\n", x86_pmu.version); + pr_info("... bit width: %d\n", x86_pmu.event_bits); + pr_info("... generic registers: %d\n", x86_pmu.num_events); + pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); + pr_info("... max period: %016Lx\n", x86_pmu.max_period); + pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); + pr_info("... event mask: %016Lx\n", perf_event_mask); } -static inline void x86_pmu_read(struct perf_counter *counter) +static inline void x86_pmu_read(struct perf_event *event) { - x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); + x86_perf_event_update(event, &event->hw, event->hw.idx); } static const struct pmu pmu = { @@ -2088,13 +2105,16 @@ static const struct pmu pmu = { .unthrottle = x86_pmu_unthrottle, }; -const struct pmu *hw_perf_counter_init(struct perf_counter *counter) +const struct pmu *hw_perf_event_init(struct perf_event *event) { int err; - err = __hw_perf_counter_init(counter); - if (err) + err = __hw_perf_event_init(event); + if (err) { + if (event->destroy) + event->destroy(event); return ERR_PTR(err); + } return &pmu; } @@ -2275,7 +2295,7 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) return entry; } -void hw_perf_counter_setup_online(int cpu) +void hw_perf_event_setup_online(int cpu) { init_debug_store_on_cpu(cpu); } diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 392bea43b89..fab786f60ed 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -20,7 +20,7 @@ #include <linux/kprobes.h> #include <asm/apic.h> -#include <asm/perf_counter.h> +#include <asm/perf_event.h> struct nmi_watchdog_ctlblk { unsigned int cccr_msr; diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index bc24f514ec9..1cbed97b59c 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -24,6 +24,7 @@ #include <linux/dmi.h> #include <asm/div64.h> #include <asm/vmware.h> +#include <asm/x86_init.h> #define CPUID_VMWARE_INFO_LEAF 0x40000000 #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 @@ -47,21 +48,35 @@ static inline int __vmware_platform(void) return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC; } -static unsigned long __vmware_get_tsc_khz(void) +static unsigned long vmware_get_tsc_khz(void) { uint64_t tsc_hz; uint32_t eax, ebx, ecx, edx; VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); - if (ebx == UINT_MAX) - return 0; tsc_hz = eax | (((uint64_t)ebx) << 32); do_div(tsc_hz, 1000); BUG_ON(tsc_hz >> 32); + printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n", + (unsigned long) tsc_hz / 1000, + (unsigned long) tsc_hz % 1000); return tsc_hz; } +void __init vmware_platform_setup(void) +{ + uint32_t eax, ebx, ecx, edx; + + VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); + + if (ebx != UINT_MAX) + x86_platform.calibrate_tsc = vmware_get_tsc_khz; + else + printk(KERN_WARNING + "Failed to get TSC freq from the hypervisor\n"); +} + /* * While checking the dmi string infomation, just checking the product * serial key should be enough, as this will always have a VMware @@ -87,12 +102,6 @@ int vmware_platform(void) return 0; } -unsigned long vmware_get_tsc_khz(void) -{ - BUG_ON(!vmware_platform()); - return __vmware_get_tsc_khz(); -} - /* * VMware hypervisor takes care of exporting a reliable TSC to the guest. * Still, due to timing difference when running on virtual cpus, the TSC can |