From 5c167b8585c8d91206b395d57011ead7711e322f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 17 Dec 2008 09:02:19 +0100 Subject: x86, perfcounters: rename intel_arch_perfmon.h => perf_counter.h Impact: rename include file We'll be providing an asm/perf_counter.h to the generic perfcounter code, so use the already existing x86 file for this purpose and rename it. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_counter.h | 41 +++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 arch/x86/include/asm/perf_counter.h (limited to 'arch/x86/include/asm/perf_counter.h') diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h new file mode 100644 index 00000000000..9dadce1124e --- /dev/null +++ b/arch/x86/include/asm/perf_counter.h @@ -0,0 +1,41 @@ +#ifndef _ASM_X86_PERF_COUNTER_H +#define _ASM_X86_PERF_COUNTER_H + +#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 +#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 + +#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 +#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 + +#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) +#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) +#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) +#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) + +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ + (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) + +#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 + +union cpuid10_eax { + struct { + unsigned int version_id:8; + unsigned int num_counters:8; + unsigned int bit_width:8; + unsigned int mask_length:8; + } split; + unsigned int full; +}; + +#ifdef CONFIG_PERF_COUNTERS +extern void init_hw_perf_counters(void); +extern void perf_counters_lapic_init(int nmi); +#else +static inline void init_hw_perf_counters(void) { } +static inline void perf_counters_lapic_init(int nmi) { } +#endif + +#endif /* _ASM_X86_PERF_COUNTER_H */ -- cgit v1.2.3-70-g09d2 From eb2b861810d4ff72454c83996b891df4e0aaff9a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 17 Dec 2008 09:09:13 +0100 Subject: x86, perfcounters: prepare for fixed-mode PMCs Impact: refactor the x86 code for fixed-mode PMCs Extend the data structures and rename the existing facilities to allow for a 'generic' versus 'fixed' counter distinction. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_counter.h | 11 ++++++++ arch/x86/kernel/cpu/perf_counter.c | 53 ++++++++++++++++++------------------- include/linux/perf_counter.h | 1 + 3 files changed, 38 insertions(+), 27 deletions(-) (limited to 'arch/x86/include/asm/perf_counter.h') diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 9dadce1124e..dd5a4a559e2 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -1,6 +1,13 @@ #ifndef _ASM_X86_PERF_COUNTER_H #define _ASM_X86_PERF_COUNTER_H +/* + * Performance counter hw details: + */ + +#define X86_PMC_MAX_GENERIC 8 +#define X86_PMC_MAX_FIXED 3 + #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 #define MSR_ARCH_PERFMON_PERFCTR1 0xc2 @@ -20,6 +27,10 @@ #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 +/* + * Intel "Architectural Performance Monitoring" CPUID + * detection/enumeration details: + */ union cpuid10_eax { struct { unsigned int version_id:8; diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index a4a3a09a654..fc3af868823 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -27,13 +27,12 @@ static bool perf_counters_initialized __read_mostly; static int nr_hw_counters __read_mostly; static u32 perf_counter_mask __read_mostly; -/* No support for fixed function counters yet */ - -#define MAX_HW_COUNTERS 8 - struct cpu_hw_counters { - struct perf_counter *counters[MAX_HW_COUNTERS]; - unsigned long used[BITS_TO_LONGS(MAX_HW_COUNTERS)]; + struct perf_counter *generic[X86_PMC_MAX_GENERIC]; + unsigned long used[BITS_TO_LONGS(X86_PMC_MAX_GENERIC)]; + + struct perf_counter *fixed[X86_PMC_MAX_FIXED]; + unsigned long used_fixed[BITS_TO_LONGS(X86_PMC_MAX_FIXED)]; }; /* @@ -185,7 +184,7 @@ void hw_perf_restore(u64 ctrl) EXPORT_SYMBOL_GPL(hw_perf_restore); static inline void -__x86_perf_counter_disable(struct perf_counter *counter, +__pmc_generic_disable(struct perf_counter *counter, struct hw_perf_counter *hwc, unsigned int idx) { int err; @@ -193,7 +192,7 @@ __x86_perf_counter_disable(struct perf_counter *counter, err = wrmsr_safe(hwc->config_base + idx, hwc->config, 0); } -static DEFINE_PER_CPU(u64, prev_left[MAX_HW_COUNTERS]); +static DEFINE_PER_CPU(u64, prev_left[X86_PMC_MAX_GENERIC]); /* * Set the next IRQ period, based on the hwc->period_left value. @@ -231,7 +230,7 @@ __hw_perf_counter_set_period(struct perf_counter *counter, } static void -__x86_perf_counter_enable(struct perf_counter *counter, +__pmc_generic_enable(struct perf_counter *counter, struct hw_perf_counter *hwc, int idx) { wrmsr(hwc->config_base + idx, @@ -241,7 +240,7 @@ __x86_perf_counter_enable(struct perf_counter *counter, /* * Find a PMC slot for the freshly enabled / scheduled in counter: */ -static void x86_perf_counter_enable(struct perf_counter *counter) +static void pmc_generic_enable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); struct hw_perf_counter *hwc = &counter->hw; @@ -256,12 +255,12 @@ static void x86_perf_counter_enable(struct perf_counter *counter) perf_counters_lapic_init(hwc->nmi); - __x86_perf_counter_disable(counter, hwc, idx); + __pmc_generic_disable(counter, hwc, idx); - cpuc->counters[idx] = counter; + cpuc->generic[idx] = counter; __hw_perf_counter_set_period(counter, hwc, idx); - __x86_perf_counter_enable(counter, hwc, idx); + __pmc_generic_enable(counter, hwc, idx); } void perf_counter_print_debug(void) @@ -301,16 +300,16 @@ void perf_counter_print_debug(void) local_irq_enable(); } -static void x86_perf_counter_disable(struct perf_counter *counter) +static void pmc_generic_disable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); struct hw_perf_counter *hwc = &counter->hw; unsigned int idx = hwc->idx; - __x86_perf_counter_disable(counter, hwc, idx); + __pmc_generic_disable(counter, hwc, idx); clear_bit(idx, cpuc->used); - cpuc->counters[idx] = NULL; + cpuc->generic[idx] = NULL; /* * Drain the remaining delta count out of a counter @@ -349,7 +348,7 @@ static void perf_save_and_restart(struct perf_counter *counter) __hw_perf_counter_set_period(counter, hwc, idx); if (pmc_ctrl & ARCH_PERFMON_EVENTSEL0_ENABLE) - __x86_perf_counter_enable(counter, hwc, idx); + __pmc_generic_enable(counter, hwc, idx); } static void @@ -392,7 +391,7 @@ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) again: ack = status; for_each_bit(bit, (unsigned long *) &status, nr_hw_counters) { - struct perf_counter *counter = cpuc->counters[bit]; + struct perf_counter *counter = cpuc->generic[bit]; clear_bit(bit, (unsigned long *) &status); if (!counter) @@ -412,7 +411,7 @@ again: } /* * From NMI context we cannot call into the scheduler to - * do a task wakeup - but we mark these counters as + * do a task wakeup - but we mark these generic as * wakeup_pending and initate a wakeup callback: */ if (nmi) { @@ -462,7 +461,7 @@ void perf_counter_notify(struct pt_regs *regs) cpuc = &per_cpu(cpu_hw_counters, cpu); for_each_bit(bit, cpuc->used, nr_hw_counters) { - struct perf_counter *counter = cpuc->counters[bit]; + struct perf_counter *counter = cpuc->generic[bit]; if (!counter) continue; @@ -539,10 +538,10 @@ void __init init_hw_perf_counters(void) printk(KERN_INFO "... version: %d\n", eax.split.version_id); printk(KERN_INFO "... num_counters: %d\n", eax.split.num_counters); nr_hw_counters = eax.split.num_counters; - if (nr_hw_counters > MAX_HW_COUNTERS) { - nr_hw_counters = MAX_HW_COUNTERS; + if (nr_hw_counters > X86_PMC_MAX_GENERIC) { + nr_hw_counters = X86_PMC_MAX_GENERIC; WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", - nr_hw_counters, MAX_HW_COUNTERS); + nr_hw_counters, X86_PMC_MAX_GENERIC); } perf_counter_mask = (1 << nr_hw_counters) - 1; perf_max_counters = nr_hw_counters; @@ -556,15 +555,15 @@ void __init init_hw_perf_counters(void) register_die_notifier(&perf_counter_nmi_notifier); } -static void x86_perf_counter_read(struct perf_counter *counter) +static void pmc_generic_read(struct perf_counter *counter) { x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); } static const struct hw_perf_counter_ops x86_perf_counter_ops = { - .hw_perf_counter_enable = x86_perf_counter_enable, - .hw_perf_counter_disable = x86_perf_counter_disable, - .hw_perf_counter_read = x86_perf_counter_read, + .hw_perf_counter_enable = pmc_generic_enable, + .hw_perf_counter_disable = pmc_generic_disable, + .hw_perf_counter_read = pmc_generic_read, }; const struct hw_perf_counter_ops * diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index d038450de87..984da540224 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -14,6 +14,7 @@ #define _LINUX_PERF_COUNTER_H #include +#include #include #include -- cgit v1.2.3-70-g09d2 From 703e937c83bbad79075a7846e062e447c2fee6a4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 17 Dec 2008 10:51:15 +0100 Subject: perfcounters: add fixed-mode PMC enumeration Enumerate fixed-mode PMCs based on CPUID, and feed that into the perfcounter code. Does not use fixed-mode PMCs yet. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_counter.h | 23 +++++++++++++++++++++++ arch/x86/kernel/cpu/perf_counter.c | 23 +++++++++++++++++------ 2 files changed, 40 insertions(+), 6 deletions(-) (limited to 'arch/x86/include/asm/perf_counter.h') diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index dd5a4a559e2..945a315e6d6 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -41,6 +41,29 @@ union cpuid10_eax { unsigned int full; }; +union cpuid10_edx { + struct { + unsigned int num_counters_fixed:4; + unsigned int reserved:28; + } split; + unsigned int full; +}; + + +/* + * Fixed-purpose performance counters: + */ + +/* Instr_Retired.Any: */ +#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 + +/* CPU_CLK_Unhalted.Core: */ +#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a + +/* CPU_CLK_Unhalted.Ref: */ +#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b + + #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); extern void perf_counters_lapic_init(int nmi); diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index fc3af868823..2fca50c4597 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -27,6 +27,8 @@ static bool perf_counters_initialized __read_mostly; static int nr_hw_counters __read_mostly; static u32 perf_counter_mask __read_mostly; +static int nr_hw_counters_fixed __read_mostly; + struct cpu_hw_counters { struct perf_counter *generic[X86_PMC_MAX_GENERIC]; unsigned long used[BITS_TO_LONGS(X86_PMC_MAX_GENERIC)]; @@ -519,8 +521,9 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = { void __init init_hw_perf_counters(void) { union cpuid10_eax eax; - unsigned int unused; unsigned int ebx; + unsigned int unused; + union cpuid10_edx edx; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) return; @@ -529,14 +532,14 @@ void __init init_hw_perf_counters(void) * Check whether the Architectural PerfMon supports * Branch Misses Retired Event or not. */ - cpuid(10, &(eax.full), &ebx, &unused, &unused); + cpuid(10, &eax.full, &ebx, &unused, &edx.full); if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) return; printk(KERN_INFO "Intel Performance Monitoring support detected.\n"); - printk(KERN_INFO "... version: %d\n", eax.split.version_id); - printk(KERN_INFO "... num_counters: %d\n", eax.split.num_counters); + printk(KERN_INFO "... version: %d\n", eax.split.version_id); + printk(KERN_INFO "... num counters: %d\n", eax.split.num_counters); nr_hw_counters = eax.split.num_counters; if (nr_hw_counters > X86_PMC_MAX_GENERIC) { nr_hw_counters = X86_PMC_MAX_GENERIC; @@ -546,8 +549,16 @@ void __init init_hw_perf_counters(void) perf_counter_mask = (1 << nr_hw_counters) - 1; perf_max_counters = nr_hw_counters; - printk(KERN_INFO "... bit_width: %d\n", eax.split.bit_width); - printk(KERN_INFO "... mask_length: %d\n", eax.split.mask_length); + printk(KERN_INFO "... bit width: %d\n", eax.split.bit_width); + printk(KERN_INFO "... mask length: %d\n", eax.split.mask_length); + + nr_hw_counters_fixed = edx.split.num_counters_fixed; + if (nr_hw_counters_fixed > X86_PMC_MAX_FIXED) { + nr_hw_counters_fixed = X86_PMC_MAX_FIXED; + WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", + nr_hw_counters_fixed, X86_PMC_MAX_FIXED); + } + printk(KERN_INFO "... fixed counters: %d\n", nr_hw_counters_fixed); perf_counters_initialized = true; -- cgit v1.2.3-70-g09d2 From 862a1a5f346fe7e9181ea51eaae48cf2cd70f746 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 17 Dec 2008 13:09:20 +0100 Subject: x86, perfcounters: refactor code for fixed-function PMCs Impact: clean up Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_counter.h | 14 ++++++- arch/x86/kernel/cpu/perf_counter.c | 73 ++++++++++++++++++++----------------- 2 files changed, 52 insertions(+), 35 deletions(-) (limited to 'arch/x86/include/asm/perf_counter.h') diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 945a315e6d6..13745deb16c 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -8,6 +8,10 @@ #define X86_PMC_MAX_GENERIC 8 #define X86_PMC_MAX_FIXED 3 +#define X86_PMC_IDX_GENERIC 0 +#define X86_PMC_IDX_FIXED 32 +#define X86_PMC_IDX_MAX 64 + #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 #define MSR_ARCH_PERFMON_PERFCTR1 0xc2 @@ -54,6 +58,15 @@ union cpuid10_edx { * Fixed-purpose performance counters: */ +/* + * All 3 fixed-mode PMCs are configured via this single MSR: + */ +#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d + +/* + * The counts are available in three separate MSRs: + */ + /* Instr_Retired.Any: */ #define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 @@ -63,7 +76,6 @@ union cpuid10_edx { /* CPU_CLK_Unhalted.Ref: */ #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b - #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); extern void perf_counters_lapic_init(int nmi); diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 2fca50c4597..358af526640 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -24,17 +24,14 @@ static bool perf_counters_initialized __read_mostly; /* * Number of (generic) HW counters: */ -static int nr_hw_counters __read_mostly; -static u32 perf_counter_mask __read_mostly; +static int nr_counters_generic __read_mostly; +static u64 perf_counter_mask __read_mostly; -static int nr_hw_counters_fixed __read_mostly; +static int nr_counters_fixed __read_mostly; struct cpu_hw_counters { - struct perf_counter *generic[X86_PMC_MAX_GENERIC]; - unsigned long used[BITS_TO_LONGS(X86_PMC_MAX_GENERIC)]; - - struct perf_counter *fixed[X86_PMC_MAX_FIXED]; - unsigned long used_fixed[BITS_TO_LONGS(X86_PMC_MAX_FIXED)]; + struct perf_counter *counters[X86_PMC_IDX_MAX]; + unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; }; /* @@ -159,7 +156,7 @@ void hw_perf_enable_all(void) if (unlikely(!perf_counters_initialized)) return; - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask); } u64 hw_perf_save_disable(void) @@ -170,7 +167,7 @@ u64 hw_perf_save_disable(void) return 0; rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); return ctrl; } @@ -181,7 +178,7 @@ void hw_perf_restore(u64 ctrl) if (unlikely(!perf_counters_initialized)) return; - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); } EXPORT_SYMBOL_GPL(hw_perf_restore); @@ -239,6 +236,11 @@ __pmc_generic_enable(struct perf_counter *counter, hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0); } +static int fixed_mode_idx(struct hw_perf_counter *hwc) +{ + return -1; +} + /* * Find a PMC slot for the freshly enabled / scheduled in counter: */ @@ -250,7 +252,7 @@ static void pmc_generic_enable(struct perf_counter *counter) /* Try to get the previous counter again */ if (test_and_set_bit(idx, cpuc->used)) { - idx = find_first_zero_bit(cpuc->used, nr_hw_counters); + idx = find_first_zero_bit(cpuc->used, nr_counters_generic); set_bit(idx, cpuc->used); hwc->idx = idx; } @@ -259,7 +261,7 @@ static void pmc_generic_enable(struct perf_counter *counter) __pmc_generic_disable(counter, hwc, idx); - cpuc->generic[idx] = counter; + cpuc->counters[idx] = counter; __hw_perf_counter_set_period(counter, hwc, idx); __pmc_generic_enable(counter, hwc, idx); @@ -270,7 +272,7 @@ void perf_counter_print_debug(void) u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left; int cpu, idx; - if (!nr_hw_counters) + if (!nr_counters_generic) return; local_irq_disable(); @@ -286,7 +288,7 @@ void perf_counter_print_debug(void) printk(KERN_INFO "CPU#%d: status: %016llx\n", cpu, status); printk(KERN_INFO "CPU#%d: overflow: %016llx\n", cpu, overflow); - for (idx = 0; idx < nr_hw_counters; idx++) { + for (idx = 0; idx < nr_counters_generic; idx++) { rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl); rdmsrl(MSR_ARCH_PERFMON_PERFCTR0 + idx, pmc_count); @@ -311,7 +313,7 @@ static void pmc_generic_disable(struct perf_counter *counter) __pmc_generic_disable(counter, hwc, idx); clear_bit(idx, cpuc->used); - cpuc->generic[idx] = NULL; + cpuc->counters[idx] = NULL; /* * Drain the remaining delta count out of a counter @@ -381,7 +383,7 @@ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, saved_global); /* Disable counters globally */ - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); ack_APIC_irq(); cpuc = &per_cpu(cpu_hw_counters, cpu); @@ -392,8 +394,8 @@ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) again: ack = status; - for_each_bit(bit, (unsigned long *) &status, nr_hw_counters) { - struct perf_counter *counter = cpuc->generic[bit]; + for_each_bit(bit, (unsigned long *) &status, nr_counters_generic) { + struct perf_counter *counter = cpuc->counters[bit]; clear_bit(bit, (unsigned long *) &status); if (!counter) @@ -424,7 +426,7 @@ again: } } - wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack, 0); + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); /* * Repeat if there is more work to be done: @@ -436,7 +438,7 @@ out: /* * Restore - do not reenable when global enable is off: */ - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, saved_global, 0); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, saved_global); } void smp_perf_counter_interrupt(struct pt_regs *regs) @@ -462,8 +464,8 @@ void perf_counter_notify(struct pt_regs *regs) cpu = smp_processor_id(); cpuc = &per_cpu(cpu_hw_counters, cpu); - for_each_bit(bit, cpuc->used, nr_hw_counters) { - struct perf_counter *counter = cpuc->generic[bit]; + for_each_bit(bit, cpuc->used, X86_PMC_IDX_MAX) { + struct perf_counter *counter = cpuc->counters[bit]; if (!counter) continue; @@ -540,26 +542,29 @@ void __init init_hw_perf_counters(void) printk(KERN_INFO "... version: %d\n", eax.split.version_id); printk(KERN_INFO "... num counters: %d\n", eax.split.num_counters); - nr_hw_counters = eax.split.num_counters; - if (nr_hw_counters > X86_PMC_MAX_GENERIC) { - nr_hw_counters = X86_PMC_MAX_GENERIC; + nr_counters_generic = eax.split.num_counters; + if (nr_counters_generic > X86_PMC_MAX_GENERIC) { + nr_counters_generic = X86_PMC_MAX_GENERIC; WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", - nr_hw_counters, X86_PMC_MAX_GENERIC); + nr_counters_generic, X86_PMC_MAX_GENERIC); } - perf_counter_mask = (1 << nr_hw_counters) - 1; - perf_max_counters = nr_hw_counters; + perf_counter_mask = (1 << nr_counters_generic) - 1; + perf_max_counters = nr_counters_generic; printk(KERN_INFO "... bit width: %d\n", eax.split.bit_width); printk(KERN_INFO "... mask length: %d\n", eax.split.mask_length); - nr_hw_counters_fixed = edx.split.num_counters_fixed; - if (nr_hw_counters_fixed > X86_PMC_MAX_FIXED) { - nr_hw_counters_fixed = X86_PMC_MAX_FIXED; + nr_counters_fixed = edx.split.num_counters_fixed; + if (nr_counters_fixed > X86_PMC_MAX_FIXED) { + nr_counters_fixed = X86_PMC_MAX_FIXED; WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", - nr_hw_counters_fixed, X86_PMC_MAX_FIXED); + nr_counters_fixed, X86_PMC_MAX_FIXED); } - printk(KERN_INFO "... fixed counters: %d\n", nr_hw_counters_fixed); + printk(KERN_INFO "... fixed counters: %d\n", nr_counters_fixed); + + perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED; + printk(KERN_INFO "... counter mask: %016Lx\n", perf_counter_mask); perf_counters_initialized = true; perf_counters_lapic_init(0); -- cgit v1.2.3-70-g09d2 From 2f18d1e8d07ae67dd0afce875287756d4bd31a46 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 22 Dec 2008 11:10:42 +0100 Subject: x86, perfcounters: add support for fixed-function pmcs Impact: extend performance counter support on x86 Intel CPUs Modern Intel CPUs have 3 "fixed-function" performance counters, which count these hardware events: Instr_Retired.Any CPU_CLK_Unhalted.Core CPU_CLK_Unhalted.Ref Add support for them to the performance counters subsystem. Their use is transparent to user-space: the counter scheduler is extended to automatically recognize the cases where a fixed-function PMC can be utilized instead of a generic PMC. In such cases the generic PMC is kept available for more counters. The above fixed-function events map to these generic counter hw events: PERF_COUNT_INSTRUCTIONS PERF_COUNT_CPU_CYCLES PERF_COUNT_BUS_CYCLES (The 'bus' cycles are in reality often CPU-ish cycles, just with a fixed frequency.) Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_counter.h | 8 ++ arch/x86/kernel/cpu/perf_counter.c | 149 ++++++++++++++++++++++++++++++------ 2 files changed, 133 insertions(+), 24 deletions(-) (limited to 'arch/x86/include/asm/perf_counter.h') diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 13745deb16c..2e08ed73664 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -23,6 +23,11 @@ #define ARCH_PERFMON_EVENTSEL_OS (1 << 17) #define ARCH_PERFMON_EVENTSEL_USR (1 << 16) +/* + * Includes eventsel and unit mask as well: + */ +#define ARCH_PERFMON_EVENT_MASK 0xffff + #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 @@ -69,12 +74,15 @@ union cpuid10_edx { /* Instr_Retired.Any: */ #define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 +#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) /* CPU_CLK_Unhalted.Core: */ #define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a +#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) /* CPU_CLK_Unhalted.Ref: */ #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b +#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 86b2fdd344a..da46eca1254 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -26,6 +26,7 @@ static bool perf_counters_initialized __read_mostly; */ static int nr_counters_generic __read_mostly; static u64 perf_counter_mask __read_mostly; +static u64 counter_value_mask __read_mostly; static int nr_counters_fixed __read_mostly; @@ -120,9 +121,6 @@ static int __hw_perf_counter_init(struct perf_counter *counter) hwc->nmi = 1; } - hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0; - hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0; - hwc->irq_period = hw_event->irq_period; /* * Intel PMCs cannot be accessed sanely above 32 bit width, @@ -183,16 +181,34 @@ void hw_perf_restore(u64 ctrl) } EXPORT_SYMBOL_GPL(hw_perf_restore); +static inline void +__pmc_fixed_disable(struct perf_counter *counter, + struct hw_perf_counter *hwc, unsigned int __idx) +{ + int idx = __idx - X86_PMC_IDX_FIXED; + u64 ctrl_val, mask; + int err; + + mask = 0xfULL << (idx * 4); + + rdmsrl(hwc->config_base, ctrl_val); + ctrl_val &= ~mask; + err = checking_wrmsrl(hwc->config_base, ctrl_val); +} + static inline void __pmc_generic_disable(struct perf_counter *counter, struct hw_perf_counter *hwc, unsigned int idx) { int err; + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) + return __pmc_fixed_disable(counter, hwc, idx); + err = wrmsr_safe(hwc->config_base + idx, hwc->config, 0); } -static DEFINE_PER_CPU(u64, prev_left[X86_PMC_MAX_GENERIC]); +static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]); /* * Set the next IRQ period, based on the hwc->period_left value. @@ -202,8 +218,9 @@ static void __hw_perf_counter_set_period(struct perf_counter *counter, struct hw_perf_counter *hwc, int idx) { - s32 left = atomic64_read(&hwc->period_left); + s64 left = atomic64_read(&hwc->period_left); s32 period = hwc->irq_period; + int err; /* * If we are way outside a reasoable range then just skip forward: @@ -224,21 +241,64 @@ __hw_perf_counter_set_period(struct perf_counter *counter, * The hw counter starts counting from this counter offset, * mark it to be able to extra future deltas: */ - atomic64_set(&hwc->prev_count, (u64)(s64)-left); + atomic64_set(&hwc->prev_count, (u64)-left); - wrmsr(hwc->counter_base + idx, -left, 0); + err = checking_wrmsrl(hwc->counter_base + idx, + (u64)(-left) & counter_value_mask); +} + +static inline void +__pmc_fixed_enable(struct perf_counter *counter, + struct hw_perf_counter *hwc, unsigned int __idx) +{ + int idx = __idx - X86_PMC_IDX_FIXED; + u64 ctrl_val, bits, mask; + int err; + + /* + * Enable IRQ generation (0x8) and ring-3 counting (0x2), + * and enable ring-0 counting if allowed: + */ + bits = 0x8ULL | 0x2ULL; + if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) + bits |= 0x1; + bits <<= (idx * 4); + mask = 0xfULL << (idx * 4); + + rdmsrl(hwc->config_base, ctrl_val); + ctrl_val &= ~mask; + ctrl_val |= bits; + err = checking_wrmsrl(hwc->config_base, ctrl_val); } static void __pmc_generic_enable(struct perf_counter *counter, struct hw_perf_counter *hwc, int idx) { + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) + return __pmc_fixed_enable(counter, hwc, idx); + wrmsr(hwc->config_base + idx, hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0); } -static int fixed_mode_idx(struct hw_perf_counter *hwc) +static int +fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) { + unsigned int event; + + if (unlikely(hwc->nmi)) + return -1; + + event = hwc->config & ARCH_PERFMON_EVENT_MASK; + + if (unlikely(event == intel_perfmon_event_map[PERF_COUNT_INSTRUCTIONS])) + return X86_PMC_IDX_FIXED_INSTRUCTIONS; + if (unlikely(event == intel_perfmon_event_map[PERF_COUNT_CPU_CYCLES])) + return X86_PMC_IDX_FIXED_CPU_CYCLES; + if (unlikely(event == intel_perfmon_event_map[PERF_COUNT_BUS_CYCLES])) + return X86_PMC_IDX_FIXED_BUS_CYCLES; + return -1; } @@ -249,16 +309,39 @@ static int pmc_generic_enable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); struct hw_perf_counter *hwc = &counter->hw; - int idx = hwc->idx; + int idx; - /* Try to get the previous counter again */ - if (test_and_set_bit(idx, cpuc->used)) { - idx = find_first_zero_bit(cpuc->used, nr_counters_generic); - if (idx == nr_counters_generic) - return -EAGAIN; + idx = fixed_mode_idx(counter, hwc); + if (idx >= 0) { + /* + * Try to get the fixed counter, if that is already taken + * then try to get a generic counter: + */ + if (test_and_set_bit(idx, cpuc->used)) + goto try_generic; - set_bit(idx, cpuc->used); + hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; + /* + * We set it so that counter_base + idx in wrmsr/rdmsr maps to + * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: + */ + hwc->counter_base = + MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; hwc->idx = idx; + } else { + idx = hwc->idx; + /* Try to get the previous generic counter again */ + if (test_and_set_bit(idx, cpuc->used)) { +try_generic: + idx = find_first_zero_bit(cpuc->used, nr_counters_generic); + if (idx == nr_counters_generic) + return -EAGAIN; + + set_bit(idx, cpuc->used); + hwc->idx = idx; + } + hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0; + hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0; } perf_counters_lapic_init(hwc->nmi); @@ -266,6 +349,10 @@ static int pmc_generic_enable(struct perf_counter *counter) __pmc_generic_disable(counter, hwc, idx); cpuc->counters[idx] = counter; + /* + * Make it visible before enabling the hw: + */ + smp_wmb(); __hw_perf_counter_set_period(counter, hwc, idx); __pmc_generic_enable(counter, hwc, idx); @@ -275,7 +362,7 @@ static int pmc_generic_enable(struct perf_counter *counter) void perf_counter_print_debug(void) { - u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left; + u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; struct cpu_hw_counters *cpuc; int cpu, idx; @@ -290,11 +377,13 @@ void perf_counter_print_debug(void) rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); + rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); printk(KERN_INFO "\n"); printk(KERN_INFO "CPU#%d: ctrl: %016llx\n", cpu, ctrl); printk(KERN_INFO "CPU#%d: status: %016llx\n", cpu, status); printk(KERN_INFO "CPU#%d: overflow: %016llx\n", cpu, overflow); + printk(KERN_INFO "CPU#%d: fixed: %016llx\n", cpu, fixed); printk(KERN_INFO "CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used); for (idx = 0; idx < nr_counters_generic; idx++) { @@ -303,13 +392,19 @@ void perf_counter_print_debug(void) prev_left = per_cpu(prev_left[idx], cpu); - printk(KERN_INFO "CPU#%d: PMC%d ctrl: %016llx\n", + printk(KERN_INFO "CPU#%d: gen-PMC%d ctrl: %016llx\n", cpu, idx, pmc_ctrl); - printk(KERN_INFO "CPU#%d: PMC%d count: %016llx\n", + printk(KERN_INFO "CPU#%d: gen-PMC%d count: %016llx\n", cpu, idx, pmc_count); - printk(KERN_INFO "CPU#%d: PMC%d left: %016llx\n", + printk(KERN_INFO "CPU#%d: gen-PMC%d left: %016llx\n", cpu, idx, prev_left); } + for (idx = 0; idx < nr_counters_fixed; idx++) { + rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); + + printk(KERN_INFO "CPU#%d: fixed-PMC%d count: %016llx\n", + cpu, idx, pmc_count); + } local_irq_enable(); } @@ -323,6 +418,11 @@ static void pmc_generic_disable(struct perf_counter *counter) clear_bit(idx, cpuc->used); cpuc->counters[idx] = NULL; + /* + * Make sure the cleared pointer becomes visible before we + * (potentially) free the counter: + */ + smp_wmb(); /* * Drain the remaining delta count out of a counter @@ -353,14 +453,11 @@ static void perf_save_and_restart(struct perf_counter *counter) { struct hw_perf_counter *hwc = &counter->hw; int idx = hwc->idx; - u64 pmc_ctrl; - - rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl); x86_perf_counter_update(counter, hwc, idx); __hw_perf_counter_set_period(counter, hwc, idx); - if (pmc_ctrl & ARCH_PERFMON_EVENTSEL0_ENABLE) + if (counter->state == PERF_COUNTER_STATE_ACTIVE) __pmc_generic_enable(counter, hwc, idx); } @@ -373,6 +470,7 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown) * Store sibling timestamps (if any): */ list_for_each_entry(counter, &group_leader->sibling_list, list_entry) { + x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); perf_store_irq_data(sibling, counter->hw_event.type); perf_store_irq_data(sibling, atomic64_read(&counter->count)); @@ -403,7 +501,7 @@ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) again: ack = status; - for_each_bit(bit, (unsigned long *) &status, nr_counters_generic) { + for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { struct perf_counter *counter = cpuc->counters[bit]; clear_bit(bit, (unsigned long *) &status); @@ -561,6 +659,9 @@ void __init init_hw_perf_counters(void) perf_max_counters = nr_counters_generic; printk(KERN_INFO "... bit width: %d\n", eax.split.bit_width); + counter_value_mask = (1ULL << eax.split.bit_width) - 1; + printk(KERN_INFO "... value mask: %016Lx\n", counter_value_mask); + printk(KERN_INFO "... mask length: %d\n", eax.split.mask_length); nr_counters_fixed = edx.split.num_counters_fixed; -- cgit v1.2.3-70-g09d2 From b6c5a71da1477d261bc36254fe1f20d32b57598d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 16 Mar 2009 21:00:00 +1100 Subject: perf_counter: abstract wakeup flag setting in core to fix powerpc build Impact: build fix for powerpc Commit bd753921015e7905 ("perf_counter: software counter event infrastructure") introduced a use of TIF_PERF_COUNTERS into the core perfcounter code. This breaks the build on powerpc because we use a flag in a per-cpu area to signal wakeups on powerpc rather than a thread_info flag, because the thread_info flags have to be manipulated with atomic operations and are thus slower than per-cpu flags. This fixes the by changing the core to use an abstracted set_perf_counter_pending() function, which is defined on x86 to set the TIF_PERF_COUNTERS flag and on powerpc to set the per-cpu flag (paca->perf_counter_pending). It changes the previous powerpc definition of set_perf_counter_pending to not take an argument and adds a clear_perf_counter_pending, so as to simplify the definition on x86. On x86, set_perf_counter_pending() is defined as a macro. Defining it as a static inline in arch/x86/include/asm/perf_counters.h causes compile failures because gets included early in , and the definitions of set_tsk_thread_flag etc. are therefore not available in . (On powerpc this problem is avoided by defining set_perf_counter_pending etc. in .) Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/hw_irq.h | 14 +++++++++++--- arch/powerpc/kernel/irq.c | 11 +++-------- arch/powerpc/kernel/perf_counter.c | 3 +-- arch/x86/include/asm/perf_counter.h | 3 +++ kernel/perf_counter.c | 2 +- 5 files changed, 19 insertions(+), 14 deletions(-) (limited to 'arch/x86/include/asm/perf_counter.h') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index b43076ff92c..cb32d571c9c 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -142,10 +142,17 @@ static inline unsigned long get_perf_counter_pending(void) return x; } -static inline void set_perf_counter_pending(int x) +static inline void set_perf_counter_pending(void) { asm volatile("stb %0,%1(13)" : : - "r" (x), + "r" (1), + "i" (offsetof(struct paca_struct, perf_counter_pending))); +} + +static inline void clear_perf_counter_pending(void) +{ + asm volatile("stb %0,%1(13)" : : + "r" (0), "i" (offsetof(struct paca_struct, perf_counter_pending))); } @@ -158,7 +165,8 @@ static inline unsigned long get_perf_counter_pending(void) return 0; } -static inline void set_perf_counter_pending(int x) {} +static inline void set_perf_counter_pending(void) {} +static inline void clear_perf_counter_pending(void) {} static inline void perf_counter_do_pending(void) {} #endif /* CONFIG_PERF_COUNTERS */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 0d2e37c5773..469e9635ff0 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -104,13 +104,6 @@ static inline notrace void set_soft_enabled(unsigned long enable) : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } -#ifdef CONFIG_PERF_COUNTERS -notrace void __weak perf_counter_do_pending(void) -{ - set_perf_counter_pending(0); -} -#endif - notrace void raw_local_irq_restore(unsigned long en) { /* @@ -142,8 +135,10 @@ notrace void raw_local_irq_restore(unsigned long en) iseries_handle_interrupts(); } - if (get_perf_counter_pending()) + if (get_perf_counter_pending()) { + clear_perf_counter_pending(); perf_counter_do_pending(); + } /* * if (get_paca()->hard_enabled) return; diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 0e33d27cd46..5008762e8bf 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -653,7 +653,6 @@ void perf_counter_do_pending(void) struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); struct perf_counter *counter; - set_perf_counter_pending(0); for (i = 0; i < cpuhw->n_counters; ++i) { counter = cpuhw->counter[i]; if (counter && counter->wakeup_pending) { @@ -811,7 +810,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) perf_counter_do_pending(); irq_exit(); } else { - set_perf_counter_pending(1); + set_perf_counter_pending(); } } } diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 2e08ed73664..1662043b340 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -84,6 +84,9 @@ union cpuid10_edx { #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) +#define set_perf_counter_pending() \ + set_tsk_thread_flag(current, TIF_PERF_COUNTERS); + #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); extern void perf_counters_lapic_init(int nmi); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0018c5e8124..b39456ad74a 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1433,7 +1433,7 @@ static void perf_swcounter_interrupt(struct perf_counter *counter, if (nmi) { counter->wakeup_pending = 1; - set_tsk_thread_flag(current, TIF_PERF_COUNTERS); + set_perf_counter_pending(); } else wake_up(&counter->waitq); } -- cgit v1.2.3-70-g09d2 From 925d519ab82b6dd7aca9420d809ee83819c08db2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:02 +0200 Subject: perf_counter: unify and fix delayed counter wakeup While going over the wakeup code I noticed delayed wakeups only work for hardware counters but basically all software counters rely on them. This patch unifies and generalizes the delayed wakeup to fix this issue. Since we're dealing with NMI context bits here, use a cmpxchg() based single link list implementation to track counters that have pending wakeups. [ This should really be generic code for delayed wakeups, but since we cannot use cmpxchg()/xchg() in generic code, I've let it live in the perf_counter code. -- Eric Dumazet could use it to aggregate the network wakeups. ] Furthermore, the x86 method of using TIF flags was flawed in that its quite possible to end up setting the bit on the idle task, loosing the wakeup. The powerpc method uses per-cpu storage and does appear to be sufficient. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Orig-LKML-Reference: <20090330171023.153932974@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/hw_irq.h | 4 +- arch/powerpc/kernel/irq.c | 2 +- arch/powerpc/kernel/perf_counter.c | 22 +------ arch/x86/include/asm/perf_counter.h | 5 +- arch/x86/include/asm/thread_info.h | 4 +- arch/x86/kernel/cpu/perf_counter.c | 29 -------- arch/x86/kernel/signal.c | 6 -- include/linux/perf_counter.h | 15 +++-- kernel/perf_counter.c | 128 +++++++++++++++++++++++++++++++++--- kernel/timer.c | 3 + 10 files changed, 142 insertions(+), 76 deletions(-) (limited to 'arch/x86/include/asm/perf_counter.h') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index cb32d571c9c..20a44d0c9fd 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -132,7 +132,7 @@ static inline int irqs_disabled_flags(unsigned long flags) struct irq_chip; #ifdef CONFIG_PERF_COUNTERS -static inline unsigned long get_perf_counter_pending(void) +static inline unsigned long test_perf_counter_pending(void) { unsigned long x; @@ -160,7 +160,7 @@ extern void perf_counter_do_pending(void); #else -static inline unsigned long get_perf_counter_pending(void) +static inline unsigned long test_perf_counter_pending(void) { return 0; } diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 469e9635ff0..2cd471f92fe 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -135,7 +135,7 @@ notrace void raw_local_irq_restore(unsigned long en) iseries_handle_interrupts(); } - if (get_perf_counter_pending()) { + if (test_perf_counter_pending()) { clear_perf_counter_pending(); perf_counter_do_pending(); } diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index df007fe0cc0..cde720fc495 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -649,24 +649,6 @@ hw_perf_counter_init(struct perf_counter *counter) return &power_perf_ops; } -/* - * Handle wakeups. - */ -void perf_counter_do_pending(void) -{ - int i; - struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); - struct perf_counter *counter; - - for (i = 0; i < cpuhw->n_counters; ++i) { - counter = cpuhw->counter[i]; - if (counter && counter->wakeup_pending) { - counter->wakeup_pending = 0; - wake_up(&counter->waitq); - } - } -} - /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -720,7 +702,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); struct perf_counter *counter; long val; - int need_wakeup = 0, found = 0; + int found = 0; for (i = 0; i < cpuhw->n_counters; ++i) { counter = cpuhw->counter[i]; @@ -761,7 +743,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) * immediately; otherwise we'll have do the wakeup when interrupts * get soft-enabled. */ - if (get_perf_counter_pending() && regs->softe) { + if (test_perf_counter_pending() && regs->softe) { irq_enter(); clear_perf_counter_pending(); perf_counter_do_pending(); diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 1662043b340..e2b0e66b235 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -84,8 +84,9 @@ union cpuid10_edx { #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) -#define set_perf_counter_pending() \ - set_tsk_thread_flag(current, TIF_PERF_COUNTERS); +#define set_perf_counter_pending() do { } while (0) +#define clear_perf_counter_pending() do { } while (0) +#define test_perf_counter_pending() (0) #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 3ffd5d2a367..8820a73ae09 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -83,7 +83,6 @@ struct thread_info { #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ -#define TIF_PERF_COUNTERS 11 /* notify perf counter work */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* 32bit process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -107,7 +106,6 @@ struct thread_info { #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) -#define _TIF_PERF_COUNTERS (1 << TIF_PERF_COUNTERS) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -141,7 +139,7 @@ struct thread_info { /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_PERF_COUNTERS|_TIF_NOTIFY_RESUME) + (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 3f95b0cdc55..7aab177fb56 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -227,7 +227,6 @@ static int __hw_perf_counter_init(struct perf_counter *counter) */ hwc->config |= pmc_ops->event_map(perf_event_id(hw_event)); } - counter->wakeup_pending = 0; return 0; } @@ -773,34 +772,6 @@ void smp_perf_counter_interrupt(struct pt_regs *regs) irq_exit(); } -/* - * This handler is triggered by NMI contexts: - */ -void perf_counter_notify(struct pt_regs *regs) -{ - struct cpu_hw_counters *cpuc; - unsigned long flags; - int bit, cpu; - - local_irq_save(flags); - cpu = smp_processor_id(); - cpuc = &per_cpu(cpu_hw_counters, cpu); - - for_each_bit(bit, cpuc->used, X86_PMC_IDX_MAX) { - struct perf_counter *counter = cpuc->counters[bit]; - - if (!counter) - continue; - - if (counter->wakeup_pending) { - counter->wakeup_pending = 0; - wake_up(&counter->waitq); - } - } - - local_irq_restore(flags); -} - void perf_counters_lapic_init(int nmi) { u32 apic_val; diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 611615a92c9..0a813b17b17 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -6,7 +6,6 @@ * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes * 2000-2002 x86-64 support by Andi Kleen */ -#include #include #include #include @@ -872,11 +871,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) tracehook_notify_resume(regs); } - if (thread_info_flags & _TIF_PERF_COUNTERS) { - clear_thread_flag(TIF_PERF_COUNTERS); - perf_counter_notify(regs); - } - #ifdef CONFIG_X86_32 clear_thread_flag(TIF_IRET); #endif /* CONFIG_X86_32 */ diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 6bf67ce1762..0d833228eee 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -275,6 +275,10 @@ struct perf_mmap_data { void *data_pages[0]; }; +struct perf_wakeup_entry { + struct perf_wakeup_entry *next; +}; + /** * struct perf_counter - performance counter kernel representation: */ @@ -350,7 +354,7 @@ struct perf_counter { /* poll related */ wait_queue_head_t waitq; /* optional: for NMIs */ - int wakeup_pending; + struct perf_wakeup_entry wakeup; void (*destroy)(struct perf_counter *); struct rcu_head rcu_head; @@ -427,7 +431,7 @@ extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); extern void perf_counter_task_tick(struct task_struct *task, int cpu); extern void perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); -extern void perf_counter_notify(struct pt_regs *regs); +extern void perf_counter_do_pending(void); extern void perf_counter_print_debug(void); extern void perf_counter_unthrottle(void); extern u64 hw_perf_save_disable(void); @@ -461,7 +465,7 @@ static inline void perf_counter_task_tick(struct task_struct *task, int cpu) { } static inline void perf_counter_init_task(struct task_struct *child) { } static inline void perf_counter_exit_task(struct task_struct *child) { } -static inline void perf_counter_notify(struct pt_regs *regs) { } +static inline void perf_counter_do_pending(void) { } static inline void perf_counter_print_debug(void) { } static inline void perf_counter_unthrottle(void) { } static inline void hw_perf_restore(u64 ctrl) { } @@ -469,8 +473,9 @@ static inline u64 hw_perf_save_disable(void) { return 0; } static inline int perf_counter_task_disable(void) { return -EINVAL; } static inline int perf_counter_task_enable(void) { return -EINVAL; } -static inline void perf_swcounter_event(u32 event, u64 nr, - int nmi, struct pt_regs *regs) { } +static inline void +perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) { } + #endif #endif /* __KERNEL__ */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 3b862a7988c..f70ff80e79d 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1197,8 +1197,12 @@ static void free_counter_rcu(struct rcu_head *head) kfree(counter); } +static void perf_pending_sync(struct perf_counter *counter); + static void free_counter(struct perf_counter *counter) { + perf_pending_sync(counter); + if (counter->destroy) counter->destroy(counter); @@ -1528,6 +1532,118 @@ static const struct file_operations perf_fops = { .mmap = perf_mmap, }; +/* + * Perf counter wakeup + * + * If there's data, ensure we set the poll() state and publish everything + * to user-space before waking everybody up. + */ + +void perf_counter_wakeup(struct perf_counter *counter) +{ + struct perf_mmap_data *data; + + rcu_read_lock(); + data = rcu_dereference(counter->data); + if (data) { + (void)atomic_xchg(&data->wakeup, POLL_IN); + __perf_counter_update_userpage(counter, data); + } + rcu_read_unlock(); + + wake_up_all(&counter->waitq); +} + +/* + * Pending wakeups + * + * Handle the case where we need to wakeup up from NMI (or rq->lock) context. + * + * The NMI bit means we cannot possibly take locks. Therefore, maintain a + * single linked list and use cmpxchg() to add entries lockless. + */ + +#define PENDING_TAIL ((struct perf_wakeup_entry *)-1UL) + +static DEFINE_PER_CPU(struct perf_wakeup_entry *, perf_wakeup_head) = { + PENDING_TAIL, +}; + +static void perf_pending_queue(struct perf_counter *counter) +{ + struct perf_wakeup_entry **head; + struct perf_wakeup_entry *prev, *next; + + if (cmpxchg(&counter->wakeup.next, NULL, PENDING_TAIL) != NULL) + return; + + head = &get_cpu_var(perf_wakeup_head); + + do { + prev = counter->wakeup.next = *head; + next = &counter->wakeup; + } while (cmpxchg(head, prev, next) != prev); + + set_perf_counter_pending(); + + put_cpu_var(perf_wakeup_head); +} + +static int __perf_pending_run(void) +{ + struct perf_wakeup_entry *list; + int nr = 0; + + list = xchg(&__get_cpu_var(perf_wakeup_head), PENDING_TAIL); + while (list != PENDING_TAIL) { + struct perf_counter *counter = container_of(list, + struct perf_counter, wakeup); + + list = list->next; + + counter->wakeup.next = NULL; + /* + * Ensure we observe the unqueue before we issue the wakeup, + * so that we won't be waiting forever. + * -- see perf_not_pending(). + */ + smp_wmb(); + + perf_counter_wakeup(counter); + nr++; + } + + return nr; +} + +static inline int perf_not_pending(struct perf_counter *counter) +{ + /* + * If we flush on whatever cpu we run, there is a chance we don't + * need to wait. + */ + get_cpu(); + __perf_pending_run(); + put_cpu(); + + /* + * Ensure we see the proper queue state before going to sleep + * so that we do not miss the wakeup. -- see perf_pending_handle() + */ + smp_rmb(); + return counter->wakeup.next == NULL; +} + +static void perf_pending_sync(struct perf_counter *counter) +{ + wait_event(counter->waitq, perf_not_pending(counter)); +} + +void perf_counter_do_pending(void) +{ + __perf_pending_run(); +} + /* * Output */ @@ -1611,13 +1727,10 @@ static void perf_output_copy(struct perf_output_handle *handle, static void perf_output_end(struct perf_output_handle *handle, int nmi) { if (handle->wakeup) { - (void)atomic_xchg(&handle->data->wakeup, POLL_IN); - __perf_counter_update_userpage(handle->counter, handle->data); - if (nmi) { - handle->counter->wakeup_pending = 1; - set_perf_counter_pending(); - } else - wake_up(&handle->counter->waitq); + if (nmi) + perf_pending_queue(handle->counter); + else + perf_counter_wakeup(handle->counter); } rcu_read_unlock(); } @@ -2211,7 +2324,6 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, counter->cpu = cpu; counter->hw_event = *hw_event; - counter->wakeup_pending = 0; counter->group_leader = group_leader; counter->hw_ops = NULL; counter->ctx = ctx; diff --git a/kernel/timer.c b/kernel/timer.c index b4555568b4e..672ca25fbc4 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -1167,6 +1168,8 @@ static void run_timer_softirq(struct softirq_action *h) { struct tvec_base *base = __get_cpu_var(tvec_bases); + perf_counter_do_pending(); + hrtimer_run_pending(); if (time_after_eq(jiffies, base->timer_jiffies)) -- cgit v1.2.3-70-g09d2 From b6276f353bf490add62dcf7db0ebd75baa3e1a37 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:03 +0200 Subject: perf_counter: x86: self-IPI for pending work Implement set_perf_counter_pending() with a self-IPI so that it will run ASAP in a usable context. For now use a second IRQ vector, because the primary vector pokes the apic in funny ways that seem to confuse things. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094517.724626696@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/entry_arch.h | 1 + arch/x86/include/asm/hardirq.h | 1 + arch/x86/include/asm/hw_irq.h | 1 + arch/x86/include/asm/irq_vectors.h | 5 +++++ arch/x86/include/asm/perf_counter.h | 3 ++- arch/x86/kernel/cpu/perf_counter.c | 14 ++++++++++++++ arch/x86/kernel/entry_64.S | 2 ++ arch/x86/kernel/irq.c | 5 +++++ arch/x86/kernel/irqinit_32.c | 1 + arch/x86/kernel/irqinit_64.c | 1 + 10 files changed, 33 insertions(+), 1 deletion(-) (limited to 'arch/x86/include/asm/perf_counter.h') diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index c2e6bedaf25..fe24d280249 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -50,6 +50,7 @@ BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) #ifdef CONFIG_PERF_COUNTERS BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) +BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) #endif #ifdef CONFIG_X86_MCE_P4THERMAL diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 25454427cee..f5ebe2aaca4 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -14,6 +14,7 @@ typedef struct { #endif unsigned int generic_irqs; /* arch dependent */ unsigned int apic_perf_irqs; + unsigned int apic_pending_irqs; #ifdef CONFIG_SMP unsigned int irq_resched_count; unsigned int irq_call_count; diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index ae80f64973e..7309c0ad690 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -30,6 +30,7 @@ extern void apic_timer_interrupt(void); extern void generic_interrupt(void); extern void error_interrupt(void); extern void perf_counter_interrupt(void); +extern void perf_pending_interrupt(void); extern void spurious_interrupt(void); extern void thermal_interrupt(void); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 3cbd79bbb47..545bb811ccb 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -116,6 +116,11 @@ */ #define GENERIC_INTERRUPT_VECTOR 0xed +/* + * Performance monitoring pending work vector: + */ +#define LOCAL_PENDING_VECTOR 0xec + /* * First APIC vector available to drivers: (vectors 0x30-0xee) we * start at 0x31(0x41) to spread out vectors evenly between priority diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index e2b0e66b235..d08dd52cb8f 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -84,7 +84,8 @@ union cpuid10_edx { #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) -#define set_perf_counter_pending() do { } while (0) +extern void set_perf_counter_pending(void); + #define clear_perf_counter_pending() do { } while (0) #define test_perf_counter_pending() (0) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index c74e20d593a..438415866fe 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -849,6 +849,20 @@ void smp_perf_counter_interrupt(struct pt_regs *regs) irq_exit(); } +void smp_perf_pending_interrupt(struct pt_regs *regs) +{ + irq_enter(); + ack_APIC_irq(); + inc_irq_stat(apic_pending_irqs); + perf_counter_do_pending(); + irq_exit(); +} + +void set_perf_counter_pending(void) +{ + apic->send_IPI_self(LOCAL_PENDING_VECTOR); +} + void perf_counters_lapic_init(int nmi) { u32 apic_val; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 3f129d963a0..1d46cba56fd 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1028,6 +1028,8 @@ apicinterrupt SPURIOUS_APIC_VECTOR \ #ifdef CONFIG_PERF_COUNTERS apicinterrupt LOCAL_PERF_VECTOR \ perf_counter_interrupt smp_perf_counter_interrupt +apicinterrupt LOCAL_PENDING_VECTOR \ + perf_pending_interrupt smp_perf_pending_interrupt #endif /* diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 9c2754302ec..d465487da58 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -67,6 +67,10 @@ static int show_other_interrupts(struct seq_file *p, int prec) for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); seq_printf(p, " Performance counter interrupts\n"); + seq_printf(p, "PND: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); + seq_printf(p, " Performance pending work\n"); #endif if (generic_interrupt_extension) { seq_printf(p, "PLT: "); @@ -171,6 +175,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += irq_stats(cpu)->apic_timer_irqs; sum += irq_stats(cpu)->irq_spurious_count; sum += irq_stats(cpu)->apic_perf_irqs; + sum += irq_stats(cpu)->apic_pending_irqs; #endif if (generic_interrupt_extension) sum += irq_stats(cpu)->generic_irqs; diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 925d87cfc55..3190a6b961e 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -166,6 +166,7 @@ static void __init apic_intr_init(void) alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); # ifdef CONFIG_PERF_COUNTERS alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt); + alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); # endif # ifdef CONFIG_X86_MCE_P4THERMAL diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 665e2ab48ab..53ceb26f80f 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -156,6 +156,7 @@ static void __init apic_intr_init(void) /* Performance monitoring interrupt: */ #ifdef CONFIG_PERF_COUNTERS alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt); + alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); #endif } -- cgit v1.2.3-70-g09d2 From c323d95fa4dbe0b6bf6d59e24a0b7db067dd08a7 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Fri, 29 May 2009 13:28:35 +0800 Subject: perf_counter/x86: Always use NMI for performance-monitoring interrupt Always use NMI for performance-monitoring interrupt as there could be racy situations if we switch between irq and nmi mode frequently. Signed-off-by: Yong Wang LKML-Reference: <20090529052835.GA13657@ywang-moblin2.bj.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_counter.h | 4 ++-- arch/x86/kernel/apic/apic.c | 2 +- arch/x86/kernel/cpu/perf_counter.c | 19 +++++-------------- 3 files changed, 8 insertions(+), 17 deletions(-) (limited to 'arch/x86/include/asm/perf_counter.h') diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index d08dd52cb8f..876ed97147b 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -91,10 +91,10 @@ extern void set_perf_counter_pending(void); #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); -extern void perf_counters_lapic_init(int nmi); +extern void perf_counters_lapic_init(void); #else static inline void init_hw_perf_counters(void) { } -static inline void perf_counters_lapic_init(int nmi) { } +static inline void perf_counters_lapic_init(void) { } #endif #endif /* _ASM_X86_PERF_COUNTER_H */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 89b63b5fad3..60df2efd7c8 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1135,7 +1135,7 @@ void __cpuinit setup_local_APIC(void) apic_write(APIC_ESR, 0); } #endif - perf_counters_lapic_init(0); + perf_counters_lapic_init(); preempt_disable(); diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 2eeaa99add1..316b0c995f3 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -604,7 +604,7 @@ try_generic: hwc->counter_base = x86_pmu.perfctr; } - perf_counters_lapic_init(hwc->nmi); + perf_counters_lapic_init(); x86_pmu.disable(hwc, idx); @@ -863,24 +863,15 @@ void set_perf_counter_pending(void) apic->send_IPI_self(LOCAL_PENDING_VECTOR); } -void perf_counters_lapic_init(int nmi) +void perf_counters_lapic_init(void) { - u32 apic_val; - if (!x86_pmu_initialized()) return; /* - * Enable the performance counter vector in the APIC LVT: + * Always use NMI for PMU */ - apic_val = apic_read(APIC_LVTERR); - - apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED); - if (nmi) - apic_write(APIC_LVTPC, APIC_DM_NMI); - else - apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR); - apic_write(APIC_LVTERR, apic_val); + apic_write(APIC_LVTPC, APIC_DM_NMI); } static int __kprobes @@ -1054,7 +1045,7 @@ void __init init_hw_perf_counters(void) pr_info("... counter mask: %016Lx\n", perf_counter_mask); - perf_counters_lapic_init(0); + perf_counters_lapic_init(); register_die_notifier(&perf_counter_nmi_notifier); } -- cgit v1.2.3-70-g09d2