From c9cf4dbb4d9ca715d8fedf13301a53296429abc6 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 19 May 2010 21:35:17 +0200 Subject: x86: Unify dumpstack.h and stacktrace.h arch/x86/include/asm/stacktrace.h and arch/x86/kernel/dumpstack.h declare headers of objects that deal with the same topic. Actually most of the files that include stacktrace.h also include dumpstack.h Although dumpstack.h seems more reserved for internals of stack traces, those are quite often needed to define specialized stack trace operations. And perf event arch headers are going to need access to such low level operations anyway. So don't continue to bother with dumpstack.h as it's not anymore about isolated deep internals. v2: fix struct stack_frame definition conflict in sysprof Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Thomas Gleixner Cc: Soeren Sandmann --- arch/x86/kernel/cpu/perf_event.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c77586061bc..9632fb61e8f 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1585,8 +1585,6 @@ static const struct stacktrace_ops backtrace_ops = { .walk_stack = print_context_stack_bp, }; -#include "../dumpstack.h" - static void perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) { -- cgit v1.2.3-70-g09d2 From b0f82b81fe6bbcf78d478071f33e44554726bc81 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 20 May 2010 07:47:21 +0200 Subject: perf: Drop the skip argument from perf_arch_fetch_regs_caller Drop this argument now that we always want to rewind only to the state of the first caller. It means frame pointers are not necessary anymore to reliably get the source of an event. But this also means we need this helper to be a macro now, as an inline function is not an option since we need to know when to provide a default implentation. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul Mackerras Cc: David Miller Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo --- arch/powerpc/include/asm/perf_event.h | 12 ++++++++++++ arch/powerpc/kernel/misc.S | 26 -------------------------- arch/sparc/include/asm/perf_event.h | 8 ++++++++ arch/sparc/kernel/helpers.S | 6 +++--- arch/x86/include/asm/perf_event.h | 13 +++++++++++++ arch/x86/include/asm/stacktrace.h | 7 ++----- arch/x86/kernel/cpu/perf_event.c | 16 ---------------- include/linux/perf_event.h | 32 +++++++------------------------- include/trace/ftrace.h | 2 +- kernel/perf_event.c | 5 ----- kernel/trace/trace_event_perf.c | 2 -- 11 files changed, 46 insertions(+), 83 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h index e6d4ce69b12..5c16b891d50 100644 --- a/arch/powerpc/include/asm/perf_event.h +++ b/arch/powerpc/include/asm/perf_event.h @@ -21,3 +21,15 @@ #ifdef CONFIG_FSL_EMB_PERF_EVENT #include #endif + +#ifdef CONFIG_PERF_EVENTS +#include +#include + +#define perf_arch_fetch_caller_regs(regs, __ip) \ + do { \ + (regs)->nip = __ip; \ + (regs)->gpr[1] = *(unsigned long *)__get_SP(); \ + asm volatile("mfmsr %0" : "=r" ((regs)->msr)); \ + } while (0) +#endif diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index 22e507c8a55..2d29752cbe1 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -127,29 +127,3 @@ _GLOBAL(__setup_cpu_power7) _GLOBAL(__restore_cpu_power7) /* place holder */ blr - -/* - * Get a minimal set of registers for our caller's nth caller. - * r3 = regs pointer, r5 = n. - * - * We only get R1 (stack pointer), NIP (next instruction pointer) - * and LR (link register). These are all we can get in the - * general case without doing complicated stack unwinding, but - * fortunately they are enough to do a stack backtrace, which - * is all we need them for. - */ -_GLOBAL(perf_arch_fetch_caller_regs) - mr r6,r1 - cmpwi r5,0 - mflr r4 - ble 2f - mtctr r5 -1: PPC_LL r6,0(r6) - bdnz 1b - PPC_LL r4,PPC_LR_STKOFF(r6) -2: PPC_LL r7,0(r6) - PPC_LL r7,PPC_LR_STKOFF(r7) - PPC_STL r6,GPR1-STACK_FRAME_OVERHEAD(r3) - PPC_STL r4,_NIP-STACK_FRAME_OVERHEAD(r3) - PPC_STL r7,_LINK-STACK_FRAME_OVERHEAD(r3) - blr diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h index 7e2669894ce..74c4e0cd889 100644 --- a/arch/sparc/include/asm/perf_event.h +++ b/arch/sparc/include/asm/perf_event.h @@ -6,7 +6,15 @@ extern void set_perf_event_pending(void); #define PERF_EVENT_INDEX_OFFSET 0 #ifdef CONFIG_PERF_EVENTS +#include + extern void init_hw_perf_events(void); + +extern void +__perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip); + +#define perf_arch_fetch_caller_regs(pt_regs, ip) \ + __perf_arch_fetch_caller_regs(pt_regs, ip, 1); #else static inline void init_hw_perf_events(void) { } #endif diff --git a/arch/sparc/kernel/helpers.S b/arch/sparc/kernel/helpers.S index 92090cc9e82..682fee06a16 100644 --- a/arch/sparc/kernel/helpers.S +++ b/arch/sparc/kernel/helpers.S @@ -47,9 +47,9 @@ stack_trace_flush: .size stack_trace_flush,.-stack_trace_flush #ifdef CONFIG_PERF_EVENTS - .globl perf_arch_fetch_caller_regs - .type perf_arch_fetch_caller_regs,#function -perf_arch_fetch_caller_regs: + .globl __perf_arch_fetch_caller_regs + .type __perf_arch_fetch_caller_regs,#function +__perf_arch_fetch_caller_regs: /* We always read the %pstate into %o5 since we will use * that to construct a fake %tstate to store into the regs. */ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 254883d0c7e..02de29830ff 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -140,6 +140,19 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) +#include + +/* + * We abuse bit 3 from flags to pass exact information, see perf_misc_flags + * and the comment with PERF_EFLAGS_EXACT. + */ +#define perf_arch_fetch_caller_regs(regs, __ip) { \ + (regs)->ip = (__ip); \ + (regs)->bp = caller_frame_pointer(); \ + (regs)->cs = __KERNEL_CS; \ + regs->flags = 0; \ +} + #else static inline void init_hw_perf_events(void) { } static inline void perf_events_lapic_init(void) { } diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index a957463d3c7..2b16a2ad23d 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -78,17 +78,14 @@ struct stack_frame_ia32 { u32 return_address; }; -static inline unsigned long rewind_frame_pointer(int n) +static inline unsigned long caller_frame_pointer(void) { struct stack_frame *frame; get_bp(frame); #ifdef CONFIG_FRAME_POINTER - while (n--) { - if (probe_kernel_address(&frame->next_frame, frame)) - break; - } + frame = frame->next_frame; #endif return (unsigned long)frame; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 9632fb61e8f..2c075fe573d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1706,22 +1706,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) return entry; } -void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip) -{ - regs->ip = ip; - /* - * perf_arch_fetch_caller_regs adds another call, we need to increment - * the skip level - */ - regs->bp = rewind_frame_pointer(skip + 1); - regs->cs = __KERNEL_CS; - /* - * We abuse bit 3 to pass exact information, see perf_misc_flags - * and the comment with PERF_EFLAGS_EXACT. - */ - regs->flags = 0; -} - unsigned long perf_instruction_pointer(struct pt_regs *regs) { unsigned long ip; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fb6c91eac7e..bea785cef49 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -905,8 +905,10 @@ extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); -extern void -perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip); +#ifndef perf_arch_fetch_caller_regs +static inline void +perf_arch_fetch_caller_regs(struct regs *regs, unsigned long ip) { } +#endif /* * Take a snapshot of the regs. Skip ip and frame pointer to @@ -916,31 +918,11 @@ perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip); * - bp for callchains * - eflags, for future purposes, just in case */ -static inline void perf_fetch_caller_regs(struct pt_regs *regs, int skip) +static inline void perf_fetch_caller_regs(struct pt_regs *regs) { - unsigned long ip; - memset(regs, 0, sizeof(*regs)); - switch (skip) { - case 1 : - ip = CALLER_ADDR0; - break; - case 2 : - ip = CALLER_ADDR1; - break; - case 3 : - ip = CALLER_ADDR2; - break; - case 4: - ip = CALLER_ADDR3; - break; - /* No need to support further for now */ - default: - ip = 0; - } - - return perf_arch_fetch_caller_regs(regs, ip, skip); + perf_arch_fetch_caller_regs(regs, CALLER_ADDR0); } static inline void @@ -950,7 +932,7 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) struct pt_regs hot_regs; if (!regs) { - perf_fetch_caller_regs(&hot_regs, 1); + perf_fetch_caller_regs(&hot_regs); regs = &hot_regs; } __perf_sw_event(event_id, nr, nmi, regs, addr); diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 3d685d1f2a0..8ee8b6e6b25 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -705,7 +705,7 @@ perf_trace_##call(void *__data, proto) \ int __data_size; \ int rctx; \ \ - perf_fetch_caller_regs(&__regs, 1); \ + perf_fetch_caller_regs(&__regs); \ \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ diff --git a/kernel/perf_event.c b/kernel/perf_event.c index e099650cd24..9ae4dbcdf46 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -2851,11 +2851,6 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) return NULL; } -__weak -void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip) -{ -} - /* * We assume there is only KVM supporting the callbacks. diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index cb6f365016e..21db1d3a48d 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -9,8 +9,6 @@ #include #include "trace.h" -EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs); - static char *perf_trace_buf[4]; /* -- cgit v1.2.3-70-g09d2 From 8d2cacbbb8deadfae78aa16e4e1ee619bdd7019e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 May 2010 17:49:05 +0200 Subject: perf: Cleanup {start,commit,cancel}_txn details Clarify some of the transactional group scheduling API details and change it so that a successfull ->commit_txn also closes the transaction. Signed-off-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Mike Galbraith Cc: Steven Rostedt LKML-Reference: <1274803086.5882.1752.camel@twins> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event.c | 7 ++++--- arch/sparc/kernel/perf_event.c | 7 ++++--- arch/x86/kernel/cpu/perf_event.c | 14 +++++--------- include/linux/perf_event.h | 27 ++++++++++++++++++++++----- kernel/perf_event.c | 9 +-------- 5 files changed, 36 insertions(+), 28 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 43b83c35cf5..ac2a8c2554d 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -754,7 +754,7 @@ static int power_pmu_enable(struct perf_event *event) * skip the schedulability test here, it will be peformed * at commit time(->commit_txn) as a whole */ - if (cpuhw->group_flag & PERF_EVENT_TXN_STARTED) + if (cpuhw->group_flag & PERF_EVENT_TXN) goto nocheck; if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) @@ -858,7 +858,7 @@ void power_pmu_start_txn(const struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - cpuhw->group_flag |= PERF_EVENT_TXN_STARTED; + cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; } @@ -871,7 +871,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED; + cpuhw->group_flag &= ~PERF_EVENT_TXN; } /* @@ -897,6 +897,7 @@ int power_pmu_commit_txn(const struct pmu *pmu) for (i = cpuhw->n_txn_start; i < n; ++i) cpuhw->event[i]->hw.config = cpuhw->events[i]; + cpuhw->group_flag &= ~PERF_EVENT_TXN; return 0; } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 0ec92c8861d..beeb92fa3ac 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1005,7 +1005,7 @@ static int sparc_pmu_enable(struct perf_event *event) * skip the schedulability test here, it will be peformed * at commit time(->commit_txn) as a whole */ - if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) + if (cpuc->group_flag & PERF_EVENT_TXN) goto nocheck; if (check_excludes(cpuc->event, n0, 1)) @@ -1102,7 +1102,7 @@ static void sparc_pmu_start_txn(const struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - cpuhw->group_flag |= PERF_EVENT_TXN_STARTED; + cpuhw->group_flag |= PERF_EVENT_TXN; } /* @@ -1114,7 +1114,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED; + cpuhw->group_flag &= ~PERF_EVENT_TXN; } /* @@ -1137,6 +1137,7 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu) if (sparc_check_constraints(cpuc->event, cpuc->events, n)) return -EAGAIN; + cpuc->group_flag &= ~PERF_EVENT_TXN; return 0; } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5db5b7d65a1..af04c6fa59c 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -969,7 +969,7 @@ static int x86_pmu_enable(struct perf_event *event) * skip the schedulability test here, it will be peformed * at commit time(->commit_txn) as a whole */ - if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) + if (cpuc->group_flag & PERF_EVENT_TXN) goto out; ret = x86_pmu.schedule_events(cpuc, n, assign); @@ -1096,7 +1096,7 @@ static void x86_pmu_disable(struct perf_event *event) * The events never got scheduled and ->cancel_txn will truncate * the event_list. */ - if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) + if (cpuc->group_flag & PERF_EVENT_TXN) return; x86_pmu_stop(event); @@ -1388,7 +1388,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - cpuc->group_flag |= PERF_EVENT_TXN_STARTED; + cpuc->group_flag |= PERF_EVENT_TXN; cpuc->n_txn = 0; } @@ -1401,7 +1401,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED; + cpuc->group_flag &= ~PERF_EVENT_TXN; /* * Truncate the collected events. */ @@ -1435,11 +1435,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu) */ memcpy(cpuc->assign, assign, n*sizeof(int)); - /* - * Clear out the txn count so that ->cancel_txn() which gets - * run after ->commit_txn() doesn't undo things. - */ - cpuc->n_txn = 0; + cpuc->group_flag &= ~PERF_EVENT_TXN; return 0; } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 36efad90cd4..f1b6ba0770e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -549,7 +549,10 @@ struct hw_perf_event { struct perf_event; -#define PERF_EVENT_TXN_STARTED 1 +/* + * Common implementation detail of pmu::{start,commit,cancel}_txn + */ +#define PERF_EVENT_TXN 0x1 /** * struct pmu - generic performance monitoring unit @@ -563,14 +566,28 @@ struct pmu { void (*unthrottle) (struct perf_event *event); /* - * group events scheduling is treated as a transaction, - * add group events as a whole and perform one schedulability test. - * If test fails, roll back the whole group + * Group events scheduling is treated as a transaction, add group + * events as a whole and perform one schedulability test. If the test + * fails, roll back the whole group */ + /* + * Start the transaction, after this ->enable() doesn't need + * to do schedulability tests. + */ void (*start_txn) (const struct pmu *pmu); - void (*cancel_txn) (const struct pmu *pmu); + /* + * If ->start_txn() disabled the ->enable() schedulability test + * then ->commit_txn() is required to perform one. On success + * the transaction is closed. On error the transaction is kept + * open until ->cancel_txn() is called. + */ int (*commit_txn) (const struct pmu *pmu); + /* + * Will cancel the transaction, assumes ->disable() is called for + * each successfull ->enable() during the transaction. + */ + void (*cancel_txn) (const struct pmu *pmu); }; /** diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 227ed9c8ec3..6f60920772b 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -675,7 +675,6 @@ group_sched_in(struct perf_event *group_event, struct perf_event *event, *partial_group = NULL; const struct pmu *pmu = group_event->pmu; bool txn = false; - int ret; if (group_event->state == PERF_EVENT_STATE_OFF) return 0; @@ -703,15 +702,9 @@ group_sched_in(struct perf_event *group_event, } } - if (!txn) + if (!txn || !pmu->commit_txn(pmu)) return 0; - ret = pmu->commit_txn(pmu); - if (!ret) { - pmu->cancel_txn(pmu); - return 0; - } - group_error: /* * Groups can be scheduled in as one unit only, so undo any -- cgit v1.2.3-70-g09d2 From 68aa00ac0a82e9a876c799bf6be7622b8f1c8517 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 3 Jun 2010 01:23:04 +0400 Subject: perf, x86: Make a second write to performance counter if needed On Netburst PMU we need a second write to a performance counter due to cpu erratum. A simple flag test instead of alternative instructions was choosen because wrmsrl is already a macro and if virtualization is turned on will need an additional wrapper call which is more expencise. nb: we should propably switch to jump-labels as only this facility reach the mainline. Signed-off-by: Cyrill Gorcunov Signed-off-by: Peter Zijlstra Cc: Robert Richter Cc: Lin Ming Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <20100602212304.GC5264@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 12 +++++++++++- arch/x86/kernel/cpu/perf_event_p4.c | 9 +++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index af04c6fa59c..79e199843db 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -220,6 +220,7 @@ struct x86_pmu { struct perf_event *event); struct event_constraint *event_constraints; void (*quirks)(void); + int perfctr_second_write; int (*cpu_prepare)(int cpu); void (*cpu_starting)(int cpu); @@ -925,8 +926,17 @@ x86_perf_event_set_period(struct perf_event *event) */ atomic64_set(&hwc->prev_count, (u64)-left); - wrmsrl(hwc->event_base + idx, + wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask); + + /* + * Due to erratum on certan cpu we need + * a second write to be sure the register + * is updated properly + */ + if (x86_pmu.perfctr_second_write) { + wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask); + } perf_event_update_userpage(event); diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index ae85d69644d..9286e736a70 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -829,6 +829,15 @@ static __initconst const struct x86_pmu p4_pmu = { .max_period = (1ULL << 39) - 1, .hw_config = p4_hw_config, .schedule_events = p4_pmu_schedule_events, + /* + * This handles erratum N15 in intel doc 249199-029, + * the counter may not be updated correctly on write + * so we need a second write operation to do the trick + * (the official workaround didn't work) + * + * the former idea is taken from OProfile code + */ + .perfctr_second_write = 1, }; static __init int p4_pmu_init(void) -- cgit v1.2.3-70-g09d2 From e78505958cf123048fb48cb56b79cebb8edd15fb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 21 May 2010 14:43:08 +0200 Subject: perf: Convert perf_event to local_t Since now all modification to event->count (and ->prev_count and ->period_left) are local to a cpu, change then to local64_t so we avoid the LOCK'ed ops. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/arm/kernel/perf_event.c | 18 ++++++++--------- arch/powerpc/kernel/perf_event.c | 34 ++++++++++++++++---------------- arch/sh/kernel/perf_event.c | 6 +++--- arch/sparc/kernel/perf_event.c | 18 ++++++++--------- arch/x86/kernel/cpu/perf_event.c | 18 ++++++++--------- include/linux/perf_event.h | 7 ++++--- kernel/perf_event.c | 42 ++++++++++++++++++++-------------------- 7 files changed, 72 insertions(+), 71 deletions(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index c45768614c8..5b7cfafc072 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -164,20 +164,20 @@ armpmu_event_set_period(struct perf_event *event, struct hw_perf_event *hwc, int idx) { - s64 left = atomic64_read(&hwc->period_left); + s64 left = local64_read(&hwc->period_left); s64 period = hwc->sample_period; int ret = 0; if (unlikely(left <= -period)) { left = period; - atomic64_set(&hwc->period_left, left); + local64_set(&hwc->period_left, left); hwc->last_period = period; ret = 1; } if (unlikely(left <= 0)) { left += period; - atomic64_set(&hwc->period_left, left); + local64_set(&hwc->period_left, left); hwc->last_period = period; ret = 1; } @@ -185,7 +185,7 @@ armpmu_event_set_period(struct perf_event *event, if (left > (s64)armpmu->max_period) left = armpmu->max_period; - atomic64_set(&hwc->prev_count, (u64)-left); + local64_set(&hwc->prev_count, (u64)-left); armpmu->write_counter(idx, (u64)(-left) & 0xffffffff); @@ -204,18 +204,18 @@ armpmu_event_update(struct perf_event *event, s64 delta; again: - prev_raw_count = atomic64_read(&hwc->prev_count); + prev_raw_count = local64_read(&hwc->prev_count); new_raw_count = armpmu->read_counter(idx); - if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) goto again; delta = (new_raw_count << shift) - (prev_raw_count << shift); delta >>= shift; - atomic64_add(delta, &event->count); - atomic64_sub(delta, &hwc->period_left); + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); return new_raw_count; } @@ -478,7 +478,7 @@ __hw_perf_event_init(struct perf_event *event) if (!hwc->sample_period) { hwc->sample_period = armpmu->max_period; hwc->last_period = hwc->sample_period; - atomic64_set(&hwc->period_left, hwc->sample_period); + local64_set(&hwc->period_left, hwc->sample_period); } err = 0; diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index ac2a8c2554d..af1d9a7c65d 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -410,15 +410,15 @@ static void power_pmu_read(struct perf_event *event) * Therefore we treat them like NMIs. */ do { - prev = atomic64_read(&event->hw.prev_count); + prev = local64_read(&event->hw.prev_count); barrier(); val = read_pmc(event->hw.idx); - } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev); + } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev); /* The counters are only 32 bits wide */ delta = (val - prev) & 0xfffffffful; - atomic64_add(delta, &event->count); - atomic64_sub(delta, &event->hw.period_left); + local64_add(delta, &event->count); + local64_sub(delta, &event->hw.period_left); } /* @@ -444,10 +444,10 @@ static void freeze_limited_counters(struct cpu_hw_events *cpuhw, if (!event->hw.idx) continue; val = (event->hw.idx == 5) ? pmc5 : pmc6; - prev = atomic64_read(&event->hw.prev_count); + prev = local64_read(&event->hw.prev_count); event->hw.idx = 0; delta = (val - prev) & 0xfffffffful; - atomic64_add(delta, &event->count); + local64_add(delta, &event->count); } } @@ -462,7 +462,7 @@ static void thaw_limited_counters(struct cpu_hw_events *cpuhw, event = cpuhw->limited_counter[i]; event->hw.idx = cpuhw->limited_hwidx[i]; val = (event->hw.idx == 5) ? pmc5 : pmc6; - atomic64_set(&event->hw.prev_count, val); + local64_set(&event->hw.prev_count, val); perf_event_update_userpage(event); } } @@ -666,11 +666,11 @@ void hw_perf_enable(void) } val = 0; if (event->hw.sample_period) { - left = atomic64_read(&event->hw.period_left); + left = local64_read(&event->hw.period_left); if (left < 0x80000000L) val = 0x80000000L - left; } - atomic64_set(&event->hw.prev_count, val); + local64_set(&event->hw.prev_count, val); event->hw.idx = idx; write_pmc(idx, val); perf_event_update_userpage(event); @@ -842,8 +842,8 @@ static void power_pmu_unthrottle(struct perf_event *event) if (left < 0x80000000L) val = 0x80000000L - left; write_pmc(event->hw.idx, val); - atomic64_set(&event->hw.prev_count, val); - atomic64_set(&event->hw.period_left, left); + local64_set(&event->hw.prev_count, val); + local64_set(&event->hw.period_left, left); perf_event_update_userpage(event); perf_enable(); local_irq_restore(flags); @@ -1109,7 +1109,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) event->hw.config = events[n]; event->hw.event_base = cflags[n]; event->hw.last_period = event->hw.sample_period; - atomic64_set(&event->hw.period_left, event->hw.last_period); + local64_set(&event->hw.period_left, event->hw.last_period); /* * See if we need to reserve the PMU. @@ -1147,16 +1147,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val, int record = 0; /* we don't have to worry about interrupts here */ - prev = atomic64_read(&event->hw.prev_count); + prev = local64_read(&event->hw.prev_count); delta = (val - prev) & 0xfffffffful; - atomic64_add(delta, &event->count); + local64_add(delta, &event->count); /* * See if the total period for this event has expired, * and update for the next period. */ val = 0; - left = atomic64_read(&event->hw.period_left) - delta; + left = local64_read(&event->hw.period_left) - delta; if (period) { if (left <= 0) { left += period; @@ -1194,8 +1194,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val, } write_pmc(event->hw.idx, val); - atomic64_set(&event->hw.prev_count, val); - atomic64_set(&event->hw.period_left, left); + local64_set(&event->hw.prev_count, val); + local64_set(&event->hw.period_left, left); perf_event_update_userpage(event); } diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 81b6de41ae5..7a3dc356725 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -185,10 +185,10 @@ static void sh_perf_event_update(struct perf_event *event, * this is the simplest approach for maintaining consistency. */ again: - prev_raw_count = atomic64_read(&hwc->prev_count); + prev_raw_count = local64_read(&hwc->prev_count); new_raw_count = sh_pmu->read(idx); - if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) goto again; @@ -203,7 +203,7 @@ again: delta = (new_raw_count << shift) - (prev_raw_count << shift); delta >>= shift; - atomic64_add(delta, &event->count); + local64_add(delta, &event->count); } static void sh_pmu_disable(struct perf_event *event) diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index beeb92fa3ac..8a6660da8e0 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -572,18 +572,18 @@ static u64 sparc_perf_event_update(struct perf_event *event, s64 delta; again: - prev_raw_count = atomic64_read(&hwc->prev_count); + prev_raw_count = local64_read(&hwc->prev_count); new_raw_count = read_pmc(idx); - if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) goto again; delta = (new_raw_count << shift) - (prev_raw_count << shift); delta >>= shift; - atomic64_add(delta, &event->count); - atomic64_sub(delta, &hwc->period_left); + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); return new_raw_count; } @@ -591,27 +591,27 @@ again: static int sparc_perf_event_set_period(struct perf_event *event, struct hw_perf_event *hwc, int idx) { - s64 left = atomic64_read(&hwc->period_left); + s64 left = local64_read(&hwc->period_left); s64 period = hwc->sample_period; int ret = 0; if (unlikely(left <= -period)) { left = period; - atomic64_set(&hwc->period_left, left); + local64_set(&hwc->period_left, left); hwc->last_period = period; ret = 1; } if (unlikely(left <= 0)) { left += period; - atomic64_set(&hwc->period_left, left); + local64_set(&hwc->period_left, left); hwc->last_period = period; ret = 1; } if (left > MAX_PERIOD) left = MAX_PERIOD; - atomic64_set(&hwc->prev_count, (u64)-left); + local64_set(&hwc->prev_count, (u64)-left); write_pmc(idx, (u64)(-left) & 0xffffffff); @@ -1087,7 +1087,7 @@ static int __hw_perf_event_init(struct perf_event *event) if (!hwc->sample_period) { hwc->sample_period = MAX_PERIOD; hwc->last_period = hwc->sample_period; - atomic64_set(&hwc->period_left, hwc->sample_period); + local64_set(&hwc->period_left, hwc->sample_period); } return 0; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 79e199843db..2d0d2906927 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -296,10 +296,10 @@ x86_perf_event_update(struct perf_event *event) * count to the generic event atomically: */ again: - prev_raw_count = atomic64_read(&hwc->prev_count); + prev_raw_count = local64_read(&hwc->prev_count); rdmsrl(hwc->event_base + idx, new_raw_count); - if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) goto again; @@ -314,8 +314,8 @@ again: delta = (new_raw_count << shift) - (prev_raw_count << shift); delta >>= shift; - atomic64_add(delta, &event->count); - atomic64_sub(delta, &hwc->period_left); + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); return new_raw_count; } @@ -439,7 +439,7 @@ static int x86_setup_perfctr(struct perf_event *event) if (!hwc->sample_period) { hwc->sample_period = x86_pmu.max_period; hwc->last_period = hwc->sample_period; - atomic64_set(&hwc->period_left, hwc->sample_period); + local64_set(&hwc->period_left, hwc->sample_period); } else { /* * If we have a PMU initialized but no APIC @@ -886,7 +886,7 @@ static int x86_perf_event_set_period(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - s64 left = atomic64_read(&hwc->period_left); + s64 left = local64_read(&hwc->period_left); s64 period = hwc->sample_period; int ret = 0, idx = hwc->idx; @@ -898,14 +898,14 @@ x86_perf_event_set_period(struct perf_event *event) */ if (unlikely(left <= -period)) { left = period; - atomic64_set(&hwc->period_left, left); + local64_set(&hwc->period_left, left); hwc->last_period = period; ret = 1; } if (unlikely(left <= 0)) { left += period; - atomic64_set(&hwc->period_left, left); + local64_set(&hwc->period_left, left); hwc->last_period = period; ret = 1; } @@ -924,7 +924,7 @@ x86_perf_event_set_period(struct perf_event *event) * The hw event starts counting from this event offset, * mark it to be able to extra future deltas: */ - atomic64_set(&hwc->prev_count, (u64)-left); + local64_set(&hwc->prev_count, (u64)-left); wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f34dab9b275..7342979f95f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -487,6 +487,7 @@ struct perf_guest_info_callbacks { #include #include #include +#include #define PERF_MAX_STACK_DEPTH 255 @@ -536,10 +537,10 @@ struct hw_perf_event { struct arch_hw_breakpoint info; #endif }; - atomic64_t prev_count; + local64_t prev_count; u64 sample_period; u64 last_period; - atomic64_t period_left; + local64_t period_left; u64 interrupts; u64 freq_time_stamp; @@ -670,7 +671,7 @@ struct perf_event { enum perf_event_active_state state; unsigned int attach_state; - atomic64_t count; + local64_t count; atomic64_t child_count; /* diff --git a/kernel/perf_event.c b/kernel/perf_event.c index a395fda2d94..97c73018592 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1148,9 +1148,9 @@ static void __perf_event_sync_stat(struct perf_event *event, * In order to keep per-task stats reliable we need to flip the event * values when we flip the contexts. */ - value = atomic64_read(&next_event->count); - value = atomic64_xchg(&event->count, value); - atomic64_set(&next_event->count, value); + value = local64_read(&next_event->count); + value = local64_xchg(&event->count, value); + local64_set(&next_event->count, value); swap(event->total_time_enabled, next_event->total_time_enabled); swap(event->total_time_running, next_event->total_time_running); @@ -1540,10 +1540,10 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) hwc->sample_period = sample_period; - if (atomic64_read(&hwc->period_left) > 8*sample_period) { + if (local64_read(&hwc->period_left) > 8*sample_period) { perf_disable(); perf_event_stop(event); - atomic64_set(&hwc->period_left, 0); + local64_set(&hwc->period_left, 0); perf_event_start(event); perf_enable(); } @@ -1584,7 +1584,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) perf_disable(); event->pmu->read(event); - now = atomic64_read(&event->count); + now = local64_read(&event->count); delta = now - hwc->freq_count_stamp; hwc->freq_count_stamp = now; @@ -1738,7 +1738,7 @@ static void __perf_event_read(void *info) static inline u64 perf_event_count(struct perf_event *event) { - return atomic64_read(&event->count) + atomic64_read(&event->child_count); + return local64_read(&event->count) + atomic64_read(&event->child_count); } static u64 perf_event_read(struct perf_event *event) @@ -2141,7 +2141,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) static void perf_event_reset(struct perf_event *event) { (void)perf_event_read(event); - atomic64_set(&event->count, 0); + local64_set(&event->count, 0); perf_event_update_userpage(event); } @@ -2359,7 +2359,7 @@ void perf_event_update_userpage(struct perf_event *event) userpg->index = perf_event_index(event); userpg->offset = perf_event_count(event); if (event->state == PERF_EVENT_STATE_ACTIVE) - userpg->offset -= atomic64_read(&event->hw.prev_count); + userpg->offset -= local64_read(&event->hw.prev_count); userpg->time_enabled = event->total_time_enabled + atomic64_read(&event->child_total_time_enabled); @@ -4035,14 +4035,14 @@ static u64 perf_swevent_set_period(struct perf_event *event) hwc->last_period = hwc->sample_period; again: - old = val = atomic64_read(&hwc->period_left); + old = val = local64_read(&hwc->period_left); if (val < 0) return 0; nr = div64_u64(period + val, period); offset = nr * period; val -= offset; - if (atomic64_cmpxchg(&hwc->period_left, old, val) != old) + if (local64_cmpxchg(&hwc->period_left, old, val) != old) goto again; return nr; @@ -4081,7 +4081,7 @@ static void perf_swevent_add(struct perf_event *event, u64 nr, { struct hw_perf_event *hwc = &event->hw; - atomic64_add(nr, &event->count); + local64_add(nr, &event->count); if (!regs) return; @@ -4092,7 +4092,7 @@ static void perf_swevent_add(struct perf_event *event, u64 nr, if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) return perf_swevent_overflow(event, 1, nmi, data, regs); - if (atomic64_add_negative(nr, &hwc->period_left)) + if (local64_add_negative(nr, &hwc->period_left)) return; perf_swevent_overflow(event, 0, nmi, data, regs); @@ -4383,8 +4383,8 @@ static void cpu_clock_perf_event_update(struct perf_event *event) u64 now; now = cpu_clock(cpu); - prev = atomic64_xchg(&event->hw.prev_count, now); - atomic64_add(now - prev, &event->count); + prev = local64_xchg(&event->hw.prev_count, now); + local64_add(now - prev, &event->count); } static int cpu_clock_perf_event_enable(struct perf_event *event) @@ -4392,7 +4392,7 @@ static int cpu_clock_perf_event_enable(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; int cpu = raw_smp_processor_id(); - atomic64_set(&hwc->prev_count, cpu_clock(cpu)); + local64_set(&hwc->prev_count, cpu_clock(cpu)); perf_swevent_start_hrtimer(event); return 0; @@ -4424,9 +4424,9 @@ static void task_clock_perf_event_update(struct perf_event *event, u64 now) u64 prev; s64 delta; - prev = atomic64_xchg(&event->hw.prev_count, now); + prev = local64_xchg(&event->hw.prev_count, now); delta = now - prev; - atomic64_add(delta, &event->count); + local64_add(delta, &event->count); } static int task_clock_perf_event_enable(struct perf_event *event) @@ -4436,7 +4436,7 @@ static int task_clock_perf_event_enable(struct perf_event *event) now = event->ctx->time; - atomic64_set(&hwc->prev_count, now); + local64_set(&hwc->prev_count, now); perf_swevent_start_hrtimer(event); @@ -4879,7 +4879,7 @@ perf_event_alloc(struct perf_event_attr *attr, hwc->sample_period = 1; hwc->last_period = hwc->sample_period; - atomic64_set(&hwc->period_left, hwc->sample_period); + local64_set(&hwc->period_left, hwc->sample_period); /* * we currently do not support PERF_FORMAT_GROUP on inherited events @@ -5313,7 +5313,7 @@ inherit_event(struct perf_event *parent_event, hwc->sample_period = sample_period; hwc->last_period = sample_period; - atomic64_set(&hwc->period_left, sample_period); + local64_set(&hwc->period_left, sample_period); } child_event->overflow_handler = parent_event->overflow_handler; -- cgit v1.2.3-70-g09d2