diff options
49 files changed, 4676 insertions, 1218 deletions
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 2f9115c0ae6..61c291cddf1 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt @@ -165,8 +165,8 @@ the user entry_handler invocation is also skipped. 1.4 How Does Jump Optimization Work? -If you configured your kernel with CONFIG_OPTPROBES=y (currently -this option is supported on x86/x86-64, non-preemptive kernel) and +If your kernel is built with CONFIG_OPTPROBES=y (currently this flag +is automatically set 'y' on x86/x86-64, non-preemptive kernel) and the "debug.kprobes_optimization" kernel parameter is set to 1 (see sysctl(8)), Kprobes tries to reduce probe-hit overhead by using a jump instruction instead of a breakpoint instruction at each probepoint. @@ -271,8 +271,6 @@ tweak the kernel's execution path, you need to suppress optimization, using one of the following techniques: - Specify an empty function for the kprobe's post_handler or break_handler. or -- Config CONFIG_OPTPROBES=n. - or - Execute 'sysctl -w debug.kprobes_optimization=n' 2. Architectures Supported @@ -307,10 +305,6 @@ it useful to "Compile the kernel with debug info" (CONFIG_DEBUG_INFO), so you can use "objdump -d -l vmlinux" to see the source-to-object code mapping. -If you want to reduce probing overhead, set "Kprobes jump optimization -support" (CONFIG_OPTPROBES) to "y". You can find this option under the -"Kprobes" line. - 4. API Reference The Kprobes API includes a "register" function and an "unregister" diff --git a/MAINTAINERS b/MAINTAINERS index 449d4440208..28150567644 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4338,13 +4338,13 @@ M: Paul Mackerras <paulus@samba.org> M: Ingo Molnar <mingo@elte.hu> M: Arnaldo Carvalho de Melo <acme@redhat.com> S: Supported -F: kernel/perf_event.c +F: kernel/perf_event*.c F: include/linux/perf_event.h -F: arch/*/kernel/perf_event.c -F: arch/*/kernel/*/perf_event.c -F: arch/*/kernel/*/*/perf_event.c +F: arch/*/kernel/perf_event*.c +F: arch/*/kernel/*/perf_event*.c +F: arch/*/kernel/*/*/perf_event*.c F: arch/*/include/asm/perf_event.h -F: arch/*/lib/perf_event.c +F: arch/*/lib/perf_event*.c F: arch/*/kernel/perf_callchain.c F: tools/perf/ diff --git a/arch/Kconfig b/arch/Kconfig index e5eb1337a53..f06010fb483 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -42,15 +42,10 @@ config KPROBES If in doubt, say "N". config OPTPROBES - bool "Kprobes jump optimization support (EXPERIMENTAL)" - default y - depends on KPROBES + def_bool y + depends on KPROBES && HAVE_OPTPROBES depends on !PREEMPT - depends on HAVE_OPTPROBES select KALLSYMS_ALL - help - This option will allow kprobes to optimize breakpoint to - a jump for reducing its overhead. config HAVE_EFFICIENT_UNALIGNED_ACCESS bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0eacb1ffb42..7191b6eb16d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -58,6 +58,9 @@ config X86 select HAVE_ARCH_KMEMCHECK select HAVE_USER_RETURN_NOTIFIER +config INSTRUCTION_DECODER + def_bool (KPROBES || PERF_EVENTS) + config OUTPUT_FORMAT string default "elf32-i386" if X86_32 diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index b4ac2cdcb64..1fa03e04ae4 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -373,6 +373,7 @@ extern atomic_t init_deasserted; extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); #endif +#ifdef CONFIG_X86_LOCAL_APIC static inline u32 apic_read(u32 reg) { return apic->read(reg); @@ -403,10 +404,19 @@ static inline u32 safe_apic_wait_icr_idle(void) return apic->safe_wait_icr_idle(); } +#else /* CONFIG_X86_LOCAL_APIC */ + +static inline u32 apic_read(u32 reg) { return 0; } +static inline void apic_write(u32 reg, u32 val) { } +static inline u64 apic_icr_read(void) { return 0; } +static inline void apic_icr_write(u32 low, u32 high) { } +static inline void apic_wait_icr_idle(void) { } +static inline u32 safe_apic_wait_icr_idle(void) { return 0; } + +#endif /* CONFIG_X86_LOCAL_APIC */ static inline void ack_APIC_irq(void) { -#ifdef CONFIG_X86_LOCAL_APIC /* * ack_APIC_irq() actually gets compiled as a single instruction * ... yummie. @@ -414,7 +424,6 @@ static inline void ack_APIC_irq(void) /* Docs say use 0 for future compatibility */ apic_write(APIC_EOI, 0); -#endif } static inline unsigned default_get_apic_id(unsigned long x) diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 96c2e0ad04c..88c765e1641 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -68,6 +68,8 @@ struct insn { const insn_byte_t *next_byte; }; +#define MAX_INSN_SIZE 16 + #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) #define X86_MODRM_RM(modrm) ((modrm) & 0x07) diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 4ffa345a8cc..54788253915 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -24,6 +24,7 @@ #include <linux/types.h> #include <linux/ptrace.h> #include <linux/percpu.h> +#include <asm/insn.h> #define __ARCH_WANT_KPROBES_INSN_SLOT @@ -36,7 +37,6 @@ typedef u8 kprobe_opcode_t; #define RELATIVEJUMP_SIZE 5 #define RELATIVECALL_OPCODE 0xe8 #define RELATIVE_ADDR_SIZE 4 -#define MAX_INSN_SIZE 16 #define MAX_STACK_SIZE 64 #define MIN_STACK_SIZE(ADDR) \ (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 1cd58cdbc03..aef562c0a64 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -357,6 +357,8 @@ #define MSR_P4_U2L_ESCR0 0x000003b0 #define MSR_P4_U2L_ESCR1 0x000003b1 +#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2 + /* Intel Core-based CPU performance counters */ #define MSR_CORE_PERF_FIXED_CTR0 0x00000309 #define MSR_CORE_PERF_FIXED_CTR1 0x0000030a diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index db6109a885a..124dddd598f 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -5,7 +5,7 @@ * Performance event hw details: */ -#define X86_PMC_MAX_GENERIC 8 +#define X86_PMC_MAX_GENERIC 32 #define X86_PMC_MAX_FIXED 3 #define X86_PMC_IDX_GENERIC 0 @@ -136,6 +136,25 @@ extern void perf_events_lapic_init(void); #define PERF_EVENT_INDEX_OFFSET 0 +/* + * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups. + * This flag is otherwise unused and ABI specified to be 0, so nobody should + * care what we do with it. + */ +#define PERF_EFLAGS_EXACT (1UL << 3) + +#define perf_misc_flags(regs) \ +({ int misc = 0; \ + if (user_mode(regs)) \ + misc |= PERF_RECORD_MISC_USER; \ + else \ + misc |= PERF_RECORD_MISC_KERNEL; \ + if (regs->flags & PERF_EFLAGS_EXACT) \ + misc |= PERF_RECORD_MISC_EXACT; \ + misc; }) + +#define perf_instruction_pointer(regs) ((regs)->ip) + #else static inline void init_hw_perf_events(void) { } static inline void perf_events_lapic_init(void) { } diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h new file mode 100644 index 00000000000..facf96186b2 --- /dev/null +++ b/arch/x86/include/asm/perf_event_p4.h @@ -0,0 +1,721 @@ +/* + * Netburst Perfomance Events (P4, old Xeon) + */ + +#ifndef PERF_EVENT_P4_H +#define PERF_EVENT_P4_H + +#include <linux/cpu.h> +#include <linux/bitops.h> + +/* + * NetBurst has perfomance MSRs shared between + * threads if HT is turned on, ie for both logical + * processors (mem: in turn in Atom with HT support + * perf-MSRs are not shared and every thread has its + * own perf-MSRs set) + */ +#define ARCH_P4_TOTAL_ESCR (46) +#define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */ +#define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) +#define ARCH_P4_MAX_CCCR (18) +#define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2) + +#define P4_EVNTSEL_EVENT_MASK 0x7e000000U +#define P4_EVNTSEL_EVENT_SHIFT 25 +#define P4_EVNTSEL_EVENTMASK_MASK 0x01fffe00U +#define P4_EVNTSEL_EVENTMASK_SHIFT 9 +#define P4_EVNTSEL_TAG_MASK 0x000001e0U +#define P4_EVNTSEL_TAG_SHIFT 5 +#define P4_EVNTSEL_TAG_ENABLE 0x00000010U +#define P4_EVNTSEL_T0_OS 0x00000008U +#define P4_EVNTSEL_T0_USR 0x00000004U +#define P4_EVNTSEL_T1_OS 0x00000002U +#define P4_EVNTSEL_T1_USR 0x00000001U + +/* Non HT mask */ +#define P4_EVNTSEL_MASK \ + (P4_EVNTSEL_EVENT_MASK | \ + P4_EVNTSEL_EVENTMASK_MASK | \ + P4_EVNTSEL_TAG_MASK | \ + P4_EVNTSEL_TAG_ENABLE | \ + P4_EVNTSEL_T0_OS | \ + P4_EVNTSEL_T0_USR) + +/* HT mask */ +#define P4_EVNTSEL_MASK_HT \ + (P4_EVNTSEL_MASK | \ + P4_EVNTSEL_T1_OS | \ + P4_EVNTSEL_T1_USR) + +#define P4_CCCR_OVF 0x80000000U +#define P4_CCCR_CASCADE 0x40000000U +#define P4_CCCR_OVF_PMI_T0 0x04000000U +#define P4_CCCR_OVF_PMI_T1 0x08000000U +#define P4_CCCR_FORCE_OVF 0x02000000U +#define P4_CCCR_EDGE 0x01000000U +#define P4_CCCR_THRESHOLD_MASK 0x00f00000U +#define P4_CCCR_THRESHOLD_SHIFT 20 +#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) +#define P4_CCCR_COMPLEMENT 0x00080000U +#define P4_CCCR_COMPARE 0x00040000U +#define P4_CCCR_ESCR_SELECT_MASK 0x0000e000U +#define P4_CCCR_ESCR_SELECT_SHIFT 13 +#define P4_CCCR_ENABLE 0x00001000U +#define P4_CCCR_THREAD_SINGLE 0x00010000U +#define P4_CCCR_THREAD_BOTH 0x00020000U +#define P4_CCCR_THREAD_ANY 0x00030000U +#define P4_CCCR_RESERVED 0x00000fffU + +/* Non HT mask */ +#define P4_CCCR_MASK \ + (P4_CCCR_OVF | \ + P4_CCCR_CASCADE | \ + P4_CCCR_OVF_PMI_T0 | \ + P4_CCCR_FORCE_OVF | \ + P4_CCCR_EDGE | \ + P4_CCCR_THRESHOLD_MASK | \ + P4_CCCR_COMPLEMENT | \ + P4_CCCR_COMPARE | \ + P4_CCCR_ESCR_SELECT_MASK | \ + P4_CCCR_ENABLE) + +/* HT mask */ +#define P4_CCCR_MASK_HT \ + (P4_CCCR_MASK | \ + P4_CCCR_THREAD_ANY) + +/* + * format is 32 bit: ee ss aa aa + * where + * ee - 8 bit event + * ss - 8 bit selector + * aa aa - 16 bits reserved for tags/attributes + */ +#define P4_EVENT_PACK(event, selector) (((event) << 24) | ((selector) << 16)) +#define P4_EVENT_UNPACK_EVENT(packed) (((packed) >> 24) & 0xff) +#define P4_EVENT_UNPACK_SELECTOR(packed) (((packed) >> 16) & 0xff) +#define P4_EVENT_PACK_ATTR(attr) ((attr)) +#define P4_EVENT_UNPACK_ATTR(packed) ((packed) & 0xffff) +#define P4_MAKE_EVENT_ATTR(class, name, bit) class##_##name = (1 << bit) +#define P4_EVENT_ATTR(class, name) class##_##name +#define P4_EVENT_ATTR_STR(class, name) __stringify(class##_##name) + +/* + * config field is 64bit width and consists of + * HT << 63 | ESCR << 32 | CCCR + * where HT is HyperThreading bit (since ESCR + * has it reserved we may use it for own purpose) + * + * note that this is NOT the addresses of respective + * ESCR and CCCR but rather an only packed value should + * be unpacked and written to a proper addresses + * + * the base idea is to pack as much info as + * possible + */ +#define p4_config_pack_escr(v) (((u64)(v)) << 32) +#define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) +#define p4_config_unpack_escr(v) (((u64)(v)) >> 32) +#define p4_config_unpack_cccr(v) (((u64)(v)) & 0xfffff000ULL) + +#define p4_config_unpack_emask(v) \ + ({ \ + u32 t = p4_config_unpack_escr((v)); \ + t &= P4_EVNTSEL_EVENTMASK_MASK; \ + t >>= P4_EVNTSEL_EVENTMASK_SHIFT; \ + t; \ + }) + +#define p4_config_unpack_key(v) (((u64)(v)) & P4_CCCR_RESERVED) + +#define P4_CONFIG_HT_SHIFT 63 +#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) + +static inline u32 p4_config_unpack_opcode(u64 config) +{ + u32 e, s; + + /* + * we don't care about HT presence here since + * event opcode doesn't depend on it + */ + e = (p4_config_unpack_escr(config) & P4_EVNTSEL_EVENT_MASK) >> P4_EVNTSEL_EVENT_SHIFT; + s = (p4_config_unpack_cccr(config) & P4_CCCR_ESCR_SELECT_MASK) >> P4_CCCR_ESCR_SELECT_SHIFT; + + return P4_EVENT_PACK(e, s); +} + +static inline bool p4_is_event_cascaded(u64 config) +{ + u32 cccr = p4_config_unpack_cccr(config); + return !!(cccr & P4_CCCR_CASCADE); +} + +static inline int p4_ht_config_thread(u64 config) +{ + return !!(config & P4_CONFIG_HT); +} + +static inline u64 p4_set_ht_bit(u64 config) +{ + return config | P4_CONFIG_HT; +} + +static inline u64 p4_clear_ht_bit(u64 config) +{ + return config & ~P4_CONFIG_HT; +} + +static inline int p4_ht_active(void) +{ +#ifdef CONFIG_SMP + return smp_num_siblings > 1; +#endif + return 0; +} + +static inline int p4_ht_thread(int cpu) +{ +#ifdef CONFIG_SMP + if (smp_num_siblings == 2) + return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); +#endif + return 0; +} + +static inline int p4_should_swap_ts(u64 config, int cpu) +{ + return p4_ht_config_thread(config) ^ p4_ht_thread(cpu); +} + +static inline u32 p4_default_cccr_conf(int cpu) +{ + /* + * Note that P4_CCCR_THREAD_ANY is "required" on + * non-HT machines (on HT machines we count TS events + * regardless the state of second logical processor + */ + u32 cccr = P4_CCCR_THREAD_ANY; + + if (!p4_ht_thread(cpu)) + cccr |= P4_CCCR_OVF_PMI_T0; + else + cccr |= P4_CCCR_OVF_PMI_T1; + + return cccr; +} + +static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) +{ + u32 escr = 0; + + if (!p4_ht_thread(cpu)) { + if (!exclude_os) + escr |= P4_EVNTSEL_T0_OS; + if (!exclude_usr) + escr |= P4_EVNTSEL_T0_USR; + } else { + if (!exclude_os) + escr |= P4_EVNTSEL_T1_OS; + if (!exclude_usr) + escr |= P4_EVNTSEL_T1_USR; + } + + return escr; +} + +/* + * Comments below the event represent ESCR restriction + * for this event and counter index per ESCR + * + * MSR_P4_IQ_ESCR0 and MSR_P4_IQ_ESCR1 are available only on early + * processor builds (family 0FH, models 01H-02H). These MSRs + * are not available on later versions, so that we don't use + * them completely + * + * Also note that CCCR1 do not have P4_CCCR_ENABLE bit properly + * working so that we should not use this CCCR and respective + * counter as result + */ +#define P4_TC_DELIVER_MODE P4_EVENT_PACK(0x01, 0x01) + /* + * MSR_P4_TC_ESCR0: 4, 5 + * MSR_P4_TC_ESCR1: 6, 7 + */ + +#define P4_BPU_FETCH_REQUEST P4_EVENT_PACK(0x03, 0x00) + /* + * MSR_P4_BPU_ESCR0: 0, 1 + * MSR_P4_BPU_ESCR1: 2, 3 + */ + +#define P4_ITLB_REFERENCE P4_EVENT_PACK(0x18, 0x03) + /* + * MSR_P4_ITLB_ESCR0: 0, 1 + * MSR_P4_ITLB_ESCR1: 2, 3 + */ + +#define P4_MEMORY_CANCEL P4_EVENT_PACK(0x02, 0x05) + /* + * MSR_P4_DAC_ESCR0: 8, 9 + * MSR_P4_DAC_ESCR1: 10, 11 + */ + +#define P4_MEMORY_COMPLETE P4_EVENT_PACK(0x08, 0x02) + /* + * MSR_P4_SAAT_ESCR0: 8, 9 + * MSR_P4_SAAT_ESCR1: 10, 11 + */ + +#define P4_LOAD_PORT_REPLAY P4_EVENT_PACK(0x04, 0x02) + /* + * MSR_P4_SAAT_ESCR0: 8, 9 + * MSR_P4_SAAT_ESCR1: 10, 11 + */ + +#define P4_STORE_PORT_REPLAY P4_EVENT_PACK(0x05, 0x02) + /* + * MSR_P4_SAAT_ESCR0: 8, 9 + * MSR_P4_SAAT_ESCR1: 10, 11 + */ + +#define P4_MOB_LOAD_REPLAY P4_EVENT_PACK(0x03, 0x02) + /* + * MSR_P4_MOB_ESCR0: 0, 1 + * MSR_P4_MOB_ESCR1: 2, 3 + */ + +#define P4_PAGE_WALK_TYPE P4_EVENT_PACK(0x01, 0x04) + /* + * MSR_P4_PMH_ESCR0: 0, 1 + * MSR_P4_PMH_ESCR1: 2, 3 + */ + +#define P4_BSQ_CACHE_REFERENCE P4_EVENT_PACK(0x0c, 0x07) + /* + * MSR_P4_BSU_ESCR0: 0, 1 + * MSR_P4_BSU_ESCR1: 2, 3 + */ + +#define P4_IOQ_ALLOCATION P4_EVENT_PACK(0x03, 0x06) + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_IOQ_ACTIVE_ENTRIES P4_EVENT_PACK(0x1a, 0x06) + /* + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_FSB_DATA_ACTIVITY P4_EVENT_PACK(0x17, 0x06) + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_BSQ_ALLOCATION P4_EVENT_PACK(0x05, 0x07) + /* + * MSR_P4_BSU_ESCR0: 0, 1 + */ + +#define P4_BSQ_ACTIVE_ENTRIES P4_EVENT_PACK(0x06, 0x07) + /* + * NOTE: no ESCR name in docs, it's guessed + * MSR_P4_BSU_ESCR1: 2, 3 + */ + +#define P4_SSE_INPUT_ASSIST P4_EVENT_PACK(0x34, 0x01) + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + +#define P4_PACKED_SP_UOP P4_EVENT_PACK(0x08, 0x01) + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + +#define P4_PACKED_DP_UOP P4_EVENT_PACK(0x0c, 0x01) + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + +#define P4_SCALAR_SP_UOP P4_EVENT_PACK(0x0a, 0x01) + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + +#define P4_SCALAR_DP_UOP P4_EVENT_PACK(0x0e, 0x01) + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + +#define P4_64BIT_MMX_UOP P4_EVENT_PACK(0x02, 0x01) + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + +#define P4_128BIT_MMX_UOP P4_EVENT_PACK(0x1a, 0x01) + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + +#define P4_X87_FP_UOP P4_EVENT_PACK(0x04, 0x01) + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + +#define P4_TC_MISC P4_EVENT_PACK(0x06, 0x01) + /* + * MSR_P4_TC_ESCR0: 4, 5 + * MSR_P4_TC_ESCR1: 6, 7 + */ + +#define P4_GLOBAL_POWER_EVENTS P4_EVENT_PACK(0x13, 0x06) + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_TC_MS_XFER P4_EVENT_PACK(0x05, 0x00) + /* + * MSR_P4_MS_ESCR0: 4, 5 + * MSR_P4_MS_ESCR1: 6, 7 + */ + +#define P4_UOP_QUEUE_WRITES P4_EVENT_PACK(0x09, 0x00) + /* + * MSR_P4_MS_ESCR0: 4, 5 + * MSR_P4_MS_ESCR1: 6, 7 + */ + +#define P4_RETIRED_MISPRED_BRANCH_TYPE P4_EVENT_PACK(0x05, 0x02) + /* + * MSR_P4_TBPU_ESCR0: 4, 5 + * MSR_P4_TBPU_ESCR1: 6, 7 + */ + +#define P4_RETIRED_BRANCH_TYPE P4_EVENT_PACK(0x04, 0x02) + /* + * MSR_P4_TBPU_ESCR0: 4, 5 + * MSR_P4_TBPU_ESCR1: 6, 7 + */ + +#define P4_RESOURCE_STALL P4_EVENT_PACK(0x01, 0x01) + /* + * MSR_P4_ALF_ESCR0: 12, 13, 16 + * MSR_P4_ALF_ESCR1: 14, 15, 17 + */ + +#define P4_WC_BUFFER P4_EVENT_PACK(0x05, 0x05) + /* + * MSR_P4_DAC_ESCR0: 8, 9 + * MSR_P4_DAC_ESCR1: 10, 11 + */ + +#define P4_B2B_CYCLES P4_EVENT_PACK(0x16, 0x03) + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_BNR P4_EVENT_PACK(0x08, 0x03) + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_SNOOP P4_EVENT_PACK(0x06, 0x03) + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_RESPONSE P4_EVENT_PACK(0x04, 0x03) + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_FRONT_END_EVENT P4_EVENT_PACK(0x08, 0x05) + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + +#define P4_EXECUTION_EVENT P4_EVENT_PACK(0x0c, 0x05) + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + +#define P4_REPLAY_EVENT P4_EVENT_PACK(0x09, 0x05) + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + +#define P4_INSTR_RETIRED P4_EVENT_PACK(0x02, 0x04) + /* + * MSR_P4_CRU_ESCR0: 12, 13, 16 + * MSR_P4_CRU_ESCR1: 14, 15, 17 + */ + +#define P4_UOPS_RETIRED P4_EVENT_PACK(0x01, 0x04) + /* + * MSR_P4_CRU_ESCR0: 12, 13, 16 + * MSR_P4_CRU_ESCR1: 14, 15, 17 + */ + +#define P4_UOP_TYPE P4_EVENT_PACK(0x02, 0x02) + /* + * MSR_P4_RAT_ESCR0: 12, 13, 16 + * MSR_P4_RAT_ESCR1: 14, 15, 17 + */ + +#define P4_BRANCH_RETIRED P4_EVENT_PACK(0x06, 0x05) + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + +#define P4_MISPRED_BRANCH_RETIRED P4_EVENT_PACK(0x03, 0x04) + /* + * MSR_P4_CRU_ESCR0: 12, 13, 16 + * MSR_P4_CRU_ESCR1: 14, 15, 17 + */ + +#define P4_X87_ASSIST P4_EVENT_PACK(0x03, 0x05) + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + +#define P4_MACHINE_CLEAR P4_EVENT_PACK(0x02, 0x05) + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + +#define P4_INSTR_COMPLETED P4_EVENT_PACK(0x07, 0x04) + /* + * MSR_P4_CRU_ESCR0: 12, 13, 16 + * MSR_P4_CRU_ESCR1: 14, 15, 17 + */ + +/* + * a caller should use P4_EVENT_ATTR helper to + * pick the attribute needed, for example + * + * P4_EVENT_ATTR(P4_TC_DELIVER_MODE, DD) + */ +enum P4_EVENTS_ATTR { + P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, DD, 0), + P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, DB, 1), + P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, DI, 2), + P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, BD, 3), + P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, BB, 4), + P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, BI, 5), + P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, ID, 6), + + P4_MAKE_EVENT_ATTR(P4_BPU_FETCH_REQUEST, TCMISS, 0), + + P4_MAKE_EVENT_ATTR(P4_ITLB_REFERENCE, HIT, 0), + P4_MAKE_EVENT_ATTR(P4_ITLB_REFERENCE, MISS, 1), + P4_MAKE_EVENT_ATTR(P4_ITLB_REFERENCE, HIT_UK, 2), + + P4_MAKE_EVENT_ATTR(P4_MEMORY_CANCEL, ST_RB_FULL, 2), + P4_MAKE_EVENT_ATTR(P4_MEMORY_CANCEL, 64K_CONF, 3), + + P4_MAKE_EVENT_ATTR(P4_MEMORY_COMPLETE, LSC, 0), + P4_MAKE_EVENT_ATTR(P4_MEMORY_COMPLETE, SSC, 1), + + P4_MAKE_EVENT_ATTR(P4_LOAD_PORT_REPLAY, SPLIT_LD, 1), + + P4_MAKE_EVENT_ATTR(P4_STORE_PORT_REPLAY, SPLIT_ST, 1), + + P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, NO_STA, 1), + P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, NO_STD, 3), + P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, PARTIAL_DATA, 4), + P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, UNALGN_ADDR, 5), + + P4_MAKE_EVENT_ATTR(P4_PAGE_WALK_TYPE, DTMISS, 0), + P4_MAKE_EVENT_ATTR(P4_PAGE_WALK_TYPE, ITMISS, 1), + + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS, 0), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE, 1), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITM, 2), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITS, 3), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITE, 4), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITM, 5), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS, 8), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS, 9), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, WR_2ndL_MISS, 10), + + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, DEFAULT, 0), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, ALL_READ, 5), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, ALL_WRITE, 6), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_UC, 7), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WC, 8), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WT, 9), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WP, 10), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WB, 11), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, OWN, 13), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, OTHER, 14), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, PREFETCH, 15), + + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, DEFAULT, 0), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, ALL_READ, 5), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, ALL_WRITE, 6), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_UC, 7), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WC, 8), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WT, 9), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WP, 10), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WB, 11), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, OWN, 13), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, OTHER, 14), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, PREFETCH, 15), + + P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV, 0), + P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN, 1), + P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OTHER, 2), + P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DBSY_DRV, 3), + P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DBSY_OWN, 4), + P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DBSY_OTHER, 5), + + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_TYPE0, 0), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_TYPE1, 1), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_LEN0, 2), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_LEN1, 3), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_IO_TYPE, 5), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_LOCK_TYPE, 6), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_CACHE_TYPE, 7), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_SPLIT_TYPE, 8), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_DEM_TYPE, 9), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_ORD_TYPE, 10), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, MEM_TYPE0, 11), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, MEM_TYPE1, 12), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, MEM_TYPE2, 13), + + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_TYPE0, 0), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_TYPE1, 1), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_LEN0, 2), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_LEN1, 3), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE, 5), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE, 6), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE, 7), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE, 8), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE, 9), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE, 10), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, MEM_TYPE0, 11), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, MEM_TYPE1, 12), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, MEM_TYPE2, 13), + + P4_MAKE_EVENT_ATTR(P4_SSE_INPUT_ASSIST, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_PACKED_SP_UOP, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_PACKED_DP_UOP, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_SCALAR_SP_UOP, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_SCALAR_DP_UOP, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_64BIT_MMX_UOP, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_128BIT_MMX_UOP, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_X87_FP_UOP, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_TC_MISC, FLUSH, 4), + + P4_MAKE_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING, 0), + + P4_MAKE_EVENT_ATTR(P4_TC_MS_XFER, CISC, 0), + + P4_MAKE_EVENT_ATTR(P4_UOP_QUEUE_WRITES, FROM_TC_BUILD, 0), + P4_MAKE_EVENT_ATTR(P4_UOP_QUEUE_WRITES, FROM_TC_DELIVER, 1), + P4_MAKE_EVENT_ATTR(P4_UOP_QUEUE_WRITES, FROM_ROM, 2), + + P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL, 1), + P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, CALL, 2), + P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, RETURN, 3), + P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT, 4), + + P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL, 1), + P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL, 2), + P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, RETURN, 3), + P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, INDIRECT, 4), + + P4_MAKE_EVENT_ATTR(P4_RESOURCE_STALL, SBFULL, 5), + + P4_MAKE_EVENT_ATTR(P4_WC_BUFFER, WCB_EVICTS, 0), + P4_MAKE_EVENT_ATTR(P4_WC_BUFFER, WCB_FULL_EVICTS, 1), + + P4_MAKE_EVENT_ATTR(P4_FRONT_END_EVENT, NBOGUS, 0), + P4_MAKE_EVENT_ATTR(P4_FRONT_END_EVENT, BOGUS, 1), + + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS0, 0), + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS1, 1), + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS2, 2), + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS3, 3), + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS0, 4), + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS1, 5), + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS2, 6), + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS3, 7), + + P4_MAKE_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS, 0), + P4_MAKE_EVENT_ATTR(P4_REPLAY_EVENT, BOGUS, 1), + + P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG, 0), + P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSTAG, 1), + P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG, 2), + P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSTAG, 3), + + P4_MAKE_EVENT_ATTR(P4_UOPS_RETIRED, NBOGUS, 0), + P4_MAKE_EVENT_ATTR(P4_UOPS_RETIRED, BOGUS, 1), + + P4_MAKE_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS, 1), + P4_MAKE_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES, 2), + + P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMNP, 0), + P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMNM, 1), + P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMTP, 2), + P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMTM, 3), + + P4_MAKE_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS, 0), + + P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, FPSU, 0), + P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, FPSO, 1), + P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, POAO, 2), + P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, POAU, 3), + P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, PREA, 4), + + P4_MAKE_EVENT_ATTR(P4_MACHINE_CLEAR, CLEAR, 0), + P4_MAKE_EVENT_ATTR(P4_MACHINE_CLEAR, MOCLEAR, 1), + P4_MAKE_EVENT_ATTR(P4_MACHINE_CLEAR, SMCLEAR, 2), + + P4_MAKE_EVENT_ATTR(P4_INSTR_COMPLETED, NBOGUS, 0), + P4_MAKE_EVENT_ATTR(P4_INSTR_COMPLETED, BOGUS, 1), +}; + +enum { + KEY_P4_L1D_OP_READ_RESULT_MISS = PERF_COUNT_HW_MAX, + KEY_P4_LL_OP_READ_RESULT_MISS, + KEY_P4_DTLB_OP_READ_RESULT_MISS, + KEY_P4_DTLB_OP_WRITE_RESULT_MISS, + KEY_P4_ITLB_OP_READ_RESULT_ACCESS, + KEY_P4_ITLB_OP_READ_RESULT_MISS, + KEY_P4_UOP_TYPE, +}; + +#endif /* PERF_EVENT_P4_H */ diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 60398a0d947..f571f514de2 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -29,46 +29,51 @@ #include <asm/stacktrace.h> #include <asm/nmi.h> -static u64 perf_event_mask __read_mostly; +#if 0 +#undef wrmsrl +#define wrmsrl(msr, val) \ +do { \ + trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\ + (unsigned long)(val)); \ + native_write_msr((msr), (u32)((u64)(val)), \ + (u32)((u64)(val) >> 32)); \ +} while (0) +#endif -/* The maximal number of PEBS events: */ -#define MAX_PEBS_EVENTS 4 +/* + * best effort, GUP based copy_from_user() that assumes IRQ or NMI context + */ +static unsigned long +copy_from_user_nmi(void *to, const void __user *from, unsigned long n) +{ + unsigned long offset, addr = (unsigned long)from; + int type = in_nmi() ? KM_NMI : KM_IRQ0; + unsigned long size, len = 0; + struct page *page; + void *map; + int ret; -/* The size of a BTS record in bytes: */ -#define BTS_RECORD_SIZE 24 + do { + ret = __get_user_pages_fast(addr, 1, 0, &page); + if (!ret) + break; -/* The size of a per-cpu BTS buffer in bytes: */ -#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) + offset = addr & (PAGE_SIZE - 1); + size = min(PAGE_SIZE - offset, n - len); -/* The BTS overflow threshold in bytes from the end of the buffer: */ -#define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) + map = kmap_atomic(page, type); + memcpy(to, map+offset, size); + kunmap_atomic(map, type); + put_page(page); + len += size; + to += size; + addr += size; -/* - * Bits in the debugctlmsr controlling branch tracing. - */ -#define X86_DEBUGCTL_TR (1 << 6) -#define X86_DEBUGCTL_BTS (1 << 7) -#define X86_DEBUGCTL_BTINT (1 << 8) -#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) -#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) + } while (len < n); -/* - * A debug store configuration. - * - * We only support architectures that use 64bit fields. - */ -struct debug_store { - u64 bts_buffer_base; - u64 bts_index; - u64 bts_absolute_maximum; - u64 bts_interrupt_threshold; - u64 pebs_buffer_base; - u64 pebs_index; - u64 pebs_absolute_maximum; - u64 pebs_interrupt_threshold; - u64 pebs_event_reset[MAX_PEBS_EVENTS]; -}; + return len; +} struct event_constraint { union { @@ -87,18 +92,39 @@ struct amd_nb { struct event_constraint event_constraints[X86_PMC_IDX_MAX]; }; +#define MAX_LBR_ENTRIES 16 + struct cpu_hw_events { + /* + * Generic x86 PMC bits + */ struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - unsigned long interrupts; int enabled; - struct debug_store *ds; int n_events; int n_added; int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ u64 tags[X86_PMC_IDX_MAX]; struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ + + /* + * Intel DebugStore bits + */ + struct debug_store *ds; + u64 pebs_enabled; + + /* + * Intel LBR bits + */ + int lbr_users; + void *lbr_context; + struct perf_branch_stack lbr_stack; + struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; + + /* + * AMD specific bits + */ struct amd_nb *amd_nb; }; @@ -112,22 +138,48 @@ struct cpu_hw_events { #define EVENT_CONSTRAINT(c, n, m) \ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) +/* + * Constraint on the Event code. + */ #define INTEL_EVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) +/* + * Constraint on the Event code + UMask + fixed-mask + */ #define FIXED_EVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) +/* + * Constraint on the Event code + UMask + */ +#define PEBS_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) + #define EVENT_CONSTRAINT_END \ EVENT_CONSTRAINT(0, 0, 0) #define for_each_event_constraint(e, c) \ for ((e) = (c); (e)->cmask; (e)++) +union perf_capabilities { + struct { + u64 lbr_format : 6; + u64 pebs_trap : 1; + u64 pebs_arch_reg : 1; + u64 pebs_format : 4; + u64 smm_freeze : 1; + }; + u64 capabilities; +}; + /* * struct x86_pmu - generic x86 pmu */ struct x86_pmu { + /* + * Generic x86 PMC bits + */ const char *name; int version; int (*handle_irq)(struct pt_regs *); @@ -135,6 +187,8 @@ struct x86_pmu { void (*enable_all)(void); void (*enable)(struct perf_event *); void (*disable)(struct perf_event *); + int (*hw_config)(struct perf_event_attr *attr, struct hw_perf_event *hwc); + int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); unsigned eventsel; unsigned perfctr; u64 (*event_map)(int); @@ -146,10 +200,6 @@ struct x86_pmu { u64 event_mask; int apic; u64 max_period; - u64 intel_ctrl; - void (*enable_bts)(u64 config); - void (*disable_bts)(void); - struct event_constraint * (*get_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); @@ -157,11 +207,32 @@ struct x86_pmu { void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); struct event_constraint *event_constraints; + void (*quirks)(void); void (*cpu_prepare)(int cpu); void (*cpu_starting)(int cpu); void (*cpu_dying)(int cpu); void (*cpu_dead)(int cpu); + + /* + * Intel Arch Perfmon v2+ + */ + u64 intel_ctrl; + union perf_capabilities intel_cap; + + /* + * Intel DebugStore bits + */ + int bts, pebs; + int pebs_record_size; + void (*drain_pebs)(struct pt_regs *regs); + struct event_constraint *pebs_constraints; + + /* + * Intel LBR + */ + unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ + int lbr_nr; /* hardware stack size */ }; static struct x86_pmu x86_pmu __read_mostly; @@ -239,9 +310,10 @@ again: static atomic_t active_events; static DEFINE_MUTEX(pmc_reserve_mutex); +#ifdef CONFIG_X86_LOCAL_APIC + static bool reserve_pmc_hardware(void) { -#ifdef CONFIG_X86_LOCAL_APIC int i; if (nmi_watchdog == NMI_LOCAL_APIC) @@ -256,11 +328,9 @@ static bool reserve_pmc_hardware(void) if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) goto eventsel_fail; } -#endif return true; -#ifdef CONFIG_X86_LOCAL_APIC eventsel_fail: for (i--; i >= 0; i--) release_evntsel_nmi(x86_pmu.eventsel + i); @@ -275,12 +345,10 @@ perfctr_fail: enable_lapic_nmi_watchdog(); return false; -#endif } static void release_pmc_hardware(void) { -#ifdef CONFIG_X86_LOCAL_APIC int i; for (i = 0; i < x86_pmu.num_events; i++) { @@ -290,113 +358,23 @@ static void release_pmc_hardware(void) if (nmi_watchdog == NMI_LOCAL_APIC) enable_lapic_nmi_watchdog(); -#endif -} - -static inline bool bts_available(void) -{ - return x86_pmu.enable_bts != NULL; } -static void init_debug_store_on_cpu(int cpu) -{ - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - - if (!ds) - return; - - wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, - (u32)((u64)(unsigned long)ds), - (u32)((u64)(unsigned long)ds >> 32)); -} - -static void fini_debug_store_on_cpu(int cpu) -{ - if (!per_cpu(cpu_hw_events, cpu).ds) - return; - - wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); -} - -static void release_bts_hardware(void) -{ - int cpu; - - if (!bts_available()) - return; - - get_online_cpus(); - - for_each_online_cpu(cpu) - fini_debug_store_on_cpu(cpu); - - for_each_possible_cpu(cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - - if (!ds) - continue; - - per_cpu(cpu_hw_events, cpu).ds = NULL; - - kfree((void *)(unsigned long)ds->bts_buffer_base); - kfree(ds); - } - - put_online_cpus(); -} - -static int reserve_bts_hardware(void) -{ - int cpu, err = 0; - - if (!bts_available()) - return 0; - - get_online_cpus(); - - for_each_possible_cpu(cpu) { - struct debug_store *ds; - void *buffer; - - err = -ENOMEM; - buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); - if (unlikely(!buffer)) - break; - - ds = kzalloc(sizeof(*ds), GFP_KERNEL); - if (unlikely(!ds)) { - kfree(buffer); - break; - } - - ds->bts_buffer_base = (u64)(unsigned long)buffer; - ds->bts_index = ds->bts_buffer_base; - ds->bts_absolute_maximum = - ds->bts_buffer_base + BTS_BUFFER_SIZE; - ds->bts_interrupt_threshold = - ds->bts_absolute_maximum - BTS_OVFL_TH; +#else - per_cpu(cpu_hw_events, cpu).ds = ds; - err = 0; - } +static bool reserve_pmc_hardware(void) { return true; } +static void release_pmc_hardware(void) {} - if (err) - release_bts_hardware(); - else { - for_each_online_cpu(cpu) - init_debug_store_on_cpu(cpu); - } - - put_online_cpus(); +#endif - return err; -} +static int reserve_ds_buffers(void); +static void release_ds_buffers(void); static void hw_perf_event_destroy(struct perf_event *event) { if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { release_pmc_hardware(); - release_bts_hardware(); + release_ds_buffers(); mutex_unlock(&pmc_reserve_mutex); } } @@ -439,6 +417,25 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) return 0; } +static int x86_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc) +{ + /* + * Generate PMC IRQs: + * (keep 'enabled' bit clear for now) + */ + hwc->config = ARCH_PERFMON_EVENTSEL_INT; + + /* + * Count user and OS events unless requested not to + */ + if (!attr->exclude_user) + hwc->config |= ARCH_PERFMON_EVENTSEL_USR; + if (!attr->exclude_kernel) + hwc->config |= ARCH_PERFMON_EVENTSEL_OS; + + return 0; +} + /* * Setup the hardware configuration for a given attr_type */ @@ -458,8 +455,11 @@ static int __hw_perf_event_init(struct perf_event *event) if (atomic_read(&active_events) == 0) { if (!reserve_pmc_hardware()) err = -EBUSY; - else - err = reserve_bts_hardware(); + else { + err = reserve_ds_buffers(); + if (err) + release_pmc_hardware(); + } } if (!err) atomic_inc(&active_events); @@ -470,23 +470,14 @@ static int __hw_perf_event_init(struct perf_event *event) event->destroy = hw_perf_event_destroy; - /* - * Generate PMC IRQs: - * (keep 'enabled' bit clear for now) - */ - hwc->config = ARCH_PERFMON_EVENTSEL_INT; - hwc->idx = -1; hwc->last_cpu = -1; hwc->last_tag = ~0ULL; - /* - * Count user and OS events unless requested not to. - */ - if (!attr->exclude_user) - hwc->config |= ARCH_PERFMON_EVENTSEL_USR; - if (!attr->exclude_kernel) - hwc->config |= ARCH_PERFMON_EVENTSEL_OS; + /* Processor specifics */ + err = x86_pmu.hw_config(attr, hwc); + if (err) + return err; if (!hwc->sample_period) { hwc->sample_period = x86_pmu.max_period; @@ -537,11 +528,11 @@ static int __hw_perf_event_init(struct perf_event *event) if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && (hwc->sample_period == 1)) { /* BTS is not supported by this architecture. */ - if (!bts_available()) + if (!x86_pmu.bts) return -EOPNOTSUPP; /* BTS is currently only allowed for user-mode. */ - if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) + if (!attr->exclude_kernel) return -EOPNOTSUPP; } @@ -850,14 +841,15 @@ void hw_perf_enable(void) static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc) { - (void)checking_wrmsrl(hwc->config_base + hwc->idx, + wrmsrl(hwc->config_base + hwc->idx, hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE); } static inline void x86_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config); + + wrmsrl(hwc->config_base + hwc->idx, hwc->config); } static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); @@ -872,7 +864,7 @@ x86_perf_event_set_period(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; s64 left = atomic64_read(&hwc->period_left); s64 period = hwc->sample_period; - int err, ret = 0, idx = hwc->idx; + int ret = 0, idx = hwc->idx; if (idx == X86_PMC_IDX_FIXED_BTS) return 0; @@ -910,8 +902,8 @@ x86_perf_event_set_period(struct perf_event *event) */ atomic64_set(&hwc->prev_count, (u64)-left); - err = checking_wrmsrl(hwc->event_base + idx, - (u64)(-left) & x86_pmu.event_mask); + wrmsrl(hwc->event_base + idx, + (u64)(-left) & x86_pmu.event_mask); perf_event_update_userpage(event); @@ -948,7 +940,7 @@ static int x86_pmu_enable(struct perf_event *event) if (n < 0) return n; - ret = x86_schedule_events(cpuc, n, assign); + ret = x86_pmu.schedule_events(cpuc, n, assign); if (ret) return ret; /* @@ -989,6 +981,7 @@ static void x86_pmu_unthrottle(struct perf_event *event) void perf_event_print_debug(void) { u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; + u64 pebs; struct cpu_hw_events *cpuc; unsigned long flags; int cpu, idx; @@ -1006,14 +999,16 @@ void perf_event_print_debug(void) rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); + rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); pr_info("\n"); pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); pr_info("CPU#%d: status: %016llx\n", cpu, status); pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); + pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); } - pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); + pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); for (idx = 0; idx < x86_pmu.num_events; idx++) { rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); @@ -1144,7 +1139,6 @@ void set_perf_event_pending(void) void perf_events_lapic_init(void) { -#ifdef CONFIG_X86_LOCAL_APIC if (!x86_pmu.apic || !x86_pmu_initialized()) return; @@ -1152,7 +1146,6 @@ void perf_events_lapic_init(void) * Always use NMI for PMU */ apic_write(APIC_LVTPC, APIC_DM_NMI); -#endif } static int __kprobes @@ -1176,9 +1169,7 @@ perf_event_nmi_handler(struct notifier_block *self, regs = args->regs; -#ifdef CONFIG_X86_LOCAL_APIC apic_write(APIC_LVTPC, APIC_DM_NMI); -#endif /* * Can't rely on the handled return value to say it was our NMI, two * events could trigger 'simultaneously' raising two back-to-back NMIs. @@ -1272,12 +1263,15 @@ int hw_perf_group_sched_in(struct perf_event *leader, int assign[X86_PMC_IDX_MAX]; int n0, n1, ret; + if (!x86_pmu_initialized()) + return 0; + /* n0 = total number of events */ n0 = collect_events(cpuc, leader, true); if (n0 < 0) return n0; - ret = x86_schedule_events(cpuc, n0, assign); + ret = x86_pmu.schedule_events(cpuc, n0, assign); if (ret) return ret; @@ -1327,6 +1321,9 @@ undo: #include "perf_event_amd.c" #include "perf_event_p6.c" +#include "perf_event_p4.c" +#include "perf_event_intel_lbr.c" +#include "perf_event_intel_ds.c" #include "perf_event_intel.c" static int __cpuinit @@ -1398,12 +1395,15 @@ void __init init_hw_perf_events(void) pr_cont("%s PMU driver.\n", x86_pmu.name); + if (x86_pmu.quirks) + x86_pmu.quirks(); + if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", x86_pmu.num_events, X86_PMC_MAX_GENERIC); x86_pmu.num_events = X86_PMC_MAX_GENERIC; } - perf_event_mask = (1 << x86_pmu.num_events) - 1; + x86_pmu.intel_ctrl = (1 << x86_pmu.num_events) - 1; perf_max_events = x86_pmu.num_events; if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) { @@ -1412,9 +1412,8 @@ void __init init_hw_perf_events(void) x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED; } - perf_event_mask |= + x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED; - x86_pmu.intel_ctrl = perf_event_mask; perf_events_lapic_init(); register_die_notifier(&perf_event_nmi_notifier); @@ -1439,7 +1438,7 @@ void __init init_hw_perf_events(void) pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); pr_info("... max period: %016Lx\n", x86_pmu.max_period); pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); - pr_info("... event mask: %016Lx\n", perf_event_mask); + pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); perf_cpu_notifier(x86_pmu_notifier); } @@ -1459,6 +1458,32 @@ static const struct pmu pmu = { }; /* + * validate that we can schedule this event + */ +static int validate_event(struct perf_event *event) +{ + struct cpu_hw_events *fake_cpuc; + struct event_constraint *c; + int ret = 0; + + fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); + if (!fake_cpuc) + return -ENOMEM; + + c = x86_pmu.get_event_constraints(fake_cpuc, event); + + if (!c || !c->weight) + ret = -ENOSPC; + + if (x86_pmu.put_event_constraints) + x86_pmu.put_event_constraints(fake_cpuc, event); + + kfree(fake_cpuc); + + return ret; +} + +/* * validate a single event group * * validation include: @@ -1498,7 +1523,7 @@ static int validate_group(struct perf_event *event) fake_cpuc->n_events = n; - ret = x86_schedule_events(fake_cpuc, n, NULL); + ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); out_free: kfree(fake_cpuc); @@ -1523,6 +1548,8 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) if (event->group_leader != event) err = validate_group(event); + else + err = validate_event(event); event->pmu = tmp; } @@ -1593,41 +1620,6 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); } -/* - * best effort, GUP based copy_from_user() that assumes IRQ or NMI context - */ -static unsigned long -copy_from_user_nmi(void *to, const void __user *from, unsigned long n) -{ - unsigned long offset, addr = (unsigned long)from; - int type = in_nmi() ? KM_NMI : KM_IRQ0; - unsigned long size, len = 0; - struct page *page; - void *map; - int ret; - - do { - ret = __get_user_pages_fast(addr, 1, 0, &page); - if (!ret) - break; - - offset = addr & (PAGE_SIZE - 1); - size = min(PAGE_SIZE - offset, n - len); - - map = kmap_atomic(page, type); - memcpy(to, map+offset, size); - kunmap_atomic(map, type); - put_page(page); - - len += size; - to += size; - addr += size; - - } while (len < n); - - return len; -} - static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) { unsigned long bytes; diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 573458f1caf..358a8e3d05f 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -363,6 +363,8 @@ static __initconst struct x86_pmu amd_pmu = { .enable_all = x86_pmu_enable_all, .enable = x86_pmu_enable_event, .disable = x86_pmu_disable_event, + .hw_config = x86_hw_config, + .schedule_events = x86_schedule_events, .eventsel = MSR_K7_EVNTSEL0, .perfctr = MSR_K7_PERFCTR0, .event_map = amd_pmu_event_map, diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 84bfde64a33..044b8436b19 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -470,42 +470,6 @@ static u64 intel_pmu_raw_event(u64 hw_event) return hw_event & CORE_EVNTSEL_MASK; } -static void intel_pmu_enable_bts(u64 config) -{ - unsigned long debugctlmsr; - - debugctlmsr = get_debugctlmsr(); - - debugctlmsr |= X86_DEBUGCTL_TR; - debugctlmsr |= X86_DEBUGCTL_BTS; - debugctlmsr |= X86_DEBUGCTL_BTINT; - - if (!(config & ARCH_PERFMON_EVENTSEL_OS)) - debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; - - if (!(config & ARCH_PERFMON_EVENTSEL_USR)) - debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; - - update_debugctlmsr(debugctlmsr); -} - -static void intel_pmu_disable_bts(void) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - unsigned long debugctlmsr; - - if (!cpuc->ds) - return; - - debugctlmsr = get_debugctlmsr(); - - debugctlmsr &= - ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | - X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); - - update_debugctlmsr(debugctlmsr); -} - static void intel_pmu_disable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -514,12 +478,17 @@ static void intel_pmu_disable_all(void) if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) intel_pmu_disable_bts(); + + intel_pmu_pebs_disable_all(); + intel_pmu_lbr_disable_all(); } static void intel_pmu_enable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + intel_pmu_pebs_enable_all(); + intel_pmu_lbr_enable_all(); wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { @@ -547,8 +516,7 @@ static inline void intel_pmu_ack_status(u64 ack) wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); } -static inline void -intel_pmu_disable_fixed(struct hw_perf_event *hwc) +static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) { int idx = hwc->idx - X86_PMC_IDX_FIXED; u64 ctrl_val, mask; @@ -557,71 +525,10 @@ intel_pmu_disable_fixed(struct hw_perf_event *hwc) rdmsrl(hwc->config_base, ctrl_val); ctrl_val &= ~mask; - (void)checking_wrmsrl(hwc->config_base, ctrl_val); + wrmsrl(hwc->config_base, ctrl_val); } -static void intel_pmu_drain_bts_buffer(void) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct debug_store *ds = cpuc->ds; - struct bts_record { - u64 from; - u64 to; - u64 flags; - }; - struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; - struct bts_record *at, *top; - struct perf_output_handle handle; - struct perf_event_header header; - struct perf_sample_data data; - struct pt_regs regs; - - if (!event) - return; - - if (!ds) - return; - - at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; - top = (struct bts_record *)(unsigned long)ds->bts_index; - - if (top <= at) - return; - - ds->bts_index = ds->bts_buffer_base; - - perf_sample_data_init(&data, 0); - - data.period = event->hw.last_period; - regs.ip = 0; - - /* - * Prepare a generic sample, i.e. fill in the invariant fields. - * We will overwrite the from and to address before we output - * the sample. - */ - perf_prepare_sample(&header, &data, event, ®s); - - if (perf_output_begin(&handle, event, - header.size * (top - at), 1, 1)) - return; - - for (; at < top; at++) { - data.ip = at->from; - data.addr = at->to; - - perf_output_sample(&handle, &header, &data, event); - } - - perf_output_end(&handle); - - /* There's new data available. */ - event->hw.interrupts++; - event->pending_kill = POLL_IN; -} - -static inline void -intel_pmu_disable_event(struct perf_event *event) +static void intel_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -637,14 +544,15 @@ intel_pmu_disable_event(struct perf_event *event) } x86_pmu_disable_event(event); + + if (unlikely(event->attr.precise)) + intel_pmu_pebs_disable(event); } -static inline void -intel_pmu_enable_fixed(struct hw_perf_event *hwc) +static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) { int idx = hwc->idx - X86_PMC_IDX_FIXED; u64 ctrl_val, bits, mask; - int err; /* * Enable IRQ generation (0x8), @@ -669,7 +577,7 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc) rdmsrl(hwc->config_base, ctrl_val); ctrl_val &= ~mask; ctrl_val |= bits; - err = checking_wrmsrl(hwc->config_base, ctrl_val); + wrmsrl(hwc->config_base, ctrl_val); } static void intel_pmu_enable_event(struct perf_event *event) @@ -689,6 +597,9 @@ static void intel_pmu_enable_event(struct perf_event *event) return; } + if (unlikely(event->attr.precise)) + intel_pmu_pebs_enable(event); + __x86_pmu_enable_event(hwc); } @@ -762,6 +673,15 @@ again: inc_irq_stat(apic_perf_irqs); ack = status; + + intel_pmu_lbr_read(); + + /* + * PEBS overflow sets bit 62 in the global status register + */ + if (__test_and_clear_bit(62, (unsigned long *)&status)) + x86_pmu.drain_pebs(regs); + for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { struct perf_event *event = cpuc->events[bit]; @@ -791,22 +711,18 @@ done: return 1; } -static struct event_constraint bts_constraint = - EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); - static struct event_constraint * -intel_special_constraints(struct perf_event *event) +intel_bts_constraints(struct perf_event *event) { - unsigned int hw_event; - - hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK; + struct hw_perf_event *hwc = &event->hw; + unsigned int hw_event, bts_event; - if (unlikely((hw_event == - x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && - (event->hw.sample_period == 1))) { + hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; + bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); + if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) return &bts_constraint; - } + return NULL; } @@ -815,7 +731,11 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event { struct event_constraint *c; - c = intel_special_constraints(event); + c = intel_bts_constraints(event); + if (c) + return c; + + c = intel_pebs_constraints(event); if (c) return c; @@ -829,6 +749,8 @@ static __initconst struct x86_pmu core_pmu = { .enable_all = x86_pmu_enable_all, .enable = x86_pmu_enable_event, .disable = x86_pmu_disable_event, + .hw_config = x86_hw_config, + .schedule_events = x86_schedule_events, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, .perfctr = MSR_ARCH_PERFMON_PERFCTR0, .event_map = intel_pmu_event_map, @@ -845,6 +767,20 @@ static __initconst struct x86_pmu core_pmu = { .event_constraints = intel_core_event_constraints, }; +static void intel_pmu_cpu_starting(int cpu) +{ + init_debug_store_on_cpu(cpu); + /* + * Deal with CPUs that don't clear their LBRs on power-up. + */ + intel_pmu_lbr_reset(); +} + +static void intel_pmu_cpu_dying(int cpu) +{ + fini_debug_store_on_cpu(cpu); +} + static __initconst struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, @@ -852,6 +788,8 @@ static __initconst struct x86_pmu intel_pmu = { .enable_all = intel_pmu_enable_all, .enable = intel_pmu_enable_event, .disable = intel_pmu_disable_event, + .hw_config = x86_hw_config, + .schedule_events = x86_schedule_events, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, .perfctr = MSR_ARCH_PERFMON_PERFCTR0, .event_map = intel_pmu_event_map, @@ -864,14 +802,38 @@ static __initconst struct x86_pmu intel_pmu = { * the generic event period: */ .max_period = (1ULL << 31) - 1, - .enable_bts = intel_pmu_enable_bts, - .disable_bts = intel_pmu_disable_bts, .get_event_constraints = intel_get_event_constraints, - .cpu_starting = init_debug_store_on_cpu, - .cpu_dying = fini_debug_store_on_cpu, + .cpu_starting = intel_pmu_cpu_starting, + .cpu_dying = intel_pmu_cpu_dying, }; +static void intel_clovertown_quirks(void) +{ + /* + * PEBS is unreliable due to: + * + * AJ67 - PEBS may experience CPL leaks + * AJ68 - PEBS PMI may be delayed by one event + * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12] + * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS + * + * AJ67 could be worked around by restricting the OS/USR flags. + * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI. + * + * AJ106 could possibly be worked around by not allowing LBR + * usage from PEBS, including the fixup. + * AJ68 could possibly be worked around by always programming + * a pebs_event_reset[0] value and coping with the lost events. + * + * But taken together it might just make sense to not enable PEBS on + * these chips. + */ + printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); + x86_pmu.pebs = 0; + x86_pmu.pebs_constraints = NULL; +} + static __init int intel_pmu_init(void) { union cpuid10_edx edx; @@ -881,12 +843,13 @@ static __init int intel_pmu_init(void) int version; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { - /* check for P6 processor family */ - if (boot_cpu_data.x86 == 6) { - return p6_pmu_init(); - } else { + switch (boot_cpu_data.x86) { + case 0x6: + return p6_pmu_init(); + case 0xf: + return p4_pmu_init(); + } return -ENODEV; - } } /* @@ -916,6 +879,18 @@ static __init int intel_pmu_init(void) x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); /* + * v2 and above have a perf capabilities MSR + */ + if (version > 1) { + u64 capabilities; + + rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); + x86_pmu.intel_cap.capabilities = capabilities; + } + + intel_ds_init(); + + /* * Install the hw-cache-events table: */ switch (boot_cpu_data.x86_model) { @@ -924,12 +899,15 @@ static __init int intel_pmu_init(void) break; case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ + x86_pmu.quirks = intel_clovertown_quirks; case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ case 29: /* six-core 45 nm xeon "Dunnington" */ memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + intel_pmu_lbr_init_core(); + x86_pmu.event_constraints = intel_core2_event_constraints; pr_cont("Core2 events, "); break; @@ -939,13 +917,18 @@ static __init int intel_pmu_init(void) memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + intel_pmu_lbr_init_nhm(); + x86_pmu.event_constraints = intel_nehalem_event_constraints; pr_cont("Nehalem/Corei7 events, "); break; + case 28: /* Atom */ memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + intel_pmu_lbr_init_atom(); + x86_pmu.event_constraints = intel_gen_event_constraints; pr_cont("Atom events, "); break; @@ -955,6 +938,8 @@ static __init int intel_pmu_init(void) memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + intel_pmu_lbr_init_nhm(); + x86_pmu.event_constraints = intel_westmere_event_constraints; pr_cont("Westmere events, "); break; diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c new file mode 100644 index 00000000000..c59678a14a2 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -0,0 +1,673 @@ +#ifdef CONFIG_CPU_SUP_INTEL + +/* The maximal number of PEBS events: */ +#define MAX_PEBS_EVENTS 4 + +/* The size of a BTS record in bytes: */ +#define BTS_RECORD_SIZE 24 + +#define BTS_BUFFER_SIZE (PAGE_SIZE << 4) +#define PEBS_BUFFER_SIZE PAGE_SIZE + +/* + * pebs_record_32 for p4 and core not supported + +struct pebs_record_32 { + u32 flags, ip; + u32 ax, bc, cx, dx; + u32 si, di, bp, sp; +}; + + */ + +struct pebs_record_core { + u64 flags, ip; + u64 ax, bx, cx, dx; + u64 si, di, bp, sp; + u64 r8, r9, r10, r11; + u64 r12, r13, r14, r15; +}; + +struct pebs_record_nhm { + u64 flags, ip; + u64 ax, bx, cx, dx; + u64 si, di, bp, sp; + u64 r8, r9, r10, r11; + u64 r12, r13, r14, r15; + u64 status, dla, dse, lat; +}; + +/* + * Bits in the debugctlmsr controlling branch tracing. + */ +#define X86_DEBUGCTL_TR (1 << 6) +#define X86_DEBUGCTL_BTS (1 << 7) +#define X86_DEBUGCTL_BTINT (1 << 8) +#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) +#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) + +/* + * A debug store configuration. + * + * We only support architectures that use 64bit fields. + */ +struct debug_store { + u64 bts_buffer_base; + u64 bts_index; + u64 bts_absolute_maximum; + u64 bts_interrupt_threshold; + u64 pebs_buffer_base; + u64 pebs_index; + u64 pebs_absolute_maximum; + u64 pebs_interrupt_threshold; + u64 pebs_event_reset[MAX_PEBS_EVENTS]; +}; + +static void init_debug_store_on_cpu(int cpu) +{ + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + + if (!ds) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, + (u32)((u64)(unsigned long)ds), + (u32)((u64)(unsigned long)ds >> 32)); +} + +static void fini_debug_store_on_cpu(int cpu) +{ + if (!per_cpu(cpu_hw_events, cpu).ds) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); +} + +static void release_ds_buffers(void) +{ + int cpu; + + if (!x86_pmu.bts && !x86_pmu.pebs) + return; + + get_online_cpus(); + + for_each_online_cpu(cpu) + fini_debug_store_on_cpu(cpu); + + for_each_possible_cpu(cpu) { + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + + if (!ds) + continue; + + per_cpu(cpu_hw_events, cpu).ds = NULL; + + kfree((void *)(unsigned long)ds->pebs_buffer_base); + kfree((void *)(unsigned long)ds->bts_buffer_base); + kfree(ds); + } + + put_online_cpus(); +} + +static int reserve_ds_buffers(void) +{ + int cpu, err = 0; + + if (!x86_pmu.bts && !x86_pmu.pebs) + return 0; + + get_online_cpus(); + + for_each_possible_cpu(cpu) { + struct debug_store *ds; + void *buffer; + int max, thresh; + + err = -ENOMEM; + ds = kzalloc(sizeof(*ds), GFP_KERNEL); + if (unlikely(!ds)) + break; + per_cpu(cpu_hw_events, cpu).ds = ds; + + if (x86_pmu.bts) { + buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); + if (unlikely(!buffer)) + break; + + max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; + thresh = max / 16; + + ds->bts_buffer_base = (u64)(unsigned long)buffer; + ds->bts_index = ds->bts_buffer_base; + ds->bts_absolute_maximum = ds->bts_buffer_base + + max * BTS_RECORD_SIZE; + ds->bts_interrupt_threshold = ds->bts_absolute_maximum - + thresh * BTS_RECORD_SIZE; + } + + if (x86_pmu.pebs) { + buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL); + if (unlikely(!buffer)) + break; + + max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; + + ds->pebs_buffer_base = (u64)(unsigned long)buffer; + ds->pebs_index = ds->pebs_buffer_base; + ds->pebs_absolute_maximum = ds->pebs_buffer_base + + max * x86_pmu.pebs_record_size; + /* + * Always use single record PEBS + */ + ds->pebs_interrupt_threshold = ds->pebs_buffer_base + + x86_pmu.pebs_record_size; + } + + err = 0; + } + + if (err) + release_ds_buffers(); + else { + for_each_online_cpu(cpu) + init_debug_store_on_cpu(cpu); + } + + put_online_cpus(); + + return err; +} + +/* + * BTS + */ + +static struct event_constraint bts_constraint = + EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); + +static void intel_pmu_enable_bts(u64 config) +{ + unsigned long debugctlmsr; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr |= X86_DEBUGCTL_TR; + debugctlmsr |= X86_DEBUGCTL_BTS; + debugctlmsr |= X86_DEBUGCTL_BTINT; + + if (!(config & ARCH_PERFMON_EVENTSEL_OS)) + debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; + + if (!(config & ARCH_PERFMON_EVENTSEL_USR)) + debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; + + update_debugctlmsr(debugctlmsr); +} + +static void intel_pmu_disable_bts(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + unsigned long debugctlmsr; + + if (!cpuc->ds) + return; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr &= + ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | + X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); + + update_debugctlmsr(debugctlmsr); +} + +static void intel_pmu_drain_bts_buffer(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct debug_store *ds = cpuc->ds; + struct bts_record { + u64 from; + u64 to; + u64 flags; + }; + struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; + struct bts_record *at, *top; + struct perf_output_handle handle; + struct perf_event_header header; + struct perf_sample_data data; + struct pt_regs regs; + + if (!event) + return; + + if (!ds) + return; + + at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; + top = (struct bts_record *)(unsigned long)ds->bts_index; + + if (top <= at) + return; + + ds->bts_index = ds->bts_buffer_base; + + perf_sample_data_init(&data, 0); + data.period = event->hw.last_period; + regs.ip = 0; + + /* + * Prepare a generic sample, i.e. fill in the invariant fields. + * We will overwrite the from and to address before we output + * the sample. + */ + perf_prepare_sample(&header, &data, event, ®s); + + if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) + return; + + for (; at < top; at++) { + data.ip = at->from; + data.addr = at->to; + + perf_output_sample(&handle, &header, &data, event); + } + + perf_output_end(&handle); + + /* There's new data available. */ + event->hw.interrupts++; + event->pending_kill = POLL_IN; +} + +/* + * PEBS + */ + +static struct event_constraint intel_core_pebs_events[] = { + PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */ + PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ + PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ + PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ + PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */ + PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ + PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */ + PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ + PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_nehalem_pebs_events[] = { + PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */ + PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */ + PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */ + PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETURED.ANY */ + PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */ + PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ + PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */ + PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ + PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint * +intel_pebs_constraints(struct perf_event *event) +{ + struct event_constraint *c; + + if (!event->attr.precise) + return NULL; + + if (x86_pmu.pebs_constraints) { + for_each_event_constraint(c, x86_pmu.pebs_constraints) { + if ((event->hw.config & c->cmask) == c->code) + return c; + } + } + + return &emptyconstraint; +} + +static void intel_pmu_pebs_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; + + cpuc->pebs_enabled |= 1ULL << hwc->idx; + WARN_ON_ONCE(cpuc->enabled); + + if (x86_pmu.intel_cap.pebs_trap) + intel_pmu_lbr_enable(event); +} + +static void intel_pmu_pebs_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + cpuc->pebs_enabled &= ~(1ULL << hwc->idx); + if (cpuc->enabled) + wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); + + hwc->config |= ARCH_PERFMON_EVENTSEL_INT; + + if (x86_pmu.intel_cap.pebs_trap) + intel_pmu_lbr_disable(event); +} + +static void intel_pmu_pebs_enable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (cpuc->pebs_enabled) + wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); +} + +static void intel_pmu_pebs_disable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (cpuc->pebs_enabled) + wrmsrl(MSR_IA32_PEBS_ENABLE, 0); +} + +#include <asm/insn.h> + +static inline bool kernel_ip(unsigned long ip) +{ +#ifdef CONFIG_X86_32 + return ip > PAGE_OFFSET; +#else + return (long)ip < 0; +#endif +} + +static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + unsigned long from = cpuc->lbr_entries[0].from; + unsigned long old_to, to = cpuc->lbr_entries[0].to; + unsigned long ip = regs->ip; + + /* + * We don't need to fixup if the PEBS assist is fault like + */ + if (!x86_pmu.intel_cap.pebs_trap) + return 1; + + /* + * No LBR entry, no basic block, no rewinding + */ + if (!cpuc->lbr_stack.nr || !from || !to) + return 0; + + /* + * Basic blocks should never cross user/kernel boundaries + */ + if (kernel_ip(ip) != kernel_ip(to)) + return 0; + + /* + * unsigned math, either ip is before the start (impossible) or + * the basic block is larger than 1 page (sanity) + */ + if ((ip - to) > PAGE_SIZE) + return 0; + + /* + * We sampled a branch insn, rewind using the LBR stack + */ + if (ip == to) { + regs->ip = from; + return 1; + } + + do { + struct insn insn; + u8 buf[MAX_INSN_SIZE]; + void *kaddr; + + old_to = to; + if (!kernel_ip(ip)) { + int bytes, size = MAX_INSN_SIZE; + + bytes = copy_from_user_nmi(buf, (void __user *)to, size); + if (bytes != size) + return 0; + + kaddr = buf; + } else + kaddr = (void *)to; + + kernel_insn_init(&insn, kaddr); + insn_get_length(&insn); + to += insn.length; + } while (to < ip); + + if (to == ip) { + regs->ip = old_to; + return 1; + } + + /* + * Even though we decoded the basic block, the instruction stream + * never matched the given IP, either the TO or the IP got corrupted. + */ + return 0; +} + +static int intel_pmu_save_and_restart(struct perf_event *event); + +static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct debug_store *ds = cpuc->ds; + struct perf_event *event = cpuc->events[0]; /* PMC0 only */ + struct pebs_record_core *at, *top; + struct perf_sample_data data; + struct perf_raw_record raw; + struct pt_regs regs; + int n; + + if (!ds || !x86_pmu.pebs) + return; + + at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; + top = (struct pebs_record_core *)(unsigned long)ds->pebs_index; + + /* + * Whatever else happens, drain the thing + */ + ds->pebs_index = ds->pebs_buffer_base; + + if (!test_bit(0, cpuc->active_mask)) + return; + + WARN_ON_ONCE(!event); + + if (!event->attr.precise) + return; + + n = top - at; + if (n <= 0) + return; + + if (!intel_pmu_save_and_restart(event)) + return; + + /* + * Should not happen, we program the threshold at 1 and do not + * set a reset value. + */ + WARN_ON_ONCE(n > 1); + at += n - 1; + + perf_sample_data_init(&data, 0); + data.period = event->hw.last_period; + + if (event->attr.sample_type & PERF_SAMPLE_RAW) { + raw.size = x86_pmu.pebs_record_size; + raw.data = at; + data.raw = &raw; + } + + /* + * We use the interrupt regs as a base because the PEBS record + * does not contain a full regs set, specifically it seems to + * lack segment descriptors, which get used by things like + * user_mode(). + * + * In the simple case fix up only the IP and BP,SP regs, for + * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly. + * A possible PERF_SAMPLE_REGS will have to transfer all regs. + */ + regs = *iregs; + regs.ip = at->ip; + regs.bp = at->bp; + regs.sp = at->sp; + + if (intel_pmu_pebs_fixup_ip(®s)) + regs.flags |= PERF_EFLAGS_EXACT; + else + regs.flags &= ~PERF_EFLAGS_EXACT; + + if (perf_event_overflow(event, 1, &data, ®s)) + x86_pmu_stop(event); +} + +static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct debug_store *ds = cpuc->ds; + struct pebs_record_nhm *at, *top; + struct perf_sample_data data; + struct perf_event *event = NULL; + struct perf_raw_record raw; + struct pt_regs regs; + u64 status = 0; + int bit, n; + + if (!ds || !x86_pmu.pebs) + return; + + at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; + top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; + + ds->pebs_index = ds->pebs_buffer_base; + + n = top - at; + if (n <= 0) + return; + + /* + * Should not happen, we program the threshold at 1 and do not + * set a reset value. + */ + WARN_ON_ONCE(n > MAX_PEBS_EVENTS); + + for ( ; at < top; at++) { + for_each_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) { + event = cpuc->events[bit]; + if (!test_bit(bit, cpuc->active_mask)) + continue; + + WARN_ON_ONCE(!event); + + if (!event->attr.precise) + continue; + + if (__test_and_set_bit(bit, (unsigned long *)&status)) + continue; + + break; + } + + if (!event || bit >= MAX_PEBS_EVENTS) + continue; + + if (!intel_pmu_save_and_restart(event)) + continue; + + perf_sample_data_init(&data, 0); + data.period = event->hw.last_period; + + if (event->attr.sample_type & PERF_SAMPLE_RAW) { + raw.size = x86_pmu.pebs_record_size; + raw.data = at; + data.raw = &raw; + } + + /* + * See the comment in intel_pmu_drain_pebs_core() + */ + regs = *iregs; + regs.ip = at->ip; + regs.bp = at->bp; + regs.sp = at->sp; + + if (intel_pmu_pebs_fixup_ip(®s)) + regs.flags |= PERF_EFLAGS_EXACT; + else + regs.flags &= ~PERF_EFLAGS_EXACT; + + if (perf_event_overflow(event, 1, &data, ®s)) + x86_pmu_stop(event); + } +} + +/* + * BTS, PEBS probe and setup + */ + +static void intel_ds_init(void) +{ + /* + * No support for 32bit formats + */ + if (!boot_cpu_has(X86_FEATURE_DTES64)) + return; + + x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); + x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); + if (x86_pmu.pebs) { + char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; + int format = x86_pmu.intel_cap.pebs_format; + + switch (format) { + case 0: + printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); + x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); + x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; + x86_pmu.pebs_constraints = intel_core_pebs_events; + break; + + case 1: + printk(KERN_CONT "PEBS fmt1%c, ", pebs_type); + x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); + x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; + x86_pmu.pebs_constraints = intel_nehalem_pebs_events; + break; + + default: + printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); + x86_pmu.pebs = 0; + break; + } + } +} + +#else /* CONFIG_CPU_SUP_INTEL */ + +static int reserve_ds_buffers(void) +{ + return 0; +} + +static void release_ds_buffers(void) +{ +} + +#endif /* CONFIG_CPU_SUP_INTEL */ diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c new file mode 100644 index 00000000000..df4c98e26c5 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -0,0 +1,221 @@ +#ifdef CONFIG_CPU_SUP_INTEL + +enum { + LBR_FORMAT_32 = 0x00, + LBR_FORMAT_LIP = 0x01, + LBR_FORMAT_EIP = 0x02, + LBR_FORMAT_EIP_FLAGS = 0x03, +}; + +/* + * We only support LBR implementations that have FREEZE_LBRS_ON_PMI + * otherwise it becomes near impossible to get a reliable stack. + */ + +#define X86_DEBUGCTL_LBR (1 << 0) +#define X86_DEBUGCTL_FREEZE_LBRS_ON_PMI (1 << 11) + +static void __intel_pmu_lbr_enable(void) +{ + u64 debugctl; + + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + debugctl |= (X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI); + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); +} + +static void __intel_pmu_lbr_disable(void) +{ + u64 debugctl; + + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + debugctl &= ~(X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI); + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); +} + +static void intel_pmu_lbr_reset_32(void) +{ + int i; + + for (i = 0; i < x86_pmu.lbr_nr; i++) + wrmsrl(x86_pmu.lbr_from + i, 0); +} + +static void intel_pmu_lbr_reset_64(void) +{ + int i; + + for (i = 0; i < x86_pmu.lbr_nr; i++) { + wrmsrl(x86_pmu.lbr_from + i, 0); + wrmsrl(x86_pmu.lbr_to + i, 0); + } +} + +static void intel_pmu_lbr_reset(void) +{ + if (!x86_pmu.lbr_nr) + return; + + if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) + intel_pmu_lbr_reset_32(); + else + intel_pmu_lbr_reset_64(); +} + +static void intel_pmu_lbr_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (!x86_pmu.lbr_nr) + return; + + WARN_ON_ONCE(cpuc->enabled); + + /* + * Reset the LBR stack if we changed task context to + * avoid data leaks. + */ + + if (event->ctx->task && cpuc->lbr_context != event->ctx) { + intel_pmu_lbr_reset(); + cpuc->lbr_context = event->ctx; + } + + cpuc->lbr_users++; +} + +static void intel_pmu_lbr_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (!x86_pmu.lbr_nr) + return; + + cpuc->lbr_users--; + WARN_ON_ONCE(cpuc->lbr_users < 0); + + if (cpuc->enabled && !cpuc->lbr_users) + __intel_pmu_lbr_disable(); +} + +static void intel_pmu_lbr_enable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (cpuc->lbr_users) + __intel_pmu_lbr_enable(); +} + +static void intel_pmu_lbr_disable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (cpuc->lbr_users) + __intel_pmu_lbr_disable(); +} + +static inline u64 intel_pmu_lbr_tos(void) +{ + u64 tos; + + rdmsrl(x86_pmu.lbr_tos, tos); + + return tos; +} + +static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) +{ + unsigned long mask = x86_pmu.lbr_nr - 1; + u64 tos = intel_pmu_lbr_tos(); + int i; + + for (i = 0; i < x86_pmu.lbr_nr; i++) { + unsigned long lbr_idx = (tos - i) & mask; + union { + struct { + u32 from; + u32 to; + }; + u64 lbr; + } msr_lastbranch; + + rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); + + cpuc->lbr_entries[i].from = msr_lastbranch.from; + cpuc->lbr_entries[i].to = msr_lastbranch.to; + cpuc->lbr_entries[i].flags = 0; + } + cpuc->lbr_stack.nr = i; +} + +#define LBR_FROM_FLAG_MISPRED (1ULL << 63) + +/* + * Due to lack of segmentation in Linux the effective address (offset) + * is the same as the linear address, allowing us to merge the LIP and EIP + * LBR formats. + */ +static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) +{ + unsigned long mask = x86_pmu.lbr_nr - 1; + int lbr_format = x86_pmu.intel_cap.lbr_format; + u64 tos = intel_pmu_lbr_tos(); + int i; + + for (i = 0; i < x86_pmu.lbr_nr; i++) { + unsigned long lbr_idx = (tos - i) & mask; + u64 from, to, flags = 0; + + rdmsrl(x86_pmu.lbr_from + lbr_idx, from); + rdmsrl(x86_pmu.lbr_to + lbr_idx, to); + + if (lbr_format == LBR_FORMAT_EIP_FLAGS) { + flags = !!(from & LBR_FROM_FLAG_MISPRED); + from = (u64)((((s64)from) << 1) >> 1); + } + + cpuc->lbr_entries[i].from = from; + cpuc->lbr_entries[i].to = to; + cpuc->lbr_entries[i].flags = flags; + } + cpuc->lbr_stack.nr = i; +} + +static void intel_pmu_lbr_read(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (!cpuc->lbr_users) + return; + + if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) + intel_pmu_lbr_read_32(cpuc); + else + intel_pmu_lbr_read_64(cpuc); +} + +static void intel_pmu_lbr_init_core(void) +{ + x86_pmu.lbr_nr = 4; + x86_pmu.lbr_tos = 0x01c9; + x86_pmu.lbr_from = 0x40; + x86_pmu.lbr_to = 0x60; +} + +static void intel_pmu_lbr_init_nhm(void) +{ + x86_pmu.lbr_nr = 16; + x86_pmu.lbr_tos = 0x01c9; + x86_pmu.lbr_from = 0x680; + x86_pmu.lbr_to = 0x6c0; +} + +static void intel_pmu_lbr_init_atom(void) +{ + x86_pmu.lbr_nr = 8; + x86_pmu.lbr_tos = 0x01c9; + x86_pmu.lbr_from = 0x40; + x86_pmu.lbr_to = 0x60; +} + +#endif /* CONFIG_CPU_SUP_INTEL */ diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c new file mode 100644 index 00000000000..b8a811ab760 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -0,0 +1,728 @@ +/* + * Netburst Perfomance Events (P4, old Xeon) + * + * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org> + * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com> + * + * For licencing details see kernel-base/COPYING + */ + +#ifdef CONFIG_CPU_SUP_INTEL + +#include <asm/perf_event_p4.h> + +/* + * array indices: 0,1 - HT threads, used with HT enabled cpu + */ +struct p4_event_template { + u32 opcode; /* ESCR event + CCCR selector */ + u64 config; /* packed predefined bits */ + int dep; /* upstream dependency event index */ + int key; /* index into p4_templates */ + u64 msr; /* + * the high 32 bits set into MSR_IA32_PEBS_ENABLE and + * the low 32 bits set into MSR_P4_PEBS_MATRIX_VERT + * for cache events + */ + unsigned int emask; /* ESCR EventMask */ + unsigned int escr_msr[2]; /* ESCR MSR for this event */ + unsigned int cntr[2]; /* counter index (offset) */ +}; + +struct p4_pmu_res { + /* maps hw_conf::idx into template for ESCR sake */ + struct p4_event_template *tpl[ARCH_P4_MAX_CCCR]; +}; + +static DEFINE_PER_CPU(struct p4_pmu_res, p4_pmu_config); + +#define P4_CACHE_EVENT_CONFIG(event, bit) \ + p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(event) << P4_EVNTSEL_EVENT_SHIFT) | \ + p4_config_pack_escr((event##_##bit) << P4_EVNTSEL_EVENTMASK_SHIFT) | \ + p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(event) << P4_CCCR_ESCR_SELECT_SHIFT) + +static __initconst u64 p4_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + /* 1stL_cache_load_miss_retired */ + [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS) + | KEY_P4_L1D_OP_READ_RESULT_MISS, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + /* 2ndL_cache_load_miss_retired */ + [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS) + | KEY_P4_LL_OP_READ_RESULT_MISS, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + /* DTLB_load_miss_retired */ + [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS) + | KEY_P4_DTLB_OP_READ_RESULT_MISS, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + /* DTLB_store_miss_retired */ + [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS) + | KEY_P4_DTLB_OP_WRITE_RESULT_MISS, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + /* ITLB_reference.HIT */ + [ C(RESULT_ACCESS) ] = P4_CACHE_EVENT_CONFIG(P4_ITLB_REFERENCE, HIT) + | KEY_P4_ITLB_OP_READ_RESULT_ACCESS, + + /* ITLB_reference.MISS */ + [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_ITLB_REFERENCE, MISS) + | KEY_P4_ITLB_OP_READ_RESULT_MISS, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +/* + * WARN: CCCR1 doesn't have a working enable bit so try to not + * use it if possible + * + * Also as only we start to support raw events we will need to + * append _all_ P4_EVENT_PACK'ed events here + */ +struct p4_event_template p4_templates[] = { + [0] = { + .opcode = P4_GLOBAL_POWER_EVENTS, + .config = 0, + .dep = -1, + .key = 0, + .emask = + P4_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING), + .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .cntr = { 0, 2 }, + }, + [1] = { + .opcode = P4_INSTR_RETIRED, + .config = 0, + .dep = -1, /* needs front-end tagging */ + .key = 1, + .emask = + P4_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG) | + P4_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG), + .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .cntr = { 12, 14 }, + }, + [2] = { + .opcode = P4_BSQ_CACHE_REFERENCE, + .config = 0, + .dep = -1, + .key = 2, + .emask = + P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | + P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | + P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | + P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | + P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | + P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITM), + .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, + .cntr = { 0, 2 }, + }, + [3] = { + .opcode = P4_BSQ_CACHE_REFERENCE, + .config = 0, + .dep = -1, + .key = 3, + .emask = + P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | + P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | + P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, WR_2ndL_MISS), + .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, + .cntr = { 0, 3 }, + }, + [4] = { + .opcode = P4_RETIRED_BRANCH_TYPE, + .config = 0, + .dep = -1, + .key = 4, + .emask = + P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL) | + P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL) | + P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, RETURN) | + P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, INDIRECT), + .escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 }, + .cntr = { 4, 6 }, + }, + [5] = { + .opcode = P4_MISPRED_BRANCH_RETIRED, + .config = 0, + .dep = -1, + .key = 5, + .emask = + P4_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS), + .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .cntr = { 12, 14 }, + }, + [6] = { + .opcode = P4_FSB_DATA_ACTIVITY, + .config = p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE), + .dep = -1, + .key = 6, + .emask = + P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV) | + P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN), + .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .cntr = { 0, 2 }, + }, + [KEY_P4_L1D_OP_READ_RESULT_MISS] = { + .opcode = P4_REPLAY_EVENT, + .config = 0, + .dep = -1, + .msr = (u64)(1 << 0 | 1 << 24) << 32 | (1 << 0), + .key = KEY_P4_L1D_OP_READ_RESULT_MISS, + .emask = + P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS), + .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 }, + .cntr = { 16, 17 }, + }, + [KEY_P4_LL_OP_READ_RESULT_MISS] = { + .opcode = P4_REPLAY_EVENT, + .config = 0, + .dep = -1, + .msr = (u64)(1 << 1 | 1 << 24) << 32 | (1 << 0), + .key = KEY_P4_LL_OP_READ_RESULT_MISS, + .emask = + P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS), + .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 }, + .cntr = { 16, 17 }, + }, + [KEY_P4_DTLB_OP_READ_RESULT_MISS] = { + .opcode = P4_REPLAY_EVENT, + .config = 0, + .dep = -1, + .msr = (u64)(1 << 2 | 1 << 24) << 32 | (1 << 0), + .key = KEY_P4_DTLB_OP_READ_RESULT_MISS, + .emask = + P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS), + .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 }, + .cntr = { 16, 17 }, + }, + [KEY_P4_DTLB_OP_WRITE_RESULT_MISS] = { + .opcode = P4_REPLAY_EVENT, + .config = 0, + .dep = -1, + .msr = (u64)(1 << 2 | 1 << 24) << 32 | (1 << 1), + .key = KEY_P4_DTLB_OP_WRITE_RESULT_MISS, + .emask = + P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS), + .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 }, + .cntr = { 16, 17 }, + }, + [KEY_P4_ITLB_OP_READ_RESULT_ACCESS] = { + .opcode = P4_ITLB_REFERENCE, + .config = 0, + .dep = -1, + .msr = 0, + .key = KEY_P4_ITLB_OP_READ_RESULT_ACCESS, + .emask = + P4_EVENT_ATTR(P4_ITLB_REFERENCE, HIT), + .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, + .cntr = { 0, 2 }, + }, + [KEY_P4_ITLB_OP_READ_RESULT_MISS] = { + .opcode = P4_ITLB_REFERENCE, + .config = 0, + .dep = -1, + .msr = 0, + .key = KEY_P4_ITLB_OP_READ_RESULT_MISS, + .emask = + P4_EVENT_ATTR(P4_ITLB_REFERENCE, MISS), + .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, + .cntr = { 0, 2 }, + }, + [KEY_P4_UOP_TYPE] = { + .opcode = P4_UOP_TYPE, + .config = 0, + .dep = -1, + .key = KEY_P4_UOP_TYPE, + .emask = + P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS) | + P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES), + .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, + .cntr = { 16, 17 }, + }, +}; + +static u64 p4_pmu_event_map(int hw_event) +{ + struct p4_event_template *tpl; + u64 config; + + if (hw_event > ARRAY_SIZE(p4_templates)) { + printk_once(KERN_ERR "PMU: Incorrect event index\n"); + return 0; + } + tpl = &p4_templates[hw_event]; + + /* + * fill config up according to + * a predefined event template + */ + config = tpl->config; + config |= p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(tpl->opcode) << P4_EVNTSEL_EVENT_SHIFT); + config |= p4_config_pack_escr(tpl->emask << P4_EVNTSEL_EVENTMASK_SHIFT); + config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT); + config |= p4_config_pack_cccr(hw_event & P4_CCCR_RESERVED); + + return config; +} + +/* + * Note that we still have 5 events (from global events SDM list) + * intersected in opcode+emask bits so we will need another + * scheme there do distinguish templates. + */ +static inline int p4_pmu_emask_match(unsigned int dst, unsigned int src) +{ + return dst & src; +} + +static struct p4_event_template *p4_pmu_template_lookup(u64 config) +{ + int key = p4_config_unpack_key(config); + + if (key < ARRAY_SIZE(p4_templates)) + return &p4_templates[key]; + else + return NULL; +} + +/* + * We don't control raw events so it's up to the caller + * to pass sane values (and we don't count the thread number + * on HT machine but allow HT-compatible specifics to be + * passed on) + */ +static u64 p4_pmu_raw_event(u64 hw_event) +{ + return hw_event & + (p4_config_pack_escr(P4_EVNTSEL_MASK_HT) | + p4_config_pack_cccr(P4_CCCR_MASK_HT)); +} + +static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc) +{ + int cpu = raw_smp_processor_id(); + + /* + * the reason we use cpu that early is that: if we get scheduled + * first time on the same cpu -- we will not need swap thread + * specific flags in config (and will save some cpu cycles) + */ + + /* CCCR by default */ + hwc->config = p4_config_pack_cccr(p4_default_cccr_conf(cpu)); + + /* Count user and OS events unless not requested to */ + hwc->config |= p4_config_pack_escr(p4_default_escr_conf(cpu, attr->exclude_kernel, + attr->exclude_user)); + /* on HT machine we need a special bit */ + if (p4_ht_active() && p4_ht_thread(cpu)) + hwc->config = p4_set_ht_bit(hwc->config); + + return 0; +} + +static inline void p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) +{ + unsigned long dummy; + + rdmsrl(hwc->config_base + hwc->idx, dummy); + if (dummy & P4_CCCR_OVF) { + (void)checking_wrmsrl(hwc->config_base + hwc->idx, + ((u64)dummy) & ~P4_CCCR_OVF); + } +} + +static inline void p4_pmu_disable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* + * If event gets disabled while counter is in overflowed + * state we need to clear P4_CCCR_OVF, otherwise interrupt get + * asserted again and again + */ + (void)checking_wrmsrl(hwc->config_base + hwc->idx, + (u64)(p4_config_unpack_cccr(hwc->config)) & + ~P4_CCCR_ENABLE & ~P4_CCCR_OVF); +} + +static void p4_pmu_disable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx; + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + struct perf_event *event = cpuc->events[idx]; + if (!test_bit(idx, cpuc->active_mask)) + continue; + p4_pmu_disable_event(event); + } +} + +static void p4_pmu_enable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int thread = p4_ht_config_thread(hwc->config); + u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); + u64 escr_base; + struct p4_event_template *tpl; + struct p4_pmu_res *c; + + /* + * some preparation work from per-cpu private fields + * since we need to find out which ESCR to use + */ + c = &__get_cpu_var(p4_pmu_config); + tpl = c->tpl[hwc->idx]; + if (!tpl) { + pr_crit("%s: Wrong index: %d\n", __func__, hwc->idx); + return; + } + + if (tpl->msr) { + (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, tpl->msr >> 32); + (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, tpl->msr & 0xffffffff); + } + + escr_base = (u64)tpl->escr_msr[thread]; + + /* + * - we dont support cascaded counters yet + * - and counter 1 is broken (erratum) + */ + WARN_ON_ONCE(p4_is_event_cascaded(hwc->config)); + WARN_ON_ONCE(hwc->idx == 1); + + (void)checking_wrmsrl(escr_base, escr_conf); + (void)checking_wrmsrl(hwc->config_base + hwc->idx, + (u64)(p4_config_unpack_cccr(hwc->config)) | P4_CCCR_ENABLE); +} + +static void p4_pmu_enable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx; + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + struct perf_event *event = cpuc->events[idx]; + if (!test_bit(idx, cpuc->active_mask)) + continue; + p4_pmu_enable_event(event); + } +} + +static int p4_pmu_handle_irq(struct pt_regs *regs) +{ + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct perf_event *event; + struct hw_perf_event *hwc; + int idx, handled = 0; + u64 val; + + data.addr = 0; + data.raw = NULL; + + cpuc = &__get_cpu_var(cpu_hw_events); + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + event = cpuc->events[idx]; + hwc = &event->hw; + + WARN_ON_ONCE(hwc->idx != idx); + + /* + * FIXME: Redundant call, actually not needed + * but just to check if we're screwed + */ + p4_pmu_clear_cccr_ovf(hwc); + + val = x86_perf_event_update(event); + if (val & (1ULL << (x86_pmu.event_bits - 1))) + continue; + + /* + * event overflow + */ + handled = 1; + data.period = event->hw.last_period; + + if (!x86_perf_event_set_period(event)) + continue; + if (perf_event_overflow(event, 1, &data, regs)) + p4_pmu_disable_event(event); + } + + if (handled) { + /* p4 quirk: unmask it again */ + apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); + inc_irq_stat(apic_perf_irqs); + } + + return handled; +} + +/* + * swap thread specific fields according to a thread + * we are going to run on + */ +static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu) +{ + u32 escr, cccr; + + /* + * we either lucky and continue on same cpu or no HT support + */ + if (!p4_should_swap_ts(hwc->config, cpu)) + return; + + /* + * the event is migrated from an another logical + * cpu, so we need to swap thread specific flags + */ + + escr = p4_config_unpack_escr(hwc->config); + cccr = p4_config_unpack_cccr(hwc->config); + + if (p4_ht_thread(cpu)) { + cccr &= ~P4_CCCR_OVF_PMI_T0; + cccr |= P4_CCCR_OVF_PMI_T1; + if (escr & P4_EVNTSEL_T0_OS) { + escr &= ~P4_EVNTSEL_T0_OS; + escr |= P4_EVNTSEL_T1_OS; + } + if (escr & P4_EVNTSEL_T0_USR) { + escr &= ~P4_EVNTSEL_T0_USR; + escr |= P4_EVNTSEL_T1_USR; + } + hwc->config = p4_config_pack_escr(escr); + hwc->config |= p4_config_pack_cccr(cccr); + hwc->config |= P4_CONFIG_HT; + } else { + cccr &= ~P4_CCCR_OVF_PMI_T1; + cccr |= P4_CCCR_OVF_PMI_T0; + if (escr & P4_EVNTSEL_T1_OS) { + escr &= ~P4_EVNTSEL_T1_OS; + escr |= P4_EVNTSEL_T0_OS; + } + if (escr & P4_EVNTSEL_T1_USR) { + escr &= ~P4_EVNTSEL_T1_USR; + escr |= P4_EVNTSEL_T0_USR; + } + hwc->config = p4_config_pack_escr(escr); + hwc->config |= p4_config_pack_cccr(cccr); + hwc->config &= ~P4_CONFIG_HT; + } +} + +/* ESCRs are not sequential in memory so we need a map */ +static const unsigned int p4_escr_map[ARCH_P4_TOTAL_ESCR] = { + MSR_P4_ALF_ESCR0, /* 0 */ + MSR_P4_ALF_ESCR1, /* 1 */ + MSR_P4_BPU_ESCR0, /* 2 */ + MSR_P4_BPU_ESCR1, /* 3 */ + MSR_P4_BSU_ESCR0, /* 4 */ + MSR_P4_BSU_ESCR1, /* 5 */ + MSR_P4_CRU_ESCR0, /* 6 */ + MSR_P4_CRU_ESCR1, /* 7 */ + MSR_P4_CRU_ESCR2, /* 8 */ + MSR_P4_CRU_ESCR3, /* 9 */ + MSR_P4_CRU_ESCR4, /* 10 */ + MSR_P4_CRU_ESCR5, /* 11 */ + MSR_P4_DAC_ESCR0, /* 12 */ + MSR_P4_DAC_ESCR1, /* 13 */ + MSR_P4_FIRM_ESCR0, /* 14 */ + MSR_P4_FIRM_ESCR1, /* 15 */ + MSR_P4_FLAME_ESCR0, /* 16 */ + MSR_P4_FLAME_ESCR1, /* 17 */ + MSR_P4_FSB_ESCR0, /* 18 */ + MSR_P4_FSB_ESCR1, /* 19 */ + MSR_P4_IQ_ESCR0, /* 20 */ + MSR_P4_IQ_ESCR1, /* 21 */ + MSR_P4_IS_ESCR0, /* 22 */ + MSR_P4_IS_ESCR1, /* 23 */ + MSR_P4_ITLB_ESCR0, /* 24 */ + MSR_P4_ITLB_ESCR1, /* 25 */ + MSR_P4_IX_ESCR0, /* 26 */ + MSR_P4_IX_ESCR1, /* 27 */ + MSR_P4_MOB_ESCR0, /* 28 */ + MSR_P4_MOB_ESCR1, /* 29 */ + MSR_P4_MS_ESCR0, /* 30 */ + MSR_P4_MS_ESCR1, /* 31 */ + MSR_P4_PMH_ESCR0, /* 32 */ + MSR_P4_PMH_ESCR1, /* 33 */ + MSR_P4_RAT_ESCR0, /* 34 */ + MSR_P4_RAT_ESCR1, /* 35 */ + MSR_P4_SAAT_ESCR0, /* 36 */ + MSR_P4_SAAT_ESCR1, /* 37 */ + MSR_P4_SSU_ESCR0, /* 38 */ + MSR_P4_SSU_ESCR1, /* 39 */ + MSR_P4_TBPU_ESCR0, /* 40 */ + MSR_P4_TBPU_ESCR1, /* 41 */ + MSR_P4_TC_ESCR0, /* 42 */ + MSR_P4_TC_ESCR1, /* 43 */ + MSR_P4_U2L_ESCR0, /* 44 */ + MSR_P4_U2L_ESCR1, /* 45 */ +}; + +static int p4_get_escr_idx(unsigned int addr) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(p4_escr_map); i++) { + if (addr == p4_escr_map[i]) + return i; + } + + return -1; +} + +static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) +{ + unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long escr_mask[BITS_TO_LONGS(ARCH_P4_TOTAL_ESCR)]; + + struct hw_perf_event *hwc; + struct p4_event_template *tpl; + struct p4_pmu_res *c; + int cpu = raw_smp_processor_id(); + int escr_idx, thread, i, num; + + bitmap_zero(used_mask, X86_PMC_IDX_MAX); + bitmap_zero(escr_mask, ARCH_P4_TOTAL_ESCR); + + c = &__get_cpu_var(p4_pmu_config); + /* + * Firstly find out which resource events are going + * to use, if ESCR+CCCR tuple is already borrowed + * then get out of here + */ + for (i = 0, num = n; i < n; i++, num--) { + hwc = &cpuc->event_list[i]->hw; + tpl = p4_pmu_template_lookup(hwc->config); + if (!tpl) + goto done; + thread = p4_ht_thread(cpu); + escr_idx = p4_get_escr_idx(tpl->escr_msr[thread]); + if (escr_idx == -1) + goto done; + + /* already allocated and remains on the same cpu */ + if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) { + if (assign) + assign[i] = hwc->idx; + /* upstream dependent event */ + if (unlikely(tpl->dep != -1)) + printk_once(KERN_WARNING "PMU: Dep events are " + "not implemented yet\n"); + goto reserve; + } + + /* it may be already borrowed */ + if (test_bit(tpl->cntr[thread], used_mask) || + test_bit(escr_idx, escr_mask)) + goto done; + + /* + * ESCR+CCCR+COUNTERs are available to use lets swap + * thread specific bits, push assigned bits + * back and save template into per-cpu + * area (which will allow us to find out the ESCR + * to be used at moment of "enable event via real MSR") + */ + p4_pmu_swap_config_ts(hwc, cpu); + if (assign) { + assign[i] = tpl->cntr[thread]; + c->tpl[assign[i]] = tpl; + } +reserve: + set_bit(tpl->cntr[thread], used_mask); + set_bit(escr_idx, escr_mask); + } + +done: + return num ? -ENOSPC : 0; +} + +static __initconst struct x86_pmu p4_pmu = { + .name = "Netburst P4/Xeon", + .handle_irq = p4_pmu_handle_irq, + .disable_all = p4_pmu_disable_all, + .enable_all = p4_pmu_enable_all, + .enable = p4_pmu_enable_event, + .disable = p4_pmu_disable_event, + .eventsel = MSR_P4_BPU_CCCR0, + .perfctr = MSR_P4_BPU_PERFCTR0, + .event_map = p4_pmu_event_map, + .raw_event = p4_pmu_raw_event, + .max_events = ARRAY_SIZE(p4_templates), + .get_event_constraints = x86_get_event_constraints, + /* + * IF HT disabled we may need to use all + * ARCH_P4_MAX_CCCR counters simulaneously + * though leave it restricted at moment assuming + * HT is on + */ + .num_events = ARCH_P4_MAX_CCCR, + .apic = 1, + .event_bits = 40, + .event_mask = (1ULL << 40) - 1, + .max_period = (1ULL << 39) - 1, + .hw_config = p4_hw_config, + .schedule_events = p4_pmu_schedule_events, +}; + +static __init int p4_pmu_init(void) +{ + unsigned int low, high; + + /* If we get stripped -- indexig fails */ + BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); + + rdmsr(MSR_IA32_MISC_ENABLE, low, high); + if (!(low & (1 << 7))) { + pr_cont("unsupported Netburst CPU model %d ", + boot_cpu_data.x86_model); + return -ENODEV; + } + + memcpy(hw_cache_event_ids, p4_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + pr_cont("Netburst events, "); + + x86_pmu = p4_pmu; + + return 0; +} + +#endif /* CONFIG_CPU_SUP_INTEL */ diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index a330485d14d..6ff4d01d880 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -109,6 +109,8 @@ static __initconst struct x86_pmu p6_pmu = { .enable_all = p6_pmu_enable_all, .enable = p6_pmu_enable_event, .disable = p6_pmu_disable_event, + .hw_config = x86_hw_config, + .schedule_events = x86_schedule_events, .eventsel = MSR_P6_EVNTSEL0, .perfctr = MSR_P6_PERFCTR0, .event_map = p6_pmu_event_map, diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 419386c24b8..cbaf8f2b83d 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -20,7 +20,7 @@ lib-y := delay.o lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o -lib-$(CONFIG_KPROBES) += insn.o inat.o +lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o obj-y += msr.o msr-reg.o msr-reg-export.o diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 95477038a72..2bccb7b9da2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -203,8 +203,9 @@ struct perf_event_attr { enable_on_exec : 1, /* next exec enables */ task : 1, /* trace fork/exit */ watermark : 1, /* wakeup_watermark */ + precise : 1, /* OoO invariant counter */ - __reserved_1 : 49; + __reserved_1 : 48; union { __u32 wakeup_events; /* wakeup every n events */ @@ -293,6 +294,12 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_USER (2 << 0) #define PERF_RECORD_MISC_HYPERVISOR (3 << 0) +#define PERF_RECORD_MISC_EXACT (1 << 14) +/* + * Reserve the last bit to indicate some extended misc field + */ +#define PERF_RECORD_MISC_EXT_RESERVED (1 << 15) + struct perf_event_header { __u32 type; __u16 misc; @@ -468,6 +475,17 @@ struct perf_raw_record { void *data; }; +struct perf_branch_entry { + __u64 from; + __u64 to; + __u64 flags; +}; + +struct perf_branch_stack { + __u64 nr; + struct perf_branch_entry entries[0]; +}; + struct task_struct; /** diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 574ee58a304..455393e71ca 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1368,6 +1368,8 @@ void perf_event_task_sched_in(struct task_struct *task) if (cpuctx->task_ctx == ctx) return; + perf_disable(); + /* * We want to keep the following priority order: * cpu pinned (that don't need to move), task pinned, @@ -1380,6 +1382,8 @@ void perf_event_task_sched_in(struct task_struct *task) ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE); cpuctx->task_ctx = ctx; + + perf_enable(); } #define MAX_INTERRUPTS (~0ULL) diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index 34202b1be0b..0f944b3be9e 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -57,6 +57,11 @@ OPTIONS --force:: Forcibly add events with existing name. +-n:: +--dry-run:: + Dry run. With this option, --add and --del doesn't execute actual + adding and removal operations. + PROBE SYNTAX ------------ Probe points are defined by following syntax. diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 8a8f52db7e3..0abd25ee595 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -513,6 +513,14 @@ else LIB_OBJS += util/probe-finder.o endif +ifneq ($(shell sh -c "(echo '\#include <newt.h>'; echo 'int main(void) { newtInit(); newtCls(); return newtFinished(); }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -lnewt -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) + msg := $(warning newt not found, disables TUI support. Please install newt-devel or libnewt-dev); + BASIC_CFLAGS += -DNO_NEWT_SUPPORT +else + EXTLIBS += -lnewt + LIB_OBJS += util/newt.o +endif + ifndef NO_LIBPERL PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null` PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 6ad7148451c..45d14660d53 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -452,6 +452,16 @@ static void annotate_sym(struct hist_entry *he) if (!filename) return; + if (dso->origin == DSO__ORIG_KERNEL) { + if (dso->annotate_warned) + return; + dso->annotate_warned = 1; + pr_err("Can't annotate %s: No vmlinux file was found in the " + "path:\n", sym->name); + vmlinux_path__fprintf(stderr); + return; + } + pr_debug("%s: filename=%s, sym=%s, start=%#Lx, end=%#Lx\n", __func__, filename, sym->name, map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end)); diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index c30a3359234..e0dafd9dfeb 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -36,47 +36,41 @@ #include "builtin.h" #include "util/util.h" #include "util/strlist.h" -#include "util/event.h" +#include "util/symbol.h" #include "util/debug.h" #include "util/debugfs.h" -#include "util/symbol.h" -#include "util/thread.h" #include "util/parse-options.h" #include "util/parse-events.h" /* For debugfs_path */ #include "util/probe-finder.h" #include "util/probe-event.h" #define MAX_PATH_LEN 256 -#define MAX_PROBES 128 /* Session management structure */ static struct { - bool need_dwarf; bool list_events; bool force_add; bool show_lines; - int nr_probe; - struct probe_point probes[MAX_PROBES]; + int nevents; + struct perf_probe_event events[MAX_PROBES]; struct strlist *dellist; - struct map_groups kmap_groups; - struct map *kmaps[MAP__NR_TYPES]; struct line_range line_range; -} session; +} params; /* Parse an event definition. Note that any error must die. */ static void parse_probe_event(const char *str) { - struct probe_point *pp = &session.probes[session.nr_probe]; + struct perf_probe_event *pev = ¶ms.events[params.nevents]; - pr_debug("probe-definition(%d): %s\n", session.nr_probe, str); - if (++session.nr_probe == MAX_PROBES) + pr_debug("probe-definition(%d): %s\n", params.nevents, str); + if (++params.nevents == MAX_PROBES) die("Too many probes (> %d) are specified.", MAX_PROBES); - /* Parse perf-probe event into probe_point */ - parse_perf_probe_event(str, pp, &session.need_dwarf); + /* Parse a perf-probe command into event */ + parse_perf_probe_command(str, pev); - pr_debug("%d arguments\n", pp->nr_args); + pr_debug("%d arguments\n", pev->nargs); } static void parse_probe_event_argv(int argc, const char **argv) @@ -88,9 +82,7 @@ static void parse_probe_event_argv(int argc, const char **argv) len = 0; for (i = 0; i < argc; i++) len += strlen(argv[i]) + 1; - buf = zalloc(len + 1); - if (!buf) - die("Failed to allocate memory for binding arguments."); + buf = xzalloc(len + 1); len = 0; for (i = 0; i < argc; i++) len += sprintf(&buf[len], "%s ", argv[i]); @@ -110,43 +102,21 @@ static int opt_del_probe_event(const struct option *opt __used, const char *str, int unset __used) { if (str) { - if (!session.dellist) - session.dellist = strlist__new(true, NULL); - strlist__add(session.dellist, str); + if (!params.dellist) + params.dellist = strlist__new(true, NULL); + strlist__add(params.dellist, str); } return 0; } -/* Currently just checking function name from symbol map */ -static void evaluate_probe_point(struct probe_point *pp) -{ - struct symbol *sym; - sym = map__find_symbol_by_name(session.kmaps[MAP__FUNCTION], - pp->function, NULL); - if (!sym) - die("Kernel symbol \'%s\' not found - probe not added.", - pp->function); -} - #ifndef NO_DWARF_SUPPORT -static int open_vmlinux(void) -{ - if (map__load(session.kmaps[MAP__FUNCTION], NULL) < 0) { - pr_debug("Failed to load kernel map.\n"); - return -EINVAL; - } - pr_debug("Try to open %s\n", - session.kmaps[MAP__FUNCTION]->dso->long_name); - return open(session.kmaps[MAP__FUNCTION]->dso->long_name, O_RDONLY); -} - static int opt_show_lines(const struct option *opt __used, const char *str, int unset __used) { if (str) - parse_line_range_desc(str, &session.line_range); - INIT_LIST_HEAD(&session.line_range.line_list); - session.show_lines = true; + parse_line_range_desc(str, ¶ms.line_range); + INIT_LIST_HEAD(¶ms.line_range.line_list); + params.show_lines = true; return 0; } #endif @@ -169,7 +139,7 @@ static const struct option options[] = { OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), #endif - OPT_BOOLEAN('l', "list", &session.list_events, + OPT_BOOLEAN('l', "list", ¶ms.list_events, "list up current probe events"), OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.", opt_del_probe_event), @@ -197,41 +167,19 @@ static const struct option options[] = { #endif "\t\t\tkprobe-tracer argument format.)\n", opt_add_probe_event), - OPT_BOOLEAN('f', "force", &session.force_add, "forcibly add events" + OPT_BOOLEAN('f', "force", ¶ms.force_add, "forcibly add events" " with existing name"), #ifndef NO_DWARF_SUPPORT OPT_CALLBACK('L', "line", NULL, "FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]", "Show source code lines.", opt_show_lines), #endif + OPT__DRY_RUN(&probe_event_dry_run), OPT_END() }; -/* Initialize symbol maps for vmlinux */ -static void init_vmlinux(void) -{ - symbol_conf.sort_by_name = true; - if (symbol_conf.vmlinux_name == NULL) - symbol_conf.try_vmlinux_path = true; - else - pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name); - if (symbol__init() < 0) - die("Failed to init symbol map."); - - map_groups__init(&session.kmap_groups); - if (map_groups__create_kernel_maps(&session.kmap_groups, - session.kmaps) < 0) - die("Failed to create kernel maps."); -} - int cmd_probe(int argc, const char **argv, const char *prefix __used) { - int i, ret; -#ifndef NO_DWARF_SUPPORT - int fd; -#endif - struct probe_point *pp; - argc = parse_options(argc, argv, options, probe_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (argc > 0) { @@ -242,20 +190,20 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) parse_probe_event_argv(argc, argv); } - if ((!session.nr_probe && !session.dellist && !session.list_events && - !session.show_lines)) + if ((!params.nevents && !params.dellist && !params.list_events && + !params.show_lines)) usage_with_options(probe_usage, options); if (debugfs_valid_mountpoint(debugfs_path) < 0) die("Failed to find debugfs path."); - if (session.list_events) { - if (session.nr_probe != 0 || session.dellist) { + if (params.list_events) { + if (params.nevents != 0 || params.dellist) { pr_warning(" Error: Don't use --list with" " --add/--del.\n"); usage_with_options(probe_usage, options); } - if (session.show_lines) { + if (params.show_lines) { pr_warning(" Error: Don't use --list with --line.\n"); usage_with_options(probe_usage, options); } @@ -264,98 +212,26 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) } #ifndef NO_DWARF_SUPPORT - if (session.show_lines) { - if (session.nr_probe != 0 || session.dellist) { + if (params.show_lines) { + if (params.nevents != 0 || params.dellist) { pr_warning(" Error: Don't use --line with" " --add/--del.\n"); usage_with_options(probe_usage, options); } - init_vmlinux(); - fd = open_vmlinux(); - if (fd < 0) - die("Could not open debuginfo file."); - ret = find_line_range(fd, &session.line_range); - if (ret <= 0) - die("Source line is not found.\n"); - close(fd); - show_line_range(&session.line_range); + + show_line_range(¶ms.line_range); return 0; } #endif - if (session.dellist) { - del_trace_kprobe_events(session.dellist); - strlist__delete(session.dellist); - if (session.nr_probe == 0) + if (params.dellist) { + del_perf_probe_events(params.dellist); + strlist__delete(params.dellist); + if (params.nevents == 0) return 0; } - /* Add probes */ - init_vmlinux(); - - if (session.need_dwarf) -#ifdef NO_DWARF_SUPPORT - die("Debuginfo-analysis is not supported"); -#else /* !NO_DWARF_SUPPORT */ - pr_debug("Some probes require debuginfo.\n"); - - fd = open_vmlinux(); - if (fd < 0) { - if (session.need_dwarf) - die("Could not open debuginfo file."); - - pr_debug("Could not open vmlinux/module file." - " Try to use symbols.\n"); - goto end_dwarf; - } - - /* Searching probe points */ - for (i = 0; i < session.nr_probe; i++) { - pp = &session.probes[i]; - if (pp->found) - continue; - - lseek(fd, SEEK_SET, 0); - ret = find_probe_point(fd, pp); - if (ret > 0) - continue; - if (ret == 0) { /* No error but failed to find probe point. */ - synthesize_perf_probe_point(pp); - die("Probe point '%s' not found. - probe not added.", - pp->probes[0]); - } - /* Error path */ - if (session.need_dwarf) { - if (ret == -ENOENT) - pr_warning("No dwarf info found in the vmlinux - please rebuild with CONFIG_DEBUG_INFO=y.\n"); - die("Could not analyze debuginfo."); - } - pr_debug("An error occurred in debuginfo analysis." - " Try to use symbols.\n"); - break; - } - close(fd); - -end_dwarf: -#endif /* !NO_DWARF_SUPPORT */ - - /* Synthesize probes without dwarf */ - for (i = 0; i < session.nr_probe; i++) { - pp = &session.probes[i]; - if (pp->found) /* This probe is already found. */ - continue; - - evaluate_probe_point(pp); - ret = synthesize_trace_kprobe_event(pp); - if (ret == -E2BIG) - die("probe point definition becomes too long."); - else if (ret < 0) - die("Failed to synthesize a probe point."); - } - - /* Settng up probe points */ - add_trace_kprobe_events(session.probes, session.nr_probe, - session.force_add); + add_perf_probe_events(params.events, params.nevents, params.force_add); return 0; } diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3b8b6387c47..bb5b23db423 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -27,7 +27,7 @@ #include <unistd.h> #include <sched.h> -static int fd[MAX_NR_CPUS][MAX_COUNTERS]; +static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; static long default_interval = 0; @@ -43,6 +43,9 @@ static int raw_samples = 0; static int system_wide = 0; static int profile_cpu = -1; static pid_t target_pid = -1; +static pid_t target_tid = -1; +static pid_t *all_tids = NULL; +static int thread_num = 0; static pid_t child_pid = -1; static int inherit = 1; static int force = 0; @@ -60,7 +63,7 @@ static struct timeval this_read; static u64 bytes_written = 0; -static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; +static struct pollfd *event_array; static int nr_poll = 0; static int nr_cpu = 0; @@ -77,7 +80,7 @@ struct mmap_data { unsigned int prev; }; -static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; +static struct mmap_data *mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; static unsigned long mmap_read_head(struct mmap_data *md) { @@ -225,12 +228,13 @@ static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int n return h_attr; } -static void create_counter(int counter, int cpu, pid_t pid) +static void create_counter(int counter, int cpu) { char *filter = filters[counter]; struct perf_event_attr *attr = attrs + counter; struct perf_header_attr *h_attr; int track = !counter; /* only the first counter needs these */ + int thread_index; int ret; struct { u64 count; @@ -275,118 +279,129 @@ static void create_counter(int counter, int cpu, pid_t pid) attr->mmap = track; attr->comm = track; attr->inherit = inherit; - attr->disabled = 1; + if (target_pid == -1 && !system_wide) { + attr->disabled = 1; + attr->enable_on_exec = 1; + } + for (thread_index = 0; thread_index < thread_num; thread_index++) { try_again: - fd[nr_cpu][counter] = sys_perf_event_open(attr, pid, cpu, group_fd, 0); - - if (fd[nr_cpu][counter] < 0) { - int err = errno; - - if (err == EPERM || err == EACCES) - die("Permission error - are you root?\n"); - else if (err == ENODEV && profile_cpu != -1) - die("No such device - did you specify an out-of-range profile CPU?\n"); + fd[nr_cpu][counter][thread_index] = sys_perf_event_open(attr, + all_tids[thread_index], cpu, group_fd, 0); + + if (fd[nr_cpu][counter][thread_index] < 0) { + int err = errno; + + if (err == EPERM || err == EACCES) + die("Permission error - are you root?\n" + "\t Consider tweaking" + " /proc/sys/kernel/perf_event_paranoid.\n"); + else if (err == ENODEV && profile_cpu != -1) { + die("No such device - did you specify" + " an out-of-range profile CPU?\n"); + } - /* - * If it's cycles then fall back to hrtimer - * based cpu-clock-tick sw counter, which - * is always available even if no PMU support: - */ - if (attr->type == PERF_TYPE_HARDWARE - && attr->config == PERF_COUNT_HW_CPU_CYCLES) { - - if (verbose) - warning(" ... trying to fall back to cpu-clock-ticks\n"); - attr->type = PERF_TYPE_SOFTWARE; - attr->config = PERF_COUNT_SW_CPU_CLOCK; - goto try_again; - } - printf("\n"); - error("perfcounter syscall returned with %d (%s)\n", - fd[nr_cpu][counter], strerror(err)); + /* + * If it's cycles then fall back to hrtimer + * based cpu-clock-tick sw counter, which + * is always available even if no PMU support: + */ + if (attr->type == PERF_TYPE_HARDWARE + && attr->config == PERF_COUNT_HW_CPU_CYCLES) { + + if (verbose) + warning(" ... trying to fall back to cpu-clock-ticks\n"); + attr->type = PERF_TYPE_SOFTWARE; + attr->config = PERF_COUNT_SW_CPU_CLOCK; + goto try_again; + } + printf("\n"); + error("perfcounter syscall returned with %d (%s)\n", + fd[nr_cpu][counter][thread_index], strerror(err)); #if defined(__i386__) || defined(__x86_64__) - if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP) - die("No hardware sampling interrupt available. No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it.\n"); + if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP) + die("No hardware sampling interrupt available." + " No APIC? If so then you can boot the kernel" + " with the \"lapic\" boot parameter to" + " force-enable it.\n"); #endif - die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); - exit(-1); - } + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); + exit(-1); + } - h_attr = get_header_attr(attr, counter); - if (h_attr == NULL) - die("nomem\n"); + h_attr = get_header_attr(attr, counter); + if (h_attr == NULL) + die("nomem\n"); - if (!file_new) { - if (memcmp(&h_attr->attr, attr, sizeof(*attr))) { - fprintf(stderr, "incompatible append\n"); - exit(-1); + if (!file_new) { + if (memcmp(&h_attr->attr, attr, sizeof(*attr))) { + fprintf(stderr, "incompatible append\n"); + exit(-1); + } } - } - if (read(fd[nr_cpu][counter], &read_data, sizeof(read_data)) == -1) { - perror("Unable to read perf file descriptor\n"); - exit(-1); - } + if (read(fd[nr_cpu][counter][thread_index], &read_data, sizeof(read_data)) == -1) { + perror("Unable to read perf file descriptor\n"); + exit(-1); + } - if (perf_header_attr__add_id(h_attr, read_data.id) < 0) { - pr_warning("Not enough memory to add id\n"); - exit(-1); - } + if (perf_header_attr__add_id(h_attr, read_data.id) < 0) { + pr_warning("Not enough memory to add id\n"); + exit(-1); + } - assert(fd[nr_cpu][counter] >= 0); - fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK); + assert(fd[nr_cpu][counter][thread_index] >= 0); + fcntl(fd[nr_cpu][counter][thread_index], F_SETFL, O_NONBLOCK); - /* - * First counter acts as the group leader: - */ - if (group && group_fd == -1) - group_fd = fd[nr_cpu][counter]; - if (multiplex && multiplex_fd == -1) - multiplex_fd = fd[nr_cpu][counter]; + /* + * First counter acts as the group leader: + */ + if (group && group_fd == -1) + group_fd = fd[nr_cpu][counter][thread_index]; + if (multiplex && multiplex_fd == -1) + multiplex_fd = fd[nr_cpu][counter][thread_index]; - if (multiplex && fd[nr_cpu][counter] != multiplex_fd) { + if (multiplex && fd[nr_cpu][counter][thread_index] != multiplex_fd) { - ret = ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_SET_OUTPUT, multiplex_fd); - assert(ret != -1); - } else { - event_array[nr_poll].fd = fd[nr_cpu][counter]; - event_array[nr_poll].events = POLLIN; - nr_poll++; - - mmap_array[nr_cpu][counter].counter = counter; - mmap_array[nr_cpu][counter].prev = 0; - mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1; - mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size, - PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter], 0); - if (mmap_array[nr_cpu][counter].base == MAP_FAILED) { - error("failed to mmap with %d (%s)\n", errno, strerror(errno)); - exit(-1); + ret = ioctl(fd[nr_cpu][counter][thread_index], PERF_EVENT_IOC_SET_OUTPUT, multiplex_fd); + assert(ret != -1); + } else { + event_array[nr_poll].fd = fd[nr_cpu][counter][thread_index]; + event_array[nr_poll].events = POLLIN; + nr_poll++; + + mmap_array[nr_cpu][counter][thread_index].counter = counter; + mmap_array[nr_cpu][counter][thread_index].prev = 0; + mmap_array[nr_cpu][counter][thread_index].mask = mmap_pages*page_size - 1; + mmap_array[nr_cpu][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size, + PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter][thread_index], 0); + if (mmap_array[nr_cpu][counter][thread_index].base == MAP_FAILED) { + error("failed to mmap with %d (%s)\n", errno, strerror(errno)); + exit(-1); + } } - } - if (filter != NULL) { - ret = ioctl(fd[nr_cpu][counter], - PERF_EVENT_IOC_SET_FILTER, filter); - if (ret) { - error("failed to set filter with %d (%s)\n", errno, - strerror(errno)); - exit(-1); + if (filter != NULL) { + ret = ioctl(fd[nr_cpu][counter][thread_index], + PERF_EVENT_IOC_SET_FILTER, filter); + if (ret) { + error("failed to set filter with %d (%s)\n", errno, + strerror(errno)); + exit(-1); + } } } - - ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_ENABLE); } -static void open_counters(int cpu, pid_t pid) +static void open_counters(int cpu) { int counter; group_fd = -1; for (counter = 0; counter < nr_counters; counter++) - create_counter(counter, cpu, pid); + create_counter(counter, cpu); nr_cpu++; } @@ -421,7 +436,7 @@ static int __cmd_record(int argc, const char **argv) int err; unsigned long waking = 0; int child_ready_pipe[2], go_pipe[2]; - const bool forks = target_pid == -1 && argc > 0; + const bool forks = argc > 0; char buf; page_size = sysconf(_SC_PAGE_SIZE); @@ -492,13 +507,13 @@ static int __cmd_record(int argc, const char **argv) atexit(atexit_header); if (forks) { - pid = fork(); + child_pid = fork(); if (pid < 0) { perror("failed to fork"); exit(-1); } - if (!pid) { + if (!child_pid) { close(child_ready_pipe[0]); close(go_pipe[1]); fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); @@ -527,10 +542,8 @@ static int __cmd_record(int argc, const char **argv) exit(-1); } - child_pid = pid; - - if (!system_wide) - target_pid = pid; + if (!system_wide && target_tid == -1 && target_pid == -1) + all_tids[0] = child_pid; close(child_ready_pipe[1]); close(go_pipe[0]); @@ -544,13 +557,12 @@ static int __cmd_record(int argc, const char **argv) close(child_ready_pipe[0]); } - if ((!system_wide && !inherit) || profile_cpu != -1) { - open_counters(profile_cpu, target_pid); + open_counters(profile_cpu); } else { nr_cpus = read_cpu_map(); for (i = 0; i < nr_cpus; i++) - open_counters(cpumap[i], target_pid); + open_counters(cpumap[i]); } if (file_new) { @@ -575,7 +587,7 @@ static int __cmd_record(int argc, const char **argv) } if (!system_wide && profile_cpu == -1) - event__synthesize_thread(target_pid, process_synthesized_event, + event__synthesize_thread(target_tid, process_synthesized_event, session); else event__synthesize_threads(process_synthesized_event, session); @@ -598,11 +610,16 @@ static int __cmd_record(int argc, const char **argv) for (;;) { int hits = samples; + int thread; for (i = 0; i < nr_cpu; i++) { for (counter = 0; counter < nr_counters; counter++) { - if (mmap_array[i][counter].base) - mmap_read(&mmap_array[i][counter]); + for (thread = 0; + thread < thread_num; thread++) { + if (mmap_array[i][counter][thread].base) + mmap_read(&mmap_array[i][counter][thread]); + } + } } @@ -615,8 +632,15 @@ static int __cmd_record(int argc, const char **argv) if (done) { for (i = 0; i < nr_cpu; i++) { - for (counter = 0; counter < nr_counters; counter++) - ioctl(fd[i][counter], PERF_EVENT_IOC_DISABLE); + for (counter = 0; + counter < nr_counters; + counter++) { + for (thread = 0; + thread < thread_num; + thread++) + ioctl(fd[i][counter][thread], + PERF_EVENT_IOC_DISABLE); + } } } } @@ -648,7 +672,9 @@ static const struct option options[] = { OPT_CALLBACK(0, "filter", NULL, "filter", "event filter", parse_filter), OPT_INTEGER('p', "pid", &target_pid, - "record events on existing pid"), + "record events on existing process id"), + OPT_INTEGER('t', "tid", &target_tid, + "record events on existing thread id"), OPT_INTEGER('r', "realtime", &realtime_prio, "collect data with this RT SCHED_FIFO priority"), OPT_BOOLEAN('R', "raw-samples", &raw_samples, @@ -689,10 +715,12 @@ static const struct option options[] = { int cmd_record(int argc, const char **argv, const char *prefix __used) { int counter; + int i,j; argc = parse_options(argc, argv, options, record_usage, PARSE_OPT_STOP_AT_NON_OPTION); - if (!argc && target_pid == -1 && !system_wide && profile_cpu == -1) + if (!argc && target_pid == -1 && target_tid == -1 && + !system_wide && profile_cpu == -1) usage_with_options(record_usage, options); symbol__init(); @@ -703,6 +731,37 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) attrs[0].config = PERF_COUNT_HW_CPU_CYCLES; } + if (target_pid != -1) { + target_tid = target_pid; + thread_num = find_all_tid(target_pid, &all_tids); + if (thread_num <= 0) { + fprintf(stderr, "Can't find all threads of pid %d\n", + target_pid); + usage_with_options(record_usage, options); + } + } else { + all_tids=malloc(sizeof(pid_t)); + if (!all_tids) + return -ENOMEM; + + all_tids[0] = target_tid; + thread_num = 1; + } + + for (i = 0; i < MAX_NR_CPUS; i++) { + for (j = 0; j < MAX_COUNTERS; j++) { + fd[i][j] = malloc(sizeof(int)*thread_num); + mmap_array[i][j] = malloc( + sizeof(struct mmap_data)*thread_num); + if (!fd[i][j] || !mmap_array[i][j]) + return -ENOMEM; + } + } + event_array = malloc( + sizeof(struct pollfd)*MAX_NR_CPUS*MAX_COUNTERS*thread_num); + if (!event_array) + return -ENOMEM; + /* * User specified count overrides default frequency. */ diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f815de25d0f..1f9f8695f05 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -267,6 +267,7 @@ static int __cmd_report(void) int ret = -EINVAL; struct perf_session *session; struct rb_node *next; + const char *help = "For a higher level overview, try: perf report --sort comm,dso"; session = perf_session__new(input_name, O_RDONLY, force); if (session == NULL) @@ -301,30 +302,38 @@ static int __cmd_report(void) stats = rb_entry(next, struct event_stat_id, rb_node); perf_session__collapse_resort(&stats->hists); perf_session__output_resort(&stats->hists, stats->stats.total); - if (rb_first(&session->stats_by_id) == - rb_last(&session->stats_by_id)) - fprintf(stdout, "# Samples: %Ld\n#\n", - stats->stats.total); - else - fprintf(stdout, "# Samples: %Ld %s\n#\n", - stats->stats.total, - __event_name(stats->type, stats->config)); - perf_session__fprintf_hists(&stats->hists, NULL, false, stdout, + if (use_browser) + perf_session__browse_hists(&stats->hists, + stats->stats.total, help); + else { + if (rb_first(&session->stats_by_id) == + rb_last(&session->stats_by_id)) + fprintf(stdout, "# Samples: %Ld\n#\n", + stats->stats.total); + else + fprintf(stdout, "# Samples: %Ld %s\n#\n", + stats->stats.total, + __event_name(stats->type, stats->config)); + + perf_session__fprintf_hists(&stats->hists, NULL, false, stdout, stats->stats.total); - fprintf(stdout, "\n\n"); + fprintf(stdout, "\n\n"); + } + next = rb_next(&stats->rb_node); } - if (sort_order == default_sort_order && - parent_pattern == default_parent_pattern) - fprintf(stdout, "#\n# (For a higher level overview, try: perf report --sort comm,dso)\n#\n"); + if (!use_browser && sort_order == default_sort_order && + parent_pattern == default_parent_pattern) { + fprintf(stdout, "#\n# (%s)\n#\n", help); - if (show_threads) { - bool raw_printing_style = !strcmp(pretty_printing_style, "raw"); - perf_read_values_display(stdout, &show_threads_values, - raw_printing_style); - perf_read_values_destroy(&show_threads_values); + if (show_threads) { + bool style = !strcmp(pretty_printing_style, "raw"); + perf_read_values_display(stdout, &show_threads_values, + style); + perf_read_values_destroy(&show_threads_values); + } } out_delete: perf_session__delete(session); @@ -447,7 +456,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) { argc = parse_options(argc, argv, options, report_usage, 0); - setup_pager(); + setup_browser(); if (symbol__init() < 0) return -1; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 95db31cff6f..c92f90ff5a9 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -46,6 +46,7 @@ #include "util/debug.h" #include "util/header.h" #include "util/cpumap.h" +#include "util/thread.h" #include <sys/prctl.h> #include <math.h> @@ -74,10 +75,13 @@ static int run_count = 1; static int inherit = 1; static int scale = 1; static pid_t target_pid = -1; +static pid_t target_tid = -1; +static pid_t *all_tids = NULL; +static int thread_num = 0; static pid_t child_pid = -1; static int null_run = 0; -static int fd[MAX_NR_CPUS][MAX_COUNTERS]; +static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; static int event_scaled[MAX_COUNTERS]; @@ -140,9 +144,10 @@ struct stats runtime_branches_stats; #define ERR_PERF_OPEN \ "Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" -static void create_perf_stat_counter(int counter, int pid) +static void create_perf_stat_counter(int counter) { struct perf_event_attr *attr = attrs + counter; + int thread; if (scale) attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | @@ -152,20 +157,26 @@ static void create_perf_stat_counter(int counter, int pid) unsigned int cpu; for (cpu = 0; cpu < nr_cpus; cpu++) { - fd[cpu][counter] = sys_perf_event_open(attr, -1, cpumap[cpu], -1, 0); - if (fd[cpu][counter] < 0 && verbose) + fd[cpu][counter][0] = sys_perf_event_open(attr, + -1, cpumap[cpu], -1, 0); + if (fd[cpu][counter][0] < 0 && verbose) fprintf(stderr, ERR_PERF_OPEN, counter, - fd[cpu][counter], strerror(errno)); + fd[cpu][counter][0], strerror(errno)); } } else { attr->inherit = inherit; - attr->disabled = 1; - attr->enable_on_exec = 1; - - fd[0][counter] = sys_perf_event_open(attr, pid, -1, -1, 0); - if (fd[0][counter] < 0 && verbose) - fprintf(stderr, ERR_PERF_OPEN, counter, - fd[0][counter], strerror(errno)); + if (target_pid == -1) { + attr->disabled = 1; + attr->enable_on_exec = 1; + } + for (thread = 0; thread < thread_num; thread++) { + fd[0][counter][thread] = sys_perf_event_open(attr, + all_tids[thread], -1, -1, 0); + if (fd[0][counter][thread] < 0 && verbose) + fprintf(stderr, ERR_PERF_OPEN, counter, + fd[0][counter][thread], + strerror(errno)); + } } } @@ -190,25 +201,28 @@ static void read_counter(int counter) unsigned int cpu; size_t res, nv; int scaled; - int i; + int i, thread; count[0] = count[1] = count[2] = 0; nv = scale ? 3 : 1; for (cpu = 0; cpu < nr_cpus; cpu++) { - if (fd[cpu][counter] < 0) - continue; - - res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); - assert(res == nv * sizeof(u64)); - - close(fd[cpu][counter]); - fd[cpu][counter] = -1; - - count[0] += single_count[0]; - if (scale) { - count[1] += single_count[1]; - count[2] += single_count[2]; + for (thread = 0; thread < thread_num; thread++) { + if (fd[cpu][counter][thread] < 0) + continue; + + res = read(fd[cpu][counter][thread], + single_count, nv * sizeof(u64)); + assert(res == nv * sizeof(u64)); + + close(fd[cpu][counter][thread]); + fd[cpu][counter][thread] = -1; + + count[0] += single_count[0]; + if (scale) { + count[1] += single_count[1]; + count[2] += single_count[2]; + } } } @@ -251,9 +265,8 @@ static int run_perf_stat(int argc __used, const char **argv) unsigned long long t0, t1; int status = 0; int counter; - int pid = target_pid; int child_ready_pipe[2], go_pipe[2]; - const bool forks = (target_pid == -1 && argc > 0); + const bool forks = (argc > 0); char buf; if (!system_wide) @@ -265,10 +278,10 @@ static int run_perf_stat(int argc __used, const char **argv) } if (forks) { - if ((pid = fork()) < 0) + if ((child_pid = fork()) < 0) perror("failed to fork"); - if (!pid) { + if (!child_pid) { close(child_ready_pipe[0]); close(go_pipe[1]); fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); @@ -297,7 +310,8 @@ static int run_perf_stat(int argc __used, const char **argv) exit(-1); } - child_pid = pid; + if (target_tid == -1 && target_pid == -1 && !system_wide) + all_tids[0] = child_pid; /* * Wait for the child to be ready to exec. @@ -310,7 +324,7 @@ static int run_perf_stat(int argc __used, const char **argv) } for (counter = 0; counter < nr_counters; counter++) - create_perf_stat_counter(counter, pid); + create_perf_stat_counter(counter); /* * Enable counters and exec the command: @@ -321,7 +335,7 @@ static int run_perf_stat(int argc __used, const char **argv) close(go_pipe[1]); wait(&status); } else { - while(!done); + while(!done) sleep(1); } t1 = rdclock(); @@ -429,12 +443,14 @@ static void print_stat(int argc, const char **argv) fprintf(stderr, "\n"); fprintf(stderr, " Performance counter stats for "); - if(target_pid == -1) { + if(target_pid == -1 && target_tid == -1) { fprintf(stderr, "\'%s", argv[0]); for (i = 1; i < argc; i++) fprintf(stderr, " %s", argv[i]); - }else - fprintf(stderr, "task pid \'%d", target_pid); + } else if (target_pid != -1) + fprintf(stderr, "process id \'%d", target_pid); + else + fprintf(stderr, "thread id \'%d", target_tid); fprintf(stderr, "\'"); if (run_count > 1) @@ -459,7 +475,7 @@ static volatile int signr = -1; static void skip_signal(int signo) { - if(target_pid != -1) + if(child_pid == -1) done = 1; signr = signo; @@ -489,7 +505,9 @@ static const struct option options[] = { OPT_BOOLEAN('i', "inherit", &inherit, "child tasks inherit counters"), OPT_INTEGER('p', "pid", &target_pid, - "stat events on existing pid"), + "stat events on existing process id"), + OPT_INTEGER('t', "tid", &target_tid, + "stat events on existing thread id"), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), OPT_BOOLEAN('c', "scale", &scale, @@ -506,10 +524,11 @@ static const struct option options[] = { int cmd_stat(int argc, const char **argv, const char *prefix __used) { int status; + int i,j; argc = parse_options(argc, argv, options, stat_usage, PARSE_OPT_STOP_AT_NON_OPTION); - if (!argc && target_pid == -1) + if (!argc && target_pid == -1 && target_tid == -1) usage_with_options(stat_usage, options); if (run_count <= 0) usage_with_options(stat_usage, options); @@ -525,6 +544,31 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) else nr_cpus = 1; + if (target_pid != -1) { + target_tid = target_pid; + thread_num = find_all_tid(target_pid, &all_tids); + if (thread_num <= 0) { + fprintf(stderr, "Can't find all threads of pid %d\n", + target_pid); + usage_with_options(stat_usage, options); + } + } else { + all_tids=malloc(sizeof(pid_t)); + if (!all_tids) + return -ENOMEM; + + all_tids[0] = target_tid; + thread_num = 1; + } + + for (i = 0; i < MAX_NR_CPUS; i++) { + for (j = 0; j < MAX_COUNTERS; j++) { + fd[i][j] = malloc(sizeof(int)*thread_num); + if (!fd[i][j]) + return -ENOMEM; + } + } + /* * We dont want to block the signals - that would cause * child tasks to inherit that and Ctrl-C would not work. diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0b719e3dde0..5f3ac9ff354 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -55,7 +55,7 @@ #include <linux/unistd.h> #include <linux/types.h> -static int fd[MAX_NR_CPUS][MAX_COUNTERS]; +static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; static int system_wide = 0; @@ -65,6 +65,9 @@ static int count_filter = 5; static int print_entries; static int target_pid = -1; +static int target_tid = -1; +static pid_t *all_tids = NULL; +static int thread_num = 0; static int inherit = 0; static int profile_cpu = -1; static int nr_cpus = 0; @@ -133,7 +136,7 @@ static inline struct symbol *sym_entry__symbol(struct sym_entry *self) return ((void *)self) + symbol_conf.priv_size; } -static void get_term_dimensions(struct winsize *ws) +void get_term_dimensions(struct winsize *ws) { char *s = getenv("LINES"); @@ -169,7 +172,7 @@ static void sig_winch_handler(int sig __used) update_print_entries(&winsize); } -static void parse_source(struct sym_entry *syme) +static int parse_source(struct sym_entry *syme) { struct symbol *sym; struct sym_entry_source *source; @@ -180,12 +183,21 @@ static void parse_source(struct sym_entry *syme) u64 len; if (!syme) - return; + return -1; + + sym = sym_entry__symbol(syme); + map = syme->map; + + /* + * We can't annotate with just /proc/kallsyms + */ + if (map->dso->origin == DSO__ORIG_KERNEL) + return -1; if (syme->src == NULL) { syme->src = zalloc(sizeof(*source)); if (syme->src == NULL) - return; + return -1; pthread_mutex_init(&syme->src->lock, NULL); } @@ -195,9 +207,6 @@ static void parse_source(struct sym_entry *syme) pthread_mutex_lock(&source->lock); goto out_assign; } - - sym = sym_entry__symbol(syme); - map = syme->map; path = map->dso->long_name; len = sym->end - sym->start; @@ -209,7 +218,7 @@ static void parse_source(struct sym_entry *syme) file = popen(command, "r"); if (!file) - return; + return -1; pthread_mutex_lock(&source->lock); source->lines_tail = &source->lines; @@ -245,6 +254,7 @@ static void parse_source(struct sym_entry *syme) out_assign: sym_filter_entry = syme; pthread_mutex_unlock(&source->lock); + return 0; } static void __zero_source_counters(struct sym_entry *syme) @@ -411,6 +421,7 @@ static double sym_weight(const struct sym_entry *sym) static long samples; static long userspace_samples; +static long exact_samples; static const char CONSOLE_CLEAR[] = "[H[2J"; static void __list_insert_active_sym(struct sym_entry *syme) @@ -451,14 +462,15 @@ static void print_sym_table(void) int counter, snap = !display_weighted ? sym_counter : 0; float samples_per_sec = samples/delay_secs; float ksamples_per_sec = (samples-userspace_samples)/delay_secs; + float esamples_percent = (100.0*exact_samples)/samples; float sum_ksamples = 0.0; struct sym_entry *syme, *n; struct rb_root tmp = RB_ROOT; struct rb_node *nd; - int sym_width = 0, dso_width = 0, max_dso_width; + int sym_width = 0, dso_width = 0, dso_short_width = 0; const int win_width = winsize.ws_col - 1; - samples = userspace_samples = 0; + samples = userspace_samples = exact_samples = 0; /* Sort the active symbols */ pthread_mutex_lock(&active_symbols_lock); @@ -489,9 +501,10 @@ static void print_sym_table(void) puts(CONSOLE_CLEAR); printf("%-*.*s\n", win_width, win_width, graph_dotted_line); - printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% [", + printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% exact: %4.1f%% [", samples_per_sec, - 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); + 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)), + esamples_percent); if (nr_counters == 1 || !display_weighted) { printf("%Ld", (u64)attrs[0].sample_period); @@ -514,13 +527,15 @@ static void print_sym_table(void) if (target_pid != -1) printf(" (target_pid: %d", target_pid); + else if (target_tid != -1) + printf(" (target_tid: %d", target_tid); else printf(" (all"); if (profile_cpu != -1) printf(", cpu: %d)\n", profile_cpu); else { - if (target_pid != -1) + if (target_tid != -1) printf(")\n"); else printf(", %d CPUs)\n", nr_cpus); @@ -545,15 +560,20 @@ static void print_sym_table(void) if (syme->map->dso->long_name_len > dso_width) dso_width = syme->map->dso->long_name_len; + if (syme->map->dso->short_name_len > dso_short_width) + dso_short_width = syme->map->dso->short_name_len; + if (syme->name_len > sym_width) sym_width = syme->name_len; } printed = 0; - max_dso_width = winsize.ws_col - sym_width - 29; - if (dso_width > max_dso_width) - dso_width = max_dso_width; + if (sym_width + dso_width > winsize.ws_col - 29) { + dso_width = dso_short_width; + if (sym_width + dso_width > winsize.ws_col - 29) + sym_width = winsize.ws_col - dso_width - 29; + } putchar('\n'); if (nr_counters == 1) printf(" samples pcnt"); @@ -955,6 +975,9 @@ static void event__process_sample(const event_t *self, return; } + if (self->header.misc & PERF_RECORD_MISC_EXACT) + exact_samples++; + if (event__preprocess_sample(self, session, &al, symbol_filter) < 0 || al.filtered) return; @@ -985,7 +1008,17 @@ static void event__process_sample(const event_t *self, if (sym_filter_entry_sched) { sym_filter_entry = sym_filter_entry_sched; sym_filter_entry_sched = NULL; - parse_source(sym_filter_entry); + if (parse_source(sym_filter_entry) < 0) { + struct symbol *sym = sym_entry__symbol(sym_filter_entry); + + pr_err("Can't annotate %s", sym->name); + if (sym_filter_entry->map->dso->origin == DSO__ORIG_KERNEL) { + pr_err(": No vmlinux file was found in the path:\n"); + vmlinux_path__fprintf(stderr); + } else + pr_err(".\n"); + exit(1); + } } syme = symbol__priv(al.sym); @@ -1101,16 +1134,21 @@ static void perf_session__mmap_read_counter(struct perf_session *self, md->prev = old; } -static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; -static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; +static struct pollfd *event_array; +static struct mmap_data *mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; static void perf_session__mmap_read(struct perf_session *self) { - int i, counter; + int i, counter, thread_index; for (i = 0; i < nr_cpus; i++) { for (counter = 0; counter < nr_counters; counter++) - perf_session__mmap_read_counter(self, &mmap_array[i][counter]); + for (thread_index = 0; + thread_index < thread_num; + thread_index++) { + perf_session__mmap_read_counter(self, + &mmap_array[i][counter][thread_index]); + } } } @@ -1121,9 +1159,10 @@ static void start_counter(int i, int counter) { struct perf_event_attr *attr; int cpu; + int thread_index; cpu = profile_cpu; - if (target_pid == -1 && profile_cpu == -1) + if (target_tid == -1 && profile_cpu == -1) cpu = cpumap[i]; attr = attrs + counter; @@ -1139,55 +1178,58 @@ static void start_counter(int i, int counter) attr->inherit = (cpu < 0) && inherit; attr->mmap = 1; + for (thread_index = 0; thread_index < thread_num; thread_index++) { try_again: - fd[i][counter] = sys_perf_event_open(attr, target_pid, cpu, group_fd, 0); - - if (fd[i][counter] < 0) { - int err = errno; + fd[i][counter][thread_index] = sys_perf_event_open(attr, + all_tids[thread_index], cpu, group_fd, 0); + + if (fd[i][counter][thread_index] < 0) { + int err = errno; + + if (err == EPERM || err == EACCES) + die("No permission - are you root?\n"); + /* + * If it's cycles then fall back to hrtimer + * based cpu-clock-tick sw counter, which + * is always available even if no PMU support: + */ + if (attr->type == PERF_TYPE_HARDWARE + && attr->config == PERF_COUNT_HW_CPU_CYCLES) { + + if (verbose) + warning(" ... trying to fall back to cpu-clock-ticks\n"); + + attr->type = PERF_TYPE_SOFTWARE; + attr->config = PERF_COUNT_SW_CPU_CLOCK; + goto try_again; + } + printf("\n"); + error("perfcounter syscall returned with %d (%s)\n", + fd[i][counter][thread_index], strerror(err)); + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); + exit(-1); + } + assert(fd[i][counter][thread_index] >= 0); + fcntl(fd[i][counter][thread_index], F_SETFL, O_NONBLOCK); - if (err == EPERM || err == EACCES) - die("No permission - are you root?\n"); /* - * If it's cycles then fall back to hrtimer - * based cpu-clock-tick sw counter, which - * is always available even if no PMU support: + * First counter acts as the group leader: */ - if (attr->type == PERF_TYPE_HARDWARE - && attr->config == PERF_COUNT_HW_CPU_CYCLES) { - - if (verbose) - warning(" ... trying to fall back to cpu-clock-ticks\n"); - - attr->type = PERF_TYPE_SOFTWARE; - attr->config = PERF_COUNT_SW_CPU_CLOCK; - goto try_again; - } - printf("\n"); - error("perfcounter syscall returned with %d (%s)\n", - fd[i][counter], strerror(err)); - die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); - exit(-1); + if (group && group_fd == -1) + group_fd = fd[i][counter][thread_index]; + + event_array[nr_poll].fd = fd[i][counter][thread_index]; + event_array[nr_poll].events = POLLIN; + nr_poll++; + + mmap_array[i][counter][thread_index].counter = counter; + mmap_array[i][counter][thread_index].prev = 0; + mmap_array[i][counter][thread_index].mask = mmap_pages*page_size - 1; + mmap_array[i][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size, + PROT_READ, MAP_SHARED, fd[i][counter][thread_index], 0); + if (mmap_array[i][counter][thread_index].base == MAP_FAILED) + die("failed to mmap with %d (%s)\n", errno, strerror(errno)); } - assert(fd[i][counter] >= 0); - fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); - - /* - * First counter acts as the group leader: - */ - if (group && group_fd == -1) - group_fd = fd[i][counter]; - - event_array[nr_poll].fd = fd[i][counter]; - event_array[nr_poll].events = POLLIN; - nr_poll++; - - mmap_array[i][counter].counter = counter; - mmap_array[i][counter].prev = 0; - mmap_array[i][counter].mask = mmap_pages*page_size - 1; - mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, - PROT_READ, MAP_SHARED, fd[i][counter], 0); - if (mmap_array[i][counter].base == MAP_FAILED) - die("failed to mmap with %d (%s)\n", errno, strerror(errno)); } static int __cmd_top(void) @@ -1203,8 +1245,8 @@ static int __cmd_top(void) if (session == NULL) return -ENOMEM; - if (target_pid != -1) - event__synthesize_thread(target_pid, event__process, session); + if (target_tid != -1) + event__synthesize_thread(target_tid, event__process, session); else event__synthesize_threads(event__process, session); @@ -1215,7 +1257,7 @@ static int __cmd_top(void) } /* Wait for a minimal set of events before starting the snapshot */ - poll(event_array, nr_poll, 100); + poll(&event_array[0], nr_poll, 100); perf_session__mmap_read(session); @@ -1258,7 +1300,9 @@ static const struct option options[] = { OPT_INTEGER('c', "count", &default_interval, "event period to sample"), OPT_INTEGER('p', "pid", &target_pid, - "profile events on existing pid"), + "profile events on existing process id"), + OPT_INTEGER('t', "tid", &target_tid, + "profile events on existing thread id"), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), OPT_INTEGER('C', "CPU", &profile_cpu, @@ -1299,6 +1343,7 @@ static const struct option options[] = { int cmd_top(int argc, const char **argv, const char *prefix __used) { int counter; + int i,j; page_size = sysconf(_SC_PAGE_SIZE); @@ -1306,8 +1351,39 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) if (argc) usage_with_options(top_usage, options); + if (target_pid != -1) { + target_tid = target_pid; + thread_num = find_all_tid(target_pid, &all_tids); + if (thread_num <= 0) { + fprintf(stderr, "Can't find all threads of pid %d\n", + target_pid); + usage_with_options(top_usage, options); + } + } else { + all_tids=malloc(sizeof(pid_t)); + if (!all_tids) + return -ENOMEM; + + all_tids[0] = target_tid; + thread_num = 1; + } + + for (i = 0; i < MAX_NR_CPUS; i++) { + for (j = 0; j < MAX_COUNTERS; j++) { + fd[i][j] = malloc(sizeof(int)*thread_num); + mmap_array[i][j] = malloc( + sizeof(struct mmap_data)*thread_num); + if (!fd[i][j] || !mmap_array[i][j]) + return -ENOMEM; + } + } + event_array = malloc( + sizeof(struct pollfd)*MAX_NR_CPUS*MAX_COUNTERS*thread_num); + if (!event_array) + return -ENOMEM; + /* CPU and PID are mutually exclusive */ - if (target_pid != -1 && profile_cpu != -1) { + if (target_tid > 0 && profile_cpu != -1) { printf("WARNING: PID switch overriding CPU\n"); sleep(1); profile_cpu = -1; @@ -1348,7 +1424,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) attrs[counter].sample_period = default_interval; } - if (target_pid != -1 || profile_cpu != -1) + if (target_tid != -1 || profile_cpu != -1) nr_cpus = 1; else nr_cpus = read_cpu_map(); diff --git a/tools/perf/perf.c b/tools/perf/perf.c index cd32c200cdb..d2de8393a33 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -16,6 +16,8 @@ #include "util/string.h" #include "util/debugfs.h" +bool use_browser; + const char perf_usage_string[] = "perf [--version] [--help] COMMAND [ARGS]"; @@ -265,6 +267,8 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) if (status) return status & 0xff; + exit_browser(); + /* Somebody closed stdout? */ if (fstat(fileno(stdout), &st)) return 0; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 6fb379bc1d1..aa786158b66 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -1,6 +1,10 @@ #ifndef _PERF_PERF_H #define _PERF_PERF_H +struct winsize; + +void get_term_dimensions(struct winsize *ws); + #if defined(__i386__) #include "../../arch/x86/include/asm/unistd.h" #define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 918eb376abe..47b12a3d11b 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -1,6 +1,7 @@ #ifndef __PERF_CACHE_H #define __PERF_CACHE_H +#include <stdbool.h> #include "util.h" #include "strbuf.h" #include "../perf.h" @@ -69,6 +70,19 @@ extern const char *pager_program; extern int pager_in_use(void); extern int pager_use_color; +extern bool use_browser; + +#ifdef NO_NEWT_SUPPORT +static inline void setup_browser(void) +{ + setup_pager(); +} +static inline void exit_browser(void) {} +#else +void setup_browser(void); +void exit_browser(void); +#endif + extern const char *editor_program; extern const char *excludes_file; diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index e88bca55a59..9da01914e0a 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -203,7 +203,10 @@ int color_fprintf(FILE *fp, const char *color, const char *fmt, ...) int r; va_start(args, fmt); - r = color_vfprintf(fp, color, fmt, args); + if (use_browser) + r = vfprintf(fp, fmt, args); + else + r = color_vfprintf(fp, color, fmt, args); va_end(args); return r; } diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 0905600c385..033d66db863 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -6,6 +6,7 @@ #include <stdarg.h> #include <stdio.h> +#include "cache.h" #include "color.h" #include "event.h" #include "debug.h" @@ -21,7 +22,10 @@ int eprintf(int level, const char *fmt, ...) if (verbose >= level) { va_start(args, fmt); - ret = vfprintf(stderr, fmt, args); + if (use_browser) + ret = browser__show_help(fmt, args); + else + ret = vfprintf(stderr, fmt, args); va_end(args); } diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index c6c24c522de..0172edf3f15 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -7,9 +7,16 @@ extern int verbose; extern int dump_trace; -int eprintf(int level, - const char *fmt, ...) __attribute__((format(printf, 2, 3))); int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); void trace_event(event_t *event); +#ifdef NO_NEWT_SUPPORT +static inline int browser__show_help(const char *format __used, va_list ap __used) +{ + return 0; +} +#else +int browser__show_help(const char *format, va_list ap); +#endif + #endif /* __PERF_DEBUG_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 2be33c7dbf0..c37da8b8857 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -455,11 +455,11 @@ static size_t hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, return ret; } -static size_t hist_entry__fprintf(struct hist_entry *self, - struct perf_session *pair_session, - bool show_displacement, - long displacement, FILE *fp, - u64 session_total) +size_t hist_entry__fprintf(struct hist_entry *self, + struct perf_session *pair_session, + bool show_displacement, + long displacement, FILE *fp, + u64 session_total) { struct sort_entry *se; u64 count, total; @@ -485,9 +485,9 @@ static size_t hist_entry__fprintf(struct hist_entry *self, if (symbol_conf.show_nr_samples) { if (sep) - fprintf(fp, "%c%lld", *sep, count); + ret += fprintf(fp, "%c%lld", *sep, count); else - fprintf(fp, "%11lld", count); + ret += fprintf(fp, "%11lld", count); } if (pair_session) { @@ -518,9 +518,9 @@ static size_t hist_entry__fprintf(struct hist_entry *self, snprintf(bf, sizeof(bf), " "); if (sep) - fprintf(fp, "%c%s", *sep, bf); + ret += fprintf(fp, "%c%s", *sep, bf); else - fprintf(fp, "%6.6s", bf); + ret += fprintf(fp, "%6.6s", bf); } } @@ -528,27 +528,27 @@ static size_t hist_entry__fprintf(struct hist_entry *self, if (se->elide) continue; - fprintf(fp, "%s", sep ?: " "); + ret += fprintf(fp, "%s", sep ?: " "); ret += se->print(fp, self, se->width ? *se->width : 0); } - ret += fprintf(fp, "\n"); - - if (symbol_conf.use_callchain) { - int left_margin = 0; + return ret + fprintf(fp, "\n"); +} - if (sort__first_dimension == SORT_COMM) { - se = list_first_entry(&hist_entry__sort_list, typeof(*se), - list); - left_margin = se->width ? *se->width : 0; - left_margin -= thread__comm_len(self->thread); - } +static size_t hist_entry__fprintf_callchain(struct hist_entry *self, FILE *fp, + u64 session_total) +{ + int left_margin = 0; - hist_entry_callchain__fprintf(fp, self, session_total, - left_margin); + if (sort__first_dimension == SORT_COMM) { + struct sort_entry *se = list_first_entry(&hist_entry__sort_list, + typeof(*se), list); + left_margin = se->width ? *se->width : 0; + left_margin -= thread__comm_len(self->thread); } - return ret; + return hist_entry_callchain__fprintf(fp, self, session_total, + left_margin); } size_t perf_session__fprintf_hists(struct rb_root *hists, @@ -655,6 +655,10 @@ print_entries: } ret += hist_entry__fprintf(h, pair, show_displacement, displacement, fp, session_total); + + if (symbol_conf.use_callchain) + ret += hist_entry__fprintf_callchain(h, fp, session_total); + if (h->map == NULL && verbose > 1) { __map_groups__fprintf_maps(&h->thread->mg, MAP__FUNCTION, fp); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 16f360cce5b..fe366ce5db4 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -18,6 +18,11 @@ struct hist_entry *__perf_session__add_hist_entry(struct rb_root *hists, u64 count, bool *hit); extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *); extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *); +size_t hist_entry__fprintf(struct hist_entry *self, + struct perf_session *pair_session, + bool show_displacement, + long displacement, FILE *fp, + u64 session_total); void hist_entry__free(struct hist_entry *); void perf_session__output_resort(struct rb_root *hists, u64 total_samples); diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h index f2611655ab5..388ab1bfd11 100644 --- a/tools/perf/util/include/linux/kernel.h +++ b/tools/perf/util/include/linux/kernel.h @@ -85,16 +85,19 @@ simple_strtoul(const char *nptr, char **endptr, int base) return strtoul(nptr, endptr, base); } +int eprintf(int level, + const char *fmt, ...) __attribute__((format(printf, 2, 3))); + #ifndef pr_fmt #define pr_fmt(fmt) fmt #endif #define pr_err(fmt, ...) \ - do { fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__); } while (0) + eprintf(0, pr_fmt(fmt), ##__VA_ARGS__) #define pr_warning(fmt, ...) \ - do { fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__); } while (0) + eprintf(0, pr_fmt(fmt), ##__VA_ARGS__) #define pr_info(fmt, ...) \ - do { fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__); } while (0) + eprintf(0, pr_fmt(fmt), ##__VA_ARGS__) #define pr_debug(fmt, ...) \ eprintf(1, pr_fmt(fmt), ##__VA_ARGS__) #define pr_debugN(n, fmt, ...) \ diff --git a/tools/perf/util/newt.c b/tools/perf/util/newt.c new file mode 100644 index 00000000000..2d19e7a3e6e --- /dev/null +++ b/tools/perf/util/newt.c @@ -0,0 +1,207 @@ +#define _GNU_SOURCE +#include <stdio.h> +#undef _GNU_SOURCE + +#include <stdlib.h> +#include <newt.h> +#include <sys/ttydefaults.h> + +#include "cache.h" +#include "hist.h" +#include "session.h" +#include "sort.h" +#include "symbol.h" + +static void newt_form__set_exit_keys(newtComponent self) +{ + newtFormAddHotKey(self, NEWT_KEY_ESCAPE); + newtFormAddHotKey(self, 'Q'); + newtFormAddHotKey(self, 'q'); + newtFormAddHotKey(self, CTRL('c')); +} + +static newtComponent newt_form__new(void) +{ + newtComponent self = newtForm(NULL, NULL, 0); + if (self) + newt_form__set_exit_keys(self); + return self; +} + +static size_t hist_entry__append_browser(struct hist_entry *self, + newtComponent listbox, u64 total) +{ + char bf[1024]; + size_t len; + FILE *fp; + + if (symbol_conf.exclude_other && !self->parent) + return 0; + + fp = fmemopen(bf, sizeof(bf), "w"); + if (fp == NULL) + return 0; + + len = hist_entry__fprintf(self, NULL, false, 0, fp, total); + + fclose(fp); + newtListboxAppendEntry(listbox, bf, self); + return len; +} + +static void hist_entry__annotate_browser(struct hist_entry *self) +{ + FILE *fp; + int cols, rows; + newtComponent form, listbox; + struct newtExitStruct es; + char *str; + size_t line_len, max_line_len = 0; + size_t max_usable_width; + char *line = NULL; + + if (self->sym == NULL) + return; + + if (asprintf(&str, "perf annotate %s 2>&1 | expand", self->sym->name) < 0) + return; + + fp = popen(str, "r"); + if (fp == NULL) + goto out_free_str; + + newtPushHelpLine("Press ESC to exit"); + newtGetScreenSize(&cols, &rows); + listbox = newtListbox(0, 0, rows - 5, NEWT_FLAG_SCROLL); + + while (!feof(fp)) { + if (getline(&line, &line_len, fp) < 0 || !line_len) + break; + while (line_len != 0 && isspace(line[line_len - 1])) + line[--line_len] = '\0'; + + if (line_len > max_line_len) + max_line_len = line_len; + newtListboxAppendEntry(listbox, line, NULL); + } + fclose(fp); + free(line); + + max_usable_width = cols - 22; + if (max_line_len > max_usable_width) + max_line_len = max_usable_width; + + newtListboxSetWidth(listbox, max_line_len); + + newtCenteredWindow(max_line_len + 2, rows - 5, self->sym->name); + form = newt_form__new(); + newtFormAddComponents(form, listbox, NULL); + + newtFormRun(form, &es); + newtFormDestroy(form); + newtPopWindow(); + newtPopHelpLine(); +out_free_str: + free(str); +} + +void perf_session__browse_hists(struct rb_root *hists, u64 session_total, + const char *helpline) +{ + struct sort_entry *se; + struct rb_node *nd; + unsigned int width; + char *col_width = symbol_conf.col_width_list_str; + int rows; + size_t max_len = 0; + char str[1024]; + newtComponent form, listbox; + struct newtExitStruct es; + + snprintf(str, sizeof(str), "Samples: %Ld", session_total); + newtDrawRootText(0, 0, str); + newtPushHelpLine(helpline); + + newtGetScreenSize(NULL, &rows); + + form = newt_form__new(); + + listbox = newtListbox(1, 1, rows - 2, (NEWT_FLAG_SCROLL | + NEWT_FLAG_BORDER | + NEWT_FLAG_RETURNEXIT)); + + list_for_each_entry(se, &hist_entry__sort_list, list) { + if (se->elide) + continue; + width = strlen(se->header); + if (se->width) { + if (symbol_conf.col_width_list_str) { + if (col_width) { + *se->width = atoi(col_width); + col_width = strchr(col_width, ','); + if (col_width) + ++col_width; + } + } + *se->width = max(*se->width, width); + } + } + + for (nd = rb_first(hists); nd; nd = rb_next(nd)) { + struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + size_t len = hist_entry__append_browser(h, listbox, session_total); + if (len > max_len) + max_len = len; + } + + newtListboxSetWidth(listbox, max_len); + newtFormAddComponents(form, listbox, NULL); + + while (1) { + struct hist_entry *selection; + + newtFormRun(form, &es); + if (es.reason == NEWT_EXIT_HOTKEY) + break; + selection = newtListboxGetCurrent(listbox); + hist_entry__annotate_browser(selection); + } + + newtFormDestroy(form); +} + +int browser__show_help(const char *format, va_list ap) +{ + int ret; + static int backlog; + static char msg[1024]; + + ret = vsnprintf(msg + backlog, sizeof(msg) - backlog, format, ap); + backlog += ret; + + if (msg[backlog - 1] == '\n') { + newtPopHelpLine(); + newtPushHelpLine(msg); + newtRefresh(); + backlog = 0; + } + + return ret; +} + +void setup_browser(void) +{ + if (!isatty(1)) + return; + + use_browser = true; + newtInit(); + newtCls(); + newtPushHelpLine(" "); +} + +void exit_browser(void) +{ + if (use_browser) + newtFinished(); +} diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 05d0c5c2030..a2014459125 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -656,6 +656,10 @@ parse_raw_event(const char **strp, struct perf_event_attr *attr) return EVT_FAILED; n = hex2u64(str + 1, &config); if (n > 0) { + if (str[n+1] == 'p') { + attr->precise = 1; + n++; + } *strp = str + n + 1; attr->type = PERF_TYPE_RAW; attr->config = config; diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 53181dbfe4a..c6603f3bb43 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -33,19 +33,25 @@ #include <limits.h> #undef _GNU_SOURCE +#include "util.h" #include "event.h" #include "string.h" #include "strlist.h" #include "debug.h" #include "cache.h" #include "color.h" +#include "symbol.h" +#include "thread.h" #include "parse-events.h" /* For debugfs_path */ #include "probe-event.h" +#include "probe-finder.h" #define MAX_CMDLEN 256 #define MAX_PROBE_ARGS 128 #define PERFPROBE_GROUP "probe" +bool probe_event_dry_run; /* Dry run flag */ + #define semantic_error(msg ...) die("Semantic error :" msg) /* If there is no space to write, returns -E2BIG. */ @@ -64,6 +70,37 @@ static int e_snprintf(char *str, size_t size, const char *format, ...) return ret; } +static struct map_groups kmap_groups; +static struct map *kmaps[MAP__NR_TYPES]; + +/* Initialize symbol maps for vmlinux */ +static void init_vmlinux(void) +{ + symbol_conf.sort_by_name = true; + if (symbol_conf.vmlinux_name == NULL) + symbol_conf.try_vmlinux_path = true; + else + pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name); + if (symbol__init() < 0) + die("Failed to init symbol map."); + + map_groups__init(&kmap_groups); + if (map_groups__create_kernel_maps(&kmap_groups, kmaps) < 0) + die("Failed to create kernel maps."); +} + +#ifndef NO_DWARF_SUPPORT +static int open_vmlinux(void) +{ + if (map__load(kmaps[MAP__FUNCTION], NULL) < 0) { + pr_debug("Failed to load kernel map.\n"); + return -EINVAL; + } + pr_debug("Try to open %s\n", kmaps[MAP__FUNCTION]->dso->long_name); + return open(kmaps[MAP__FUNCTION]->dso->long_name, O_RDONLY); +} +#endif + void parse_line_range_desc(const char *arg, struct line_range *lr) { const char *ptr; @@ -90,9 +127,9 @@ void parse_line_range_desc(const char *arg, struct line_range *lr) if (*tmp != '\0') semantic_error("Tailing with invalid character '%d'.", *tmp); - tmp = strndup(arg, (ptr - arg)); + tmp = xstrndup(arg, (ptr - arg)); } else - tmp = strdup(arg); + tmp = xstrdup(arg); if (strchr(tmp, '.')) lr->file = tmp; @@ -113,8 +150,9 @@ static bool check_event_name(const char *name) } /* Parse probepoint definition. */ -static void parse_perf_probe_probepoint(char *arg, struct probe_point *pp) +static void parse_perf_probe_point(char *arg, struct perf_probe_event *pev) { + struct perf_probe_point *pp = &pev->point; char *ptr, *tmp; char c, nc = 0; /* @@ -135,7 +173,8 @@ static void parse_perf_probe_probepoint(char *arg, struct probe_point *pp) if (!check_event_name(arg)) semantic_error("%s is bad for event name -it must " "follow C symbol-naming rule.", arg); - pp->event = strdup(arg); + pev->event = xstrdup(arg); + pev->group = NULL; arg = tmp; } @@ -147,17 +186,16 @@ static void parse_perf_probe_probepoint(char *arg, struct probe_point *pp) /* Check arg is function or file and copy it */ if (strchr(arg, '.')) /* File */ - pp->file = strdup(arg); + pp->file = xstrdup(arg); else /* Function */ - pp->function = strdup(arg); - DIE_IF(pp->file == NULL && pp->function == NULL); + pp->function = xstrdup(arg); /* Parse other options */ while (ptr) { arg = ptr; c = nc; if (c == ';') { /* Lazy pattern must be the last part */ - pp->lazy_line = strdup(arg); + pp->lazy_line = xstrdup(arg); break; } ptr = strpbrk(arg, ";:+@%"); @@ -181,8 +219,7 @@ static void parse_perf_probe_probepoint(char *arg, struct probe_point *pp) case '@': /* File name */ if (pp->file) semantic_error("SRC@SRC is not allowed."); - pp->file = strdup(arg); - DIE_IF(pp->file == NULL); + pp->file = xstrdup(arg); break; case '%': /* Probe places */ if (strcmp(arg, "return") == 0) { @@ -220,59 +257,108 @@ static void parse_perf_probe_probepoint(char *arg, struct probe_point *pp) semantic_error("Offset/Line/Lazy pattern can't be used with " "return probe."); - pr_debug("symbol:%s file:%s line:%d offset:%d return:%d lazy:%s\n", + pr_debug("symbol:%s file:%s line:%d offset:%lu return:%d lazy:%s\n", pp->function, pp->file, pp->line, pp->offset, pp->retprobe, pp->lazy_line); } -/* Parse perf-probe event definition */ -void parse_perf_probe_event(const char *str, struct probe_point *pp, - bool *need_dwarf) +/* Parse perf-probe event argument */ +static void parse_perf_probe_arg(const char *str, struct perf_probe_arg *arg) +{ + const char *tmp; + struct perf_probe_arg_field **fieldp; + + pr_debug("parsing arg: %s into ", str); + + tmp = strpbrk(str, "-."); + if (!is_c_varname(str) || !tmp) { + /* A variable, register, symbol or special value */ + arg->name = xstrdup(str); + pr_debug("%s\n", arg->name); + return; + } + + /* Structure fields */ + arg->name = xstrndup(str, tmp - str); + pr_debug("%s, ", arg->name); + fieldp = &arg->field; + + do { + *fieldp = xzalloc(sizeof(struct perf_probe_arg_field)); + if (*tmp == '.') { + str = tmp + 1; + (*fieldp)->ref = false; + } else if (tmp[1] == '>') { + str = tmp + 2; + (*fieldp)->ref = true; + } else + semantic_error("Argument parse error: %s", str); + + tmp = strpbrk(str, "-."); + if (tmp) { + (*fieldp)->name = xstrndup(str, tmp - str); + pr_debug("%s(%d), ", (*fieldp)->name, (*fieldp)->ref); + fieldp = &(*fieldp)->next; + } + } while (tmp); + (*fieldp)->name = xstrdup(str); + pr_debug("%s(%d)\n", (*fieldp)->name, (*fieldp)->ref); +} + +/* Parse perf-probe event command */ +void parse_perf_probe_command(const char *cmd, struct perf_probe_event *pev) { char **argv; int argc, i; - *need_dwarf = false; - - argv = argv_split(str, &argc); + argv = argv_split(cmd, &argc); if (!argv) die("argv_split failed."); if (argc > MAX_PROBE_ARGS + 1) semantic_error("Too many arguments"); /* Parse probe point */ - parse_perf_probe_probepoint(argv[0], pp); - if (pp->file || pp->line) - *need_dwarf = true; + parse_perf_probe_point(argv[0], pev); /* Copy arguments and ensure return probe has no C argument */ - pp->nr_args = argc - 1; - pp->args = zalloc(sizeof(char *) * pp->nr_args); - for (i = 0; i < pp->nr_args; i++) { - pp->args[i] = strdup(argv[i + 1]); - if (!pp->args[i]) - die("Failed to copy argument."); - if (is_c_varname(pp->args[i])) { - if (pp->retprobe) - semantic_error("You can't specify local" - " variable for kretprobe"); - *need_dwarf = true; - } + pev->nargs = argc - 1; + pev->args = xzalloc(sizeof(struct perf_probe_arg) * pev->nargs); + for (i = 0; i < pev->nargs; i++) { + parse_perf_probe_arg(argv[i + 1], &pev->args[i]); + if (is_c_varname(pev->args[i].name) && pev->point.retprobe) + semantic_error("You can't specify local variable for" + " kretprobe"); } argv_free(argv); } +/* Return true if this perf_probe_event requires debuginfo */ +bool perf_probe_event_need_dwarf(struct perf_probe_event *pev) +{ + int i; + + if (pev->point.file || pev->point.line || pev->point.lazy_line) + return true; + + for (i = 0; i < pev->nargs; i++) + if (is_c_varname(pev->args[i].name)) + return true; + + return false; +} + /* Parse kprobe_events event into struct probe_point */ -void parse_trace_kprobe_event(const char *str, struct probe_point *pp) +void parse_kprobe_trace_command(const char *cmd, struct kprobe_trace_event *tev) { + struct kprobe_trace_point *tp = &tev->point; char pr; char *p; int ret, i, argc; char **argv; - pr_debug("Parsing kprobe_events: %s\n", str); - argv = argv_split(str, &argc); + pr_debug("Parsing kprobe_events: %s\n", cmd); + argv = argv_split(cmd, &argc); if (!argv) die("argv_split failed."); if (argc < 2) @@ -280,132 +366,332 @@ void parse_trace_kprobe_event(const char *str, struct probe_point *pp) /* Scan event and group name. */ ret = sscanf(argv[0], "%c:%a[^/ \t]/%a[^ \t]", - &pr, (float *)(void *)&pp->group, - (float *)(void *)&pp->event); + &pr, (float *)(void *)&tev->group, + (float *)(void *)&tev->event); if (ret != 3) semantic_error("Failed to parse event name: %s", argv[0]); - pr_debug("Group:%s Event:%s probe:%c\n", pp->group, pp->event, pr); + pr_debug("Group:%s Event:%s probe:%c\n", tev->group, tev->event, pr); - pp->retprobe = (pr == 'r'); + tp->retprobe = (pr == 'r'); /* Scan function name and offset */ - ret = sscanf(argv[1], "%a[^+]+%d", (float *)(void *)&pp->function, - &pp->offset); + ret = sscanf(argv[1], "%a[^+]+%lu", (float *)(void *)&tp->symbol, + &tp->offset); if (ret == 1) - pp->offset = 0; + tp->offset = 0; - /* kprobe_events doesn't have this information */ - pp->line = 0; - pp->file = NULL; - - pp->nr_args = argc - 2; - pp->args = zalloc(sizeof(char *) * pp->nr_args); - for (i = 0; i < pp->nr_args; i++) { + tev->nargs = argc - 2; + tev->args = xzalloc(sizeof(struct kprobe_trace_arg) * tev->nargs); + for (i = 0; i < tev->nargs; i++) { p = strchr(argv[i + 2], '='); if (p) /* We don't need which register is assigned. */ - *p = '\0'; - pp->args[i] = strdup(argv[i + 2]); - if (!pp->args[i]) - die("Failed to copy argument."); + *p++ = '\0'; + else + p = argv[i + 2]; + tev->args[i].name = xstrdup(argv[i + 2]); + /* TODO: parse regs and offset */ + tev->args[i].value = xstrdup(p); } argv_free(argv); } -/* Synthesize only probe point (not argument) */ -int synthesize_perf_probe_point(struct probe_point *pp) +/* Compose only probe arg */ +int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len) { - char *buf; - char offs[64] = "", line[64] = ""; + struct perf_probe_arg_field *field = pa->field; int ret; + char *tmp = buf; - pp->probes[0] = buf = zalloc(MAX_CMDLEN); - pp->found = 1; - if (!buf) - die("Failed to allocate memory by zalloc."); + ret = e_snprintf(tmp, len, "%s", pa->name); + if (ret <= 0) + goto error; + tmp += ret; + len -= ret; + + while (field) { + ret = e_snprintf(tmp, len, "%s%s", field->ref ? "->" : ".", + field->name); + if (ret <= 0) + goto error; + tmp += ret; + len -= ret; + field = field->next; + } + return tmp - buf; +error: + die("Failed to synthesize perf probe argument: %s", strerror(-ret)); +} + +/* Compose only probe point (not argument) */ +static char *synthesize_perf_probe_point(struct perf_probe_point *pp) +{ + char *buf, *tmp; + char offs[32] = "", line[32] = "", file[32] = ""; + int ret, len; + + buf = xzalloc(MAX_CMDLEN); if (pp->offset) { - ret = e_snprintf(offs, 64, "+%d", pp->offset); + ret = e_snprintf(offs, 32, "+%lu", pp->offset); if (ret <= 0) goto error; } if (pp->line) { - ret = e_snprintf(line, 64, ":%d", pp->line); + ret = e_snprintf(line, 32, ":%d", pp->line); + if (ret <= 0) + goto error; + } + if (pp->file) { + len = strlen(pp->file) - 32; + if (len < 0) + len = 0; + tmp = strchr(pp->file + len, '/'); + if (!tmp) + tmp = pp->file + len - 1; + ret = e_snprintf(file, 32, "@%s", tmp + 1); if (ret <= 0) goto error; } if (pp->function) - ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s", pp->function, - offs, pp->retprobe ? "%return" : "", line); + ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s%s", pp->function, + offs, pp->retprobe ? "%return" : "", line, + file); else - ret = e_snprintf(buf, MAX_CMDLEN, "%s%s", pp->file, line); - if (ret <= 0) { + ret = e_snprintf(buf, MAX_CMDLEN, "%s%s", file, line); + if (ret <= 0) + goto error; + + return buf; error: - free(pp->probes[0]); - pp->probes[0] = NULL; - pp->found = 0; - } - return ret; + die("Failed to synthesize perf probe point: %s", strerror(-ret)); } -int synthesize_perf_probe_event(struct probe_point *pp) +#if 0 +char *synthesize_perf_probe_command(struct perf_probe_event *pev) { char *buf; int i, len, ret; - len = synthesize_perf_probe_point(pp); - if (len < 0) - return 0; + buf = synthesize_perf_probe_point(&pev->point); + if (!buf) + return NULL; - buf = pp->probes[0]; - for (i = 0; i < pp->nr_args; i++) { + len = strlen(buf); + for (i = 0; i < pev->nargs; i++) { ret = e_snprintf(&buf[len], MAX_CMDLEN - len, " %s", - pp->args[i]); - if (ret <= 0) - goto error; + pev->args[i].name); + if (ret <= 0) { + free(buf); + return NULL; + } len += ret; } - pp->found = 1; - return pp->found; -error: - free(pp->probes[0]); - pp->probes[0] = NULL; + return buf; +} +#endif + +static int __synthesize_kprobe_trace_arg_ref(struct kprobe_trace_arg_ref *ref, + char **buf, size_t *buflen, + int depth) +{ + int ret; + if (ref->next) { + depth = __synthesize_kprobe_trace_arg_ref(ref->next, buf, + buflen, depth + 1); + if (depth < 0) + goto out; + } + + ret = e_snprintf(*buf, *buflen, "%+ld(", ref->offset); + if (ret < 0) + depth = ret; + else { + *buf += ret; + *buflen -= ret; + } +out: + return depth; - return ret; } -int synthesize_trace_kprobe_event(struct probe_point *pp) +static int synthesize_kprobe_trace_arg(struct kprobe_trace_arg *arg, + char *buf, size_t buflen) { + int ret, depth = 0; + char *tmp = buf; + + /* Argument name or separator */ + if (arg->name) + ret = e_snprintf(buf, buflen, " %s=", arg->name); + else + ret = e_snprintf(buf, buflen, " "); + if (ret < 0) + return ret; + buf += ret; + buflen -= ret; + + /* Dereferencing arguments */ + if (arg->ref) { + depth = __synthesize_kprobe_trace_arg_ref(arg->ref, &buf, + &buflen, 1); + if (depth < 0) + return depth; + } + + /* Print argument value */ + ret = e_snprintf(buf, buflen, "%s", arg->value); + if (ret < 0) + return ret; + buf += ret; + buflen -= ret; + + /* Closing */ + while (depth--) { + ret = e_snprintf(buf, buflen, ")"); + if (ret < 0) + return ret; + buf += ret; + buflen -= ret; + } + + return buf - tmp; +} + +char *synthesize_kprobe_trace_command(struct kprobe_trace_event *tev) +{ + struct kprobe_trace_point *tp = &tev->point; char *buf; int i, len, ret; - pp->probes[0] = buf = zalloc(MAX_CMDLEN); - if (!buf) - die("Failed to allocate memory by zalloc."); - ret = e_snprintf(buf, MAX_CMDLEN, "%s+%d", pp->function, pp->offset); - if (ret <= 0) + buf = xzalloc(MAX_CMDLEN); + len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s+%lu", + tp->retprobe ? 'r' : 'p', + tev->group, tev->event, + tp->symbol, tp->offset); + if (len <= 0) goto error; - len = ret; - for (i = 0; i < pp->nr_args; i++) { - ret = e_snprintf(&buf[len], MAX_CMDLEN - len, " %s", - pp->args[i]); + for (i = 0; i < tev->nargs; i++) { + ret = synthesize_kprobe_trace_arg(&tev->args[i], buf + len, + MAX_CMDLEN - len); if (ret <= 0) goto error; len += ret; } - pp->found = 1; - return pp->found; + return buf; error: - free(pp->probes[0]); - pp->probes[0] = NULL; + free(buf); + return NULL; +} - return ret; +void convert_to_perf_probe_event(struct kprobe_trace_event *tev, + struct perf_probe_event *pev) +{ + char buf[64]; + int i; +#ifndef NO_DWARF_SUPPORT + struct symbol *sym; + int fd, ret = 0; + + sym = map__find_symbol_by_name(kmaps[MAP__FUNCTION], + tev->point.symbol, NULL); + if (sym) { + fd = open_vmlinux(); + ret = find_perf_probe_point(fd, sym->start + tev->point.offset, + &pev->point); + close(fd); + } + if (ret <= 0) { + pev->point.function = xstrdup(tev->point.symbol); + pev->point.offset = tev->point.offset; + } +#else + /* Convert trace_point to probe_point */ + pev->point.function = xstrdup(tev->point.symbol); + pev->point.offset = tev->point.offset; +#endif + pev->point.retprobe = tev->point.retprobe; + + pev->event = xstrdup(tev->event); + pev->group = xstrdup(tev->group); + + /* Convert trace_arg to probe_arg */ + pev->nargs = tev->nargs; + pev->args = xzalloc(sizeof(struct perf_probe_arg) * pev->nargs); + for (i = 0; i < tev->nargs; i++) + if (tev->args[i].name) + pev->args[i].name = xstrdup(tev->args[i].name); + else { + synthesize_kprobe_trace_arg(&tev->args[i], buf, 64); + pev->args[i].name = xstrdup(buf); + } +} + +void clear_perf_probe_event(struct perf_probe_event *pev) +{ + struct perf_probe_point *pp = &pev->point; + struct perf_probe_arg_field *field, *next; + int i; + + if (pev->event) + free(pev->event); + if (pev->group) + free(pev->group); + if (pp->file) + free(pp->file); + if (pp->function) + free(pp->function); + if (pp->lazy_line) + free(pp->lazy_line); + for (i = 0; i < pev->nargs; i++) { + if (pev->args[i].name) + free(pev->args[i].name); + field = pev->args[i].field; + while (field) { + next = field->next; + if (field->name) + free(field->name); + free(field); + field = next; + } + } + if (pev->args) + free(pev->args); + memset(pev, 0, sizeof(*pev)); } -static int open_kprobe_events(int flags, int mode) +void clear_kprobe_trace_event(struct kprobe_trace_event *tev) +{ + struct kprobe_trace_arg_ref *ref, *next; + int i; + + if (tev->event) + free(tev->event); + if (tev->group) + free(tev->group); + if (tev->point.symbol) + free(tev->point.symbol); + for (i = 0; i < tev->nargs; i++) { + if (tev->args[i].name) + free(tev->args[i].name); + if (tev->args[i].value) + free(tev->args[i].value); + ref = tev->args[i].ref; + while (ref) { + next = ref->next; + free(ref); + ref = next; + } + } + if (tev->args) + free(tev->args); + memset(tev, 0, sizeof(*tev)); +} + +static int open_kprobe_events(bool readwrite) { char buf[PATH_MAX]; int ret; @@ -414,7 +700,11 @@ static int open_kprobe_events(int flags, int mode) if (ret < 0) die("Failed to make kprobe_events path."); - ret = open(buf, flags, mode); + if (readwrite && !probe_event_dry_run) + ret = open(buf, O_RDWR, O_APPEND); + else + ret = open(buf, O_RDONLY, 0); + if (ret < 0) { if (errno == ENOENT) die("kprobe_events file does not exist -" @@ -427,7 +717,7 @@ static int open_kprobe_events(int flags, int mode) } /* Get raw string list of current kprobe_events */ -static struct strlist *get_trace_kprobe_event_rawlist(int fd) +static struct strlist *get_kprobe_trace_command_rawlist(int fd) { int ret, idx; FILE *fp; @@ -455,99 +745,85 @@ static struct strlist *get_trace_kprobe_event_rawlist(int fd) return sl; } -/* Free and zero clear probe_point */ -static void clear_probe_point(struct probe_point *pp) -{ - int i; - - if (pp->event) - free(pp->event); - if (pp->group) - free(pp->group); - if (pp->function) - free(pp->function); - if (pp->file) - free(pp->file); - if (pp->lazy_line) - free(pp->lazy_line); - for (i = 0; i < pp->nr_args; i++) - free(pp->args[i]); - if (pp->args) - free(pp->args); - for (i = 0; i < pp->found; i++) - free(pp->probes[i]); - memset(pp, 0, sizeof(*pp)); -} - /* Show an event */ -static void show_perf_probe_event(const char *event, const char *place, - struct probe_point *pp) +static void show_perf_probe_event(struct perf_probe_event *pev) { int i, ret; char buf[128]; + char *place; + + /* Synthesize only event probe point */ + place = synthesize_perf_probe_point(&pev->point); - ret = e_snprintf(buf, 128, "%s:%s", pp->group, event); + ret = e_snprintf(buf, 128, "%s:%s", pev->group, pev->event); if (ret < 0) die("Failed to copy event: %s", strerror(-ret)); - printf(" %-40s (on %s", buf, place); + printf(" %-20s (on %s", buf, place); - if (pp->nr_args > 0) { + if (pev->nargs > 0) { printf(" with"); - for (i = 0; i < pp->nr_args; i++) - printf(" %s", pp->args[i]); + for (i = 0; i < pev->nargs; i++) { + synthesize_perf_probe_arg(&pev->args[i], buf, 128); + printf(" %s", buf); + } } printf(")\n"); + free(place); } /* List up current perf-probe events */ void show_perf_probe_events(void) { int fd; - struct probe_point pp; + struct kprobe_trace_event tev; + struct perf_probe_event pev; struct strlist *rawlist; struct str_node *ent; setup_pager(); - memset(&pp, 0, sizeof(pp)); + init_vmlinux(); + + memset(&tev, 0, sizeof(tev)); + memset(&pev, 0, sizeof(pev)); - fd = open_kprobe_events(O_RDONLY, 0); - rawlist = get_trace_kprobe_event_rawlist(fd); + fd = open_kprobe_events(false); + rawlist = get_kprobe_trace_command_rawlist(fd); close(fd); strlist__for_each(ent, rawlist) { - parse_trace_kprobe_event(ent->s, &pp); - /* Synthesize only event probe point */ - synthesize_perf_probe_point(&pp); + parse_kprobe_trace_command(ent->s, &tev); + convert_to_perf_probe_event(&tev, &pev); /* Show an event */ - show_perf_probe_event(pp.event, pp.probes[0], &pp); - clear_probe_point(&pp); + show_perf_probe_event(&pev); + clear_perf_probe_event(&pev); + clear_kprobe_trace_event(&tev); } strlist__delete(rawlist); } /* Get current perf-probe event names */ -static struct strlist *get_perf_event_names(int fd, bool include_group) +static struct strlist *get_kprobe_trace_event_names(int fd, bool include_group) { char buf[128]; struct strlist *sl, *rawlist; struct str_node *ent; - struct probe_point pp; + struct kprobe_trace_event tev; - memset(&pp, 0, sizeof(pp)); - rawlist = get_trace_kprobe_event_rawlist(fd); + memset(&tev, 0, sizeof(tev)); + rawlist = get_kprobe_trace_command_rawlist(fd); sl = strlist__new(true, NULL); strlist__for_each(ent, rawlist) { - parse_trace_kprobe_event(ent->s, &pp); + parse_kprobe_trace_command(ent->s, &tev); if (include_group) { - if (e_snprintf(buf, 128, "%s:%s", pp.group, - pp.event) < 0) + if (e_snprintf(buf, 128, "%s:%s", tev.group, + tev.event) < 0) die("Failed to copy group:event name."); strlist__add(sl, buf); } else - strlist__add(sl, pp.event); - clear_probe_point(&pp); + strlist__add(sl, tev.event); + clear_kprobe_trace_event(&tev); } strlist__delete(rawlist); @@ -555,14 +831,18 @@ static struct strlist *get_perf_event_names(int fd, bool include_group) return sl; } -static void write_trace_kprobe_event(int fd, const char *buf) +static void write_kprobe_trace_event(int fd, struct kprobe_trace_event *tev) { int ret; + char *buf = synthesize_kprobe_trace_command(tev); pr_debug("Writing event: %s\n", buf); - ret = write(fd, buf, strlen(buf)); - if (ret <= 0) - die("Failed to write event: %s", strerror(errno)); + if (!probe_event_dry_run) { + ret = write(fd, buf, strlen(buf)); + if (ret <= 0) + die("Failed to write event: %s", strerror(errno)); + } + free(buf); } static void get_new_event_name(char *buf, size_t len, const char *base, @@ -595,65 +875,187 @@ static void get_new_event_name(char *buf, size_t len, const char *base, die("Too many events are on the same function."); } -void add_trace_kprobe_events(struct probe_point *probes, int nr_probes, - bool force_add) +static void __add_kprobe_trace_events(struct perf_probe_event *pev, + struct kprobe_trace_event *tevs, + int ntevs, bool allow_suffix) { - int i, j, fd; - struct probe_point *pp; - char buf[MAX_CMDLEN]; - char event[64]; + int i, fd; + struct kprobe_trace_event *tev = NULL; + char buf[64]; + const char *event, *group; struct strlist *namelist; - bool allow_suffix; - fd = open_kprobe_events(O_RDWR, O_APPEND); + fd = open_kprobe_events(true); /* Get current event names */ - namelist = get_perf_event_names(fd, false); - - for (j = 0; j < nr_probes; j++) { - pp = probes + j; - if (!pp->event) - pp->event = strdup(pp->function); - if (!pp->group) - pp->group = strdup(PERFPROBE_GROUP); - DIE_IF(!pp->event || !pp->group); - /* If force_add is true, suffix search is allowed */ - allow_suffix = force_add; - for (i = 0; i < pp->found; i++) { - /* Get an unused new event name */ - get_new_event_name(event, 64, pp->event, namelist, - allow_suffix); - snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s\n", - pp->retprobe ? 'r' : 'p', - pp->group, event, - pp->probes[i]); - write_trace_kprobe_event(fd, buf); - printf("Added new event:\n"); - /* Get the first parameter (probe-point) */ - sscanf(pp->probes[i], "%s", buf); - show_perf_probe_event(event, buf, pp); - /* Add added event name to namelist */ - strlist__add(namelist, event); - /* - * Probes after the first probe which comes from same - * user input are always allowed to add suffix, because - * there might be several addresses corresponding to - * one code line. - */ - allow_suffix = true; - } + namelist = get_kprobe_trace_event_names(fd, false); + + printf("Add new event%s\n", (ntevs > 1) ? "s:" : ":"); + for (i = 0; i < ntevs; i++) { + tev = &tevs[i]; + if (pev->event) + event = pev->event; + else + if (pev->point.function) + event = pev->point.function; + else + event = tev->point.symbol; + if (pev->group) + group = pev->group; + else + group = PERFPROBE_GROUP; + + /* Get an unused new event name */ + get_new_event_name(buf, 64, event, namelist, allow_suffix); + event = buf; + + tev->event = xstrdup(event); + tev->group = xstrdup(group); + write_kprobe_trace_event(fd, tev); + /* Add added event name to namelist */ + strlist__add(namelist, event); + + /* Trick here - save current event/group */ + event = pev->event; + group = pev->group; + pev->event = tev->event; + pev->group = tev->group; + show_perf_probe_event(pev); + /* Trick here - restore current event/group */ + pev->event = (char *)event; + pev->group = (char *)group; + + /* + * Probes after the first probe which comes from same + * user input are always allowed to add suffix, because + * there might be several addresses corresponding to + * one code line. + */ + allow_suffix = true; } /* Show how to use the event. */ printf("\nYou can now use it on all perf tools, such as:\n\n"); - printf("\tperf record -e %s:%s -a sleep 1\n\n", PERFPROBE_GROUP, event); + printf("\tperf record -e %s:%s -a sleep 1\n\n", tev->group, tev->event); strlist__delete(namelist); close(fd); } +static int convert_to_kprobe_trace_events(struct perf_probe_event *pev, + struct kprobe_trace_event **tevs) +{ + struct symbol *sym; + bool need_dwarf; +#ifndef NO_DWARF_SUPPORT + int fd; +#endif + int ntevs = 0, i; + struct kprobe_trace_event *tev; + + need_dwarf = perf_probe_event_need_dwarf(pev); + + if (need_dwarf) +#ifdef NO_DWARF_SUPPORT + die("Debuginfo-analysis is not supported"); +#else /* !NO_DWARF_SUPPORT */ + pr_debug("Some probes require debuginfo.\n"); + + fd = open_vmlinux(); + if (fd < 0) { + if (need_dwarf) + die("Could not open debuginfo file."); + + pr_debug("Could not open vmlinux/module file." + " Try to use symbols.\n"); + goto end_dwarf; + } + + /* Searching probe points */ + ntevs = find_kprobe_trace_events(fd, pev, tevs); + + if (ntevs > 0) /* Found */ + goto found; + + if (ntevs == 0) /* No error but failed to find probe point. */ + die("Probe point '%s' not found. - probe not added.", + synthesize_perf_probe_point(&pev->point)); + + /* Error path */ + if (need_dwarf) { + if (ntevs == -ENOENT) + pr_warning("No dwarf info found in the vmlinux - please rebuild with CONFIG_DEBUG_INFO=y.\n"); + die("Could not analyze debuginfo."); + } + pr_debug("An error occurred in debuginfo analysis." + " Try to use symbols.\n"); + +end_dwarf: +#endif /* !NO_DWARF_SUPPORT */ + + /* Allocate trace event buffer */ + ntevs = 1; + tev = *tevs = xzalloc(sizeof(struct kprobe_trace_event)); + + /* Copy parameters */ + tev->point.symbol = xstrdup(pev->point.function); + tev->point.offset = pev->point.offset; + tev->nargs = pev->nargs; + if (tev->nargs) { + tev->args = xzalloc(sizeof(struct kprobe_trace_arg) + * tev->nargs); + for (i = 0; i < tev->nargs; i++) + tev->args[i].value = xstrdup(pev->args[i].name); + } + + /* Currently just checking function name from symbol map */ + sym = map__find_symbol_by_name(kmaps[MAP__FUNCTION], + tev->point.symbol, NULL); + if (!sym) + die("Kernel symbol \'%s\' not found - probe not added.", + tev->point.symbol); +#ifndef NO_DWARF_SUPPORT +found: + close(fd); +#endif + return ntevs; +} + +struct __event_package { + struct perf_probe_event *pev; + struct kprobe_trace_event *tevs; + int ntevs; +}; + +void add_perf_probe_events(struct perf_probe_event *pevs, int npevs, + bool force_add) +{ + int i; + struct __event_package *pkgs; + + pkgs = xzalloc(sizeof(struct __event_package) * npevs); + + /* Init vmlinux path */ + init_vmlinux(); + + /* Loop 1: convert all events */ + for (i = 0; i < npevs; i++) { + pkgs[i].pev = &pevs[i]; + /* Convert with or without debuginfo */ + pkgs[i].ntevs = convert_to_kprobe_trace_events(pkgs[i].pev, + &pkgs[i].tevs); + } + + /* Loop 2: add all events */ + for (i = 0; i < npevs; i++) + __add_kprobe_trace_events(pkgs[i].pev, pkgs[i].tevs, + pkgs[i].ntevs, force_add); + /* TODO: cleanup all trace events? */ +} + static void __del_trace_kprobe_event(int fd, struct str_node *ent) { char *p; char buf[128]; + int ret; /* Convert from perf-probe event to trace-kprobe event */ if (e_snprintf(buf, 128, "-:%s", ent->s) < 0) @@ -663,7 +1065,10 @@ static void __del_trace_kprobe_event(int fd, struct str_node *ent) die("Internal error: %s should have ':' but not.", ent->s); *p = '/'; - write_trace_kprobe_event(fd, buf); + pr_debug("Writing event: %s\n", buf); + ret = write(fd, buf, strlen(buf)); + if (ret <= 0) + die("Failed to write event: %s", strerror(errno)); printf("Remove event: %s\n", ent->s); } @@ -696,7 +1101,7 @@ static void del_trace_kprobe_event(int fd, const char *group, pr_info("Info: event \"%s\" does not exist, could not remove it.\n", buf); } -void del_trace_kprobe_events(struct strlist *dellist) +void del_perf_probe_events(struct strlist *dellist) { int fd; const char *group, *event; @@ -704,14 +1109,12 @@ void del_trace_kprobe_events(struct strlist *dellist) struct str_node *ent; struct strlist *namelist; - fd = open_kprobe_events(O_RDWR, O_APPEND); + fd = open_kprobe_events(true); /* Get current event names */ - namelist = get_perf_event_names(fd, true); + namelist = get_kprobe_trace_event_names(fd, true); strlist__for_each(ent, dellist) { - str = strdup(ent->s); - if (!str) - die("Failed to copy event."); + str = xstrdup(ent->s); pr_debug("Parsing: %s\n", str); p = strchr(str, ':'); if (p) { @@ -771,6 +1174,21 @@ void show_line_range(struct line_range *lr) unsigned int l = 1; struct line_node *ln; FILE *fp; +#ifndef NO_DWARF_SUPPORT + int fd, ret; +#endif + + /* Search a line range */ + init_vmlinux(); +#ifndef NO_DWARF_SUPPORT + fd = open_vmlinux(); + if (fd < 0) + die("Could not open debuginfo file."); + ret = find_line_range(fd, lr); + if (ret <= 0) + die("Source line is not found.\n"); + close(fd); +#endif setup_pager(); @@ -800,3 +1218,5 @@ void show_line_range(struct line_range *lr) fclose(fp); } + + diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 711287d4bae..cd308b0a4d9 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -2,22 +2,123 @@ #define _PROBE_EVENT_H #include <stdbool.h> -#include "probe-finder.h" #include "strlist.h" -extern void parse_line_range_desc(const char *arg, struct line_range *lr); -extern void parse_perf_probe_event(const char *str, struct probe_point *pp, - bool *need_dwarf); -extern int synthesize_perf_probe_point(struct probe_point *pp); -extern int synthesize_perf_probe_event(struct probe_point *pp); -extern void parse_trace_kprobe_event(const char *str, struct probe_point *pp); -extern int synthesize_trace_kprobe_event(struct probe_point *pp); -extern void add_trace_kprobe_events(struct probe_point *probes, int nr_probes, - bool force_add); -extern void del_trace_kprobe_events(struct strlist *dellist); +extern bool probe_event_dry_run; + +/* kprobe-tracer tracing point */ +struct kprobe_trace_point { + char *symbol; /* Base symbol */ + unsigned long offset; /* Offset from symbol */ + bool retprobe; /* Return probe flag */ +}; + +/* kprobe-tracer tracing argument referencing offset */ +struct kprobe_trace_arg_ref { + struct kprobe_trace_arg_ref *next; /* Next reference */ + long offset; /* Offset value */ +}; + +/* kprobe-tracer tracing argument */ +struct kprobe_trace_arg { + char *name; /* Argument name */ + char *value; /* Base value */ + struct kprobe_trace_arg_ref *ref; /* Referencing offset */ +}; + +/* kprobe-tracer tracing event (point + arg) */ +struct kprobe_trace_event { + char *event; /* Event name */ + char *group; /* Group name */ + struct kprobe_trace_point point; /* Trace point */ + int nargs; /* Number of args */ + struct kprobe_trace_arg *args; /* Arguments */ +}; + +/* Perf probe probing point */ +struct perf_probe_point { + char *file; /* File path */ + char *function; /* Function name */ + int line; /* Line number */ + char *lazy_line; /* Lazy matching pattern */ + unsigned long offset; /* Offset from function entry */ + bool retprobe; /* Return probe flag */ +}; + +/* Perf probe probing argument field chain */ +struct perf_probe_arg_field { + struct perf_probe_arg_field *next; /* Next field */ + char *name; /* Name of the field */ + bool ref; /* Referencing flag */ +}; + +/* Perf probe probing argument */ +struct perf_probe_arg { + char *name; /* Argument name */ + struct perf_probe_arg_field *field; /* Structure fields */ +}; + +/* Perf probe probing event (point + arg) */ +struct perf_probe_event { + char *event; /* Event name */ + char *group; /* Group name */ + struct perf_probe_point point; /* Probe point */ + int nargs; /* Number of arguments */ + struct perf_probe_arg *args; /* Arguments */ +}; + + +/* Line number container */ +struct line_node { + struct list_head list; + unsigned int line; +}; + +/* Line range */ +struct line_range { + char *file; /* File name */ + char *function; /* Function name */ + unsigned int start; /* Start line number */ + unsigned int end; /* End line number */ + int offset; /* Start line offset */ + char *path; /* Real path name */ + struct list_head line_list; /* Visible lines */ +}; + +/* Command string to events */ +extern void parse_perf_probe_command(const char *cmd, + struct perf_probe_event *pev); +extern void parse_kprobe_trace_command(const char *cmd, + struct kprobe_trace_event *tev); + +/* Events to command string */ +extern char *synthesize_perf_probe_command(struct perf_probe_event *pev); +extern char *synthesize_kprobe_trace_command(struct kprobe_trace_event *tev); +extern int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, + size_t len); + +/* Check the perf_probe_event needs debuginfo */ +extern bool perf_probe_event_need_dwarf(struct perf_probe_event *pev); + +/* Convert from kprobe_trace_event to perf_probe_event */ +extern void convert_to_perf_probe_event(struct kprobe_trace_event *tev, + struct perf_probe_event *pev); + +/* Release event contents */ +extern void clear_perf_probe_event(struct perf_probe_event *pev); +extern void clear_kprobe_trace_event(struct kprobe_trace_event *tev); + +/* Command string to line-range */ +extern void parse_line_range_desc(const char *cmd, struct line_range *lr); + + +extern void add_perf_probe_events(struct perf_probe_event *pevs, int ntevs, + bool force_add); +extern void del_perf_probe_events(struct strlist *dellist); extern void show_perf_probe_events(void); extern void show_line_range(struct line_range *lr); + /* Maximum index number of event-name postfix */ #define MAX_EVENT_INDEX 1024 diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 1e6c65ebbd8..db52ec2e84d 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -125,8 +125,7 @@ static void line_list__add_line(struct list_head *head, unsigned int line) p = head; found: pr_debug("line list: add a line %u\n", line); - ln = zalloc(sizeof(struct line_node)); - DIE_IF(ln == NULL); + ln = xzalloc(sizeof(struct line_node)); ln->line = line; INIT_LIST_HEAD(&ln->list); list_add(&ln->list, p); @@ -187,6 +186,84 @@ static const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname) return src; } +/* Compare diename and tname */ +static bool die_compare_name(Dwarf_Die *dw_die, const char *tname) +{ + const char *name; + name = dwarf_diename(dw_die); + DIE_IF(name == NULL); + return strcmp(tname, name); +} + +/* Get entry pc(or low pc, 1st entry of ranges) of the die */ +static Dwarf_Addr die_get_entrypc(Dwarf_Die *dw_die) +{ + Dwarf_Addr epc; + int ret; + + ret = dwarf_entrypc(dw_die, &epc); + DIE_IF(ret == -1); + return epc; +} + +/* Get type die, but skip qualifiers and typedef */ +static Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem) +{ + Dwarf_Attribute attr; + int tag; + + do { + if (dwarf_attr(vr_die, DW_AT_type, &attr) == NULL || + dwarf_formref_die(&attr, die_mem) == NULL) + return NULL; + + tag = dwarf_tag(die_mem); + vr_die = die_mem; + } while (tag == DW_TAG_const_type || + tag == DW_TAG_restrict_type || + tag == DW_TAG_volatile_type || + tag == DW_TAG_shared_type || + tag == DW_TAG_typedef); + + return die_mem; +} + +/* Return values for die_find callbacks */ +enum { + DIE_FIND_CB_FOUND = 0, /* End of Search */ + DIE_FIND_CB_CHILD = 1, /* Search only children */ + DIE_FIND_CB_SIBLING = 2, /* Search only siblings */ + DIE_FIND_CB_CONTINUE = 3, /* Search children and siblings */ +}; + +/* Search a child die */ +static Dwarf_Die *die_find_child(Dwarf_Die *rt_die, + int (*callback)(Dwarf_Die *, void *), + void *data, Dwarf_Die *die_mem) +{ + Dwarf_Die child_die; + int ret; + + ret = dwarf_child(rt_die, die_mem); + if (ret != 0) + return NULL; + + do { + ret = callback(die_mem, data); + if (ret == DIE_FIND_CB_FOUND) + return die_mem; + + if ((ret & DIE_FIND_CB_CHILD) && + die_find_child(die_mem, callback, data, &child_die)) { + memcpy(die_mem, &child_die, sizeof(Dwarf_Die)); + return die_mem; + } + } while ((ret & DIE_FIND_CB_SIBLING) && + dwarf_siblingof(die_mem, die_mem) == 0); + + return NULL; +} + struct __addr_die_search_param { Dwarf_Addr addr; Dwarf_Die *die_mem; @@ -205,8 +282,8 @@ static int __die_search_func_cb(Dwarf_Die *fn_die, void *data) } /* Search a real subprogram including this line, */ -static Dwarf_Die *die_get_real_subprogram(Dwarf_Die *cu_die, Dwarf_Addr addr, - Dwarf_Die *die_mem) +static Dwarf_Die *die_find_real_subprogram(Dwarf_Die *cu_die, Dwarf_Addr addr, + Dwarf_Die *die_mem) { struct __addr_die_search_param ad; ad.addr = addr; @@ -218,77 +295,64 @@ static Dwarf_Die *die_get_real_subprogram(Dwarf_Die *cu_die, Dwarf_Addr addr, return die_mem; } -/* Similar to dwarf_getfuncs, but returns inlined_subroutine if exists. */ -static Dwarf_Die *die_get_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, - Dwarf_Die *die_mem) +/* die_find callback for inline function search */ +static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data) { - Dwarf_Die child_die; - int ret; + Dwarf_Addr *addr = data; - ret = dwarf_child(sp_die, die_mem); - if (ret != 0) - return NULL; + if (dwarf_tag(die_mem) == DW_TAG_inlined_subroutine && + dwarf_haspc(die_mem, *addr)) + return DIE_FIND_CB_FOUND; - do { - if (dwarf_tag(die_mem) == DW_TAG_inlined_subroutine && - dwarf_haspc(die_mem, addr)) - return die_mem; - - if (die_get_inlinefunc(die_mem, addr, &child_die)) { - memcpy(die_mem, &child_die, sizeof(Dwarf_Die)); - return die_mem; - } - } while (dwarf_siblingof(die_mem, die_mem) == 0); - - return NULL; + return DIE_FIND_CB_CONTINUE; } -/* Compare diename and tname */ -static bool die_compare_name(Dwarf_Die *dw_die, const char *tname) +/* Similar to dwarf_getfuncs, but returns inlined_subroutine if exists. */ +static Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, + Dwarf_Die *die_mem) { - const char *name; - name = dwarf_diename(dw_die); - DIE_IF(name == NULL); - return strcmp(tname, name); + return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem); } -/* Get entry pc(or low pc, 1st entry of ranges) of the die */ -static Dwarf_Addr die_get_entrypc(Dwarf_Die *dw_die) +static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data) { - Dwarf_Addr epc; - int ret; + const char *name = data; + int tag; - ret = dwarf_entrypc(dw_die, &epc); - DIE_IF(ret == -1); - return epc; + tag = dwarf_tag(die_mem); + if ((tag == DW_TAG_formal_parameter || + tag == DW_TAG_variable) && + (die_compare_name(die_mem, name) == 0)) + return DIE_FIND_CB_FOUND; + + return DIE_FIND_CB_CONTINUE; } -/* Get a variable die */ +/* Find a variable called 'name' */ static Dwarf_Die *die_find_variable(Dwarf_Die *sp_die, const char *name, Dwarf_Die *die_mem) { - Dwarf_Die child_die; - int tag; - int ret; + return die_find_child(sp_die, __die_find_variable_cb, (void *)name, + die_mem); +} - ret = dwarf_child(sp_die, die_mem); - if (ret != 0) - return NULL; +static int __die_find_member_cb(Dwarf_Die *die_mem, void *data) +{ + const char *name = data; - do { - tag = dwarf_tag(die_mem); - if ((tag == DW_TAG_formal_parameter || - tag == DW_TAG_variable) && - (die_compare_name(die_mem, name) == 0)) - return die_mem; + if ((dwarf_tag(die_mem) == DW_TAG_member) && + (die_compare_name(die_mem, name) == 0)) + return DIE_FIND_CB_FOUND; - if (die_find_variable(die_mem, name, &child_die)) { - memcpy(die_mem, &child_die, sizeof(Dwarf_Die)); - return die_mem; - } - } while (dwarf_siblingof(die_mem, die_mem) == 0); + return DIE_FIND_CB_SIBLING; +} - return NULL; +/* Find a member called 'name' */ +static Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name, + Dwarf_Die *die_mem) +{ + return die_find_child(st_die, __die_find_member_cb, (void *)name, + die_mem); } /* @@ -296,19 +360,20 @@ static Dwarf_Die *die_find_variable(Dwarf_Die *sp_die, const char *name, */ /* Show a location */ -static void show_location(Dwarf_Op *op, struct probe_finder *pf) +static void convert_location(Dwarf_Op *op, struct probe_finder *pf) { unsigned int regn; Dwarf_Word offs = 0; - int deref = 0, ret; + bool ref = false; const char *regs; + struct kprobe_trace_arg *tvar = pf->tvar; /* TODO: support CFA */ /* If this is based on frame buffer, set the offset */ if (op->atom == DW_OP_fbreg) { if (pf->fb_ops == NULL) die("The attribute of frame base is not supported.\n"); - deref = 1; + ref = true; offs = op->number; op = &pf->fb_ops[0]; } @@ -316,13 +381,13 @@ static void show_location(Dwarf_Op *op, struct probe_finder *pf) if (op->atom >= DW_OP_breg0 && op->atom <= DW_OP_breg31) { regn = op->atom - DW_OP_breg0; offs += op->number; - deref = 1; + ref = true; } else if (op->atom >= DW_OP_reg0 && op->atom <= DW_OP_reg31) { regn = op->atom - DW_OP_reg0; } else if (op->atom == DW_OP_bregx) { regn = op->number; offs += op->number2; - deref = 1; + ref = true; } else if (op->atom == DW_OP_regx) { regn = op->number; } else @@ -332,17 +397,71 @@ static void show_location(Dwarf_Op *op, struct probe_finder *pf) if (!regs) die("%u exceeds max register number.", regn); - if (deref) - ret = snprintf(pf->buf, pf->len, " %s=+%ju(%s)", - pf->var, (uintmax_t)offs, regs); - else - ret = snprintf(pf->buf, pf->len, " %s=%s", pf->var, regs); - DIE_IF(ret < 0); - DIE_IF(ret >= pf->len); + tvar->value = xstrdup(regs); + if (ref) { + tvar->ref = xzalloc(sizeof(struct kprobe_trace_arg_ref)); + tvar->ref->offset = (long)offs; + } +} + +static void convert_variable_fields(Dwarf_Die *vr_die, const char *varname, + struct perf_probe_arg_field *field, + struct kprobe_trace_arg_ref **ref_ptr) +{ + struct kprobe_trace_arg_ref *ref = *ref_ptr; + Dwarf_Attribute attr; + Dwarf_Die member; + Dwarf_Die type; + Dwarf_Word offs; + + pr_debug("converting %s in %s\n", field->name, varname); + if (die_get_real_type(vr_die, &type) == NULL) + die("Failed to get a type information of %s.", varname); + + /* Check the pointer and dereference */ + if (dwarf_tag(&type) == DW_TAG_pointer_type) { + if (!field->ref) + die("Semantic error: %s must be referred by '->'", + field->name); + /* Get the type pointed by this pointer */ + if (die_get_real_type(&type, &type) == NULL) + die("Failed to get a type information of %s.", varname); + + ref = xzalloc(sizeof(struct kprobe_trace_arg_ref)); + if (*ref_ptr) + (*ref_ptr)->next = ref; + else + *ref_ptr = ref; + } else { + if (field->ref) + die("Semantic error: %s must be referred by '.'", + field->name); + if (!ref) + die("Structure on a register is not supported yet."); + } + + /* Verify it is a data structure */ + if (dwarf_tag(&type) != DW_TAG_structure_type) + die("%s is not a data structure.", varname); + + if (die_find_member(&type, field->name, &member) == NULL) + die("%s(tyep:%s) has no member %s.", varname, + dwarf_diename(&type), field->name); + + /* Get the offset of the field */ + if (dwarf_attr(&member, DW_AT_data_member_location, &attr) == NULL || + dwarf_formudata(&attr, &offs) != 0) + die("Failed to get the offset of %s.", field->name); + ref->offset += (long)offs; + + /* Converting next field */ + if (field->next) + convert_variable_fields(&member, field->name, field->next, + &ref); } /* Show a variables in kprobe event format */ -static void show_variable(Dwarf_Die *vr_die, struct probe_finder *pf) +static void convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf) { Dwarf_Attribute attr; Dwarf_Op *expr; @@ -352,111 +471,100 @@ static void show_variable(Dwarf_Die *vr_die, struct probe_finder *pf) if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL) goto error; /* TODO: handle more than 1 exprs */ - ret = dwarf_getlocation_addr(&attr, (pf->addr - pf->cu_base), - &expr, &nexpr, 1); + ret = dwarf_getlocation_addr(&attr, pf->addr, &expr, &nexpr, 1); if (ret <= 0 || nexpr == 0) goto error; - show_location(expr, pf); + convert_location(expr, pf); + + if (pf->pvar->field) + convert_variable_fields(vr_die, pf->pvar->name, + pf->pvar->field, &pf->tvar->ref); /* *expr will be cached in libdw. Don't free it. */ return ; error: /* TODO: Support const_value */ die("Failed to find the location of %s at this address.\n" - " Perhaps, it has been optimized out.", pf->var); + " Perhaps, it has been optimized out.", pf->pvar->name); } /* Find a variable in a subprogram die */ static void find_variable(Dwarf_Die *sp_die, struct probe_finder *pf) { - int ret; Dwarf_Die vr_die; + char buf[128]; /* TODO: Support struct members and arrays */ - if (!is_c_varname(pf->var)) { - /* Output raw parameters */ - ret = snprintf(pf->buf, pf->len, " %s", pf->var); - DIE_IF(ret < 0); - DIE_IF(ret >= pf->len); - return ; + if (!is_c_varname(pf->pvar->name)) { + /* Copy raw parameters */ + pf->tvar->value = xstrdup(pf->pvar->name); + } else { + synthesize_perf_probe_arg(pf->pvar, buf, 128); + pf->tvar->name = xstrdup(buf); + pr_debug("Searching '%s' variable in context.\n", + pf->pvar->name); + /* Search child die for local variables and parameters. */ + if (!die_find_variable(sp_die, pf->pvar->name, &vr_die)) + die("Failed to find '%s' in this function.", + pf->pvar->name); + convert_variable(&vr_die, pf); } - - pr_debug("Searching '%s' variable in context.\n", pf->var); - /* Search child die for local variables and parameters. */ - if (!die_find_variable(sp_die, pf->var, &vr_die)) - die("Failed to find '%s' in this function.", pf->var); - - show_variable(&vr_die, pf); } /* Show a probe point to output buffer */ -static void show_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf) +static void convert_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf) { - struct probe_point *pp = pf->pp; + struct kprobe_trace_event *tev; Dwarf_Addr eaddr; Dwarf_Die die_mem; const char *name; - char tmp[MAX_PROBE_BUFFER]; - int ret, i, len; + int ret, i; Dwarf_Attribute fb_attr; size_t nops; + if (pf->ntevs == MAX_PROBES) + die("Too many( > %d) probe point found.\n", MAX_PROBES); + tev = &pf->tevs[pf->ntevs++]; + /* If no real subprogram, find a real one */ if (!sp_die || dwarf_tag(sp_die) != DW_TAG_subprogram) { - sp_die = die_get_real_subprogram(&pf->cu_die, + sp_die = die_find_real_subprogram(&pf->cu_die, pf->addr, &die_mem); if (!sp_die) die("Probe point is not found in subprograms."); } - /* Output name of probe point */ + /* Copy the name of probe point */ name = dwarf_diename(sp_die); if (name) { dwarf_entrypc(sp_die, &eaddr); - ret = snprintf(tmp, MAX_PROBE_BUFFER, "%s+%lu", name, - (unsigned long)(pf->addr - eaddr)); - /* Copy the function name if possible */ - if (!pp->function) { - pp->function = strdup(name); - pp->offset = (size_t)(pf->addr - eaddr); - } - } else { + tev->point.symbol = xstrdup(name); + tev->point.offset = (unsigned long)(pf->addr - eaddr); + } else /* This function has no name. */ - ret = snprintf(tmp, MAX_PROBE_BUFFER, "0x%jx", - (uintmax_t)pf->addr); - if (!pp->function) { - /* TODO: Use _stext */ - pp->function = strdup(""); - pp->offset = (size_t)pf->addr; - } - } - DIE_IF(ret < 0); - DIE_IF(ret >= MAX_PROBE_BUFFER); - len = ret; - pr_debug("Probe point found: %s\n", tmp); + tev->point.offset = (unsigned long)pf->addr; + + pr_debug("Probe point found: %s+%lu\n", tev->point.symbol, + tev->point.offset); /* Get the frame base attribute/ops */ dwarf_attr(sp_die, DW_AT_frame_base, &fb_attr); - ret = dwarf_getlocation_addr(&fb_attr, (pf->addr - pf->cu_base), - &pf->fb_ops, &nops, 1); + ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1); if (ret <= 0 || nops == 0) pf->fb_ops = NULL; /* Find each argument */ /* TODO: use dwarf_cfi_addrframe */ - for (i = 0; i < pp->nr_args; i++) { - pf->var = pp->args[i]; - pf->buf = &tmp[len]; - pf->len = MAX_PROBE_BUFFER - len; + tev->nargs = pf->pev->nargs; + tev->args = xzalloc(sizeof(struct kprobe_trace_arg) * tev->nargs); + for (i = 0; i < pf->pev->nargs; i++) { + pf->pvar = &pf->pev->args[i]; + pf->tvar = &tev->args[i]; find_variable(sp_die, pf); - len += strlen(pf->buf); } /* *pf->fb_ops will be cached in libdw. Don't free it. */ pf->fb_ops = NULL; - - pp->probes[pp->found] = strdup(tmp); - pp->found++; } /* Find probe point from its line number */ @@ -488,7 +596,7 @@ static void find_probe_point_by_line(struct probe_finder *pf) (int)i, lineno, (uintmax_t)addr); pf->addr = addr; - show_probe_point(NULL, pf); + convert_probe_point(NULL, pf); /* Continuing, because target line might be inlined. */ } } @@ -505,8 +613,7 @@ static int find_lazy_match_lines(struct list_head *head, if (fd < 0) die("failed to open %s", fname); DIE_IF(fstat(fd, &st) < 0); - fbuf = malloc(st.st_size + 2); - DIE_IF(fbuf == NULL); + fbuf = xmalloc(st.st_size + 2); DIE_IF(read(fd, fbuf, st.st_size) < 0); close(fd); fbuf[st.st_size] = '\n'; /* Dummy line */ @@ -540,7 +647,7 @@ static void find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) if (list_empty(&pf->lcache)) { /* Matching lazy line pattern */ ret = find_lazy_match_lines(&pf->lcache, pf->fname, - pf->pp->lazy_line); + pf->pev->point.lazy_line); if (ret <= 0) die("No matched lines found in %s.", pf->fname); } @@ -565,7 +672,7 @@ static void find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) if (!dwarf_haspc(sp_die, addr)) continue; /* Address filtering 2: No child include addr? */ - if (die_get_inlinefunc(sp_die, addr, &die_mem)) + if (die_find_inlinefunc(sp_die, addr, &die_mem)) continue; } @@ -573,7 +680,7 @@ static void find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) (int)i, lineno, (unsigned long long)addr); pf->addr = addr; - show_probe_point(sp_die, pf); + convert_probe_point(sp_die, pf); /* Continuing, because target line might be inlined. */ } /* TODO: deallocate lines, but how? */ @@ -582,7 +689,7 @@ static void find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) static int probe_point_inline_cb(Dwarf_Die *in_die, void *data) { struct probe_finder *pf = (struct probe_finder *)data; - struct probe_point *pp = pf->pp; + struct perf_probe_point *pp = &pf->pev->point; if (pp->lazy_line) find_probe_point_lazy(in_die, pf); @@ -593,7 +700,7 @@ static int probe_point_inline_cb(Dwarf_Die *in_die, void *data) pr_debug("found inline addr: 0x%jx\n", (uintmax_t)pf->addr); - show_probe_point(in_die, pf); + convert_probe_point(in_die, pf); } return DWARF_CB_OK; @@ -603,7 +710,7 @@ static int probe_point_inline_cb(Dwarf_Die *in_die, void *data) static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) { struct probe_finder *pf = (struct probe_finder *)data; - struct probe_point *pp = pf->pp; + struct perf_probe_point *pp = &pf->pev->point; /* Check tag and diename */ if (dwarf_tag(sp_die) != DW_TAG_subprogram || @@ -623,7 +730,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) pf->addr = die_get_entrypc(sp_die); pf->addr += pp->offset; /* TODO: Check the address in this function */ - show_probe_point(sp_die, pf); + convert_probe_point(sp_die, pf); } } else /* Inlined function: search instances */ @@ -637,21 +744,25 @@ static void find_probe_point_by_func(struct probe_finder *pf) dwarf_getfuncs(&pf->cu_die, probe_point_search_cb, pf, 0); } -/* Find a probe point */ -int find_probe_point(int fd, struct probe_point *pp) +/* Find kprobe_trace_events specified by perf_probe_event from debuginfo */ +int find_kprobe_trace_events(int fd, struct perf_probe_event *pev, + struct kprobe_trace_event **tevs) { - struct probe_finder pf = {.pp = pp}; - int ret; + struct probe_finder pf = {.pev = pev}; + struct perf_probe_point *pp = &pev->point; Dwarf_Off off, noff; size_t cuhl; Dwarf_Die *diep; Dwarf *dbg; + pf.tevs = xzalloc(sizeof(struct kprobe_trace_event) * MAX_PROBES); + *tevs = pf.tevs; + pf.ntevs = 0; + dbg = dwarf_begin(fd, DWARF_C_READ); if (!dbg) return -ENOENT; - pp->found = 0; off = 0; line_list__init(&pf.lcache); /* Loop on CUs (Compilation Unit) */ @@ -668,10 +779,6 @@ int find_probe_point(int fd, struct probe_point *pp) pf.fname = NULL; if (!pp->file || pf.fname) { - /* Save CU base address (for frame_base) */ - ret = dwarf_lowpc(&pf.cu_die, &pf.cu_base); - if (ret != 0) - pf.cu_base = 0; if (pp->function) find_probe_point_by_func(&pf); else if (pp->lazy_line) @@ -686,9 +793,79 @@ int find_probe_point(int fd, struct probe_point *pp) line_list__free(&pf.lcache); dwarf_end(dbg); - return pp->found; + return pf.ntevs; } +/* Reverse search */ +int find_perf_probe_point(int fd, unsigned long addr, + struct perf_probe_point *ppt) +{ + Dwarf_Die cudie, spdie, indie; + Dwarf *dbg; + Dwarf_Line *line; + Dwarf_Addr laddr, eaddr; + const char *tmp; + int lineno, ret = 0; + + dbg = dwarf_begin(fd, DWARF_C_READ); + if (!dbg) + return -ENOENT; + + /* Find cu die */ + if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr, &cudie)) + return -EINVAL; + + /* Find a corresponding line */ + line = dwarf_getsrc_die(&cudie, (Dwarf_Addr)addr); + if (line) { + dwarf_lineaddr(line, &laddr); + if ((Dwarf_Addr)addr == laddr) { + dwarf_lineno(line, &lineno); + ppt->line = lineno; + + tmp = dwarf_linesrc(line, NULL, NULL); + DIE_IF(!tmp); + ppt->file = xstrdup(tmp); + ret = 1; + } + } + + /* Find a corresponding function */ + if (die_find_real_subprogram(&cudie, (Dwarf_Addr)addr, &spdie)) { + tmp = dwarf_diename(&spdie); + if (!tmp) + goto end; + + dwarf_entrypc(&spdie, &eaddr); + if (!lineno) { + /* We don't have a line number, let's use offset */ + ppt->function = xstrdup(tmp); + ppt->offset = addr - (unsigned long)eaddr; + ret = 1; + goto end; + } + if (die_find_inlinefunc(&spdie, (Dwarf_Addr)addr, &indie)) { + /* addr in an inline function */ + tmp = dwarf_diename(&indie); + if (!tmp) + goto end; + dwarf_decl_line(&indie, &lineno); + } else { + if (eaddr == addr) /* No offset: function entry */ + lineno = ppt->line; + else + dwarf_decl_line(&spdie, &lineno); + } + ppt->function = xstrdup(tmp); + ppt->line -= lineno; /* Make a relative line number */ + } + +end: + dwarf_end(dbg); + return ret; +} + + /* Find line range from its line number */ static void find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf) { @@ -720,7 +897,7 @@ static void find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf) continue; /* Address filtering 2: No child include addr? */ - if (die_get_inlinefunc(sp_die, addr, &die_mem)) + if (die_find_inlinefunc(sp_die, addr, &die_mem)) continue; } @@ -731,7 +908,7 @@ static void find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf) /* Copy real path */ if (!lf->lr->path) - lf->lr->path = strdup(src); + lf->lr->path = xstrdup(src); line_list__add_line(&lf->lr->line_list, (unsigned int)lineno); } /* Update status */ diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index d1a651793ba..2f2307d4139 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -3,6 +3,7 @@ #include <stdbool.h> #include "util.h" +#include "probe-event.h" #define MAX_PATH_LEN 256 #define MAX_PROBE_BUFFER 1024 @@ -14,68 +15,36 @@ static inline int is_c_varname(const char *name) return isalpha(name[0]) || name[0] == '_'; } -struct probe_point { - char *event; /* Event name */ - char *group; /* Event group */ - - /* Inputs */ - char *file; /* File name */ - int line; /* Line number */ - char *lazy_line; /* Lazy line pattern */ - - char *function; /* Function name */ - int offset; /* Offset bytes */ - - int nr_args; /* Number of arguments */ - char **args; /* Arguments */ - - int retprobe; /* Return probe */ - - /* Output */ - int found; /* Number of found probe points */ - char *probes[MAX_PROBES]; /* Output buffers (will be allocated)*/ -}; - -/* Line number container */ -struct line_node { - struct list_head list; - unsigned int line; -}; +#ifndef NO_DWARF_SUPPORT +/* Find kprobe_trace_events specified by perf_probe_event from debuginfo */ +extern int find_kprobe_trace_events(int fd, struct perf_probe_event *pev, + struct kprobe_trace_event **tevs); -/* Line range */ -struct line_range { - char *file; /* File name */ - char *function; /* Function name */ - unsigned int start; /* Start line number */ - unsigned int end; /* End line number */ - int offset; /* Start line offset */ - char *path; /* Real path name */ - struct list_head line_list; /* Visible lines */ -}; +/* Find a perf_probe_point from debuginfo */ +extern int find_perf_probe_point(int fd, unsigned long addr, + struct perf_probe_point *ppt); -#ifndef NO_DWARF_SUPPORT -extern int find_probe_point(int fd, struct probe_point *pp); extern int find_line_range(int fd, struct line_range *lr); #include <dwarf.h> #include <libdw.h> struct probe_finder { - struct probe_point *pp; /* Target probe point */ + struct perf_probe_event *pev; /* Target probe event */ + int ntevs; /* number of trace events */ + struct kprobe_trace_event *tevs; /* Result trace events */ /* For function searching */ Dwarf_Addr addr; /* Address */ - const char *fname; /* File name */ + const char *fname; /* Real file name */ int lno; /* Line number */ Dwarf_Die cu_die; /* Current CU */ + struct list_head lcache; /* Line cache for lazy match */ /* For variable searching */ Dwarf_Op *fb_ops; /* Frame base attribute */ - Dwarf_Addr cu_base; /* Current CU base address */ - const char *var; /* Current variable name */ - char *buf; /* Current output buffer */ - int len; /* Length of output buffer */ - struct list_head lcache; /* Line cache for lazy match */ + struct perf_probe_arg *pvar; /* Current target variable */ + struct kprobe_trace_arg *tvar; /* Current result variable */ }; struct line_finder { diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 5c33417eebb..34d73395baa 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -86,4 +86,13 @@ static inline struct map * { return map_groups__new_module(&self->kmaps, start, filename); } + +#ifdef NO_NEWT_SUPPORT +static inline void perf_session__browse_hists(struct rb_root *hists __used, + u64 session_total __used, + const char *helpline __used) {} +#else +void perf_session__browse_hists(struct rb_root *hists, u64 session_total, + const char *helpline); +#endif #endif /* __PERF_SESSION_H */ diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 323c0aea0a9..3eb9de4baef 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -18,18 +18,6 @@ #define NT_GNU_BUILD_ID 3 #endif -enum dso_origin { - DSO__ORIG_KERNEL = 0, - DSO__ORIG_JAVA_JIT, - DSO__ORIG_BUILD_ID_CACHE, - DSO__ORIG_FEDORA, - DSO__ORIG_UBUNTU, - DSO__ORIG_BUILDID, - DSO__ORIG_DSO, - DSO__ORIG_KMODULE, - DSO__ORIG_NOT_FOUND, -}; - static void dsos__add(struct list_head *head, struct dso *dso); static struct map *map__new2(u64 start, struct dso *dso, enum map_type type); static int dso__load_kernel_sym(struct dso *self, struct map *map, @@ -163,9 +151,17 @@ void dso__set_long_name(struct dso *self, char *name) self->long_name_len = strlen(name); } +static void dso__set_short_name(struct dso *self, const char *name) +{ + if (name == NULL) + return; + self->short_name = name; + self->short_name_len = strlen(name); +} + static void dso__set_basename(struct dso *self) { - self->short_name = basename(self->long_name); + dso__set_short_name(self, basename(self->long_name)); } struct dso *dso__new(const char *name) @@ -176,7 +172,7 @@ struct dso *dso__new(const char *name) int i; strcpy(self->name, name); dso__set_long_name(self, self->name); - self->short_name = self->name; + dso__set_short_name(self, self->name); for (i = 0; i < MAP__NR_TYPES; ++i) self->symbols[i] = self->symbol_names[i] = RB_ROOT; self->slen_calculated = 0; @@ -862,8 +858,8 @@ out_close: if (err == 0) return nr; out: - pr_warning("%s: problems reading %s PLT info.\n", - __func__, self->long_name); + pr_debug("%s: problems reading %s PLT info.\n", + __func__, self->long_name); return 0; } @@ -897,7 +893,6 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, struct kmap *kmap = self->kernel ? map__kmap(map) : NULL; struct map *curr_map = map; struct dso *curr_dso = self; - size_t dso_name_len = strlen(self->short_name); Elf_Data *symstrs, *secstrs; uint32_t nr_syms; int err = -1; @@ -987,7 +982,8 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, char dso_name[PATH_MAX]; if (strcmp(section_name, - curr_dso->short_name + dso_name_len) == 0) + (curr_dso->short_name + + self->short_name_len)) == 0) goto new_symbol; if (strcmp(section_name, ".text") == 0) { @@ -1017,7 +1013,7 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, } curr_map->map_ip = identity__map_ip; curr_map->unmap_ip = identity__map_ip; - curr_dso->origin = DSO__ORIG_KERNEL; + curr_dso->origin = self->origin; map_groups__insert(kmap->kmaps, curr_map); dsos__add(&dsos__kernel, curr_dso); dso__set_loaded(curr_dso, map->type); @@ -1782,7 +1778,7 @@ struct dso *dso__new_kernel(const char *name) struct dso *self = dso__new(name ?: "[kernel.kallsyms]"); if (self != NULL) { - self->short_name = "[kernel]"; + dso__set_short_name(self, "[kernel]"); self->kernel = 1; } @@ -1887,6 +1883,17 @@ out_fail: return -1; } +size_t vmlinux_path__fprintf(FILE *fp) +{ + int i; + size_t printed = 0; + + for (i = 0; i < vmlinux_path__nr_entries; ++i) + printed += fprintf(fp, "[%d] %s\n", i, vmlinux_path[i]); + + return printed; +} + static int setup_list(struct strlist **list, const char *list_str, const char *list_name) { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 280dadd32a0..0da2455d5b9 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -106,13 +106,15 @@ struct dso { u8 has_build_id:1; u8 kernel:1; u8 hit:1; + u8 annotate_warned:1; unsigned char origin; u8 sorted_by_name; u8 loaded; u8 build_id[BUILD_ID_SIZE]; - u16 long_name_len; const char *short_name; char *long_name; + u16 long_name_len; + u16 short_name_len; char name[0]; }; @@ -149,6 +151,19 @@ size_t dsos__fprintf_buildid(FILE *fp, bool with_hits); size_t dso__fprintf_buildid(struct dso *self, FILE *fp); size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp); + +enum dso_origin { + DSO__ORIG_KERNEL = 0, + DSO__ORIG_JAVA_JIT, + DSO__ORIG_BUILD_ID_CACHE, + DSO__ORIG_FEDORA, + DSO__ORIG_UBUNTU, + DSO__ORIG_BUILDID, + DSO__ORIG_DSO, + DSO__ORIG_KMODULE, + DSO__ORIG_NOT_FOUND, +}; + char dso__symtab_origin(const struct dso *self); void dso__set_long_name(struct dso *self, char *name); void dso__set_build_id(struct dso *self, void *build_id); @@ -168,4 +183,6 @@ int kallsyms__parse(const char *filename, void *arg, int symbol__init(void); bool symbol_type__is_a(char symbol_type, enum map_type map_type); +size_t vmlinux_path__fprintf(FILE *fp); + #endif /* __PERF_SYMBOL */ diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index fa968312ee7..ea6506234d5 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -7,6 +7,37 @@ #include "util.h" #include "debug.h" +int find_all_tid(int pid, pid_t ** all_tid) +{ + char name[256]; + int items; + struct dirent **namelist = NULL; + int ret = 0; + int i; + + sprintf(name, "/proc/%d/task", pid); + items = scandir(name, &namelist, NULL, NULL); + if (items <= 0) + return -ENOENT; + *all_tid = malloc(sizeof(pid_t) * items); + if (!*all_tid) { + ret = -ENOMEM; + goto failure; + } + + for (i = 0; i < items; i++) + (*all_tid)[i] = atoi(namelist[i]->d_name); + + ret = items; + +failure: + for (i=0; i<items; i++) + free(namelist[i]); + free(namelist); + + return ret; +} + void map_groups__init(struct map_groups *self) { int i; @@ -348,3 +379,4 @@ struct symbol *map_groups__find_symbol(struct map_groups *self, return NULL; } + diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index dcf70303e58..a81426a891b 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -23,6 +23,7 @@ struct thread { int comm_len; }; +int find_all_tid(int pid, pid_t ** all_tid); void map_groups__init(struct map_groups *self); int thread__set_comm(struct thread *self, const char *comm); int thread__comm_len(struct thread *self); diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 0f5b2a6f108..52701087ce0 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -295,6 +295,13 @@ extern void *xmemdupz(const void *data, size_t len); extern char *xstrndup(const char *str, size_t len); extern void *xrealloc(void *ptr, size_t size) __attribute__((weak)); +static inline void *xzalloc(size_t size) +{ + void *buf = xmalloc(size); + + return memset(buf, 0, size); +} + static inline void *zalloc(size_t size) { return calloc(1, size); @@ -309,6 +316,7 @@ static inline int has_extension(const char *filename, const char *ext) { size_t len = strlen(filename); size_t extlen = strlen(ext); + return len > extlen && !memcmp(filename + len - extlen, ext, extlen); } @@ -322,6 +330,7 @@ static inline int has_extension(const char *filename, const char *ext) #undef isalnum #undef tolower #undef toupper + extern unsigned char sane_ctype[256]; #define GIT_SPACE 0x01 #define GIT_DIGIT 0x02 |