From 4f41c013f553957765902fb01475972f0af3e8e7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 May 2010 18:08:32 +0200 Subject: perf/ftrace: Optimize perf/tracepoint interaction for single events When we've got but a single event per tracepoint there is no reason to try and multiplex it so don't. Signed-off-by: Peter Zijlstra Tested-by: Ingo Molnar Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/trace/trace_event_perf.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'kernel/trace/trace_event_perf.c') diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 0565bb42566..89b780a7c52 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -27,13 +27,15 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)]) /* Count the events in use (per event id, not per instance) */ static int total_ref_count; -static int perf_trace_event_enable(struct ftrace_event_call *event) +static int perf_trace_event_enable(struct ftrace_event_call *event, void *data) { char *buf; int ret = -ENOMEM; - if (event->perf_refcount++ > 0) + if (event->perf_refcount++ > 0) { + event->perf_data = NULL; return 0; + } if (!total_ref_count) { buf = (char *)alloc_percpu(perf_trace_t); @@ -51,6 +53,7 @@ static int perf_trace_event_enable(struct ftrace_event_call *event) ret = event->perf_event_enable(event); if (!ret) { + event->perf_data = data; total_ref_count++; return 0; } @@ -68,7 +71,7 @@ fail_buf: return ret; } -int perf_trace_enable(int event_id) +int perf_trace_enable(int event_id, void *data) { struct ftrace_event_call *event; int ret = -EINVAL; @@ -77,7 +80,7 @@ int perf_trace_enable(int event_id) list_for_each_entry(event, &ftrace_events, list) { if (event->id == event_id && event->perf_event_enable && try_module_get(event->mod)) { - ret = perf_trace_event_enable(event); + ret = perf_trace_event_enable(event, data); break; } } -- cgit v1.2.3-70-g09d2 From b7e2ecef92d2e7785e6d76b41e5ba8bcbc45259d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 19 May 2010 10:52:27 +0200 Subject: perf, trace: Optimize tracepoints by removing IRQ-disable from perf/tracepoint interaction Improves performance. Acked-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Mike Galbraith Cc: Steven Rostedt LKML-Reference: <1274259525.5605.10352.camel@twins> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 9 ++--- include/trace/ftrace.h | 17 ++++------ kernel/trace/trace_event_perf.c | 73 ++++++++++++++++------------------------- kernel/trace/trace_kprobe.c | 10 +++--- kernel/trace/trace_syscalls.c | 10 +++--- 5 files changed, 47 insertions(+), 72 deletions(-) (limited to 'kernel/trace/trace_event_perf.c') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index a9775dd7f7f..126071bc90a 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -197,20 +197,17 @@ extern void perf_trace_disable(int event_id); extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); -extern void * -perf_trace_buf_prepare(int size, unsigned short type, int *rctxp, - unsigned long *irq_flags); +extern void *perf_trace_buf_prepare(int size, unsigned short type, + struct pt_regs *regs, int *rctxp); static inline void perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, - u64 count, unsigned long irq_flags, struct pt_regs *regs, - void *event) + u64 count, struct pt_regs *regs, void *event) { struct trace_entry *entry = raw_data; perf_tp_event(entry->type, addr, count, raw_data, size, regs, event); perf_swevent_put_recursion_context(rctx); - local_irq_restore(irq_flags); } #endif diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 1016b216293..f282885057d 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -768,7 +768,6 @@ perf_trace_templ_##call(struct ftrace_event_call *event_call, \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ struct ftrace_raw_##call *entry; \ u64 __addr = 0, __count = 1; \ - unsigned long irq_flags; \ int __entry_size; \ int __data_size; \ int rctx; \ @@ -781,17 +780,18 @@ perf_trace_templ_##call(struct ftrace_event_call *event_call, \ if (WARN_ONCE(__entry_size > PERF_MAX_TRACE_SIZE, \ "profile buffer not large enough")) \ return; \ + \ entry = (struct ftrace_raw_##call *)perf_trace_buf_prepare( \ - __entry_size, event_call->id, &rctx, &irq_flags); \ + __entry_size, event_call->id, __regs, &rctx); \ if (!entry) \ return; \ + \ tstruct \ \ { assign; } \ \ perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \ - __count, irq_flags, __regs, \ - event_call->perf_data); \ + __count, __regs, event_call->perf_data); \ } #undef DEFINE_EVENT @@ -799,13 +799,10 @@ perf_trace_templ_##call(struct ftrace_event_call *event_call, \ static notrace void perf_trace_##call(proto) \ { \ struct ftrace_event_call *event_call = &event_##call; \ - struct pt_regs *__regs = &get_cpu_var(perf_trace_regs); \ - \ - perf_fetch_caller_regs(__regs, 1); \ - \ - perf_trace_templ_##template(event_call, __regs, args); \ + struct pt_regs __regs; \ \ - put_cpu_var(perf_trace_regs); \ + perf_fetch_caller_regs(&__regs, 1); \ + perf_trace_templ_##template(event_call, &__regs, args); \ } #undef DEFINE_EVENT_PRINT diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 89b780a7c52..a1304f8c444 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -9,13 +9,9 @@ #include #include "trace.h" -DEFINE_PER_CPU(struct pt_regs, perf_trace_regs); -EXPORT_PER_CPU_SYMBOL_GPL(perf_trace_regs); - EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs); -static char *perf_trace_buf; -static char *perf_trace_buf_nmi; +static char *perf_trace_buf[4]; /* * Force it to be aligned to unsigned long to avoid misaligned accesses @@ -29,7 +25,6 @@ static int total_ref_count; static int perf_trace_event_enable(struct ftrace_event_call *event, void *data) { - char *buf; int ret = -ENOMEM; if (event->perf_refcount++ > 0) { @@ -38,17 +33,16 @@ static int perf_trace_event_enable(struct ftrace_event_call *event, void *data) } if (!total_ref_count) { - buf = (char *)alloc_percpu(perf_trace_t); - if (!buf) - goto fail_buf; - - rcu_assign_pointer(perf_trace_buf, buf); + char *buf; + int i; - buf = (char *)alloc_percpu(perf_trace_t); - if (!buf) - goto fail_buf_nmi; + for (i = 0; i < 4; i++) { + buf = (char *)alloc_percpu(perf_trace_t); + if (!buf) + goto fail_buf; - rcu_assign_pointer(perf_trace_buf_nmi, buf); + rcu_assign_pointer(perf_trace_buf[i], buf); + } } ret = event->perf_event_enable(event); @@ -58,14 +52,15 @@ static int perf_trace_event_enable(struct ftrace_event_call *event, void *data) return 0; } -fail_buf_nmi: +fail_buf: if (!total_ref_count) { - free_percpu(perf_trace_buf_nmi); - free_percpu(perf_trace_buf); - perf_trace_buf_nmi = NULL; - perf_trace_buf = NULL; + int i; + + for (i = 0; i < 4; i++) { + free_percpu(perf_trace_buf[i]); + perf_trace_buf[i] = NULL; + } } -fail_buf: event->perf_refcount--; return ret; @@ -91,19 +86,19 @@ int perf_trace_enable(int event_id, void *data) static void perf_trace_event_disable(struct ftrace_event_call *event) { - char *buf, *nmi_buf; - if (--event->perf_refcount > 0) return; event->perf_event_disable(event); if (!--total_ref_count) { - buf = perf_trace_buf; - rcu_assign_pointer(perf_trace_buf, NULL); + char *buf[4]; + int i; - nmi_buf = perf_trace_buf_nmi; - rcu_assign_pointer(perf_trace_buf_nmi, NULL); + for (i = 0; i < 4; i++) { + buf[i] = perf_trace_buf[i]; + rcu_assign_pointer(perf_trace_buf[i], NULL); + } /* * Ensure every events in profiling have finished before @@ -111,8 +106,8 @@ static void perf_trace_event_disable(struct ftrace_event_call *event) */ synchronize_sched(); - free_percpu(buf); - free_percpu(nmi_buf); + for (i = 0; i < 4; i++) + free_percpu(buf[i]); } } @@ -132,47 +127,37 @@ void perf_trace_disable(int event_id) } __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, - int *rctxp, unsigned long *irq_flags) + struct pt_regs *regs, int *rctxp) { struct trace_entry *entry; char *trace_buf, *raw_data; - int pc, cpu; + int pc; BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); pc = preempt_count(); - /* Protect the per cpu buffer, begin the rcu read side */ - local_irq_save(*irq_flags); - *rctxp = perf_swevent_get_recursion_context(); if (*rctxp < 0) goto err_recursion; - cpu = smp_processor_id(); - - if (in_nmi()) - trace_buf = rcu_dereference_sched(perf_trace_buf_nmi); - else - trace_buf = rcu_dereference_sched(perf_trace_buf); - + trace_buf = rcu_dereference_sched(perf_trace_buf[*rctxp]); if (!trace_buf) goto err; - raw_data = per_cpu_ptr(trace_buf, cpu); + raw_data = per_cpu_ptr(trace_buf, smp_processor_id()); /* zero the dead bytes from align to not leak stack to user */ memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); entry = (struct trace_entry *)raw_data; - tracing_generic_entry_update(entry, *irq_flags, pc); + tracing_generic_entry_update(entry, regs->flags, pc); entry->type = type; return raw_data; err: perf_swevent_put_recursion_context(*rctxp); err_recursion: - local_irq_restore(*irq_flags); return NULL; } EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 2d7bf4146be..20c96de0aea 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1343,7 +1343,6 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, struct kprobe_trace_entry_head *entry; u8 *data; int size, __size, i; - unsigned long irq_flags; int rctx; __size = sizeof(*entry) + tp->size; @@ -1353,7 +1352,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, "profile buffer not large enough")) return; - entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags); + entry = perf_trace_buf_prepare(size, call->id, regs, &rctx); if (!entry) return; @@ -1362,7 +1361,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, for (i = 0; i < tp->nr_args; i++) call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); - perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs, call->perf_data); + perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, call->perf_data); } /* Kretprobe profile handler */ @@ -1374,7 +1373,6 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, struct kretprobe_trace_entry_head *entry; u8 *data; int size, __size, i; - unsigned long irq_flags; int rctx; __size = sizeof(*entry) + tp->size; @@ -1384,7 +1382,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, "profile buffer not large enough")) return; - entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags); + entry = perf_trace_buf_prepare(size, call->id, regs, &rctx); if (!entry) return; @@ -1395,7 +1393,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, - irq_flags, regs, call->perf_data); + regs, call->perf_data); } static int probe_perf_enable(struct ftrace_event_call *call) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 9eff1a4b49b..a657cefbb13 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -438,7 +438,6 @@ static void perf_syscall_enter(struct pt_regs *regs, long id) { struct syscall_metadata *sys_data; struct syscall_trace_enter *rec; - unsigned long flags; int syscall_nr; int rctx; int size; @@ -461,14 +460,14 @@ static void perf_syscall_enter(struct pt_regs *regs, long id) return; rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, - sys_data->enter_event->id, &rctx, &flags); + sys_data->enter_event->id, regs, &rctx); if (!rec) return; rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs, + perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, sys_data->enter_event->perf_data); } @@ -511,7 +510,6 @@ static void perf_syscall_exit(struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; - unsigned long flags; int syscall_nr; int rctx; int size; @@ -537,14 +535,14 @@ static void perf_syscall_exit(struct pt_regs *regs, long ret) return; rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, - sys_data->exit_event->id, &rctx, &flags); + sys_data->exit_event->id, regs, &rctx); if (!rec) return; rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); - perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs, + perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, sys_data->exit_event->perf_data); } -- cgit v1.2.3-70-g09d2 From 1c024eca51fdc965290acf342ae16a476c2189d0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 19 May 2010 14:02:22 +0200 Subject: perf, trace: Optimize tracepoints by using per-tracepoint-per-cpu hlist to track events Avoid the swevent hash-table by using per-tracepoint hlists. Also, avoid conditionals on the fast path by ordering with probe unregister so that we should never get on the callback path without the data being there. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Mike Galbraith Cc: Steven Rostedt LKML-Reference: <20100521090710.473188012@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 16 ++--- include/linux/perf_event.h | 6 +- include/trace/ftrace.h | 4 +- kernel/perf_event.c | 94 ++++++++++++++--------------- kernel/trace/trace_event_perf.c | 127 +++++++++++++++++++++------------------- kernel/trace/trace_kprobe.c | 9 ++- kernel/trace/trace_syscalls.c | 11 ++-- 7 files changed, 143 insertions(+), 124 deletions(-) (limited to 'kernel/trace/trace_event_perf.c') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 126071bc90a..7024b7d1126 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -133,7 +133,7 @@ struct ftrace_event_call { void *data; int perf_refcount; - void *perf_data; + struct hlist_head *perf_events; int (*perf_event_enable)(struct ftrace_event_call *); void (*perf_event_disable)(struct ftrace_event_call *); }; @@ -192,9 +192,11 @@ struct perf_event; DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); -extern int perf_trace_enable(int event_id, void *data); -extern void perf_trace_disable(int event_id); -extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, +extern int perf_trace_init(struct perf_event *event); +extern void perf_trace_destroy(struct perf_event *event); +extern int perf_trace_enable(struct perf_event *event); +extern void perf_trace_disable(struct perf_event *event); +extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); extern void *perf_trace_buf_prepare(int size, unsigned short type, @@ -202,11 +204,9 @@ extern void *perf_trace_buf_prepare(int size, unsigned short type, static inline void perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, - u64 count, struct pt_regs *regs, void *event) + u64 count, struct pt_regs *regs, void *head) { - struct trace_entry *entry = raw_data; - - perf_tp_event(entry->type, addr, count, raw_data, size, regs, event); + perf_tp_event(addr, count, raw_data, size, regs, head); perf_swevent_put_recursion_context(rctx); } #endif diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fe50347dc64..7cd7b356447 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -727,6 +727,7 @@ struct perf_event { perf_overflow_handler_t overflow_handler; #ifdef CONFIG_EVENT_TRACING + struct ftrace_event_call *tp_event; struct event_filter *filter; #endif @@ -992,8 +993,9 @@ static inline bool perf_paranoid_kernel(void) } extern void perf_event_init(void); -extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, - int entry_size, struct pt_regs *regs, void *event); +extern void perf_tp_event(u64 addr, u64 count, void *record, + int entry_size, struct pt_regs *regs, + struct hlist_head *head); extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index f282885057d..4eb2148f132 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -768,6 +768,7 @@ perf_trace_templ_##call(struct ftrace_event_call *event_call, \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ struct ftrace_raw_##call *entry; \ u64 __addr = 0, __count = 1; \ + struct hlist_head *head; \ int __entry_size; \ int __data_size; \ int rctx; \ @@ -790,8 +791,9 @@ perf_trace_templ_##call(struct ftrace_event_call *event_call, \ \ { assign; } \ \ + head = per_cpu_ptr(event_call->perf_events, smp_processor_id());\ perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \ - __count, __regs, event_call->perf_data); \ + __count, __regs, head); \ } #undef DEFINE_EVENT diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 45b7aec5545..3f2cc313ee2 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4005,9 +4005,6 @@ static void perf_swevent_add(struct perf_event *event, u64 nr, perf_swevent_overflow(event, 0, nmi, data, regs); } -static int perf_tp_event_match(struct perf_event *event, - struct perf_sample_data *data); - static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs) { @@ -4037,10 +4034,6 @@ static int perf_swevent_match(struct perf_event *event, if (perf_exclude_event(event, regs)) return 0; - if (event->attr.type == PERF_TYPE_TRACEPOINT && - !perf_tp_event_match(event, data)) - return 0; - return 1; } @@ -4122,7 +4115,7 @@ end: int perf_swevent_get_recursion_context(void) { - struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); int rctx; if (in_nmi()) @@ -4134,10 +4127,8 @@ int perf_swevent_get_recursion_context(void) else rctx = 0; - if (cpuctx->recursion[rctx]) { - put_cpu_var(perf_cpu_context); + if (cpuctx->recursion[rctx]) return -1; - } cpuctx->recursion[rctx]++; barrier(); @@ -4151,7 +4142,6 @@ void perf_swevent_put_recursion_context(int rctx) struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); barrier(); cpuctx->recursion[rctx]--; - put_cpu_var(perf_cpu_context); } EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); @@ -4162,6 +4152,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi, struct perf_sample_data data; int rctx; + preempt_disable_notrace(); rctx = perf_swevent_get_recursion_context(); if (rctx < 0) return; @@ -4171,6 +4162,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi, do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); perf_swevent_put_recursion_context(rctx); + preempt_enable_notrace(); } static void perf_swevent_read(struct perf_event *event) @@ -4486,11 +4478,43 @@ static int swevent_hlist_get(struct perf_event *event) #ifdef CONFIG_EVENT_TRACING -void perf_tp_event(int event_id, u64 addr, u64 count, void *record, - int entry_size, struct pt_regs *regs, void *event) +static const struct pmu perf_ops_tracepoint = { + .enable = perf_trace_enable, + .disable = perf_trace_disable, + .read = perf_swevent_read, + .unthrottle = perf_swevent_unthrottle, +}; + +static int perf_tp_filter_match(struct perf_event *event, + struct perf_sample_data *data) +{ + void *record = data->raw->data; + + if (likely(!event->filter) || filter_match_preds(event->filter, record)) + return 1; + return 0; +} + +static int perf_tp_event_match(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + if (perf_exclude_event(event, regs)) + return 0; + + if (!perf_tp_filter_match(event, data)) + return 0; + + return 1; +} + +void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, + struct pt_regs *regs, struct hlist_head *head) { - const int type = PERF_TYPE_TRACEPOINT; struct perf_sample_data data; + struct perf_event *event; + struct hlist_node *node; + struct perf_raw_record raw = { .size = entry_size, .data = record, @@ -4499,30 +4523,18 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, perf_sample_data_init(&data, addr); data.raw = &raw; - if (!event) { - do_perf_sw_event(type, event_id, count, 1, &data, regs); - return; + rcu_read_lock(); + hlist_for_each_entry_rcu(event, node, head, hlist_entry) { + if (perf_tp_event_match(event, &data, regs)) + perf_swevent_add(event, count, 1, &data, regs); } - - if (perf_swevent_match(event, type, event_id, &data, regs)) - perf_swevent_add(event, count, 1, &data, regs); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(perf_tp_event); -static int perf_tp_event_match(struct perf_event *event, - struct perf_sample_data *data) -{ - void *record = data->raw->data; - - if (likely(!event->filter) || filter_match_preds(event->filter, record)) - return 1; - return 0; -} - static void tp_perf_event_destroy(struct perf_event *event) { - perf_trace_disable(event->attr.config); - swevent_hlist_put(event); + perf_trace_destroy(event); } static const struct pmu *tp_perf_event_init(struct perf_event *event) @@ -4538,17 +4550,13 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event) !capable(CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); - if (perf_trace_enable(event->attr.config, event)) + err = perf_trace_init(event); + if (err) return NULL; event->destroy = tp_perf_event_destroy; - err = swevent_hlist_get(event); - if (err) { - perf_trace_disable(event->attr.config); - return ERR_PTR(err); - } - return &perf_ops_generic; + return &perf_ops_tracepoint; } static int perf_event_set_filter(struct perf_event *event, void __user *arg) @@ -4576,12 +4584,6 @@ static void perf_event_free_filter(struct perf_event *event) #else -static int perf_tp_event_match(struct perf_event *event, - struct perf_sample_data *data) -{ - return 1; -} - static const struct pmu *tp_perf_event_init(struct perf_event *event) { return NULL; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index a1304f8c444..39d5ea7b065 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -23,14 +23,25 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)]) /* Count the events in use (per event id, not per instance) */ static int total_ref_count; -static int perf_trace_event_enable(struct ftrace_event_call *event, void *data) +static int perf_trace_event_init(struct ftrace_event_call *tp_event, + struct perf_event *p_event) { + struct hlist_head *list; int ret = -ENOMEM; + int cpu; - if (event->perf_refcount++ > 0) { - event->perf_data = NULL; + p_event->tp_event = tp_event; + if (tp_event->perf_refcount++ > 0) return 0; - } + + list = alloc_percpu(struct hlist_head); + if (!list) + goto fail; + + for_each_possible_cpu(cpu) + INIT_HLIST_HEAD(per_cpu_ptr(list, cpu)); + + tp_event->perf_events = list; if (!total_ref_count) { char *buf; @@ -39,20 +50,20 @@ static int perf_trace_event_enable(struct ftrace_event_call *event, void *data) for (i = 0; i < 4; i++) { buf = (char *)alloc_percpu(perf_trace_t); if (!buf) - goto fail_buf; + goto fail; - rcu_assign_pointer(perf_trace_buf[i], buf); + perf_trace_buf[i] = buf; } } - ret = event->perf_event_enable(event); - if (!ret) { - event->perf_data = data; - total_ref_count++; - return 0; - } + ret = tp_event->perf_event_enable(tp_event); + if (ret) + goto fail; -fail_buf: + total_ref_count++; + return 0; + +fail: if (!total_ref_count) { int i; @@ -61,21 +72,26 @@ fail_buf: perf_trace_buf[i] = NULL; } } - event->perf_refcount--; + + if (!--tp_event->perf_refcount) { + free_percpu(tp_event->perf_events); + tp_event->perf_events = NULL; + } return ret; } -int perf_trace_enable(int event_id, void *data) +int perf_trace_init(struct perf_event *p_event) { - struct ftrace_event_call *event; + struct ftrace_event_call *tp_event; + int event_id = p_event->attr.config; int ret = -EINVAL; mutex_lock(&event_mutex); - list_for_each_entry(event, &ftrace_events, list) { - if (event->id == event_id && event->perf_event_enable && - try_module_get(event->mod)) { - ret = perf_trace_event_enable(event, data); + list_for_each_entry(tp_event, &ftrace_events, list) { + if (tp_event->id == event_id && tp_event->perf_event_enable && + try_module_get(tp_event->mod)) { + ret = perf_trace_event_init(tp_event, p_event); break; } } @@ -84,53 +100,52 @@ int perf_trace_enable(int event_id, void *data) return ret; } -static void perf_trace_event_disable(struct ftrace_event_call *event) +int perf_trace_enable(struct perf_event *p_event) { - if (--event->perf_refcount > 0) - return; + struct ftrace_event_call *tp_event = p_event->tp_event; + struct hlist_head *list; - event->perf_event_disable(event); + list = tp_event->perf_events; + if (WARN_ON_ONCE(!list)) + return -EINVAL; - if (!--total_ref_count) { - char *buf[4]; - int i; - - for (i = 0; i < 4; i++) { - buf[i] = perf_trace_buf[i]; - rcu_assign_pointer(perf_trace_buf[i], NULL); - } + list = per_cpu_ptr(list, smp_processor_id()); + hlist_add_head_rcu(&p_event->hlist_entry, list); - /* - * Ensure every events in profiling have finished before - * releasing the buffers - */ - synchronize_sched(); + return 0; +} - for (i = 0; i < 4; i++) - free_percpu(buf[i]); - } +void perf_trace_disable(struct perf_event *p_event) +{ + hlist_del_rcu(&p_event->hlist_entry); } -void perf_trace_disable(int event_id) +void perf_trace_destroy(struct perf_event *p_event) { - struct ftrace_event_call *event; + struct ftrace_event_call *tp_event = p_event->tp_event; + int i; - mutex_lock(&event_mutex); - list_for_each_entry(event, &ftrace_events, list) { - if (event->id == event_id) { - perf_trace_event_disable(event); - module_put(event->mod); - break; + if (--tp_event->perf_refcount > 0) + return; + + tp_event->perf_event_disable(tp_event); + + free_percpu(tp_event->perf_events); + tp_event->perf_events = NULL; + + if (!--total_ref_count) { + for (i = 0; i < 4; i++) { + free_percpu(perf_trace_buf[i]); + perf_trace_buf[i] = NULL; } } - mutex_unlock(&event_mutex); } __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, struct pt_regs *regs, int *rctxp) { struct trace_entry *entry; - char *trace_buf, *raw_data; + char *raw_data; int pc; BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); @@ -139,13 +154,9 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, *rctxp = perf_swevent_get_recursion_context(); if (*rctxp < 0) - goto err_recursion; - - trace_buf = rcu_dereference_sched(perf_trace_buf[*rctxp]); - if (!trace_buf) - goto err; + return NULL; - raw_data = per_cpu_ptr(trace_buf, smp_processor_id()); + raw_data = per_cpu_ptr(perf_trace_buf[*rctxp], smp_processor_id()); /* zero the dead bytes from align to not leak stack to user */ memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); @@ -155,9 +166,5 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, entry->type = type; return raw_data; -err: - perf_swevent_put_recursion_context(*rctxp); -err_recursion: - return NULL; } EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 20c96de0aea..4681f60dac0 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1341,6 +1341,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct ftrace_event_call *call = &tp->call; struct kprobe_trace_entry_head *entry; + struct hlist_head *head; u8 *data; int size, __size, i; int rctx; @@ -1361,7 +1362,8 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, for (i = 0; i < tp->nr_args; i++) call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); - perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, call->perf_data); + head = per_cpu_ptr(call->perf_events, smp_processor_id()); + perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); } /* Kretprobe profile handler */ @@ -1371,6 +1373,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); struct ftrace_event_call *call = &tp->call; struct kretprobe_trace_entry_head *entry; + struct hlist_head *head; u8 *data; int size, __size, i; int rctx; @@ -1392,8 +1395,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, for (i = 0; i < tp->nr_args; i++) call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); - perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, - regs, call->perf_data); + head = per_cpu_ptr(call->perf_events, smp_processor_id()); + perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); } static int probe_perf_enable(struct ftrace_event_call *call) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index a657cefbb13..eb769f27029 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -438,6 +438,7 @@ static void perf_syscall_enter(struct pt_regs *regs, long id) { struct syscall_metadata *sys_data; struct syscall_trace_enter *rec; + struct hlist_head *head; int syscall_nr; int rctx; int size; @@ -467,8 +468,9 @@ static void perf_syscall_enter(struct pt_regs *regs, long id) rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, - sys_data->enter_event->perf_data); + + head = per_cpu_ptr(sys_data->enter_event->perf_events, smp_processor_id()); + perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); } int perf_sysenter_enable(struct ftrace_event_call *call) @@ -510,6 +512,7 @@ static void perf_syscall_exit(struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; + struct hlist_head *head; int syscall_nr; int rctx; int size; @@ -542,8 +545,8 @@ static void perf_syscall_exit(struct pt_regs *regs, long ret) rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); - perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, - sys_data->exit_event->perf_data); + head = per_cpu_ptr(sys_data->exit_event->perf_events, smp_processor_id()); + perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); } int perf_sysexit_enable(struct ftrace_event_call *call) -- cgit v1.2.3-70-g09d2