diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2010-05-19 14:02:22 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-05-21 11:37:56 +0200 |
commit | 1c024eca51fdc965290acf342ae16a476c2189d0 (patch) | |
tree | 28dc160cc70a20eeb8b8825d6d52ea88a6188413 /kernel/trace | |
parent | b7e2ecef92d2e7785e6d76b41e5ba8bcbc45259d (diff) |
perf, trace: Optimize tracepoints by using per-tracepoint-per-cpu hlist to track events
Avoid the swevent hash-table by using per-tracepoint
hlists.
Also, avoid conditionals on the fast path by ordering
with probe unregister so that we should never get on
the callback path without the data being there.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20100521090710.473188012@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/trace_event_perf.c | 127 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 9 | ||||
-rw-r--r-- | kernel/trace/trace_syscalls.c | 11 |
3 files changed, 80 insertions, 67 deletions
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index a1304f8c444..39d5ea7b065 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -23,14 +23,25 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)]) /* Count the events in use (per event id, not per instance) */ static int total_ref_count; -static int perf_trace_event_enable(struct ftrace_event_call *event, void *data) +static int perf_trace_event_init(struct ftrace_event_call *tp_event, + struct perf_event *p_event) { + struct hlist_head *list; int ret = -ENOMEM; + int cpu; - if (event->perf_refcount++ > 0) { - event->perf_data = NULL; + p_event->tp_event = tp_event; + if (tp_event->perf_refcount++ > 0) return 0; - } + + list = alloc_percpu(struct hlist_head); + if (!list) + goto fail; + + for_each_possible_cpu(cpu) + INIT_HLIST_HEAD(per_cpu_ptr(list, cpu)); + + tp_event->perf_events = list; if (!total_ref_count) { char *buf; @@ -39,20 +50,20 @@ static int perf_trace_event_enable(struct ftrace_event_call *event, void *data) for (i = 0; i < 4; i++) { buf = (char *)alloc_percpu(perf_trace_t); if (!buf) - goto fail_buf; + goto fail; - rcu_assign_pointer(perf_trace_buf[i], buf); + perf_trace_buf[i] = buf; } } - ret = event->perf_event_enable(event); - if (!ret) { - event->perf_data = data; - total_ref_count++; - return 0; - } + ret = tp_event->perf_event_enable(tp_event); + if (ret) + goto fail; -fail_buf: + total_ref_count++; + return 0; + +fail: if (!total_ref_count) { int i; @@ -61,21 +72,26 @@ fail_buf: perf_trace_buf[i] = NULL; } } - event->perf_refcount--; + + if (!--tp_event->perf_refcount) { + free_percpu(tp_event->perf_events); + tp_event->perf_events = NULL; + } return ret; } -int perf_trace_enable(int event_id, void *data) +int perf_trace_init(struct perf_event *p_event) { - struct ftrace_event_call *event; + struct ftrace_event_call *tp_event; + int event_id = p_event->attr.config; int ret = -EINVAL; mutex_lock(&event_mutex); - list_for_each_entry(event, &ftrace_events, list) { - if (event->id == event_id && event->perf_event_enable && - try_module_get(event->mod)) { - ret = perf_trace_event_enable(event, data); + list_for_each_entry(tp_event, &ftrace_events, list) { + if (tp_event->id == event_id && tp_event->perf_event_enable && + try_module_get(tp_event->mod)) { + ret = perf_trace_event_init(tp_event, p_event); break; } } @@ -84,53 +100,52 @@ int perf_trace_enable(int event_id, void *data) return ret; } -static void perf_trace_event_disable(struct ftrace_event_call *event) +int perf_trace_enable(struct perf_event *p_event) { - if (--event->perf_refcount > 0) - return; + struct ftrace_event_call *tp_event = p_event->tp_event; + struct hlist_head *list; - event->perf_event_disable(event); + list = tp_event->perf_events; + if (WARN_ON_ONCE(!list)) + return -EINVAL; - if (!--total_ref_count) { - char *buf[4]; - int i; - - for (i = 0; i < 4; i++) { - buf[i] = perf_trace_buf[i]; - rcu_assign_pointer(perf_trace_buf[i], NULL); - } + list = per_cpu_ptr(list, smp_processor_id()); + hlist_add_head_rcu(&p_event->hlist_entry, list); - /* - * Ensure every events in profiling have finished before - * releasing the buffers - */ - synchronize_sched(); + return 0; +} - for (i = 0; i < 4; i++) - free_percpu(buf[i]); - } +void perf_trace_disable(struct perf_event *p_event) +{ + hlist_del_rcu(&p_event->hlist_entry); } -void perf_trace_disable(int event_id) +void perf_trace_destroy(struct perf_event *p_event) { - struct ftrace_event_call *event; + struct ftrace_event_call *tp_event = p_event->tp_event; + int i; - mutex_lock(&event_mutex); - list_for_each_entry(event, &ftrace_events, list) { - if (event->id == event_id) { - perf_trace_event_disable(event); - module_put(event->mod); - break; + if (--tp_event->perf_refcount > 0) + return; + + tp_event->perf_event_disable(tp_event); + + free_percpu(tp_event->perf_events); + tp_event->perf_events = NULL; + + if (!--total_ref_count) { + for (i = 0; i < 4; i++) { + free_percpu(perf_trace_buf[i]); + perf_trace_buf[i] = NULL; } } - mutex_unlock(&event_mutex); } __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, struct pt_regs *regs, int *rctxp) { struct trace_entry *entry; - char *trace_buf, *raw_data; + char *raw_data; int pc; BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); @@ -139,13 +154,9 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, *rctxp = perf_swevent_get_recursion_context(); if (*rctxp < 0) - goto err_recursion; - - trace_buf = rcu_dereference_sched(perf_trace_buf[*rctxp]); - if (!trace_buf) - goto err; + return NULL; - raw_data = per_cpu_ptr(trace_buf, smp_processor_id()); + raw_data = per_cpu_ptr(perf_trace_buf[*rctxp], smp_processor_id()); /* zero the dead bytes from align to not leak stack to user */ memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); @@ -155,9 +166,5 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, entry->type = type; return raw_data; -err: - perf_swevent_put_recursion_context(*rctxp); -err_recursion: - return NULL; } EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 20c96de0aea..4681f60dac0 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1341,6 +1341,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct ftrace_event_call *call = &tp->call; struct kprobe_trace_entry_head *entry; + struct hlist_head *head; u8 *data; int size, __size, i; int rctx; @@ -1361,7 +1362,8 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, for (i = 0; i < tp->nr_args; i++) call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); - perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, call->perf_data); + head = per_cpu_ptr(call->perf_events, smp_processor_id()); + perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); } /* Kretprobe profile handler */ @@ -1371,6 +1373,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); struct ftrace_event_call *call = &tp->call; struct kretprobe_trace_entry_head *entry; + struct hlist_head *head; u8 *data; int size, __size, i; int rctx; @@ -1392,8 +1395,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, for (i = 0; i < tp->nr_args; i++) call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); - perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, - regs, call->perf_data); + head = per_cpu_ptr(call->perf_events, smp_processor_id()); + perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); } static int probe_perf_enable(struct ftrace_event_call *call) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index a657cefbb13..eb769f27029 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -438,6 +438,7 @@ static void perf_syscall_enter(struct pt_regs *regs, long id) { struct syscall_metadata *sys_data; struct syscall_trace_enter *rec; + struct hlist_head *head; int syscall_nr; int rctx; int size; @@ -467,8 +468,9 @@ static void perf_syscall_enter(struct pt_regs *regs, long id) rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, - sys_data->enter_event->perf_data); + + head = per_cpu_ptr(sys_data->enter_event->perf_events, smp_processor_id()); + perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); } int perf_sysenter_enable(struct ftrace_event_call *call) @@ -510,6 +512,7 @@ static void perf_syscall_exit(struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; + struct hlist_head *head; int syscall_nr; int rctx; int size; @@ -542,8 +545,8 @@ static void perf_syscall_exit(struct pt_regs *regs, long ret) rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); - perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, - sys_data->exit_event->perf_data); + head = per_cpu_ptr(sys_data->exit_event->perf_events, smp_processor_id()); + perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); } int perf_sysexit_enable(struct ftrace_event_call *call) |