From ad3d6f508738323c0e843c4dbdd421c1aeb59cd8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 17 Jun 2013 18:02:43 +0200 Subject: perf tools: Do not elide parent symbol column I found the parent symbol column data interesting even if there's another sorting enabled. Switching it on. Previous behaviour: $ perf report -i perf.data.delete -p perf_session__delete -x + 3.60% perf perf [.] __rb_change_child + 1.89% perf perf [.] rb_erase + 1.89% perf perf [.] rb_erase + 1.83% perf perf [.] free@plt Current behaviour: $ perf report -i perf.data.delete -p perf_session__delete -x + 3.60% perf perf [.] __rb_change_child perf_session__delete + 1.89% perf perf [.] rb_erase perf_session__delete_dead_threads + 1.89% perf perf [.] rb_erase perf_session__delete_threads + 1.83% perf perf [.] free@plt perf_session__delete Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-r79fn89bhqz16ixa5zmyflrd@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3662047cc6b..6ab49dafef2 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -931,14 +931,6 @@ repeat: if (parent_pattern != default_parent_pattern) { if (sort_dimension__add("parent") < 0) goto error; - - /* - * Only show the parent fields if we explicitly - * sort that way. If we only use parent machinery - * for filtering, we don't want it. 
- */ - if (!strstr(sort_order, "parent")) - sort_parent.elide = 1; } if (argc) { -- cgit v1.2.3-70-g09d2 From d4ae0a6f7c79be64c8f3551dd149189f8c4480eb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Jun 2013 13:54:13 +0200 Subject: perf report: Fix perf_session__delete removal There's no point of having out_delete label with perf_session__delete call within __cmd_report function, because it's called at the end of the cmd_report function. The speed up due to commenting out the perf_session__delete at the end does not seem relevant anymore. Measured speedup for ~1GB data file with 222466 FORKS events is around 0.5%. $ perf report -i perf.data.delete -P perf_session__delete -s parent + 99.51% [other] + 0.49% perf_session__delete Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1372161253-22081-6-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 6ab49dafef2..ee2ca3eb22d 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -497,7 +497,7 @@ static int __cmd_report(struct perf_report *rep) ret = perf_session__cpu_bitmap(session, rep->cpu_list, rep->cpu_bitmap); if (ret) - goto out_delete; + return ret; } if (use_browser <= 0) @@ -508,11 +508,11 @@ static int __cmd_report(struct perf_report *rep) ret = perf_report__setup_sample_type(rep); if (ret) - goto out_delete; + return ret; ret = perf_session__process_events(session, &rep->tool); if (ret) - goto out_delete; + return ret; kernel_map = session->machines.host.vmlinux_maps[MAP__FUNCTION]; kernel_kmap = map__kmap(kernel_map); @@ -547,7 +547,7 @@ static int 
__cmd_report(struct perf_report *rep) if (dump_trace) { perf_session__fprintf_nr_events(session, stdout); - goto out_delete; + return 0; } nr_samples = 0; @@ -572,7 +572,7 @@ static int __cmd_report(struct perf_report *rep) if (nr_samples == 0) { ui__error("The %s file has no samples!\n", session->filename); - goto out_delete; + return 0; } list_for_each_entry(pos, &session->evlist->entries, node) @@ -598,19 +598,6 @@ static int __cmd_report(struct perf_report *rep) } else perf_evlist__tty_browse_hists(session->evlist, rep, help); -out_delete: - /* - * Speed up the exit process, for large files this can - * take quite a while. - * - * XXX Enable this when using valgrind or if we ever - * librarize this command. - * - * Also experiment with obstacks to see how much speed - * up we'll get here. - * - * perf_session__delete(session); - */ return ret; } -- cgit v1.2.3-70-g09d2 From b21484f1a1f300d422cfe5d4f8f50015e22cea24 Mon Sep 17 00:00:00 2001 From: Greg Price Date: Thu, 6 Dec 2012 21:48:05 -0800 Subject: perf report/top: Add option to collapse undesired parts of call graph For example, in an application with an expensive function implemented with deeply nested recursive calls, the default call-graph presentation is dominated by the different callchains within that function. By ignoring these callees, we can collect the callchains leading into the function and compactly identify what to blame for expensive calls. For example, in this report the callers of garbage_collect() are scattered across the tree: $ perf report -d ruby 2>- | grep -m10 ^[^#]*[a-z] 22.03% ruby [.] gc_mark --- gc_mark |--59.40%-- mark_keyvalue | st_foreach | gc_mark_children | |--99.75%-- rb_gc_mark | | rb_vm_mark | | gc_mark_children | | gc_marks | | |--99.00%-- garbage_collect If we ignore the callees of garbage_collect(), its callers are coalesced: $ perf report --ignore-callees garbage_collect -d ruby 2>- | grep -m10 ^[^#]*[a-z] 72.92% ruby [.] 
garbage_collect --- garbage_collect vm_xmalloc |--47.08%-- ruby_xmalloc | st_insert2 | rb_hash_aset | |--98.45%-- features_index_add | | rb_provide_feature | | rb_require_safe | | vm_call_method Signed-off-by: Greg Price Tested-by: Jiri Olsa Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20130623031720.GW22203@biohazard-cafe.mit.edu Link: http://lkml.kernel.org/r/20130708115746.GO22203@biohazard-cafe.mit.edu Cc: Fengguang Wu [ remove spaces at beginning of line, reported by Fengguang Wu ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 5 +++++ tools/perf/Documentation/perf-top.txt | 5 +++++ tools/perf/builtin-report.c | 27 ++++++++++++++++++++++++--- tools/perf/builtin-top.c | 6 ++++-- tools/perf/util/machine.c | 24 +++++++++++++++--------- tools/perf/util/machine.h | 4 +++- tools/perf/util/session.c | 3 +-- tools/perf/util/sort.c | 2 ++ tools/perf/util/sort.h | 4 ++++ 9 files changed, 63 insertions(+), 17 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 66dab7410c1..747ff50284b 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -135,6 +135,11 @@ OPTIONS --inverted:: alias for inverted caller based call graph. +--ignore-callees=:: + Ignore callees of the function(s) matching the given regex. + This has the effect of collecting the callers of each such + function into one place in the call-graph tree. + --pretty=:: Pretty printing style. key: normal, raw diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 7fdd1909e37..58d6598a968 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -155,6 +155,11 @@ Default is to monitor all CPUS. Default: fractal,0.5,callee. 
+--ignore-callees=:: + Ignore callees of the function(s) matching the given regex. + This has the effect of collecting the callers of each such + function into one place in the call-graph tree. + --percent-limit:: Do not show entries which have an overhead under that percent. (Default: 0). diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ee2ca3eb22d..9a7e54d701c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -89,7 +89,7 @@ static int perf_report__add_mem_hist_entry(struct perf_tool *tool, if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { err = machine__resolve_callchain(machine, evsel, al->thread, - sample, &parent); + sample, &parent, al); if (err) return err; } @@ -180,7 +180,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool, if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { err = machine__resolve_callchain(machine, evsel, al->thread, - sample, &parent); + sample, &parent, al); if (err) return err; } @@ -254,7 +254,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { err = machine__resolve_callchain(machine, evsel, al->thread, - sample, &parent); + sample, &parent, al); if (err) return err; } @@ -681,6 +681,24 @@ setup: return 0; } +int +report_parse_ignore_callees_opt(const struct option *opt __maybe_unused, + const char *arg, int unset __maybe_unused) +{ + if (arg) { + int err = regcomp(&ignore_callees_regex, arg, REG_EXTENDED); + if (err) { + char buf[BUFSIZ]; + regerror(err, &ignore_callees_regex, buf, sizeof(buf)); + pr_err("Invalid --ignore-callees regex: %s\n%s", arg, buf); + return -1; + } + have_ignore_callees = 1; + } + + return 0; +} + static int parse_branch_mode(const struct option *opt __maybe_unused, const char *str __maybe_unused, int unset) @@ -771,6 +789,9 @@ int cmd_report(int argc, const char **argv, const char *prefix 
__maybe_unused) "Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt), OPT_BOOLEAN('G', "inverted", &report.inverted_callchain, "alias for inverted call graph"), + OPT_CALLBACK(0, "ignore-callees", NULL, "regex", + "ignore callees of these functions in call graphs", + report_parse_ignore_callees_opt), OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", "only consider symbols in these dsos"), OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index a237059f51c..bbf46357277 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -773,8 +773,7 @@ static void perf_event__process_sample(struct perf_tool *tool, sample->callchain) { err = machine__resolve_callchain(machine, evsel, al.thread, sample, - &parent); - + &parent, &al); if (err) return; } @@ -1109,6 +1108,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts, "mode[,dump_size]", record_callchain_help, &parse_callchain_opt, "fp"), + OPT_CALLBACK(0, "ignore-callees", NULL, "regex", + "ignore callees of these functions in call graphs", + report_parse_ignore_callees_opt), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 5dd5026a82e..f9f9d6381b9 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1058,11 +1058,10 @@ int machine__process_event(struct machine *machine, union perf_event *event) return ret; } -static bool symbol__match_parent_regex(struct symbol *sym) +static bool symbol__match_regex(struct symbol *sym, regex_t *regex) { - if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0)) + if (sym->name && !regexec(regex, sym->name, 0, NULL, 0)) return 1; - return 0; } 
@@ -1159,8 +1158,8 @@ struct branch_info *machine__resolve_bstack(struct machine *machine, static int machine__resolve_callchain_sample(struct machine *machine, struct thread *thread, struct ip_callchain *chain, - struct symbol **parent) - + struct symbol **parent, + struct addr_location *root_al) { u8 cpumode = PERF_RECORD_MISC_USER; unsigned int i; @@ -1211,8 +1210,15 @@ static int machine__resolve_callchain_sample(struct machine *machine, MAP__FUNCTION, ip, &al, NULL); if (al.sym != NULL) { if (sort__has_parent && !*parent && - symbol__match_parent_regex(al.sym)) + symbol__match_regex(al.sym, &parent_regex)) *parent = al.sym; + else if (have_ignore_callees && root_al && + symbol__match_regex(al.sym, &ignore_callees_regex)) { + /* Treat this symbol as the root, + forgetting its callees. */ + *root_al = al; + callchain_cursor_reset(&callchain_cursor); + } if (!symbol_conf.use_callchain) break; } @@ -1237,13 +1243,13 @@ int machine__resolve_callchain(struct machine *machine, struct perf_evsel *evsel, struct thread *thread, struct perf_sample *sample, - struct symbol **parent) - + struct symbol **parent, + struct addr_location *root_al) { int ret; ret = machine__resolve_callchain_sample(machine, thread, - sample->callchain, parent); + sample->callchain, parent, root_al); if (ret) return ret; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index e49ba01b793..5bb6244194d 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -5,6 +5,7 @@ #include #include "map.h" +struct addr_location; struct branch_stack; struct perf_evsel; struct perf_sample; @@ -83,7 +84,8 @@ int machine__resolve_callchain(struct machine *machine, struct perf_evsel *evsel, struct thread *thread, struct perf_sample *sample, - struct symbol **parent); + struct symbol **parent, + struct addr_location *root_al); /* * Default guest kernel is defined by parameter --guestkallsyms diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 
951a1cfb317..1eb58eedcac 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1406,9 +1406,8 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event, if (symbol_conf.use_callchain && sample->callchain) { - if (machine__resolve_callchain(machine, evsel, al.thread, - sample, NULL) != 0) { + sample, NULL, NULL) != 0) { if (verbose) error("Failed to resolve callchain. Skipping\n"); return; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 8deee19d2e7..cb2b108635e 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -7,6 +7,8 @@ const char default_parent_pattern[] = "^sys_|^do_page_fault"; const char *parent_pattern = default_parent_pattern; const char default_sort_order[] = "comm,dso,symbol"; const char *sort_order = default_sort_order; +regex_t ignore_callees_regex; +int have_ignore_callees = 0; int sort__need_collapse = 0; int sort__has_parent = 0; int sort__has_sym = 0; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 45ac84c1e03..a4a6d0b1ea0 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -29,6 +29,8 @@ extern const char *sort_order; extern const char default_parent_pattern[]; extern const char *parent_pattern; extern const char default_sort_order[]; +extern regex_t ignore_callees_regex; +extern int have_ignore_callees; extern int sort__need_collapse; extern int sort__has_parent; extern int sort__has_sym; @@ -183,4 +185,6 @@ int setup_sorting(void); extern int sort_dimension__add(const char *); void sort__setup_elide(FILE *fp); +int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset); + #endif /* __PERF_SORT_H */ -- cgit v1.2.3-70-g09d2 From 2b8bfa6bb8a7d26935207710397386759b42125c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 31 Jan 2013 23:34:25 +0100 Subject: perf tools: Centralize default columns init in perf_hpp__init Now when diff command is separated from other standard outputs, we can use perf_hpp__init 
to initialize all standard columns. Moving PERF_HPP__OVERHEAD column init back to perf_hpp__init, and removing extra enable calls. Signed-off-by: Jiri Olsa Reviewed-by: Namhyung Kim Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-nj2xk89tj972tbqswfs498ex@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 1 - tools/perf/ui/browsers/hists.c | 2 -- tools/perf/ui/hist.c | 2 ++ tools/perf/ui/setup.c | 1 - 4 files changed, 2 insertions(+), 4 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 9a7e54d701c..188c265751c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -861,7 +861,6 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) setup_browser(true); else { use_browser = 0; - perf_hpp__column_enable(PERF_HPP__OVERHEAD); perf_hpp__init(); } diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 2cb39164a1c..7ef36c36047 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -703,8 +703,6 @@ __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us, NULL) void hist_browser__init_hpp(void) { - perf_hpp__column_enable(PERF_HPP__OVERHEAD); - perf_hpp__init(); perf_hpp__format[PERF_HPP__OVERHEAD].color = diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 5440d56d884..f45c97f694d 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -421,6 +421,8 @@ LIST_HEAD(perf_hpp__list); void perf_hpp__init(void) { + perf_hpp__column_enable(PERF_HPP__OVERHEAD); + if (symbol_conf.show_cpu_utilization) { perf_hpp__column_enable(PERF_HPP__OVERHEAD_SYS); perf_hpp__column_enable(PERF_HPP__OVERHEAD_US); diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c index ae6a789cb0f..47d9a571f26 100644 --- a/tools/perf/ui/setup.c +++ b/tools/perf/ui/setup.c @@ -30,7 
+30,6 @@ void setup_browser(bool fallback_to_pager) if (fallback_to_pager) setup_pager(); - perf_hpp__column_enable(PERF_HPP__OVERHEAD); perf_hpp__init(); break; } -- cgit v1.2.3-70-g09d2 From 6065210db932fd183cbc8bc77558fee275360e71 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 11 Jul 2013 17:28:33 +0200 Subject: perf tools: Remove event types framework completely Removing event types framework completely. The only remainder (apart from few comments) is following enum: enum perf_user_event_type { ... PERF_RECORD_HEADER_EVENT_TYPE = 65, /* deprecated */ ... } It's kept as deprecated, resulting in error when processed in perf_session__process_user_event function. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1373556513-3000-6-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 7 ---- tools/perf/builtin-record.c | 7 ---- tools/perf/builtin-report.c | 1 - tools/perf/builtin-script.c | 1 - tools/perf/util/event.h | 2 +- tools/perf/util/header.c | 90 --------------------------------------------- tools/perf/util/header.h | 13 ------- tools/perf/util/session.c | 11 ------ tools/perf/util/tool.h | 3 -- 9 files changed, 1 insertion(+), 134 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index ad1296c6f88..1d8de2e4a40 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -67,12 +67,6 @@ static int perf_event__repipe_op2_synth(struct perf_tool *tool, return perf_event__repipe_synth(tool, event); } -static int perf_event__repipe_event_type_synth(struct perf_tool *tool, - union perf_event *event) -{ - return perf_event__repipe_synth(tool, event); -} - static int perf_event__repipe_attr(struct perf_tool *tool, union perf_event *event, struct perf_evlist **pevlist) @@ -402,7 
+396,6 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) .throttle = perf_event__repipe, .unthrottle = perf_event__repipe, .attr = perf_event__repipe_attr, - .event_type = perf_event__repipe_event_type_synth, .tracing_data = perf_event__repipe_op2_synth, .finished_round = perf_event__repipe_op2_synth, .build_id = perf_event__repipe_op2_synth, diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 1f5243c1c47..a41ac41546c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -474,13 +474,6 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) goto out_delete_session; } - err = perf_event__synthesize_event_types(tool, process_synthesized_event, - machine); - if (err < 0) { - pr_err("Couldn't synthesize event_types.\n"); - goto out_delete_session; - } - if (have_tracepoints(&evsel_list->entries)) { /* * FIXME err <= 0 here actually means that diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 188c265751c..a34c587900c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -741,7 +741,6 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) .lost = perf_event__process_lost, .read = process_read_event, .attr = perf_event__process_attr, - .event_type = perf_event__process_event_type, .tracing_data = perf_event__process_tracing_data, .build_id = perf_event__process_build_id, .ordered_samples = true, diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 3de8979fe87..ecb697998d3 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -524,7 +524,6 @@ static struct perf_tool perf_script = { .exit = perf_event__process_exit, .fork = perf_event__process_fork, .attr = perf_event__process_attr, - .event_type = perf_event__process_event_type, .tracing_data = perf_event__process_tracing_data, .build_id = perf_event__process_build_id, .ordered_samples = true, 
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 181389535c0..1ebb8fb0178 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -116,7 +116,7 @@ struct build_id_event { enum perf_user_event_type { /* above any possible kernel type */ PERF_RECORD_USER_TYPE_START = 64, PERF_RECORD_HEADER_ATTR = 64, - PERF_RECORD_HEADER_EVENT_TYPE = 65, + PERF_RECORD_HEADER_EVENT_TYPE = 65, /* deprecated */ PERF_RECORD_HEADER_TRACING_DATA = 66, PERF_RECORD_HEADER_BUILD_ID = 67, PERF_RECORD_FINISHED_ROUND = 68, diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 88626678bfc..b28a65ecd61 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -25,41 +25,9 @@ static bool no_buildid_cache = false; -static int trace_event_count; -static struct perf_trace_event_type *trace_events; - static u32 header_argc; static const char **header_argv; -int perf_header__push_event(u64 id, const char *name) -{ - struct perf_trace_event_type *nevents; - - if (strlen(name) > MAX_EVENT_NAME) - pr_warning("Event %s will be truncated\n", name); - - nevents = realloc(trace_events, (trace_event_count + 1) * sizeof(*trace_events)); - if (nevents == NULL) - return -ENOMEM; - trace_events = nevents; - - memset(&trace_events[trace_event_count], 0, sizeof(struct perf_trace_event_type)); - trace_events[trace_event_count].event_id = id; - strncpy(trace_events[trace_event_count].name, name, MAX_EVENT_NAME - 1); - trace_event_count++; - return 0; -} - -char *perf_header__find_event(u64 id) -{ - int i; - for (i = 0 ; i < trace_event_count; i++) { - if (trace_events[i].event_id == id) - return trace_events[i].name; - } - return NULL; -} - /* * magic2 = "PERFILE2" * must be a numerical value to let the endianness @@ -2936,64 +2904,6 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused, return 0; } -int perf_event__synthesize_event_type(struct perf_tool *tool, - u64 event_id, char *name, - perf_event__handler_t process, - struct machine 
*machine) -{ - union perf_event ev; - size_t size = 0; - int err = 0; - - memset(&ev, 0, sizeof(ev)); - - ev.event_type.event_type.event_id = event_id; - memset(ev.event_type.event_type.name, 0, MAX_EVENT_NAME); - strncpy(ev.event_type.event_type.name, name, MAX_EVENT_NAME - 1); - - ev.event_type.header.type = PERF_RECORD_HEADER_EVENT_TYPE; - size = strlen(ev.event_type.event_type.name); - size = PERF_ALIGN(size, sizeof(u64)); - ev.event_type.header.size = sizeof(ev.event_type) - - (sizeof(ev.event_type.event_type.name) - size); - - err = process(tool, &ev, NULL, machine); - - return err; -} - -int perf_event__synthesize_event_types(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine) -{ - struct perf_trace_event_type *type; - int i, err = 0; - - for (i = 0; i < trace_event_count; i++) { - type = &trace_events[i]; - - err = perf_event__synthesize_event_type(tool, type->event_id, - type->name, process, - machine); - if (err) { - pr_debug("failed to create perf header event type\n"); - return err; - } - } - - return err; -} - -int perf_event__process_event_type(struct perf_tool *tool __maybe_unused, - union perf_event *event) -{ - if (perf_header__push_event(event->event_type.event_type.event_id, - event->event_type.event_type.name) < 0) - return -ENOMEM; - - return 0; -} - int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct perf_evlist *evlist, perf_event__handler_t process) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 298982fb195..669fda531da 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -102,9 +102,6 @@ int perf_session__write_header(struct perf_session *session, int fd, bool at_exit); int perf_header__write_pipe(int fd); -int perf_header__push_event(u64 id, const char *name); -char *perf_header__find_event(u64 id); - void perf_header__set_feat(struct perf_header *header, int feat); void perf_header__clear_feat(struct perf_header *header, int feat); bool 
perf_header__has_feat(const struct perf_header *header, int feat); @@ -132,16 +129,6 @@ int perf_event__synthesize_attrs(struct perf_tool *tool, int perf_event__process_attr(struct perf_tool *tool, union perf_event *event, struct perf_evlist **pevlist); -int perf_event__synthesize_event_type(struct perf_tool *tool, - u64 event_id, char *name, - perf_event__handler_t process, - struct machine *machine); -int perf_event__synthesize_event_types(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine); -int perf_event__process_event_type(struct perf_tool *tool, - union perf_event *event); - int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct perf_evlist *evlist, perf_event__handler_t process); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1eb58eedcac..d0d9f946a1b 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -241,13 +241,6 @@ static int process_finished_round_stub(struct perf_tool *tool __maybe_unused, return 0; } -static int process_event_type_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - static int process_finished_round(struct perf_tool *tool, union perf_event *event, struct perf_session *session); @@ -274,8 +267,6 @@ static void perf_tool__fill_defaults(struct perf_tool *tool) tool->unthrottle = process_event_stub; if (tool->attr == NULL) tool->attr = process_event_synth_attr_stub; - if (tool->event_type == NULL) - tool->event_type = process_event_type_stub; if (tool->tracing_data == NULL) tool->tracing_data = process_event_synth_tracing_data_stub; if (tool->build_id == NULL) @@ -928,8 +919,6 @@ static int perf_session__process_user_event(struct perf_session *session, union if (err == 0) perf_session__set_id_hdr_size(session); return err; - case PERF_RECORD_HEADER_EVENT_TYPE: - return tool->event_type(tool, event); case PERF_RECORD_HEADER_TRACING_DATA: /* setup for 
reading amidst mmap */ lseek(session->fd, file_offset, SEEK_SET); diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 88f8cbdb8a3..62b16b6165b 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -22,8 +22,6 @@ typedef int (*event_attr_op)(struct perf_tool *tool, union perf_event *event, struct perf_evlist **pevlist); -typedef int (*event_simple_op)(struct perf_tool *tool, union perf_event *event); - typedef int (*event_op2)(struct perf_tool *tool, union perf_event *event, struct perf_session *session); @@ -39,7 +37,6 @@ struct perf_tool { unthrottle; event_attr_op attr; event_op2 tracing_data; - event_simple_op event_type; event_op2 finished_round, build_id; bool ordered_samples; -- cgit v1.2.3-70-g09d2 From 99571ab3d9b342a717295a9c7e2b4495ee19e32a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jul 2013 15:33:57 -0700 Subject: perf tools: Support callchain sorting based on addresses With programs with very large functions it can be useful to distinguish the callgraph nodes on more than just function names. So for example if you have multiple calls to the same function, it ends up being separate nodes in the chain. This patch adds a new key field to the callgraph options, that allows comparing nodes on functions (as today, default) and addresses. Longer term it would be nice to also handle src lines, but that would need more changes and address is a reasonable proxy for it today. I right now reference the global params, as there was no simple way to register a params pointer. 
Signed-off-by: Andi Kleen Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-0uskktybf0e7wrnoi5e9b9it@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 8 ++++++-- tools/perf/builtin-report.c | 19 +++++++++++++++---- tools/perf/util/callchain.c | 7 +++++-- tools/perf/util/callchain.h | 6 ++++++ tools/perf/util/hist.c | 3 ++- 5 files changed, 34 insertions(+), 9 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 747ff50284b..2b8097ee39d 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -115,7 +115,7 @@ OPTIONS --dump-raw-trace:: Dump raw trace in ASCII. --g [type,min[,limit],order]:: +-g [type,min[,limit],order[,key]]:: --call-graph:: Display call chains using type, min percent threshold, optional print limit and order. @@ -129,7 +129,11 @@ OPTIONS - callee: callee based call graph. - caller: inverted caller based call graph. - Default: fractal,0.5,callee. + key can be: + - function: compare on functions + - address: compare on individual code addresses + + Default: fractal,0.5,callee,function. 
-G:: --inverted:: diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index a34c587900c..d785d89ed22 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -667,12 +667,23 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset) } /* get the call chain order */ - if (!strcmp(tok2, "caller")) + if (!strncmp(tok2, "caller", strlen("caller"))) callchain_param.order = ORDER_CALLER; - else if (!strcmp(tok2, "callee")) + else if (!strncmp(tok2, "callee", strlen("callee"))) callchain_param.order = ORDER_CALLEE; else return -1; + + /* Get the sort key */ + tok2 = strtok(NULL, ","); + if (!tok2) + goto setup; + if (!strncmp(tok2, "function", strlen("function"))) + callchain_param.key = CCKEY_FUNCTION; + else if (!strncmp(tok2, "address", strlen("address"))) + callchain_param.key = CCKEY_ADDRESS; + else + return -1; setup: if (callchain_register_param(&callchain_param) < 0) { fprintf(stderr, "Can't register callchain params\n"); @@ -784,8 +795,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, "Only display entries with parent-match"), OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", - "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit and callchain order. " - "Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt), + "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). 
" + "Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt), OPT_BOOLEAN('G', "inverted", &report.inverted_callchain, "alias for inverted call graph"), OPT_CALLBACK(0, "ignore-callees", NULL, "regex", diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 42b6a632fe7..4fee33b229b 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -15,6 +15,7 @@ #include #include +#include "hist.h" #include "util.h" #include "callchain.h" @@ -327,7 +328,8 @@ append_chain(struct callchain_node *root, /* * Lookup in the current node * If we have a symbol, then compare the start to match - * anywhere inside a function. + * anywhere inside a function, unless function + * mode is disabled. */ list_for_each_entry(cnode, &root->val, list) { struct callchain_cursor_node *node; @@ -339,7 +341,8 @@ append_chain(struct callchain_node *root, sym = node->sym; - if (cnode->ms.sym && sym) { + if (cnode->ms.sym && sym && + callchain_param.key == CCKEY_FUNCTION) { if (cnode->ms.sym->start != sym->start) break; } else if (cnode->ip != node->ip) diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 3ee9f67d5af..812d5a0ff2b 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -41,12 +41,18 @@ struct callchain_param; typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_root *, u64, struct callchain_param *); +enum chain_key { + CCKEY_FUNCTION, + CCKEY_ADDRESS +}; + struct callchain_param { enum chain_mode mode; u32 print_limit; double min_percent; sort_chain_func_t sort; enum chain_order order; + enum chain_key key; }; struct callchain_list { diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index a9dd1b9d890..46a0d35a05e 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -24,7 +24,8 @@ enum hist_filter { struct callchain_param callchain_param = { .mode = CHAIN_GRAPH_REL, .min_percent = 0.5, - .order = ORDER_CALLEE + .order = 
ORDER_CALLEE, + .key = CCKEY_FUNCTION }; u16 hists__col_len(struct hists *hists, enum hist_column col) -- cgit v1.2.3-70-g09d2 From b8681711271a0124d9495dae2e1ac0616b0ed47a Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 8 Aug 2013 14:32:22 +0300 Subject: perf report: Set the machines symbol filter Take into use the machines' symbol filter member. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1375961547-30267-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index d785d89ed22..f06a5a228c7 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -49,7 +49,6 @@ struct perf_report { bool mem_mode; struct perf_read_values show_threads_values; const char *pretty_printing_style; - symbol_filter_t annotate_init; const char *cpu_list; const char *symbol_filter_str; float min_percent; @@ -306,7 +305,7 @@ static int process_sample_event(struct perf_tool *tool, int ret; if (perf_event__preprocess_sample(event, machine, &al, sample, - rep->annotate_init) < 0) { + machine->symbol_filter) < 0) { fprintf(stderr, "problem processing %d event, skipping it.\n", event->header.type); return -1; @@ -924,7 +923,8 @@ repeat: */ if (use_browser == 1 && sort__has_sym) { symbol_conf.priv_size = sizeof(struct annotation); - report.annotate_init = symbol__annotate_init; + machines__set_symbol_filter(&session->machines, + symbol__annotate_init); /* * For searching by name on the "Browse map details". 
* providing it only in verbose mode not to bloat too -- cgit v1.2.3-70-g09d2 From e44baa3ea1eaa09d7d247a9b245fcff06561bf96 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 8 Aug 2013 14:32:25 +0300 Subject: perf tools: Remove filter parameter of perf_event__preprocess_sample() Now that the symbol filter is recorded on the machine there is no need to pass it to perf_event__preprocess_sample(). So remove it. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1375961547-30267-7-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 3 +-- tools/perf/builtin-diff.c | 2 +- tools/perf/builtin-mem.c | 3 +-- tools/perf/builtin-report.c | 3 +-- tools/perf/builtin-script.c | 2 +- tools/perf/builtin-top.c | 3 +-- tools/perf/tests/hists_link.c | 4 ++-- tools/perf/util/event.c | 8 ++++---- tools/perf/util/event.h | 3 +-- tools/perf/util/session.c | 3 +-- 10 files changed, 14 insertions(+), 20 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 9754cb140a5..f988d380c52 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -90,8 +90,7 @@ static int process_sample_event(struct perf_tool *tool, struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool); struct addr_location al; - if (perf_event__preprocess_sample(event, machine, &al, sample, - machine->symbol_filter) < 0) { + if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { pr_warning("problem processing %d event, skipping it.\n", event->header.type); return -1; diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 93de3ac177c..f28799e94f2 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -319,7 +319,7 @@ 
static int diff__process_sample_event(struct perf_tool *tool __maybe_unused, { struct addr_location al; - if (perf_event__preprocess_sample(event, machine, &al, sample, NULL) < 0) { + if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { pr_warning("problem processing %d event, skipping it.\n", event->header.type); return -1; diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index f96168c769c..706a1faa955 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -68,8 +68,7 @@ dump_raw_samples(struct perf_tool *tool, struct addr_location al; const char *fmt; - if (perf_event__preprocess_sample(event, machine, &al, sample, - NULL) < 0) { + if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { fprintf(stderr, "problem processing %d event, skipping it.\n", event->header.type); return -1; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f06a5a228c7..958a56a0e39 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -304,8 +304,7 @@ static int process_sample_event(struct perf_tool *tool, struct addr_location al; int ret; - if (perf_event__preprocess_sample(event, machine, &al, sample, - machine->symbol_filter) < 0) { + if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { fprintf(stderr, "problem processing %d event, skipping it.\n", event->header.type); return -1; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 33b2d830eab..a7d623f39c4 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -520,7 +520,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, return 0; } - if (perf_event__preprocess_sample(event, machine, &al, sample, 0) < 0) { + if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { pr_err("problem processing %d event, skipping it.\n", event->header.type); return -1; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 
a63ade22cbc..e37521fc715 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -716,8 +716,7 @@ static void perf_event__process_sample(struct perf_tool *tool, if (event->header.misc & PERF_RECORD_MISC_EXACT_IP) top->exact_samples++; - if (perf_event__preprocess_sample(event, machine, &al, sample, - machine->symbol_filter) < 0 || + if (perf_event__preprocess_sample(event, machine, &al, sample) < 0 || al.filtered) return; diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 89085a9615e..50bfb01183e 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -220,7 +220,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) }; if (perf_event__preprocess_sample(&event, machine, &al, - &sample, 0) < 0) + &sample) < 0) goto out; he = __hists__add_entry(&evsel->hists, &al, NULL, 1, 1); @@ -244,7 +244,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) }; if (perf_event__preprocess_sample(&event, machine, &al, - &sample, 0) < 0) + &sample) < 0) goto out; he = __hists__add_entry(&evsel->hists, &al, NULL, 1, 1); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index cc7c0c9c9ea..f3cf771d362 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -683,8 +683,7 @@ void thread__find_addr_location(struct thread *thread, struct machine *machine, int perf_event__preprocess_sample(const union perf_event *event, struct machine *machine, struct addr_location *al, - struct perf_sample *sample, - symbol_filter_t filter) + struct perf_sample *sample) { u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; struct thread *thread = machine__findnew_thread(machine, event->ip.pid); @@ -709,7 +708,7 @@ int perf_event__preprocess_sample(const union perf_event *event, machine__create_kernel_maps(machine); thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, - event->ip.ip, al, filter); + event->ip.ip, al, 
machine->symbol_filter); dump_printf(" ...... dso: %s\n", al->map ? al->map->dso->long_name : al->level == 'H' ? "[hypervisor]" : ""); @@ -727,7 +726,8 @@ int perf_event__preprocess_sample(const union perf_event *event, dso->long_name))))) goto out_filtered; - al->sym = map__find_symbol(al->map, al->addr, filter); + al->sym = map__find_symbol(al->map, al->addr, + machine->symbol_filter); } if (symbol_conf.sym_list && diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 6119a649d86..15db071d96b 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -234,8 +234,7 @@ struct addr_location; int perf_event__preprocess_sample(const union perf_event *self, struct machine *machine, struct addr_location *al, - struct perf_sample *sample, - symbol_filter_t filter); + struct perf_sample *sample); const char *perf_event__name(unsigned int id); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 4d9028eef34..de16a773685 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1503,8 +1503,7 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event, int print_oneline = print_opts & PRINT_IP_OPT_ONELINE; char s = print_oneline ? ' ' : '\t'; - if (perf_event__preprocess_sample(event, machine, &al, sample, - NULL) < 0) { + if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { error("problem processing %d event, skipping it.\n", event->header.type); return; -- cgit v1.2.3-70-g09d2 From 75562573bab35b129cfd342fc2bcf89da84a6644 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 27 Aug 2013 11:23:09 +0300 Subject: perf tools: Add support for PERF_SAMPLE_IDENTIFIER Enable parsing of samples with sample format bit PERF_SAMPLE_IDENTIFIER. In addition, if the kernel supports it, prefer it to selecting PERF_SAMPLE_ID thereby allowing non-matching sample types. 
Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1377591794-30553-8-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 2 +- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/util/event.h | 3 +- tools/perf/util/evlist.c | 111 +++++++++++++++++++++++++++++++++++++++--- tools/perf/util/evlist.h | 8 ++- tools/perf/util/evsel.c | 101 ++++++++++++++++++++++++++++++++++++-- tools/perf/util/evsel.h | 14 +++++- tools/perf/util/record.c | 89 +++++++++++++++++++++++++++++++-- tools/perf/util/session.c | 2 +- 9 files changed, 310 insertions(+), 22 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 958a56a0e39..9725aa37541 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -365,7 +365,7 @@ static int process_read_event(struct perf_tool *tool, static int perf_report__setup_sample_type(struct perf_report *rep) { struct perf_session *self = rep->session; - u64 sample_type = perf_evlist__sample_type(self->evlist); + u64 sample_type = perf_evlist__combined_sample_type(self->evlist); if (!self->fd_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { if (sort__has_parent) { diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 5b1b5aba722..c4185b9aeb8 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -72,7 +72,7 @@ int test__basic_mmap(void) } evsels[i]->attr.wakeup_events = 1; - perf_evsel__set_sample_id(evsels[i]); + perf_evsel__set_sample_id(evsels[i], false); perf_evlist__add(evlist, evsels[i]); diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 19d911c011c..491333910cf 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -53,7 +53,8 @@ struct 
read_event { (PERF_SAMPLE_IP | PERF_SAMPLE_TID | \ PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | \ PERF_SAMPLE_ID | PERF_SAMPLE_STREAM_ID | \ - PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD) + PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD | \ + PERF_SAMPLE_IDENTIFIER) struct sample_event { struct perf_event_header header; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 9d682e5f718..6a629af5137 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -49,6 +49,21 @@ struct perf_evlist *perf_evlist__new(void) return evlist; } +/** + * perf_evlist__set_id_pos - set the positions of event ids. + * @evlist: selected event list + * + * Events with compatible sample types all have the same id_pos + * and is_pos. For convenience, put a copy on evlist. + */ +void perf_evlist__set_id_pos(struct perf_evlist *evlist) +{ + struct perf_evsel *first = perf_evlist__first(evlist); + + evlist->id_pos = first->id_pos; + evlist->is_pos = first->is_pos; +} + static void perf_evlist__purge(struct perf_evlist *evlist) { struct perf_evsel *pos, *n; @@ -79,15 +94,20 @@ void perf_evlist__delete(struct perf_evlist *evlist) void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) { list_add_tail(&entry->node, &evlist->entries); - ++evlist->nr_entries; + if (!evlist->nr_entries++) + perf_evlist__set_id_pos(evlist); } void perf_evlist__splice_list_tail(struct perf_evlist *evlist, struct list_head *list, int nr_entries) { + bool set_id_pos = !evlist->nr_entries; + list_splice_tail(list, &evlist->entries); evlist->nr_entries += nr_entries; + if (set_id_pos) + perf_evlist__set_id_pos(evlist); } void __perf_evlist__set_leader(struct list_head *list) @@ -349,6 +369,55 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) return NULL; } +static int perf_evlist__event2id(struct perf_evlist *evlist, + union perf_event *event, u64 *id) +{ + const u64 *array = event->sample.array; + ssize_t n; + + n = (event->header.size - sizeof(event->header)) >> 
3; + + if (event->header.type == PERF_RECORD_SAMPLE) { + if (evlist->id_pos >= n) + return -1; + *id = array[evlist->id_pos]; + } else { + if (evlist->is_pos > n) + return -1; + n -= evlist->is_pos; + *id = array[n]; + } + return 0; +} + +static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, + union perf_event *event) +{ + struct hlist_head *head; + struct perf_sample_id *sid; + int hash; + u64 id; + + if (evlist->nr_entries == 1) + return perf_evlist__first(evlist); + + if (perf_evlist__event2id(evlist, event, &id)) + return NULL; + + /* Synthesized events have an id of zero */ + if (!id) + return perf_evlist__first(evlist); + + hash = hash_64(id, PERF_EVLIST__HLIST_BITS); + head = &evlist->heads[hash]; + + hlist_for_each_entry(sid, head, node) { + if (sid->id == id) + return sid->evsel; + } + return NULL; +} + union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) { struct perf_mmap *md = &evlist->mmap[idx]; @@ -659,20 +728,40 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) bool perf_evlist__valid_sample_type(struct perf_evlist *evlist) { - struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; + struct perf_evsel *pos; - list_for_each_entry_continue(pos, &evlist->entries, node) { - if (first->attr.sample_type != pos->attr.sample_type) + if (evlist->nr_entries == 1) + return true; + + if (evlist->id_pos < 0 || evlist->is_pos < 0) + return false; + + list_for_each_entry(pos, &evlist->entries, node) { + if (pos->id_pos != evlist->id_pos || + pos->is_pos != evlist->is_pos) return false; } return true; } -u64 perf_evlist__sample_type(struct perf_evlist *evlist) +u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist) { - struct perf_evsel *first = perf_evlist__first(evlist); - return first->attr.sample_type; + struct perf_evsel *evsel; + + if (evlist->combined_sample_type) + return evlist->combined_sample_type; + + list_for_each_entry(evsel, &evlist->entries, 
node) + evlist->combined_sample_type |= evsel->attr.sample_type; + + return evlist->combined_sample_type; +} + +u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist) +{ + evlist->combined_sample_type = 0; + return __perf_evlist__combined_sample_type(evlist); } bool perf_evlist__valid_read_format(struct perf_evlist *evlist) @@ -727,6 +816,9 @@ u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist) if (sample_type & PERF_SAMPLE_CPU) size += sizeof(data->cpu) * 2; + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + size += sizeof(data->id); out: return size; } @@ -885,7 +977,10 @@ int perf_evlist__start_workload(struct perf_evlist *evlist) int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, struct perf_sample *sample) { - struct perf_evsel *evsel = perf_evlist__first(evlist); + struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event); + + if (!evsel) + return -EFAULT; return perf_evsel__parse_sample(evsel, event, sample); } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 327ababa67b..ab95d727363 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -32,6 +32,9 @@ struct perf_evlist { int nr_fds; int nr_mmaps; int mmap_len; + int id_pos; + int is_pos; + u64 combined_sample_type; struct { int cork_fd; pid_t pid; @@ -85,6 +88,8 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx); int perf_evlist__open(struct perf_evlist *evlist); void perf_evlist__close(struct perf_evlist *evlist); +void perf_evlist__set_id_pos(struct perf_evlist *evlist); +bool perf_can_sample_identifier(void); void perf_evlist__config(struct perf_evlist *evlist, struct perf_record_opts *opts); @@ -121,7 +126,8 @@ void __perf_evlist__set_leader(struct list_head *list); void perf_evlist__set_leader(struct perf_evlist *evlist); u64 perf_evlist__read_format(struct perf_evlist *evlist); -u64 perf_evlist__sample_type(struct perf_evlist *evlist); +u64 
__perf_evlist__combined_sample_type(struct perf_evlist *evlist); +u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist); bool perf_evlist__sample_id_all(struct perf_evlist *evlist); u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 7e328c47f3b..db4e431cb6c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -31,7 +31,7 @@ static struct { #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) -static int __perf_evsel__sample_size(u64 sample_type) +int __perf_evsel__sample_size(u64 sample_type) { u64 mask = sample_type & PERF_SAMPLE_MASK; int size = 0; @@ -47,6 +47,72 @@ static int __perf_evsel__sample_size(u64 sample_type) return size; } +/** + * __perf_evsel__calc_id_pos - calculate id_pos. + * @sample_type: sample type + * + * This function returns the position of the event id (PERF_SAMPLE_ID or + * PERF_SAMPLE_IDENTIFIER) in a sample event i.e. in the array of struct + * sample_event. + */ +static int __perf_evsel__calc_id_pos(u64 sample_type) +{ + int idx = 0; + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + return 0; + + if (!(sample_type & PERF_SAMPLE_ID)) + return -1; + + if (sample_type & PERF_SAMPLE_IP) + idx += 1; + + if (sample_type & PERF_SAMPLE_TID) + idx += 1; + + if (sample_type & PERF_SAMPLE_TIME) + idx += 1; + + if (sample_type & PERF_SAMPLE_ADDR) + idx += 1; + + return idx; +} + +/** + * __perf_evsel__calc_is_pos - calculate is_pos. + * @sample_type: sample type + * + * This function returns the position (counting backwards) of the event id + * (PERF_SAMPLE_ID or PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if + * sample_id_all is used there is an id sample appended to non-sample events. 
+ */ +static int __perf_evsel__calc_is_pos(u64 sample_type) +{ + int idx = 1; + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + return 1; + + if (!(sample_type & PERF_SAMPLE_ID)) + return -1; + + if (sample_type & PERF_SAMPLE_CPU) + idx += 1; + + if (sample_type & PERF_SAMPLE_STREAM_ID) + idx += 1; + + return idx; +} + +void perf_evsel__calc_id_pos(struct perf_evsel *evsel) +{ + evsel->id_pos = __perf_evsel__calc_id_pos(evsel->attr.sample_type); + evsel->is_pos = __perf_evsel__calc_is_pos(evsel->attr.sample_type); +} + void hists__init(struct hists *hists) { memset(hists, 0, sizeof(*hists)); @@ -63,6 +129,7 @@ void __perf_evsel__set_sample_bit(struct perf_evsel *evsel, if (!(evsel->attr.sample_type & bit)) { evsel->attr.sample_type |= bit; evsel->sample_size += sizeof(u64); + perf_evsel__calc_id_pos(evsel); } } @@ -72,12 +139,19 @@ void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel, if (evsel->attr.sample_type & bit) { evsel->attr.sample_type &= ~bit; evsel->sample_size -= sizeof(u64); + perf_evsel__calc_id_pos(evsel); } } -void perf_evsel__set_sample_id(struct perf_evsel *evsel) +void perf_evsel__set_sample_id(struct perf_evsel *evsel, + bool can_sample_identifier) { - perf_evsel__set_sample_bit(evsel, ID); + if (can_sample_identifier) { + perf_evsel__reset_sample_bit(evsel, ID); + perf_evsel__set_sample_bit(evsel, IDENTIFIER); + } else { + perf_evsel__set_sample_bit(evsel, ID); + } evsel->attr.read_format |= PERF_FORMAT_ID; } @@ -90,6 +164,7 @@ void perf_evsel__init(struct perf_evsel *evsel, INIT_LIST_HEAD(&evsel->node); hists__init(&evsel->hists); evsel->sample_size = __perf_evsel__sample_size(attr->sample_type); + perf_evsel__calc_id_pos(evsel); } struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx) @@ -509,7 +584,7 @@ void perf_evsel__config(struct perf_evsel *evsel, * We need ID even in case of single event, because * PERF_SAMPLE_READ process ID specific data. 
*/ - perf_evsel__set_sample_id(evsel); + perf_evsel__set_sample_id(evsel, false); /* * Apply group format only if we belong to group @@ -1088,6 +1163,11 @@ static int perf_evsel__parse_id_sample(const struct perf_evsel *evsel, array += ((event->header.size - sizeof(event->header)) / sizeof(u64)) - 1; + if (type & PERF_SAMPLE_IDENTIFIER) { + sample->id = *array; + array--; + } + if (type & PERF_SAMPLE_CPU) { u.val64 = *array; if (swapped) { @@ -1184,6 +1264,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, if (evsel->sample_size + sizeof(event->header) > event->header.size) return -EFAULT; + data->id = -1ULL; + if (type & PERF_SAMPLE_IDENTIFIER) { + data->id = *array; + array++; + } + if (type & PERF_SAMPLE_IP) { data->ip = *array; array++; @@ -1214,7 +1300,6 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, array++; } - data->id = -1ULL; if (type & PERF_SAMPLE_ID) { data->id = *array; array++; @@ -1396,6 +1481,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, array = event->sample.array; + if (type & PERF_SAMPLE_IDENTIFIER) { + *array = sample->id; + array++; + } + if (type & PERF_SAMPLE_IP) { *array = sample->ip; array++; @@ -1584,6 +1674,7 @@ static int sample_type__fprintf(FILE *fp, bool *first, u64 value) bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU), bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), + bit_name(IDENTIFIER), { .name = NULL, } }; #undef bit_name diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 532a5f925da..4a7bdc713ba 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -48,6 +48,12 @@ struct perf_sample_id { * @name - Can be set to retain the original event name passed by the user, * so that when showing results in tools such as 'perf stat', we * show the name used, not some alias. 
+ * @id_pos: the position of the event id (PERF_SAMPLE_ID or + * PERF_SAMPLE_IDENTIFIER) in a sample event i.e. in the array of + * struct sample_event + * @is_pos: the position (counting backwards) of the event id (PERF_SAMPLE_ID or + * PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if sample_id_all + * is used there is an id sample appended to non-sample events */ struct perf_evsel { struct list_head node; @@ -74,6 +80,8 @@ struct perf_evsel { } handler; struct cpu_map *cpus; unsigned int sample_size; + int id_pos; + int is_pos; bool supported; bool needs_swap; /* parse modifier helper */ @@ -104,6 +112,9 @@ void perf_evsel__delete(struct perf_evsel *evsel); void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts); +int __perf_evsel__sample_size(u64 sample_type); +void perf_evsel__calc_id_pos(struct perf_evsel *evsel); + bool perf_evsel__is_cache_op_valid(u8 type, u8 op); #define PERF_EVSEL__MAX_ALIASES 8 @@ -142,7 +153,8 @@ void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel, #define perf_evsel__reset_sample_bit(evsel, bit) \ __perf_evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit) -void perf_evsel__set_sample_id(struct perf_evsel *evsel); +void perf_evsel__set_sample_id(struct perf_evsel *evsel, + bool use_sample_identifier); int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, const char *filter); diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 9b5ef793313..18d73aa2f0f 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -1,11 +1,83 @@ #include "evlist.h" #include "evsel.h" #include "cpumap.h" +#include "parse-events.h" + +typedef void (*setup_probe_fn_t)(struct perf_evsel *evsel); + +static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) +{ + struct perf_evlist *evlist; + struct perf_evsel *evsel; + int err = -EAGAIN, fd; + + evlist = perf_evlist__new(); + if (!evlist) + return -ENOMEM; + + if (parse_events(evlist, str)) + goto 
out_delete; + + evsel = perf_evlist__first(evlist); + + fd = sys_perf_event_open(&evsel->attr, -1, cpu, -1, 0); + if (fd < 0) + goto out_delete; + close(fd); + + fn(evsel); + + fd = sys_perf_event_open(&evsel->attr, -1, cpu, -1, 0); + if (fd < 0) { + if (errno == EINVAL) + err = -EINVAL; + goto out_delete; + } + close(fd); + err = 0; + +out_delete: + perf_evlist__delete(evlist); + return err; +} + +static bool perf_probe_api(setup_probe_fn_t fn) +{ + const char *try[] = {"cycles:u", "instructions:u", "cpu-clock", NULL}; + struct cpu_map *cpus; + int cpu, ret, i = 0; + + cpus = cpu_map__new(NULL); + if (!cpus) + return false; + cpu = cpus->map[0]; + cpu_map__delete(cpus); + + do { + ret = perf_do_probe_api(fn, cpu, try[i++]); + if (!ret) + return true; + } while (ret == -EAGAIN && try[i]); + + return false; +} + +static void perf_probe_sample_identifier(struct perf_evsel *evsel) +{ + evsel->attr.sample_type |= PERF_SAMPLE_IDENTIFIER; +} + +bool perf_can_sample_identifier(void) +{ + return perf_probe_api(perf_probe_sample_identifier); +} void perf_evlist__config(struct perf_evlist *evlist, struct perf_record_opts *opts) { struct perf_evsel *evsel; + bool use_sample_identifier = false; + /* * Set the evsel leader links before we configure attributes, * since some might depend on this info. 
@@ -16,10 +88,21 @@ void perf_evlist__config(struct perf_evlist *evlist, if (evlist->cpus->map[0] < 0) opts->no_inherit = true; - list_for_each_entry(evsel, &evlist->entries, node) { + list_for_each_entry(evsel, &evlist->entries, node) perf_evsel__config(evsel, opts); - if (evlist->nr_entries > 1) - perf_evsel__set_sample_id(evsel); + if (evlist->nr_entries > 1) { + struct perf_evsel *first = perf_evlist__first(evlist); + + list_for_each_entry(evsel, &evlist->entries, node) { + if (evsel->attr.sample_type == first->attr.sample_type) + continue; + use_sample_identifier = perf_can_sample_identifier(); + break; + } + list_for_each_entry(evsel, &evlist->entries, node) + perf_evsel__set_sample_id(evsel, use_sample_identifier); } + + perf_evlist__set_id_pos(evlist); } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index c3ac483be48..07642a7b934 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -739,7 +739,7 @@ static void perf_session__print_tstamp(struct perf_session *session, union perf_event *event, struct perf_sample *sample) { - u64 sample_type = perf_evlist__sample_type(session->evlist); + u64 sample_type = __perf_evlist__combined_sample_type(session->evlist); if (event->header.type != PERF_RECORD_SAMPLE && !perf_evlist__sample_id_all(session->evlist)) { -- cgit v1.2.3-70-g09d2